search_keywords_split
- Versions
- 4.6
search_keywords_split($text)
Splits a string into component words according to indexing rules.
Code
modules/search.module, line 236
<?php
/**
 * Splits a string into component words according to indexing rules.
 *
 * The result of the most recent call is memoised in static variables, so
 * calling the function twice in a row with the same input skips the
 * processing steps on the second call.
 *
 * @param $text
 *   The raw text to split.
 *
 * @return
 *   An array of words: the pieces produced by exploding the simplified text
 *   on single spaces, each passed through _search_keywords_truncate().
 *   The array may contain empty strings when the simplified text has leading,
 *   trailing or consecutive spaces.
 */
function search_keywords_split($text) {
  static $last = null;
  static $lastsplit = null;

  // Compare strictly against the raw input of the previous call. A loose
  // comparison would treat '' as equal to the initial null (returning null
  // instead of an array) and would conflate numeric-looking strings such as
  // '1e3' and '1000'.
  if ($last !== null && $last === $text) {
    return $lastsplit;
  }

  // Remember the unmodified input; $text is rewritten below and can therefore
  // not serve as the cache key afterwards.
  $original = $text;

  // Decode entities to UTF-8
  $text = decode_entities($text);

  // Call an external processor for word handling.
  // NOTE(review): the return value is not used, so this presumably modifies
  // $text by reference -- confirm against search_preprocess().
  search_preprocess($text);

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, we consider a group of numerical characters
  // separated only by punctuation characters to be one piece.
  // This also means that searching for e.g. '20/03/1984' also returns
  // results with '20-03-1984' in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behaviour with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, we consider all punctuation,
  // marks, spacers, etc, to be a word boundary.
  $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

  // Process words
  $words = explode(' ', $text);
  array_walk($words, '_search_keywords_truncate');

  // Save last keyword result, keyed on the caller's original input so that a
  // repeated call with the same argument hits the cache.
  $last = $original;
  $lastsplit = $words;

  return $words;
}
?>
Login or register to post comments