search.module
<?php
/**
 * Body of a PCRE character class (to be wrapped in [...] and used with the
 * /u modifier) listing the code points that search_keywords_split() collapses
 * into a single space before splitting text into words.
 */
define('PREG_CLASS_SEARCH_EXCLUDE', '\x{0}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{7f}-\x{a1}\x{ab}\x{ad}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{488}\x{489}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{600}-\x{603}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{6dd}\x{6de}\x{700}-\x{70d}\x{70f}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17b4}\x{17b5}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{180e}\x{1944}\x{1945}\x{2000}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{205f}-\x{2063}\x{206a}-\x{206f}\x{207d}\x{207e}\x{208d}\x{208e}\x{20dd}-\x{20e0}\x{20e2}-\x{20e4}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3000}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{d800}\x{db7f}\x{db80}\x{dbff}\x{dc00}\x{dfff}\x{e000}\x{f8ff}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{feff}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{fff9}-\x{fffb}\x{10100}\x{10101}\x{1039f}\x{1d173}-\x{1d17a}\x{e0001}\x{e0020}-\x{e007f}\x{f0000}\x{ffffd}\x{100000}');
/**
 * Body of a PCRE character class (to be wrapped in [...] and used with the
 * /u modifier) starting with the ASCII digits 0-9 and followed by further
 * code-point ranges (presumably numeric characters of other scripts).
 * search_keywords_split() uses it to recognise runs of numbers so that
 * punctuation between them can be dropped.
 */
define('PREG_CLASS_NUMBERS', '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}\x{10107}-\x{10133}\x{10320}-\x{10323}\x{1034a}\x{104a0}-\x{104a9}\x{1d7ce}-\x{1d7ff}');
/**
 * Body of a PCRE character class (to be wrapped in [...] and used with the
 * /u modifier) listing the code points that search_keywords_split() removes
 * when they appear between two runs of PREG_CLASS_NUMBERS characters.
 */
define('PREG_CLASS_PUNCTUATION', '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-\x{ff65}\x{10100}\x{10101}\x{1039f}');
/**
 * Returns the help text of the search module for a given help section.
 *
 * @param $section
 *   Identifier of the requested help section.
 * @return
 *   Translated help text for 'admin/modules#description',
 *   'admin/settings/search' and 'search#noresults'; nothing for any other
 *   section.
 */
function search_help($section = 'admin/help#search') {
  if ($section == 'admin/modules#description') {
    return t('Enables site-wide keyword searching.');
  }

  if ($section == 'admin/settings/search') {
    return t('
<p>The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.</p>
');
  }

  if ($section == 'search#noresults') {
    // The hint about short words quotes the configured minimum word size.
    $replacements = array('%number' => variable_get('minimum_word_size', 3));
    return t('<p><ul>
<li>Check if your spelling is correct.</li>
<li>Try using wildcards: <em>walk*</em> matches <em>walker</em>, <em>walking</em>, ...</li>
<li>Use longer words (words shorter than %number letters are ignored).</li>
</ul></p>', $replacements);
  }
}
/**
 * Lists the permission names defined by the search module.
 *
 * @return
 *   Array of permission name strings.
 */
function search_perm() {
  $permissions = array('search content', 'administer search');
  return $permissions;
}
/**
 * Provides the "Search form" block.
 *
 * @param $op
 *   'list' to describe the available blocks, 'view' to render one.
 * @param $delta
 *   Block identifier; not used because only a single block is defined.
 * @return
 *   For 'list': array of block descriptions. For 'view': array with the
 *   block 'content' and 'subject', but only when the current user may search
 *   and the current page is not already a search page. Nothing otherwise.
 */
function search_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    $blocks = array();
    $blocks[0]['info'] = t('Search form');
    return $blocks;
  }
  else if ($op == 'view' && user_access('search content') && arg(0) != 'search') {
    $block = array();
    // Empty action/keys/prompt: the form falls back to its default action
    // and is rendered without a prompt label.
    $block['content'] = search_form('', '', null, '');
    $block['subject'] = t('Search');
    return $block;
  }
}
/**
 * Declares the menu items of the search module.
 *
 * @param $may_cache
 *   When TRUE, the static items (search page and search settings page) are
 *   returned. When FALSE and the current path starts with 'search', one local
 *   task per module that implements the 'search' hook and reports a name is
 *   returned, with the current keywords appended to each task path.
 * @return
 *   Array of menu item definitions (possibly empty).
 */
function search_menu($may_cache) {
  $items = array();

  if (!$may_cache) {
    if (arg(0) == 'search') {
      // Carry the current keywords over to every tab so that switching tabs
      // repeats the search.
      $current = search_get_keys();
      $suffix = strlen($current) ? '/'. urlencode($current) : '';

      foreach (module_list() as $name) {
        if (!module_hook($name, 'search')) {
          continue;
        }
        $title = module_invoke($name, 'search', 'name');
        if (!$title) {
          continue;
        }
        $items[] = array('path' => 'search/'. $name . $suffix, 'title' => $title,
          'callback' => 'search_view',
          'access' => user_access('search content'),
          'type' => MENU_LOCAL_TASK);
      }
    }
    return $items;
  }

  $items[] = array('path' => 'search', 'title' => t('search'),
    'callback' => 'search_view',
    'access' => user_access('search content'),
    'type' => MENU_SUGGESTED_ITEM);
  $items[] = array('path' => 'admin/settings/search', 'title' => t('search'),
    'callback' => 'search_admin',
    'type' => MENU_NORMAL_ITEM,
    'access' => user_access('administer site configuration'));

  return $items;
}
/**
 * Menu callback: prints the search settings page.
 *
 * On POST the settings are saved; if the submitted minimum word size differs
 * from the stored one, search_wipe() is called first so the index is rebuilt.
 * The page shows the indexing progress reported by all modules implementing
 * the 'search' hook, followed by the throttle and word-length settings.
 */
function search_admin() {
  if ($_POST) {
    // Only compare when the field was actually submitted; a POST without it
    // must not trigger a re-index.
    $submitted = isset($_POST['edit']['minimum_word_size']) ? $_POST['edit']['minimum_word_size'] : NULL;
    if ($submitted !== NULL && variable_get('minimum_word_size', 3) != $submitted) {
      drupal_set_message(t('The index will be rebuilt.'));
      search_wipe();
    }
    system_settings_save();
  }

  // Sum the indexing progress over all modules that implement the hook.
  $remaining = 0;
  $total = 0;
  foreach (module_list() as $module) {
    if (module_hook($module, 'search')) {
      $module_status = module_invoke($module, 'search', 'status');
      if (is_array($module_status)) {
        $remaining += isset($module_status['remaining']) ? $module_status['remaining'] : 0;
        $total += isset($module_status['total']) ? $module_status['total'] : 0;
      }
    }
  }

  $count = format_plural($remaining, 'There is 1 item left to index.', 'There are %count items left to index.');
  // max(1, $total) avoids a division by zero when nothing is indexable.
  $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) . '%';
  $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';
  $output = form_group(t('Indexing status'), $status);

  $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
  $group = form_select(t('Items to index per cron run'), 'search_cron_limit', variable_get('search_cron_limit', 100), $items, t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
  $output .= form_group(t('Indexing throttle'), $group);

  $group = '<em>'. t('<p>Changing the setting below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>';
  $group .= form_textfield(t('Minimum word length to index'), 'minimum_word_size', variable_get('minimum_word_size', 3), 3, 3, t('The number of characters a word has to be to be indexed. Words shorter than this will not be searchable.'));
  $group .= form_textfield(t('Minimum word length to search for'), 'remove_short', variable_get('remove_short', 3), 3, 3, t('The number of characters a word has to be to be searched for, including wildcard characters.'));
  $output .= form_group(t('Indexing settings'), $group);

  print theme('page', system_settings_form($output));
}
/**
 * Wipes one item from the search index, or asks all modules to reset.
 *
 * @param $sid
 *   Id of the item to remove from the index.
 * @param $type
 *   Type of the item to remove from the index.
 *
 * When both arguments are (loosely) NULL, the 'reset' operation of the
 * 'search' hook is invoked on all modules instead of deleting rows.
 */
function search_wipe($sid = NULL, $type = NULL) {
  $single_item = ($type != NULL || $sid != NULL);

  if ($single_item) {
    // Remove the item's own words, then the words it contributed to other
    // items (rows whose from-columns point at it).
    db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'", $sid, $type);
    db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
    return;
  }

  module_invoke_all('search', 'reset');
}
/**
 * Records a word as dirty, or returns all words recorded so far.
 *
 * @param $word
 *   Word to record. Omit (or pass null) to read the list instead.
 * @return
 *   When called without a word: array whose keys are the recorded words.
 *   Nothing when a word is being recorded.
 */
function search_dirty($word = null) {
  static $dirty = array();

  if ($word === null) {
    return $dirty;
  }

  $dirty[$word] = true;
}
/**
 * Cron run for the search module.
 *
 * Invokes 'update_index' on every module, refreshes the per-word totals in
 * {search_total} for all words recorded through search_dirty(), and finally
 * removes totals for words that no longer occur in {search_index}.
 */
function search_cron() {
  $modules = module_list();
  foreach ($modules as $module) {
    module_invoke($module, 'update_index');
  }

  // Recompute the total score of every word touched during indexing.
  $dirty_words = array_keys(search_dirty());
  foreach ($dirty_words as $word) {
    $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    db_query("UPDATE {search_total} SET count = %d WHERE word = '%s'", $total, $word);
    if (db_affected_rows()) {
      continue;
    }
    // No row was changed by the UPDATE: insert only if the word really has
    // no row yet.
    $exists = db_result(db_query("SELECT COUNT(*) FROM {search_total} WHERE word = '%s'", $word));
    if ($exists) {
      continue;
    }
    db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %d)", $word, $total);
  }

  // Drop totals of words that have no index rows left.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($orphan = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $orphan->realword);
  }
}
/**
 * Splits a string into words for indexing or searching.
 *
 * The text is entity-decoded, run through search_preprocess(), stripped of
 * punctuation between numbers and of '.', '_' and '-', and every run of
 * PREG_CLASS_SEARCH_EXCLUDE characters is turned into one space. The result
 * is split on spaces and each word is truncated to 50 characters.
 *
 * The result of the most recent call is memoized, keyed on the unprocessed
 * input, so that repeating the call with the same text is free.
 *
 * @param $text
 *   Text to split.
 * @return
 *   Array of words; may contain empty strings where the text started or
 *   ended with excluded characters.
 */
function search_keywords_split($text) {
  static $last = null;
  static $lastsplit = null;

  // Strict comparison: a loose one would treat an empty string as equal to
  // the initial null and hand back a null "result" on the first call.
  if ($last !== null && $last === $text) {
    return $lastsplit;
  }
  $original = $text;

  $text = decode_entities($text);
  search_preprocess($text);

  // Join numbers separated by punctuation (e.g. dates, thousands separators)
  // into a single token.
  $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
  // Glue together words joined by periods, underscores or dashes.
  $text = preg_replace('/[._-]+/', '', $text);
  $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE . ']+/u', ' ', $text);

  $words = explode(' ', $text);
  array_walk($words, '_search_keywords_truncate');

  // Remember the raw input (not the processed text) as the cache key.
  $last = $original;
  $lastsplit = $words;
  return $words;
}
/**
 * array_walk() callback: shortens a word in place via truncate_utf8($text, 50).
 *
 * @param $text
 *   Word to shorten; modified by reference.
 */
function _search_keywords_truncate(&$text) {
  $shortened = truncate_utf8($text, 50);
  $text = $shortened;
}
/**
 * Builds a looser variant of a keyword string by wrapping every
 * space-separated word in '*' wildcards.
 *
 * @param $text
 *   Keyword string.
 * @return
 *   The wildcarded keyword string, or NULL when it is no different from the
 *   trimmed input.
 */
function search_keywords_variation($text) {
  $trimmed = trim($text);

  // Putting '* *' in place of every space and adding a '*' at both ends
  // surrounds each word with wildcards.
  $wildcarded = '*'. str_replace(' ', '* *', $trimmed) .'*';
  // Collapse runs of wildcards created where the input already had some.
  $wildcarded = preg_replace('/\*+/', '*', $wildcarded);

  if ($wildcarded != $trimmed) {
    return $wildcarded;
  }
  return NULL;
}
/**
 * Passes text through every module that implements 'search_preprocess'.
 *
 * Each implementation receives the output of the previous one.
 *
 * @param $text
 *   Text to process; replaced in place by the final result.
 */
function search_preprocess(&$text) {
  $implementors = module_implements('search_preprocess');
  foreach ($implementors as $implementor) {
    $processed = module_invoke($implementor, 'search_preprocess', $text);
    $text = $processed;
  }
}
/**
 * Updates the full-text index for one item.
 *
 * The HTML in $text is reduced to a small set of weighted tags. Words inside
 * those tags score higher (weights accumulate while tags are nested). Words
 * inside a link that points at a node are credited to the linked node
 * instead of the indexed item. Existing index rows of the item are wiped and
 * the new scores inserted; every touched word is reported to search_dirty().
 *
 * @param $sid
 *   Id of the item being indexed.
 * @param $type
 *   Type of the item being indexed.
 * @param $text
 *   HTML content of the item.
 */
function search_index($sid, $type, $text) {
  global $base_url;

  $minimum_word_size = variable_get('minimum_word_size', 3);
  // Captures the path of an href, with the site base URL and '?q=' optional.
  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url) .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';

  // Score added to words inside each of these tags.
  $tags = array('h1' => 21,
    'h2' => 18,
    'h3' => 15,
    'h4' => 12,
    'h5' => 9,
    'h6' => 6,
    'u' => 5,
    'b' => 5,
    'strong' => 5,
    'em' => 5,
    'a' => 10);

  // Pad tags with spaces so words on both sides of a tag stay separate, then
  // drop every tag that carries no weight.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split into alternating text / tag-content pieces (text comes first).
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  $tag = false;
  $link = false;
  $linknid = 0;
  $score = 1;
  // $results[0] holds the item's own words; $results[nid] holds words
  // credited to a linked node.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      list($tagname) = explode(' ', $value, 2);
      $tagname = strtolower($tagname);

      // substr() instead of an offset lookup: the tag name may be empty.
      if (substr($tagname, 0, 1) == '/') {
        $closed = substr($tagname, 1);
        if (isset($tags[$closed])) {
          $score -= $tags[$closed];
        }
        // Unbalanced closing tags must not push the score below the base.
        if ($score < 1) {
          $score = 1;
        }
        if ($tagname == '/a') {
          $link = false;
        }
      }
      else {
        if ($tagname == 'a') {
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                $link = true;
              }
            }
          }
        }
        if (isset($tags[$tagname])) {
          $score += $tags[$tagname];
        }
      }
    }
    else {
      if ($value != '') {
        $words = search_keywords_split($value);
        foreach ($words as $word) {
          if (string_length($word) >= $minimum_word_size) {
            $word = strtolower($word);
            $target = $link ? $linknid : 0;
            if (!isset($results[$target][$word])) {
              $results[$target][$word] = 0;
            }
            $results[$target][$word] += $score;
          }
        }
      }
    }
    $tag = !$tag;
  }

  search_wipe($sid, $type);

  // Words of the item itself.
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Link text, stored against the linked node with this item as the source.
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}
/**
 * Runs a keyword search against the index.
 *
 * @param $keywords
 *   Keyword string; '*' acts as a wildcard.
 * @param $type
 *   Item type to restrict the search to.
 * @param $join
 *   SQL fragment inserted after "{search_index} i" to join extra tables.
 *   Inserted into the query as-is.
 * @param $where
 *   SQL condition ANDed with the keyword conditions. Inserted as-is.
 * @param $variation
 *   When true, words that are too short are reported to the user, and a
 *   search without results is retried once with the looser keywords from
 *   search_keywords_variation().
 * @return
 *   Array of matching item ids for the current pager page (15 per page),
 *   best score first; empty array when nothing usable was searched or found.
 */
function do_search($keywords, $type, $join = '', $where = '1', $variation = true) {
  // U+FFFD (UTF-8 encoded) stands in for '*' while the keywords go through
  // search_keywords_split(), which would strip a literal '*'. Written as a
  // byte escape so the value survives any re-encoding of this file.
  $wildcard = "\xEF\xBF\xBD";
  $minimum_length = variable_get('remove_short', 3);

  $keys = preg_replace('!\*+!', $wildcard, $keywords);
  $keys = search_keywords_split($keys);

  $words = array();
  $arguments = array($type);
  $refused = array();
  foreach ($keys as $word) {
    if (string_length($word) < $minimum_length) {
      if ($word != '') {
        // Show the word the way the user typed it.
        $refused[] = str_replace($wildcard, '*', $word);
      }
      continue;
    }
    if (strpos($word, $wildcard) !== false) {
      $words[] = "i.word LIKE '%s'";
      $arguments[] = str_replace($wildcard, '%', strtolower($word));
    }
    else {
      $words[] = "i.word = '%s'";
      $arguments[] = strtolower($word);
    }
  }

  if (count($refused) && $variation) {
    $message = format_plural(count($refused),
      'The word %words was not included because it is too short.',
      'The words %words were not included because they were too short.');
    drupal_set_message(strtr($message, array('%words' => theme('placeholder', implode(', ', $refused)))));
  }

  if (count($words) == 0) {
    return array();
  }

  $conditions = $where .' AND i.type = \'%s\' AND ('. implode(' OR ', $words) .')';

  $count = db_num_rows(db_query("SELECT DISTINCT i.sid, i.type FROM {search_index} i $join WHERE $conditions", $arguments));
  if ($count == 0) {
    if ($variation && $loose = search_keywords_variation($keywords)) {
      // Retry once with wildcards around every word; $variation = false
      // prevents further recursion and duplicate messages.
      return do_search($loose, $type, $join, $where, false);
    }
    else {
      return array();
    }
  }

  // The number of results is already known, so the pager's count query only
  // has to select that constant.
  $count_query = "SELECT $count";
  $query = "SELECT i.type, i.sid, SUM(i.score/t.count) AS score FROM {search_index} i $join INNER JOIN {search_total} t ON i.word = t.word WHERE $conditions GROUP BY i.type, i.sid ORDER BY score DESC";
  $result = pager_query($query, 15, 0, $count_query, $arguments);

  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item->sid;
  }
  return $results;
}
/**
 * Extracts the search keywords from the current request.
 *
 * @return
 *   Everything after the second '/' of $_GET['q'] when the path has at least
 *   three parts; otherwise $_REQUEST['keys'], or NULL when that is not set.
 */
function search_get_keys() {
  $q = isset($_GET['q']) ? $_GET['q'] : '';
  // Limit of 3 keeps slashes inside the keywords intact.
  $path = explode('/', $q, 3);
  if (count($path) == 3) {
    return $path[2];
  }
  return isset($_REQUEST['keys']) ? $_REQUEST['keys'] : NULL;
}
/**
 * Menu callback: prints the search page.
 *
 * Posted keywords are redirected to a 'search/<type>/<keys>' URL and a
 * missing type is redirected to 'search/node'. Otherwise, for users with the
 * 'search content' permission, the search is logged and executed and the
 * form is printed together with the results (or the no-results help).
 * Other users get the access denied page.
 */
function search_view() {
  $type = arg(1);

  $posted_keys = isset($_POST['edit']['keys']) ? $_POST['edit']['keys'] : '';
  if ($posted_keys) {
    if ($type == '') {
      $type = 'node';
    }
    drupal_goto('search/'. urlencode($type) .'/'. urlencode($posted_keys));
  }
  else if ($type == '') {
    drupal_goto('search/node');
  }

  $keys = search_get_keys();
  if (user_access('search content')) {
    // Stays empty when no search is performed.
    $results = '';
    if (trim($keys)) {
      watchdog('search',
        t('Search: %keys (%type).', array('%keys' => theme('placeholder', $keys), '%type' => module_invoke($type, 'search', 'name'))),
        WATCHDOG_NOTICE,
        l(t('results'), 'search/'. urlencode($type) .'/'. urlencode($keys))
      );

      $results = search_data($keys, $type);
      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
      }
    }
    else if (isset($_POST['edit'])) {
      // The form was submitted without any keywords.
      form_set_error('keys', t('Please enter some keywords.'));
    }

    $output = search_form(NULL, $keys, $type);
    $output .= $results;
    print theme('page', $output);
  }
  else {
    drupal_access_denied();
  }
}
/**
 * Renders a search form.
 *
 * @param $action
 *   Form action URL. Defaults to the search page of $type.
 * @param $keys
 *   Keywords to pre-fill the text field with.
 * @param $type
 *   Search type (module name) the form submits to. Defaults to 'node'.
 * @param $prompt
 *   Label shown with the field. NULL selects the default prompt; an empty
 *   string renders no label and a narrower text field.
 * @return
 *   HTML of the form.
 */
function search_form($action = '', $keys = '', $type = null, $prompt = null) {
  // The type default must be applied before it is used in the action URL;
  // otherwise a form without a type posts to 'search/'.
  if (!$type) {
    $type = 'node';
  }
  if (!$action) {
    $action = url('search/'. $type);
  }
  if (is_null($prompt)) {
    $prompt = t('Enter your keywords');
  }

  $output = ' <div class="search-form">';
  $box = '<div class="container-inline">';
  $box .= form_textfield('', 'keys', $keys, $prompt ? 40 : 20, 255);
  $box .= form_submit(t('Search'));
  $box .= '</div>';
  $output .= form_item($prompt, $box);
  $output .= '</div>';

  return form($output, 'post', $action);
}
/**
 * Performs a search through a module's 'search' hook and themes the results.
 *
 * @param $keys
 *   Keywords to search for.
 * @param $type
 *   Name of the module whose 'search' hook performs the search.
 * @return
 *   HTML definition list of themed results followed by a pager, or an empty
 *   string when no keys are given, the module has no 'search' hook, or the
 *   hook returns no results.
 */
function search_data($keys = NULL, $type = 'node') {
  if (!isset($keys) || !module_hook($type, 'search')) {
    return '';
  }

  $results = module_invoke($type, 'search', 'search', $keys);
  if (!is_array($results) || !count($results)) {
    return '';
  }

  $html = '<dl class="search-results">';
  foreach ($results as $entry) {
    $html .= theme('search_item', $entry, $type);
  }
  $html .= '</dl>';
  $html .= theme('pager', NULL, 15, 0);

  return $html;
}
/**
 * Builds an excerpt of a text with the search keywords highlighted.
 *
 * Occurrences of the keywords are located in turn until roughly 256 bytes of
 * surrounding context have been collected. Overlapping context ranges are
 * merged, joined with ' ... ' and every keyword is wrapped in <strong>.
 *
 * @param $keys
 *   Keyword string.
 * @param $text
 *   Text (may contain HTML, which is stripped) to take the excerpt from.
 * @return
 *   The excerpt. When no keyword is found, the start of the text truncated
 *   with truncate_utf8($text, 256) followed by ' ...'.
 */
function search_excerpt($keys, $text) {
  // The splitter can return empty strings. Drop them: an empty alternative
  // in the highlight pattern below would match at every word boundary and
  // litter the excerpt with empty <strong></strong> pairs.
  $keys = array();
  foreach (search_keywords_split(func_get_arg(0)) as $candidate) {
    if (strlen($candidate) > 0) {
      $keys[] = $candidate;
    }
  }

  $text = strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text));
  // Make the keywords safe for use inside a '/'-delimited pattern.
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // $ranges maps start offset => end offset of each context snippet.
  $ranges = array();
  // $included remembers, per keyword, where to resume searching.
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if ($length >= 256) {
        break;
      }
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      if (preg_match('/\b'. $key .'\b/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
        // Start at the first space found from 60 bytes before the match and
        // end at the last space within 80 bytes after its start, so that
        // the snippet begins and ends between words.
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== false) {
          $end = substr($text, $p, 80);
          if (($s = strrpos($end, ' ')) !== false) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        // No further occurrence of this keyword.
        unset($workkeys[$k]);
      }
    }
  }

  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) . ' ...';
  }

  // Merge overlapping ranges, walking them in order of start offset.
  ksort($ranges);
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is only needed when the excerpt does not start at
  // the very beginning of the text.
  $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';
  $text = preg_replace('/\b('. implode('|', $keys) .')\b/iu', '<strong>\0</strong>', $text);
  return $text;
}
/**
 * array_walk() callback: escapes a keyword in place for use inside a
 * '/'-delimited regular expression.
 *
 * @param $text
 *   Keyword to escape; modified by reference.
 */
function _search_excerpt_replace(&$text) {
  $quoted = preg_quote($text, '/');
  $text = $quoted;
}
/**
 * Themes a single search result.
 *
 * If the module named by $type implements 'search_item', its output is used.
 * Otherwise a <dt>/<dd> pair is built from the item.
 *
 * @param $item
 *   Result array. 'link' and 'title' are used for the heading; 'type',
 *   'user', 'date', 'extra' (array) and 'snippet' are optional and, except
 *   for the formatted date, inserted into the output as-is.
 * @param $type
 *   Name of the module that produced the result.
 * @return
 *   HTML for the result.
 */
function theme_search_item($item, $type) {
  if (module_hook($type, 'search_item')) {
    $output = module_invoke($type, 'search_item', $item);
  }
  else {
    $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';

    // Collect the optional bits of meta information; absent keys are
    // skipped without touching them.
    $info = array();
    if (!empty($item['type'])) {
      $info[] = $item['type'];
    }
    if (!empty($item['user'])) {
      $info[] = $item['user'];
    }
    if (!empty($item['date'])) {
      $info[] = format_date($item['date'], 'small');
    }
    if (isset($item['extra']) && is_array($item['extra'])) {
      $info = array_merge($info, $item['extra']);
    }

    $snippet = !empty($item['snippet']) ? '<p>'. $item['snippet'] . '</p>' : '';
    $output .= ' <dd>'. $snippet . '<p class="search-info">' . implode(' - ', $info) .'</p></dd>';
  }
  return $output;
}
?>