search_index
Definition
search_index($sid, $type, $text)
modules/search.module, line 323
Description
Update the full-text search index for a particular item.
Parameters
$sid A number identifying this particular item (e.g. node id).
$type A string defining this type of item (e.g. 'node').
$text The content of this item. Must be a piece of HTML text.
Related topics
| Name | Description |
|---|---|
| Search interface | The Drupal search interface manages a global search mechanism. |
Code
<?php
/**
 * Update the full-text search index for a particular item.
 *
 * The text is reduced to a whitelist of scoring tags, split into tags and
 * plain text, and every word of sufficient length is accumulated with a score
 * that depends on the tags enclosing it. Words found inside a link whose
 * target resolves to a node/book path are accumulated for the linked node
 * instead of for the item itself.
 *
 * search_wipe($sid, $type) is called before any row is inserted, and
 * search_dirty() is called for every word that is inserted.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 * @param $type
 *   A string defining this type of item (e.g. 'node').
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  global $base_url;
  // '!' is the pattern delimiter, so it has to be escaped inside the base URL
  // as well; hence the explicit second argument to preg_quote().
  $node_regexp = '!href=[\'"]?(?:'. preg_quote($base_url, '!') .'/)?(?:\?q=)?([^\'">]+)[\'">]!i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 21,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 5,
                'b' => 5,
                'strong' => 5,
                'em' => 5,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = false; // Odd/even counter. Tag or no tag.
  $link = false; // State variable for link analyser
  $linknid = 0; // Node id targeted by the link currently being analysed
  $score = 1; // Starting score per word
  // Key 0 collects the words of the item itself; any other key is the id of a
  // linked node and collects the words of the link text pointing to it.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = strtolower($tagname);
      if ($tagname !== '' && $tagname[0] == '/') {
        // Closing tag: take back the bonus that the opening tag added.
        $closed = substr($tagname, 1);
        if (isset($tags[$closed])) {
          $score -= $tags[$closed];
        }
        if ($score < 1) { // possible due to bad HTML
          $score = 1;
        }
        if ($tagname == '/a') {
          $link = false;
        }
      }
      else {
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                $link = true;
              }
            }
          }
        }
        // Unknown tag names (possible with malformed markup) add nothing.
        if (isset($tags[$tagname])) {
          $score += $tags[$tagname];
        }
      }
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        $words = search_keywords_split($value);
        foreach ($words as $word) {
          // Check wordlength
          if (string_length($word) >= $minimum_word_size) {
            // Note: strtolower can be used because the value is only used internally.
            $word = strtolower($word);
            // Words inside a node link count for the linked node, all other
            // words count for the item being indexed.
            $target = $link ? $linknid : 0;
            if (!isset($results[$target])) {
              $results[$target] = array();
            }
            // Initialise the counter explicitly so the first occurrence of a
            // word does not increment an undefined array element.
            if (!isset($results[$target][$word])) {
              $results[$target][$word] = 0;
            }
            $results[$target][$word] += $score;
          }
        }
      }
    }
    $tag = !$tag;
  }

  search_wipe($sid, $type);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %d)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Now insert links to nodes
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %d)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}
?> 