Same name and namespace in other branches
- 7.x modules/filter/filter.module _filter_htmlcorrector()
Scan input and make sure that all HTML tags are properly closed and nested.
Related topics
1 call to _filter_htmlcorrector()
- drupal_html_to_text() in includes/mail.inc - Transform an HTML string into plain text, preserving the structure of the markup. Useful for preparing the body of a node to be sent by e-mail.
File
- modules/filter/filter.module, line 773 - Framework for handling filtering of content.
Code
/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * Angle brackets that cannot start a tag are escaped, comments are passed
 * through untouched, unmatched closing tags are dropped, single use tags are
 * rewritten in the XHTML "<tag />" form and any tag still open at the end of
 * the input is closed.
 *
 * @param $text
 *   The HTML fragment to correct.
 *
 * @return
 *   The corrected markup as a string.
 */
function _filter_htmlcorrector($text) {
  // Tags which cannot be nested but are typically left unclosed. The arrays
  // are only ever probed with isset(), so the values are irrelevant.
  static $no_nesting = array(
    'li' => true,
    'p' => true,
  );
  // Single use tags in HTML4.
  static $single_use = array(
    'base' => true,
    'meta' => true,
    'link' => true,
    'hr' => true,
    'br' => true,
    'param' => true,
    'img' => true,
    'area' => true,
    'input' => true,
    'col' => true,
    'frame' => true,
  );

  // Properly entify angles: a '<' that is not followed by a letter, '!' or
  // '/' (or that ends the input) cannot open a tag, so it is escaped.
  $text = preg_replace('@<(?=[^a-zA-Z!/]|$)@', '&lt;', $text);

  // Split tags from text. With PREG_SPLIT_DELIM_CAPTURE the result alternates
  // literal text and captured tag content, beginning and ending with text.
  $split = preg_split('/<(!--.*?--|[^>]+?)>/s', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  // Toggles on every element: false for text, true for tag content.
  $tag = false;
  // Currently open tags, innermost first.
  $stack = array();
  $output = '';

  foreach ($split as $value) {
    if (!$tag) {
      // Passthrough all text.
      $output .= $value;
    }
    elseif (substr($value, 0, 3) == '!--') {
      // Passthrough comments.
      $output .= '<' . $value . '>';
    }
    else {
      // The tag name is everything up to the first whitespace character.
      list($tagname) = preg_split('/\\s/', strtolower($value), 2);

      if ($tagname !== '' && $tagname[0] == '/') {
        // Closing tag. The cast keeps the name a string on PHP versions where
        // substr() returns false for an empty result.
        $tagname = (string) substr($tagname, 1);
        // Discard XHTML closing tags for single use tags, and closing tags
        // that have no matching opening tag on the stack.
        if (!isset($single_use[$tagname]) && in_array($tagname, $stack, true)) {
          // Close other tags lingering first, then the matching tag itself.
          do {
            $output .= '</' . $stack[0] . '>';
          } while (array_shift($stack) !== $tagname);
        }
      }
      else {
        // Compact self-closing forms such as "<br/>" carry the slash in the
        // name; strip it so the lookups below see the real tag name.
        $tagname = rtrim($tagname, '/');

        // See if we have an identical 'no nesting' tag already open and
        // close it if found.
        if (count($stack) && $stack[0] === $tagname && isset($no_nesting[$tagname])) {
          $output .= '</' . array_shift($stack) . '>';
        }

        if (isset($single_use[$tagname])) {
          // Normalise single use tags to the XHTML "<tag />" form.
          $value = rtrim($value, ' /') . ' /';
        }
        else {
          // Push non-single-use tags onto the stack.
          array_unshift($stack, $tagname);
        }
        $output .= '<' . $value . '>';
      }
    }
    $tag = !$tag;
  }

  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</' . array_shift($stack) . '>';
  }
  return $output;
}