Same name and namespace in other branches
  1. 7.x modules/filter/filter.module _filter_htmlcorrector()

Scan input and make sure that all HTML tags are properly closed and nested.

Related topics

1 call to _filter_htmlcorrector()
drupal_html_to_text in includes/mail.inc
Transform an HTML string into plain text, preserving the structure of the markup. Useful for preparing the body of a node to be sent by e-mail.

File

modules/filter/filter.module, line 773
Framework for handling filtering of content.

Code

/**
 * Scan input and make sure that all HTML tags are properly closed and nested.
 *
 * The text is split into alternating literal-text and tag chunks. Opening
 * tags are tracked on a stack (most recently opened first); a closing tag
 * closes every tag opened after its match, closing tags without a matching
 * opener are dropped, and whatever is still open at the end of the input is
 * closed. Single-use (void) tags are normalized to the "<tag />" form.
 * Comments and other "<!...>" declarations are emitted untouched.
 *
 * @param $text
 *   The HTML fragment to correct.
 *
 * @return
 *   The corrected HTML string.
 */
function _filter_htmlcorrector($text) {
  // Lookup tables keyed by lower-case tag name; only the keys matter because
  // they are queried with isset().
  // Tags which cannot be nested but are typically left unclosed.
  static $no_nesting = array(
    'li' => true,
    'p' => true,
  );

  // Single use tags in HTML4.
  static $single_use = array(
    'base' => true,
    'meta' => true,
    'link' => true,
    'hr' => true,
    'br' => true,
    'param' => true,
    'img' => true,
    'area' => true,
    'input' => true,
    'col' => true,
    'frame' => true,
  );

  // Properly entify angles: a '<' that cannot start a tag, comment or closing
  // tag (or that ends the input) becomes '&lt;'.
  $text = preg_replace('@<(?=[^a-zA-Z!/]|$)@', '&lt;', $text);

  // Split tags from text. With PREG_SPLIT_DELIM_CAPTURE the result alternates
  // literal text and captured tag content, beginning and ending with a
  // literal (possibly the empty string).
  $split = preg_split('/<(!--.*?--|[^>]+?)>/s', $text, -1, PREG_SPLIT_DELIM_CAPTURE);

  // Odd/even toggle: false for a literal chunk, true for a tag chunk.
  $tag = false;

  // Currently open tags, most recently opened first.
  $stack = array();
  $output = '';
  foreach ($split as $value) {

    // Process HTML tags.
    if ($tag) {

      // Pass comments and other declarations (e.g. <!DOCTYPE ...>) through
      // verbatim. They are not elements, so they must never be pushed on the
      // stack and "closed" later. The split pattern guarantees that $value is
      // not empty here.
      if ($value[0] === '!') {
        $output .= '<' . $value . '>';
      }
      else {
        $name = strtolower($value);
        $closing = ($name[0] === '/');
        if ($closing) {
          $name = (string) substr($name, 1);
        }

        // The tag name ends at the first whitespace or slash, so that
        // "<br/>" is recognized as "br" and not as "br/".
        list($tagname) = preg_split('@[\s/]@', $name, 2);

        if ($closing) {

          // Discard XHTML closing tags for single use tags, as well as closing
          // tags that have no matching opening tag on the stack.
          if (!isset($single_use[$tagname]) && in_array($tagname, $stack, true)) {

            // Close other tags lingering first, then the matching tag itself.
            do {
              $output .= '</' . $stack[0] . '>';
            } while (array_shift($stack) !== $tagname);
          }
        }
        else {

          // See if we have an identical 'no nesting' tag already open and
          // close it if found.
          if ($stack && $stack[0] === $tagname && isset($no_nesting[$tagname])) {
            $output .= '</' . array_shift($stack) . '>';
          }

          // Push non-single-use tags onto the stack.
          if (!isset($single_use[$tagname])) {
            array_unshift($stack, $tagname);
          }
          else {
            // Normalize single use tags to the self-closing "<tag />" form.
            $value = rtrim($value, ' /') . ' /';
          }
          $output .= '<' . $value . '>';
        }
      }
    }
    else {

      // Passthrough all text.
      $output .= $value;
    }
    $tag = !$tag;
  }

  // Close remaining tags.
  while (count($stack) > 0) {
    $output .= '</' . array_shift($stack) . '>';
  }
  return $output;
}