function FilterUrl::process

Same name and namespace in other branches
  1. 10 core/modules/filter/src/Plugin/Filter/FilterUrl.php \Drupal\filter\Plugin\Filter\FilterUrl::process()
  2. 9 core/modules/filter/src/Plugin/Filter/FilterUrl.php \Drupal\filter\Plugin\Filter\FilterUrl::process()
  3. 8.9.x core/modules/filter/src/Plugin/Filter/FilterUrl.php \Drupal\filter\Plugin\Filter\FilterUrl::process()
  4. main core/modules/filter/src/Plugin/Filter/FilterUrl.php \Drupal\filter\Plugin\Filter\FilterUrl::process()

Performs the filter processing.

Parameters

string $text: The text string to be filtered.

string $langcode: The language code of the text to be filtered.

Return value

\Drupal\filter\FilterProcessResult The filtered text, wrapped in a FilterProcessResult object, and possibly with associated assets, cacheability metadata and placeholders.

Overrides FilterInterface::process

File

core/modules/filter/src/Plugin/Filter/FilterUrl.php, line 73

Class

FilterUrl
Provides a filter to convert URLs into links.

Namespace

Drupal\filter\Plugin\Filter

Code

public function process($text, $langcode) {
  // Converts absolute URLs, email addresses and "www." domains found in the
  // text portions of $text into links, leaving HTML tags, HTML comments and
  // the contents of the ignored tags below untouched. The result is returned
  // wrapped in a FilterProcessResult.
  //
  // Store the current text in case any of the preg_* functions fail.
  $saved_text = $text;
  // Tags to skip and not recurse into.
  $ignore_tags = 'a|script|style|code|pre';
  // Create an array which contains one entry for each type of link. Each
  // entry is a [callback, regexp] pair: the callback is invoked for every
  // match of the regexp and returns the replacement for the match. The array
  // is used and matching/replacement done below inside some loops.
  $tasks = [];
  // Prepare protocols pattern for absolute URLs.
  // \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols() will
  // replace any bad protocols with HTTP, so we need to support the identical
  // list. While '//' is technically optional for MAILTO only, we cannot
  // cleanly differ between protocols here without hard-coding MAILTO, so '//'
  // is optional for all protocols.
  // @see \Drupal\Component\Utility\UrlHelper::stripDangerousProtocols()
  $protocols = implode(':(?://)?|', $this->filterProtocols) . ':(?://)?';
  $valid_url_path_characters = "[\\p{L}\\p{M}\\p{N}!\\*\\';:=\\+,\\.\$\\/%#\\[\\]\\-_~@&]";
  // Allow URL paths to contain balanced parens
  // 1. Used in Wikipedia URLs like /Primer_(film)
  // 2. Used in IIS sessions like /S(dfd346)/
  $valid_url_balanced_parens = '\\(' . $valid_url_path_characters . '+\\)';
  // Valid end-of-path characters (so /foo. does not gobble the period). Allow
  // =&# for empty URL parameters and other URL-join artifacts
  $valid_url_ending_characters = '[\\p{L}\\p{M}\\p{N}:_+~#=/]|(?:' . $valid_url_balanced_parens . ')';
  $valid_url_query_chars = '[a-zA-Z0-9!?\\*\'@\\(\\);:&=\\+\\$\\/%#\\[\\]\\-_\\.,~|]';
  $valid_url_query_ending_chars = '[a-zA-Z0-9_&=#\\/]';
  // Full path and allow @ in a URL, but only in the middle. Catch things like
  // http://example.com/@user/
  $valid_url_path = '(?:(?:' . $valid_url_path_characters . '*(?:' . $valid_url_balanced_parens . $valid_url_path_characters . '*)*' . $valid_url_ending_characters . ')|(?:@' . $valid_url_path_characters . '+\\/))';
  // Prepare the domain name pattern. The ICANN seems to be on track towards
  // accepting more diverse top level domains (TLDs), so this pattern has been
  // "future-proofed" to allow for TLDs of length 2-64.
  $domain = '(?:[\\p{L}\\p{M}\\p{N}._+-]+\\.)?[\\p{L}\\p{M}]{2,64}\\b';
  // Mail domains differ from the generic domain pattern, specifically: A "."
  // character must be present in the string that follows the @ character.
  $email_domain = '(?:[\\p{L}\\p{M}\\p{N}._+-]+\\.)+[\\p{L}\\p{M}]{2,64}\\b';
  $ip = '(?:[0-9]{1,3}\\.){3}[0-9]{1,3}';
  $auth = '[\\p{L}\\p{M}\\p{N}:%_+*~#?&=.,/;-]+@';
  $trail = '(' . $valid_url_path . '*)?(\\?' . $valid_url_query_chars . '*' . $valid_url_query_ending_chars . ')?';
  // Match absolute URLs.
  $url_pattern = "(?:{$auth})?(?:{$domain}|{$ip})/?(?:{$trail})?";
  $pattern = "`((?:{$protocols})(?:{$url_pattern}))`u";
  $tasks[] = [
    static::class . '::parseFullLinks',
    $pattern,
  ];
  // Match email addresses.
  $url_pattern = "[\\p{L}\\p{M}\\p{N}._+-]{1,254}@(?:{$email_domain})";
  $pattern = "`({$url_pattern})`u";
  $tasks[] = [
    static::class . '::parseEmailLinks',
    $pattern,
  ];
  // Match www domains.
  $url_pattern = "www\\.(?:{$domain})/?(?:{$trail})?";
  $pattern = "`({$url_pattern})`u";
  $tasks[] = [
    static::class . '::parsePartialLinks',
    $pattern,
  ];
  // Each type of URL needs to be processed separately. The text is joined and
  // re-split after each task, since all injected HTML tags must be correctly
  // protected before the next task.
  foreach ($tasks as [$callback, $pattern]) {
    // Initialize the HTML comment temporary storage.
    // @see self::escapeComments()
    // @see self::unescapeComments()
    $this->htmlComments = [];
    // HTML comments need to be handled separately, as they may contain HTML
    // markup, especially a '>'. Therefore, remove all comment contents and
    // add them back later.
    $text = is_null($text) ? '' : preg_replace_callback('`<!--(.*?)-->`s', static::class . '::escapeComments', $text);
    // Split at all tags; ensures that no tags or attributes are processed.
    $chunks = is_null($text) ? [
      '',
    ] : preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    // Do not attempt to convert URLs into links if preg_split() fails.
    if ($chunks !== FALSE) {
      // PHP ensures that the array consists of alternating delimiters and
      // literals, and begins and ends with a literal (inserting NULL as
      // required). Therefore, the first chunk is always text:
      $chunk_type = 'text';
      // If a tag of $ignore_tags is found, it is stored in $open_tag and only
      // removed when the closing tag is found. Until the closing tag is
      // found, no replacements are made.
      $open_tag = '';
      // The loop only rewrites existing entries, so the number of chunks is
      // fixed and can be computed once.
      $chunk_count = count($chunks);
      for ($i = 0; $i < $chunk_count; $i++) {
        if ($chunk_type === 'text') {
          // Only process this text if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            // If there is a match, inject a link into this chunk via the
            // callback function contained in $callback.
            $replaced = preg_replace_callback($pattern, $callback, $chunks[$i]);
            // preg_replace_callback() returns NULL when PCRE fails (e.g.
            // invalid UTF-8 with the 'u' modifier or an exceeded backtrack
            // limit). Keep the unmodified chunk in that case; otherwise
            // implode() below would silently drop this part of the text.
            if ($replaced !== NULL) {
              $chunks[$i] = $replaced;
            }
          }
          // Text chunk is done, so the next chunk must be a tag.
          $chunk_type = 'tag';
        }
        else {
          // Only process this tag if there are no unclosed $ignore_tags.
          if ($open_tag === '') {
            // Check whether this tag is contained in $ignore_tags.
            if (preg_match("`<({$ignore_tags})(?:\\s|>)`i", $chunks[$i], $matches)) {
              $open_tag = $matches[1];
            }
          }
          else {
            // Inside an ignored tag: only its closing tag ends the skipping.
            if (preg_match("`</{$open_tag}>`i", $chunks[$i], $matches)) {
              $open_tag = '';
            }
          }
          // Tag chunk is done, so the next chunk must be text.
          $chunk_type = 'text';
        }
      }
      $text = implode($chunks);
    }
    // Revert to the original comment contents.
    $text = $text ? preg_replace_callback('`<!--(.*?)-->`', static::class . '::unescapeComments', $text) : $text;
  }
  // If there is no text at this point, revert to the previous text.
  $text = strlen((string) $text) > 0 ? $text : $saved_text;
  return new FilterProcessResult($text);
}

Buggy or inaccurate documentation? Please file an issue. Need support? Need help programming? Connect with the Drupal community.