Same name and namespace in other branches
  1. 8.9.x core/lib/Drupal/Component/Utility/Xss.php \Drupal\Component\Utility\Xss::attributes()
  2. 9 core/lib/Drupal/Component/Utility/Xss.php \Drupal\Component\Utility\Xss::attributes()

Processes a string of HTML attributes.

Parameters

string $attributes: The HTML attributes to process.

Return value

array Cleaned up version of the HTML attributes, one array item per attribute.

File

core/lib/Drupal/Component/Utility/Xss.php, line 202

Class

Xss
Provides a helper to filter for cross-site scripting.

Namespace

Drupal\Component\Utility

Code

/**
 * Processes a string of HTML attributes.
 *
 * The string is consumed from the left by a small state machine:
 * - mode 0 expects an attribute name,
 * - mode 1 expects an equals sign, or whitespace ending a valueless
 *   attribute,
 * - mode 2 expects an attribute value (double-quoted, single-quoted or
 *   unquoted).
 * Input that does not fit the current mode is stripped and parsing resumes in
 * mode 0.
 *
 * Attribute names are lower-cased. Attributes named "style", names starting
 * with "on" or "-", and names longer than 96 characters are parsed but never
 * returned.
 *
 * @param string $attributes
 *   The HTML attributes to process.
 *
 * @return string[]
 *   The cleaned-up attributes, one array item per attribute, formatted as
 *   name="value", name='value', or a bare name for valueless attributes.
 *   Unquoted values are returned wrapped in double quotes.
 */
protected static function attributes($attributes) {
  $attributes_array = [];
  $mode = 0;
  $attribute_name = '';
  $skip = FALSE;
  $skip_protocol_filtering = FALSE;

  // Value syntaxes accepted in mode 2, tried in this order, mapped to the
  // quote character used when the attribute is written back out.
  $value_patterns = [
    '/^"([^"]*)"(\\s+|$)/' => '"',
    "/^'([^']*)'(\\s+|\$)/" => "'",
    "%^([^\\s\"']+)(\\s+|\$)%" => '"',
  ];

  while (strlen($attributes) !== 0) {
    // Was the last operation successful?
    $working = FALSE;
    switch ($mode) {
      case 0:
        // Attribute name, href for instance.
        if (preg_match('/^([-a-zA-Z][-a-zA-Z0-9]*)/', $attributes, $match)) {
          $attribute_name = strtolower($match[1]);
          $skip = $attribute_name === 'style'
            || str_starts_with($attribute_name, 'on')
            || str_starts_with($attribute_name, '-')
            || strlen($attribute_name) > 96;

          // Values for attributes of type URI should be filtered for
          // potentially malicious protocols (for example, an href-attribute
          // starting with "javascript:"). However, for some non-URI
          // attributes performing this filtering causes valid and safe data
          // to be mangled. We prevent this by skipping protocol filtering on
          // such attributes.
          // @see \Drupal\Component\Utility\UrlHelper::filterBadProtocol()
          // @see http://www.w3.org/TR/html4/index/attributes.html
          $skip_protocol_filtering = str_starts_with($attribute_name, 'data-') || in_array($attribute_name, [
            'title',
            'alt',
            'rel',
            'property',
            'class',
            'datetime',
          ], TRUE);
          $working = TRUE;
          $mode = 1;
          // Drop the name that was just matched at the start of the string.
          $attributes = substr($attributes, strlen($match[0]));
        }
        break;

      case 1:
        // Equals sign or valueless ("selected").
        if (preg_match('/^\\s*=\\s*/', $attributes, $match)) {
          $working = TRUE;
          $mode = 2;
          $attributes = substr($attributes, strlen($match[0]));
          break;
        }
        if (preg_match('/^\\s+/', $attributes, $match)) {
          // Whitespace right after the name: the attribute has no value.
          $working = TRUE;
          $mode = 0;
          if (!$skip) {
            $attributes_array[] = $attribute_name;
          }
          $attributes = substr($attributes, strlen($match[0]));
        }
        break;

      case 2:
        // Once we've finished processing the attribute value continue to look
        // for attributes. This mode always counts as successful, even when no
        // value pattern matches; the leftover text is then re-read in mode 0.
        $mode = 0;
        $working = TRUE;

        // Attribute value, a URL after href= for instance.
        foreach ($value_patterns as $pattern => $quote) {
          if (preg_match($pattern, $attributes, $match)) {
            $value = $skip_protocol_filtering ? $match[1] : UrlHelper::filterBadProtocol($match[1]);
            if (!$skip) {
              $attributes_array[] = $attribute_name . '=' . $quote . $value . $quote;
            }
            $attributes = substr($attributes, strlen($match[0]));
            break;
          }
        }
        break;
    }

    if (!$working) {
      // Not well-formed; remove and try again.
      // The pattern below is intentionally kept verbatim. It contains an empty
      // alternative just before the \S branch.
      // NOTE(review): the amount stripped per pass presumably depends on how
      // preg_replace() retries after an empty match; do not tidy the pattern
      // without regression tests.
      // preg_replace() returns NULL on a PCRE error; fall back to '' so the
      // loop terminates without handing NULL to strlen().
      $attributes = preg_replace('/
          ^
          (
          "[^"]*("|$)     # - a string that starts with a double quote, up until the next double quote or the end of the string
          |               # or
          \'[^\']*(\'|$)| # - a string that starts with a quote, up until the next quote or the end of the string
          |               # or
          \\S              # - a non-whitespace character
          )*              # any number of the above three
          \\s*             # any number of whitespaces
          /x', '', $attributes) ?? '';
      $mode = 0;
    }
  }

  // The attribute list ends with a valueless attribute like "selected".
  if ($mode === 1 && !$skip) {
    $attributes_array[] = $attribute_name;
  }
  return $attributes_array;
}