decode_entities
Definition
decode_entities($text, $exclude = array())
includes/unicode.inc, line 330
Description
Decode all HTML entities (including numerical ones) to regular UTF-8 bytes. Double-escaped entities will only be decoded once ("&amp;lt;" becomes "&lt;", not "<").
Parameters
$text The text to decode entities in.
$exclude An array of characters which should not be decoded. For example, array('<', '&', '"'). This affects both named and numerical entities.
Code
<?php
/**
 * Decode all HTML entities (including numerical ones) to regular UTF-8 bytes.
 *
 * Double-escaped entities are only decoded once ("&amp;lt;" becomes "&lt;",
 * not "<") because every entity in the text is matched and replaced in a
 * single regular-expression pass.
 *
 * @param $text
 *   The text to decode entities in.
 * @param $exclude
 *   An array of characters which should not be decoded, for example
 *   array('<', '&', '"'). Named entities that map to one of these characters
 *   are removed from the lookup table; the list is also forwarded to
 *   _decode_entities() so it can apply the same exclusion to numerical
 *   entities.
 *
 * @return
 *   The text with its entities replaced by whatever _decode_entities()
 *   returns for each match, or NULL if the regular expression engine fails.
 */
function decode_entities($text, $exclude = array()) {
  static $table;
  // The named-entity table is built once per request and reused afterwards.
  if (!isset($table)) {
    // Request the table in UTF-8 explicitly. The default encoding of
    // get_html_translation_table() depends on the PHP version, and
    // re-encoding an already-UTF-8 table would double-encode every
    // non-ASCII character.
    $table = array_flip(get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'UTF-8'));
    // Add apostrophe (XML); it is not part of the HTML 4.01 entity set.
    $table['&apos;'] = "'";
  }
  // Drop every named entity whose decoded character is in the exclude list.
  $newtable = array_diff($table, $exclude);
  // Select all entities in one pass to avoid decoding double-escaped entities
  // twice. A callback is used instead of the "e" (eval) pattern modifier,
  // which was removed in PHP 7.
  return preg_replace_callback(
    '/&(#x?)?([A-Za-z0-9]+);/',
    function ($matches) use ($newtable, $exclude) {
      // $matches[1] is '', '#' or '#x'; $matches[2] is the entity name or
      // number; $matches[0] is the complete entity as found in the text.
      return _decode_entities($matches[1], $matches[2], $matches[0], $newtable, $exclude);
    },
    $text
  );
}
?> 