From dd205fbad642ace6d0e33c8553f7d73404f140b4 Mon Sep 17 00:00:00 2001 From: Michael Kuhn Date: Sat, 28 Apr 2012 14:37:51 +0200 Subject: Update HTML Purifier to version 4.4.0. --- .../library/HTMLPurifier/AttrDef/CSS/Ident.php | 24 ++++++++++++ .../library/HTMLPurifier/AttrDef/Clone.php | 28 ++++++++++++++ .../library/HTMLPurifier/AttrDef/HTML/Color.php | 2 +- .../library/HTMLPurifier/AttrDef/HTML/ID.php | 22 ++++++++--- .../library/HTMLPurifier/AttrDef/URI.php | 2 +- .../library/HTMLPurifier/AttrDef/URI/Host.php | 45 +++++++++++++++++++--- 6 files changed, 109 insertions(+), 14 deletions(-) create mode 100644 lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php create mode 100644 lib/htmlpurifier/library/HTMLPurifier/AttrDef/Clone.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/AttrDef') diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php new file mode 100644 index 000000000..779794a0b --- /dev/null +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/CSS/Ident.php @@ -0,0 +1,24 @@ +clone = $clone; + } + + public function validate($v, $config, $context) { + return $this->clone->validate($v, $config, $context); + } + + public function make($string) { + return clone $this->clone; + } + +} + +// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php index d01e20454..00d865723 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/Color.php @@ -14,7 +14,7 @@ class HTMLPurifier_AttrDef_HTML_Color extends HTMLPurifier_AttrDef $string = trim($string); if (empty($string)) return false; - if (isset($colors[$string])) return $colors[$string]; + if (isset($colors[strtolower($string)])) return $colors[strtolower($string)]; /* read back with the same lower-cased key that isset() tested */ if ($string[0] === '#') $hex = substr($string, 1); else $hex = $string; diff --git 
a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php index 81d03762d..0015fa1eb 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/HTML/ID.php @@ -12,12 +12,22 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef { - // ref functionality disabled, since we also have to verify - // whether or not the ID it refers to exists + // selector is NOT a valid thing to use for IDREFs, because IDREFs + // *must* target IDs that exist, whereas selector #ids do not. + + /** + * Determines whether or not we're validating an ID in a CSS + * selector context. + */ + protected $selector; + + public function __construct($selector = false) { + $this->selector = $selector; + } public function validate($id, $config, $context) { - if (!$config->get('Attr.EnableID')) return false; + if (!$this->selector && !$config->get('Attr.EnableID')) return false; $id = trim($id); // trim it first @@ -33,10 +43,10 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef '%Attr.IDPrefix is set', E_USER_WARNING); } - //if (!$this->ref) { + if (!$this->selector) { $id_accumulator =& $context->get('IDAccumulator'); if (isset($id_accumulator->ids[$id])) return false; - //} + } // we purposely avoid using regex, hopefully this is faster @@ -56,7 +66,7 @@ class HTMLPurifier_AttrDef_HTML_ID extends HTMLPurifier_AttrDef return false; } - if (/*!$this->ref && */$result) $id_accumulator->add($id); + if (!$this->selector && $result) $id_accumulator->add($id); // if no change was made to the ID, return the result // else, return the new id if stripping whitespace made it diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php index 01a6d83e9..c2b684671 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI.php @@ -19,7 
+19,7 @@ class HTMLPurifier_AttrDef_URI extends HTMLPurifier_AttrDef } public function make($string) { - $embeds = (bool) $string; + $embeds = ($string === 'embedded'); return new HTMLPurifier_AttrDef_URI($embeds); } diff --git a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php index feca469d7..125decb2d 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/library/HTMLPurifier/AttrDef/URI/Host.php @@ -44,9 +44,8 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // A regular domain name. - // This breaks I18N domain names, but we don't have proper IRI support, - // so force users to insert Punycode. If there's complaining we'll - // try to fix things into an international friendly form. + // This doesn't match I18N domain names, but we don't have proper IRI support, + // so force users to insert Punycode. // The productions describing this are: $a = '[a-z]'; // alpha @@ -57,10 +56,44 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // toplabel = alpha | alpha *( alphanum | "-" ) alphanum $toplabel = "$a($and*$an)?"; // hostname = *( domainlabel "." ) toplabel [ "." ] - $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); - if (!$match) return false; + if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { + return $string; + } + + // If we have Net_IDNA2 support, we can support IRIs by + // punycoding them. 
(This is the most portable thing to do, + // since otherwise we have to assume browsers support IRIs natively.) + + if ($config->get('Core.EnableIDNA')) { + $idna = new Net_IDNA2(array('encoding' => 'utf8', 'overlong' => false, 'strict' => true)); + // we need to encode each period-separated label separately + $parts = explode('.', $string); + try { + $new_parts = array(); + foreach ($parts as $part) { + $encodable = false; + for ($i = 0, $c = strlen($part); $i < $c; $i++) { + if (ord($part[$i]) > 0x7a) { + $encodable = true; + break; + } + } + if (!$encodable) { + $new_parts[] = $part; + } else { + $new_parts[] = $idna->encode($part); + } + } + $string = implode('.', $new_parts); + if (preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string)) { + return $string; + } + } catch (Exception $e) { + // XXX error reporting: the exception is swallowed and control falls through to the final return false + } + } - return $string; + return false; } } -- cgit v1.2.3