From cb73535c8eae02092df984bafbecabbce8049cd0 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Tue, 5 Jun 2012 21:52:37 +0400 Subject: Revert "Update HTML Purifier to version 4.4.0." This reverts commit dd205fbad642ace6d0e33c8553f7d73404f140b4. --- .../HTMLPurifier/Filter/ExtractStyleBlocks.php | 175 ++------------------- 1 file changed, 11 insertions(+), 164 deletions(-) (limited to 'lib/htmlpurifier/library/HTMLPurifier/Filter') diff --git a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php index 320aa4f16..bbf78a663 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php +++ b/lib/htmlpurifier/library/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -1,11 +1,5 @@ blocks from input HTML, cleans them up * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') @@ -27,15 +21,8 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter private $_styleMatches = array(); private $_tidy; - private $_id_attrdef; - private $_class_attrdef; - private $_enum_attrdef; - public function __construct() { $this->_tidy = new csstidy(); - $this->_id_attrdef = new HTMLPurifier_AttrDef_HTML_ID(true); - $this->_class_attrdef = new HTMLPurifier_AttrDef_CSS_Ident(); - $this->_enum_attrdef = new HTMLPurifier_AttrDef_Enum(array('first-child', 'link', 'visited', 'active', 'hover', 'focus')); } /** @@ -90,166 +77,27 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter $css = substr($css, 0, -3); } $css = trim($css); - set_error_handler('htmlpurifier_filter_extractstyleblocks_muteerrorhandler'); $this->_tidy->parse($css); - restore_error_handler(); $css_definition = $config->getDefinition('CSS'); - $html_definition = $config->getDefinition('HTML'); - $new_css = array(); foreach ($this->_tidy->css as $k => $decls) { // $decls are all CSS declarations inside an @ selector $new_decls = array(); foreach ($decls as $selector => $style) { $selector = trim($selector); if ($selector === '') continue; // should not happen - // Parse the selector - // Here is the relevant part of the CSS grammar: - // - // ruleset - // : selector [ ',' S* selector ]* '{' ... - // selector - // : simple_selector [ combinator selector | S+ [ combinator? selector ]? ]? - // combinator - // : '+' S* - // : '>' S* - // simple_selector - // : element_name [ HASH | class | attrib | pseudo ]* - // | [ HASH | class | attrib | pseudo ]+ - // element_name - // : IDENT | '*' - // ; - // class - // : '.' IDENT - // ; - // attrib - // : '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* - // [ IDENT | STRING ] S* ]? ']' - // ; - // pseudo - // : ':' [ IDENT | FUNCTION S* [IDENT S*]? ')' ] - // ; - // - // For reference, here are the relevant tokens: - // - // HASH #{name} - // IDENT {ident} - // INCLUDES == - // DASHMATCH |= - // STRING {string} - // FUNCTION {ident}\( - // - // And the lexical scanner tokens - // - // name {nmchar}+ - // nmchar [_a-z0-9-]|{nonascii}|{escape} - // nonascii [\240-\377] - // escape {unicode}|\\[^\r\n\f0-9a-f] - // unicode \\{h}}{1,6}(\r\n|[ \t\r\n\f])? - // ident -?{nmstart}{nmchar*} - // nmstart [_a-z]|{nonascii}|{escape} - // string {string1}|{string2} - // string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" - // string2 \'([^\n\r\f\\"]|\\{nl}|{escape})*\' - // - // We'll implement a subset (in order to reduce attack - // surface); in particular: - // - // - No Unicode support - // - No escapes support - // - No string support (by proxy no attrib support) - // - element_name is matched against allowed - // elements (some people might find this - // annoying...) - // - Pseudo-elements one of :first-child, :link, - // :visited, :active, :hover, :focus - - // handle ruleset - $selectors = array_map('trim', explode(',', $selector)); - $new_selectors = array(); - foreach ($selectors as $sel) { - // split on +, > and spaces - $basic_selectors = preg_split('/\s*([+> ])\s*/', $sel, -1, PREG_SPLIT_DELIM_CAPTURE); - // even indices are chunks, odd indices are - // delimiters - $nsel = null; - $delim = null; // guaranteed to be non-null after - // two loop iterations - for ($i = 0, $c = count($basic_selectors); $i < $c; $i++) { - $x = $basic_selectors[$i]; - if ($i % 2) { - // delimiter - if ($x === ' ') { - $delim = ' '; - } else { - $delim = ' ' . $x . ' '; - } - } else { - // simple selector - $components = preg_split('/([#.:])/', $x, -1, PREG_SPLIT_DELIM_CAPTURE); - $sdelim = null; - $nx = null; - for ($j = 0, $cc = count($components); $j < $cc; $j ++) { - $y = $components[$j]; - if ($j === 0) { - if ($y === '*' || isset($html_definition->info[$y = strtolower($y)])) { - $nx = $y; - } else { - // $nx stays null; this matters - // if we don't manage to find - // any valid selector content, - // in which case we ignore the - // outer $delim - } - } elseif ($j % 2) { - // set delimiter - $sdelim = $y; - } else { - $attrdef = null; - if ($sdelim === '#') { - $attrdef = $this->_id_attrdef; - } elseif ($sdelim === '.') { - $attrdef = $this->_class_attrdef; - } elseif ($sdelim === ':') { - $attrdef = $this->_enum_attrdef; - } else { - throw new HTMLPurifier_Exception('broken invariant sdelim and preg_split'); - } - $r = $attrdef->validate($y, $config, $context); - if ($r !== false) { - if ($r !== true) { - $y = $r; - } - if ($nx === null) { - $nx = ''; - } - $nx .= $sdelim . $y; - } - } - } - if ($nx !== null) { - if ($nsel === null) { - $nsel = $nx; - } else { - $nsel .= $delim . $nx; - } - } else { - // delimiters to the left of invalid - // basic selector ignored - } - } - } - if ($nsel !== null) { - if (!empty($scopes)) { - foreach ($scopes as $s) { - $new_selectors[] = "$s $nsel"; - } - } else { - $new_selectors[] = $nsel; + if ($selector[0] === '+') { + if ($selector !== '' && $selector[0] === '+') continue; + } + if (!empty($scopes)) { + $new_selector = array(); // because multiple ones are possible + $selectors = array_map('trim', explode(',', $selector)); + foreach ($scopes as $s1) { + foreach ($selectors as $s2) { + $new_selector[] = "$s1 $s2"; } } + $selector = implode(', ', $new_selector); // now it's a string } - if (empty($new_selectors)) continue; - $selector = implode(', ', $new_selectors); foreach ($style as $name => $value) { if (!isset($css_definition->info[$name])) { unset($style[$name]); @@ -262,11 +110,10 @@ class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter } $new_decls[$selector] = $style; } - $new_css[$k] = $new_decls; + $this->_tidy->css[$k] = $new_decls; } // remove stuff that shouldn't be used, could be reenabled // after security risks are analyzed - $this->_tidy->css = $new_css; $this->_tidy->import = array(); $this->_tidy->charset = null; $this->_tidy->namespace = null; -- cgit v1.2.3