From acccafe3daee1c94064202d38fa244bd5a15c2e7 Mon Sep 17 00:00:00 2001 From: Andrew Dolgov Date: Sun, 28 Oct 2012 12:21:21 +0400 Subject: replace htmlpurifier with htmlawed --- .../library/HTMLPurifier/ChildDef/Chameleon.php | 48 ------- .../library/HTMLPurifier/ChildDef/Custom.php | 90 ------------- .../library/HTMLPurifier/ChildDef/Empty.php | 20 --- .../library/HTMLPurifier/ChildDef/Optional.php | 26 ---- .../library/HTMLPurifier/ChildDef/Required.php | 117 ----------------- .../HTMLPurifier/ChildDef/StrictBlockquote.php | 88 ------------- .../library/HTMLPurifier/ChildDef/Table.php | 142 --------------------- 7 files changed, 531 deletions(-) delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php delete mode 100644 lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php (limited to 'lib/htmlpurifier/library/HTMLPurifier/ChildDef') diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php deleted file mode 100644 index 15c364ee3..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php +++ /dev/null @@ -1,48 +0,0 @@ -inline = new HTMLPurifier_ChildDef_Optional($inline); - $this->block = new HTMLPurifier_ChildDef_Optional($block); - $this->elements = $this->block->elements; - } - - public function validateChildren($tokens_of_children, $config, $context) { - if ($context->get('IsInline') === false) { - return $this->block->validateChildren( - $tokens_of_children, $config, $context); - } else { - return $this->inline->validateChildren( - $tokens_of_children, $config, $context); - } - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php deleted file mode 100644 index b68047b4b..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php +++ /dev/null @@ -1,90 +0,0 @@ -dtd_regex = $dtd_regex; - $this->_compileRegex(); - } - /** - * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) - */ - protected function _compileRegex() { - $raw = str_replace(' ', '', $this->dtd_regex); - if ($raw{0} != '(') { - $raw = "($raw)"; - } - $el = '[#a-zA-Z0-9_.-]+'; - $reg = $raw; - - // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M - // DOING! Seriously: if there's problems, please report them. - - // collect all elements into the $elements array - preg_match_all("/$el/", $reg, $matches); - foreach ($matches[0] as $match) { - $this->elements[$match] = true; - } - - // setup all elements as parentheticals with leading commas - $reg = preg_replace("/$el/", '(,\\0)', $reg); - - // remove commas when they were not solicited - $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); - - // remove all non-paranthetical commas: they are handled by first regex - $reg = preg_replace("/,\(/", '(', $reg); - - $this->_pcre_regex = $reg; - } - public function validateChildren($tokens_of_children, $config, $context) { - $list_of_children = ''; - $nesting = 0; // depth into the nest - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) continue; - - $is_child = ($nesting == 0); // direct - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - $list_of_children .= $token->name . ','; - } - } - // add leading comma to deal with stray comma declarations - $list_of_children = ',' . rtrim($list_of_children, ','); - $okay = - preg_match( - '/^,?'.$this->_pcre_regex.'$/', - $list_of_children - ); - - return (bool) $okay; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php deleted file mode 100644 index 13171f665..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php +++ /dev/null @@ -1,20 +0,0 @@ -whitespace) return $tokens_of_children; - else return array(); - } - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php deleted file mode 100644 index 4889f249b..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php +++ /dev/null @@ -1,117 +0,0 @@ - $x) { - $elements[$i] = true; - if (empty($i)) unset($elements[$i]); // remove blank - } - } - $this->elements = $elements; - } - public $allow_empty = false; - public $type = 'required'; - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // whether or not we're deleting a node - $is_deleting = false; - - // whether or not parsed character data is allowed - // this controls whether or not we silently drop a tag - // or generate escaped HTML from it - $pcdata_allowed = isset($this->elements['#PCDATA']); - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - // some configuration - $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); - - // generator - $gen = new HTMLPurifier_Generator($config, $context); - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - $is_deleting = false; - if (!isset($this->elements[$token->name])) { - $is_deleting = true; - if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } - continue; - } - } - if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { - $result[] = $token; - } elseif ($pcdata_allowed && $escape_invalid_children) { - $result[] = - new HTMLPurifier_Token_Text( - $gen->generateFromToken($token) - ); - } else { - // drop silently - } - } - if (empty($result)) return false; - if ($all_whitespace) { - $this->whitespace = true; - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php deleted file mode 100644 index dfae8a6e5..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php +++ /dev/null @@ -1,88 +0,0 @@ -init($config); - return $this->fake_elements; - } - - public function validateChildren($tokens_of_children, $config, $context) { - - $this->init($config); - - // trick the parent class into thinking it allows more - $this->elements = $this->fake_elements; - $result = parent::validateChildren($tokens_of_children, $config, $context); - $this->elements = $this->real_elements; - - if ($result === false) return array(); - if ($result === true) $result = $tokens_of_children; - - $def = $config->getHTMLDefinition(); - $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper); - $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); - $is_inline = false; - $depth = 0; - $ret = array(); - - // assuming that there are no comment tokens - foreach ($result as $i => $token) { - $token = $result[$i]; - // ifs are nested for readability - if (!$is_inline) { - if (!$depth) { - if ( - ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || - (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) - ) { - $is_inline = true; - $ret[] = $block_wrap_start; - } - } - } else { - if (!$depth) { - // starting tokens have been inline text / empty - if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { - if (isset($this->elements[$token->name])) { - // ended - $ret[] = $block_wrap_end; - $is_inline = false; - } - } - } - } - $ret[] = $token; - if ($token instanceof HTMLPurifier_Token_Start) $depth++; - if ($token instanceof HTMLPurifier_Token_End) $depth--; - } - if ($is_inline) $ret[] = $block_wrap_end; - return $ret; - } - - private function init($config) { - if (!$this->init) { - $def = $config->getHTMLDefinition(); - // allow all inline elements - $this->real_elements = $this->elements; - $this->fake_elements = $def->info_content_sets['Flow']; - $this->fake_elements['#PCDATA'] = true; - $this->init = true; - } - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php deleted file mode 100644 index 34f0227dd..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php +++ /dev/null @@ -1,142 +0,0 @@ - true, 'tbody' => true, 'thead' => true, - 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); - public function __construct() {} - public function validateChildren($tokens_of_children, $config, $context) { - if (empty($tokens_of_children)) return false; - - // this ensures that the loop gets run one last time before closing - // up. It's a little bit of a hack, but it works! Just make sure you - // get rid of the token later. - $tokens_of_children[] = false; - - // only one of these elements is allowed in a table - $caption = false; - $thead = false; - $tfoot = false; - - // as many of these as you want - $cols = array(); - $content = array(); - - $nesting = 0; // current depth so we can determine nodes - $is_collecting = false; // are we globbing together tokens to package - // into one of the collectors? - $collection = array(); // collected nodes - $tag_index = 0; // the first node might be whitespace, - // so this tells us where the start tag is - - foreach ($tokens_of_children as $token) { - $is_child = ($nesting == 0); - - if ($token === false) { - // terminating sequence started - } elseif ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - // handle node collection - if ($is_collecting) { - if ($is_child) { - // okay, let's stash the tokens away - // first token tells us the type of the collection - switch ($collection[$tag_index]->name) { - case 'tr': - case 'tbody': - $content[] = $collection; - break; - case 'caption': - if ($caption !== false) break; - $caption = $collection; - break; - case 'thead': - case 'tfoot': - // access the appropriate variable, $thead or $tfoot - $var = $collection[$tag_index]->name; - if ($$var === false) { - $$var = $collection; - } else { - // transmutate the first and less entries into - // tbody tags, and then put into content - $collection[$tag_index]->name = 'tbody'; - $collection[count($collection)-1]->name = 'tbody'; - $content[] = $collection; - } - break; - case 'colgroup': - $cols[] = $collection; - break; - } - $collection = array(); - $is_collecting = false; - $tag_index = 0; - } else { - // add the node to the collection - $collection[] = $token; - } - } - - // terminate - if ($token === false) break; - - if ($is_child) { - // determine what we're dealing with - if ($token->name == 'col') { - // the only empty tag in the possie, we can handle it - // immediately - $cols[] = array_merge($collection, array($token)); - $collection = array(); - $tag_index = 0; - continue; - } - switch($token->name) { - case 'caption': - case 'colgroup': - case 'thead': - case 'tfoot': - case 'tbody': - case 'tr': - $is_collecting = true; - $collection[] = $token; - continue; - default: - if (!empty($token->is_whitespace)) { - $collection[] = $token; - $tag_index++; - } - continue; - } - } - } - - if (empty($content)) return false; - - $ret = array(); - if ($caption !== false) $ret = array_merge($ret, $caption); - if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); - if ($thead !== false) $ret = array_merge($ret, $thead); - if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - foreach ($content as $token_array) $ret = array_merge($ret, $token_array); - if (!empty($collection) && $is_collecting == false){ - // grab the trailing space - $ret = array_merge($ret, $collection); - } - - array_pop($tokens_of_children); // remove phantom token - - return ($ret === $tokens_of_children) ? true : $ret; - - } -} - -// vim: et sw=4 sts=4 -- cgit v1.2.3