summaryrefslogtreecommitdiff
path: root/lib/htmlpurifier/library/HTMLPurifier/ChildDef
diff options
context:
space:
mode:
authorAndrew Dolgov <[email protected]>2012-06-05 21:52:21 +0400
committerAndrew Dolgov <[email protected]>2012-06-05 21:52:21 +0400
commit010efc9b814b433bc60353caec185d905688a32b (patch)
treeb2b4f62cbc2d10cf75386e992434be1f4013dc13 /lib/htmlpurifier/library/HTMLPurifier/ChildDef
parent705b97b7fca9ea70820af5fcd926f88903eaa430 (diff)
Revert "remove htmlpurifier"
This reverts commit c21a462d52bd32737c32c29b060da03b38f1c2e6.
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/ChildDef')
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php48
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php90
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php20
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php120
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php26
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php117
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php88
-rw-r--r--lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php227
8 files changed, 736 insertions, 0 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
new file mode 100644
index 000000000..15c364ee3
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Chameleon.php
@@ -0,0 +1,48 @@
+<?php
+
+/**
+ * Definition that uses different definitions depending on context.
+ *
+ * The del and ins tags are notable because they allow different types of
+ * elements depending on whether or not they're in a block or inline context.
+ * Chameleon allows this behavior to happen by using two different
+ * definitions depending on context. While this somewhat generalized,
+ * it is specifically intended for those two tags.
+ */
+class HTMLPurifier_ChildDef_Chameleon extends HTMLPurifier_ChildDef
+{
+
+ /**
+ * Instance of the definition object to use when inline. Usually stricter.
+ */
+ public $inline;
+
+ /**
+ * Instance of the definition object to use when block.
+ */
+ public $block;
+
+ public $type = 'chameleon';
+
+ /**
+ * @param $inline List of elements to allow when inline.
+ * @param $block List of elements to allow when block.
+ */
+ public function __construct($inline, $block) {
+ $this->inline = new HTMLPurifier_ChildDef_Optional($inline);
+ $this->block = new HTMLPurifier_ChildDef_Optional($block);
+ $this->elements = $this->block->elements;
+ }
+
+ public function validateChildren($tokens_of_children, $config, $context) {
+ if ($context->get('IsInline') === false) {
+ return $this->block->validateChildren(
+ $tokens_of_children, $config, $context);
+ } else {
+ return $this->inline->validateChildren(
+ $tokens_of_children, $config, $context);
+ }
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
new file mode 100644
index 000000000..b68047b4b
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Custom.php
@@ -0,0 +1,90 @@
+<?php
+
+/**
+ * Custom validation class, accepts DTD child definitions
+ *
+ * @warning Currently this class is an all or nothing proposition, that is,
+ * it will only give a bool return value.
+ */
+class HTMLPurifier_ChildDef_Custom extends HTMLPurifier_ChildDef
+{
+ public $type = 'custom';
+ public $allow_empty = false;
+ /**
+ * Allowed child pattern as defined by the DTD
+ */
+ public $dtd_regex;
+ /**
+ * PCRE regex derived from $dtd_regex
+ * @private
+ */
+ private $_pcre_regex;
+ /**
+ * @param $dtd_regex Allowed child pattern from the DTD
+ */
+ public function __construct($dtd_regex) {
+ $this->dtd_regex = $dtd_regex;
+ $this->_compileRegex();
+ }
+ /**
+ * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex)
+ */
+ protected function _compileRegex() {
+ $raw = str_replace(' ', '', $this->dtd_regex);
+ if ($raw{0} != '(') {
+ $raw = "($raw)";
+ }
+ $el = '[#a-zA-Z0-9_.-]+';
+ $reg = $raw;
+
+ // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M
+ // DOING! Seriously: if there's problems, please report them.
+
+ // collect all elements into the $elements array
+ preg_match_all("/$el/", $reg, $matches);
+ foreach ($matches[0] as $match) {
+ $this->elements[$match] = true;
+ }
+
+ // setup all elements as parentheticals with leading commas
+ $reg = preg_replace("/$el/", '(,\\0)', $reg);
+
+ // remove commas when they were not solicited
+ $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg);
+
+ // remove all non-paranthetical commas: they are handled by first regex
+ $reg = preg_replace("/,\(/", '(', $reg);
+
+ $this->_pcre_regex = $reg;
+ }
+ public function validateChildren($tokens_of_children, $config, $context) {
+ $list_of_children = '';
+ $nesting = 0; // depth into the nest
+ foreach ($tokens_of_children as $token) {
+ if (!empty($token->is_whitespace)) continue;
+
+ $is_child = ($nesting == 0); // direct
+
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ if ($is_child) {
+ $list_of_children .= $token->name . ',';
+ }
+ }
+ // add leading comma to deal with stray comma declarations
+ $list_of_children = ',' . rtrim($list_of_children, ',');
+ $okay =
+ preg_match(
+ '/^,?'.$this->_pcre_regex.'$/',
+ $list_of_children
+ );
+
+ return (bool) $okay;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
new file mode 100644
index 000000000..13171f665
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Empty.php
@@ -0,0 +1,20 @@
+<?php
+
+/**
+ * Definition that disallows all elements.
+ * @warning validateChildren() in this class is actually never called, because
+ * empty elements are corrected in HTMLPurifier_Strategy_MakeWellFormed
+ * before child definitions are parsed in earnest by
+ * HTMLPurifier_Strategy_FixNesting.
+ */
+class HTMLPurifier_ChildDef_Empty extends HTMLPurifier_ChildDef
+{
+ public $allow_empty = true;
+ public $type = 'empty';
+ public function __construct() {}
+ public function validateChildren($tokens_of_children, $config, $context) {
+ return array();
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
new file mode 100644
index 000000000..cdaa2893a
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php
@@ -0,0 +1,120 @@
+<?php
+
+/**
+ * Definition for list containers ul and ol.
+ */
+class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef
+{
+ public $type = 'list';
+ // lying a little bit, so that we can handle ul and ol ourselves
+ // XXX: This whole business with 'wrap' is all a bit unsatisfactory
+ public $elements = array('li' => true, 'ul' => true, 'ol' => true);
+ public function validateChildren($tokens_of_children, $config, $context) {
+ // Flag for subclasses
+ $this->whitespace = false;
+
+ // if there are no tokens, delete parent node
+ if (empty($tokens_of_children)) return false;
+
+ // the new set of children
+ $result = array();
+
+ // current depth into the nest
+ $nesting = 0;
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ $seen_li = false;
+ $need_close_li = false;
+
+ foreach ($tokens_of_children as $token) {
+ if (!empty($token->is_whitespace)) {
+ $result[] = $token;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+
+ if ($nesting == 1 && $need_close_li) {
+ $result[] = new HTMLPurifier_Token_End('li');
+ $nesting--;
+ $need_close_li = false;
+ }
+
+ $is_child = ($nesting == 0);
+
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ if ($is_child) {
+ if ($token->name === 'li') {
+ // good
+ $seen_li = true;
+ } elseif ($token->name === 'ul' || $token->name === 'ol') {
+ // we want to tuck this into the previous li
+ $need_close_li = true;
+ $nesting++;
+ if (!$seen_li) {
+ // create a new li element
+ $result[] = new HTMLPurifier_Token_Start('li');
+ } else {
+ // backtrack until </li> found
+ while(true) {
+ $t = array_pop($result);
+ if ($t instanceof HTMLPurifier_Token_End) {
+ // XXX actually, these invariants could very plausibly be violated
+ // if we are doing silly things with modifying the set of allowed elements.
+ // FORTUNATELY, it doesn't make a difference, since the allowed
+ // elements are hard-coded here!
+ if ($t->name !== 'li') {
+ trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ break;
+ } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh
+ if ($t->name !== 'li') {
+ trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ // XXX this should have a helper for it...
+ $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor);
+ break;
+ } else {
+ if (!$t->is_whitespace) {
+ trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR);
+ return false;
+ }
+ }
+ }
+ }
+ } else {
+ // start wrapping (this doesn't precisely mimic
+ // browser behavior, but what browsers do is kind of
+ // hard to mimic in a standards compliant way
+ // XXX Actually, this has no impact in practice,
+ // because this gets handled earlier. Arguably,
+ // we should rip out all of that processing
+ $result[] = new HTMLPurifier_Token_Start('li');
+ $nesting++;
+ $seen_li = true;
+ $need_close_li = true;
+ }
+ }
+ $result[] = $token;
+ }
+ if ($need_close_li) {
+ $result[] = new HTMLPurifier_Token_End('li');
+ }
+ if (empty($result)) return false;
+ if ($all_whitespace) {
+ return false;
+ }
+ if ($tokens_of_children == $result) return true;
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php
new file mode 100644
index 000000000..32bcb9898
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Optional.php
@@ -0,0 +1,26 @@
+<?php
+
+/**
+ * Definition that allows a set of elements, and allows no children.
+ * @note This is a hack to reuse code from HTMLPurifier_ChildDef_Required,
+ * really, one shouldn't inherit from the other. Only altered behavior
+ * is to overload a returned false with an array. Thus, it will never
+ * return false.
+ */
+class HTMLPurifier_ChildDef_Optional extends HTMLPurifier_ChildDef_Required
+{
+ public $allow_empty = true;
+ public $type = 'optional';
+ public function validateChildren($tokens_of_children, $config, $context) {
+ $result = parent::validateChildren($tokens_of_children, $config, $context);
+ // we assume that $tokens_of_children is not modified
+ if ($result === false) {
+ if (empty($tokens_of_children)) return true;
+ elseif ($this->whitespace) return $tokens_of_children;
+ else return array();
+ }
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
new file mode 100644
index 000000000..4889f249b
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Required.php
@@ -0,0 +1,117 @@
+<?php
+
+/**
+ * Definition that allows a set of elements, but disallows empty children.
+ */
+class HTMLPurifier_ChildDef_Required extends HTMLPurifier_ChildDef
+{
+ /**
+ * Lookup table of allowed elements.
+ * @public
+ */
+ public $elements = array();
+ /**
+ * Whether or not the last passed node was all whitespace.
+ */
+ protected $whitespace = false;
+ /**
+ * @param $elements List of allowed element names (lowercase).
+ */
+ public function __construct($elements) {
+ if (is_string($elements)) {
+ $elements = str_replace(' ', '', $elements);
+ $elements = explode('|', $elements);
+ }
+ $keys = array_keys($elements);
+ if ($keys == array_keys($keys)) {
+ $elements = array_flip($elements);
+ foreach ($elements as $i => $x) {
+ $elements[$i] = true;
+ if (empty($i)) unset($elements[$i]); // remove blank
+ }
+ }
+ $this->elements = $elements;
+ }
+ public $allow_empty = false;
+ public $type = 'required';
+ public function validateChildren($tokens_of_children, $config, $context) {
+ // Flag for subclasses
+ $this->whitespace = false;
+
+ // if there are no tokens, delete parent node
+ if (empty($tokens_of_children)) return false;
+
+ // the new set of children
+ $result = array();
+
+ // current depth into the nest
+ $nesting = 0;
+
+ // whether or not we're deleting a node
+ $is_deleting = false;
+
+ // whether or not parsed character data is allowed
+ // this controls whether or not we silently drop a tag
+ // or generate escaped HTML from it
+ $pcdata_allowed = isset($this->elements['#PCDATA']);
+
+ // a little sanity check to make sure it's not ALL whitespace
+ $all_whitespace = true;
+
+ // some configuration
+ $escape_invalid_children = $config->get('Core.EscapeInvalidChildren');
+
+ // generator
+ $gen = new HTMLPurifier_Generator($config, $context);
+
+ foreach ($tokens_of_children as $token) {
+ if (!empty($token->is_whitespace)) {
+ $result[] = $token;
+ continue;
+ }
+ $all_whitespace = false; // phew, we're not talking about whitespace
+
+ $is_child = ($nesting == 0);
+
+ if ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ if ($is_child) {
+ $is_deleting = false;
+ if (!isset($this->elements[$token->name])) {
+ $is_deleting = true;
+ if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) {
+ $result[] = $token;
+ } elseif ($pcdata_allowed && $escape_invalid_children) {
+ $result[] = new HTMLPurifier_Token_Text(
+ $gen->generateFromToken($token)
+ );
+ }
+ continue;
+ }
+ }
+ if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) {
+ $result[] = $token;
+ } elseif ($pcdata_allowed && $escape_invalid_children) {
+ $result[] =
+ new HTMLPurifier_Token_Text(
+ $gen->generateFromToken($token)
+ );
+ } else {
+ // drop silently
+ }
+ }
+ if (empty($result)) return false;
+ if ($all_whitespace) {
+ $this->whitespace = true;
+ return false;
+ }
+ if ($tokens_of_children == $result) return true;
+ return $result;
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php
new file mode 100644
index 000000000..dfae8a6e5
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/StrictBlockquote.php
@@ -0,0 +1,88 @@
+<?php
+
+/**
+ * Takes the contents of blockquote when in strict and reformats for validation.
+ */
+class HTMLPurifier_ChildDef_StrictBlockquote extends HTMLPurifier_ChildDef_Required
+{
+ protected $real_elements;
+ protected $fake_elements;
+ public $allow_empty = true;
+ public $type = 'strictblockquote';
+ protected $init = false;
+
+ /**
+ * @note We don't want MakeWellFormed to auto-close inline elements since
+ * they might be allowed.
+ */
+ public function getAllowedElements($config) {
+ $this->init($config);
+ return $this->fake_elements;
+ }
+
+ public function validateChildren($tokens_of_children, $config, $context) {
+
+ $this->init($config);
+
+ // trick the parent class into thinking it allows more
+ $this->elements = $this->fake_elements;
+ $result = parent::validateChildren($tokens_of_children, $config, $context);
+ $this->elements = $this->real_elements;
+
+ if ($result === false) return array();
+ if ($result === true) $result = $tokens_of_children;
+
+ $def = $config->getHTMLDefinition();
+ $block_wrap_start = new HTMLPurifier_Token_Start($def->info_block_wrapper);
+ $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper);
+ $is_inline = false;
+ $depth = 0;
+ $ret = array();
+
+ // assuming that there are no comment tokens
+ foreach ($result as $i => $token) {
+ $token = $result[$i];
+ // ifs are nested for readability
+ if (!$is_inline) {
+ if (!$depth) {
+ if (
+ ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) ||
+ (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name]))
+ ) {
+ $is_inline = true;
+ $ret[] = $block_wrap_start;
+ }
+ }
+ } else {
+ if (!$depth) {
+ // starting tokens have been inline text / empty
+ if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) {
+ if (isset($this->elements[$token->name])) {
+ // ended
+ $ret[] = $block_wrap_end;
+ $is_inline = false;
+ }
+ }
+ }
+ }
+ $ret[] = $token;
+ if ($token instanceof HTMLPurifier_Token_Start) $depth++;
+ if ($token instanceof HTMLPurifier_Token_End) $depth--;
+ }
+ if ($is_inline) $ret[] = $block_wrap_end;
+ return $ret;
+ }
+
+ private function init($config) {
+ if (!$this->init) {
+ $def = $config->getHTMLDefinition();
+ // allow all inline elements
+ $this->real_elements = $this->elements;
+ $this->fake_elements = $def->info_content_sets['Flow'];
+ $this->fake_elements['#PCDATA'] = true;
+ $this->init = true;
+ }
+ }
+}
+
+// vim: et sw=4 sts=4
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
new file mode 100644
index 000000000..9a93421a1
--- /dev/null
+++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php
@@ -0,0 +1,227 @@
+<?php
+
+/**
+ * Definition for tables. The general idea is to extract out all of the
+ * essential bits, and then reconstruct it later.
+ *
+ * This is a bit confusing, because the DTDs and the W3C
+ * validators seem to disagree on the appropriate definition. The
+ * DTD claims:
+ *
+ * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
+ *
+ * But actually, the HTML4 spec then has this to say:
+ *
+ * The TBODY start tag is always required except when the table
+ * contains only one table body and no table head or foot sections.
+ * The TBODY end tag may always be safely omitted.
+ *
+ * So the DTD is kind of wrong. The validator is, unfortunately, kind
+ * of on crack.
+ *
+ * The definition changed again in XHTML1.1; and in my opinion, this
+ * formulation makes the most sense.
+ *
+ * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
+ *
+ * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
+ * If we encounter a thead, tfoot or tbody, we are placed in the former
+ * mode, and we *must* wrap any stray tr segments with a tbody. But if
+ * we don't run into any of them, just have tr tags is OK.
+ */
+class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
+{
+ public $allow_empty = false;
+ public $type = 'table';
+ public $elements = array('tr' => true, 'tbody' => true, 'thead' => true,
+ 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true);
+ public function __construct() {}
+ public function validateChildren($tokens_of_children, $config, $context) {
+ if (empty($tokens_of_children)) return false;
+
+ // this ensures that the loop gets run one last time before closing
+ // up. It's a little bit of a hack, but it works! Just make sure you
+ // get rid of the token later.
+ $tokens_of_children[] = false;
+
+ // only one of these elements is allowed in a table
+ $caption = false;
+ $thead = false;
+ $tfoot = false;
+
+ // as many of these as you want
+ $cols = array();
+ $content = array();
+
+ $nesting = 0; // current depth so we can determine nodes
+ $is_collecting = false; // are we globbing together tokens to package
+ // into one of the collectors?
+ $collection = array(); // collected nodes
+ $tag_index = 0; // the first node might be whitespace,
+ // so this tells us where the start tag is
+ $tbody_mode = false; // if true, then we need to wrap any stray
+ // <tr>s with a <tbody>.
+
+ foreach ($tokens_of_children as $token) {
+ $is_child = ($nesting == 0);
+
+ if ($token === false) {
+ // terminating sequence started
+ } elseif ($token instanceof HTMLPurifier_Token_Start) {
+ $nesting++;
+ } elseif ($token instanceof HTMLPurifier_Token_End) {
+ $nesting--;
+ }
+
+ // handle node collection
+ if ($is_collecting) {
+ if ($is_child) {
+ // okay, let's stash the tokens away
+ // first token tells us the type of the collection
+ switch ($collection[$tag_index]->name) {
+ case 'tbody':
+ $tbody_mode = true;
+ case 'tr':
+ $content[] = $collection;
+ break;
+ case 'caption':
+ if ($caption !== false) break;
+ $caption = $collection;
+ break;
+ case 'thead':
+ case 'tfoot':
+ $tbody_mode = true;
+ // XXX This breaks rendering properties with
+ // Firefox, which never floats a <thead> to
+ // the top. Ever. (Our scheme will float the
+ // first <thead> to the top.) So maybe
+ // <thead>s that are not first should be
+ // turned into <tbody>? Very tricky, indeed.
+
+ // access the appropriate variable, $thead or $tfoot
+ $var = $collection[$tag_index]->name;
+ if ($$var === false) {
+ $$var = $collection;
+ } else {
+ // Oops, there's a second one! What
+ // should we do? Current behavior is to
+ // transmutate the first and last entries into
+ // tbody tags, and then put into content.
+ // Maybe a better idea is to *attach
+ // it* to the existing thead or tfoot?
+ // We don't do this, because Firefox
+ // doesn't float an extra tfoot to the
+ // bottom like it does for the first one.
+ $collection[$tag_index]->name = 'tbody';
+ $collection[count($collection)-1]->name = 'tbody';
+ $content[] = $collection;
+ }
+ break;
+ case 'colgroup':
+ $cols[] = $collection;
+ break;
+ }
+ $collection = array();
+ $is_collecting = false;
+ $tag_index = 0;
+ } else {
+ // add the node to the collection
+ $collection[] = $token;
+ }
+ }
+
+ // terminate
+ if ($token === false) break;
+
+ if ($is_child) {
+ // determine what we're dealing with
+ if ($token->name == 'col') {
+ // the only empty tag in the possie, we can handle it
+ // immediately
+ $cols[] = array_merge($collection, array($token));
+ $collection = array();
+ $tag_index = 0;
+ continue;
+ }
+ switch($token->name) {
+ case 'caption':
+ case 'colgroup':
+ case 'thead':
+ case 'tfoot':
+ case 'tbody':
+ case 'tr':
+ $is_collecting = true;
+ $collection[] = $token;
+ continue;
+ default:
+ if (!empty($token->is_whitespace)) {
+ $collection[] = $token;
+ $tag_index++;
+ }
+ continue;
+ }
+ }
+ }
+
+ if (empty($content)) return false;
+
+ $ret = array();
+ if ($caption !== false) $ret = array_merge($ret, $caption);
+ if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array);
+ if ($thead !== false) $ret = array_merge($ret, $thead);
+ if ($tfoot !== false) $ret = array_merge($ret, $tfoot);
+
+ if ($tbody_mode) {
+ // a little tricky, since the start of the collection may be
+ // whitespace
+ $inside_tbody = false;
+ foreach ($content as $token_array) {
+ // find the starting token
+ foreach ($token_array as $t) {
+ if ($t->name === 'tr' || $t->name === 'tbody') {
+ break;
+ }
+ } // iterator variable carries over
+ if ($t->name === 'tr') {
+ if ($inside_tbody) {
+ $ret = array_merge($ret, $token_array);
+ } else {
+ $ret[] = new HTMLPurifier_Token_Start('tbody');
+ $ret = array_merge($ret, $token_array);
+ $inside_tbody = true;
+ }
+ } elseif ($t->name === 'tbody') {
+ if ($inside_tbody) {
+ $ret[] = new HTMLPurifier_Token_End('tbody');
+ $inside_tbody = false;
+ $ret = array_merge($ret, $token_array);
+ } else {
+ $ret = array_merge($ret, $token_array);
+ }
+ } else {
+ trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR);
+ }
+ }
+ if ($inside_tbody) {
+ $ret[] = new HTMLPurifier_Token_End('tbody');
+ }
+ } else {
+ foreach ($content as $token_array) {
+ // invariant: everything in here is <tr>s
+ $ret = array_merge($ret, $token_array);
+ }
+ }
+
+ if (!empty($collection) && $is_collecting == false){
+ // grab the trailing space
+ $ret = array_merge($ret, $collection);
+ }
+
+ array_pop($tokens_of_children); // remove phantom token
+
+ return ($ret === $tokens_of_children) ? true : $ret;
+
+ }
+}
+
+// vim: et sw=4 sts=4