diff options
Diffstat (limited to 'lib/htmlpurifier/library/HTMLPurifier/ChildDef')
-rw-r--r-- | lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php | 120 | ||||
-rw-r--r-- | lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php | 95 |
2 files changed, 5 insertions, 210 deletions
diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php deleted file mode 100644 index cdaa2893a..000000000 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/List.php +++ /dev/null @@ -1,120 +0,0 @@ -<?php - -/** - * Definition for list containers ul and ol. - */ -class HTMLPurifier_ChildDef_List extends HTMLPurifier_ChildDef -{ - public $type = 'list'; - // lying a little bit, so that we can handle ul and ol ourselves - // XXX: This whole business with 'wrap' is all a bit unsatisfactory - public $elements = array('li' => true, 'ul' => true, 'ol' => true); - public function validateChildren($tokens_of_children, $config, $context) { - // Flag for subclasses - $this->whitespace = false; - - // if there are no tokens, delete parent node - if (empty($tokens_of_children)) return false; - - // the new set of children - $result = array(); - - // current depth into the nest - $nesting = 0; - - // a little sanity check to make sure it's not ALL whitespace - $all_whitespace = true; - - $seen_li = false; - $need_close_li = false; - - foreach ($tokens_of_children as $token) { - if (!empty($token->is_whitespace)) { - $result[] = $token; - continue; - } - $all_whitespace = false; // phew, we're not talking about whitespace - - if ($nesting == 1 && $need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - $nesting--; - $need_close_li = false; - } - - $is_child = ($nesting == 0); - - if ($token instanceof HTMLPurifier_Token_Start) { - $nesting++; - } elseif ($token instanceof HTMLPurifier_Token_End) { - $nesting--; - } - - if ($is_child) { - if ($token->name === 'li') { - // good - $seen_li = true; - } elseif ($token->name === 'ul' || $token->name === 'ol') { - // we want to tuck this into the previous li - $need_close_li = true; - $nesting++; - if (!$seen_li) { - // create a new li element - $result[] = new HTMLPurifier_Token_Start('li'); - } else { - // backtrack until </li> found - while(true) { - $t = array_pop($result); - if ($t instanceof HTMLPurifier_Token_End) { - // XXX actually, these invariants could very plausibly be violated - // if we are doing silly things with modifying the set of allowed elements. - // FORTUNATELY, it doesn't make a difference, since the allowed - // elements are hard-coded here! - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - break; - } elseif ($t instanceof HTMLPurifier_Token_Empty) { // bleagh - if ($t->name !== 'li') { - trigger_error("Only li present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - // XXX this should have a helper for it... - $result[] = new HTMLPurifier_Token_Start('li', $t->attr, $t->line, $t->col, $t->armor); - break; - } else { - if (!$t->is_whitespace) { - trigger_error("Only whitespace present invariant violated in List ChildDef", E_USER_ERROR); - return false; - } - } - } - } - } else { - // start wrapping (this doesn't precisely mimic - // browser behavior, but what browsers do is kind of - // hard to mimic in a standards compliant way - // XXX Actually, this has no impact in practice, - // because this gets handled earlier. Arguably, - // we should rip out all of that processing - $result[] = new HTMLPurifier_Token_Start('li'); - $nesting++; - $seen_li = true; - $need_close_li = true; - } - } - $result[] = $token; - } - if ($need_close_li) { - $result[] = new HTMLPurifier_Token_End('li'); - } - if (empty($result)) return false; - if ($all_whitespace) { - return false; - } - if ($tokens_of_children == $result) return true; - return $result; - } -} - -// vim: et sw=4 sts=4 diff --git a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php index 9a93421a1..34f0227dd 100644 --- a/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php +++ b/lib/htmlpurifier/library/HTMLPurifier/ChildDef/Table.php @@ -1,33 +1,7 @@ <?php /** - * Definition for tables. The general idea is to extract out all of the - * essential bits, and then reconstruct it later. - * - * This is a bit confusing, because the DTDs and the W3C - * validators seem to disagree on the appropriate definition. The - * DTD claims: - * - * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+) - * - * But actually, the HTML4 spec then has this to say: - * - * The TBODY start tag is always required except when the table - * contains only one table body and no table head or foot sections. - * The TBODY end tag may always be safely omitted. - * - * So the DTD is kind of wrong. The validator is, unfortunately, kind - * of on crack. - * - * The definition changed again in XHTML1.1; and in my opinion, this - * formulation makes the most sense. - * - * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ )) - * - * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode. - * If we encounter a thead, tfoot or tbody, we are placed in the former - * mode, and we *must* wrap any stray tr segments with a tbody. But if - * we don't run into any of them, just have tr tags is OK. + * Definition for tables */ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef { @@ -59,8 +33,6 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef $collection = array(); // collected nodes $tag_index = 0; // the first node might be whitespace, // so this tells us where the start tag is - $tbody_mode = false; // if true, then we need to wrap any stray - // <tr>s with a <tbody>. foreach ($tokens_of_children as $token) { $is_child = ($nesting == 0); @@ -79,9 +51,8 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef // okay, let's stash the tokens away // first token tells us the type of the collection switch ($collection[$tag_index]->name) { - case 'tbody': - $tbody_mode = true; case 'tr': + case 'tbody': $content[] = $collection; break; case 'caption': @@ -90,28 +61,13 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef break; case 'thead': case 'tfoot': - $tbody_mode = true; - // XXX This breaks rendering properties with - // Firefox, which never floats a <thead> to - // the top. Ever. (Our scheme will float the - // first <thead> to the top.) So maybe - // <thead>s that are not first should be - // turned into <tbody>? Very tricky, indeed. - // access the appropriate variable, $thead or $tfoot $var = $collection[$tag_index]->name; if ($$var === false) { $$var = $collection; } else { - // Oops, there's a second one! What - // should we do? Current behavior is to - // transmutate the first and last entries into - // tbody tags, and then put into content. - // Maybe a better idea is to *attach - // it* to the existing thead or tfoot? - // We don't do this, because Firefox - // doesn't float an extra tfoot to the - // bottom like it does for the first one. + // transmutate the first and less entries into + // tbody tags, and then put into content $collection[$tag_index]->name = 'tbody'; $collection[count($collection)-1]->name = 'tbody'; $content[] = $collection; @@ -170,48 +126,7 @@ class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); if ($thead !== false) $ret = array_merge($ret, $thead); if ($tfoot !== false) $ret = array_merge($ret, $tfoot); - - if ($tbody_mode) { - // a little tricky, since the start of the collection may be - // whitespace - $inside_tbody = false; - foreach ($content as $token_array) { - // find the starting token - foreach ($token_array as $t) { - if ($t->name === 'tr' || $t->name === 'tbody') { - break; - } - } // iterator variable carries over - if ($t->name === 'tr') { - if ($inside_tbody) { - $ret = array_merge($ret, $token_array); - } else { - $ret[] = new HTMLPurifier_Token_Start('tbody'); - $ret = array_merge($ret, $token_array); - $inside_tbody = true; - } - } elseif ($t->name === 'tbody') { - if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - $inside_tbody = false; - $ret = array_merge($ret, $token_array); - } else { - $ret = array_merge($ret, $token_array); - } - } else { - trigger_error("tr/tbody in content invariant failed in Table ChildDef", E_USER_ERROR); - } - } - if ($inside_tbody) { - $ret[] = new HTMLPurifier_Token_End('tbody'); - } - } else { - foreach ($content as $token_array) { - // invariant: everything in here is <tr>s - $ret = array_merge($ret, $token_array); - } - } - + foreach ($content as $token_array) $ret = array_merge($ret, $token_array); if (!empty($collection) && $is_collecting == false){ // grab the trailing space $ret = array_merge($ret, $collection); |