From 23479b1e18c4d037115f11b3aa5c2ff1cae86700 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mi=C5=A1o=20Belica?= <miso.belica@gmail.com>
Date: Tue, 11 Feb 2014 13:59:10 +0100
Subject: Removed trailing whitespace

---
 src/HTML5/Parser/Tokenizer.php      | 12 ++++++------
 test/HTML5/Parser/TokenizerTest.php | 10 +++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 04baa10..08b898d 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -774,7 +774,7 @@ class Tokenizer {
    *
    * XML processing instructions are supposed to be ignored in HTML5,
    * treated as "bogus comments". However, since we're not a user
-   * agent, we allow them. We consume until ?> and then issue a 
+   * agent, we allow them. We consume until ?> and then issue an
    * EventListener::processingInstruction() event.
    */
   protected function processingInstruction() {
@@ -819,7 +819,7 @@ class Tokenizer {
   // ================================================================
 
   /**
-   * Read from the input stream until we get to the desired sequene 
+   * Read from the input stream until we get to the desired sequence
    * or hit the end of the input stream.
    */
   protected function readUntilSequence($sequence) {
@@ -849,11 +849,11 @@ class Tokenizer {
    * This will read the stream for the $sequence. If it's
    * found, this will return TRUE. If not, return FALSE.
    * Since this unconsumes any chars it reads, the caller
-   * will still need to read the next sequence, even if 
+   * will still need to read the next sequence, even if
    * this returns TRUE.
    *
    * Example: $this->sequenceMatches('</script>') will
-   * see if the input stream is at the start of a 
+   * see if the input stream is at the start of a
    * '</script>' string.
    */
   protected function sequenceMatches($sequence) {
@@ -902,7 +902,7 @@ class Tokenizer {
   /**
    * Emit a parse error.
    *
-   * A parse error always returns FALSE because it never consumes any 
+   * A parse error always returns FALSE because it never consumes any
    * characters.
    */
   protected function parseError($msg) {
@@ -1008,7 +1008,7 @@ class Tokenizer {
       return $entity;
     }
 
-    // If in an attribute, then failing to match ; means unconsume the 
+    // If in an attribute, then failing to match ; means unconsume the
     // entire string. Otherwise, failure to match is an error.
     if ($inAttribute) {
       $this->scanner->unconsume($this->scanner->position() - $start);
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 9f335b0..6c91455 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -109,7 +109,7 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
     $e1 = $events->get(0);
     $this->assertEquals('error', $e1['name']);
 
-    // FIXME: Once the text processor is done, need to verify that the 
+    // FIXME: Once the text processor is done, need to verify that the
     // tokens are transformed correctly into text.
   }
 
@@ -139,12 +139,12 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
     $succeed = array(
       '</a>' => 'a',
       '</test>' => 'test',
-      '</test 
+      '</test
       >' => 'test',
       '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' =>
         'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
       // See 8.2.4.10, which requires this and does not say error.
-      '</a<b>' => 'a<b', 
+      '</a<b>' => 'a<b',
     );
     $this->isAllGood('endTag', 2, $succeed);
 
@@ -271,8 +271,8 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
   public function testProcessorInstruction() {
     $good = array(
       '<?hph ?>' => 'hph',
-      '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '), 
-      "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"), 
+      '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
+      "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
     );
     $this->isAllGood('pi', 2, $good);
   }
-- 
cgit v1.2.3


From a1e7f337a4bab48a55e0b5f5cf4c6a2da1530ade Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mi=C5=A1o=20Belica?= <miso.belica@gmail.com>
Date: Tue, 11 Feb 2014 14:12:40 +0100
Subject: Fixed infinite loop for char "&" in unquoted attribute

---
 src/HTML5/Parser/Tokenizer.php      | 1 +
 test/HTML5/Parser/TokenizerTest.php | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 08b898d..d4ba583 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -483,6 +483,7 @@ class Tokenizer {
     while (strspn($tok, $stoplist) == 0 && $tok !== FALSE) {
       if ($tok == '&') {
         $val .= $this->decodeCharacterReference(TRUE);
+        $tok = $this->scanner->current();
       }
       else {
         if(strspn($tok, "\"'<=`") > 0) {
diff --git a/test/HTML5/Parser/TokenizerTest.php b/test/HTML5/Parser/TokenizerTest.php
index 6c91455..245604f 100644
--- a/test/HTML5/Parser/TokenizerTest.php
+++ b/test/HTML5/Parser/TokenizerTest.php
@@ -379,6 +379,8 @@ class TokenizerTest extends \HTML5\Tests\TestCase {
     $reallyBad = array(
       '<foo ="bar">' => array('foo', array('=' => NULL, '"bar"' => NULL), FALSE),
       '<foo////>' => array('foo', array(), TRUE),
+      // character "&" in unquoted attribute shouldn't cause an infinite loop
+      '<foo bar=index.php?str=1&amp;id=29>' => array('foo', array('bar' => 'index.php?str=1&id=29'), FALSE),
     );
     foreach ($reallyBad as $test => $expects) {
       $events = $this->parse($test);
-- 
cgit v1.2.3