summaryrefslogtreecommitdiff
path: root/src/HTML5
diff options
context:
space:
mode:
authorAsmir Mustafic <[email protected]>2018-11-27 12:29:35 +0100
committerGitHub <[email protected]>2018-11-27 12:29:35 +0100
commit182f34ddba2e31e625f255416d85bba38cc6b39b (patch)
tree066cceaabbc47c07b9147ba42b9e11cc24306b67 /src/HTML5
parent88b7c663e69673f16f4ce10db947135b97f40e00 (diff)
parent6cdf4283046325b9bc2671d0648b73a2be1d0946 (diff)
Merge pull request #161 from stof/optimize_main_loop
Optimize main loop
Diffstat (limited to 'src/HTML5')
-rw-r--r--src/HTML5/Parser/Tokenizer.php74
1 files changed, 31 insertions, 43 deletions
diff --git a/src/HTML5/Parser/Tokenizer.php b/src/HTML5/Parser/Tokenizer.php
index 44b6875..a41fcb4 100644
--- a/src/HTML5/Parser/Tokenizer.php
+++ b/src/HTML5/Parser/Tokenizer.php
@@ -131,22 +131,28 @@ class Tokenizer
$tok = $this->scanner->next();
- $this->markupDeclaration($tok)
- || $this->endTag()
- || $this->processingInstruction()
- || $this->tagName()
- // This always returns false.
- || $this->parseError('Illegal tag opening')
- || $this->characterData();
+ if ('!' === $tok) {
+ $this->markupDeclaration();
+ } elseif ('/' === $tok) {
+ $this->endTag();
+ } elseif ('?' === $tok) {
+ $this->processingInstruction();
+ } elseif (ctype_alpha($tok)) {
+ $this->tagName();
+ } else {
+ $this->parseError('Illegal tag opening');
+ // TODO is this necessary ?
+ $this->characterData();
+ }
$tok = $this->scanner->current();
}
- // Handle end of document
- $this->eof($tok);
-
- // Parse character
- if (false !== $tok) {
+ if (false === $tok) {
+ // Handle end of document
+ $this->eof();
+ } else {
+ // Parse character
switch ($this->textMode) {
case Elements::TEXT_RAW:
$this->rawText($tok);
@@ -288,29 +294,19 @@ class Tokenizer
/**
* If the document is read, emit an EOF event.
*/
- protected function eof($tok)
+ protected function eof()
{
- if (false === $tok) {
- // fprintf(STDOUT, "EOF");
- $this->flushBuffer();
- $this->events->eof();
- $this->carryOn = false;
-
- return true;
- }
-
- return false;
+ // fprintf(STDOUT, "EOF");
+ $this->flushBuffer();
+ $this->events->eof();
+ $this->carryOn = false;
}
/**
* Look for markup.
*/
- protected function markupDeclaration($tok)
+ protected function markupDeclaration()
{
- if ('!' != $tok) {
- return false;
- }
-
$tok = $this->scanner->next();
// Comment:
@@ -377,11 +373,6 @@ class Tokenizer
*/
protected function tagName()
{
- $tok = $this->scanner->current();
- if (!ctype_alpha($tok)) {
- return false;
- }
-
// We know this is at least one char.
$name = $this->scanner->charsWhile(':_-0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz');
$name = self::CONFORMANT_XML === $this->mode ? $name : strtolower($name);
@@ -743,12 +734,11 @@ class Tokenizer
*/
protected function doctype()
{
- if (strcasecmp($this->scanner->current(), 'D')) {
- return false;
- }
// Check that string is DOCTYPE.
- $chars = $this->scanner->charsWhile('DOCTYPEdoctype');
- if (strcasecmp($chars, 'DOCTYPE')) {
+ if ($this->scanner->sequenceMatches('DOCTYPE', false)) {
+ $this->scanner->consume(7);
+ } else {
+ $chars = $this->scanner->charsWhile('DOCTYPEdoctype');
$this->parseError('Expected DOCTYPE, got %s', $chars);
return $this->bogusComment('<!' . $chars);
@@ -760,8 +750,9 @@ class Tokenizer
// EOF: die.
if (false === $tok) {
$this->events->doctype('html5', EventHandler::DOCTYPE_NONE, '', true);
+ $this->eof();
- return $this->eof($tok);
+ return true;
}
// NULL char: convert.
@@ -812,7 +803,7 @@ class Tokenizer
if (false === $id) {
$this->events->doctype($doctypeName, $type, $pub, false);
- return false;
+ return true;
}
// Premature EOF.
@@ -887,9 +878,6 @@ class Tokenizer
*/
protected function cdataSection()
{
- if ('[' != $this->scanner->current()) {
- return false;
- }
$cdata = '';
$this->scanner->consume();