diff options
author | Andres Rey <[email protected]> | 2017-03-26 11:34:29 +0100 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-03-26 11:34:29 +0100 |
commit | 01fb375746d7ef9178c4bf651774da67632b7454 (patch) | |
tree | 8d141fcefa0dc643d1188ee01ffbf1952ed9406a /src | |
parent | 361a1f73048a3f68af539344b0c294c9633f5820 (diff) |
Added normalizeEntities flag.
Diffstat (limited to 'src')
-rw-r--r-- | src/HTMLParser.php | 6 |
1 file changed, 6 insertions, 0 deletions
```diff
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 3226904..105e50c 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -100,6 +100,7 @@ class HTMLParser
         'removeReadabilityTags' => true,
         'fixRelativeURLs' => false,
         'substituteEntities' => true,
+        'normalizeEntities' => false,
         'originalURL' => 'http://fakehost',
     ];
 
@@ -208,6 +209,11 @@ class HTMLParser
             $this->dom->substituteEntities = false;
         }
 
+        if ($this->getConfig()->getOption('normalizeEntities')) {
+            // Replace UTF-8 characters with the HTML Entity equivalent. Useful to fix html with mixed content
+            $html = mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8');
+        }
+
         // Prepend the XML tag to avoid having issues with special characters. Should be harmless.
         $this->dom->loadHTML('<?xml encoding="UTF-8">' . $html);
         $this->dom->encoding = 'UTF-8';
```