diff options
author | Andres Rey <[email protected]> | 2017-06-15 17:54:39 +0100 |
---|---|---|
committer | Andres Rey <[email protected]> | 2017-06-15 17:54:39 +0100 |
commit | 8a33bcbed46eccd2860136ec2c7332c9cfa639f7 (patch) | |
tree | 110f960b53e5329aa352edf6fa9260123de2f1b5 | |
parent | 7e2c7ca4d8f78d5c2413e9bfa83a4f2342fbc8c1 (diff) |
Safecheck for really bad HTML
-rw-r--r-- | CHANGELOG.md | 2 | ||||
-rw-r--r-- | src/HTMLParser.php | 2 |
2 files changed, 3 insertions, 1 deletion
```diff
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e181081..685cfb3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file.
 
 ## Unreleased
 
+- Added a safecheck for really nasty HTML
+
 ## [v0.2.1](https://github.com/andreskrey/readability.php/releases/tag/v0.2.1)
 
 - Added `normalizeEntities` flag to convert UTF-8 characters to its HTML Entity equivalent. Fixes bugs on htmls with mixed encoding.
diff --git a/src/HTMLParser.php b/src/HTMLParser.php
index 6e77e3f..9aa9974 100644
--- a/src/HTMLParser.php
+++ b/src/HTMLParser.php
@@ -125,7 +125,7 @@ class HTMLParser
         $this->metadata['title'] = $this->getTitle();
 
         // Checking for minimum HTML to work with.
-        if (!($root = $this->dom->getElementsByTagName('body')->item(0))) {
+        if (!($root = $this->dom->getElementsByTagName('body')->item(0)) || !$root->firstChild) {
             return false;
         }
 
```