From 8a33bcbed46eccd2860136ec2c7332c9cfa639f7 Mon Sep 17 00:00:00 2001 From: Andres Rey Date: Thu, 15 Jun 2017 17:54:39 +0100 Subject: Safecheck for really bad HTML --- CHANGELOG.md | 2 ++ src/HTMLParser.php | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e181081..685cfb3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file. ## Unreleased +- Added a safecheck for really nasty HTML + ## [v0.2.1](https://github.com/andreskrey/readability.php/releases/tag/v0.2.1) - Added `normalizeEntities` flag to convert UTF-8 characters to its HTML Entity equivalent. Fixes bugs on htmls with mixed encoding. diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 6e77e3f..9aa9974 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -125,7 +125,7 @@ class HTMLParser $this->metadata['title'] = $this->getTitle(); // Checking for minimum HTML to work with. - if (!($root = $this->dom->getElementsByTagName('body')->item(0))) { + if (!($root = $this->dom->getElementsByTagName('body')->item(0)) || !$root->firstChild) { return false; } -- cgit v1.2.3