summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Dolgov <[email protected]> 2024-04-03 17:58:29 +0000
committer Andrew Dolgov <[email protected]> 2024-04-03 17:58:29 +0000
commit f2169ca419be339fa5ba32d0a50770f3d442d8b7 (patch)
tree 05d540d97ad9d1b3bdb65ddbe26c1410658b1b28
parent cdc97d886cb7085f9c44a1796ee4bbbf57534d06 (diff)
parent 9ba1adcde4953c8e04630b7b31c29b66d158d428 (diff)
Merge branch 'feature/readability-ignore-more-elements' into 'master' (HEAD, master)
Have Readability ignore/strip 'template' elements. See merge request tt-rss/plugins/ttrss-af-readability!1
-rw-r--r-- composer.lock 6
-rwxr-xr-x init.php 5
-rw-r--r-- vendor/composer/ClassLoader.php 137
-rw-r--r-- vendor/composer/InstalledVersions.php 17
-rw-r--r-- vendor/composer/autoload_real.php 2
-rw-r--r-- vendor/composer/installed.json 4
-rw-r--r-- vendor/composer/installed.php 14
-rw-r--r-- vendor/composer/platform_check.php 26
-rw-r--r-- vendor/fivefilters/readability.php/README.md 1
-rw-r--r-- vendor/fivefilters/readability.php/src/Configuration.php 25
-rw-r--r-- vendor/fivefilters/readability.php/src/Readability.php 8
11 files changed, 129 insertions, 116 deletions
diff --git a/composer.lock b/composer.lock
index f4fc172..1f87b0a 100644
--- a/composer.lock
+++ b/composer.lock
@@ -12,7 +12,7 @@
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/readability-php.git",
- "reference": "8ac5abdd497b37d2be4833bcf18d6819bba4d9c9"
+ "reference": "37c0c2ce76aa90e8adafbd5cb0b0332a54df1523"
},
"require": {
"ext-dom": "*",
@@ -64,7 +64,7 @@
"html",
"readability"
],
- "time": "2022-07-31T06:02:47+00:00"
+ "time": "2024-04-03T17:38:00+00:00"
},
{
"name": "league/uri",
@@ -469,5 +469,5 @@
"prefer-lowest": false,
"platform": [],
"platform-dev": [],
- "plugin-api-version": "2.3.0"
+ "plugin-api-version": "2.6.0"
}
diff --git a/init.php b/init.php
index 1deb1fc..591be6e 100755
--- a/init.php
+++ b/init.php
@@ -220,8 +220,9 @@ class Af_Readability extends Plugin {
try {
$r = new Readability(new Configuration([
- 'fixRelativeURLs' => true,
- 'originalURL' => $url,
+ 'FixRelativeURLs' => true,
+ 'OriginalURL' => $url,
+ 'ExtraIgnoredElements' => ['template'],
]));
if ($r->parse($tmp)) {
diff --git a/vendor/composer/ClassLoader.php b/vendor/composer/ClassLoader.php
index afef3fa..7824d8f 100644
--- a/vendor/composer/ClassLoader.php
+++ b/vendor/composer/ClassLoader.php
@@ -42,35 +42,37 @@ namespace Composer\Autoload;
*/
class ClassLoader
{
- /** @var ?string */
+ /** @var \Closure(string):void */
+ private static $includeFile;
+
+ /** @var string|null */
private $vendorDir;
// PSR-4
/**
- * @var array[]
- * @psalm-var array<string, array<string, int>>
+ * @var array<string, array<string, int>>
*/
private $prefixLengthsPsr4 = array();
/**
- * @var array[]
- * @psalm-var array<string, array<int, string>>
+ * @var array<string, list<string>>
*/
private $prefixDirsPsr4 = array();
/**
- * @var array[]
- * @psalm-var array<string, string>
+ * @var list<string>
*/
private $fallbackDirsPsr4 = array();
// PSR-0
/**
- * @var array[]
- * @psalm-var array<string, array<string, string[]>>
+ * List of PSR-0 prefixes
+ *
+ * Structured as array('F (first letter)' => array('Foo\Bar (full prefix)' => array('path', 'path2')))
+ *
+ * @var array<string, array<string, list<string>>>
*/
private $prefixesPsr0 = array();
/**
- * @var array[]
- * @psalm-var array<string, string>
+ * @var list<string>
*/
private $fallbackDirsPsr0 = array();
@@ -78,8 +80,7 @@ class ClassLoader
private $useIncludePath = false;
/**
- * @var string[]
- * @psalm-var array<string, string>
+ * @var array<string, string>
*/
private $classMap = array();
@@ -87,29 +88,29 @@ class ClassLoader
private $classMapAuthoritative = false;
/**
- * @var bool[]
- * @psalm-var array<string, bool>
+ * @var array<string, bool>
*/
private $missingClasses = array();
- /** @var ?string */
+ /** @var string|null */
private $apcuPrefix;
/**
- * @var self[]
+ * @var array<string, self>
*/
private static $registeredLoaders = array();
/**
- * @param ?string $vendorDir
+ * @param string|null $vendorDir
*/
public function __construct($vendorDir = null)
{
$this->vendorDir = $vendorDir;
+ self::initializeIncludeClosure();
}
/**
- * @return string[]
+ * @return array<string, list<string>>
*/
public function getPrefixes()
{
@@ -121,8 +122,7 @@ class ClassLoader
}
/**
- * @return array[]
- * @psalm-return array<string, array<int, string>>
+ * @return array<string, list<string>>
*/
public function getPrefixesPsr4()
{
@@ -130,8 +130,7 @@ class ClassLoader
}
/**
- * @return array[]
- * @psalm-return array<string, string>
+ * @return list<string>
*/
public function getFallbackDirs()
{
@@ -139,8 +138,7 @@ class ClassLoader
}
/**
- * @return array[]
- * @psalm-return array<string, string>
+ * @return list<string>
*/
public function getFallbackDirsPsr4()
{
@@ -148,8 +146,7 @@ class ClassLoader
}
/**
- * @return string[] Array of classname => path
- * @psalm-return array<string, string>
+ * @return array<string, string> Array of classname => path
*/
public function getClassMap()
{
@@ -157,8 +154,7 @@ class ClassLoader
}
/**
- * @param string[] $classMap Class to filename map
- * @psalm-param array<string, string> $classMap
+ * @param array<string, string> $classMap Class to filename map
*
* @return void
*/
@@ -175,24 +171,25 @@ class ClassLoader
* Registers a set of PSR-0 directories for a given prefix, either
* appending or prepending to the ones previously set for this prefix.
*
- * @param string $prefix The prefix
- * @param string[]|string $paths The PSR-0 root directories
- * @param bool $prepend Whether to prepend the directories
+ * @param string $prefix The prefix
+ * @param list<string>|string $paths The PSR-0 root directories
+ * @param bool $prepend Whether to prepend the directories
*
* @return void
*/
public function add($prefix, $paths, $prepend = false)
{
+ $paths = (array) $paths;
if (!$prefix) {
if ($prepend) {
$this->fallbackDirsPsr0 = array_merge(
- (array) $paths,
+ $paths,
$this->fallbackDirsPsr0
);
} else {
$this->fallbackDirsPsr0 = array_merge(
$this->fallbackDirsPsr0,
- (array) $paths
+ $paths
);
}
@@ -201,19 +198,19 @@ class ClassLoader
$first = $prefix[0];
if (!isset($this->prefixesPsr0[$first][$prefix])) {
- $this->prefixesPsr0[$first][$prefix] = (array) $paths;
+ $this->prefixesPsr0[$first][$prefix] = $paths;
return;
}
if ($prepend) {
$this->prefixesPsr0[$first][$prefix] = array_merge(
- (array) $paths,
+ $paths,
$this->prefixesPsr0[$first][$prefix]
);
} else {
$this->prefixesPsr0[$first][$prefix] = array_merge(
$this->prefixesPsr0[$first][$prefix],
- (array) $paths
+ $paths
);
}
}
@@ -222,9 +219,9 @@ class ClassLoader
* Registers a set of PSR-4 directories for a given namespace, either
* appending or prepending to the ones previously set for this namespace.
*
- * @param string $prefix The prefix/namespace, with trailing '\\'
- * @param string[]|string $paths The PSR-4 base directories
- * @param bool $prepend Whether to prepend the directories
+ * @param string $prefix The prefix/namespace, with trailing '\\'
+ * @param list<string>|string $paths The PSR-4 base directories
+ * @param bool $prepend Whether to prepend the directories
*
* @throws \InvalidArgumentException
*
@@ -232,17 +229,18 @@ class ClassLoader
*/
public function addPsr4($prefix, $paths, $prepend = false)
{
+ $paths = (array) $paths;
if (!$prefix) {
// Register directories for the root namespace.
if ($prepend) {
$this->fallbackDirsPsr4 = array_merge(
- (array) $paths,
+ $paths,
$this->fallbackDirsPsr4
);
} else {
$this->fallbackDirsPsr4 = array_merge(
$this->fallbackDirsPsr4,
- (array) $paths
+ $paths
);
}
} elseif (!isset($this->prefixDirsPsr4[$prefix])) {
@@ -252,18 +250,18 @@ class ClassLoader
throw new \InvalidArgumentException("A non-empty PSR-4 prefix must end with a namespace separator.");
}
$this->prefixLengthsPsr4[$prefix[0]][$prefix] = $length;
- $this->prefixDirsPsr4[$prefix] = (array) $paths;
+ $this->prefixDirsPsr4[$prefix] = $paths;
} elseif ($prepend) {
// Prepend directories for an already registered namespace.
$this->prefixDirsPsr4[$prefix] = array_merge(
- (array) $paths,
+ $paths,
$this->prefixDirsPsr4[$prefix]
);
} else {
// Append directories for an already registered namespace.
$this->prefixDirsPsr4[$prefix] = array_merge(
$this->prefixDirsPsr4[$prefix],
- (array) $paths
+ $paths
);
}
}
@@ -272,8 +270,8 @@ class ClassLoader
* Registers a set of PSR-0 directories for a given prefix,
* replacing any others previously set for this prefix.
*
- * @param string $prefix The prefix
- * @param string[]|string $paths The PSR-0 base directories
+ * @param string $prefix The prefix
+ * @param list<string>|string $paths The PSR-0 base directories
*
* @return void
*/
@@ -290,8 +288,8 @@ class ClassLoader
* Registers a set of PSR-4 directories for a given namespace,
* replacing any others previously set for this namespace.
*
- * @param string $prefix The prefix/namespace, with trailing '\\'
- * @param string[]|string $paths The PSR-4 base directories
+ * @param string $prefix The prefix/namespace, with trailing '\\'
+ * @param list<string>|string $paths The PSR-4 base directories
*
* @throws \InvalidArgumentException
*
@@ -425,7 +423,8 @@ class ClassLoader
public function loadClass($class)
{
if ($file = $this->findFile($class)) {
- includeFile($file);
+ $includeFile = self::$includeFile;
+ $includeFile($file);
return true;
}
@@ -476,9 +475,9 @@ class ClassLoader
}
/**
- * Returns the currently registered loaders indexed by their corresponding vendor directories.
+ * Returns the currently registered loaders keyed by their corresponding vendor directories.
*
- * @return self[]
+ * @return array<string, self>
*/
public static function getRegisteredLoaders()
{
@@ -555,18 +554,26 @@ class ClassLoader
return false;
}
-}
-/**
- * Scope isolated include.
- *
- * Prevents access to $this/self from included files.
- *
- * @param string $file
- * @return void
- * @private
- */
-function includeFile($file)
-{
- include $file;
+ /**
+ * @return void
+ */
+ private static function initializeIncludeClosure()
+ {
+ if (self::$includeFile !== null) {
+ return;
+ }
+
+ /**
+ * Scope isolated include.
+ *
+ * Prevents access to $this/self from included files.
+ *
+ * @param string $file
+ * @return void
+ */
+ self::$includeFile = \Closure::bind(static function($file) {
+ include $file;
+ }, null, null);
+ }
}
diff --git a/vendor/composer/InstalledVersions.php b/vendor/composer/InstalledVersions.php
index c6b54af..51e734a 100644
--- a/vendor/composer/InstalledVersions.php
+++ b/vendor/composer/InstalledVersions.php
@@ -98,7 +98,7 @@ class InstalledVersions
{
foreach (self::getInstalled() as $installed) {
if (isset($installed['versions'][$packageName])) {
- return $includeDevRequirements || empty($installed['versions'][$packageName]['dev_requirement']);
+ return $includeDevRequirements || !isset($installed['versions'][$packageName]['dev_requirement']) || $installed['versions'][$packageName]['dev_requirement'] === false;
}
}
@@ -119,7 +119,7 @@ class InstalledVersions
*/
public static function satisfies(VersionParser $parser, $packageName, $constraint)
{
- $constraint = $parser->parseConstraints($constraint);
+ $constraint = $parser->parseConstraints((string) $constraint);
$provided = $parser->parseConstraints(self::getVersionRanges($packageName));
return $provided->matches($constraint);
@@ -328,7 +328,9 @@ class InstalledVersions
if (isset(self::$installedByVendor[$vendorDir])) {
$installed[] = self::$installedByVendor[$vendorDir];
} elseif (is_file($vendorDir.'/composer/installed.php')) {
- $installed[] = self::$installedByVendor[$vendorDir] = require $vendorDir.'/composer/installed.php';
+ /** @var array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>} $required */
+ $required = require $vendorDir.'/composer/installed.php';
+ $installed[] = self::$installedByVendor[$vendorDir] = $required;
if (null === self::$installed && strtr($vendorDir.'/composer', '\\', '/') === strtr(__DIR__, '\\', '/')) {
self::$installed = $installed[count($installed) - 1];
}
@@ -340,12 +342,17 @@ class InstalledVersions
// only require the installed.php file if this file is loaded from its dumped location,
// and not from its source location in the composer/composer package, see https://github.com/composer/composer/issues/9937
if (substr(__DIR__, -8, 1) !== 'C') {
- self::$installed = require __DIR__ . '/installed.php';
+ /** @var array{root: array{name: string, pretty_version: string, version: string, reference: string|null, type: string, install_path: string, aliases: string[], dev: bool}, versions: array<string, array{pretty_version?: string, version?: string, reference?: string|null, type?: string, install_path?: string, aliases?: string[], dev_requirement: bool, replaced?: string[], provided?: string[]}>} $required */
+ $required = require __DIR__ . '/installed.php';
+ self::$installed = $required;
} else {
self::$installed = array();
}
}
- $installed[] = self::$installed;
+
+ if (self::$installed !== array()) {
+ $installed[] = self::$installed;
+ }
return $installed;
}
diff --git a/vendor/composer/autoload_real.php b/vendor/composer/autoload_real.php
index 0ab935b..63b0d87 100644
--- a/vendor/composer/autoload_real.php
+++ b/vendor/composer/autoload_real.php
@@ -22,8 +22,6 @@ class ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0
return self::$loader;
}
- require __DIR__ . '/platform_check.php';
-
spl_autoload_register(array('ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader(\dirname(__DIR__));
spl_autoload_unregister(array('ComposerAutoloaderInitb44cc79a0eaef9cd9c2f2ac697cbe9c0', 'loadClassLoader'));
diff --git a/vendor/composer/installed.json b/vendor/composer/installed.json
index 9e3c8f6..df421eb 100644
--- a/vendor/composer/installed.json
+++ b/vendor/composer/installed.json
@@ -7,7 +7,7 @@
"source": {
"type": "git",
"url": "https://dev.tt-rss.org/fox/readability-php.git",
- "reference": "8ac5abdd497b37d2be4833bcf18d6819bba4d9c9"
+ "reference": "37c0c2ce76aa90e8adafbd5cb0b0332a54df1523"
},
"require": {
"ext-dom": "*",
@@ -25,7 +25,7 @@
"suggest": {
"monolog/monolog": "Allow logging debug information"
},
- "time": "2022-07-31T06:02:47+00:00",
+ "time": "2024-04-03T17:38:00+00:00",
"default-branch": true,
"type": "library",
"installation-source": "source",
diff --git a/vendor/composer/installed.php b/vendor/composer/installed.php
index 165891d..f929588 100644
--- a/vendor/composer/installed.php
+++ b/vendor/composer/installed.php
@@ -1,9 +1,9 @@
<?php return array(
'root' => array(
'name' => '__root__',
- 'pretty_version' => '1.0.0+no-version-set',
- 'version' => '1.0.0.0',
- 'reference' => NULL,
+ 'pretty_version' => 'dev-master',
+ 'version' => 'dev-master',
+ 'reference' => '684721a181377915e42c7a04c1a099be12eb4a38',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
@@ -11,9 +11,9 @@
),
'versions' => array(
'__root__' => array(
- 'pretty_version' => '1.0.0+no-version-set',
- 'version' => '1.0.0.0',
- 'reference' => NULL,
+ 'pretty_version' => 'dev-master',
+ 'version' => 'dev-master',
+ 'reference' => '684721a181377915e42c7a04c1a099be12eb4a38',
'type' => 'library',
'install_path' => __DIR__ . '/../../',
'aliases' => array(),
@@ -22,7 +22,7 @@
'fivefilters/readability.php' => array(
'pretty_version' => 'dev-master',
'version' => 'dev-master',
- 'reference' => '8ac5abdd497b37d2be4833bcf18d6819bba4d9c9',
+ 'reference' => '37c0c2ce76aa90e8adafbd5cb0b0332a54df1523',
'type' => 'library',
'install_path' => __DIR__ . '/../fivefilters/readability.php',
'aliases' => array(
diff --git a/vendor/composer/platform_check.php b/vendor/composer/platform_check.php
deleted file mode 100644
index 580fa96..0000000
--- a/vendor/composer/platform_check.php
+++ /dev/null
@@ -1,26 +0,0 @@
-<?php
-
-// platform_check.php @generated by Composer
-
-$issues = array();
-
-if (!(PHP_VERSION_ID >= 70400)) {
- $issues[] = 'Your Composer dependencies require a PHP version ">= 7.4.0". You are running ' . PHP_VERSION . '.';
-}
-
-if ($issues) {
- if (!headers_sent()) {
- header('HTTP/1.1 500 Internal Server Error');
- }
- if (!ini_get('display_errors')) {
- if (PHP_SAPI === 'cli' || PHP_SAPI === 'phpdbg') {
- fwrite(STDERR, 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . implode(PHP_EOL, $issues) . PHP_EOL.PHP_EOL);
- } elseif (!headers_sent()) {
- echo 'Composer detected issues in your platform:' . PHP_EOL.PHP_EOL . str_replace('You are running '.PHP_VERSION.'.', '', implode(PHP_EOL, $issues)) . PHP_EOL.PHP_EOL;
- }
- }
- trigger_error(
- 'Composer detected issues in your platform: ' . implode(' ', $issues),
- E_USER_ERROR
- );
-}
diff --git a/vendor/fivefilters/readability.php/README.md b/vendor/fivefilters/readability.php/README.md
index c910714..547178c 100644
--- a/vendor/fivefilters/readability.php/README.md
+++ b/vendor/fivefilters/readability.php/README.md
@@ -117,6 +117,7 @@ Then you pass this Configuration object to Readability. The following options ar
- **KeepClasses**: default value `false`, which removes all `class="..."` attribute values from HTML elements.
- **Parser**: default value `html5`, which uses HTML5-PHP for parsing. Set to `libxml` to use that instead (not recommended for modern HTML documents).
- **SummonCthulhu**: default value `false`, remove all `<script>` nodes via regex. This is not ideal as it might break things, but if you've set the parser to libxml (see above), it might be the only solution to [libxml problems with unescaped javascript](https://github.com/fivefilters/readability.php#known-libxml-parsing-issues).
+- **ExtraIgnoredElements**: default value `[]`, additional DOM elements that should be ignored. `noscript` and `script` are always ignored.
### Debug log
diff --git a/vendor/fivefilters/readability.php/src/Configuration.php b/vendor/fivefilters/readability.php/src/Configuration.php
index 6d1f03f..0659a82 100644
--- a/vendor/fivefilters/readability.php/src/Configuration.php
+++ b/vendor/fivefilters/readability.php/src/Configuration.php
@@ -84,6 +84,11 @@ class Configuration
protected $disableJSONLD = false;
/**
+ * @var array<string>
+ */
+ protected $extraIgnoredElements = [];
+
+ /**
* Configuration constructor.
*
* @param array $params
@@ -420,4 +425,24 @@ class Configuration
return $this;
}
+
+ /**
+ * @return array<string>
+ */
+ public function getExtraIgnoredElements()
+ {
+ return $this->extraIgnoredElements;
+ }
+
+ /**
+ * @param array<string> $extraIgnoredElements
+ *
+ * @return $this
+ */
+ public function setExtraIgnoredElements($extraIgnoredElements)
+ {
+ $this->extraIgnoredElements = $extraIgnoredElements;
+
+ return $this;
+ }
}
diff --git a/vendor/fivefilters/readability.php/src/Readability.php b/vendor/fivefilters/readability.php/src/Readability.php
index 6407a92..c5318a4 100644
--- a/vendor/fivefilters/readability.php/src/Readability.php
+++ b/vendor/fivefilters/readability.php/src/Readability.php
@@ -342,7 +342,7 @@ class Readability
// Extract JSON-LD metadata before removing scripts
$this->jsonld = $this->configuration->getDisableJSONLD() ? [] : $this->getJSONLD($dom);
- $this->removeScripts($dom);
+ $this->removeIgnoredElements($dom);
$this->prepDocument($dom);
@@ -1188,13 +1188,13 @@ class Readability
}
/**
- * Removes all the scripts of the html.
+ * Removes elements that should be ignored.
*
* @param DOMDocument $dom
*/
- private function removeScripts(DOMDocument $dom)
+ private function removeIgnoredElements(DOMDocument $dom)
{
- foreach (['script', 'noscript'] as $tag) {
+ foreach (['noscript', 'script', ...$this->configuration->getExtraIgnoredElements()] as $tag) {
$nodes = $dom->getElementsByTagName($tag);
foreach (iterator_to_array($nodes) as $node) {
NodeUtility::removeNode($node);