From 72a26f10493ceb677fdfc50823d44ca759301175 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Thu, 16 Nov 2017 16:54:48 +0100 Subject: Add getImages() Get all images URL of current DOM at once. --- src/HTMLParser.php | 51 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/HTMLParser.php b/src/HTMLParser.php index 77c68cf..a492307 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -187,6 +187,7 @@ class HTMLParser 'title' => isset($this->metadata['title']) ? $this->metadata['title'] : null, 'author' => isset($this->metadata['author']) ? $this->metadata['author'] : null, 'image' => isset($this->metadata['image']) ? $this->metadata['image'] : null, + 'images' => $this->getImages(), 'article' => $result, 'html' => $result->C14N(), 'dir' => isset($this->metadata['articleDir']) ? $this->metadata['articleDir'] : null, @@ -338,10 +339,7 @@ class HTMLParser public function postProcessContent(DOMDocument $article) { - $url = $this->getConfig()->getOption('originalURL'); - $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/'; - $scheme = parse_url($pathBase, PHP_URL_SCHEME); - $prePath = $scheme . '://' . parse_url($pathBase, PHP_URL_HOST); + list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->getConfig()->getOption('originalURL')); // Readability cannot open relative uris so we convert them to absolute uris. if ($this->getConfig()->getOption('fixRelativeURLs')) { @@ -403,6 +401,20 @@ class HTMLParser return $pathBase . $uri; } + /** + * @param string $url + * + * @return array [$pathBase, $scheme, $prePath] + */ + public function getPathInfo($url) + { + $pathBase = parse_url($url, PHP_URL_SCHEME) . '://' . parse_url($url, PHP_URL_HOST) . dirname(parse_url($url, PHP_URL_PATH)) . '/'; + $scheme = parse_url($pathBase, PHP_URL_SCHEME); + $prePath = $scheme . '://' . 
parse_url($pathBase, PHP_URL_HOST); + + return [$pathBase, $scheme, $prePath]; + } + private function nextElement($node) { $next = $node; @@ -510,6 +522,37 @@ class HTMLParser return false; } + /** + * @return array + */ + public function getImages() + { + $result = []; + if (!empty($this->metadata['image'])) { + $result[] = $this->metadata['image']; + } + if (null == $this->dom) { + return $result; + } + + foreach ($this->dom->getElementsByTagName('img') as $img) { + if ($src = $img->getAttribute('src')) { + $result[] = $src; + } + } + + if ($this->getConfig()->getOption('fixRelativeURLs')) { + list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->getConfig()->getOption('originalURL')); + foreach ($result as &$imgSrc) { + $imgSrc = $this->toAbsoluteURI($imgSrc, $pathBase, $scheme, $prePath); + } + } + + $result = array_unique(array_filter($result)); + + return $result; + } + /** * Get the density of links as a percentage of the content * This is the amount of text that is inside a link divided by the total text in the node. 
-- cgit v1.2.3 From f7b3b3e0d71dcf84b4be4699a25df95c29682e34 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Mon, 20 Nov 2017 15:44:34 +0100 Subject: Add test for getImages() --- test/HTMLParserTest.php | 6 ++++-- test/test-pages/001/expected-images.json | 1 + test/test-pages/002/expected-images.json | 1 + test/test-pages/ars-1/expected-images.json | 1 + test/test-pages/base-url/expected-images.json | 1 + test/test-pages/basic-tags-cleaning/expected-images.json | 1 + test/test-pages/bbc-1/expected-images.json | 1 + test/test-pages/blogger/expected-images.json | 1 + test/test-pages/breitbart/expected-images.json | 1 + test/test-pages/bug-1255978/expected-images.json | 1 + test/test-pages/buzzfeed-1/expected-images.json | 1 + test/test-pages/clean-links/expected-images.json | 1 + test/test-pages/cnet/expected-images.json | 1 + test/test-pages/cnn/expected-images.json | 1 + test/test-pages/comment-inside-script-parsing/expected-images.json | 1 + test/test-pages/daringfireball-1/expected-images.json | 1 + test/test-pages/ehow-1/expected-images.json | 1 + test/test-pages/ehow-2/expected-images.json | 1 + test/test-pages/embedded-videos/expected-images.json | 1 + test/test-pages/gmw/expected-images.json | 1 + test/test-pages/heise/expected-images.json | 1 + test/test-pages/herald-sun-1/expected-images.json | 1 + test/test-pages/iab-1/expected-images.json | 1 + test/test-pages/ietf-1/expected-images.json | 1 + test/test-pages/keep-images/expected-images.json | 1 + test/test-pages/lemonde-1/expected-images.json | 1 + test/test-pages/liberation-1/expected-images.json | 1 + test/test-pages/lifehacker-post-comment-load/expected-images.json | 1 + test/test-pages/lifehacker-working/expected-images.json | 1 + test/test-pages/links-in-tables/expected-images.json | 1 + test/test-pages/lwn-1/expected-images.json | 1 + test/test-pages/medium-1/expected-images.json | 1 + test/test-pages/medium-2/expected-images.json | 1 + test/test-pages/medium-3/expected-images.json | 1 + 
test/test-pages/missing-paragraphs/expected-images.json | 1 + test/test-pages/mozilla-1/expected-images.json | 1 + test/test-pages/mozilla-2/expected-images.json | 1 + test/test-pages/msn/expected-images.json | 1 + test/test-pages/needs-entity-normalization/expected-images.json | 1 + test/test-pages/nytimes-1/expected-images.json | 1 + test/test-pages/nytimes-2/expected-images.json | 1 + test/test-pages/pixnet/expected-images.json | 1 + test/test-pages/remove-extra-brs/expected-images.json | 1 + test/test-pages/remove-extra-paragraphs/expected-images.json | 1 + test/test-pages/remove-script-tags/expected-images.json | 1 + test/test-pages/reordering-paragraphs/expected-images.json | 1 + test/test-pages/replace-brs/expected-images.json | 1 + test/test-pages/replace-font-tags/expected-images.json | 1 + test/test-pages/rtl-1/expected-images.json | 1 + test/test-pages/rtl-2/expected-images.json | 1 + test/test-pages/rtl-3/expected-images.json | 1 + test/test-pages/rtl-4/expected-images.json | 1 + test/test-pages/salon-1/expected-images.json | 1 + test/test-pages/simplyfound-1/expected-images.json | 1 + test/test-pages/social-buttons/expected-images.json | 1 + test/test-pages/style-tags-removal/expected-images.json | 1 + test/test-pages/svg-parsing/expected-images.json | 1 + test/test-pages/table-style-attributes/expected-images.json | 1 + test/test-pages/telegraph/expected-images.json | 1 + test/test-pages/tmz-1/expected-images.json | 1 + test/test-pages/tumblr/expected-images.json | 1 + test/test-pages/wapo-1/expected-images.json | 1 + test/test-pages/wapo-2/expected-images.json | 1 + test/test-pages/webmd-1/expected-images.json | 1 + test/test-pages/webmd-2/expected-images.json | 1 + test/test-pages/wikia/expected-images.json | 1 + test/test-pages/wikipedia/expected-images.json | 1 + test/test-pages/wordpress/expected-images.json | 1 + test/test-pages/yahoo-1/expected-images.json | 1 + test/test-pages/yahoo-2/expected-images.json | 1 + 
test/test-pages/yahoo-3/expected-images.json | 1 + test/test-pages/yahoo-4/expected-images.json | 1 + test/test-pages/youth/expected-images.json | 1 + 73 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 test/test-pages/001/expected-images.json create mode 100644 test/test-pages/002/expected-images.json create mode 100644 test/test-pages/ars-1/expected-images.json create mode 100644 test/test-pages/base-url/expected-images.json create mode 100644 test/test-pages/basic-tags-cleaning/expected-images.json create mode 100644 test/test-pages/bbc-1/expected-images.json create mode 100644 test/test-pages/blogger/expected-images.json create mode 100644 test/test-pages/breitbart/expected-images.json create mode 100644 test/test-pages/bug-1255978/expected-images.json create mode 100644 test/test-pages/buzzfeed-1/expected-images.json create mode 100644 test/test-pages/clean-links/expected-images.json create mode 100644 test/test-pages/cnet/expected-images.json create mode 100644 test/test-pages/cnn/expected-images.json create mode 100644 test/test-pages/comment-inside-script-parsing/expected-images.json create mode 100644 test/test-pages/daringfireball-1/expected-images.json create mode 100644 test/test-pages/ehow-1/expected-images.json create mode 100644 test/test-pages/ehow-2/expected-images.json create mode 100644 test/test-pages/embedded-videos/expected-images.json create mode 100644 test/test-pages/gmw/expected-images.json create mode 100644 test/test-pages/heise/expected-images.json create mode 100644 test/test-pages/herald-sun-1/expected-images.json create mode 100644 test/test-pages/iab-1/expected-images.json create mode 100644 test/test-pages/ietf-1/expected-images.json create mode 100644 test/test-pages/keep-images/expected-images.json create mode 100644 test/test-pages/lemonde-1/expected-images.json create mode 100644 test/test-pages/liberation-1/expected-images.json create mode 100644 
test/test-pages/lifehacker-post-comment-load/expected-images.json create mode 100644 test/test-pages/lifehacker-working/expected-images.json create mode 100644 test/test-pages/links-in-tables/expected-images.json create mode 100644 test/test-pages/lwn-1/expected-images.json create mode 100644 test/test-pages/medium-1/expected-images.json create mode 100644 test/test-pages/medium-2/expected-images.json create mode 100644 test/test-pages/medium-3/expected-images.json create mode 100644 test/test-pages/missing-paragraphs/expected-images.json create mode 100644 test/test-pages/mozilla-1/expected-images.json create mode 100644 test/test-pages/mozilla-2/expected-images.json create mode 100644 test/test-pages/msn/expected-images.json create mode 100644 test/test-pages/needs-entity-normalization/expected-images.json create mode 100644 test/test-pages/nytimes-1/expected-images.json create mode 100644 test/test-pages/nytimes-2/expected-images.json create mode 100644 test/test-pages/pixnet/expected-images.json create mode 100644 test/test-pages/remove-extra-brs/expected-images.json create mode 100644 test/test-pages/remove-extra-paragraphs/expected-images.json create mode 100644 test/test-pages/remove-script-tags/expected-images.json create mode 100644 test/test-pages/reordering-paragraphs/expected-images.json create mode 100644 test/test-pages/replace-brs/expected-images.json create mode 100644 test/test-pages/replace-font-tags/expected-images.json create mode 100644 test/test-pages/rtl-1/expected-images.json create mode 100644 test/test-pages/rtl-2/expected-images.json create mode 100644 test/test-pages/rtl-3/expected-images.json create mode 100644 test/test-pages/rtl-4/expected-images.json create mode 100644 test/test-pages/salon-1/expected-images.json create mode 100644 test/test-pages/simplyfound-1/expected-images.json create mode 100644 test/test-pages/social-buttons/expected-images.json create mode 100644 test/test-pages/style-tags-removal/expected-images.json create 
mode 100644 test/test-pages/svg-parsing/expected-images.json create mode 100644 test/test-pages/table-style-attributes/expected-images.json create mode 100644 test/test-pages/telegraph/expected-images.json create mode 100644 test/test-pages/tmz-1/expected-images.json create mode 100644 test/test-pages/tumblr/expected-images.json create mode 100644 test/test-pages/wapo-1/expected-images.json create mode 100644 test/test-pages/wapo-2/expected-images.json create mode 100644 test/test-pages/webmd-1/expected-images.json create mode 100644 test/test-pages/webmd-2/expected-images.json create mode 100644 test/test-pages/wikia/expected-images.json create mode 100644 test/test-pages/wikipedia/expected-images.json create mode 100644 test/test-pages/wordpress/expected-images.json create mode 100644 test/test-pages/yahoo-1/expected-images.json create mode 100644 test/test-pages/yahoo-2/expected-images.json create mode 100644 test/test-pages/yahoo-3/expected-images.json create mode 100644 test/test-pages/yahoo-4/expected-images.json create mode 100644 test/test-pages/youth/expected-images.json diff --git a/test/HTMLParserTest.php b/test/HTMLParserTest.php index ad6d943..96c07df 100644 --- a/test/HTMLParserTest.php +++ b/test/HTMLParserTest.php @@ -9,7 +9,7 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase /** * @dataProvider getSamplePages */ - public function testHTMLParserParsesHTML($html, $expectedResult, $expectedMetadata, $config) + public function testHTMLParserParsesHTML($html, $expectedResult, $expectedMetadata, $config, $expectedImages) { $options = ['originalURL' => 'http://fakehost/test/test.html', 'fixRelativeURLs' => true, @@ -24,6 +24,7 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase $result = $readability->parse($html); $this->assertEquals($expectedResult, $result['html']); + $this->assertEquals($expectedImages, json_encode($result['images'])); } public function getSamplePages() @@ -40,6 +41,7 @@ class HTMLParserTest extends 
\PHPUnit_Framework_TestCase $source = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'source.html'); $expectedHTML = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected.html'); $expectedMetadata = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-metadata.json'); + $expectedImages = file_get_contents($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'expected-images.json'); $config = null; if (file_exists($path . DIRECTORY_SEPARATOR . $testPage . DIRECTORY_SEPARATOR . 'config.json')) { @@ -49,7 +51,7 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase } } - $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config]; + $pages[$testPage] = [$source, $expectedHTML, $expectedMetadata, $config, $expectedImages]; } return $pages; diff --git a/test/test-pages/001/expected-images.json b/test/test-pages/001/expected-images.json new file mode 100644 index 0000000..b8720fe --- /dev/null +++ b/test/test-pages/001/expected-images.json @@ -0,0 +1 @@ +["http:\/\/fakehost\/static\/code\/2013\/blanket-coverage.png"] \ No newline at end of file diff --git a/test/test-pages/002/expected-images.json b/test/test-pages/002/expected-images.json new file mode 100644 index 0000000..e80f99e --- /dev/null +++ b/test/test-pages/002/expected-images.json @@ -0,0 +1 @@ +["https:\/\/secure.gravatar.com\/avatar\/098bc26c1d2d2b425b258cff67044ac1?s=48&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D48&r=G"] \ No newline at end of file diff --git a/test/test-pages/ars-1/expected-images.json b/test/test-pages/ars-1/expected-images.json new file mode 100644 index 0000000..1736325 --- /dev/null +++ b/test/test-pages/ars-1/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/04\/server-crash-640x426.jpg","2":"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/authors\/Dan-Goodin-sq.jpg","3":"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/05\/DSC00588-300x100.jpg","4":"https:\/\/cdn.arstechnica.net\/i\/http:\/\/brightcove.vo.llnwd.net\/v1\/unsecured\/media\/636468927001\/201504\/641\/636468927001_4202397654001_LG-G4.jpg?pubId=636468927001","5":"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/05\/Screen-Shot-2015-05-04-at-2.45.22-PM-150x150.png","6":"http:\/\/cdn.arstechnica.net\/wp-content\/uploads\/2015\/05\/Screen-Shot-2015-05-04-at-11.09.05-AM-150x150.png"} \ No newline at end of file diff --git a/test/test-pages/base-url/expected-images.json b/test/test-pages/base-url/expected-images.json new file mode 100644 index 0000000..d3eed6f --- /dev/null +++ b/test/test-pages/base-url/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/fakehost\/test\/foo\/bar\/baz.png","2":"http:\/\/fakehost\/foo\/bar\/baz.png","3":"http:\/\/test\/foo\/bar\/baz.png","4":"https:\/\/test\/foo\/bar\/baz.png"} \ No newline at end of file diff --git a/test/test-pages/basic-tags-cleaning/expected-images.json b/test/test-pages/basic-tags-cleaning/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/basic-tags-cleaning/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/bbc-1/expected-images.json b/test/test-pages/bbc-1/expected-images.json new file mode 100644 index 0000000..458705c --- /dev/null +++ b/test/test-pages/bbc-1/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/ichef.bbci.co.uk\/news\/1024\/cpsprodpb\/3D8B\/production\/_84455751_84455749.jpg","1":"http:\/\/static.bbci.co.uk\/news\/1.79.0380\/img\/brand\/news.png","2":"http:\/\/beacon-us-east.rubiconproject.com\/beacon\/d\/c7fc9a18-e658-40cb-b122-2a67ac137253?accountId=9176&siteId=21342&zoneId=72686&e=6A1E40E384DA563BC35D424639CBC3996755004D5E9E92FCC5D5975482C06D202AFB0309050CE4C9FDBCCA75EDC6570644136BD09AD42E178F043B3A9B3D44DFCACFFC2D53F6EA7AC793078C8D240C81172CD28438FCBB6A0A85F1B2498A61AE3074348E62474919C04AE81C6C1B6F10D4119A84C501229B0FF33AE9256AA201","3":"http:\/\/ichef.bbci.co.uk\/news\/555\/cpsprodpb\/462D\/production\/_84456971_gettyimages-167501087.jpg","4":"http:\/\/ichef.bbci.co.uk\/news\/555\/media\/images\/76020000\/jpg\/_76020974_line976.jpg","5":"http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/6D3D\/production\/_84456972_p072315al-0500.jpg","7":"http:\/\/ichef-1.bbci.co.uk\/news\/555\/cpsprodpb\/142FD\/production\/_84458628_shirtreuters.jpg","8":"http:\/\/ichef-1.bbci.co.uk\/news\/200\/cpsprodpb\/77FD\/production\/_84471703_houser.jpg","9":"http:\/\/ichef.bbci.co.uk\/news\/200\/cpsprodpb\/FFB1\/production\/_84475456_84474093.jpg","10":"http:\/\/ichef.bbci.co.uk\/news\/200\/cpsprodpb\/1657D\/production\/_84471519_0a0e2152-cb23-4b8a-91cd-715e7dd42c1c.jpg","11":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19bHAj\/abc\/15R3qX\/19bHAj-1Vhi-360x202.jpg","12":"http:\/\/widgets.outbrain.com\/images\/widgetIcons\/play_100x100.png","13":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19bhaS\/abc\/15RUvY\/19bhaS-1Vhi-360x202.jpg","15":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19SE13\/abc\/15IA8u\/19SE13-1Vhi-360x202.jpg","17":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19c1tt\/abc\/15Rr3E\/19c1tt-1Vhi-360x202.jpg","19":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19eehl\/abc\/15UV9a\/19eehl-1Vhi-360x202.jpg","21":"http:\/\/images.outbrain.com\/imageserver\/v2\/s\/Mmsn\/n\/19jeN
g\/abc\/15Zgfg\/19jeNg-1Vhi-360x202.jpg","23":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/787E\/production\/_84464803_ifidie4.jpg","24":"http:\/\/ichef-1.bbci.co.uk\/news\/304\/cpsprodpb\/A7B7\/production\/_84453924_5fa7af8b-bd47-44b4-a454-160cb7e8e49a.jpg","25":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/A91F\/production\/_84459234_u1ntitled.jpg","26":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/183D5\/production\/_84458299_p072315al-0500.jpg","27":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/D510\/production\/_84444545_gettyimages-479284580.jpg","28":"http:\/\/ichef-1.bbci.co.uk\/news\/304\/cpsprodpb\/372C\/production\/_84442141_ntv_2.jpg","29":"http:\/\/ichef-1.bbci.co.uk\/news\/304\/cpsprodpb\/0DF4\/production\/_84427530_thinkstockphotos-87713420.jpg","30":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/C415\/production\/_84379105_84306592.jpg","31":"http:\/\/ichef.bbci.co.uk\/news\/304\/cpsprodpb\/313D\/production\/_84450621_thinkstockphotos-482433887.jpg","32":"http:\/\/b.scorecardresearch.com\/p?c1=2&c2=6035051&c4=www.bbc.com\/news\/world-us-canada-33646704","33":"http:\/\/stats.bbc.co.uk\/o.gif?~RS~s~RS~News~RS~t~RS~HighWeb_Story~RS~i~RS~33646704~RS~p~RS~99127~RS~a~RS~US~RS~u~RS~\/news\/world-us-canada-33646704~RS~r~RS~0~RS~q~RS~0~RS~z~RS~8469~RS~"} \ No newline at end of file diff --git a/test/test-pages/blogger/expected-images.json b/test/test-pages/blogger/expected-images.json new file mode 100644 index 0000000..fc0d8bf --- /dev/null +++ b/test/test-pages/blogger/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/w1200-h630-p-nu\/block-diagram.png","https:\/\/1.bp.blogspot.com\/-YIPC5jkXkDE\/Vy7YPSqFKWI\/AAAAAAAAAxI\/a7D6Ji2GxoUvcrwUkI4RLZcr2LFQEJCTACLcB\/s640\/block-diagram.png","https:\/\/1.bp.blogspot.com\/-k3naUT3uXao\/Vy7WFac246I\/AAAAAAAAAw8\/mePy_ostO8QJra5ZJrbP2WGhTlJ0B_r8gCLcB\/s640\/schematic-from-hell.png","https:\/\/2.bp.blogspot.com\/-kIekczO693g\/Vy7dBqYifXI\/AAAAAAAAAxc\/hMNJBs5bedIQOrBzzkhq4gbmhR-n58EQwCLcB\/s400\/graph-labels.png"] \ No newline at end of file diff --git a/test/test-pages/breitbart/expected-images.json b/test/test-pages/breitbart/expected-images.json new file mode 100644 index 0000000..4bd2502 --- /dev/null +++ b/test/test-pages/breitbart/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810.jpg","1":"http:\/\/fakehost\/t\/assets\/i\/store\/dd\/bbs_fullstore-takeover-mobile.gif","2":"http:\/\/media.breitbart.com\/media\/2016\/11\/GettyImages-621866810-640x480.jpg","3":"http:\/\/distroscale.s3.amazonaws.com\/uploads\/apps.distroscale.com\/4df7ede9d2a024f5\/Breitbart-LOGO.jpg","4":"http:\/\/img2.zergnet.com\/1423529_300.jpg","5":"http:\/\/img2.zergnet.com\/1381153_300.jpg","6":"http:\/\/img5.zergnet.com\/1394710_300.jpg","7":"http:\/\/fakehost\/t\/assets\/i\/on-air\/curtschilling-sidebar.jpg","8":"http:\/\/assets.newsinc.com\/10449_75X27.png?t=1485174240","9":"http:\/\/content.newsinc.com\/jpg\/320\/31887330\/49530028.jpg?t=1485174240","10":"http:\/\/assets.newsinc.com\/ap_logo_75x27.png?t=1485254340","11":"http:\/\/content.newsinc.com\/jpg\/499\/31887884\/49537140.jpg?t=1485254340","12":"http:\/\/assets.newsinc.com\/thehill_75x27.png?t=1485180060","13":"http:\/\/content.newsinc.com\/jpg\/2124\/31887786\/49535903.jpg?t=1485180060","27":"https:\/\/s.newsmaxfeednetwork.com\/static\/img\/newsmax_templates\/newsmax-powered-by.png","28":"http:\/\/media.breitbart.com\/media\/2017\/01\/
Chelsea-Handler-Melania-Trump-2-Getty-200x150.jpg","29":"http:\/\/media.breitbart.com\/media\/2017\/01\/Steinem-200x150.jpg","30":"http:\/\/media.breitbart.com\/media\/2017\/01\/pre-natal-care-Getty-200x150.jpg","31":"http:\/\/media.breitbart.com\/media\/2017\/01\/Virgil-Donald-Trump-Andrew-Jackson-Getty-200x150.jpg","32":"http:\/\/media.breitbart.com\/media\/2017\/01\/DACA-Joe-Raedle-Getty-Images-200x150.jpg","33":"http:\/\/media.breitbart.com\/media\/2017\/01\/Pompeo-Grilled-AP-200x150.png","34":"http:\/\/media.breitbart.com\/media\/2017\/01\/Donald-Trump-Barack-Obama-January-20-2017-200x150.jpg","35":"http:\/\/media.breitbart.com\/media\/2016\/05\/Radio-Daily2-200x150.jpg","36":"http:\/\/media.breitbart.com\/media\/2017\/01\/man-uses-gun-silencer-suppressor-rifle-getty-200x150.jpg","37":"http:\/\/media.breitbart.com\/media\/2017\/01\/1-200x150.jpg","38":"http:\/\/media.breitbart.com\/media\/2017\/01\/James-P.-Hoffa-200x133.jpg","39":"http:\/\/media.breitbart.com\/media\/2016\/12\/Trump-Laredo-Police-2016-ap-200x150.jpg","40":"http:\/\/media.breitbart.com\/media\/2017\/01\/Heather-Lowrey-200x150.png","41":"http:\/\/media.breitbart.com\/media\/2017\/01\/Pro-Life-Protester-US-SCOTUS-2016-Getty-200x150.jpg","42":"http:\/\/media.breitbart.com\/media\/2015\/11\/Mitchell-1124-Thumbnail-200x150.jpg","43":"http:\/\/media.breitbart.com\/media\/2017\/01\/Sean-Spicer-At-First-Press-Conference-200x150.jpg","44":"http:\/\/media.breitbart.com\/media\/2016\/09\/Guardian-200x150.jpg","45":"http:\/\/media.breitbart.com\/media\/2017\/01\/AP_090224058041-200x150.jpg","46":"http:\/\/media.breitbart.com\/media\/2017\/01\/Trump-Brady-200x150.png","47":"http:\/\/media.breitbart.com\/media\/2017\/01\/MadonnaClarifies-200x150.jpg","48":"http:\/\/media.breitbart.com\/media\/2015\/09\/send-tip.jpg","49":"http:\/\/rudy.adsnative.com\/cm.gif"} \ No newline at end of file diff --git a/test/test-pages/bug-1255978/expected-images.json b/test/test-pages/bug-1255978/expected-images.json new file 
mode 100644 index 0000000..e15a44e --- /dev/null +++ b/test/test-pages/bug-1255978/expected-images.json @@ -0,0 +1 @@ +{"0":"https:\/\/static.independent.co.uk\/s3fs-public\/thumbnails\/image\/2015\/12\/06\/10\/bed-hotel-room.jpg","1":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/facebook.png","2":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/twitter.png","3":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/whatsapp.png","4":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/email.png","5":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/ines_theme\/img\/blank.gif","6":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/article_small\/public\/thumbnails\/image\/2015\/12\/06\/10\/bed-hotel-room.jpg","7":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/18\/10\/bandb2.jpg","8":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/05\/26\/11\/hotel-door-getty.jpg","9":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2013\/07\/31\/15\/luggage-3.jpg","10":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2015\/04\/13\/11\/Lifestyle-hotels.jpg","11":"https:\/\/static.independent.co.uk\/s3fs-public\/styles\/story_medium\/public\/thumbnails\/image\/2014\/03\/13\/16\/agenda7.jpg","17":"http:\/\/fakehost\/sites\/all\/themes\/ines_themes\/independent_theme\/img\/reuse.png","18":"https:\/\/sp.mdotlabs.com\/images\/p?c1=9&c2=6035250&t=abt&v=2.14&cms=0&jsl=1&abgp=0&abgb=obj_not_present&jrd=0&gac=0&ifb=no_addoc_frames&cn=&uid=19220369a1385aabcaba9dg1479202584&bl1=0:0:0&bl2=0:0:0&dnt=0&wc=0&ss=1436:877:24:24&ibv=ff32&et=1635&os=MacIntel&ps=TUraU8gr&cb=ZGteY5Ve&durl=http%3A%2F%2Fwww.independent.co.uk%2Fnews%2Fbusiness%2Fnews%2Fseven-secrets-that-hotel-ow
ners-dont-want-you-to-know-10506160.html"} \ No newline at end of file diff --git a/test/test-pages/buzzfeed-1/expected-images.json b/test/test-pages/buzzfeed-1/expected-images.json new file mode 100644 index 0000000..bd6ca31 --- /dev/null +++ b/test/test-pages/buzzfeed-1/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/22\/5\/campaign_images\/webdr03\/student-dies-after-diet-pills-she-bought-online-b-2-28712-1429696299-24_dblbig.jpg","1":"http:\/\/s3-ak.buzzfeed.com\/static\/badge_images\/viral.png?v=201504241415","2":"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/4\/enhanced\/webdr10\/enhanced-6418-1429605460-16.jpg","3":"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608056-15.jpg","4":"http:\/\/ak-hdl.buzzfed.com\/static\/2015-04\/21\/5\/enhanced\/webdr12\/grid-cell-2501-1429608057-18.jpg","5":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/12\/10\/campaign_images\/webdr15\/the-bride-paradox-2-24817-1428850014-4_wide.jpg","6":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/7\/15\/campaign_images\/webdr03\/rihanna-da-gawd-2-17340-1428435377-8_wide.jpg","7":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/27\/7\/campaign_images\/webdr15\/this-is-why-streaking-during-a-rugby-match-is-nev-2-21907-1430135173-1_wide.jpg","8":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/25\/15\/campaign_images\/webdr07\/people-in-nepal-are-letting-their-loved-ones-know-2-22144-1429990512-0_wide.jpg","9":"data:image\/gif;base64,R0lGODlhAQABAPAAAAAAAAAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==","13":"http:\/\/s3-ak.buzzfeed.com\/static\/2015-04\/22\/14\/campaign_images\/webdr15\/gone-girl-an-interview-with-an-american-in-isis-2-4632-1429727152-8_wide.jpg","14":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/27\/9\/campaign_images\/webdr05\/ultra-patriotic-russian-biker-gang-refused-entry--2-17023-1430140685-30_wide.jpg","15":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-0
4\/27\/8\/campaign_images\/webdr07\/andrew-chan-marries-partner-a-day-before-execution-2-26101-1430136567-7_wide.jpg","16":"http:\/\/s3-ak.buzzfeed.com\/static\/2015-04\/27\/9\/campaign_images\/webdr02\/mormon-childless-and-constantly-condescended-to-2-8804-1430140118-12_wide.jpg","17":"http:\/\/s3-static-ak.buzzfed.com\/static\/2015-04\/25\/4\/campaign_images\/webdr11\/49-choses-que-ceux-qui-ont-grandi-avec-kd2a-noubl-2-19465-1429948902-10_wide.jpg","18":"#{img_src}","19":"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=="} \ No newline at end of file diff --git a/test/test-pages/clean-links/expected-images.json b/test/test-pages/clean-links/expected-images.json new file mode 100644 index 0000000..7a85954 --- /dev/null +++ b/test/test-pages/clean-links/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/fakehost\/test\/hmhome.gif ","1":"http:\/\/fakehost\/test\/..\/bar.gif","3":"http:\/\/fakehost\/test\/..\/myhome.jpg"} \ No newline at end of file diff --git a/test/test-pages/cnet/expected-images.json b/test/test-pages/cnet/expected-images.json new file mode 100644 index 0000000..605a11f --- /dev/null +++ b/test/test-pages/cnet/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/cnet3.cbsistatic.com\/img\/1JaRRjqhoGxDVkFxTRRWkZgyK2Q=\/670x503\/2014\/03\/21\/863df5d9-e8b8-4b38-851b-5e3f77f2cf0e\/mark-zuckerberg-facebook-home-10671610x407.jpg","https:\/\/cnet4.cbsistatic.com\/img\/GKAekmXh44rNOqaTl91J3WOQrSU=\/70x70\/2013\/07\/29\/d7513a63-f8ee-11e2-8c7c-d4ae52e62bcc\/Steven_Musil2.jpg","https:\/\/cnet1.cbsistatic.com\/img\/nAMdBzIE1ogVw5bOBZBaiJCt3Ro=\/570x0\/2014\/03\/21\/863df5d9-e8b8-4b38-851b-5e3f77f2cf0e\/mark-zuckerberg-facebook-home-10671610x407.jpg","https:\/\/d3qdfnco3bamip.cloudfront.net\/wjs\/v3.0.1484263742\/images\/down_arrow.png","data:image\/gif;base64,R0lGODlhAQABAAD\/ACwAAAAAAQABAAACADs=","https:\/\/cnet4.cbsistatic.com\/img\/nEQp5MZWOoB48gCDqN6BYecID1A=\/770x433\/2017\/01\/18\/7c9ebfcb-37d7-4155-871e-14e083fa6cbf\/ces17robotbattlebarista.jpg","http:\/\/dw.cbsi.com\/levt\/video\/e.gif?bitrate=0&blockcntry=&codec=&componentid=9f4fae40de2e11e6a3947d73dcb07f9e&contsessid=&device=type%3Adesktop%3Bos%3Aos%2520x%3Bver%3A10.11%3Bscreensz%3A1440x900&distntwrk=&encodeprfl=&event=init&eventdur=0&gestval=&ip=&mapp=UVPJS%3B1.7.2&medastid=0&medid=0&medlength=0&mednum=0&medrls=&medtime=0&medtitle=&medtype=0&mso=&pageurl=https%3A%2F%2Fwww.cnet.com%2Fnews%2Fzuckerberg-offers-peek-at-facebooks-acquisition-strategies%2F&part=cnet&playerembed=0&playersz=&playertime=0&qosnum=0&recommend=&relsessid=&sdlvrytype=&siteid=1&sponsored=&srchost=www.cnet.com&subj=&ts=1484820250362&ua=&ursuid=&usrbndwdth=0&v16=9ebac140-de2e-11e6-80be-03a3e80ab46f&v17=tech%20industry&v18=news%2Ctech-industry&v19=article&v20=ca76438a-4f9c-425f-bdb7-a69a7121cef0&v21=desktop%2520web&v22=&v23=cbsicnetglobalsite&v25=anon&v26=&videosz=&volume=0","http:\/\/dw.cbsi.com\/levt\/video\/e.gif?bitrate=0&blockcntry=&codec=&componentid=9f4fae40de2e11e6a3947d73dcb07f9e&contsessid=&device=type%3Adesktop%3Bos%3Aos%2520x%3Bver%3A10.11%3Bscreensz%3A1440x900&distntwrk=cnet&encodeprfl=&event=start&eventdur=0&gestval=paywall%3A0%3Bcaption_available%3A1&ip=&mapp=UVPJS%3B1.7.2&medas
tid=595&medid=&medlength=-1&mednum=1&medrls=&medtime=0&medtitle=&medtype=0&mso=&pageurl=https%3A%2F%2Fwww.cnet.com%2Fnews%2Fzuckerberg-offers-peek-at-facebooks-acquisition-strategies%2F&part=cnet&playerembed=0&playersz=0x0&playertime=0&qosnum=0&recommend=&relsessid=&sdlvrytype=3&siteid=1&sponsored=&srchost=www.cnet.com&subj=&ts=1484820250362&ua=&ursuid=&usrbndwdth=0&v16=9ebac140-de2e-11e6-80be-03a3e80ab46f&v17=tech%20industry&v18=news%2Ctech-industry&v19=article&v20=ca76438a-4f9c-425f-bdb7-a69a7121cef0&v21=desktop%2520web&v22=&v23=cbsicnetglobalsite&v25=anon&v26=&videosz=&volume=0&adastid=43&adbreak=&adid=259895529&adlength=15&adnum=1&adpod=0&adpodpos=1&adpos=pre&adtime=0&adtitle=7HF71V15SE-promos&adtype=1","http:\/\/dw.cbsi.com\/levt\/video\/e.gif?bitrate=0&blockcntry=&codec=&componentid=9f4fae40de2e11e6a3947d73dcb07f9e&contsessid=&device=type%3Adesktop%3Bos%3Aos%2520x%3Bver%3A10.11%3Bscreensz%3A1440x900&distntwrk=cnet&encodeprfl=&event=play&eventdur=0&gestval=&ip=&mapp=UVPJS%3B1.7.2&medastid=595&medid=&medlength=-1&mednum=1&medrls=&medtime=0&medtitle=&medtype=0&mso=&pageurl=https%3A%2F%2Fwww.cnet.com%2Fnews%2Fzuckerberg-offers-peek-at-facebooks-acquisition-strategies%2F&part=cnet&playerembed=0&playersz=0x0&playertime=7&qosnum=0&recommend=&relsessid=&sdlvrytype=3&siteid=1&sponsored=&srchost=www.cnet.com&subj=&ts=1484820250362&ua=&ursuid=&usrbndwdth=0&v16=9ebac140-de2e-11e6-80be-03a3e80ab46f&v17=tech%20industry&v18=news%2Ctech-industry&v19=article&v20=ca76438a-4f9c-425f-bdb7-a69a7121cef0&v21=desktop%2520web&v22=&v23=cbsicnetglobalsite&v25=anon&v26=&videosz=&volume=0&adastid=43&adbreak=&adid=259895529&adlength=15&adnum=1&adpod=0&adpodpos=1&adpos=pre&adtime=2&adtitle=7HF71V15SE-promos&adtype=1","http:\/\/dw.cbsi.com\/levt\/video\/e.gif?bitrate=0&blockcntry=&codec=&componentid=9f4fae40de2e11e6a3947d73dcb07f9e&contsessid=&device=type%3Adesktop%3Bos%3Aos%2520x%3Bver%3A10.11%3Bscreensz%3A1440x900&distntwrk=cnet&encodeprfl=&event=play&eventdur=0&gestval=&ip=&mapp=UVPJS%3B1
.7.2&medastid=595&medid=&medlength=-1&mednum=1&medrls=&medtime=0&medtitle=&medtype=0&mso=&pageurl=https%3A%2F%2Fwww.cnet.com%2Fnews%2Fzuckerberg-offers-peek-at-facebooks-acquisition-strategies%2F&part=cnet&playerembed=0&playersz=0x0&playertime=11&qosnum=0&recommend=&relsessid=&sdlvrytype=3&siteid=1&sponsored=&srchost=www.cnet.com&subj=&ts=1484820250362&ua=&ursuid=&usrbndwdth=0&v16=9ebac140-de2e-11e6-80be-03a3e80ab46f&v17=tech%20industry&v18=news%2Ctech-industry&v19=article&v20=ca76438a-4f9c-425f-bdb7-a69a7121cef0&v21=desktop%2520web&v22=&v23=cbsicnetglobalsite&v25=anon&v26=&videosz=&volume=0&adastid=43&adbreak=&adid=259895529&adlength=15&adnum=1&adpod=0&adpodpos=1&adpos=pre&adtime=7&adtitle=7HF71V15SE-promos&adtype=1"] \ No newline at end of file diff --git a/test/test-pages/cnn/expected-images.json b/test/test-pages/cnn/expected-images.json new file mode 100644 index 0000000..cb7c061 --- /dev/null +++ b/test/test-pages/cnn/expected-images.json @@ -0,0 +1 @@ +["http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/141103182938-income-inequality-780x439.png","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/logos\/cnn-logo.png","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/logos\/money-logo.png","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170118130731-wilbur-ross-hearing-commerce-336x188.jpg","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170123154121-sean-spicer-white-house-336x188.jpg","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/161213114825-microsoft-zo-ai-336x188.png","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170124115244-gm-cerv-1-barrett-jackson-336x188.jpg","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/misc\/icon-search-intl.png","https:\/\/smrt.as\/ck","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170123170010-cfpb-citi-subsidiaries-124x70.jpg","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170124063354-air-asia-x-124x70.jpg","http:\/\/i2.cdn.turner.com\/money\/dam\/assets\/170123114700-scottsdale-auctions-1963-jaguar-
etype-124x70.jpg","http:\/\/widgets.lendingtree.com\/Content\/images\/white-logo.jpg","http:\/\/fakehost\/.element\/ssi\/partners\/indeed\/8.0\/indeed_blue.png","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/newsletters\/beforethebell\/right-rail-sign-up-logo_2X.png","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/newsletters\/beforethebell\/etrade-logo-2.png","http:\/\/i.cdn.turner.com\/money\/.element\/img\/8.0\/logos\/CNNMoney-logo.png","http:\/\/i.cdn.turner.com\/money\/video\/bvp\/images\/1.gif","http:\/\/i.cdn.turner.com\/money\/images\/1.gif","http:\/\/googleads.g.doubleclick.net\/pagead\/viewthroughconversion\/925133270\/?value=1.00¤cy_code=USD&label=OuhECLD29GcQ1tORuQM&guid=ON&script=0","http:\/\/bea4.v.fwmrm.net\/ad\/u?mode=echo&cr=http%3A%2F%2Fbeacon.krxd.net%2Fusermatch.gif%3Fpartner%3Dfreewheel%26partner_uid%3D%23%7Buser.id%7D"] \ No newline at end of file diff --git a/test/test-pages/comment-inside-script-parsing/expected-images.json b/test/test-pages/comment-inside-script-parsing/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/comment-inside-script-parsing/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/daringfireball-1/expected-images.json b/test/test-pages/daringfireball-1/expected-images.json new file mode 100644 index 0000000..3566bf3 --- /dev/null +++ b/test/test-pages/daringfireball-1/expected-images.json @@ -0,0 +1 @@ +["http:\/\/fakehost\/graphics\/author\/addison-bw-425.jpg"] \ No newline at end of file diff --git a/test/test-pages/ehow-1/expected-images.json b/test/test-pages/ehow-1/expected-images.json new file mode 100644 index 0000000..149298a --- /dev/null +++ b/test/test-pages/ehow-1/expected-images.json @@ -0,0 +1 @@ 
+["http:\/\/img-aws.ehowcdn.com\/200x200\/cme\/photography.prod.demandstudios.com\/16149374-814f-40bc-baf3-ca20f149f0ba.jpg","http:\/\/img-aws.ehowcdn.com\/640\/cme\/photography.prod.demandstudios.com\/16149374-814f-40bc-baf3-ca20f149f0ba.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/fc249ef6-4d27-41b4-8c21-15f7a8512b50.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/aae11d4d-a4aa-4251-a4d9-41023ebf6d84.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/7afdfa1e-da74-44b5-b89c-ca8123516272.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/4f6612c0-316a-4c74-bb03-cb4e778f6d72.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/eeb1e0b4-e573-40a3-8db1-2c76f0b13b84.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/812d4649-4152-4363-97c0-f181d02e709a.jpg","http:\/\/img-aws.ehowcdn.com\/default\/cme\/photography.prod.demandstudios.com\/0cb3988c-9318-47d6-bc9c-c798da1ede72.jpg","http:\/\/img-aws.ehowcdn.com\/640\/cme\/photography.prod.demandstudios.com\/e3e18f0b-ab2c-4ffb-9988-a1ea63faef8b.jpg","http:\/\/img-aws.ehowcdn.com\/640\/cme\/photography.prod.demandstudios.com\/2cd79f8d-0d16-4573-8861-e47fb74b0638.jpg","http:\/\/img-aws.ehowcdn.com\/640\/cme\/photography.prod.demandstudios.com\/78670312-8636-4c42-a75c-3029f7aa6c73.jpg","http:\/\/img-aws.ehowcdn.com\/200x133\/cme\/photography.prod.demandstudios.com\/7f06be69-3650-4e94-88d4-2289c1a5c6a2.jpg","http:\/\/img-aws.ehowcdn.com\/200x133\/cme\/uploadedimages.demandmedia\/tulips-1.jpg","http:\/\/img-aws.ehowcdn.com\/200x133\/cme\/uploadedimages.demandmedia\/kitchen-1.jpg","http:\/\/img-aws.ehowcdn.com\/300x200\/cme\/uploadedimages.demandmedia\/sunflowers-1.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/photography.prod.demandstudios.com\/90f9f3d7-6e4f-4be8-b379-3d994ccbf69c.jpg","http:\/\/img-aws.ehowcdn.
com\/150X100\/viper\/media\/2b3830ca-4eb6-4bfd-a1cd-d430b3e28d59\/jpeg\/47b02568-2cde-44f4-a39c-494294e50ccd_1.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/ehow\/images\/a00\/09\/4u\/make-terrarium-800x800.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/viper\/media\/2b3830ca-4eb6-4bfd-a1cd-d430b3e28d59\/jpeg\/dc66dd37-78b6-4494-a8db-7bbe697cedda_0.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/viper\/media\/2b3830ca-4eb6-4bfd-a1cd-d430b3e28d59\/jpeg\/70ebb809-634d-41df-9fff-9b183f81e9e0_1.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/photography.prod.demandstudios.com\/2ecfca5a-1c35-4fbe-9ff5-8f5bd8129d97.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/ds-cdn-write\/upload\/image\/39\/4D\/34B73D38-EC9F-4415-A740-23DCF2F94D39\/34B73D38-EC9F-4415-A740-23DCF2F94D39.jpg","http:\/\/img-aws.ehowcdn.com\/150X100\/ehow-mom-blog-us\/files\/2015\/03\/easterbark0.jpg"] \ No newline at end of file diff --git a/test/test-pages/ehow-2/expected-images.json b/test/test-pages/ehow-2/expected-images.json new file mode 100644 index 0000000..020f4a2 --- /dev/null +++ b/test/test-pages/ehow-2/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/img-aws.ehowcdn.com\/200x200\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/2F\/86\/5547EF62-EAF5-4256-945D-0496F61C862F\/5547EF62-EAF5-4256-945D-0496F61C862F.jpg","1":"http:\/\/img-aws.ehowcdn.com\/60x60\/cme\/cme_public_images\/www_demandstudios_com\/sitelife.studiod.com\/ver1.0\/Content\/images\/store\/9\/2\/d9dd6f61-b183-4893-927f-5b540e45be91.Small.jpg","3":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/2F\/86\/5547EF62-EAF5-4256-945D-0496F61C862F\/5547EF62-EAF5-4256-945D-0496F61C862F.jpg","4":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/FE\/CB\/121569D2-6984-4B2F-83C4-9D2D9A27CBFE\/121569D2-6984-4B2F-83C4-9D2D9A27CBFE.jpg","5":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/DF\/FC\/A05B0252-BD73-4BC7-A09A-96F0A504FCDF\/A05B0252-BD73-4BC7-A09A-96F0A504FCDF.jpg","6":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/EB\/DB\/8A04CCA7-3255-4225-B59A-C41441F8DBEB\/8A04CCA7-3255-4225-B59A-C41441F8DBEB.jpg","7":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/94\/10\/08035476-0167-4A03-AADC-13A7E7AA1094\/08035476-0167-4A03-AADC-13A7E7AA1094.jpg","8":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/06\/49\/4AD62696-FC95-4DA2-8351-42740C7B4906\/4AD62696-FC95-4DA2-8351-42740C7B4906.jpg","9":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/D0\/51\/B6AED06C-5E19-4A26-9AAD-0E175F6251D0\/B6AED06C-5E19-4A26-9AAD-0E175F6251D0.jpg","10":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/i
mage\/35\/B4\/DD5FD05A-B631-4AFE-BC8F-FDACAD1EB435\/DD5FD05A-B631-4AFE-BC8F-FDACAD1EB435.jpg","11":"http:\/\/img-aws.ehowcdn.com\/640\/cme\/cme_public_images\/www_ehow_com\/cdn-write.demandstudios.com\/upload\/image\/A1\/FA\/2C368B34-8F6A-45F6-9DFC-0B0C4E33FAA1\/2C368B34-8F6A-45F6-9DFC-0B0C4E33FAA1.jpg","12":"http:\/\/img-aws.ehowcdn.com\/300x200\/cme\/photography.prod.demandstudios.com\/082b04c2-3114-44fb-8be6-4e0e84e9b045.jpg","13":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/cme_public_images\/www_ehow_com\/photos.demandstudios.com\/getty\/article\/103\/89\/dv1850012_XS.jpg","14":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/cme_public_images\/www_ehow_com\/i.ehow.com\/images\/a05\/1n\/b8\/etiquette-high-school-graduation-party-800x800.jpg","15":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/photography.prod.demandstudios.com\/33c61293-ddba-4178-ae65-5eb82bab4a0c.jpg","16":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/cme_public_images\/www_ehow_com\/i.ehow.com\/images\/a02\/39\/hc\/throw-upscale-party-budget-800x800.jpg","17":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/cme_public_images\/www_ehow_com\/photos.demandstudios.com\/getty\/article\/190\/137\/452190602_XS.jpg","18":"http:\/\/img-aws.ehowcdn.com\/150X100\/cme\/cme_public_images\/www_ehow_com\/home-ehow-com.blog.ehow.com\/files\/2013\/05\/graduation_ceremony1.jpg","19":"http:\/\/d.agkn.com\/pixel\/7698\/?che=005022400148404245675981&atr=HolidaysCelebrations,MoreHolidaysCelebrationsHolidays"} \ No newline at end of file diff --git a/test/test-pages/embedded-videos/expected-images.json b/test/test-pages/embedded-videos/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/embedded-videos/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/gmw/expected-images.json b/test/test-pages/gmw/expected-images.json new file mode 100644 index 0000000..5a00940 --- /dev/null +++ b/test/test-pages/gmw/expected-images.json @@ -0,0 +1 
@@ +{"0":"http:\/\/img.gmw.cn\/pic\/Logo.png","1":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20170310\/448a5bc1e2861a2c4e5929.jpg","2":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20170310\/448a5bc1e2861a2c4e592a.jpg","3":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20170310\/448a5bc1e2861a2c4e592d.jpg","4":"https:\/\/img.gmw.cn\/pic\/content_logo.png","5":"http:\/\/img.gmw.cn\/pic\/gmwLogo_share.jpg","6":"http:\/\/ad.gmw.cn\/content_banner\/content_650x120_qrcodes.jpg","7":"http:\/\/static.bshare.cn\/frame\/images\/\/forground.gif","10":"http:\/\/static.bshare.cn\/frame\/images\/\/more.gif","11":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m1.gif","12":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m2.gif","13":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m3.gif","14":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m4.gif","15":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m5.gif","16":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m6.gif","17":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m7.gif","18":"http:\/\/imgp.gmw.cn\/motionsimg\/0\/m8.gif","19":"http:\/\/afp.alicdn.com\/afp-creative\/creative\/u113716014\/8fffe82208528de01df9dd6c686cdfed.jpg","20":"http:\/\/img.gmw.cn\/pic\/skypelogo.jpg","21":"http:\/\/imgtech.gmw.cn\/attachement\/gif\/site2\/20170228\/002564a5d1f31a1f298305.gif","22":"http:\/\/imgtech.gmw.cn\/attachement\/gif\/site2\/20170228\/002564a5d1f31a1f294604.gif","23":"http:\/\/imgtech.gmw.cn\/attachement\/gif\/site2\/20170228\/002564a5d1f31a1f291e03.gif","24":"http:\/\/imgtech.gmw.cn\/attachement\/gif\/site2\/20170228\/002564a5d1f31a1f28ea02.gif","25":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20161010\/448a5b7c82a419653f8e01.jpg","26":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20161010\/448a5b7c82a4196542d504.jpg","27":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20161010\/448a5b7c82a41965427203.jpg","28":"http:\/\/imgtech.gmw.cn\/attachement\/jpg\/site2\/20161010\/448a5b7c82a41965411e02.jpg","29":"http:\/\/atanx.alicdn.com\/t\/img\/TB1tWvVJFXXXXc_aXXXXXXXXXXX-
40-26.png","30":"http:\/\/atanx.alicdn.com\/t\/img\/TB1upAiJXXXXXa5aXXXXXXXXXXX-116-30.png","31":"http:\/\/imgm.gmw.cn\/images\/index_QRCode_btn_close.png","32":"http:\/\/imgm.gmw.cn\/images\/gmrb_logo_white_180x180.png","33":"http:\/\/afp.alicdn.com\/afp-creative\/creative\/u113716014\/37c9f0d8c68fe9458ff53caee567332f.jpg"} \ No newline at end of file diff --git a/test/test-pages/heise/expected-images.json b/test/test-pages/heise/expected-images.json new file mode 100644 index 0000000..8d10aa2 --- /dev/null +++ b/test/test-pages/heise/expected-images.json @@ -0,0 +1 @@ +["http:\/\/www.heise.de\/imgs\/18\/1\/4\/6\/2\/3\/5\/1\/Barcode-Scanner-With-Border-f0c62350bd8d9d96.jpeg","http:\/\/3.f.ix.de\/mac-and-i\/icons\/heise_mac-and-i_logo.gif","http:\/\/1.f.ix.de\/icons\/ho\/navi_icon_twitter_big.png","http:\/\/3.f.ix.de\/icons\/ho\/navi_icon_facebook_big.png","http:\/\/3.f.ix.de\/icons\/ho\/navi_icon_google_big.png","http:\/\/3.f.ix.de\/icons\/ho\/navi_icon_youtube_black_big.png","http:\/\/2.f.ix.de\/mac-and-i\/icons\/navi_icon_apps.png","http:\/\/2.f.ix.de\/icons\/ho\/navi_icon_mobi_mac.png","http:\/\/2.f.ix.de\/icons\/ho\/navi_icon_ipad.png","http:\/\/3.f.ix.de\/scale\/geometry\/600\/q75\/imgs\/18\/1\/4\/6\/2\/3\/5\/1\/Barcode-Scanner-With-Border-fc08c913da5cea5d.jpeg","http:\/\/1.f.ix.de\/mac-and-i\/imgs\/65\/1\/3\/7\/0\/9\/3\/7\/mac-2015-02-92ccee0374aa9def.jpeg","http:\/\/1.f.ix.de\/mac-and-i\/imgs\/65\/1\/3\/8\/3\/2\/3\/7\/Titel-97485b58f23ceb92.png","http:\/\/2.f.ix.de\/mac-and-i\/imgs\/65\/1\/4\/5\/5\/6\/0\/2\/alto_heise_hp_tour_mpu__20150324-e4f5620ffc9f3323.jpg","http:\/\/fakehost\/avw-bin\/ivw\/CP\/barfoo\/ho\/2585134\/0.gif?d=1029284408","http:\/\/1.f.ix.de\/scale\/geometry\/160x90\/q75\/imgs\/18\/1\/4\/6\/2\/8\/0\/3\/urn-newsml-dpa-com-20090101-141017-99-01179_large_4_3-c0577d3c5419b335.jpeg","http:\/\/3.f.ix.de\/scale\/geometry\/160x90\/q75\/imgs\/18\/1\/4\/6\/2\/4\/2\/4\/1024px-Carly_Fiorina_by_Gage_Skidmore-e15691fffb93dd28.jpeg","http:\/\/2.f.ix.de\/sc
ale\/geometry\/160x90\/q75\/imgs\/18\/1\/4\/6\/2\/8\/1\/7\/apple_watch_liefer1-ecb875926df4a41b.jpeg","http:\/\/3.f.ix.de\/scale\/geometry\/160x90\/q75\/imgs\/18\/1\/4\/6\/2\/8\/1\/5\/screen800x500-ed2bb768f9ab35c2.jpeg","http:\/\/3.f.ix.de\/mac-and-i\/imgs\/65\/1\/4\/1\/5\/1\/2\/2\/Quiz-Frage-5b1e2e989f5a3cc9.png","http:\/\/3.f.ix.de\/scale\/geometry\/160x90\/q75\/mac-and-i\/imgs\/65\/1\/4\/5\/2\/5\/6\/5\/Screen_Shot-4a5e2819f374a051.jpeg","http:\/\/1.f.ix.de\/scale\/geometry\/160x90\/q75\/mac-and-i\/imgs\/65\/1\/4\/4\/0\/1\/2\/3\/MacBook-mit-SSD-breit-d974d1a97ebb442b.png","http:\/\/2.f.ix.de\/mac-and-i\/imgs\/65\/1\/4\/2\/2\/6\/3\/4\/Voransicht-8552184f61c1fab3.jpeg","http:\/\/3.f.ix.de\/scale\/geometry\/160x90\/q75\/mac-and-i\/imgs\/65\/1\/3\/7\/4\/7\/9\/6\/MacMiniDiagonal-ddc35b43efc0f9d5-f08e367febe26a99.jpeg","http:\/\/2.f.ix.de\/mac-and-i\/imgs\/65\/1\/1\/8\/9\/2\/5\/6\/14_auf_iPad_Air-32d65b8788224ed5.png","http:\/\/fakehost\/ivw-bin\/ivw\/CP\/mac-and-i\/meldung\/1Password-fuer-Mac-generiert-Einmal-Passwoerter-2596987.html?url=%2Fmac-and-i%2Fmeldung%2F1Password-fuer-Mac-generiert-Einmal-Passwoerter-2596987.html","http:\/\/heise.met.vgwort.de\/na\/c9016af5c9984399bf413fa66e5ba786"] \ No newline at end of file diff --git a/test/test-pages/herald-sun-1/expected-images.json b/test/test-pages/herald-sun-1/expected-images.json new file mode 100644 index 0000000..33bdd24 --- /dev/null +++ b/test/test-pages/herald-sun-1/expected-images.json @@ -0,0 +1 @@ +["http:\/\/api.news.com.au\/content\/1.0\/heraldsun\/images\/1227261885862?format=jpg&group=iphone&size=medium","http:\/\/tcog.news.com.au\/track\/news\/content\/v1\/?category=\/section\/heraldsun.com.au\/collection\/popular-content\/all\/24hours\/&format=module&t_product=most-popular-mobile&maxRelated=5&t_template=popular-plain&origin=omniture&domain=heraldsun.com.au"] \ No newline at end of file diff --git a/test/test-pages/iab-1/expected-images.json b/test/test-pages/iab-1/expected-images.json new file mode 
100644 index 0000000..91b2b56 --- /dev/null +++ b/test/test-pages/iab-1/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/www.iab.com\/wp-content\/uploads\/2015\/10\/getting-lean-with-digital-ad-ux-300x250.jpg","1":"http:\/\/www.iab.com\/wp-content\/uploads\/2015\/10\/getting-lean-with-digital-ad-ux-2-1000x305.jpg","3":"http:\/\/www.iab.com\/wp-content\/uploads\/2015\/05\/auto-draft-16-150x150.jpg"} \ No newline at end of file diff --git a/test/test-pages/ietf-1/expected-images.json b/test/test-pages/ietf-1/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/ietf-1/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/keep-images/expected-images.json b/test/test-pages/keep-images/expected-images.json new file mode 100644 index 0000000..65213ea --- /dev/null +++ b/test/test-pages/keep-images/expected-images.json @@ -0,0 +1 @@ +{"0":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*sLDnS1UWEFIS33uLMxq3cw.jpeg","1":"https:\/\/d262ilb51hltx0.cloudfront.net\/fit\/c\/32\/32\/1*VN9AINEbi2CS8bHWeu80GQ.png","2":"https:\/\/d262ilb51hltx0.cloudfront.net\/fit\/c\/280\/36\/1*CBYh7ADvcY9Z-WYUrJN2Gw.png?source=logoAvatar-lo_69de9372095b-d16afa0ae7c","4":"https:\/\/d262ilb51hltx0.cloudfront.net\/fit\/c\/32\/32\/0*caZ172e36yu_pElb.png","5":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*sLDnS1UWEFIS33uLMxq3cw.jpeg","6":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*3vIhkoHIzcxvUdijoCVx6w.png","7":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*4gN1-fzOwCniw-DbqQjDeQ.jpeg","8":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*2KPmZkIBUrhps-2uwDvYFQ.jpeg","9":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*PU40bbbox2Ompc5I3RE99A.jpeg","10":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*ohyycinH18fz98TCyUzVgQ.png","11":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*mKvUNOAVQxl6atCbxbCZsg.jpeg","12":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\
/2000\/1*knT10_FNVUmqQIBLnutmzQ.jpeg","14":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*Vr61dyCTRwk6CemmVF8YAQ.jpeg","15":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*a-1_13xE6_ErQ-QSlz6myw.jpeg","17":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*IWXhtSsVv0gNnCwnDEXk-Q.jpeg","18":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*NGcrjfkV0l37iQH2uyYjEw.jpeg","19":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*WRlKt3q3mt7utmwxcbl3sQ.jpeg","20":"https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*320_4I0lxbn5x3bx4XPI5Q.png","21":"https:\/\/d262ilb51hltx0.cloudfront.net\/fit\/c\/60\/60\/1*VN9AINEbi2CS8bHWeu80GQ.png","22":"https:\/\/d262ilb51hltx0.cloudfront.net\/fit\/c\/60\/60\/0*caZ172e36yu_pElb.png"} \ No newline at end of file diff --git a/test/test-pages/lemonde-1/expected-images.json b/test/test-pages/lemonde-1/expected-images.json new file mode 100644 index 0000000..97a38e7 --- /dev/null +++ b/test/test-pages/lemonde-1/expected-images.json @@ -0,0 +1 @@ +["http:\/\/s1.lemde.fr\/image\/2015\/05\/05\/600x315\/4628128_3_47fc_projet-de-loi-renseignement_aeba800424730d672d1bd08faf203438.jpg","data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw=="] \ No newline at end of file diff --git a/test/test-pages/liberation-1/expected-images.json b/test/test-pages/liberation-1/expected-images.json new file mode 100644 index 0000000..cd3c954 --- /dev/null +++ b/test/test-pages/liberation-1/expected-images.json @@ -0,0 +1 @@ +["http:\/\/md1.libe.com\/photo\/755923-000_hkg10175905.jpg?modified_at=1430371146&width=750"] \ No newline at end of file diff --git a/test/test-pages/lifehacker-post-comment-load/expected-images.json b/test/test-pages/lifehacker-post-comment-load/expected-images.json new file mode 100644 index 0000000..cce3650 --- /dev/null +++ b/test/test-pages/lifehacker-post-comment-load/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--hqqO9fze--\/n1s6c2m6kc07iqdyllj6.jpg","1":"http:\/\/x.kinja-static.com\/assets\/images\/logos\/navbar\/logo-lifehacker.png","2":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--1YlShHHH--\/c_fill,fl_progressive,g_north,h_180,q_80,w_320\/rm2n6mfzgsokjaqqiohy.png","3":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--eJ7nuZsO--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/wuhmkpql6xdxuu0xnmc5.jpg","4":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--D5VWh239--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/cuu0eau8uoxgunp37kgb.jpg","5":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--xcx_Hkf8--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/qqwf5kig4pevcjrprzie.jpg","6":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--TBjOdHcL--\/c_fit,fl_progressive,q_80,w_600\/bwiqhbo45ulldozwchb0.png","7":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--e6LwIJS6--\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/gzonxhes6bl4p9k4gzpl.jpg","9":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--QeUTCiuW--\/o4dpyrcbiqyfrc3bxx6p.jpg","10":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--9a3fSAiv--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18l0k4fog1iixjpg.jpg","11":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--ZasswoU_--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/1888vgzhqw1whjpg.jpg","12":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mlR3Ku0_--\/xrhkwleyurcizy4akiae.jpg","13":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--fN3JVNsL--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/196y8tsoeyd5njpg.jpg","14":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--Tacb0tyW--\/qodag11euf2npkawkn9v.jpg","15":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--x9hLbIKJ--\/imfc9ybqfw0jmztbhfrh.jpg","16":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--6NwBgQLy--\/afy7n45jf
vsjdmmhonct.jpg","17":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--ciqk42G0--\/s3pq8vjrvyjgne4lfsod.jpg","18":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mtob1sjR--\/y2ldv5eufb3jcrtfouye.jpg","19":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--tAWrlvyk--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/17qctz7rnypm1jpg.jpg","20":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--NdSICOre--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18ixwh1tdragspng.png","21":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mPo7bScD--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18iy6n7z0n7qnjpg.jpg","22":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--4Ajak63w--\/atb9qm07fvvg7hqkumkw.jpg","23":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--8HWYGx2u--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/hf8qe29n5uww7y0k8l9d.jpg","24":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/17jcxnuqmm7depng.png","25":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/gzonxhes6bl4p9k4gzpl.jpg","27":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/yntem4p7h8vpkry50vxr.jpg","29":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/17jcxjgw9pi83png.png","32":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/17r5ww2lnwctgjpg.jpg","36":"http:\/\/kinja.com\/api\/analytics\/stats\/track.gif?&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=permalinkTag&objectType=post&objectId=1183447227&objectId=1183572883&objectId=1183447223&objectId=226344621&objectId=1203987700&objectId=1217291859&objectId=450629315&o
bjectId=1178784105&objectId=1679306583&objectId=1690268064&blogId=17&authorId=602884910","37":"http:\/\/secure-us.imrworldwide.com\/cgi-bin\/m?ci=us-803450h&cg=0&cc=1&si=http%3A%2F%2Flifehacker.com%2Fhow-to-program-your-mind-to-stop-buying-crap-you-don-t-1690268064&rp=&ts=compact&rnd=1427305231910","38":"http:\/\/pubads.g.doubleclick.net\/activity;dc_iu=\/4246\/DFP_Audience_Pixel;dc_seg=22540930;blog=lifehacker;ord=463536460263.444;postId=1690268064;tags=money,shopping,saving%20money,personal%20finances,mind%20hacks,habits,budget,psychology,editors%20picks?","39":"http:\/\/pubads.g.doubleclick.net\/activity;dc_iu=\/4246\/DFP_Audience_Pixel;dc_seg=23702290;blog=lifehacker;ord=5586835666437.667;postId=1690268064;tags=money,shopping,saving%20money,personal%20finances,mind%20hacks,habits,budget,psychology,editors%20picks;refer=?"} \ No newline at end of file diff --git a/test/test-pages/lifehacker-working/expected-images.json b/test/test-pages/lifehacker-working/expected-images.json new file mode 100644 index 0000000..ddeefa3 --- /dev/null +++ b/test/test-pages/lifehacker-working/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--hqqO9fze--\/n1s6c2m6kc07iqdyllj6.jpg","1":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--1YlShHHH--\/c_fill,fl_progressive,g_north,h_180,q_80,w_320\/rm2n6mfzgsokjaqqiohy.png","2":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--eJ7nuZsO--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/wuhmkpql6xdxuu0xnmc5.jpg","3":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--D5VWh239--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/cuu0eau8uoxgunp37kgb.jpg","4":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--xcx_Hkf8--\/c_fill,fl_progressive,g_center,h_180,q_80,w_320\/qqwf5kig4pevcjrprzie.jpg","5":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--TBjOdHcL--\/c_fit,fl_progressive,q_80,w_600\/bwiqhbo45ulldozwchb0.png","6":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--e6LwIJS6--\/c_fill,fl_progressive,g_center,h_80,q_80,w_80\/gzonxhes6bl4p9k4gzpl.jpg","8":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--QeUTCiuW--\/o4dpyrcbiqyfrc3bxx6p.jpg","9":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--9a3fSAiv--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18l0k4fog1iixjpg.jpg","10":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--ZasswoU_--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/1888vgzhqw1whjpg.jpg","11":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mlR3Ku0_--\/xrhkwleyurcizy4akiae.jpg","12":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--fN3JVNsL--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/196y8tsoeyd5njpg.jpg","13":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--Tacb0tyW--\/qodag11euf2npkawkn9v.jpg","14":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--x9hLbIKJ--\/imfc9ybqfw0jmztbhfrh.jpg","15":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--6NwBgQLy--\/afy7n45jfvsjdmmhonct.jpg","16":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--ciqk42
G0--\/s3pq8vjrvyjgne4lfsod.jpg","17":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mtob1sjR--\/y2ldv5eufb3jcrtfouye.jpg","18":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--tAWrlvyk--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/17qctz7rnypm1jpg.jpg","19":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--NdSICOre--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18ixwh1tdragspng.png","20":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--mPo7bScD--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/18iy6n7z0n7qnjpg.jpg","21":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--4Ajak63w--\/atb9qm07fvvg7hqkumkw.jpg","22":"http:\/\/i.kinja-img.com\/gawker-media\/image\/upload\/s--8HWYGx2u--\/c_fill,fl_progressive,g_center,h_77,q_80,w_137\/hf8qe29n5uww7y0k8l9d.jpg"} \ No newline at end of file diff --git a/test/test-pages/links-in-tables/expected-images.json b/test/test-pages/links-in-tables/expected-images.json new file mode 100644 index 0000000..1d81db6 --- /dev/null +++ b/test/test-pages/links-in-tables/expected-images.json @@ -0,0 +1 @@ +["https:\/\/2.bp.blogspot.com\/-chCZZinlUTg\/WEcxvJo9gdI\/AAAAAAAADnk\/3ND_BspqN6Y2j5xxkLFW3RyS2Ig0NHZpQCLcB\/w1200-h630-p-k-nu\/ipsum-opsum.gif","https:\/\/2.bp.blogspot.com\/-chCZZinlUTg\/WEcxvJo9gdI\/AAAAAAAADnk\/3ND_BspqN6Y2j5xxkLFW3RyS2Ig0NHZpQCLcB\/s640\/ipsum-opsum.gif","https:\/\/2.bp.blogspot.com\/-5aRh1dM6Unc\/WEcNs55RGhI\/AAAAAAAADnI\/tzr_oOJjZwgWd9Vu25ydY0UwB3eXKupXwCLcB\/s200\/image01.png"] \ No newline at end of file diff --git a/test/test-pages/lwn-1/expected-images.json b/test/test-pages/lwn-1/expected-images.json new file mode 100644 index 0000000..226193a --- /dev/null +++ b/test/test-pages/lwn-1/expected-images.json @@ -0,0 +1 @@ 
+["http:\/\/fakehost\/images\/lcorner.png","http:\/\/fakehost\/images\/2015\/03-qgis-map-sm.png","http:\/\/fakehost\/images\/2015\/03-qgis-query-sm.png","http:\/\/fakehost\/images\/2015\/03-qgis-simplify-sm.png","http:\/\/fakehost\/images\/2015\/03-qgis-style-sm.png"] \ No newline at end of file diff --git a/test/test-pages/medium-1/expected-images.json b/test/test-pages/medium-1/expected-images.json new file mode 100644 index 0000000..d899891 --- /dev/null +++ b/test/test-pages/medium-1/expected-images.json @@ -0,0 +1 @@ +["https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*oBWUXtszDsiv_-Qq2bFLTQ.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/2000\/1*oBWUXtszDsiv_-Qq2bFLTQ.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*AzYWbe4cZkMMEUbfRjysLQ.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*d0Hp6KlzyIcGHcL6to1sYQ.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*_9KYIFrk_PqWFgptsMDeww.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*Vh2MpQjqjPkzYJaaWExoVg.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*2Ln_DmC95Xpz6LzgywkcFQ.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/1200\/1*Zz5haO6iz7Hlj0z2IUHulg.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*bXaR_NBJdoHpRc8lUWSsow.png","https:\/\/d262ilb51hltx0.cloudfront.net\/max\/800\/1*lulfisQxgSQ209vPHMAifg.png"] \ No newline at end of file diff --git a/test/test-pages/medium-2/expected-images.json b/test/test-pages/medium-2/expected-images.json new file mode 100644 index 0000000..ece26bc --- /dev/null +++ b/test/test-pages/medium-2/expected-images.json @@ -0,0 +1 @@ +["https:\/\/d262ilb51hltx0.cloudfront.net\/max\/1600\/1*eR_J8DurqygbhrwDg-WPnQ.png"] \ No newline at end of file diff --git a/test/test-pages/medium-3/expected-images.json b/test/test-pages/medium-3/expected-images.json new file mode 100644 index 0000000..dad3ca5 --- /dev/null +++ b/test/test-pages/medium-3/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/cdn-images-1.medium.com\/max\/1200\/1*kbPh7V97eyRodSOw2-ALDw.png","https:\/\/cdn-images-1.medium.com\/freeze\/max\/60\/1*kbPh7V97eyRodSOw2-ALDw.png?q=20","https:\/\/cdn-images-1.medium.com\/freeze\/max\/60\/1*Fpb2Bvdx7Q-688vdm-NdkQ.png?q=20","https:\/\/cdn-images-1.medium.com\/freeze\/max\/60\/1*lvOySry5gHHJfGU_bQXrzA.png?q=20","https:\/\/cdn-images-1.medium.com\/fit\/c\/80\/80\/0*qPHQu8WqsC6cV_ud.jpg"] \ No newline at end of file diff --git a/test/test-pages/missing-paragraphs/expected-images.json b/test/test-pages/missing-paragraphs/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/missing-paragraphs/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/mozilla-1/expected-images.json b/test/test-pages/mozilla-1/expected-images.json new file mode 100644 index 0000000..42421a2 --- /dev/null +++ b/test/test-pages/mozilla-1/expected-images.json @@ -0,0 +1 @@ +["https:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/template\/page-image.af8027a425de.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/desktop\/customize\/animations\/flexible-top-fallback.c960365ba781.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/desktop\/customize\/animations\/flexible-bottom-fallback.cafd48a3d0a4.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/desktop\/customize\/theme-red.61611c5734ab.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/desktop\/customize\/add-ons.63a4b761f822.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/desktop\/customize\/awesome-bar.437df162126c.png"] \ No newline at end of file diff --git a/test/test-pages/mozilla-2/expected-images.json b/test/test-pages/mozilla-2/expected-images.json new file mode 100644 index 0000000..dd00d59 --- /dev/null +++ b/test/test-pages/mozilla-2/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/developer\/page-image.03bbe7da3199.png","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-webide.16763db341cb.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-valence.251f9def4d8d.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-inspector.c791bf1f1a59.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-console.42666aaf6d03.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-debugger.02ed86fb0c9f.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-network.740d6082b3f6.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-webaudio.a10ebc48d017.jpg","http:\/\/mozorg.cdn.mozilla.net\/media\/img\/firefox\/firstrun\/dev\/feature-style-editor.87c5d2017506.jpg"] \ No newline at end of file diff --git a/test/test-pages/msn/expected-images.json b/test/test-pages/msn/expected-images.json new file mode 100644 index 0000000..0b09401 --- /dev/null +++ b/test/test-pages/msn/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAkk5fh.img","1":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/BBlfvkM.img?h=100&w=100&m=6&q=60&u=t&o=t&l=f&f=png","2":"http:\/\/static-entertainment-eas-s-msn-com.akamaized.net\/sc\/9b\/e151e5.gif","3":"http:\/\/img.s-msn.com\/tenant\/amp\/entityid\/BBdAmDL.img?h=150&w=200&m=6&q=60&u=t&o=t&l=f&x=939&y=445","4":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAklvVK.img?h=150&w=200&m=6&q=60&u=t&o=t&l=f&x=517&y=530","5":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAkk5fh.img?h=820&w=1456&m=6&q=60&o=f&l=f&x=1162&y=540","6":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAkhPV2.img?h=132&w=176&m=6&q=60&u=t&o=t&l=f","7":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/BBlfFAC.img?h=32&w=32&m=6&q=60&u=t&o=t&l=f","8":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAkbiyJ.img?h=132&w=176&m=6&q=60&u=t&o=t&l=f","10":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAk3FBL.img?h=132&w=176&m=6&q=60&u=t&o=t&l=f","12":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/AAk3dSu.img?h=132&w=176&m=6&q=60&u=t&o=t&l=f&x=1330&y=274","14":"http:\/\/img-s-msn-com.akamaized.net\/tenant\/amp\/entityid\/BBlfKKG.img?h=200&w=200&m=6&q=60&u=t&o=f&l=f"} \ No newline at end of file diff --git a/test/test-pages/needs-entity-normalization/expected-images.json b/test/test-pages/needs-entity-normalization/expected-images.json new file mode 100644 index 0000000..6f7904d --- /dev/null +++ b/test/test-pages/needs-entity-normalization/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366549.jpg","1":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/logo.png","2":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/home.jpg","3":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-clima.png","4":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-hashtag.png","5":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-usuario.png","6":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-facebook-notas.png","7":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-twitter-notas.png","8":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-buscar.png","9":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-nota-comentar.png","10":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-nota-fb.png","11":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-nota-tw.png","12":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-nota-google.png","13":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/icono-nota-imprimir.png","14":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_366549.jpg","15":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_366539.jpg","16":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_366548.jpg","17":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_366540.jpg","18":"http:\/\/www.cadena3.com\/imagenes\/play-principal.png","20":"http:\/\/www.cadena3.com\/imagenes\/flecha-audio-anterior.gif","21":"http:\/\/www.cadena3.com\/imagenes\/flecha-audio-siguiente.gif","27":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_366525200x113.jpg","28":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_365592200x113.jpg","29":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_330908200x113.jpg","32":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/play-secundario.png","33":"h
ttp:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366155400x285.jpg","36":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/flecha-audio-anterior.gif","37":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/flecha-audio-siguiente.gif","40":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/play-secundario-video.png","41":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366153400x285.jpg","45":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366380400x285.jpg","49":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366451400x285.jpg","57":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366240400x285.jpg","60":"http:\/\/www.cadena3.com\/admin\/playerswf\/fotos\/ARCHI_366484400x285.jpg","70":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_362059400x285.jpg","74":"http:\/\/fakehost\/test\/..\/..\/..\/..\/admin\/playerswf\/fotos\/ARCHI_336880400x285.jpg","76":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/logo-clasificados.png","77":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/402833124201718852390-1.jpg","78":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/4036631752017121910390-1.jpg","79":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/251259552017173954390-1.jpg","80":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/387484273201720918390-1.jpg","81":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/402569242017145032390-1.jpg","82":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/39710125420171373390-1.jpg","83":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/150882742017112532390-1.jpg","84":"http:\/\/cadena3.com\/clasificados-2013\/uploadify\/userfiles\/3098321311201691244390-1.jpg","85":"http:\/\/fakehost\/test\/..\/..\/..\/..\/imagenes\/data-fiscal.png"} \ No newline at end of file diff --git a/test/test-pages/nytimes-1/expected-images.json 
b/test/test-pages/nytimes-1/expected-images.json new file mode 100644 index 0000000..a96b16b --- /dev/null +++ b/test/test-pages/nytimes-1/expected-images.json @@ -0,0 +1 @@ +["https:\/\/static01.nyt.com\/images\/2017\/01\/14\/world\/13SUDAN-1\/13SUDAN-1-facebookJumbo.jpg","https:\/\/a1.nyt.com\/assets\/article\/20170112-121631\/images\/foundation\/logos\/nyt-logo-185x26.png","https:\/\/static01.nyt.com\/images\/2017\/01\/14\/world\/13SUDAN-1\/13SUDAN-1-master768.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13cabinet\/13cabinet-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13LLBean2_xp\/13LLBean2_xp-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13comey\/13comey-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/15\/magazine\/15neanderthals1\/15neanderthals1-thumbStandard-v3.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13cuba\/13cuba-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/opinion\/13krugmanWeb\/13krugmanWeb-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13chaffetz_web1\/13chaffetz_web1-thumbStandard.jpg"] \ No newline at end of file diff --git a/test/test-pages/nytimes-2/expected-images.json b/test/test-pages/nytimes-2/expected-images.json new file mode 100644 index 0000000..9c7809a --- /dev/null +++ b/test/test-pages/nytimes-2/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/static01.nyt.com\/images\/2016\/07\/30\/business\/db-dealprof\/db-dealprof-facebookJumbo.jpg","https:\/\/a1.nyt.com\/assets\/article\/20170112-121631\/images\/foundation\/logos\/nyt-logo-185x26.png","https:\/\/static01.nyt.com\/images\/2016\/07\/30\/business\/db-dealprof\/db-dealprof-master315.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13cabinet\/13cabinet-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/15\/magazine\/15neanderthals1\/15neanderthals1-thumbStandard-v3.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13LLBean2_xp\/13LLBean2_xp-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/12\/fashion\/12THIELSUB\/12THIELSUB-thumbStandard-v2.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/us\/13comey\/13comey-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/business\/13xp-cspan_web1\/13xp-cspan_web1-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/12\/us\/12intel\/12intel-thumbStandard.jpg","https:\/\/static01.nyt.com\/images\/2017\/01\/13\/business\/13DB-WHITECOLLAR3\/13DB-WHITECOLLAR3-thumbStandard.jpg"] \ No newline at end of file diff --git a/test/test-pages/pixnet/expected-images.json b/test/test-pages/pixnet/expected-images.json new file mode 100644 index 0000000..f386bd2 --- /dev/null +++ b/test/test-pages/pixnet/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895093-631461272.jpg","1":"http:\/\/s6.pimg.tw\/album\/irischew\/element\/63726186_1362903876-2078272255\/zoomcrop\/200x200.jpg","2":"http:\/\/s5.pimg.tw\/album\/chinyu0801\/element\/554560495_1414416897-3484204755\/zoomcrop\/200x200.jpg","3":"http:\/\/s2.pimg.tw\/album\/carol781219\/element\/611043112_1441034316-3234816764\/zoomcrop\/200x200.png","4":"http:\/\/s2.pimg.tw\/album\/phy227\/element\/84036252_1372424992-4238267340\/zoomcrop\/200x200.jpg","5":"http:\/\/s1.pimg.tw\/album\/iko40623\/element\/620822731_1446559530-1279435851\/zoomcrop\/200x200.jpg","6":"http:\/\/s2.pimg.tw\/album\/joycebe\/element\/151863102_1420990775-1163836851\/zoomcrop\/200x200.jpg","7":"http:\/\/s8.pimg.tw\/album\/abby0318\/element\/191825088_1428857257-694873255\/zoomcrop\/200x200.jpg","8":"http:\/\/s7.pimg.tw\/album\/emily710223\/element\/191462337_1428423090-729151016\/zoomcrop\/200x200.jpg","9":"http:\/\/s7.pimg.tw\/album\/yourtyng0104\/element\/58931947_1358830455-2197812000\/zoomcrop\/200x200.jpg","10":"http:\/\/s8.pimg.tw\/album\/vivian00002000\/element\/202980528_1445242718-1039065392\/zoomcrop\/200x200.jpg","11":"http:\/\/imageproxy.pimg.tw\/zoomcrop?url=https%3A%2F%2Ffarm6.staticflickr.com%2F5635%2F20392516933_8a6101444d_c.jpg&width=200&height=200","12":"http:\/\/s5.pimg.tw\/album\/mimg47\/element\/290248885_1430225926-3573469794\/zoomcrop\/200x200.jpg","13":"http:\/\/s5.pimg.tw\/album\/always1027\/element\/147969525_1391227799-698593706\/zoomcrop\/200x200.jpg","14":"http:\/\/s6.pimg.tw\/album\/rmlove30\/element\/213338796_1366033207-323149691\/zoomcrop\/200x200.jpg","15":"http:\/\/s.pimg.tw\/album\/m08142001\/element\/196762710_1435532679-2621009829\/zoomcrop\/200x200.jpg","16":"http:\/\/s4.pimg.tw\/album\/futuresfiona\/element\/279287214_1396926665-1131375440\/zoomcrop\/200x200.jpg","17":"http:\/\/s6.pimg.tw\/album\/estherhsiao\/element\/299115906_1433352810-3818657366\/zoomcrop\/200x200.jpg","18":"http:\/\/s.pimg.tw\/album\/gntool
y123\/element\/508218600_1396652860-621839862\/zoomcrop\/200x200.jpg","19":"http:\/\/imageproxy.pimg.tw\/zoomcrop?url=http%3A%2F%2Ffarm4.static.flickr.com%2F3771%2F11206755646_eeb52f5b20_b.jpg&width=200&height=200","20":"http:\/\/s4.pimg.tw\/album\/meena1985\/element\/541479094_1409054065-197791004\/zoomcrop\/200x200.jpg","21":"http:\/\/s.pimg.tw\/album\/peihsuan0729\/element\/611199250_1441118094-2467301170\/zoomcrop\/200x200.jpg","22":"http:\/\/s8.pimg.tw\/album\/adgsfh1745\/element\/13081288_1446709410-4125554129\/zoomcrop\/200x200.jpg?v=1446709466","23":"http:\/\/imageproxy.pimg.tw\/zoomcrop?url=http%3A%2F%2Fupload.wikimedia.org%2Fwikipedia%2Fcommons%2F1%2F10%2FTRA_Hsinchu_Station.jpg&width=200&height=200","24":"http:\/\/s4.pimg.tw\/album\/minin0128\/element\/89462344_1374926655-1364602499\/zoomcrop\/200x200.jpg","25":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894842-1217674167.jpg","26":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894971-1486345289.jpg","27":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894979-1252095111.jpg","28":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971416-4261675924.jpg","29":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971406-2480195851.jpg","30":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894752-3567294980.jpg","31":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894771-2897027724.jpg","32":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894778-2035483089.jpg","33":"http:\/\/pic.pimg.tw\/stevenhgm\/1387897405-3236217457.jpg","34":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894848-3695967443.jpg","35":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894863-3269042540.jpg","36":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894868-3997219746.jpg","37":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894873-1524806724.jpg","38":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894788-105924953.jpg","39":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894798-1063855065.jpg","40":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894807-309560703.jpg","41":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894882-1881930036.jpg","42":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894887-4
07829597.jpg","43":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894857-470378275.jpg","44":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895099-4119123008.jpg","45":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971426-4277312474.jpg","46":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971436-2828193592.jpg","47":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894823-4061326865.jpg","48":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894911-3706194096.jpg","49":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894894-1173705525.jpg","50":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894901-1058040075.jpg","51":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894925-1582979930.jpg","52":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894989-1689510758.jpg","53":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894933-2886337976.jpg","54":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895113-4041265313.jpg","55":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894940-3359449338.jpg","56":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895106-1387217970.jpg","57":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894947-2636431527.jpg","58":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894956-618198074.jpg","59":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894961-2201609427.jpg","60":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971396-2999285851.jpg","61":"http:\/\/pic.pimg.tw\/stevenhgm\/1387894999-1588465034.jpg","62":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895007-4184988815.jpg","63":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895016-2193615729.jpg","64":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895047-92554161.jpg","65":"http:\/\/pic.pimg.tw\/stevenhgm\/1387971446-966387512.jpg","66":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895036-848978834.jpg","67":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895067-717977929.jpg","68":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895083-1227791497.jpg","69":"http:\/\/pic.pimg.tw\/stevenhgm\/1387895075-2647157523.jpg","71":"http:\/\/panel.pixfs.net\/images\/icons\/tag_blue.gif","72":"http:\/\/s8.pimg.tw\/album\/stevenhgm\/element\/457637358_1387458392-1693665696\/zoomcrop\/90x90.jpg","73":"http:\/\/s9.pimg.tw\/album\/stevenhg
m\/element\/485324079_1388755991-1712764152\/zoomcrop\/90x90.jpg","74":"http:\/\/s.pixfs.net\/blog\/images\/common\/push_0rz_icon.gif","75":"http:\/\/s9.pimg.tw\/avatar\/stevenhgm\/0\/0\/zoomcrop\/90x90.png?v=1377876799","76":"http:\/\/s.pixfs.net\/blog\/images\/choc\/set-to-top.gif","89":"http:\/\/s.pixfs.net\/blog\/images\/choc\/expand.gif","90":"http:\/\/s.pixfs.net\/blog\/images\/choc\/collapse.gif","91":"http:\/\/s.pixfs.net\/blog\/images\/choc\/plus.gif","114":"http:\/\/s1.pimg.tw\/avatar\/pixnetad2017\/0\/0\/zoomcrop\/50x50.jpg?v=1486377143","115":"http:\/\/s8.pimg.tw\/avatar\/blogbackup\/0\/0\/zoomcrop\/50x50.jpg?v=1381715322","116":"http:\/\/s6.pimg.tw\/avatar\/letyoung\/0\/0\/zoomcrop\/50x50.jpg?v=1429796178","117":"http:\/\/s9.pimg.tw\/avatar\/coffee1686\/0\/0\/zoomcrop\/50x50.jpg?v=1477368652","118":"http:\/\/s7.pimg.tw\/avatar\/a87192611\/0\/0\/zoomcrop\/50x50.jpg?v=1252651730","119":"http:\/\/s.pimg.tw\/qrcode\/stevenhgm\/blog\/post\/39926056-%e6%96%b0%e7%ab%b9%e5%b0%96%e7%9f%b3_%e7%be%8e%e6%a8%b9%e7%87%9f%e5%9c%b0%e8%b3%9e%e6%a5%93-%282%29.png","120":"http:\/\/tags.bluekai.com\/site\/27675?id=2DE7B66BC2869D58E462F17202212B81&ret=html&phint=__bk_t%3D%E6%96%B0%E7%AB%B9%E5%B0%96%E7%9F%B3_%E7%BE%8E%E6%A8%B9%E7%87%9F%E5%9C%B0%E8%B3%9E%E6%A5%93%20%282%29%20%40%20%E5%8F%B2%E8%92%82%E6%96%87%E7%9A%84%E5%AE%B6_%E8%97%8D%E5%A4%A9%20%3A%3A%20%E7%97%9E%E5%AE%A2%E9%82%A6%20PIXNET%20%3A%3A&phint=__bk_k%3D%E8%97%8D%E5%A4%A9%E9%83%A8%E8%90%BD%E5%AE%A2%2C%E9%83%A8%E8%90%BD%E5%AE%A2%E8%97%8D%E5%A4%A9%2C%E5%8F%B2%E8%92%82%E6%96%87%E8%97%8D%E5%A4%A9%2C%E5%8F%B2%E8%92%82%E6%96%87%E7%9A%84%E5%AE%B6%2C%E8%97%8D%E5%A4%A9%E5%AE%B6%E6%97%8F%2C%E9%9C%B2%E7%87%9F%E6%97%85%E9%81%8A%2C%E8%97%8D%E5%A4%A9%E7%97%9E%E5%AE%A2%E5%B9%AB%2C%E5%9C%8B%E5%85%A7%E6%97%85%E9%81%8A%2C%E5%B0%96%E7%9F%B3_%E7%BE%8E%E6%A8%B9%E7%87%9F%E5%9C%B0%2C%E6%96%B0%E7%AB%B9%E5%B0%96%E7%9F%B3_%E7%BE%8E%E6%A8%B9%E7%87%9F%E5%9C%B0%E8%B3%9E%E6%A5%93%20%282%29&phint=__bk_l%3Dhttp%3A%2F%2Fstevenhgm.pixnet.net%2Fblo
g%2Fpost%2F39926056-%25e6%2596%25b0%25e7%25ab%25b9%25e5%25b0%2596%25e7%259f%25b3_%25e7%25be%258e%25e6%25a8%25b9%25e7%2587%259f%25e5%259c%25b0%25e8%25b3%259e%25e6%25a5%2593-%25282%2529&r=96649251"} \ No newline at end of file diff --git a/test/test-pages/remove-extra-brs/expected-images.json b/test/test-pages/remove-extra-brs/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/remove-extra-brs/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/remove-extra-paragraphs/expected-images.json b/test/test-pages/remove-extra-paragraphs/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/remove-extra-paragraphs/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/remove-script-tags/expected-images.json b/test/test-pages/remove-script-tags/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/remove-script-tags/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/reordering-paragraphs/expected-images.json b/test/test-pages/reordering-paragraphs/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/reordering-paragraphs/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/replace-brs/expected-images.json b/test/test-pages/replace-brs/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/replace-brs/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/replace-font-tags/expected-images.json b/test/test-pages/replace-font-tags/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/replace-font-tags/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git 
a/test/test-pages/rtl-1/expected-images.json b/test/test-pages/rtl-1/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/rtl-1/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/rtl-2/expected-images.json b/test/test-pages/rtl-2/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/rtl-2/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/rtl-3/expected-images.json b/test/test-pages/rtl-3/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/rtl-3/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/rtl-4/expected-images.json b/test/test-pages/rtl-4/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/rtl-4/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/salon-1/expected-images.json b/test/test-pages/salon-1/expected-images.json new file mode 100644 index 0000000..15d7fa9 --- /dev/null +++ b/test/test-pages/salon-1/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/media.salon.com\/2014\/12\/uber_rand_paul.jpg","2":"http:\/\/media.salon.com\/2015\/02\/bitcoin_figure.jpg","3":"http:\/\/media.salon.com\/2015\/03\/Photo11-150x150.jpg","4":"http:\/\/media.salon.com\/2015\/02\/Material-World-Containers-and-City-Lights-e1425042763140-150x150.jpg","5":"http:\/\/media.salon.com\/2015\/02\/shutterstock_126021596-150x150.jpg","6":"http:\/\/media.salon.com\/2015\/02\/EKjxq8WT6kSWxQlwHYja1eGDW72-Bukqll1h5XWkCyMg0PbYIScTDjFV2h2nIez044-ShPyW2WEE02X4KBg6Ro1-150x150.jpg"} \ No newline at end of file diff --git a/test/test-pages/simplyfound-1/expected-images.json b/test/test-pages/simplyfound-1/expected-images.json new file mode 100644 index 0000000..8ca0a70 --- /dev/null +++ 
b/test/test-pages/simplyfound-1/expected-images.json @@ -0,0 +1 @@ +{"0":"https:\/\/d34hb2g9mvfppu.cloudfront.net\/m\/images\/cache\/images\/2016\/02\/29\/apcnews2012raspberry_pi_logo_mainimage8_jpg8_322_27630a8388eb_lg.jpg","2":"https:\/\/d34hb2g9mvfppu.cloudfront.net\/m\/images\/cache\/images\/2016\/02\/29\/teachers_classroom_guide_324_a221bf31d64c_lg.png","3":"https:\/\/d34hb2g9mvfppu.cloudfront.net\/m\/images\/cache\/images\/2016\/02\/29\/handj_326_eb8b50597a3f_lg.jpg","4":"https:\/\/d34hb2g9mvfppu.cloudfront.net\/m\/images\/cache\/images\/2016\/02\/29\/images_326_b1f81e087284_lg.jpeg","5":"https:\/\/d34hb2g9mvfppu.cloudfront.net\/m\/images\/cache\/images\/2016\/02\/29\/images_2_326_c32fa7688f70_lg.jpeg"} \ No newline at end of file diff --git a/test/test-pages/social-buttons/expected-images.json b/test/test-pages/social-buttons/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/social-buttons/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/style-tags-removal/expected-images.json b/test/test-pages/style-tags-removal/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/style-tags-removal/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/svg-parsing/expected-images.json b/test/test-pages/svg-parsing/expected-images.json new file mode 100644 index 0000000..0637a08 --- /dev/null +++ b/test/test-pages/svg-parsing/expected-images.json @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/test/test-pages/table-style-attributes/expected-images.json b/test/test-pages/table-style-attributes/expected-images.json new file mode 100644 index 0000000..f6032c1 --- /dev/null +++ b/test/test-pages/table-style-attributes/expected-images.json @@ -0,0 +1 @@ +["http:\/\/fakehost\/test\/..\/compass1.gif"] \ No newline at end of file diff --git 
a/test/test-pages/telegraph/expected-images.json b/test/test-pages/telegraph/expected-images.json new file mode 100644 index 0000000..dd7fcb3 --- /dev/null +++ b/test/test-pages/telegraph/expected-images.json @@ -0,0 +1 @@ +["http:\/\/www.telegraph.co.uk\/content\/dam\/news\/2017\/11\/16\/TELEMMGLPICT000146889449-xlarge_trans_NvBQzQNjv4BqySoB6nTCgtc7U4LQ_FPO4hKi2sT3vi7ux2-RDZwC4QA.jpeg","http:\/\/fakehost\/content\/dam\/TheTelegraph_portal_white-320-small.png","http:\/\/fakehost\/content\/dam\/news\/2017\/11\/16\/TELEMMGLPICT000146889449_trans_NvBQzQNjv4BqySoB6nTCgtc7U4LQ_FPO4hKi2sT3vi7ux2-RDZwC4QA.jpeg?imwidth=450","http:\/\/fakehost\/content\/dam\/video_previews\/f\/o\/foyzhnzde6x9hfbbkxn9dumgnkyzdey9-small.jpg","http:\/\/fakehost\/content\/dam\/video_previews\/p\/p\/ppzzdnzde6biv1rtixuprdeby5aeyik-small.jpg","http:\/\/fakehost\/content\/dam\/video_previews\/m\/1\/m1mtdnzde6egx6afc92n7ygcnaaczsdz-small.jpg","http:\/\/fakehost\/content\/dam\/generic\/Matt-cartoon-255x206px-small.png"] \ No newline at end of file diff --git a/test/test-pages/tmz-1/expected-images.json b/test/test-pages/tmz-1/expected-images.json new file mode 100644 index 0000000..9210f05 --- /dev/null +++ b/test/test-pages/tmz-1/expected-images.json @@ -0,0 +1 @@ 
+["http:\/\/ll-media.tmz.com\/2015\/02\/26\/0225-lupita-nyongo-getty-01-1200x630.jpg","http:\/\/ll-assets.tmz.com\/www.tmz.com\/main\/default\/cache\/img\/widgets\/showtimes\/overlay-close.v2014_05_09_134122.png","http:\/\/ll-assets.tmz.com\/www.tmz.com\/main\/default\/cache\/img\/widgets\/showtimes\/tmz-showtimes-logo.v2014_05_09_134122.png","http:\/\/ll-assets.tmz.com\/www.tmz.com\/main\/default\/cache\/img\/widgets\/showtimes\/tmzlive-showtimes-logo.v2014_05_09_134122.png","http:\/\/ll-assets.tmz.com\/www.tmz.com\/main\/default\/cache\/img\/widgets\/masthead\/tmz-logo-62x23.v2014_05_09_134122.png","http:\/\/ll-assets.tmz.com\/www.tmz.com\/main\/default\/cache\/img\/widgets\/masthead\/rightarrow_13x13.v2014_05_09_134122.png","http:\/\/ll-media.tmz.com\/2015\/03\/18\/031815-drake-dubai-primary-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/17\/0317-tom-cruise-katie-holmes-suri-tmz-getty-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/16\/0316-karrueche-own-01-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/18\/0318-robert-kym-tmz-01-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/18\/031815-adam-lavine-primary-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/16\/0316-robert-durst-hbo-10-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/18\/0318-kylie-jenner-splash-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/03\/18\/0318-miley-patrick-splash-03-117x81.jpg","http:\/\/ll-media.tmz.com\/2015\/02\/26\/0225-lupita-nyongo-getty-4.jpg","http:\/\/ll-media.tmz.com\/2013\/11\/20\/update-graphic-red-bar.jpg","http:\/\/ll-media.tmz.com\/2015\/02\/26\/0226-sub-london-hotel-swipe-tmz-11.jpg","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-asset-us-magazine-logo2-cut-v2-1.gif","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-rss-egotastic-v3.png","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-rss-chive-v2-cut-v2.png","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-asset-mto.png","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-rss-gossipgirl-bw.png","http:\/\/
ll-media.tmz.com\/2013\/04\/26\/partner-rsss-justjared-logo-cut-v2.gif","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-rss-huffpo-celebrity-bw.png","http:\/\/ll-media.tmz.com\/2013\/04\/26\/partner-asset-lainey-gossip-bw-1.png"] \ No newline at end of file diff --git a/test/test-pages/tumblr/expected-images.json b/test/test-pages/tumblr/expected-images.json new file mode 100644 index 0000000..1b0151f --- /dev/null +++ b/test/test-pages/tumblr/expected-images.json @@ -0,0 +1 @@ +["http:\/\/assets.tumblr.com\/images\/og\/fb_landscape_share.png"] \ No newline at end of file diff --git a/test/test-pages/wapo-1/expected-images.json b/test/test-pages/wapo-1/expected-images.json new file mode 100644 index 0000000..f0aaba8 --- /dev/null +++ b/test/test-pages/wapo-1/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/img.washingtonpost.com\/rw\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Nic6429927.jpg","1":"https:\/\/img.washingtonpost.com\/rf\/image_606w\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Nic6429732.jpg","2":"https:\/\/img.washingtonpost.com\/rf\/image_606w\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/144714787.jpg","3":"https:\/\/img.washingtonpost.com\/rf\/image_480w\/2010-2019\/WashingtonPost\/2015\/01\/27\/Foreign\/Graphics\/foreignFighters-Jan14-GS.jpg?uuid=CDUNJKYyEeShYhIdBsp38Q","4":"https:\/\/img.washingtonpost.com\/rf\/image_480w\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Graphics\/tunisia600.jpg?uuid=1_yuLs2LEeSHME9HNBbnWQ","5":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/Interactivity\/Images\/339633.jpg","6":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/Interactivity\/Images\/339633.jpg","7":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Nic6429732-6547.jpg","8":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/0
3\/18\/Foreign\/Images\/Nic6429732-6547.jpg","9":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/Wires\/Videos\/2015-03-18\/AP\/Images\/TS-Was8911803.jpg","10":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/Wires\/Videos\/2015-03-18\/AP\/Images\/TS-Was8911803.jpg","11":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/worldviews\/201503\/Images\/2015-03-18T000133Z_01_NIR01_RTRIDSP_3_ISRAEL-ELECTION.jpg","12":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/worldviews\/201503\/Images\/2015-03-18T000133Z_01_NIR01_RTRIDSP_3_ISRAEL-ELECTION.jpg","13":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/Wires\/Videos\/201503\/Reuters\/Images\/Singapore_Lee_Kuan_Yew-0934c.jpg","14":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/Wires\/Videos\/201503\/Reuters\/Images\/Singapore_Lee_Kuan_Yew-0934c.jpg","17":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1138.jpg","18":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1138.jpg","19":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Singapore_Lee_Kuan_Yew-0934c-6506.jpg","20":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Singapore_Lee_Kuan_Yew-0934c-6506.jpg","21":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Nic6429927.jpg","22":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/Nic6429927.jpg","23":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/2015-03-18T004343Z_01_JER149_RTRIDSP_3_ISRAEL-ELECTION-6494.jpg","24":"http:\/\/img.washingtonpost.c
om\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/Foreign\/Images\/2015-03-18T004343Z_01_JER149_RTRIDSP_3_ISRAEL-ELECTION-6494.jpg","25":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/assets.rbl.ms\/605929\/300x.jpg&h=60&w=60","26":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/assets.rbl.ms\/605929\/300x.jpg&h=92&w=138","27":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/media2.wnyc.org\/i\/1500\/1053\/c\/80\/1\/Rodner_Figueroa_getty.jpg&h=60&w=60","28":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/media2.wnyc.org\/i\/1500\/1053\/c\/80\/1\/Rodner_Figueroa_getty.jpg&h=92&w=138","29":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/timedotcom.files.wordpress.com\/2015\/03\/hillary-clinton8.jpg?quality=65&strip=color&w=150&h=60&w=60","30":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/timedotcom.files.wordpress.com\/2015\/03\/hillary-clinton8.jpg?quality=65&strip=color&w=150&h=92&w=138","31":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/sudoku\/img\/148x148.png","32":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/daily-crossword\/img\/148x148.png","33":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/masque-just-words\/img\/148x148.png","34":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/html5-trizzle\/img\/292x194.png"} \ No newline at end of file diff --git a/test/test-pages/wapo-2/expected-images.json b/test/test-pages/wapo-2/expected-images.json new file mode 100644 index 0000000..5279026 --- /dev/null +++ b/test/test-pages/wapo-2/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/img.washingtonpost.com\/rw\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1138.jpg","1":"https:\/\/img.washingtonpost.com\/rf\/image_400w\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1140.jpg?uuid=zLIZQs2KEeSip5UXo6cFBg","2":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=http:\/\/www.washingtonpost.com\/blogs\/wonkblog\/files\/2014\/07\/mufson_steve.jpg&h=180&w=180","3":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/energy-environment\/201503\/Images\/2012-02-07T003457Z_01_TOR105_RTRIDSP_3_USA.jpg","4":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/energy-environment\/201503\/Images\/2012-02-07T003457Z_01_TOR105_RTRIDSP_3_USA.jpg","5":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/tumblr_n1zot9AsFt1s3dn7vo1_1280.png","6":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/tumblr_n1zot9AsFt1s3dn7vo1_1280.png","7":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/Was8907355.jpg","8":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/Was8907355.jpg","9":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/2015-03-05T143249Z_01_TOR117_RTRIDSP_3_KROGER-RESULTS.jpg","10":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/2015-03-05T143249Z_01_TOR117_RTRIDSP_3_KROGER-RESULTS.jpg","11":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/the-switch\/201503\/Images\/Screen-Shot-2014-10-22-at-11.32.40.jpg","12":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/the-switch\/201503\/Images\/Screen-Shot-2014-1
0-22-at-11.32.40.jpg","13":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1138.jpg","14":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/2010-2019\/WashingtonPost\/2015\/03\/18\/National-Economy\/Images\/Nic6429750-1138.jpg","21":"http:\/\/img.washingtonpost.com\/rf\/image_60x60\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/NCAA_South_Georgetowns_Past-0e6bf.jpg","22":"http:\/\/img.washingtonpost.com\/rf\/image_138x92\/WashingtonPost\/Content\/Blogs\/wonkblog\/201503\/Images\/NCAA_South_Georgetowns_Past-0e6bf.jpg","23":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/assets.rbl.ms\/605929\/300x.jpg&h=60&w=60","24":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/assets.rbl.ms\/605929\/300x.jpg&h=92&w=138","25":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/media2.wnyc.org\/i\/1500\/1053\/c\/80\/1\/Rodner_Figueroa_getty.jpg&h=60&w=60","26":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/media2.wnyc.org\/i\/1500\/1053\/c\/80\/1\/Rodner_Figueroa_getty.jpg&h=92&w=138","27":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/timedotcom.files.wordpress.com\/2015\/03\/terry-mcauliffe.jpg?quality=65&strip=color&w=150&h=60&w=60","28":"http:\/\/img.washingtonpost.com\/wp-apps\/imrs.php?src=https:\/\/timedotcom.files.wordpress.com\/2015\/03\/terry-mcauliffe.jpg?quality=65&strip=color&w=150&h=92&w=138","29":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/sudoku\/img\/148x148.png","30":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/daily-crossword\/img\/148x148.png","31":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/masque-just-words\/img\/148x148.png","32":"http:\/\/cdn.arenaconnect.arkadiumhosted.com\/games-storage\/html5-trizzle\/img\/292x194.png"} \ No newline at end of file diff --git a/test/test-pages/webmd-1/expected-images.json 
b/test/test-pages/webmd-1/expected-images.json new file mode 100644 index 0000000..a83cf0d --- /dev/null +++ b/test/test-pages/webmd-1/expected-images.json @@ -0,0 +1 @@ +{"0":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/miscellaneous\/webmd_FB_logo_bluebkgrd.jpg?v=1","1":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/layout\/shared\/divider_breadcrumb.gif","3":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/modules\/article\/readspeaker\/listen_en_us.gif","4":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/news\/2013\/01_2013\/cdc_flu_update_02012013xml\/110x70_cdc_flu_update_02012013xml.jpg","5":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/slideshows\/nasal_allergy_relief\/110x70_nasal_allergy_relief.jpg","6":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/health_checks\/pet_health_evaluator_healthcheck\/110x70_pet_health_evaluator_cat_healthcheck.jpg","7":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/slideshows\/allergy_triggers_slideshow\/110x70_allergy_triggers_slideshow.jpg","8":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/features\/_2012\/04_2012\/truth_about_mucus_features\/69x75_truth_about_mucus_features.jpg","9":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/news\/2011\/11_2011\/acne_and_sore_throat\/69x75_acne_and_sore_throat.jpg","10":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/news\/2012\/08_2012\/new_tickborne_disease\/69x75_new_tickborne_disease.jpg","11":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/slideshows\/12_natural_ways_to_defeat_allergies\/69x75_12_natural_ways_to
_defeat_allergies.jpg","12":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/features\/_2011\/11_2011\/healthy_cat_gift_guide_features\/69x75_healthy_cat_gift_guide_features.jpg","13":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/reference_guide\/allergy_tests_ref_guide\/69x75_allergy_tests_ref_guide.jpg","14":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/health_checks\/allergy_sinus_symptoms_evaluator\/69x75_allergy_sinus_symptoms_evaluator.jpg","15":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/article_thumbnails\/quizzes\/indoor_allergies_rmq\/69x75_indoor_allergies_rmq.jpg"} \ No newline at end of file diff --git a/test/test-pages/webmd-2/expected-images.json b/test/test-pages/webmd-2/expected-images.json new file mode 100644 index 0000000..702245e --- /dev/null +++ b/test/test-pages/webmd-2/expected-images.json @@ -0,0 +1 @@ 
+{"0":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/miscellaneous\/webmd_FB_logo_bluebkgrd.jpg?v=1","1":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/layout\/shared\/divider_breadcrumb.gif","2":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/modules\/article\/readspeaker\/listen_en_us.gif","3":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/dam\/editorial\/infectious-disease\/cold-and-flu\/dirty-truth-handwashing\/graphics\/thumbnails\/final\/dirty-truth-handwashing-48x48.jpg","4":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/modules\/multi_watch.jpg","5":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/dam\/editorial\/dental-health\/miscellaneous\/causes-cures-bad-breath\/graphics\/thumbnails\/final\/causes-cures-bad-breath-48x48.jpg","7":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/dam\/editorial\/dental-health\/miscellaneous\/dangers-gum-disease\/graphics\/thumbnails\/final\/dangers-gum-disease-48x48.jpg","9":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/dam\/editorial\/skin-and-beauty\/beauty\/hair-care-shampoo\/graphics\/thumbnails\/final\/hair-care-shampoo-48x48.jpg","11":"http:\/\/img.webmd.com\/dtmcms\/live\/webmd\/consumer_assets\/site_images\/dam\/editorial\/dental-health\/healthy-teeth\/graphics\/thumbnails\/final\/vd-2125-wrld-3001-48x48.jpg"} \ No newline at end of file diff --git a/test/test-pages/wikia/expected-images.json b/test/test-pages/wikia/expected-images.json new file mode 100644 index 0000000..b3a55db --- /dev/null +++ b/test/test-pages/wikia/expected-images.json @@ -0,0 +1 @@ 
+{"0":"https:\/\/vignette.wikia.nocookie.net\/1fb5ee36-d9ae-4125-96d9-f52eb403f1c9\/thumbnail-down\/width\/1280\/height\/720","1":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/logo-fandom-tagline.svg?v=2","2":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/logo-fandom.svg","3":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/icon-sign-in-white.svg?v=2","4":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/icon-sign-in-white.svg","5":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/icon-sign-in-gray.svg","7":"http:\/\/fandom.wikia.com\/wp-content\/themes\/upstream\/dist\/svg\/tagline-powered-by-wikia.svg?v=2","8":"http:\/\/static.wikia.nocookie.net\/6eef5bc5-c8a0-4da1-9ba1-066e6b2cf8e9","9":"https:\/\/vignette.wikia.nocookie.net\/e80dae8a-b955-43f6-8ada-f023385e622b\/scale-to-width-down\/627","10":"https:\/\/vignette.wikia.nocookie.net\/375e0e5a-170d-4560-8f20-240c9f0624e9\/scale-to-width-down\/627","11":"https:\/\/vignette.wikia.nocookie.net\/1fb5ee36-d9ae-4125-96d9-f52eb403f1c9\/scale-to-width-down\/627","13":"https:\/\/vignette.wikia.nocookie.net\/e8cd4f01-7b93-4520-96b9-0d0fff411644\/thumbnail-down\/width\/400\/height\/225","14":"http:\/\/static.wikia.nocookie.net\/8a35b75d-730b-45ae-9d87-ba78fde72959","15":"https:\/\/vignette.wikia.nocookie.net\/2d27def1-0a6b-4471-af7e-0acdcbfe4cdc\/thumbnail-down\/width\/400\/height\/225","17":"https:\/\/vignette.wikia.nocookie.net\/e5d7165e-71ba-47a0-9bd3-e593cf469b9b\/thumbnail-down\/width\/400\/height\/225","18":"http:\/\/static.wikia.nocookie.net\/ece53f92-e810-4687-992b-021775864ccf","19":"https:\/\/vignette.wikia.nocookie.net\/174df909-103a-471f-b582-99119e64ef80\/thumbnail-down\/width\/400\/height\/225","20":"http:\/\/static.wikia.nocookie.net\/e3803b59-b3c1-4533-8513-9192f39310b8","21":"https:\/\/vignette.wikia.nocookie.net\/941e2129-fc45-4fd9-929b-eb133c1efd79\/thumbnail-down\/width\/400\/height\/225","22":"http:\/\/s
tatic.wikia.nocookie.net\/39101a69-3beb-49f5-a40c-21820b4cfd6d","23":"https:\/\/vignette.wikia.nocookie.net\/b968ada1-c2f4-4126-af61-286935e668cb\/thumbnail-down\/width\/400\/height\/225","24":"http:\/\/static.wikia.nocookie.net\/7d1b855e-a181-4f55-85a4-16045dd2549a","25":"https:\/\/vignette.wikia.nocookie.net\/b2eea388-4eb3-4532-9352-9a060c09e0b9\/thumbnail-down\/width\/400\/height\/225","27":"https:\/\/vignette.wikia.nocookie.net\/f0127060-55ef-43db-b0c3-51b0b1676ac9\/thumbnail-down\/width\/400\/height\/225","28":"http:\/\/static.wikia.nocookie.net\/cffee4db-9472-4d40-93d8-c42475e8444d","29":"https:\/\/vignette.wikia.nocookie.net\/ce87556c-22ef-4827-9002-bc0c9ca947a8\/thumbnail-down\/width\/400\/height\/225","30":"http:\/\/static.wikia.nocookie.net\/38cd0c49-d652-4950-8206-f85b3b804d23","31":"https:\/\/vignette.wikia.nocookie.net\/6463530d-2842-47c9-8ecd-58ea25b7f716\/thumbnail-down\/width\/400\/height\/225","33":"https:\/\/vignette.wikia.nocookie.net\/fc7164d9-06e8-4f91-9115-1745783792da\/thumbnail-down\/width\/400\/height\/225","35":"https:\/\/vignette.wikia.nocookie.net\/96f9f44f-abb8-4125-b338-fcec161f6832\/thumbnail-down\/width\/400\/height\/225","36":"http:\/\/static.wikia.nocookie.net\/f3f8b3d4-5f2b-4de2-918d-460fe7349036","37":"http:\/\/beacon.krxd.net\/event.gif?event_id=KPRU8DbB&event_type=dmtc","38":"http:\/\/segs.btrll.com\/v1\/tpix\/-\/-\/-\/-\/-\/sid.6614149"} \ No newline at end of file diff --git a/test/test-pages/wikipedia/expected-images.json b/test/test-pages/wikipedia/expected-images.json new file mode 100644 index 0000000..de03bda --- /dev/null +++ b/test/test-pages/wikipedia/expected-images.json @@ -0,0 +1 @@ 
+["http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/5\/5c\/Mozilla_dinosaur_head_logo.png\/200px-Mozilla_dinosaur_head_logo.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/7\/76\/Mozilla_Firefox_logo_2013.svg\/220px-Mozilla_Firefox_logo_2013.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/0\/0d\/SeaMonkey.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/d\/d7\/Buggie.svg\/220px-Buggie.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/2\/2a\/London_Mozilla_Workspace.jpg\/220px-London_Mozilla_Workspace.jpg","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/0\/0b\/Mozilla_Reps.png\/220px-Mozilla_Reps.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/e\/e4\/Fireside_Chat%2C_Knight%27s_Michael_Maness_and_Dan_Sinker_-_Flickr_-_Knight_Foundation.jpg\/220px-Fireside_Chat%2C_Knight%27s_Michael_Maness_and_Dan_Sinker_-_Flickr_-_Knight_Foundation.jpg","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/3\/31\/Free_and_open-source_software_logo_%282009%29.svg\/28px-Free_and_open-source_software_logo_%282009%29.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/en\/thumb\/4\/4a\/Commons-logo.svg\/30px-Commons-logo.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/8\/89\/Symbol_book_class2.svg\/16px-Symbol_book_class2.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/en\/thumb\/4\/48\/Folder_Hexagonal_Icon.svg\/16px-Folder_Hexagonal_Icon.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/en\/thumb\/4\/4a\/Commons-logo.svg\/12px-Commons-logo.svg.png","http:\/\/upload.wikimedia.org\/wikipedia\/en\/thumb\/f\/fd\/Portal-puzzle.svg\/16px-Portal-puzzle.svg.png"] \ No newline at end of file diff --git a/test/test-pages/wordpress/expected-images.json b/test/test-pages/wordpress/expected-images.json new file mode 100644 index 0000000..2e90609 --- /dev/null +++ b/test/test-pages/wordpress/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/i0.wp.com\/wptavern.com\/wp-content\/uploads\/2016\/07\/stack-overflow.png?fit=1200%2C470&ssl=1","https:\/\/i0.wp.com\/wptavern.com\/wp-content\/uploads\/2016\/07\/stack-overflow.png?resize=1025%2C401&ssl=1","https:\/\/i2.wp.com\/wptavern.com\/wp-content\/uploads\/2017\/03\/ChangesinDemand.png?resize=975%2C1115&ssl=1","https:\/\/i1.wp.com\/wptavern.com\/wp-content\/uploads\/2017\/03\/HighDemand.png?resize=975%2C854&ssl=1","https:\/\/pixel.wp.com\/g.gif?v=ext&j=1%3A4.7.1&blog=9006382&post=67202&tz=-4&srv=wptavern.com&host=wptavern.com&ref=&rand=0.17647913145625704","https:\/\/wptavern.com\/wp-content\/plugins\/jetpack\/modules\/sharedaddy\/images\/loading.gif","https:\/\/www.gstatic.com\/images\/branding\/product\/1x\/translate_24dp.png"] \ No newline at end of file diff --git a/test/test-pages/yahoo-1/expected-images.json b/test/test-pages/yahoo-1/expected-images.json new file mode 100644 index 0000000..98c0a68 --- /dev/null +++ b/test/test-pages/yahoo-1/expected-images.json @@ -0,0 +1 @@ +["http:\/\/l3.yimg.com\/uu\/api\/res\/1.2\/4eRCPf9lJt_3q29.outekQ--\/aD02Njk7dz03NDQ7c209MTthcHBpZD15dGFjaHlvbg--\/http:\/\/media.zenfs.com\/en\/homerun\/feed_manager_auto_publish_494\/4406ef57dcb40376c513903b03bef048","http:\/\/l4.yimg.com\/uu\/api\/res\/1.2\/CR1v_hSPghpHrl0a4OKYqQ--\/YXBwaWQ9eXRhY2h5b24-\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-08-26\/7ac3a4f0-6bba-11e6-b52d-c59238e28a69_yahoologo.png","http:\/\/l1.yimg.com\/ny\/api\/res\/1.2\/589noY9BZNdmsUUQf6L1AQ--\/YXBwaWQ9aGlnaGxhbmRlcjtzbT0xO3c9NzQ0O2g9NjY5\/http:\/\/media.zenfs.com\/en\/homerun\/feed_manager_auto_publish_494\/4406ef57dcb40376c513903b03bef048","https:\/\/s.yimg.com\/g\/images\/spaceball.gif"] \ No newline at end of file diff --git a/test/test-pages/yahoo-2/expected-images.json b/test/test-pages/yahoo-2/expected-images.json new file mode 100644 index 0000000..e3df277 --- /dev/null +++ b/test/test-pages/yahoo-2/expected-images.json @@ -0,0 +1 @@ 
+{"0":"https:\/\/s.yimg.com\/os\/mit\/media\/m\/social\/images\/social_default_logo-1481777.png","1":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/x_tcbZOygbxFrzTIs.Wb2w--\/Zmk9c3RyaW07aD0zNzg7cHlvZmY9MDtxPTk1O3c9NjcyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/OsxooCBCKqU8KAJk0gF.Tg--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/077ba5f0-b805-11e6-9597-bb741f194c03_Syed-Farook-nh.jpg.cf.jpg","2":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/c_5m.q81fhiHR_OtVl4LXg--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/Kwclfh9FhnLRkXCAxyhr1A--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/60b4c4f0-b821-11e6-a523-df2c277ad130_carrier3.jpg.cf.jpg","3":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/nRoupSfchK9xlLaGwfqbrQ--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/jBk_K19QS_R2N6cv1Jo.nQ--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/578a60f0-b7e1-11e6-84d7-afd41e7a457a_cereal-nh.jpg.cf.jpg","4":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/lBBjVn8yZTJMYlmvXks9kg--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/9ybaU0mkis_MEU8KEXWu_A--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/b1c4e1d0-b7e6-11e6-acc0-99cee64c4025_gross-nh.jpg.cf.jpg","5":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/mvvblrt5TPrXbJGw4wICFg--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/T0TYaaPEXUYHNHelmXiDtA--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPX
Byb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/8d3461c0-b7db-11e6-8d5b-f33bfbc5e37a_papini.jpeg","6":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/cpzruLLm4bRxs_goFjFgag--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/o6yR2afc_lIt7zODYEEznA--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/90778330-b7f9-11e6-8883-b573f7cbf769_trump-nh.jpg.cf.jpg","7":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/U6EahToM6GV_v_u7IMQLTA--\/Zmk9c3RyaW07aD0yMzI7cHlvZmY9MDtxPTgwO3c9NDEyO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/lo\/api\/res\/1.2\/bMDj1RG6kYv1ci1TJb3hRw--%7EC\/Zmk9Zml0O3c9MTQwNDtoPTc1NjtweG9mZj0wO3B5b2ZmPTA7O2FwcGlkPXByb2Rlc2sy\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/7e6aa6f0-b7f8-11e6-8713-53806c57f2fe_AP_16189675823987.jpg.cf.jpg","8":"https:\/\/s.yimg.com\/g\/images\/spaceball.gif","20":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/rjByh6oEyxjzJF.bBGQncw--\/Zmk9c3RyaW07aD0xOTM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en_us\/News\/ap_webfeeds\/c9375de6acbf4c168be78ffe4ec71ea9.jpg.cf.jpg","21":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/VTyRBpgI2e0bCUnxX5Rgpg--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/9a3eaeb44ce43eb0e6bbd26e3b0b34f3","22":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/RZdz1Te.05Qfasd5Wfbp8Q--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/3e956d3b2b73c0d66b383324df4e3d8c","23":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/2SChHrdWdg0VIIG0njDK4g--\/Zmk9c3RyaW07aD0xMjM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/av\/moneyball\/ads\/1476367816981-4689.jpg.cf.jpg","24":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/suS69_6DnHbDv..IcWI.Kg--\/Zmk9c3RyaW07aD0xOTM7cHlvZ
mY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en-US\/homerun\/inside_edition\/f16f4eebbcfaa1b365e3a7fe7d93672d","25":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/__TdQihFmqPBav5eWGCEcg--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/media.zenfs.com\/en-US\/homerun\/people_218\/4dd2b3063c6e44496b9c69d3e1c54883","26":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/MK46Tx1JrmKc8WJzn27pmA--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/media.zenfs.com\/en-US\/homerun\/people_218\/bc00455c8724c57de05799b959b551b8","27":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/mzvz49PUTQK.b.__Gd9HKw--\/Zmk9c3RyaW07aD0xOTM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en_us\/News\/afp.com\/f36535039305b6d05ae5610fc7a562994c6650be.jpg.cf.jpg","28":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/XlgPAviFNQEKyX6WQAoWFw--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/89842973b3907e6a0c42f2a9895f84b6","29":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/gHqKy_nouHib387jGHw1dQ--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/82f7c8b7a0eb9a29b9efdbf41ea15421","30":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/YtynaEAbpOhfQOgYxXwGDw--\/Zmk9c3RyaW07aD0xMjM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/s.yimg.com\/av\/moneyball\/ads\/1471893837023-4662.jpg.cf.jpg","31":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/jcmVppAqfnBmP1zyTTaExQ--\/Zmk9c3RyaW07aD0xOTM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en-US\/homerun\/inside_edition\/04d1fa12b56c43046d626647c13c9aa9","32":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/ahAFtiKFxn_LEExDOHz1NQ--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/f6d005d2b9903476d617f5334994c495","33":"http:\/\/fakehost
\/sy\/uu\/api\/res\/1.2\/OkQYQfNP8HwVQ4xIznTP4A--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/d17e8a97bc2e48d9694b996c217c47b2","34":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/wTn7e9Tfw3gF4fgyGvZwaA--\/Zmk9c3RyaW07aD0xMjM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en_us\/News\/ap_webfeeds\/217b21b588ac490fbb6c681e454dd4f0.jpg.cf.jpg","35":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/3ta3kmZHJjA.Rg6dZ8WOZw--\/Zmk9c3RyaW07aD0xOTM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en_us\/News\/afp.com\/Part-HKG-Hkg9294085-1-1-0.jpg.cf.jpg","36":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/FqmXgpRbQeFUeyOFvNSGlQ--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/media.zenfs.com\/en_us\/News\/Reuters\/2016-12-01T091822Z_1_LYNXMPECB027A_RTROPTP_2_KOREA-NORTH-USA.JPG.cf.jpg","37":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/Lou5neeIrq52Z5SKFYvzLw--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/d.yimg.com\/hd\/afpus\/2\/4\/19ce7942-245c-4b74-998a-881e7a64514f__1Wz8TBxC8KCxkQXSdMoBIshbTJBEOtaWP3nvpIa7CzO0i2t3oKyRsiYnZLmryU8RZkA2Sgyquk-_2_0.jpg?s=cdbf80bd3a19e34fb6cca7a7460caab0&c=1d9843bde92c11fbb433931538cb85d6&a=afpus&mr=0","38":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/oGO0fqiXxjVPOA9wK1rQLg--\/Zmk9c3RyaW07aD0xOTM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en\/homerun\/feed_manager_auto_publish_494\/8fd4a9cd64a49401c32b71f260825423","39":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/8XM7mUBfaFZco.tTMGVaTw--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7dz03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/globalfinance.zenfs.com\/images\/US_AHTTP_REUTERS_OLUSBUS_WRAPPER_H_LIVE_NEW\/2016-11-30T170525Z_3_LYNXMPECAT0US_RTROPTP_3_USA-TRUMP_original.jpg.cf.jpg","40":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/VzRwftxZGNKSbcgJdy3.oQ--\/Zmk9c3RyaW07aD03MjtweW9mZj0wO3E9ODA7d
z03MjtzbT0xO2FwcGlkPXl0YWNoeW9u\/http:\/\/slingstone.zenfs.com\/offnetwork\/f63b0c1a967def685914cec395952194","41":"http:\/\/fakehost\/sy\/uu\/api\/res\/1.2\/IlTIe2sWvV.uTCHYzOaU4A--\/Zmk9c3RyaW07aD0xMjM7cHlvZmY9MDtxPTgwO3c9MjIwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en-US\/homerun\/the_drive_165\/d8b3b02b586e6f32a6b108fa7e058a7a.cf.jpg","42":"http:\/\/fakehost\/sy\/dh\/ap\/default\/160914\/kc2.jpg","43":"http:\/\/fakehost\/sy\/dh\/ap\/default\/160914\/mi2.jpg","44":"http:\/\/fakehost\/sy\/dh\/ap\/default\/160914\/mb2.jpg","45":"https:\/\/s.yimg.com\/wv\/images\/alphatar_100x100_J_kl_6-16.jpg","46":"https:\/\/s.yimg.com\/wv\/images\/alphatar_100x100_S_ald.jpg","48":"https:\/\/s.yimg.com\/os\/weather\/1.0.1\/dark_icon\/64x64\/scattered_showers_day_night@2x.png","50":"https:\/\/s.yimg.com\/os\/weather\/1.0.1\/dark_icon\/64x64\/mostly_cloudy_day_night@2x.png","52":"https:\/\/s.yimg.com\/os\/mit\/media\/m\/content_index\/images\/sidekick_tv_news-2e9c408.png","53":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/9jHpA98Rf0k5TZel8HOVIQ--\/Zmk9c3RyaW07aD0yMjI7cHlvZmY9MDtxPTgwO3c9MzAwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/98baf690-b75d-11e6-b4d7-7549993b31c7_9c8af290-a06d-11e6-bdfd-4ddc0a732700_ford.gif","54":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/JRMsDJSMu_vCqkLVkpnmiA--\/Zmk9c3RyaW07aD0yMjI7cHlvZmY9MDtxPTgwO3c9MzAwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en_us\/News\/ap_webfeeds\/3bb2278df96048098ae83026310340c6.jpg","55":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/iu5tNfG3zLOh3WEdqShqLA--\/Zmk9c3RyaW07aD0yMjI7cHlvZmY9MDtxPTgwO3c9MzAwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en\/homerun\/feed_manager_auto_publish_494\/1512a9e6f903b7e1cd786f7fe2326866","56":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/H58kB1kmKZsVS8b3uJ8LBw--\/Zmk9c3RyaW07aD0yMjI7cHlvZmY9MDtxPTgwO3c9MzAwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/64c09c20-b814-11e6-914d-b5ce7de
c3223_GettyImages-627024488.jpg","57":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/65p_9yZez6XxWsuj5irjXA--\/Zmk9c3RyaW07aD0yMjI7cHlvZmY9MDtxPTgwO3c9MzAwO3NtPTE7YXBwaWQ9eXRhY2h5b24-\/http:\/\/media.zenfs.com\/en-US\/homerun\/foreign_policy_magazine\/7843baa2ae1e72999e1487175924cf33","58":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/FKxSqKNwseSUq0sIlACj4Q--\/YXBwaWQ9eXRhY2h5b24-\/http:\/\/l.yimg.com\/os\/152\/2012\/04\/21\/image001-png_162613.png","59":"https:\/\/s.yimg.com\/ny\/api\/res\/1.2\/lNHGeC84b29OU62FMISl7g--\/YXBwaWQ9aGlnaGxhbmRlcjtzbT0xO3c9ODAwO2g9NjAwO2lsPXBsYW5l\/http:\/\/media.zenfs.com\/en_us\/News\/ap_webfeeds\/c9375de6acbf4c168be78ffe4ec71ea9.jpg","60":"https:\/\/s.yimg.com\/ny\/api\/res\/1.2\/lweVDC7ZjZpHtU0S8gpbNA--\/YXBwaWQ9aGlnaGxhbmRlcjtzbT0xO3c9ODAwO2g9NjAwO2lsPXBsYW5l\/http:\/\/media.zenfs.com\/en_us\/News\/ap_webfeeds\/daa4cec687a64fff8c4060e11ee9e39d.jpg","61":"https:\/\/s.yimg.com\/ny\/api\/res\/1.2\/AE_dA6RMl8RF7SQmMQl_pw--\/YXBwaWQ9aGlnaGxhbmRlcjtzbT0xO3c9ODAwO2g9NjAwO2lsPXBsYW5l\/http:\/\/media.zenfs.com\/en_us\/News\/gettyimages.com\/day-seven-championships-wimbledon-2016-20160704-162516-521.jpg","62":"https:\/\/s.yimg.com\/uu\/api\/res\/1.2\/JJzKP5p68JZVB1jJIbzsXQ--\/Zmk9c3RyaW07aD05MDtweW9mZj0wO3E9ODA7dz05MDtzbT0xO2FwcGlkPXl0YWNoeW9u\/https:\/\/media.zenfs.com\/creatr-images\/GLB\/2016-12-01\/98baf690-b75d-11e6-b4d7-7549993b31c7_9c8af290-a06d-11e6-bdfd-4ddc0a732700_ford.gif","63":"https:\/\/s.yimg.com\/dg\/users\/1VVImoX-WAAAC7-L_VCY=.large.png","64":"https:\/\/s.yimg.com\/wv\/images\/alphatar_100x100_L_ad.jpg","65":"https:\/\/s.yimg.com\/wv\/images\/alphatar_100x100_Y_jl.jpg"} \ No newline at end of file diff --git a/test/test-pages/yahoo-3/expected-images.json b/test/test-pages/yahoo-3/expected-images.json new file mode 100644 index 0000000..0b8c16e --- /dev/null +++ b/test/test-pages/yahoo-3/expected-images.json @@ -0,0 +1 @@ 
+["https:\/\/s.yimg.com\/bt\/api\/res\/1.2\/qZaM9MLUOrxLg4IfXt_Niw--\/YXBwaWQ9eW5ld3NfbGVnbztxPTc1O3c9NjAw\/http:\/\/media.zenfs.com\/en-US\/video\/video.abcnewsplus.com\/559ecdbafdb839129816b5c79a996975.cf.png","https:\/\/s3.yimg.com\/bt\/api\/res\/1.2\/GNtA09EDJWzWfpBzGYJS0Q--\/YXBwaWQ9eW5ld3NfbGVnbztxPTg1O3c9NjMw\/http:\/\/media.zenfs.com\/en_us\/gma\/us.abcnews.gma.com\/HT_flag_baby_jtm_150311_16x9_992.jpg","https:\/\/s.yimg.com\/os\/mit\/media\/m\/base\/images\/transparent-1093278.png"] \ No newline at end of file diff --git a/test/test-pages/yahoo-4/expected-images.json b/test/test-pages/yahoo-4/expected-images.json new file mode 100644 index 0000000..57a8591 --- /dev/null +++ b/test/test-pages/yahoo-4/expected-images.json @@ -0,0 +1 @@ +["http:\/\/i.yimg.jp\/images\/jpnews\/cre\/common\/all\/images\/fbico_ogp_600x600.png","http:\/\/i.yimg.jp\/images\/clear.gif","https:\/\/s.yimg.jp\/images\/news\/cobranding\/cnetj.gif","http:\/\/yjtag.yahoo.co.jp\/csx?tp=8FzrfRY","http:\/\/yjtag.yahoo.co.jp\/csx?tp=vGtt1zQG","http:\/\/yj.adnxs.com\/seg?add=7103556&t=2","http:\/\/ib.adnxs.com\/seg?add=7103559&t=2","https:\/\/ib.adnxs.com\/getuid?https%3A%2F%2Fcm.g.doubleclick.net%2Fpixel%3Fgoogle_nid%3Dappnexus1%26google_sc%26google_hm%3D%24%7BBASE64_UID_ENC%7D%26google_cm","https:\/\/amd.c.yimg.jp\/im_siggGHp0PtHcrZGu5Iq67PQa7w---x175-y200-q90\/amd\/20170309-00051174-gendaibiz-000-1-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggtrspYBymXhUeVMDgTDKNdQ---x200-y143-q90\/amd\/20170309-00000070-reut-000-2-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggabcSvIDl9lXYdh8WSP9KAQ---x175-y200-q90\/amd\/20170309-00051119-gendaibiz-000-1-view.png","https:\/\/amd.c.yimg.jp\/im_siggDN4l56rbYkDHeOjFb4gq8Q---x200-y133-q90\/amd\/20170309-00000004-wordleaf-000-3-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggCyDjV0LRxxIcICnxJ7fHTQ---x200-y133-q90\/amd\/20170309-00010009-newswitch-000-1-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggoPOTrACscYfQWlmVkm5jIg---x57-y60-q90\/amd\/20170303-00000047-zdn_n-000-2-thumb.
jpg","https:\/\/amd.c.yimg.jp\/im_siggxwb8h68bfwnwHnnqc45Ddg---x45-y60-q90\/amd\/20170303-00000059-it_nlab-000-4-thumb.jpg","https:\/\/amd.c.yimg.jp\/im_sigg.RW1SnJzB1NId78lzy3KZQ---x60-y45-q90\/amd\/20170306-00000007-withnews-000-2-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggwg1MNZPMYtSM03H.lwUo_g---x60-y60-q90\/amd\/20170306-00000006-withnews-000-1-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggXQLPPIJsTaiR1fwcOQ.zKA---x60-y30-q90\/amd\/20170304-00010003-binsider-000-2-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggSZupUpIc2KL3I43aGZh7rw---x60-y34-q90\/amd\/20170309-00010000-nknatiogeo-000-6-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggdLlvwNn._mN8jQ1BLyUUgQ---x60-y60-q90\/amd\/20170305-00010000-dime-000-1-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggfN8Wv5Th6v9ZQVX5PT70qw---x60-y40-q90\/amd\/20170307-00010003-wired-000-2-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggl0OOhW0N1WPJiUT2SQT9ZQ---x60-y34-q90\/amd\/20170308-00010003-biz_lifeh-000-1-view.jpg","https:\/\/amd.c.yimg.jp\/im_siggUq4yrxVQZTHvlwp5E22ucQ---x60-y42-q90\/amd\/20170309-00010000-dime-000-1-view.jpg"] \ No newline at end of file diff --git a/test/test-pages/youth/expected-images.json b/test/test-pages/youth/expected-images.json new file mode 100644 index 0000000..1b30669 --- /dev/null +++ b/test/test-pages/youth/expected-images.json @@ -0,0 +1 @@ 
+["http:\/\/fakehost\/test\/..\/..\/..\/images\/youth_sjy_logo.png","http:\/\/fakehost\/test\/W020170310313653868929.jpg","http:\/\/fakehost\/test\/W020170310313654043127.jpg","http:\/\/fakehost\/test\/W020170310313654453091.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph360\/201506\/W020160714572680467502.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph360\/201506\/W020160714571929707734.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph360\/201506\/W020160715537222104338.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph360\/201506\/W020160715538060041700.jpg","http:\/\/atanx.alicdn.com\/t\/img\/TB1tWvVJFXXXXc_aXXXXXXXXXXX-40-26.png","http:\/\/atanx.alicdn.com\/t\/img\/TB1upAiJXXXXXa5aXXXXXXXXXXX-116-30.png","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph2\/201506\/W020160714573239657025.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph2\/201506\/W020160714573440760368.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph2\/201506\/W020160714573625412280.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rtph2\/201506\/W020160714573823818109.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rsph\/201506\/W020160714574130058490.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rsph\/201506\/W020160714574684855114.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rsph\/201506\/W020160714574904869909.jpg","http:\/\/d.youth.cn\/qwtf2015\/wzy\/rsph\/201506\/W020160714575330896977.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_140_100_7_11141036306491.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_140_100_7_1114095IH418.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_100_70_1372553783760.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_100_70_1372548043500.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_100_70_1372496480745.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_100_70_1372045720214.jpg","http:\/\/fakehost\/test\/..\/..\/..\/images\/thumb_100_70_1372468067229.jpg"] \ No newline at end of file -- cgit v1.2.3 From 99db71502dd7221bd251785dba68a8fe93fdcb39 Mon Sep 17 
00:00:00 2001 From: Pedro Amorim Date: Tue, 21 Nov 2017 18:03:13 +0100 Subject: Simplify calls to toAbsoluteUri() --- src/HTMLParser.php | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/HTMLParser.php b/src/HTMLParser.php index a492307..b63deb7 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -339,8 +339,6 @@ class HTMLParser public function postProcessContent(DOMDocument $article) { - list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->getConfig()->getOption('originalURL')); - // Readability cannot open relative uris so we convert them to absolute uris. if ($this->getConfig()->getOption('fixRelativeURLs')) { foreach (iterator_to_array($article->getElementsByTagName('a')) as $link) { @@ -353,7 +351,7 @@ class HTMLParser $text = $article->createTextNode($link->textContent); $link->parentNode->replaceChild($text, $link); } else { - $link->setAttribute('href', $this->toAbsoluteURI($href, $pathBase, $scheme, $prePath)); + $link->setAttribute('href', $this->toAbsoluteURI($href)); } } } @@ -362,7 +360,7 @@ class HTMLParser /** @var \DOMElement $img */ $src = $img->getAttribute('src'); if ($src) { - $img->setAttribute('src', $this->toAbsoluteURI($src, $pathBase, $scheme, $prePath)); + $img->setAttribute('src', $this->toAbsoluteURI($src)); } } } @@ -370,8 +368,10 @@ class HTMLParser return $article; } - private function toAbsoluteURI($uri, $pathBase, $scheme, $prePath) + private function toAbsoluteURI($uri) { + list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->getConfig()->getOption('originalURL')); + // If this is already an absolute URI, return it. 
if (preg_match('/^[a-zA-Z][a-zA-Z0-9\+\-\.]*:/', $uri)) { return $uri; @@ -542,9 +542,8 @@ class HTMLParser } if ($this->getConfig()->getOption('fixRelativeURLs')) { - list($pathBase, $scheme, $prePath) = $this->getPathInfo($this->getConfig()->getOption('originalURL')); foreach ($result as &$imgSrc) { - $imgSrc = $this->toAbsoluteURI($imgSrc, $pathBase, $scheme, $prePath); + $imgSrc = $this->toAbsoluteURI($imgSrc); } } -- cgit v1.2.3 From e33dd203ca16c1827e6e2b25b7961dcbd77f3cb2 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Wed, 22 Nov 2017 10:50:09 +0100 Subject: Add test testHTMLParserParsesImages() --- test/HTMLParserTest.php | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/test/HTMLParserTest.php b/test/HTMLParserTest.php index 96c07df..2aafa65 100644 --- a/test/HTMLParserTest.php +++ b/test/HTMLParserTest.php @@ -24,6 +24,24 @@ class HTMLParserTest extends \PHPUnit_Framework_TestCase $result = $readability->parse($html); $this->assertEquals($expectedResult, $result['html']); + } + + /** + * @dataProvider getSamplePages + */ + public function testHTMLParserParsesImages($html, $expectedResult, $expectedMetadata, $config, $expectedImages) + { + $options = ['originalURL' => 'http://fakehost/test/test.html', + 'fixRelativeURLs' => true, + 'substituteEntities' => true, + ]; + + if ($config) { + $options = array_merge($options, $config); + } + + $readability = new HTMLParser($options); + $result = $readability->parse($html); $this->assertEquals($expectedImages, json_encode($result['images'])); } -- cgit v1.2.3 From be0285ffc340dcbca76908caa0ce513706f0b316 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Wed, 22 Nov 2017 11:09:56 +0100 Subject: Update AUTHORS :) --- AUTHORS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index 3929b1e..06e38aa 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -5,4 +5,5 @@ Readability.php developed by **Andres Rey**. 
Copyright (c) 2010 Arc90 Inc The AUTHORS/Contributors are (and/or have been): * Andres Rey -* Sergiy Lavryk \ No newline at end of file +* Sergiy Lavryk +* Pedro Amorim -- cgit v1.2.3 From 420215cb30a69d64cb4d233cd7a0885ae6474710 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Tue, 21 Nov 2017 17:47:48 +0100 Subject: Use absolute Uri on main image. Sometimes og:image use a relative URI too. --- src/HTMLParser.php | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src/HTMLParser.php b/src/HTMLParser.php index b63deb7..9dff4bd 100644 --- a/src/HTMLParser.php +++ b/src/HTMLParser.php @@ -504,22 +504,31 @@ class HTMLParser */ public function getMainImage() { + $imgUrl = false; + if ($this->metadata['image'] !== null) { - return $this->metadata['image']; + $imgUrl = $this->metadata['image']; } - foreach ($this->dom->getElementsByTagName('link') as $link) { - /** @var \DOMElement $link */ - /* - * Check for the rel attribute, then check if the rel attribute is either img_src or image_src, and - * finally check for the existence of the href attribute, which should hold the image url. - */ - if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')) { - return $link->getAttribute('href'); + if (!$imgUrl) { + foreach ($this->dom->getElementsByTagName('link') as $link) { + /** @var \DOMElement $link */ + /* + * Check for the rel attribute, then check if the rel attribute is either img_src or image_src, and + * finally check for the existence of the href attribute, which should hold the image url. 
+ */ + if ($link->hasAttribute('rel') && ($link->getAttribute('rel') === 'img_src' || $link->getAttribute('rel') === 'image_src') && $link->hasAttribute('href')) { + $imgUrl = $link->getAttribute('href'); + break; + } } } - return false; + if (!empty($imgUrl) && $this->getConfig()->getOption('fixRelativeURLs')) { + $imgUrl = $this->toAbsoluteURI($imgUrl); + } + + return $imgUrl; } /** -- cgit v1.2.3 From e6693de508c605a2f3e7ead9589b1b313430ba37 Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Thu, 23 Nov 2017 15:17:10 +0100 Subject: Add test for Challenges.fr Use absolute URI for main image too. https://www.challenges.fr/monde/les-motivations-du-tueur-de-las-vegas-demeurent-floues_503396 --- test/test-pages/challenges/expected-images.json | 1 + test/test-pages/challenges/expected-metadata.json | 0 test/test-pages/challenges/expected.html | 35 + test/test-pages/challenges/source.html | 1744 +++++++++++++++++++++ 4 files changed, 1780 insertions(+) create mode 100644 test/test-pages/challenges/expected-images.json create mode 100644 test/test-pages/challenges/expected-metadata.json create mode 100644 test/test-pages/challenges/expected.html create mode 100644 test/test-pages/challenges/source.html diff --git a/test/test-pages/challenges/expected-images.json b/test/test-pages/challenges/expected-images.json new file mode 100644 index 0000000..e1716a0 --- /dev/null +++ b/test/test-pages/challenges/expected-images.json @@ -0,0 +1 @@ +["http:\/\/fakehost\/img\/cha\/placeholders\/placeholder_1000x750.jpg","http:\/\/fakehost\/img\/cha\/placeholders\/placeholder_200x150.jpg","http:\/\/fakehost\/img\/cha\/placeholders\/placeholder_400x300.jpg"] \ No newline at end of file diff --git a/test/test-pages/challenges/expected-metadata.json b/test/test-pages/challenges/expected-metadata.json new file mode 100644 index 0000000..e69de29 diff --git a/test/test-pages/challenges/expected.html b/test/test-pages/challenges/expected.html new file mode 100644 index 0000000..b204943 --- 
/dev/null +++ b/test/test-pages/challenges/expected.html @@ -0,0 +1,35 @@ +
+

par Alexandria Sage et Lisa Girion

+

LAS VEGAS, Nevada (Reuters) - La police américaine peinait mardi à établir les motivations qui ont poussé un retraité de 64 ans à tirer sur la foule depuis une chambre d'hôtel de Las Vegas, faisant 59 morts et 527 blessés dans la plus meurtrière fusillade de l'histoire des Etats-Unis.

+

Le tireur, qui s'est donné la mort peu avant l'arrivée de police, a été identifié comme Stephen Paddock, un individu apparemment sans histoire, inconnu des services de police et vivant dans un lotissement en périphérie de la ville. Le seul fait notable le concernant était une infraction au code de la route.

+

Paddock n'avait pas de passé militaire et n'était pas fiché pour d'éventuels troubles psychologiques, des comportements antisociaux ou l'expression d'opinions politiques et religieuses extrémistes sur les réseaux sociaux.

+

L'homme, qui était semble-t-il un pratiquant assidu des jeux de hasards, avait amassé 42 armes à feu et s'est servi de certaines d'entre elles pour tirer depuis la fenêtre de sa chambre, au 32e étage de l'hôtel Mandalay Bay, sur une foule de 22.000 personnes qui assistaient à un concert de country dimanche soir.

+

Vingt-trois armes ont été retrouvées dans sa chambre du Mandalay Bay, dont des fusils d'assaut et des armes altérées pour opérer de manière automatique. Un stock de nitrate d'ammonium, un engrais susceptible d'être utilisé dans l'élaboration d'explosifs, a été retrouvé dans sa voiture.

+

"C'était un homme malade, un type dément", a déclaré Donald Trump devant la presse, sans fournir de précisions. "Beaucoup de problèmes, j'imagine. On examine son cas de manière très attentive parce qu'on a affaire à un individu très très malade", a ajouté le président américain.

+

Interrogé sur un éventuel lien entre le meurtrier et une organisation terroriste, Donald Trump n'a pas répondu. La police estime, en l'état de l'enquête, que l'homme a agi seul.

+

"On n'a aucune idée de la nature de ses croyances religieuses", a déclaré Joseph Lombardo, shérif du comté de Clark. "On ne peut pas comprendre ce qui se passe dans la tête d'un psychopathe", a-t-il ajouté.

+ +

PISTE D'UN INDIVIDU ISOLE

+

Le groupe djihadiste Etat islamique a revendiqué l'attaque dans un communiqué qui affirme que le suspect s'était récemment converti à l'islam, mais un responsable américain s'est dit très sceptique sur cette revendication et a évoqué des "antécédents psychiatriques".

+

Aucun lien avec une organisation terroriste internationale n'a été établi pour le moment, a indiqué de son côté le FBI, tandis que la CIA a invité à la prudence.

+

Si la piste d'un individu isolé semble s'imposer, les enquêteurs souhaitent entendre la petite amie de Stephen Paddock avec laquelle il vivait, Marilou Danley, actuellement en voyage à l'étranger, sans doute au Japon.

+

La police s'intéresse également "à d'autres individus" qui seraient impliqués dans la vente des armes que possédait Stephen Paddock.

+

Le massacre commis dimanche soir est le plus meurtrier de l'histoire des Etats-Unis, dépassant celui perpétré par un homme lié à l'EI l'an passé dans une boîte de nuit homosexuelle d'Orlando en Floride. L'assaillant avait tué 49 personnes.

+

Stephen Paddock ne présentait aucune des caractéristiques habituelles de ce genre de tueur de masse, qui sont le plus souvent des hommes jeunes en proie à des troubles psychologiques, disent les experts.

+

L'homme a mené une existence paisible, occupant des emplois de concierge et d'employé de l'industrie aéronautique et ayant résidé dans plusieurs régions du sud-est et de l'ouest des Etats-Unis.

+

Il s'était installé, il y a quelques années, dans un lotissement pour retraités du Nevada, à environ une heure de voiture de Las Vegas où il pouvait s'adonner à sa passion pour le jeu et pour les casinos.

+

Dix-neuf armes, des explosifs et quantité de munitions y ont été retrouvés, a annoncé lundi soir le shérif du comté.

+ +

PAS L'HEURE D'UN DÉBAT SUR LES ARMES

+

Un mandat a été émis pour fouiller une seconde maison située à Reno, une ville du Nevada à plus de 600 km au nord-ouest de Las Vegas, a annoncé lundi soir l'adjoint au shérif Todd Fasulo.

+

A Mesquite, l'armurier local Chris Sullivan a confirmé dans un communiqué que Paddock comptait parmi ses clients et qu'il avait passé toutes les "vérifications et procédures" nécessaires à l'achat d'armes à feu.

+

Contacté par Reuters, le frère de l'assaillant présumé, Eric Paddock, s'est dit "horrifié" et a présenté ses condoléances aux victimes. Son frère n'appartenait à aucun mouvement religieux ou politique, a-t-il ajouté, précisant qu'il n'avait pas à sa connaissance d'antécédents psychiatriques.

+

Le père des deux hommes avait commis plusieurs braquages de banque et a un temps fait partie des fugitifs les plus recherchés par le FBI.

+

Depuis sa maison d'Orlando, en Floride, devant laquelle se sont pressés des journalistes, Eric Paddock a décrit en outre son frère comme un "type riche" qui aimait jouer au poker en ligne, partir en croisière et qui coulait une retraite "paisible" dans le Nevada après des années passées en Floride.

+

Cette nouvelle tuerie a ranimé le débat sur le contrôle des armes à feu aux Etats-Unis, une mesure contre laquelle le président Trump s'était fermement opposé lors de la campagne présidentielle l'an passé.

+

"Nous discuterons d'une loi sur les armes en temps voulu", a commenté l'ancien homme d'affaires devant des journalistes à la Maison blanche. La présidence avait indiqué dès lundi que le débat sur ce sujet était, selon elle, "prématuré", répondant ainsi aux demandes d'élus démocrates.

+ +

(Avec Jonathan Allen, Chris Michaud et Frank McGurty à New York, Susan Cornwell et Mark Hosenball à Washington, Ali Abdelaty au Caire, Tangi Salaün, Julie Carriat et Pierre Sérisier pour le service français)

+ + +
\ No newline at end of file diff --git a/test/test-pages/challenges/source.html b/test/test-pages/challenges/source.html new file mode 100644 index 0000000..b6f0267 --- /dev/null +++ b/test/test-pages/challenges/source.html @@ -0,0 +1,1744 @@ + + + + + Les motivations du tueur de Las Vegas demeurent floues - Challenges.fr + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
+
+
+
+
+ + +
+
+
+
+ +
+ + + +
+
+
+
+ +
+
+
+ + +
+ + + +
+ +
+ + + +
+
+
+
+ + +
+ + + +
+ + + + + + + + + + + + + + + + + +
+ +
+ + + + + + + + + + + + + + + + + + +
+ + +
+ +
+ +
+
+ + + +
+ + + + +
+ +
+ + +
+ + + + + + + +
+ + + + + + + +
+ + + + +
+ + + + Monde + + + + + +
+ + + + + + + +

Les motivations du tueur de Las Vegas demeurent floues

+ + +
+ +
+ + + + + +
+ +
+ + + + + + + + +

+ + + +
+ + 0 réactions + + +
+ + + + + + +
+ + + + + +
+ + 0 réactions + + +
+ + + + + + + + +
+ + + + + + +
+

par Alexandria Sage et Lisa Girion

+

LAS VEGAS, Nevada (Reuters) - La police américaine peinait mardi à établir les motivations qui ont poussé un retraité de 64 ans à tirer sur la foule depuis une chambre d'hôtel de Las Vegas, faisant 59 morts et 527 blessés dans la plus meurtrière fusillade de l'histoire des Etats-Unis.

+

Le tireur, qui s'est donné la mort peu avant l'arrivée de police, a été identifié comme Stephen Paddock, un individu apparemment sans histoire, inconnu des services de police et vivant dans un lotissement en périphérie de la ville. Le seul fait notable le concernant était une infraction au code de la route.

+

Paddock n'avait pas de passé militaire et n'était pas fiché pour d'éventuels troubles psychologiques, des comportements antisociaux ou l'expression d'opinions politiques et religieuses extrémistes sur les réseaux sociaux.

+

L'homme, qui était semble-t-il un pratiquant assidu des jeux de hasards, avait amassé 42 armes à feu et s'est servi de certaines d'entre elles pour tirer depuis la fenêtre de sa chambre, au 32e étage de l'hôtel Mandalay Bay, sur une foule de 22.000 personnes qui assistaient à un concert de country dimanche soir.

+

Vingt-trois armes ont été retrouvées dans sa chambre du Mandalay Bay, dont des fusils d'assaut et des armes altérées pour opérer de manière automatique. Un stock de nitrate d'ammonium, un engrais susceptible d'être utilisé dans l'élaboration d'explosifs, a été retrouvé dans sa voiture.

+

"C'était un homme malade, un type dément", a déclaré Donald Trump devant la presse, sans fournir de précisions. "Beaucoup de problèmes, j'imagine. On examine son cas de manière très attentive parce qu'on a affaire à un individu très très malade", a ajouté le président américain.

+

Interrogé sur un éventuel lien entre le meurtrier et une organisation terroriste, Donald Trump n'a pas répondu. La police estime, en l'état de l'enquête, que l'homme a agi seul.

+

"On n'a aucune idée de la nature de ses croyances religieuses", a déclaré Joseph Lombardo, shérif du comté de Clark. "On ne peut pas comprendre ce qui se passe dans la tête d'un psychopathe", a-t-il ajouté.

+

+

PISTE D'UN INDIVIDU ISOLE

+

Le groupe djihadiste Etat islamique a revendiqué l'attaque dans un communiqué qui affirme que le suspect s'était récemment converti à l'islam, mais un responsable américain s'est dit très sceptique sur cette revendication et a évoqué des "antécédents psychiatriques".

+

Aucun lien avec une organisation terroriste internationale n'a été établi pour le moment, a indiqué de son côté le FBI, tandis que la CIA a invité à la prudence.

+

Si la piste d'un individu isolé semble s'imposer, les enquêteurs souhaitent entendre la petite amie de Stephen Paddock avec laquelle il vivait, Marilou Danley, actuellement en voyage à l'étranger, sans doute au Japon.

+

La police s'intéresse également "à d'autres individus" qui seraient impliqués dans la vente des armes que possédait Stephen Paddock.

+

Le massacre commis dimanche soir est le plus meurtrier de l'histoire des Etats-Unis, dépassant celui perpétré par un homme lié à l'EI l'an passé dans une boîte de nuit homosexuelle d'Orlando en Floride. L'assaillant avait tué 49 personnes.

+

Stephen Paddock ne présentait aucune des caractéristiques habituelles de ce genre de tueur de masse, qui sont le plus souvent des hommes jeunes en proie à des troubles psychologiques, disent les experts.

+

L'homme a mené une existence paisible, occupant des emplois de concierge et d'employé de l'industrie aéronautique et ayant résidé dans plusieurs régions du sud-est et de l'ouest des Etats-Unis.

+

Il s'était installé, il y a quelques années, dans un lotissement pour retraités du Nevada, à environ une heure de voiture de Las Vegas où il pouvait s'adonner à sa passion pour le jeu et pour les casinos.

+

Dix-neuf armes, des explosifs et quantité de munitions y ont été retrouvés, a annoncé lundi soir le shérif du comté.

+

+

PAS L'HEURE D'UN DÉBAT SUR LES ARMES

+

Un mandat a été émis pour fouiller une seconde maison située à Reno, une ville du Nevada à plus de 600 km au nord-ouest de Las Vegas, a annoncé lundi soir l'adjoint au shérif Todd Fasulo.

+

A Mesquite, l'armurier local Chris Sullivan a confirmé dans un communiqué que Paddock comptait parmi ses clients et qu'il avait passé toutes les "vérifications et procédures" nécessaires à l'achat d'armes à feu.

+

Contacté par Reuters, le frère de l'assaillant présumé, Eric Paddock, s'est dit "horrifié" et a présenté ses condoléances aux victimes. Son frère n'appartenait à aucun mouvement religieux ou politique, a-t-il ajouté, précisant qu'il n'avait pas à sa connaissance d'antécédents psychiatriques.

+

Le père des deux hommes avait commis plusieurs braquages de banque et a un temps fait partie des fugitifs les plus recherchés par le FBI.

+

Depuis sa maison d'Orlando, en Floride, devant laquelle se sont pressés des journalistes, Eric Paddock a décrit en outre son frère comme un "type riche" qui aimait jouer au poker en ligne, partir en croisière et qui coulait une retraite "paisible" dans le Nevada après des années passées en Floride.

+

Cette nouvelle tuerie a ranimé le débat sur le contrôle des armes à feu aux Etats-Unis, une mesure contre laquelle le président Trump s'était fermement opposé lors de la campagne présidentielle l'an passé.

+

"Nous discuterons d'une loi sur les armes en temps voulu", a commenté l'ancien homme d'affaires devant des journalistes à la Maison blanche. La présidence avait indiqué dès lundi que le débat sur ce sujet était, selon elle, "prématuré", répondant ainsi aux demandes d'élus démocrates.

+

+

(Avec Jonathan Allen, Chris Michaud et Frank McGurty à New York, Susan Cornwell et Mark Hosenball à Washington, Ali Abdelaty au Caire, Tangi Salaün, Julie Carriat et Pierre Sérisier pour le service français)

+ + +
+ + + + + + + 0 réactions + + + + + + + + + + + 0 réactions + + + + +
+ + + + + + + + + + +
+ + + + + +
+ + + +
+ + + + +
+ + + + + +
+ + 0 réactions + + + + Pour réagir, veuillez vous connecter en cliquant ici + + +
    + + + + +
    + + + +
    + + + + +
    + + +
    + + + + + + + + + + + +
    +
    +
    + + + + + + + + + + + + + + +
    +
    + +
    +
    + + + + + + + + + + + + + + + + + + + + + + +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    +
    + + + + + + + + + + + + + + +
    + à la une cette semaine + +
    + + + + + + + + + + + + + + +
    +
    +
    + + + + + + + + + + + + +
    + Galeries Photo +
    +
    + + + + +
    +
    +
    +
    + + Aston Martin Vantage + +
    +
    +
    +
    + En images : Aston Martin Vantage +
    + + +
    + Les dernières galeries photo + + + +
    + +
    +
    +
    + + + + + + + + + + + +
    +
    +
    + + + + + + + + + + + + +
    + Vidéos + +
    +
    + + +
    +
    +
    +
    + + Esclavage en Libye: Macron parle de "crime contre l'Humanité" + +
    +
    +
    +
    +

    Esclavage en Libye: Macron parle de "crime contre l'Humanité"

    +
    +
    + Dernières vidéos + + + +
    +
    +
    + +
    + + + + + + + + + + + + + +
    + + + Notre rubrique Monde + + + +
    + + + + + + + + + +
    + + + + +
    +
    + + + + + + + + + + + + + + + + + + + + + + +
    + + +
    +
    +
    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + + + + + + + + +
    + + -- cgit v1.2.3 From b6847639a1a7f5c54fd8611cca2c57178d9c375a Mon Sep 17 00:00:00 2001 From: Pedro Amorim Date: Thu, 23 Nov 2017 15:53:17 +0100 Subject: Update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index b9cd5b8..58cdcae 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ $result = [ 'title' => 'Title of the article', 'author' => 'Name of the author of the article', 'image' => 'Main image of the article', + 'images' => 'Array with the URLs of all images of the article', 'article' => 'DOMDocument with the full article text, scored and parsed' ] ``` -- cgit v1.2.3