Skip to content

Commit

Permalink
Add custom rules directory support
Browse files Browse the repository at this point in the history
  • Loading branch information
fguillot committed Apr 11, 2015
1 parent f346af2 commit 578f9cb
Show file tree
Hide file tree
Showing 36 changed files with 274 additions and 118 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,4 @@ Thumbs.db
config.php
!models/*
!controllers/*
rules/*.php
2 changes: 2 additions & 0 deletions common.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
defined('AUTO_UPDATE_ARCHIVE_DIRECTORY') or define('AUTO_UPDATE_ARCHIVE_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'archive');
defined('AUTO_UPDATE_BACKUP_DIRECTORY') or define('AUTO_UPDATE_BACKUP_DIRECTORY', DATA_DIRECTORY.DIRECTORY_SEPARATOR.'backup');

defined('RULES_DIRECTORY') or define('RULES_DIRECTORY', ROOT_DIRECTORY.DIRECTORY_SEPARATOR.'rules');

require __DIR__.'/check_setup.php';

PicoDb\Database::bootstrap('db', function() {
Expand Down
6 changes: 2 additions & 4 deletions docs/full-article-download.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ However the content grabber doesn't work very well with all websites.
How to write a grabber rules file?
----------------------------------

Add a PHP file to the directory `vendor/fguillot/picofeed/lib/PicoFeed/Rules`, the filename must be the domain name:
Add a PHP file to the `rules` directory; the filename must be the domain name followed by the `.php` suffix:

Example with the BBC website, `www.bbc.co.uk.php`:

Expand Down Expand Up @@ -55,6 +55,4 @@ Don't forget to send a pull request or a ticket to share your contribution with
List of content grabber rules
-----------------------------

[List of existing rules on the repository](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules)

If you want to add new rules, just open a ticket and I will do it.
[List of rules included by default](https://github.com/miniflux/miniflux/tree/master/vendor/fguillot/picofeed/lib/PicoFeed/Rules).
1 change: 1 addition & 0 deletions models/auto_update.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ function get_files_list($directory)
'data',
'scripts',
'config.php',
'rules',
);

$it = new RecursiveIteratorIterator(new RecursiveDirectoryIterator($directory), RecursiveIteratorIterator::SELF_FIRST);
Expand Down
4 changes: 4 additions & 0 deletions models/config.php
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ function get_reader_config()
// Client
$config->setClientTimeout(HTTP_TIMEOUT);
$config->setClientUserAgent(HTTP_USER_AGENT);

// Grabber
$config->setGrabberTimeout(HTTP_TIMEOUT);
$config->setGrabberUserAgent(HTTP_USER_AGENT);
$config->setGrabberRulesFolder(RULES_DIRECTORY);

// Proxy
$config->setProxyHostname(PROXY_HOSTNAME);
Expand Down
1 change: 1 addition & 0 deletions rules/.htaccess
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Deny from all
2 changes: 1 addition & 1 deletion vendor/autoload.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@

require_once __DIR__ . '/composer' . '/autoload_real.php';

return ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd::getLoader();
return ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820::getLoader();
10 changes: 5 additions & 5 deletions vendor/composer/autoload_real.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

// autoload_real.php @generated by Composer

class ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd
class ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820
{
private static $loader;

Expand All @@ -19,9 +19,9 @@ public static function getLoader()
return self::$loader;
}

spl_autoload_register(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader'), true, true);
spl_autoload_register(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader'), true, true);
self::$loader = $loader = new \Composer\Autoload\ClassLoader();
spl_autoload_unregister(array('ComposerAutoloaderInit177dcd3c68ed52652977fcc464bd77bd', 'loadClassLoader'));
spl_autoload_unregister(array('ComposerAutoloaderInitfa142d3c582de229e6ed8fd200703820', 'loadClassLoader'));

$map = require __DIR__ . '/autoload_namespaces.php';
foreach ($map as $namespace => $path) {
Expand All @@ -42,14 +42,14 @@ public static function getLoader()

$includeFiles = require __DIR__ . '/autoload_files.php';
foreach ($includeFiles as $file) {
composerRequire177dcd3c68ed52652977fcc464bd77bd($file);
composerRequirefa142d3c582de229e6ed8fd200703820($file);
}

return $loader;
}
}

function composerRequire177dcd3c68ed52652977fcc464bd77bd($file)
function composerRequirefa142d3c582de229e6ed8fd200703820($file)
{
require $file;
}
24 changes: 12 additions & 12 deletions vendor/composer/installed.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/simpleValidator.git",
"reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7"
"reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7",
"reference": "5ebdb6df4c5f3aa2539b633eb4ae94c9e8c4ada7",
"url": "https://api.github.com/repos/fguillot/simpleValidator/zipball/41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
"reference": "41655dc7b9224395f5bb3b5623f6e428fe6d64e8",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2015-02-14 21:04:14",
"time": "2015-04-05 21:44:06",
"type": "library",
"installation-source": "dist",
"autoload": {
Expand Down Expand Up @@ -123,18 +123,18 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/JsonRPC.git",
"reference": "d0feab084422fa937da10e3551196b1c6fdf6918"
"reference": "29d63a09ecd450d5e29fef74f687aab221055910"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/d0feab084422fa937da10e3551196b1c6fdf6918",
"reference": "d0feab084422fa937da10e3551196b1c6fdf6918",
"url": "https://api.github.com/repos/fguillot/JsonRPC/zipball/29d63a09ecd450d5e29fef74f687aab221055910",
"reference": "29d63a09ecd450d5e29fef74f687aab221055910",
"shasum": ""
},
"require": {
"php": ">=5.3.0"
},
"time": "2015-03-25 23:55:18",
"time": "2015-04-05 21:49:38",
"type": "library",
"installation-source": "dist",
"autoload": {
Expand Down Expand Up @@ -162,12 +162,12 @@
"source": {
"type": "git",
"url": "https://github.com/fguillot/picoFeed.git",
"reference": "7c28753d5936ba635435a8e0e941dcabee67b243"
"reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/7c28753d5936ba635435a8e0e941dcabee67b243",
"reference": "7c28753d5936ba635435a8e0e941dcabee67b243",
"url": "https://api.github.com/repos/fguillot/picoFeed/zipball/3a0dce6bd3a62566c5f8414f7884f959753762f7",
"reference": "3a0dce6bd3a62566c5f8414f7884f959753762f7",
"shasum": ""
},
"require": {
Expand All @@ -181,7 +181,7 @@
"suggest": {
"ext-curl": "PicoFeed will use cURL if present"
},
"time": "2015-03-30 23:34:59",
"time": "2015-04-10 23:28:18",
"bin": [
"picofeed"
],
Expand Down
2 changes: 2 additions & 0 deletions vendor/fguillot/json-rpc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.DS_Store
vendor/
7 changes: 7 additions & 0 deletions vendor/fguillot/json-rpc/phpunit.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<phpunit bootstrap="./vendor/autoload.php">
<testsuites>
<testsuite name="JsonRPC">
<directory>tests</directory>
</testsuite>
</testsuites>
</phpunit>
4 changes: 1 addition & 3 deletions vendor/fguillot/json-rpc/tests/ClientTest.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<?php

require_once 'src/JsonRPC/Client.php';

use JsonRPC\Client;

class ClientTest extends PHPUnit_Framework_TestCase
Expand Down Expand Up @@ -111,4 +109,4 @@ public function testBatchRequest()
$this->assertTrue($client->is_batch);
$this->assertEmpty($client->batch);
}
}
}
2 changes: 0 additions & 2 deletions vendor/fguillot/json-rpc/tests/ServerProcedureTest.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<?php

require_once 'src/JsonRPC/Server.php';

use JsonRPC\Server;

class A
Expand Down
4 changes: 1 addition & 3 deletions vendor/fguillot/json-rpc/tests/ServerProtocolTest.php
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
<?php

require_once 'src/JsonRPC/Server.php';

use JsonRPC\Server;

class ServerProtocolTest extends PHPUnit_Framework_TestCase
Expand Down Expand Up @@ -214,4 +212,4 @@ public function testBatchNotifications()

$this->assertEquals('', $server->execute());
}
}
}
10 changes: 10 additions & 0 deletions vendor/fguillot/picofeed/docs/config.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,16 @@ $config->setGrabberTimeout(20); // 20 seconds
$config->setGrabberUserAgent('My content scraper');
```

### Add a rules folder

- Method name: `setGrabberRulesFolder()`
- Default value: `null`
- Argument value: string

```php
$config->setGrabberRulesFolder('/path/to/my/grabber/rules');
```

Parser
------

Expand Down
22 changes: 20 additions & 2 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Client.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ abstract class Client
*/
private $encoding = '';

/**
* HTTP request headers
*
* @access protected
* @var array
*/
protected $request_headers = array();

/**
* HTTP Etag header
*
Expand Down Expand Up @@ -193,6 +201,16 @@ public static function getInstance()
throw new LogicException('You must have "allow_url_fopen=1" or curl extension installed');
}

/**
* Set the custom HTTP headers sent with the request
*
* The given array replaces any headers stored by a previous call;
* it is not merged with them.
*
* @access public
* @param array $headers Header lines, presumably formatted as "Name: value" (TODO confirm against each client implementation)
*/
public function setHeaders($headers) {
$this->request_headers = $headers;
}

/**
* Perform the HTTP request
*
Expand Down Expand Up @@ -645,8 +663,8 @@ public function disablePassthroughMode()
public function setConfig($config)
{
if ($config !== null) {
$this->setTimeout($config->getGrabberTimeout());
$this->setUserAgent($config->getGrabberUserAgent());
$this->setTimeout($config->getClientTimeout());
$this->setUserAgent($config->getClientUserAgent());
$this->setMaxRedirections($config->getMaxRedirections());
$this->setMaxBodySize($config->getMaxBodySize());
$this->setProxyHostname($config->getProxyHostname());
Expand Down
24 changes: 13 additions & 11 deletions vendor/fguillot/picofeed/lib/PicoFeed/Client/Curl.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,15 @@ class Curl extends Client
* @access private
* @var array
*/
private $headers = array();
private $response_headers = array();

/**
* Counter of the number of headers received
*
* @access private
* @var integer
*/
private $headers_counter = 0;
private $response_headers_count = 0;

/**
* cURL callback to read the HTTP body
Expand Down Expand Up @@ -81,15 +81,15 @@ public function readHeaders($ch, $buffer)
$length = strlen($buffer);

if ($buffer === "\r\n") {
$this->headers_counter++;
$this->response_headers_count++;
}
else {

if (! isset($this->headers[$this->headers_counter])) {
$this->headers[$this->headers_counter] = '';
if (! isset($this->response_headers[$this->response_headers_count])) {
$this->response_headers[$this->response_headers_count] = '';
}

$this->headers[$this->headers_counter] .= $buffer;
$this->response_headers[$this->response_headers_count] .= $buffer;
}

return $length;
Expand Down Expand Up @@ -153,6 +153,8 @@ private function prepareHeaders()
$headers[] = 'If-Modified-Since: '.$this->last_modified;
}

$headers = array_merge($headers, $this->request_headers);

return $headers;
}

Expand Down Expand Up @@ -302,7 +304,7 @@ public function doRequest($follow_location = true)
{
$this->executeContext();

list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->headers[$this->headers_counter - 1]));
list($status, $headers) = HttpHeaders::parse(explode("\r\n", $this->response_headers[$this->response_headers_count - 1]));

// When restricted with open_basedir
if ($this->needToHandleRedirection($follow_location, $status)) {
Expand Down Expand Up @@ -343,8 +345,8 @@ private function handleRedirection($location)
$this->url = Url::resolve($location, $this->url);
$this->body = '';
$this->body_length = 0;
$this->headers = array();
$this->headers_counter = 0;
$this->response_headers = array();
$this->response_headers_count = 0;

while (true) {

Expand All @@ -360,8 +362,8 @@ private function handleRedirection($location)
$this->url = Url::resolve($result['headers']['Location'], $this->url);
$this->body = '';
$this->body_length = 0;
$this->headers = array();
$this->headers_counter = 0;
$this->response_headers = array();
$this->response_headers_count = 0;
}
else {
break;
Expand Down
Loading

0 comments on commit 578f9cb

Please sign in to comment.