generated from spatie/package-skeleton-php
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
26 changed files
with
970 additions
and
16 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
<?php | ||
|
||
namespace OneOffTech\Parse\Client\Connectors; | ||
|
||
use OneOffTech\Parse\Client\DocumentProcessor; | ||
use OneOffTech\Parse\Client\Dto\DocumentDto; | ||
use OneOffTech\Parse\Client\ParseOption; | ||
use OneOffTech\Parse\Client\Requests\ExtractTextRequest; | ||
use OneOffTech\Parse\Client\Responses\ParseResponse; | ||
use Saloon\Contracts\Authenticator; | ||
use Saloon\Http\Auth\NullAuthenticator; | ||
use Saloon\Http\Auth\TokenAuthenticator; | ||
use Saloon\Http\Connector; | ||
use Saloon\Http\Response; | ||
use Saloon\Traits\Plugins\AcceptsJson; | ||
use Saloon\Traits\Plugins\AlwaysThrowOnErrors; | ||
use Saloon\Traits\Plugins\HasTimeout; | ||
use SensitiveParameter; | ||
|
||
class ParseConnector extends Connector
{
    use AcceptsJson;
    use AlwaysThrowOnErrors;
    use HasTimeout;

    /**
     * Connection timeout (presumably seconds, as consumed by the HasTimeout trait — confirm against Saloon).
     */
    protected int $connectTimeout = 30;

    /**
     * Request timeout (presumably seconds, as consumed by the HasTimeout trait — confirm against Saloon).
     */
    protected int $requestTimeout = 120;

    /**
     * Response class configured for this connector.
     */
    protected ?string $response = ParseResponse::class;

    /**
     * @param string|null $token   Authentication token; when null no token authentication is configured
     * @param string      $baseUrl Base URL the API is listening on
     */
    public function __construct(
        #[SensitiveParameter]
        public readonly ?string $token = null,
        protected readonly string $baseUrl = 'https://parse.oneofftech.de/api/v0',
    ) {
        // Nothing to do: both properties are promoted from the constructor arguments.
    }

    /**
     * Base URL used for the requests, as given at construction time.
     */
    public function resolveBaseUrl(): string
    {
        return $this->baseUrl;
    }

    /**
     * Pick the authenticator: a token authenticator when a token was provided,
     * a null authenticator otherwise.
     */
    protected function defaultAuth(): Authenticator
    {
        return $this->token === null
            ? new NullAuthenticator()
            : new TokenAuthenticator($this->token);
    }

    /**
     * A request is considered failed when the response reports a server error or a client error.
     */
    public function hasRequestFailed(Response $response): ?bool
    {
        if ($response->serverError()) {
            return true;
        }

        return $response->clientError();
    }

    // Resources and helper methods

    /**
     * Parse a document hosted on a web server.
     *
     * Builds an ExtractTextRequest, calls validate() on it, sends it through this
     * connector and returns the DTO produced by the response.
     *
     * @param string $url The URL under which the document is accessible
     * @param string $mimeType The mime type of the document. Default application/pdf
     * @param \OneOffTech\Parse\Client\ParseOption|null $options Additional options for the parsing processor;
     *                                                           when no processor is specified PDFACT is used
     */
    public function parse(string $url, string $mimeType = 'application/pdf', ?ParseOption $options = null): DocumentDto
    {
        $processor = $options?->processor?->value ?? DocumentProcessor::PDFACT->value;

        $request = new ExtractTextRequest(
            url: $url,
            mimeType: $mimeType,
            preferredDocumentProcessor: $processor,
        );

        return $this->send($request->validate())->dto();
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
<?php | ||
|
||
namespace OneOffTech\Parse\Client\DocumentFormat; | ||
|
||
use Countable; | ||
use OneOffTech\Parse\Client\Exceptions\EmptyDocumentException; | ||
use OneOffTech\Parse\Client\Exceptions\InvalidDocumentFormatException; | ||
use RecursiveArrayIterator; | ||
use RecursiveIteratorIterator; | ||
|
||
class DocumentNode implements Countable
{
    /**
     * @param array $content    Content of the document node; each top-level entry is handled as one page
     * @param array $attributes Attributes attached to the document node
     */
    public function __construct(
        public readonly array $content,
        public readonly array $attributes = [],
    ) {}

    /**
     * The node type identifier, always "doc".
     */
    public function type(): string
    {
        return 'doc';
    }

    /**
     * The number of pages in this document as extracted by the parser.
     */
    public function count(): int
    {
        return count($this->content);
    }

    /**
     * Test if the document is empty, i.e. contains no pages or has no textual content on any of the pages
     */
    public function isEmpty(): bool
    {
        return $this->count() === 0 || ! $this->hasContent();
    }

    /**
     * Test if the document has discernible textual content on any of the pages
     */
    public function hasContent(): bool
    {
        // The generator is lazy: stop at the first non-empty text leaf.
        foreach ($this->textLeaves() as $ignored) {
            return true;
        }

        return false;
    }

    /**
     * The pages in this document
     *
     * @return \OneOffTech\Parse\Client\DocumentFormat\PageNode[]
     */
    public function pages(): array
    {
        return array_map(static fn ($page) => PageNode::fromArray($page), $this->content);
    }

    /**
     * The textual content of the document: every non-empty "text" leaf found in
     * the content, in traversal order, joined by PHP_EOL.
     */
    public function text(): string
    {
        return implode(PHP_EOL, iterator_to_array($this->textLeaves(), preserve_keys: false));
    }

    /**
     * Throw exception if document has no textual content
     *
     * @throws \OneOffTech\Parse\Client\Exceptions\EmptyDocumentException when document has no textual content
     */
    public function throwIfNoContent(): self
    {
        if (! $this->hasContent()) {
            throw new EmptyDocumentException('Document has no textual content.');
        }

        return $this;
    }

    /**
     * Create a document node from associative array
     *
     * @param array $data Expected keys: "category" (must be "doc"), "content" (array) and optionally "attributes" (array)
     *
     * @throws \OneOffTech\Parse\Client\Exceptions\InvalidDocumentFormatException when the structure is not the expected one
     */
    public static function fromArray(array $data): DocumentNode
    {
        if (! isset($data['category'], $data['content'])) {
            throw new InvalidDocumentFormatException("Unexpected document structure. Missing category or content.");
        }

        if ($data['category'] !== 'doc') {
            throw new InvalidDocumentFormatException("Unexpected node category. Expecting [doc] found [{$data['category']}].");
        }

        if (! is_array($data['content'])) {
            throw new InvalidDocumentFormatException("Unexpected content format. Expecting [array].");
        }

        $attributes = $data['attributes'] ?? [];

        // Report malformed attributes with the domain exception instead of
        // letting the constructor fail with a TypeError.
        if (! is_array($attributes)) {
            throw new InvalidDocumentFormatException("Unexpected attributes format. Expecting [array].");
        }

        return new self($data['content'], $attributes);
    }

    /**
     * Lazily walk the whole content tree and yield the value of every leaf
     * stored under a "text" key that is not empty.
     *
     * The emptiness check is explicit rather than empty(): empty('0') is true
     * in PHP, which would make a text consisting of the single character "0"
     * look like missing content.
     *
     * @return iterable<int, scalar>
     */
    private function textLeaves(): iterable
    {
        $leaves = new RecursiveIteratorIterator(
            new RecursiveArrayIterator($this->content),
            RecursiveIteratorIterator::LEAVES_ONLY,
        );

        foreach ($leaves as $key => $value) {
            if ($key === 'text' && is_scalar($value) && (string) $value !== '') {
                yield $value;
            }
        }
    }
}
Oops, something went wrong.