From 4916deda6f3398c11a98536582e748051d3d32de Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:04:59 -0600 Subject: [PATCH 01/16] Create http client with retry --- bin/sat-pys-scraper | 20 +++++++++++++++++++- tests/Integration/ScraperTest.php | 22 +++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/bin/sat-pys-scraper b/bin/sat-pys-scraper index 20750d5..4fe3640 100644 --- a/bin/sat-pys-scraper +++ b/bin/sat-pys-scraper @@ -3,8 +3,26 @@ declare(strict_types=1); +use GuzzleHttp\Client; +use GuzzleHttp\HandlerStack; +use GuzzleHttp\Middleware; use PhpCfdi\SatPysScraper\App\SatPysScraper; +use PhpCfdi\SatPysScraper\Scraper; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; require __DIR__ . '/../vendor/autoload.php'; -exit(SatPysScraper::run($argv)); +$scraperWithRetry = (function () { + $decider = fn (int $retries, RequestInterface $request, ResponseInterface $response = null): bool + => $retries < 5 && null !== $response && $response->getStatusCode() >= 500; + $delay = fn (int $retries): int => 1000 * ($retries + 1); + + $stack = HandlerStack::create(); + $stack->push(Middleware::retry($decider, $delay)); + $client = new Client(['handler' => $stack]); + + return new Scraper($client); +})(); + +exit(SatPysScraper::run($argv, $scraperWithRetry)); diff --git a/tests/Integration/ScraperTest.php b/tests/Integration/ScraperTest.php index 0ebcfe2..7a7c466 100644 --- a/tests/Integration/ScraperTest.php +++ b/tests/Integration/ScraperTest.php @@ -5,14 +5,34 @@ namespace PhpCfdi\SatPysScraper\Tests\Integration; use GuzzleHttp\Client; +use GuzzleHttp\HandlerStack; +use GuzzleHttp\Middleware; use PhpCfdi\SatPysScraper\Scraper; +use PhpCfdi\SatPysScraper\ScraperInterface; use PhpCfdi\SatPysScraper\Tests\TestCase; +use Psr\Http\Message\RequestInterface; +use Psr\Http\Message\ResponseInterface; class ScraperTest extends TestCase { + private const MAX_RETRIES = 5; + + private function createScraper(): ScraperInterface + { + $decider = fn (int $retries, RequestInterface $request, ResponseInterface $response = null): bool + => $retries < self::MAX_RETRIES && null !== $response && $response->getStatusCode() >= 500; + $delay = fn (int $retries): int => 1000 * ($retries + 1); + + $stack = HandlerStack::create(); + $stack->push(Middleware::retry($decider, $delay)); + $client = new Client(['handler' => $stack]); + + return new Scraper($client); + } + public function testObtainSequence(): void { - $scraper = new Scraper(new Client()); + $scraper = $this->createScraper(); $types = $scraper->obtainTypes(); $expectedTypeId = 1; From 8610f901e09b6a430ce65a13c895c3a5c7797d9a Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:05:20 -0600 Subject: [PATCH 02/16] Update license year to 2024 --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 7b43fa5..8e5765a 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2023 PhpCfdi https://www.phpcfdi.com/ +Copyright (c) 2023 - 2024 PhpCfdi https://www.phpcfdi.com/ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From 233f143c477096b1a0d4d27cb351241d5326a48a Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:06:40 -0600 Subject: [PATCH 03/16] Base docker image on php:8.3-cli-alpine instead of debian:bookworm --- Docker.README.md | 3 +-- Dockerfile | 40 ++++++++++++---------------------------- 2 files changed, 13 insertions(+), 30 deletions(-) diff --git a/Docker.README.md b/Docker.README.md index ea1f3df..92e0549 100644 --- a/Docker.README.md +++ b/Docker.README.md @@ -21,11 +21,10 @@ The project installed on `/opt/sat-pys-scraper/` and the entry point is the comm docker run -it --rm --user="$(id -u):$(id -g)" \ sat-pys-scraper --help -# generar en un volumen +# create output using volume docker run -it --rm --user="$(id -u):$(id -g)" --volume="${PWD}:/local" \ sat-pys-scraper --xml /local/output.xml - # pipe output to file (xml, sorted by key) docker run -it --rm --user="$(id -u):$(id -g)" \ sat-pys-scraper --xml - > output.xml diff --git a/Dockerfile b/Dockerfile index 9bdb27e..dcdc2b6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,41 +1,25 @@ -FROM debian:bookworm +FROM php:8.3-cli-alpine -COPY . /opt/sat-pys-scraper/ +COPY . /opt/sat-pys-scraper +COPY --from=composer:latest /usr/bin/composer /usr/local/bin/composer +# install dependencies for php modules RUN set -e \ - && export DEBIAN_FRONTEND=noninteractive \ - # Update debian base system - && apt-get update -y \ - && apt-get dist-upgrade -y \ - # Install repository PHP from Ondřej Surý - && apt-get install -y lsb-release ca-certificates curl \ - && curl --no-progress-meter https://packages.sury.org/php/apt.gpg --output /etc/apt/trusted.gpg.d/php.gpg \ - && echo "deb https://packages.sury.org/php/ $(lsb_release -sc) main" | tee /etc/apt/sources.list.d/php.list \ - && apt-get update -y \ - && apt-get dist-upgrade -y \ - # Install required packages - && apt-get install -y \ - unzip git \ - php-cli php-curl php-zip php-xml \ - # Clean APT - && rm -rf /var/lib/apt/lists/* + && apk add git libzip-dev \ + && docker-php-ext-install zip +# set up php RUN set -e \ - # Set up PHP - && find /etc/php/ -type f -name "*.ini" -exec sed -i 's/^variables_order.*/variables_order=EGPCS/' "{}" \; \ + && mv /usr/local/etc/php/php.ini-production /usr/local/etc/php/php.ini \ + && sed -i 's/^variables_order.*/variables_order=EGPCS/' /usr/local/etc/php/php.ini \ && php -i +# build project RUN set -e \ - # Install composer - && curl --progress-bar https://getcomposer.org/download/latest-stable/composer.phar --output /usr/local/bin/composer \ - && chmod +x /usr/local/bin/composer \ - && export COMPOSER_ALLOW_SUPERUSER=1 \ - && (composer diagnose --no-interaction || true) - -RUN set -e \ + && rm -r -f /opt/sat-pys-scraper/composer.lock /opt/sat-pys-scraper/vendor \ && composer update --working-dir=/opt/sat-pys-scraper --no-dev --prefer-dist --optimize-autoloader --no-interaction \ && rm -rf "$(composer config cache-dir --global)" "$(composer config data-dir --global)" "$(composer config home --global)" ENV TZ="America/Mexico_City" -ENTRYPOINT ["/usr/bin/php", "/opt/sat-pys-scraper/bin/sat-pys-scraper"] +ENTRYPOINT ["/usr/local/bin/php", "/opt/sat-pys-scraper/bin/sat-pys-scraper"] From e96a8eea39b60534c03e21fe920ccb604596206e Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:07:04 -0600 Subject: [PATCH 04/16] Remove done tasks --- docs/TODO.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/TODO.md b/docs/TODO.md index 0f53d69..ca24320 100644 --- a/docs/TODO.md +++ b/docs/TODO.md @@ -2,12 +2,6 @@ ## Lista de tareas pendientes -### `phcps` - -En el flujo de trabajo `build.yml` usando `actions/setup-php-action` la herramienta `phcps` se está instalando usando `squizlabs` en lugar de `PHPCSStandards`. -Por lo tanto, en lugar de usar la herramienta de la acción, se está usando `phive` para instalarla. -Cuando se actualice la herramienta `actions/setup-php-action` se debe cambiar a la instalación normal. - ### PHP 8.3 Migrar a PHP 8.3 en cuanto las herramientas (como *PHP_CodeSniffer*) lo permitan. From 1eefbb4ea4cbd4cec1d94ea05bbbbaa77a86aae5 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:08:03 -0600 Subject: [PATCH 05/16] Use variable matrix.php-version (singular) --- .github/workflows/build.yml | 6 +++--- .github/workflows/system.yml | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f83d401..45f5bbf 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -76,18 +76,18 @@ jobs: run: phpstan analyse --no-progress --verbose tests: - name: Tests on PHP ${{ matrix.php-versions }} + name: Tests on PHP ${{ matrix.php-version }} runs-on: "ubuntu-latest" strategy: matrix: - php-versions: ['8.2', '8.3'] + php-version: ['8.2', '8.3'] steps: - name: Checkout uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: ${{ matrix.php-versions }} + php-version: ${{ matrix.php-version }} coverage: xdebug tools: composer:v2 env: diff --git a/.github/workflows/system.yml b/.github/workflows/system.yml index 0e0b033..51683ae 100644 --- a/.github/workflows/system.yml +++ b/.github/workflows/system.yml @@ -15,18 +15,18 @@ on: jobs: system-tests: - name: System test on PHP ${{ matrix.php-versions }} + name: System test on PHP ${{ matrix.php-version }} runs-on: "ubuntu-latest" strategy: matrix: - php-versions: ['8.2', '8.3'] + php-version: ['8.2', '8.3'] steps: - name: Checkout uses: actions/checkout@v4 - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: ${{ matrix.php-versions }} + php-version: ${{ matrix.php-version }} coverage: xdebug tools: composer:v2 env: @@ -42,5 +42,5 @@ jobs: restore-keys: ${{ runner.os }}-composer- - name: Install project dependencies run: composer upgrade --no-interaction --no-progress --prefer-dist --no-dev - - name: System test with PHP ${{ matrix.php-versions }} + - name: System test with PHP ${{ matrix.php-version }} run: php bin/sat-pys-scraper --json build/result.json --xml build/result.xml --sort key From e87ba438aeffbfb84eae26024a7789f287a6a219 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:08:30 -0600 Subject: [PATCH 06/16] Improve tests-coverage job title --- .github/workflows/sonarcloud.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sonarcloud.yml b/.github/workflows/sonarcloud.yml index 97533e9..0bbedc7 100644 --- a/.github/workflows/sonarcloud.yml +++ b/.github/workflows/sonarcloud.yml @@ -11,7 +11,7 @@ on: jobs: tests-coverage: - name: Tests on PHP 8.3 (code coverage) + name: Create code coverage runs-on: "ubuntu-latest" steps: - name: Checkout From e80233d41062c1edbe07c7e14eb688a81ad71644 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 10:08:42 -0600 Subject: [PATCH 07/16] Update development tools --- .phive/phars.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.phive/phars.xml b/.phive/phars.xml index d17b9d2..c776c4e 100644 --- a/.phive/phars.xml +++ b/.phive/phars.xml @@ -1,8 +1,8 @@ - - - - - + + + + + From 184f73128d3e09bf9b0e0077ca8844c3bd88996e Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 14:11:04 -0600 Subject: [PATCH 08/16] Fix release dates --- docs/CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 8fedec2..1f282b0 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,13 +11,13 @@ versión, aunque sí su incorporación en la rama principal de trabajo. Generalm ## Listado de cambios -### Versión 3.0.0 2023-03-07 +### Versión 3.0.0 2024-03-07 - Se cambia el método `SatPysScraper::run()` para una mejor inyección de dependencias y capacidad de pruebas. - Se introduce una excepción dedicada para los errores de procesamiento de argumentos. - Se cambia la forma de procesar los argumentos para usar `array_shift`. -### Versión 2.0.0 2023-03-07 +### Versión 2.0.0 2024-03-07 - Se corrige el nodo principal, el nombre correcto es ``. - Se cambia el comando de ejecución `bin/sat-pys-scraper` para exportar a JSON y XML al mismo tiempo. From 31a14a65aaa1e60fd00300ae25be26a885009851 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 14:25:16 -0600 Subject: [PATCH 09/16] Improve code coverage --- src/Scraper.php | 2 +- tests/Unit/ScraperTest.php | 31 +++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/Unit/ScraperTest.php diff --git a/src/Scraper.php b/src/Scraper.php index be6fec8..3129921 100644 --- a/src/Scraper.php +++ b/src/Scraper.php @@ -14,7 +14,7 @@ final class Scraper implements ScraperInterface /** @noinspection HttpUrlsUsage */ public const PYS_URL = 'http://pys.sat.gob.mx/PyS/catPyS.aspx'; - private Crawler|null $crawler; + private Crawler|null $crawler = null; public function __construct(private readonly ClientInterface $client) { diff --git a/tests/Unit/ScraperTest.php b/tests/Unit/ScraperTest.php new file mode 100644 index 0000000..9fa7f36 --- /dev/null +++ b/tests/Unit/ScraperTest.php @@ -0,0 +1,31 @@ +createFakeScraper(); + $this->expectException(LogicException::class); + $scraper->obtainSegments(1); + } + + public function testCallObtainFamiliesWithoutCorrectSequenceThrowsLogicException(): void + { + $scraper = $this->createFakeScraper(); + $this->expectException(LogicException::class); + $scraper->obtainFamilies(27, 1); + } + + public function testCallObtainClassesWithoutCorrectSequenceThrowsLogicException(): void + { + $scraper = $this->createFakeScraper(); + $this->expectException(LogicException::class); + $scraper->obtainClasses(2711, 27, 1); + } +} From c20f2644e67a45c61300d365a0be3428e40c8788 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 17 Sep 2024 14:27:36 -0600 Subject: [PATCH 10/16] Document changes for version 3.0.1 --- docs/CHANGELOG.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 1f282b0..6f81ef7 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,6 +11,21 @@ versión, aunque sí su incorporación en la rama principal de trabajo. Generalm ## Listado de cambios +### Versión 3.0.1 2024-09-17 + +Se modifica el script de ejecución y la prueba funcional para poder reintentar en caso de que el +servidor del SAT devuelva un estado HTTP 500. Esto sucede frecuentemente desde hace un par de meses. + +Se cambia la construcción de imagen de docker, ahora depende de `php:8.3-cli-alpine`. + +Se actualiza el archivo de licencia a 2024. + +Se hacen otros cambios en el entorno de desarrollo: + +- Se prueba el correcto orden para llamar a los métodos para obtener datos. +- Se utiliza la variable `php-version` en singular para las matrices de pruebas. +- Se actualizan las herramientas de desarrollo. + ### Versión 3.0.0 2024-03-07 - Se cambia el método `SatPysScraper::run()` para una mejor inyección de dependencias y capacidad de pruebas. From d61a1ced78512ad282bc874e6082edebf2bee477 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Wed, 18 Sep 2024 10:21:28 -0600 Subject: [PATCH 11/16] Revert "Create http client with retry" This reverts commit 4916deda6f3398c11a98536582e748051d3d32de. --- bin/sat-pys-scraper | 20 +------------------- tests/Integration/ScraperTest.php | 22 +--------------------- 2 files changed, 2 insertions(+), 40 deletions(-) diff --git a/bin/sat-pys-scraper b/bin/sat-pys-scraper index 4fe3640..20750d5 100644 --- a/bin/sat-pys-scraper +++ b/bin/sat-pys-scraper @@ -3,26 +3,8 @@ declare(strict_types=1); -use GuzzleHttp\Client; -use GuzzleHttp\HandlerStack; -use GuzzleHttp\Middleware; use PhpCfdi\SatPysScraper\App\SatPysScraper; -use PhpCfdi\SatPysScraper\Scraper; -use Psr\Http\Message\RequestInterface; -use Psr\Http\Message\ResponseInterface; require __DIR__ . '/../vendor/autoload.php'; -$scraperWithRetry = (function () { - $decider = fn (int $retries, RequestInterface $request, ResponseInterface $response = null): bool - => $retries < 5 && null !== $response && $response->getStatusCode() >= 500; - $delay = fn (int $retries): int => 1000 * ($retries + 1); - - $stack = HandlerStack::create(); - $stack->push(Middleware::retry($decider, $delay)); - $client = new Client(['handler' => $stack]); - - return new Scraper($client); -})(); - -exit(SatPysScraper::run($argv, $scraperWithRetry)); +exit(SatPysScraper::run($argv)); diff --git a/tests/Integration/ScraperTest.php b/tests/Integration/ScraperTest.php index 7a7c466..0ebcfe2 100644 --- a/tests/Integration/ScraperTest.php +++ b/tests/Integration/ScraperTest.php @@ -5,34 +5,14 @@ namespace PhpCfdi\SatPysScraper\Tests\Integration; use GuzzleHttp\Client; -use GuzzleHttp\HandlerStack; -use GuzzleHttp\Middleware; use PhpCfdi\SatPysScraper\Scraper; -use PhpCfdi\SatPysScraper\ScraperInterface; use PhpCfdi\SatPysScraper\Tests\TestCase; -use Psr\Http\Message\RequestInterface; -use Psr\Http\Message\ResponseInterface; class ScraperTest extends TestCase { - private const MAX_RETRIES = 5; - - private function createScraper(): ScraperInterface - { - $decider = fn (int $retries, RequestInterface $request, ResponseInterface $response = null): bool - => $retries < self::MAX_RETRIES && null !== $response && $response->getStatusCode() >= 500; - $delay = fn (int $retries): int => 1000 * ($retries + 1); - - $stack = HandlerStack::create(); - $stack->push(Middleware::retry($decider, $delay)); - $client = new Client(['handler' => $stack]); - - return new Scraper($client); - } - public function testObtainSequence(): void { - $scraper = $this->createScraper(); + $scraper = new Scraper(new Client()); $types = $scraper->obtainTypes(); $expectedTypeId = 1; From 239321545175c8dc9747b3a942e7f51fdc68951c Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 15 Oct 2024 09:24:39 -0600 Subject: [PATCH 12/16] Remove change "create http client when retry" --- docs/CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6f81ef7..fee46e5 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -13,9 +13,6 @@ versión, aunque sí su incorporación en la rama principal de trabajo. Generalm ### Versión 3.0.1 2024-09-17 -Se modifica el script de ejecución y la prueba funcional para poder reintentar en caso de que el -servidor del SAT devuelva un estado HTTP 500. Esto sucede frecuentemente desde hace un par de meses. - Se cambia la construcción de imagen de docker, ahora depende de `php:8.3-cli-alpine`. Se actualiza el archivo de licencia a 2024. From 7b4c7ebb59459dd4855825e59eda7b48c8f91339 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 15 Oct 2024 09:28:34 -0600 Subject: [PATCH 13/16] remove redundant return types from class --- src/Scraper.php | 4 ---- src/ScraperInterface.php | 16 ++++++++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Scraper.php b/src/Scraper.php index 3129921..816e20f 100644 --- a/src/Scraper.php +++ b/src/Scraper.php @@ -20,14 +20,12 @@ public function __construct(private readonly ClientInterface $client) { } - /** @return array */ public function obtainTypes(): array { $crawler = $this->sendGet(); return $this->extractSelectValues($crawler, 'cmbTipo'); } - /** @return array */ public function obtainSegments(int|string $type): array { $inputs = [ @@ -39,7 +37,6 @@ public function obtainSegments(int|string $type): array return $this->extractSelectValues($crawler, 'cmbSegmento'); } - /** @return array */ public function obtainFamilies(int|string $type, int|string $segment): array { $inputs = [ @@ -52,7 +49,6 @@ public function obtainFamilies(int|string $type, int|string $segment): array return $this->extractSelectValues($crawler, 'cmbFamilia'); } - /** @return array */ public function obtainClasses(int|string $type, int|string $segment, int|string $family): array { $inputs = [ diff --git a/src/ScraperInterface.php b/src/ScraperInterface.php index a1348b9..e7f1ae6 100644 --- a/src/ScraperInterface.php +++ b/src/ScraperInterface.php @@ -6,15 +6,23 @@ interface ScraperInterface { - /** @return array */ + /** + * @return array + */ public function obtainTypes(): array; - /** @return array */ + /** + * @return array + */ public function obtainSegments(int|string $type): array; - /** @return array */ + /** + * @return array + */ public function obtainFamilies(int|string $type, int|string $segment): array; - /** @return array */ + /** + * @return array + */ public function obtainClasses(int|string $type, int|string $segment, int|string $family): array; } From e6a6c86468d5fbb28cf2d7f596d3fb90901a1d65 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 15 Oct 2024 09:56:03 -0600 Subject: [PATCH 14/16] Add Exceptions for HTTP errors --- README.md | 14 +++++ src/Exceptions/HttpException.php | 11 ++++ src/Exceptions/HttpServerException.php | 9 ++++ src/Exceptions/PysException.php | 9 ++++ src/Generator.php | 6 +++ src/Scraper.php | 53 +++++++++++++++---- src/ScraperInterface.php | 4 ++ tests/Unit/Exceptions/HttpExceptionTest.php | 18 +++++++ .../Exceptions/HttpServerExceptionTest.php | 18 +++++++ tests/Unit/GeneratorTest.php | 25 +++++++++ tests/Unit/ScraperTest.php | 23 ++++++++ tests/Unit/TestCase.php | 11 ++++ 12 files changed, 190 insertions(+), 11 deletions(-) create mode 100644 src/Exceptions/HttpException.php create mode 100644 src/Exceptions/HttpServerException.php create mode 100644 src/Exceptions/PysException.php create mode 100644 tests/Unit/Exceptions/HttpExceptionTest.php create mode 100644 tests/Unit/Exceptions/HttpServerExceptionTest.php diff --git a/README.md b/README.md index d0240cd..7a573fa 100644 --- a/README.md +++ b/README.md @@ -140,6 +140,20 @@ Un objeto `Classification` solamente contiene las propiedades `key` y `name`. Todos los objetos de datos implementan `JsonSerializable`, por lo que puedes usar esta característica para exportar a formato JSON. +### Excepciones + +La clase `Scraper` y -por consecuencia- también la clase `Generator` generan excepciones. +En el caso de una excepción de tipo HTTP se tira una excepción `HttpException`. +En el caso de una excepción HTTP y tenga un código de error del servicio remoto se tira una excepción `HttpServerException`. + +La jerarquía de excepciones es: + +```text +- PysException (interface) + - HttpException (class) + - HttpServerException (class) +``` + ## Soporte Puedes obtener soporte abriendo un ticket en Github. diff --git a/src/Exceptions/HttpException.php b/src/Exceptions/HttpException.php new file mode 100644 index 0000000..cbea24f --- /dev/null +++ b/src/Exceptions/HttpException.php @@ -0,0 +1,11 @@ +crawler; } + /** + * @throws Exceptions\HttpException + */ private function sendGet(): Crawler { - $response = $this->client->request('GET', self::PYS_URL); + try { + $response = $this->client->request('GET', self::PYS_URL); + } catch (GuzzleException $exception) { + throw $this->wrapGuzzleException($exception); + } $crawler = new Crawler((string) $response->getBody(), self::PYS_URL); $this->crawler = $crawler; return $crawler; } - /** @param array $data */ + /** + * @param array $data + * @throws Exceptions\HttpException + */ private function sendPost(array $data): Crawler { $currentState = $this->extractState($this->getLastCrawler()); - $response = $this->client->request('POST', self::PYS_URL, [ - RequestOptions::HEADERS => [ - 'Accept-Encoding' => 'gzip, deflate', - 'Referer' => self::PYS_URL, - 'X-Requested-With' => 'XMLHttpRequest', - 'X-Microsoft-Ajax' => 'delta=false', - ], - RequestOptions::FORM_PARAMS => array_merge(['__ASYNCPOST' => 'false'], $currentState, $data), - ]); + try { + $response = $this->client->request('POST', self::PYS_URL, [ + RequestOptions::HEADERS => [ + 'Accept-Encoding' => 'gzip, deflate', + 'Referer' => self::PYS_URL, + 'X-Requested-With' => 'XMLHttpRequest', + 'X-Microsoft-Ajax' => 'delta=false', + ], + RequestOptions::FORM_PARAMS => array_merge(['__ASYNCPOST' => 'false'], $currentState, $data), + ]); + } catch (GuzzleException $exception) { + throw $this->wrapGuzzleException($exception); + } $crawler = new Crawler((string) $response->getBody(), self::PYS_URL); $this->crawler = $crawler; return $crawler; @@ -113,4 +129,19 @@ private function extractState(Crawler $crawler): array $form = $crawler->filter('#form1')->form(); return $form->getPhpValues(); } + + private function wrapGuzzleException(GuzzleException $exception): Exceptions\HttpException + { + if ($exception instanceof ServerException) { + return new Exceptions\HttpServerException( + message: $exception->getMessage(), + previous: $exception + ); + } + + return new Exceptions\HttpException( + message: $exception->getMessage(), + previous: $exception + ); + } } diff --git a/src/ScraperInterface.php b/src/ScraperInterface.php index e7f1ae6..0e64415 100644 --- a/src/ScraperInterface.php +++ b/src/ScraperInterface.php @@ -8,21 +8,25 @@ interface ScraperInterface { /** * @return array + * @throws Exceptions\HttpException|Exceptions\HttpServerException */ public function obtainTypes(): array; /** * @return array + * @throws Exceptions\HttpException|Exceptions\HttpServerException */ public function obtainSegments(int|string $type): array; /** * @return array + * @throws Exceptions\HttpException|Exceptions\HttpServerException */ public function obtainFamilies(int|string $type, int|string $segment): array; /** * @return array + * @throws Exceptions\HttpException|Exceptions\HttpServerException */ public function obtainClasses(int|string $type, int|string $segment, int|string $family): array; } diff --git a/tests/Unit/Exceptions/HttpExceptionTest.php b/tests/Unit/Exceptions/HttpExceptionTest.php new file mode 100644 index 0000000..67122aa --- /dev/null +++ b/tests/Unit/Exceptions/HttpExceptionTest.php @@ -0,0 +1,18 @@ +assertInstanceOf(PysException::class, $exception); + } +} diff --git a/tests/Unit/Exceptions/HttpServerExceptionTest.php b/tests/Unit/Exceptions/HttpServerExceptionTest.php new file mode 100644 index 0000000..93a283c --- /dev/null +++ b/tests/Unit/Exceptions/HttpServerExceptionTest.php @@ -0,0 +1,18 @@ +assertInstanceOf(HttpException::class, $exception); + } +} diff --git a/tests/Unit/GeneratorTest.php b/tests/Unit/GeneratorTest.php index 5977049..d75074a 100644 --- a/tests/Unit/GeneratorTest.php +++ b/tests/Unit/GeneratorTest.php @@ -4,6 +4,9 @@ namespace PhpCfdi\SatPysScraper\Tests\Unit; +use GuzzleHttp\Psr7\Response; +use PhpCfdi\SatPysScraper\Exceptions\HttpException; +use PhpCfdi\SatPysScraper\Exceptions\HttpServerException; use PhpCfdi\SatPysScraper\Generator; class GeneratorTest extends TestCase @@ -19,4 +22,26 @@ public function testGenerate(): void $expectedFile = __DIR__ . '/../_files/exported-fake.json'; $this->assertJsonStringEqualsJsonFile($expectedFile, (string) json_encode($types)); } + + public function testGenerateThrowsExceptionOnServerError(): void + { + $scraper = $this->createPreparedScraperQueue([ + new Response(500, body: 'Internal server error'), + ]); + $generator = new Generator($scraper); + + $this->expectException(HttpServerException::class); + $generator->generate(); + } + + public function testGenerateThrowsExceptionOnRequestError(): void + { + $scraper = $this->createPreparedScraperQueue([ + new Response(404, body: 'Not found'), + ]); + $generator = new Generator($scraper); + + $this->expectException(HttpException::class); + $generator->generate(); + } } diff --git a/tests/Unit/ScraperTest.php b/tests/Unit/ScraperTest.php index 9fa7f36..5cc0f8b 100644 --- a/tests/Unit/ScraperTest.php +++ b/tests/Unit/ScraperTest.php @@ -4,7 +4,10 @@ namespace PhpCfdi\SatPysScraper\Tests\Unit; +use GuzzleHttp\Psr7\Response; use LogicException; +use PhpCfdi\SatPysScraper\Exceptions\HttpException; +use PhpCfdi\SatPysScraper\Exceptions\HttpServerException; final class ScraperTest extends TestCase { @@ -28,4 +31,24 @@ public function testCallObtainClassesWithoutCorrectSequenceThrowsLogicException( $this->expectException(LogicException::class); $scraper->obtainClasses(2711, 27, 1); } + + public function testObtainTypesThrowsExceptionOnServerError(): void + { + $scraper = $this->createPreparedScraperQueue([ + new Response(500, body: 'Internal server error'), + ]); + + $this->expectException(HttpServerException::class); + $scraper->obtainTypes(); + } + + public function testObtainTypesThrowsExceptionOnRequestError(): void + { + $scraper = $this->createPreparedScraperQueue([ + new Response(404, body: 'Not found'), + ]); + + $this->expectException(HttpException::class); + $scraper->obtainTypes(); + } } diff --git a/tests/Unit/TestCase.php b/tests/Unit/TestCase.php index bb64b0a..61c274c 100644 --- a/tests/Unit/TestCase.php +++ b/tests/Unit/TestCase.php @@ -5,6 +5,8 @@ namespace PhpCfdi\SatPysScraper\Tests\Unit; use GuzzleHttp\Client; +use GuzzleHttp\Handler\MockHandler; +use GuzzleHttp\HandlerStack; use LogicException; use PhpCfdi\SatPysScraper\Scraper; use PhpCfdi\SatPysScraper\ScraperInterface; @@ -69,6 +71,15 @@ public function createFakeScraper(): ScraperInterface return new Scraper($client); } + /** @param array $queue */ + public function createPreparedScraperQueue(array $queue): ScraperInterface + { + $mockHandler = new MockHandler($queue); + $handlerStack = HandlerStack::create($mockHandler); + $client = new Client(['handler' => $handlerStack]); + return new Scraper($client); + } + public function createTemporaryFilename(): string { $temporaryFile = (string) tempnam(directory: '', prefix: 'testing-'); From d1e19e5d2048fb55e393e2458a79afbe0598a72c Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 15 Oct 2024 10:09:50 -0600 Subject: [PATCH 15/16] Refactor Scraper test to allow retry --- tests/Integration/ScraperTest.php | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/Integration/ScraperTest.php b/tests/Integration/ScraperTest.php index 0ebcfe2..ac1746e 100644 --- a/tests/Integration/ScraperTest.php +++ b/tests/Integration/ScraperTest.php @@ -5,12 +5,32 @@ namespace PhpCfdi\SatPysScraper\Tests\Integration; use GuzzleHttp\Client; +use PhpCfdi\SatPysScraper\Exceptions\HttpServerException; use PhpCfdi\SatPysScraper\Scraper; use PhpCfdi\SatPysScraper\Tests\TestCase; class ScraperTest extends TestCase { + private const MAX_RETRIES = 5; + public function testObtainSequence(): void + { + do { + $try = ($try ?? 0) + 1; + try { + $this->procedureObtainSequence(); + $lastException = null; + break; + } catch (HttpServerException $exception) { + $lastException = $exception; + } + } while ($try < self::MAX_RETRIES); + if (null !== $lastException) { + throw $lastException; + } + } + + public function procedureObtainSequence(): void { $scraper = new Scraper(new Client()); From 7ef90a0ebb000c38a220f418d822caa5eaa69653 Mon Sep 17 00:00:00 2001 From: Carlos C Soto Date: Tue, 15 Oct 2024 10:47:45 -0600 Subject: [PATCH 16/16] Document SAT problem and why we throw now http exceptions --- docs/CHANGELOG.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index fee46e5..c70bf15 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,7 +11,13 @@ versión, aunque sí su incorporación en la rama principal de trabajo. Generalm ## Listado de cambios -### Versión 3.0.1 2024-09-17 +### Versión 3.0.1 2024-10-15 + +La aplicación del SAT devuelve un error 500 frecuentemente (1 de cada 3 veces) desde 2024-07-15. +Este error parece estar relacionado con la distribución de cargas por parte del SAT, así que +reintentar la llamada HTTP sobre la misma conexión no soluciona el problema y hay que crear +un nuevo cliente HTTP. Para intentar solventarlo, se modifica la librería para tirar +excepciones con errores HTTP e intentar solventar el error. Se cambia la construcción de imagen de docker, ahora depende de `php:8.3-cli-alpine`. @@ -19,6 +25,7 @@ Se actualiza el archivo de licencia a 2024. Se hacen otros cambios en el entorno de desarrollo: +- Se modifica la prueba funcional para poder hacer hasta 5 reintentos reconstruyendo el cliente http. - Se prueba el correcto orden para llamar a los métodos para obtener datos. - Se utiliza la variable `php-version` en singular para las matrices de pruebas. - Se actualizan las herramientas de desarrollo.