diff --git a/.dockerignore b/.dockerignore index 5530bdb88919b..114ac74dc2483 100644 --- a/.dockerignore +++ b/.dockerignore @@ -7,6 +7,11 @@ logs debug/ perl-language-server.log +openfoodfacts-mongodbdump.tar.gz +openfoodfacts-products.jsonl.gz +en.openfoodfacts.org.products.csv +en.openfoodfacts.org.products.csv.gz + html/images/misc/openfoodfacts-visual-decision-05-03-13.pdf html/images/misc/20130416_QUOz.pdf html/images/misc/assemblee-constitutive-off-v1.pdf diff --git a/.github/labeler.yml b/.github/labeler.yml index 49b06f4eb4791..e6f28347d8a6c 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -57,6 +57,11 @@ Stores: - taxonomies/states.result.sto - taxonomies/states.result.txt +status system: +- taxonomies/states.result.sto +- taxonomies/states.result.txt +- taxonomies/states.txt + WikiData: - scripts/build_countries_taxonomy_from_wikidata.pl - scripts/build_languages_taxonomy_from_wikidata.pl @@ -193,6 +198,7 @@ Footer: 🐋 Docker: - docker/**/* +- .dockerignore 🏷️ Folksonomy Project: - html/js/folksonomy.js @@ -253,6 +259,7 @@ Food.pm: Food groups: - lib/ProductOpener/FoodGroups.pm - tests/unit/food_groups.t +- taxonomies/food_groups.txt 🧪 Additives: - taxonomies/additives.txt @@ -314,6 +321,7 @@ Ingredients analysis: Labels: - taxonomies/labels.txt +- taxonomies/unused/labels_categories.txt Categories: - taxonomies/categories.txt @@ -621,6 +629,7 @@ Blocks: 🗺️ Made Near Me: - scripts/generate_madenearme_page.pl +- scripts/generate_madenearme_pages.sh PAO: - scripts/generate_pao_taxonomy.pl @@ -657,6 +666,7 @@ Config: Ingredients processing: - tests/unit/ingredients_processing.t +- taxonomies/ingredients_processing.txt 👥 Users: - lib/ProductOpener/Users.pm @@ -798,6 +808,9 @@ Averages by categories: Static content: - scripts/copy_text_files.pl +matomo: +- scripts/utils/logs/download_matomo_logs.pl + # TODO: find or create proper labels for those #- scripts/extract_images.pl #- scripts/test_text_fuzzy.pl diff --git 
a/.github/workflows/auto-assign-pr.yml b/.github/workflows/auto-assign-pr.yml index bd2144b7e5f56..05f5d328860b4 100644 --- a/.github/workflows/auto-assign-pr.yml +++ b/.github/workflows/auto-assign-pr.yml @@ -17,4 +17,4 @@ jobs: name: 'Pull request assignment' runs-on: ubuntu-latest steps: - - uses: toshimaru/auto-author-assign@v2.0.1 + - uses: toshimaru/auto-author-assign@v2.1.0 diff --git a/.github/workflows/crowdin-per-language.yml b/.github/workflows/crowdin-per-language.yml index 7b81ad1c9681c..8278afbd69673 100644 --- a/.github/workflows/crowdin-per-language.yml +++ b/.github/workflows/crowdin-per-language.yml @@ -22,7 +22,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: Matrix - uses: crowdin/github-action@v1.16.0 + uses: crowdin/github-action@v1.16.1 with: upload_translations: false # default is false download_translations: true diff --git a/.github/workflows/crowdin.yml b/.github/workflows/crowdin.yml index d43fb2da095f8..956352a2d2e21 100644 --- a/.github/workflows/crowdin.yml +++ b/.github/workflows/crowdin.yml @@ -13,7 +13,7 @@ jobs: - name: Checkout uses: actions/checkout@v4 - name: crowdin action - uses: crowdin/github-action@v1.16.0 + uses: crowdin/github-action@v1.16.1 with: upload_translations: false # default is false # Use this option to upload translations for a single specified language diff --git a/Makefile b/Makefile index 7f102fe79a0e3..e071209ce7f13 100644 --- a/Makefile +++ b/Makefile @@ -112,9 +112,10 @@ create_folders: # Docker Compose # #----------------# +# args variable may be use to eg. 
"--progress plain" option and keep logs on a failing build build: @echo "🥫 Building containers …" - ${DOCKER_COMPOSE} build ${container} 2>&1 + ${DOCKER_COMPOSE} build ${args} ${container} 2>&1 _up: @echo "🥫 Starting containers …" diff --git a/conf/systemd/sync_images_s3@.service b/conf/systemd/sync_images_s3@.service new file mode 100644 index 0000000000000..586d059731cf2 --- /dev/null +++ b/conf/systemd/sync_images_s3@.service @@ -0,0 +1,13 @@ +# service instance name "%i" is off only (for now) +[Unit] +Description=Synchronize images to AWS S3 %i +# __ will be replaced by @ in email-failures@.service +OnFailure=email-failures@sync_images_s3__%i.service + +[Service] +Type=oneshot +User=off +Group=off +WorkingDirectory=/srv/%i/scripts/sync-s3-images +ExecStart=.venv/bin/python3 sync_s3_images.py /mnt/%i/images/products +KillMode=process diff --git a/conf/systemd/sync_images_s3@.timer b/conf/systemd/sync_images_s3@.timer new file mode 100644 index 0000000000000..a95321360637f --- /dev/null +++ b/conf/systemd/sync_images_s3@.timer @@ -0,0 +1,12 @@ +# service instance name "%i" is off only (for now) +[Unit] +Description=Synchronize images to AWS S3 daily + +[Timer] +# every tuesday +OnCalendar=Tue *-*-* 02:00:00 +# service instance name "%i" is off / obf / opff / opf +Unit=sync_images_s3@%i.service + +[Install] +WantedBy=multi-user.target diff --git a/docker/README.md b/docker/README.md index 64db848d58865..65d493007fa35 100644 --- a/docker/README.md +++ b/docker/README.md @@ -21,7 +21,8 @@ See also [targets to run tests](../docs/dev/how-to-write-and-run-tests.md#runnin | Command | Description | Notes | | ------------------------- | -------------------------------------------------------------------------------------- | ------------------------------------------------------------- | -| `make dev` | Setup a fresh dev environment. | Run only once, then use the `up`, `down`, `restart` commands. | +| `make dev` | Setup a fresh dev environment. 
| Run only once, then use the `up`, `down`, `restart` commands. | +| `make build` | Build containers. Add `container=name` to build a specific container. | `args="--progress plain"` keeps all logs in the console (to debug a failing build). | | `make up` | Start containers. | | | `make down` | Stop containers and keep the volumes. | Products and users data will be kept. | | `make hdown` | Stop containers and delete the volumes (hard down). | Products and users data will be lost ! | diff --git a/docker/dev.yml b/docker/dev.yml index 9ee76dc1f5e53..2737b2de77a06 100644 --- a/docker/dev.yml +++ b/docker/dev.yml @@ -37,7 +37,6 @@ x-minion-db-network: &minion-db-network services: postgres: # only turn on for off profile - profiles: ["off"] <<: *minion-db-network backend: <<: [*backend-conf, *minion-db-network] diff --git a/docs/api/how-to-download-images.md b/docs/api/how-to-download-images.md index 4b6772a8e989b..c178011ed00e8 100644 --- a/docs/api/how-to-download-images.md +++ b/docs/api/how-to-download-images.md @@ -22,126 +22,38 @@ about how to download images from AWS dataset. ## Download from Open Food Facts server -All images can be found on -[https://images.openfoodfacts.org/images/products/](https://static.openfoodfacts.org/images/products/). +All images are hosted under the +[https://images.openfoodfacts.org/images/products/](https://images.openfoodfacts.org/images/products/) folder. +But you have to build the right URL from the product info. + +### Computing single product image folder + Images of a product are stored in a single directory. The path of this -directory can be inferred easily from the product barcode. If the product -barcode length is lower or equal to 8 (ex: "22222222"), the directory path is -simply the barcode: all images can be found on -`https://images.openfoodfacts.org/images/products/{barcode}`. +directory can be inferred easily from the product barcode. +There are two cases: + +1. 
If the product barcode length is less than or equal to 8 (e.g. "22222222"), the directory path is +simply the barcode: `https://images.openfoodfacts.org/images/products/{barcode}`. + +2. Otherwise, we split the first 9 digits of the barcode into groups of three digits to get the first three folder names, and use the rest of the barcode as the last folder name[^split-regexp]. + For example, the barcode `3435660768163` is split as `343/566/076/8163`, thus the product's images will be in `https://images.openfoodfacts.org/images/products/343/566/076/8163` + +[^split-regexp]: The following regex can be used to split the barcode into subfolders: `r"^(...)(...)(...)(.*)$"` -Otherwise, the following regex is used to split the barcode into subfolders: -`r"^(...)(...)(...)(.*)$"`. For example, the barcode `3435660768163` is split as -follows: `343/566/076/8163`, and all images of the products can be found on -[https://images.openfoodfacts.org/images/products/343/566/076/8163](https://images.openfoodfacts.org/images/products/343/566/076/8163). +### Computing single image file name -To get the image file names, we have to use the database dump or the API. All -images information are stored in the `images` field. For product -[3168930010883](https://world.openfoodfacts.org/api/v0/product/3168930010883.json), -we have: +Above we computed the folder name; now we need the filename inside that folder for a particular image. + +#### Understanding images data + +To get the image file names, we have to use the database dump or the API. +All image information is stored in the `images` field. + +Eg. 
For product [3168930010883](https://world.openfoodfacts.org/api/v0/product/3168930010883.json), +we have (trimmed the data): ```json { - "4": { - "uploader": "openfoodfacts-contributors", - "uploaded_t": 1548685211, - "sizes": { - "400": { - "h": 400, - "w": 300 - }, - "100": { - "w": 75, - "h": 100 - }, - "full": { - "h": 3174, - "w": 2380 - } - } - }, - "3": { - "uploader": "openfoodfacts-contributors", - "uploaded_t": 1537002125, - "sizes": { - "full": { - "h": 3302, - "w": 2476 - }, - "100": { - "h": 100, - "w": 75 - }, - "400": { - "w": 300, - "h": 400 - } - } - }, - "ingredients_fr": { - "rev": "7", - "orientation": "0", - "ocr": 1, - "imgid": "2", - "y2": null, - "white_magic": "0", - "angle": null, - "x1": null, - "x2": null, - "geometry": "0x0-0-0", - "normalize": "0", - "y1": null, - "sizes": { - "100": { - "h": 100, - "w": 75 - }, - "400": { - "w": 300, - "h": 400 - }, - "200": { - "w": 150, - "h": 200 - }, - "full": { - "h": 1200, - "w": 900 - } - } - }, - "nutrition_fr": { - "sizes": { - "200": { - "h": 200, - "w": 150 - }, - "full": { - "w": 2476, - "h": 3302 - }, - "100": { - "w": 75, - "h": 100 - }, - "400": { - "w": 300, - "h": 400 - } - }, - "y1": "-1", - "normalize": null, - "x2": "-1", - "geometry": "0x0--8--8", - "x1": "-1", - "angle": 0, - "imgid": "3", - "white_magic": null, - "y2": "-1", - "ocr": 1, - "orientation": "0", - "rev": "11" - }, "1": { "sizes": { "full": { @@ -160,24 +72,6 @@ we have: "uploader": "kiliweb", "uploaded_t": "1527184614" }, - "2": { - "sizes": { - "100": { - "h": 100, - "w": 75 - }, - "400": { - "h": 400, - "w": 300 - }, - "full": { - "h": 1200, - "w": 900 - } - }, - "uploader": "kiliweb", - "uploaded_t": "1527184615" - }, "front_fr": { "x1": null, "angle": null, @@ -213,31 +107,79 @@ we have: The keys of the map are the keys of the images. These keys can be: -- digits: the image is the raw image sent by the contributor (full resolution). 
-- selected images: `front_{lang}`, `nutrition_{lang}` and - `ingredients_{lang}`, selected as front, nutrition and ingredients images - respectively for `lang`. Here, `lang` is a 2-letter ISO 639-1 language code - (fr, en, es,\...). +- digits: the image is the *raw image* sent by the contributor (full resolution). +- selected images: + * `front_{lang}` corresponds to the front product image in the language with code `lang` + * `ingredients_{lang}` corresponds to the ingredients image in the language with code `lang` + * `nutrition_{lang}` is the same but for nutrition data + * `packaging_{lang}` is the same but for packaging logos -Each image is available in different resolutions: `100`, `200`, `400` or -`full`, each corresponding to image height (`full` means not resized). The -available resolutions can be found in the `sizes` subfield. + `lang` is a 2-letter ISO 639-1 language code (fr, en, es, …). -Selected images have additional fields: +Each image is available in different resolutions: +`100`, `200`, `400` or `full`, each corresponding to image height (`full` means not resized). +The available resolutions can be found in the `sizes` subfield. 
+ +#### Filename for a raw image + +For a raw image (one stored under a numeric key in the `images` field), +the filename is very easy to compute: +* just take the image digit + `.jpg` for full resolution +* image digit + `.` + resolution + `.jpg` for a lower resolution + +For our example above, the filename for image `"1"` +* in resolution 400px is `1.400.jpg` +* in full resolution, it is `1.jpg` + +So, adding the folder part, the final URLs for our example are: +* https://images.openfoodfacts.org/images/products/316/893/001/0883/1.jpg for the full image +* https://images.openfoodfacts.org/images/products/316/893/001/0883/1.400.jpg for the 400px version + +#### Filename for a selected image + +In the structure, selected images have additional fields: - `rev` (as revision) indicates the revision number of the image to use (each time a new image is selected, cropped or rotated, a new image with an incremented rev is generated). - `imgid`, the image ID of the raw image used to generate the selected image. -- `angle`, `x1`, `x2`, `y1`, `y2`: rotation angle and cropping coordinates. - -For selected images, the file name is the image key followed by the revision -number and the resolution: `front_fr.1.400.jpg`. For raw images, the file name -is either the image ID (`1.jpg`) or the image ID followed by the resolution -(`1.100.jpg`). - -To get the full URL, simply concatenate the product directory path and the -image name. Examples: - -- [https://images.openfoodfacts.org/images/products/343/566/076/8163/1.jpg](https://images.openfoodfacts.org/images/products/343/566/076/8163/1.jpg) -- [https://images.openfoodfacts.org/images/products/343/566/076/8163/1.400.jpg](https://images.openfoodfacts.org/images/products/343/566/076/8163/1.400.jpg) +- `angle`, `x1`, `x2`, `y1`, `y2`: rotation angle and cropping coordinates (stored so that the selected image can be regenerated from the raw image) + +For selected images, the filename is the image key followed by the revision number and the resolution: `<image_key>.<rev>.<resolution>.jpg`. 
+Resolution must always be specified, but you can use the `full` keyword to get the full resolution image. + +In our above example, the filename for the front image in French (`front_fr` key) is: +* `front_fr.4.400.jpg` for the 400 px version +* `front_fr.4.full.jpg` for the full resolution version + +So, adding the folder part, the final URLs for our example are: +* https://images.openfoodfacts.org/images/products/316/893/001/0883/front_fr.4.full.jpg for the full image +* https://images.openfoodfacts.org/images/products/316/893/001/0883/front_fr.4.400.jpg for the 400px version + +## A Python snippet + +If we have the product data in a dict (`product_data`), Python code computing the image URL would look like this (it requires `import re`): + +```python +def get_image_url(product_data, image_name, resolution="full"): + if image_name not in product_data["images"]: + return None + base_url = "https://images.openfoodfacts.org/images/products" + # get product folder name + folder_name = product_data["code"] + if len(folder_name) > 8: + folder_name = re.sub(r'(...)(...)(...)(.*)', r'\1/\2/\3/\4', folder_name) + # get filename + if re.match(r"^\d+$", image_name): # only digits + # raw image + resolution_suffix = "" if resolution == "full" else f".{resolution}" + filename = f"{image_name}{resolution_suffix}.jpg" + else: + # selected image + rev = product_data["images"][image_name]["rev"] + filename = f"{image_name}.{rev}.{resolution}.jpg" + # join things together + return f"{base_url}/{folder_name}/{filename}" +``` + + diff --git a/docs/api/ref/requestBodies/add_or_edit_a_product.yaml b/docs/api/ref/requestBodies/add_or_edit_a_product.yaml index 7cbb98e66e561..5cf65f19376ee 100644 --- a/docs/api/ref/requestBodies/add_or_edit_a_product.yaml +++ b/docs/api/ref/requestBodies/add_or_edit_a_product.yaml @@ -1,4 +1,6 @@ type: object +description: | + You can provide most of the properties defined in the product schema. properties: code: type: string @@ -12,6 +14,10 @@ properties: type: string description: A valid corresponding password. 
example: mypassword + comment: + type: string + description: A comment for the change. It will be shown in product changes history. + example: new packaging from super-app brands: schema: type: array @@ -49,4 +55,4 @@ properties: required: - code - user_id - - password \ No newline at end of file + - password diff --git a/docs/api/ref/schemas/product_ecoscore.yaml b/docs/api/ref/schemas/product_ecoscore.yaml index a892add3f14e2..31d0e6ddde1af 100644 --- a/docs/api/ref/schemas/product_ecoscore.yaml +++ b/docs/api/ref/schemas/product_ecoscore.yaml @@ -1,10 +1,14 @@ type: object -description: Fields related to Eco-Score for a product +description: | + Fields related to Eco-Score for a product. + + See also: `ecoscore_score`, `ecoscore_grade` and `ecoscore_tags`. + properties: ecoscore_data: type: object description: | - An object about a lot of details about data needed for nutriscore computation + An object about a lot of details about data needed for Eco-Score computation and complementary data of interest. properties: adjustments: @@ -139,4 +143,4 @@ properties: environment_impact_level_tags: type: array items: - type: object \ No newline at end of file + type: object diff --git a/docs/api/ref/schemas/product_images.yaml b/docs/api/ref/schemas/product_images.yaml index 662a4d29ca279..81e8adf420df1 100644 --- a/docs/api/ref/schemas/product_images.yaml +++ b/docs/api/ref/schemas/product_images.yaml @@ -5,6 +5,10 @@ description: | Images ensure the reliability of Open Food Facts data. It provides a primary source and proof of all the structured data. You may therefore want to display it along the structured information. 
+ + See also tutorials about images: + * [Getting images](https://openfoodfacts.github.io/openfoodfacts-server/api/how-to-download-images/) + * [Uploading images](https://openfoodfacts.github.io/openfoodfacts-server/api/tutorial-uploading-photo-to-a-product/) properties: image_front_small_url: type: string @@ -85,4 +89,4 @@ properties: description: | See property `front` to get the real type of those objects (Put this way because of a [bug in rapidoc](https://github.com/rapi-doc/RapiDoc/issues/880)) - type: string \ No newline at end of file + type: string diff --git a/docs/api/ref/schemas/product_nutrition.yaml b/docs/api/ref/schemas/product_nutrition.yaml index dcd631f66db50..ec1309b9a5cc6 100644 --- a/docs/api/ref/schemas/product_nutrition.yaml +++ b/docs/api/ref/schemas/product_nutrition.yaml @@ -1,6 +1,10 @@ type: object description: | Nutrition fields of a product + + Most of these properties are read-only. + + See [how to add nutrition data](https://openfoodfacts.github.io/openfoodfacts-server/api/ref-cheatsheet/#add-nutrition-facts-values-units-and-base) properties: no_nutrition_data: type: string @@ -169,10 +173,12 @@ properties: **Note**: If you want to characterize products in a uniform way, this is the value you should use. type: number + readOnly: true '(?[\w-]+)_serving': description: | The standardized value of a serving for this product. type: number + readOnly: true '(?[\w-]+)_value': description: | The value input by the user / displayed on the product for the nutrient. @@ -180,6 +186,7 @@ properties: * per 100g or serving, depending on `nutrition_data_per` * in the unit of corresponding _unit field. type: number + readOnly: true '(?[\w-]+)_prepared': description: | The value for nutrient for **prepared** product. @@ -193,20 +200,25 @@ properties: The standardized value of a serving of 100g (or 100ml for liquids) for the nutrient, for **prepared** product. 
type: number + readOnly: true '(?[\w-]+)_prepared_serving': description: | The standardized value of a serving for the **prepared** product. type: number + readOnly: true '(?[\w-]+)_prepared_value': description: | The standardized value for a serving or 100g (or 100ml for liquids), depending on `nutrition_data_prepared_per` for the nutrient for **prepared** product. type: number + readOnly: true nutriscore_data: description: | Detail of data the Nutri-Score was computed upon. + **Note**: this might not be stable, don't rely too much on this, or, at least, tell us ! + **TODO** document each property type: object properties: diff --git a/howmuchsugar/en/openfoodfacts-logo-en.png b/howmuchsugar/en/openfoodfacts-logo-en.png index 2236038443a85..bc98e03e6e8a1 100644 Binary files a/howmuchsugar/en/openfoodfacts-logo-en.png and b/howmuchsugar/en/openfoodfacts-logo-en.png differ diff --git a/howmuchsugar/fr/openfoodfacts-logo-fr.png b/howmuchsugar/fr/openfoodfacts-logo-fr.png index c39cf9943c1b2..bc98e03e6e8a1 100644 Binary files a/howmuchsugar/fr/openfoodfacts-logo-fr.png and b/howmuchsugar/fr/openfoodfacts-logo-fr.png differ diff --git a/html/images/lang/en/labels/conservation-international.216x90.png b/html/images/lang/en/labels/conservation-international.216x90.png new file mode 100644 index 0000000000000..32799907e2344 Binary files /dev/null and b/html/images/lang/en/labels/conservation-international.216x90.png differ diff --git a/html/images/lang/en/labels/too-good-to-go.108x90.png b/html/images/lang/en/labels/too-good-to-go.108x90.png new file mode 100644 index 0000000000000..ce3f5806d60cb Binary files /dev/null and b/html/images/lang/en/labels/too-good-to-go.108x90.png differ diff --git a/html/images/lang/hr/labels/povratna-naknada.96x90.png b/html/images/lang/hr/labels/povratna-naknada.96x90.png new file mode 100644 index 0000000000000..7b8505fe2b07e Binary files /dev/null and b/html/images/lang/hr/labels/povratna-naknada.96x90.png differ diff --git 
a/lib/ProductOpener/Config_obf.pm b/lib/ProductOpener/Config_obf.pm index 9d9ac51676cef..9eb08636ce8b0 100644 --- a/lib/ProductOpener/Config_obf.pm +++ b/lib/ProductOpener/Config_obf.pm @@ -418,4 +418,7 @@ $options{other_servers} = { $options{no_nutrition_table} = 1; +# Name of the Redis stream to which product updates are published +$options{redis_stream_name} = "product_updates_obf"; + 1; diff --git a/lib/ProductOpener/Config_off.pm b/lib/ProductOpener/Config_off.pm index f9603039acb81..f160757b85826 100644 --- a/lib/ProductOpener/Config_off.pm +++ b/lib/ProductOpener/Config_off.pm @@ -1027,6 +1027,9 @@ $options{other_servers} = { } }; +# Name of the Redis stream to which product updates are published +$options{redis_stream_name} = "product_updates_off"; + # used to rename texts and to redirect to the new name $options{redirect_texts} = { "en/nova-groups-for-food-processing" => "nova", diff --git a/lib/ProductOpener/Config_opf.pm b/lib/ProductOpener/Config_opf.pm index 4d1ef7410ee74..9d6718e90cf84 100644 --- a/lib/ProductOpener/Config_opf.pm +++ b/lib/ProductOpener/Config_opf.pm @@ -391,4 +391,7 @@ $options{other_servers} = { $options{no_nutrition_table} = 1; +# Name of the Redis stream to which product updates are published +$options{redis_stream_name} = "product_updates_opf"; + 1; diff --git a/lib/ProductOpener/Config_opff.pm b/lib/ProductOpener/Config_opff.pm index 2e647c66a23a6..e742a0e5eaeff 100644 --- a/lib/ProductOpener/Config_opff.pm +++ b/lib/ProductOpener/Config_opff.pm @@ -430,4 +430,7 @@ $options{other_servers} = { } }; +# Name of the Redis stream to which product updates are published +$options{redis_stream_name} = "product_updates_opff"; + 1; diff --git a/lib/ProductOpener/DataQualityFood.pm b/lib/ProductOpener/DataQualityFood.pm index dded1baaa3faa..0627441db8fca 100644 --- a/lib/ProductOpener/DataQualityFood.pm +++ b/lib/ProductOpener/DataQualityFood.pm @@ -1091,7 +1091,7 @@ sub check_nutrition_data ($product_ref) { } } - foreach my $nid (keys 
%{$product_ref->{nutriments}}) { + foreach my $nid (sort keys %{$product_ref->{nutriments}}) { $log->debug("nid: " . $nid . ": " . $product_ref->{nutriments}{$nid}) if $log->is_debug(); if ($nid =~ /_prepared_100g$/ && $product_ref->{nutriments}{$nid} > 0) { @@ -1164,16 +1164,26 @@ sub check_nutrition_data ($product_ref) { } # raise error if # all values are identical + # and values (check first value only) are above 1 (see issue #9572) # OR # all values but one - because sodium and salt can be automatically calculated one depending on the value of the other - are identical + # and values (check salt (should not check sodium which could be lower)) are above 1 (see issue #9572) + # and at least 4 values are input by contributors (see issue #9572) if ( - ($nutriments_values_occurences_max_value == scalar @major_nutriments_values) - or ( - ($nutriments_values_occurences_max_value >= scalar @major_nutriments_values - 1) - and ( (defined $nutriments_values{'salt_100g'}) - and ($nutriments_values{'sodium_100g'}) - and ($nutriments_values{'salt_100g'} != $nutriments_values{'sodium_100g'})) + ( + ( + $nutriments_values_occurences_max_value == scalar @major_nutriments_values + and ($major_nutriments_values[0] > 1) + ) + or ( + ($nutriments_values_occurences_max_value >= scalar @major_nutriments_values - 1) + and ( (defined $nutriments_values{'salt_100g'}) + and (defined $nutriments_values{'sodium_100g'}) + and ($nutriments_values{'salt_100g'} != $nutriments_values{'sodium_100g'}) + and ($nutriments_values{'salt_100g'} > 1)) + ) ) + and (scalar @major_nutriments_values > 3) ) { push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-values-are-all-identical"; @@ -1613,6 +1623,10 @@ sub check_ingredients ($product_ref) { "en:ingredients-" . $display_lc . "-unexpected-chars-question-mark"; } + if ($product_ref->{$ingredients_text_lc} =~ /http/i) { + add_tag($product_ref, "data_quality_errors", "en:ingredients-" . $display_lc . 
"-unexpected-url"); + } + # French specific #if ($display_lc eq 'fr') { @@ -1806,10 +1820,21 @@ sub check_labels ($product_ref) { unshift @ingredients, @{$ingredient_ref->{ingredients}}; } - # some additives_classes (like thickener, for example) do not have the key-value vegan and vegetarian - # it can be additives_classes that contain only vegan/vegetarian additives. - # to avoid false-positive - instead of raising a warning (else below) we ignore additives_classes - if (!exists_taxonomy_tag("additives_classes", $ingredientid)) { + # - some additives_classes (like thickener, for example) do not have the key-value vegan and vegetarian + # it can be additives_classes that contain only vegan/vegetarian additives. + # - also we cannot tell if a compound ingredient (preparation) is vegan or vegetarian + # to handle both cases we ignore the ingredient having vegan/vegatarian "maybe" and if it contains sub-ingredients + my $ignore_vegan_vegetarian_facet = 0; + if ( + (defined $ingredient_ref->{ingredients}) + and ( ((defined $ingredient_ref->{"vegan"}) and ($ingredient_ref->{"vegan"} ne 'no')) + or ((defined $ingredient_ref->{"vegetarian"}) and ($ingredient_ref->{"vegetarian"} ne 'no'))) + ) + { + $ignore_vegan_vegetarian_facet = 1; + } + + if (not $ignore_vegan_vegetarian_facet) { if (has_tag($product_ref, "labels", "en:vegan")) { # vegan if (defined $ingredient_ref->{"vegan"}) { diff --git a/lib/ProductOpener/Display.pm b/lib/ProductOpener/Display.pm index 5e2c3df127bb0..db209060cf439 100644 --- a/lib/ProductOpener/Display.pm +++ b/lib/ProductOpener/Display.pm @@ -6722,20 +6722,21 @@ sub get_packager_code_coordinates ($emb_code) { my $lng; if (exists $packager_codes{$emb_code}) { - if (exists $packager_codes{$emb_code}{lat}) { + my %emb_code_data = %{$packager_codes{$emb_code}}; + if (exists $emb_code_data{lat}) { # some lat/lng have , for floating point numbers - $lat = $packager_codes{$emb_code}{lat}; - $lng = $packager_codes{$emb_code}{lng}; + $lat = 
$emb_code_data{lat}; + $lng = $emb_code_data{lng}; $lat =~ s/,/\./g; $lng =~ s/,/\./g; } - elsif (exists $packager_codes{$emb_code}{fsa_rating_business_geo_lat}) { - $lat = $packager_codes{$emb_code}{fsa_rating_business_geo_lat}; - $lng = $packager_codes{$emb_code}{fsa_rating_business_geo_lng}; + elsif (exists $emb_code_data{fsa_rating_business_geo_lat}) { + $lat = $emb_code_data{fsa_rating_business_geo_lat}; + $lng = $emb_code_data{fsa_rating_business_geo_lng}; } - elsif ($packager_codes{$emb_code}{cc} eq 'uk') { - #my $address = 'uk' . '.' . $packager_codes{$emb_code}{local_authority}; - my $address = 'uk' . '.' . $packager_codes{$emb_code}{canon_local_authority}; + elsif ($emb_code_data{cc} eq 'uk') { + #my $address = 'uk' . '.' . $emb_code_data{local_authority}; + my $address = 'uk' . '.' . ($emb_code_data{canon_local_authority} // ''); if (exists $geocode_addresses{$address}) { $lat = $geocode_addresses{$address}[0]; $lng = $geocode_addresses{$address}[1]; diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index 68b506313a497..03031c4ae7438 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -807,6 +807,7 @@ my %min_regexp = ( es => "min|min\.|mín|mín\.|mínimo|minimo|minimum", fr => "min|min\.|mini|minimum", hr => "min|min\.|mini|minimum", + pl => "min|min\.|minimum", ); my %max_regexp = ( @@ -815,6 +816,7 @@ my %max_regexp = ( es => "max|max\.|máximo", fr => "max|max\.|maxi|maximum", hr => "max|max\.|maxi|maximum", + pl => "max|max\.|maximum", ); # Words that can be ignored after a percent @@ -2781,6 +2783,7 @@ sub parse_ingredients_text_service ($product_ref, $updated_product_fields_ref) { '^czekolada( deserowa)?: masa kakaowa min(imum)?$', '^masa kakaowa( w czekoladzie mlecznej)? 
min(imum)?$', '^masa mleczna min(imum)?$', + '^zawartość tłuszczu$', '^(?>\d+\s+g\s+)?(?>\w+\s?)*?100\s?g(?> \w*)?$', # "pomidorów zużyto na 100 g produktu" '^\w*\s?z \d* g (?>\w+\s?)*?100\s?g\s(?>produktu)?$' , # "Sporządzono z 40 g owoców na 100 g produktu" @@ -5523,12 +5526,23 @@ my %ingredients_categories_and_types = ( ], de => [ + # oil and fat { categories => ["pflanzliches Fett", "pflanzliche Öle", "pflanzliche Öle und Fette", "Fett", "Öle"], types => ["Kokosnuss", "Palm", "Palmkern", "Raps", "Shea", "Sonnenblumen",], # Kokosnussöl, Sonnenblumenfett alternate_names => ["fett", "öl"], }, + # plant protein + { + categories => ["pflanzliche Proteine", "Pflanzliches Eiweiß", "Pflanzliches Eiweiss"], + types => [ + "Ackerbohnen", "Erbsen", "Hafer", "Kartoffel", "Kichererbsen", "Pilz", + "Reis", "Soja", "Sonnenblumen", "Weizen" + ], + # haferprotein + alternate_names => ["protein", "eiweiß"], + }, ], fr => [ @@ -5565,6 +5579,15 @@ my %ingredients_categories_and_types = ( "thym", ] }, + # plant protein + { + categories => ["protéines végétales",], + types => [ + "avoine", "blé", "champignon", "colza", "fève", "pois", + "pois chiche", "pomme de terre", "riz", "soja", "tournesol", + ], + alternate_names => ["protéine de ", "protéine d'", "protéines de ", "protéines d'"], + }, # lecithin { categories => ["lécithine",], diff --git a/lib/ProductOpener/Redis.pm b/lib/ProductOpener/Redis.pm index 41cf06d3e46b4..01976cde05c72 100644 --- a/lib/ProductOpener/Redis.pm +++ b/lib/ProductOpener/Redis.pm @@ -112,7 +112,7 @@ sub push_to_redis_stream ($user_id, $product_ref, $action, $comment, $diffs) { eval { $redis_client->xadd( # name of the Redis stream - 'product_update', + $options{redis_stream_name}, # We do not add a MAXLEN 'MAXLEN', '~', '10000000', # We let Redis generate the id diff --git a/package-lock.json b/package-lock.json index 199b0b34f2a91..cb4d224142b44 100644 --- a/package-lock.json +++ b/package-lock.json @@ -14,7 +14,7 @@ "win32" ], "dependencies": { - 
"@snyk/protect": "^1.1269.0", + "@snyk/protect": "^1.1275.0", "@webcomponents/webcomponentsjs": "2.8.0", "@yaireo/tagify": ">=4.12.0 <4.13.0", "blueimp-file-upload": "^10.31.0", @@ -46,7 +46,7 @@ "@typescript-eslint/parser": "^6.13.2", "chai": "^5.0.0", "chai-http": "^4.4.0", - "dotenv": "^16.3.1", + "dotenv": "^16.4.1", "eslint": "^8.55.0", "gulp": "^4.0.2", "gulp-babel": "^8.0.0", @@ -2152,9 +2152,9 @@ } }, "node_modules/@snyk/protect": { - "version": "1.1269.0", - "resolved": "https://registry.npmjs.org/@snyk/protect/-/protect-1.1269.0.tgz", - "integrity": "sha512-2sBAjL8NC4+N6AJU06cpAR+6Uu0pTB7K4Cho7W0kF6K0dfXrikJ1EFQ/Q5OOlJZCW+oUuuf9xFr4bSreLXU4Wg==", + "version": "1.1275.0", + "resolved": "https://registry.npmjs.org/@snyk/protect/-/protect-1.1275.0.tgz", + "integrity": "sha512-+q+voIXV/V4o+ktCOWk81cfYY9HWcSTWIwmMU7pzQqp+pwn++Hlfga3tH+jxtp0IgsNkiuVZuHiwALBREyBWjg==", "bin": { "snyk-protect": "bin/snyk-protect" }, @@ -4365,9 +4365,9 @@ } }, "node_modules/dotenv": { - "version": "16.3.1", - "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.3.1.tgz", - "integrity": "sha512-IPzF4w4/Rd94bA9imS68tZBaYyBWSCE47V1RGuMrB94iyTOIEwRmVL2x/4An+6mETpLrKJ5hQkB8W4kFAadeIQ==", + "version": "16.4.1", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.1.tgz", + "integrity": "sha512-CjA3y+Dr3FyFDOAMnxZEGtnW9KBR2M0JvvUtXNW+dYJL5ROWxP9DUHCwgFqpMk0OXCc0ljhaNTr2w/kutYIcHQ==", "dev": true, "engines": { "node": ">=12" diff --git a/package.json b/package.json index cd7af4d4f1849..e7e8af14debd7 100644 --- a/package.json +++ b/package.json @@ -37,7 +37,7 @@ "win32" ], "dependencies": { - "@snyk/protect": "^1.1269.0", + "@snyk/protect": "^1.1275.0", "@webcomponents/webcomponentsjs": "2.8.0", "@yaireo/tagify": ">=4.12.0 <4.13.0", "blueimp-file-upload": "^10.31.0", @@ -69,7 +69,7 @@ "@typescript-eslint/parser": "^6.13.2", "chai": "^5.0.0", "chai-http": "^4.4.0", - "dotenv": "^16.3.1", + "dotenv": "^16.4.1", "eslint": "^8.55.0", "gulp": "^4.0.2", "gulp-babel": "^8.0.0", 
diff --git a/po/common/common.pot b/po/common/common.pot index f254471e06e4b..50ed7c3979350 100644 --- a/po/common/common.pot +++ b/po/common/common.pot @@ -168,6 +168,10 @@ msgctxt "alcohol_warning" msgid "Excess drinking is harmful for health." msgstr "" +msgctxt "email_warning" +msgid "Please note that your Pro account will only be valid if you use your professional e-mail address. Our moderation team checks that the domain name is consistent with the organisation you wish to join." +msgstr "" + msgctxt "all_missions" msgid "All missions" msgstr "" diff --git a/po/common/en.po b/po/common/en.po index b9388062752a2..274fecf80abbd 100644 --- a/po/common/en.po +++ b/po/common/en.po @@ -172,6 +172,10 @@ msgctxt "alcohol_warning" msgid "Excess drinking is harmful for health." msgstr "Excess drinking is harmful for health." +msgctxt "email_warning" +msgid "Please note that your Pro account will only be valid if you use your professional e-mail address. Our moderation team checks that the domain name is consistent with the organisation you wish to join." +msgstr "Please note that your Pro account will only be valid if you use your professional e-mail address. Our moderation team checks that the domain name is consistent with the organisation you wish to join." 
+ msgctxt "all_missions" msgid "All missions" msgstr "All missions" @@ -875,7 +879,7 @@ msgstr "https://world.openbeautyfacts.org" msgctxt "footer_pro" msgid "Open Food Facts for Producers" -msgstr "" +msgstr "Open Food Facts for Producers" msgctxt "for" msgid "for" diff --git a/scripts/gen_feeds_daily_off.sh b/scripts/gen_feeds_daily_off.sh index cef5f339af1cb..a1029b3f8858c 100755 --- a/scripts/gen_feeds_daily_off.sh +++ b/scripts/gen_feeds_daily_off.sh @@ -40,5 +40,5 @@ cd /srv/off # On sunday, generates madenearme if [ "$(date +%u)" = "7" ] then - ./scripts/gen_madenearme_pages.sh -fi \ No newline at end of file + ./scripts/generate_madenearme_pages.sh +fi diff --git a/scripts/generate_madenearme_pages.sh b/scripts/generate_madenearme_pages.sh index fbf998d44f360..49799d9b4f697 100644 --- a/scripts/generate_madenearme_pages.sh +++ b/scripts/generate_madenearme_pages.sh @@ -4,6 +4,9 @@ . <(perl -e 'use ProductOpener::Paths qw/:all/; print base_paths_loading_script()') # Made near me static pages generation -./scripts/generate_madenearme_page.pl uk en > $OFF_PUBLIC_DATA_DIR/madenearme-uk.html -./scripts/generate_madenearme_page.pl world en > $OFF_PUBLIC_DATA_DIR/madenearme.html -./scripts/generate_madenearme_page.pl fr fr > $OFF_PUBLIC_DATA_DIR/cestemballepresdechezvous.html +./scripts/generate_madenearme_page.pl uk en > $OFF_PUBLIC_DATA_DIR/madenearme-uk.html.tmp +mv $OFF_PUBLIC_DATA_DIR/madenearme-uk.html.tmp $OFF_PUBLIC_DATA_DIR/madenearme-uk.html +./scripts/generate_madenearme_page.pl world en > $OFF_PUBLIC_DATA_DIR/madenearme.html.tmp +mv $OFF_PUBLIC_DATA_DIR/madenearme.html.tmp $OFF_PUBLIC_DATA_DIR/madenearme.html +./scripts/generate_madenearme_page.pl fr fr > $OFF_PUBLIC_DATA_DIR/cestemballepresdechezvous.html.tmp +mv $OFF_PUBLIC_DATA_DIR/cestemballepresdechezvous.html.tmp $OFF_PUBLIC_DATA_DIR/cestemballepresdechezvous.html diff --git a/scripts/sync-s3-images/README.md b/scripts/sync-s3-images/README.md new file mode 100644 index 
0000000000000..0a4c5e475defc --- /dev/null +++ b/scripts/sync-s3-images/README.md @@ -0,0 +1,9 @@ +# AWS Open Dataset: Open Food Facts images + +This directory contains the [script](./sync_s3_images.py) that synchronizes +images and OCR results, from off1 to `openfoodfacts-images` bucket, as part of +AWS Open Dataset program. + +The dataset YAML description sent to [AWS Open Data +registry](https://github.com/awslabs/open-data-registry/tree/main) can be found +at [openfoodfacts-images.yaml](./openfoodfacts-images.yaml). \ No newline at end of file diff --git a/scripts/sync-s3-images/openfoodfacts-images.yaml b/scripts/sync-s3-images/openfoodfacts-images.yaml new file mode 100644 index 0000000000000..01a67136a2060 --- /dev/null +++ b/scripts/sync-s3-images/openfoodfacts-images.yaml @@ -0,0 +1,16 @@ +Name: Open Food Facts Images +Description: A dataset of all images of Open Food Facts, the biggest open + dataset of food products in the world. +Documentation: https://openfoodfacts.github.io/openfoodfacts-server/api/aws-images-dataset +Contact: contact@openfoodfacts.org +ManagedBy: "[Open Food Facts](https://world.openfoodfacts.org)" +UpdateFrequency: Monthly +License: All data contained in this dataset is licenced under the [Creative Commons Attribution ShareAlike licence](https://creativecommons.org/licenses/by-sa/3.0/deed.en) +Tags: + - machine learning + - image processing +Resources: + - Description: Open Food Facts image dataset + ARN: arn:aws:s3:::openfoodfacts-images + Region: eu-west-3 + Type: S3 Bucket diff --git a/scripts/sync-s3-images/requirements.txt b/scripts/sync-s3-images/requirements.txt new file mode 100644 index 0000000000000..7cc32df9cdb50 --- /dev/null +++ b/scripts/sync-s3-images/requirements.txt @@ -0,0 +1,4 @@ +openfoodfacts==0.1.12 +orjson==3.9.11 +boto3==1.34.22 +tqdm==4.66.1 \ No newline at end of file diff --git a/scripts/sync-s3-images/sync_s3_images.py b/scripts/sync-s3-images/sync_s3_images.py new file mode 100644 index
0000000000000..b985a1ddec67e --- /dev/null +++ b/scripts/sync-s3-images/sync_s3_images.py @@ -0,0 +1,201 @@ +"""This script is used to synchronize Open Food Facts images and OCR JSONs on +AWS S3. As part of AWS Open Dataset program, we can host free of charge data on +AWS S3. + +This dataset can be used by researchers to access easily OFF data, without +overloading OFF servers. + +This script should be run regularly, to synchronize new images. We currently +upload: + +- all raw images (ex: 1.jpg, 2.jpg,...) +- 400px resized version of the raw images +- OCR results of the raw images (ex: 1.json.gz) +""" + +import argparse +import gzip +import logging +import re +import tempfile +from logging import getLogger +from pathlib import Path +from typing import Iterator, Tuple + +import boto3 +import tqdm +from openfoodfacts import DatasetType, ProductDataset + +logger = getLogger() +handler = logging.StreamHandler() +formatter = logging.Formatter( + "%(asctime)s :: %(processName)s :: " + "%(threadName)s :: %(levelname)s :: " + "%(message)s" +) +handler.setFormatter(formatter) +handler.setLevel(logging.INFO) +logger.addHandler(handler) +logger.setLevel(logging.INFO) + +s3 = boto3.resource("s3", region_name="eu-west-3") +bucket = s3.Bucket("openfoodfacts-images") + + +BARCODE_PATH_REGEX = re.compile(r"^(...)(...)(...)(.*)$") + + +def generate_product_path(barcode: str) -> str: + if not barcode.isdigit(): + raise ValueError("unknown barcode format: {}".format(barcode)) + + match = BARCODE_PATH_REGEX.fullmatch(barcode) + splitted_barcode = [x for x in match.groups() if x] if match else [barcode] + return "/".join(splitted_barcode) + + +def get_sync_filepaths( + base_dir: Path, ds: ProductDataset +) -> Iterator[Tuple[str, Path]]: + """Return an iterator containing files to synchronize with AWS S3 bucket. + + The iterator returns (barcode, file_path) tuples, where `barcode` is the + product barcode, and `file_path` is the path of the file to synchronize. 
+ + We use the product dataset to know images associated with each products, + this way we don't push to S3 deleted images. + + We currently synchronize: + + - all raw images (ex: 1.jpg, 2.jpg,...) + - 400px resized version of the raw images + - OCR results of the raw images (ex: 1.json.gz) + + :param base_dir: directory where images are stored + :param ds: product dataset + """ + for item in tqdm.tqdm(ds, desc="products"): + barcode = item["code"] + if not barcode: + continue + product_path = generate_product_path(barcode) + product_dir = Path(product_path) + full_product_dir = base_dir / product_dir + + for image_id in item.get("images", {}).keys(): + if not image_id.isdigit(): + # Ignore selected image keys + continue + + # Only synchronize raw and 400px version of images + for image_name in ( + "{}.jpg".format(image_id), + "{}.400.jpg".format(image_id), + ): + full_image_path = full_product_dir / image_name + if not full_image_path.is_file(): + logger.warning("image {} not found".format(full_image_path)) + continue + yield barcode, product_dir / image_name + + # Synchronize OCR JSON if it exists + ocr_file_name = "{}.json.gz".format(image_id) + if (full_product_dir / ocr_file_name).is_file(): + yield barcode, product_dir / ocr_file_name + + +def run(image_dir: Path) -> None: + """Launch the synchronization. 
+ + :param image_dir: directory where images are stored + """ + ds = ProductDataset(DatasetType.jsonl, force_download=True, download_newer=True) + logger.info("Fetching existing keys...") + existing_keys = set(obj.key for obj in bucket.objects.filter(Prefix="data/")) + logger.info("%d keys in openfoodfacts-images bucket", len(existing_keys)) + dataset_keys = set() + + uploaded = 0 + kept = 0 + deleted = 0 + for barcode, file_path in get_sync_filepaths(image_dir, ds): + full_file_path = image_dir / file_path + key = "data/{}".format(file_path) + dataset_keys.add(key) + + if key in existing_keys: + logger.debug("File %s already exists on S3", key) + kept += 1 + continue + + extra_args = {"Metadata": {"barcode": barcode}} + if key.endswith(".jpg"): + extra_args["ContentType"] = "image/jpeg" + + logger.debug("Uploading file %s -> %s", full_file_path, key) + bucket.upload_file(str(full_file_path), key, ExtraArgs=extra_args) + uploaded += 1 + existing_keys.add(key) + + if (kept + uploaded) % 1000 == 0: + logger.info("uploaded: %d, kept: %d", uploaded, kept) + + logger.info("Removing deleted files...") + for missing_key in existing_keys - dataset_keys: + # Removing files associated with deleted images + logger.debug("Deleting S3 file %s", missing_key) + deleted += 1 + bucket.delete_objects( + Delete={ + "Objects": [ + {"Key": missing_key}, + ], + }, + ) + + # We upload all S3 keys in a single `data_keys.txt` text file + # to make it easier to know existing files on the bucket + + # Create a temporary directory to avoid uploading a corrupted file + tmp_dir = Path(tempfile.mkdtemp()) + data_keys_path = tmp_dir / "data_keys.txt" + logger.info("Saving data keys in %s", data_keys_path) + + with gzip.open(str(data_keys_path), "wt") as f: + f.write("\n".join(sorted(existing_keys))) + + logger.info("Uploading data keys...") + bucket.upload_file(str(data_keys_path), "data/data_keys.gz") + data_keys_path.unlink() + tmp_dir.rmdir() + + logger.info( + "Synchronization finished, 
uploaded: %d, kept: %d, deleted: %d", + uploaded, + kept, + deleted, + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="""Synchronize Open Food Facts images and OCR files with AWS S3. + + This script should be run regularly, to synchronize new images. We currently + upload: + + - all raw images (ex: 1.jpg, 2.jpg,...) + - 400px resized version of the raw images + - OCR results of the raw images (ex: 1.json.gz) + + Before upload, the latest version of the dataset is downloaded from Open Food + Facts servers to get the list of images to synchronize. + """ + ) + parser.add_argument( + "image_dir", + type=Path, + help="Directory where images are stored.", + ) + args = parser.parse_args() + run(args.image_dir) diff --git a/scripts/update_all_products.pl b/scripts/update_all_products.pl index 8bc71c573bea0..0202d180400b3 100755 --- a/scripts/update_all_products.pl +++ b/scripts/update_all_products.pl @@ -139,6 +139,7 @@ my $compute_main_countries = ''; my $prefix_packaging_tags_with_language = ''; my $fix_non_string_ids = ''; +my $fix_string_last_modified_t = ''; my $assign_ciqual_codes = ''; my $obsolete = 0; my $fix_obsolete; @@ -175,6 +176,7 @@ "fix-zulu-lang" => \$fix_zulu_lang, "fix-rev-not-incremented" => \$fix_rev_not_incremented, "fix-non-string-ids" => \$fix_non_string_ids, + "fix-string-last-modified-t" => \$fix_string_last_modified_t, "user-id=s" => \$User_id, "comment=s" => \$comment, "run-ocr" => \$run_ocr, @@ -252,6 +254,7 @@ and (not $fix_nutrition_data_per) and (not $fix_nutrition_data) and (not $fix_non_string_ids) + and (not $fix_string_last_modified_t) and (not $compute_sort_key) and (not $remove_team) and (not $remove_category) @@ -334,6 +337,11 @@ $query_ref->{_id} = {'$type' => "long"}; } +# Query products that have the last_modified_t field stored as a string +if ($fix_string_last_modified_t) { + $query_ref->{last_modified_t} = {'$type' => "string"}; +} + # On the producers platform, require --query owners_tags to
be set, or the --all-owners field to be set. if ((defined $server_options{private_products}) and ($server_options{private_products})) { @@ -743,11 +751,17 @@ my $current_last_modified_t = $product_ref->{last_modified_t} // 0; if ($current_last_modified_t != $change_last_modified_t) { print STDERR "-> fixing last_modified_t from $current_last_modified_t to $change_last_modified_t"; - $product_ref->{last_modified_t} = $change_last_modified_t; + # print statement above makes $change_last_modified_t a string + $product_ref->{last_modified_t} = $change_last_modified_t + 0; } } } + if ($fix_string_last_modified_t) { + # Make sure last_modified_t is stored as a number + $product_ref->{last_modified_t} += 0; + } + # Fix zulu lang, bug https://github.com/openfoodfacts/openfoodfacts-server/issues/2063 if ($fix_zulu_lang) { diff --git a/stop_words.txt b/stop_words.txt index b2c9efcea68c6..e2c33370d098d 100644 --- a/stop_words.txt +++ b/stop_words.txt @@ -5,6 +5,7 @@ acérola acide acidifiant +actionDetails Agribalyse AgriBalyse AGS @@ -37,6 +38,7 @@ canonicalizes canonicalized Canonicalizes Carrefour +Catégorie Catalogue céléri CIC @@ -55,6 +57,7 @@ couvercle Crowdin csv CSV +déc d'acérola d'olive dans @@ -73,7 +76,12 @@ EREN et Eurocode Eurocodes +Evènement eval +eventAction +eventCategory +eventName +eventValue EXIF Fabriqué filehandle @@ -114,6 +122,10 @@ hinnies http https huile +iconSVG +idpageview +idSite +idVisit incrontab Ingrédients Intermarché @@ -175,6 +187,9 @@ OpenFoodFacts Origine overriden packagings +pageId +pageIdAction +pageviewPosition param Pâtes pectine @@ -205,9 +220,11 @@ Robotoff RTFSG Santé saturés +scanAction scanbot scrypt Scrypt +serverTimePretty sftp sirop slad @@ -250,10 +267,13 @@ UUID Valeur vegetal viande +visitIp +visitorId vitamines VPF weigher weighers +WpZhmY www xml gzipped diff --git a/taxonomies/additives.txt b/taxonomies/additives.txt index a5f0fe9addcbc..f0d95c8fbdacf 100644 --- a/taxonomies/additives.txt +++
b/taxonomies/additives.txt @@ -926,7 +926,7 @@ cs:E129, Allura red ac, červeň ac, Červeň Allura AC da:E129, Allura red ac de:E129, Allurarot ac, Allurarot, E 129 el:E129, Ερυθρο allura ac -es:E129, Rojo allura ac, E 129, Rojo Allura 2C, rojo FD&C 40, rojo 40 +es:E129, Rojo allura ac, E 129, Rojo Allura 2C, rojo FD&C 40, rojo 40, rojo allura et:E129, Võlupunane ac fi:E129, Alluranpunainen ac, Alluranpunainen, E 129, CI 16035 fr:E129, Rouge allura AC, Rouge Allura, Rouge alimentaire 17, FD&C Rouge 40, Allura Red, Food Red 17, C.I. 16035, FD&C Red 40, Red 40 lake, Red 40, 2-naphthalenesulfonic acid disodium salt, Allura Red AC, C18H14N2Na2O8S2 @@ -1084,7 +1084,7 @@ cs:E133, Brilantní modř FCF da:E133, Brilliant blue FCF, brillant blue de:E133, Brilliantblau FCF, Brillantblau FCF, E 133, Brillantblau el:E133, Λαμπρο κυανο FCF -es:E133, Azul brillante FCF, Azul brillante FCP, E 133, Brilliant Blue FCF, azul FD&C 1 +es:E133, Azul brillante FCF, Azul brillante FCP, E 133, Brilliant Blue FCF, azul FD&C 1, azul brillante et:E133, Briljantsinine FCF fi:E133, Briljanttisininen FCF fr:E133, Bleu brillant FCF, C.I. Acid Blue 9, CI 42090, C-Blau 21, Erioglaucin A, Blue 1, Brilliant Blue FCF, bleu brillant, FD&C Blue No.1, FD&C Blue #1, BB FCF, Acid Blue 9, D&C Blue No. 4, Alzen Food Blue No. 1, Atracid Blue FG, Erioglaucine, Eriosky blue, Patent Blue AR, Xylene Blue VSG, CAS 25305-78-6, CAS 2650-18-2, CAS 3844-45-9, CAS 71701-18-3, CAS 15792-67-3, 2650-18-2, 15792-67-3 @@ -4506,7 +4506,7 @@ description:en:SULPHUR DIOXIDE is the chemical compound with the formula SO2. 
en:E221, Sodium sulphite, Sodium sulfite ar:E221, كبريتيت الصوديوم bg:E221, Натриев сулфит -ca:E221, Sulfit de sodi +ca:E221, Sulfit de sodi, sulfit sòdic cs:E221, Siřičitan sodný da:E221, Natriumsulfit de:E221, Natriumsulfit @@ -6769,7 +6769,7 @@ vegetarian:en:yes en:E301, Sodium ascorbate, Sodium L-ascorbate bg:E301, Натриев аскорбат, Натриев L-аскорбат -ca:E301, Ascorbat de sodi +ca:E301, Ascorbat de sodi, ascorbat sòdic cs:E301, Askorban sodný, L-askorban sodný da:E301, Natriumascorbat, Natrium-L-ascorbat de:E301, Natriumascorbat, E 301, Natrium-L-Ascorbat @@ -8442,7 +8442,7 @@ lt:E333, E333 food additive lv:E333, E333 food additive mt:E333, E333 food additive nl:E333, Calciumcitraten -pl:E333, Cytryniany wapnia +pl:E333, Cytryniany wapnia, cytrynian wapnia pt:E333, Citrato de cálcio ro:E333, E333 food additive ru:E333, Цитрат кальция @@ -15690,7 +15690,7 @@ nn:E500(ii), natriumvätekarbonat, natriumhydrogenkarbonat oc:E500(ii), Idrogenocarbonat de sòdi or:E500(ii), ସୋଡ଼ିଅମ ବାଇକାର୍ବୋନେଟ pa:E500(ii), ਮਿੱਠਾ ਸੋਡਾ -pl:E500(ii), Wodorowęglan sodu, Diwęglan sodu, kwaśny węglan sodu, diwęglan sodowy +pl:E500(ii), Wodorowęglan sodu, Diwęglan sodu, kwaśny węglan sodu, diwęglan sodowy, soda oczyszczona pt:E500(ii), Hidrogenocarbonato de sódio, Bicarbonato de sódio, carbonato ácido de sódio ro:E500(ii), Bicarbonat de sodiu, Carbonat acid de sodiu, Bicarbonat ru:E500(ii), натрий двууглекислый, бикарбонат натрия, бикарбонат-натрия diff --git a/taxonomies/additives_classes.txt b/taxonomies/additives_classes.txt index f17806b1f9ec6..d8ea68f173f05 100644 --- a/taxonomies/additives_classes.txt +++ b/taxonomies/additives_classes.txt @@ -407,6 +407,7 @@ description:sv:Färgämnen: ämnen som tillför eller återställer färg hos li en:concentrated plant colour de:färbendes Pflanzenkonzentrat, färbende Pflanzenkonzentrate es:colorante vegetal concentrado +pl:koncentraty barwiące, koncentrat barwiący, wyciągi barwiące, barwiące koncentraty fr:colorant végétal concentré it:colorante 
vegetale concentrato nl:geconcentreerde plantaardige kleurstof @@ -423,7 +424,7 @@ hr:prirodno bojilo hu:természetes színezék, természetes színezékek, természetes színezőanyag, természetes színezőanyagok it:coloranti naturali, colorante naturale nl:Natuurlijke kleurstoffen, natuurlijk kleurstof -pl:barwnik naturalny, barwniki naturalne +pl:barwnik naturalny, barwniki naturalne, naturalny barwnik, naturalne barwniki, naturalny preparat barwiący ru:краситель натуральный, натуральный краситель, натуральные красители # ingredient/fr:colorant-naturel has 283 products in 5 languages @2019-04-20 @@ -434,6 +435,7 @@ de:färbendes Lebensmittel, färbende Lebensmittel es:colorante alimenticio, colorantes alimenticios hu:élelmiszer színezék it:sostanze alimentari coloranti +pl:żywność barwiąca, środek spożywczy o właściwościach barwiących, środek spożywczy barwiący # ingredient/fr:denrée-alimentaire-colorante has 202 products in 3 languages @2019-03-27 # denrées alimentaires colorantes: concentrés de spriruline et de carthame @@ -452,6 +454,7 @@ en:colour retention agent, colour adjunct, colour fixative, colour stabilizer, m de:Farbstabilisator es:Agente de retención de color, complementos del color, estabilizador del color, estabilizadores del color, fijadores del color, fijador del color fr:agent de rétention de la couleur, fixateur de la couleur, fixateur de couleur, adjuvant de la couleur, stabilisant de la couleur, agent de rétention de couleur, fixateur de couleur, adjuvant de couleur, stabilisant de couleur +pl:do korekcji barwy, substancja wzmacniająca kolor pt:estabilizador de cor description:es:Aditivos alimentarios que estabilizan, retienen o intensifican el color de un alimento. description:fr:Additif alimentaire qui stabilise, retient ou intensifie la couleur d’une denrée alimentaire. 
diff --git a/taxonomies/allergens.txt b/taxonomies/allergens.txt index 64f78ec2c1b7d..1cd7b79463d15 100644 --- a/taxonomies/allergens.txt +++ b/taxonomies/allergens.txt @@ -79,7 +79,7 @@ lt:vėžiagyviai, krabai, krabų, omarai, omarų, vėžiai, krevetės, kreveči lv:vēžveidīgie mt:krustaċej nl:schaaldieren, schaaldier -pl:skorupiaki +pl:skorupiaki, skorupiaków pt:crustáceos, caranguejo, lagosta, lagostim, gambas, gamba, camarão ro:crustacee ru:ракообразные @@ -535,7 +535,7 @@ lv:gliemji mt:molluski nl:weekdieren nl_be:weekdieren -pl:mięczaki +pl:mięczaki, mięczaków pt:moluscos, molusco, lulas, lula, chocos, choco, ostras, ostra, mexilhões, mexilhão, amêijoas, amêijoa, vieiras, vieira ro:moluște ru:моллюск, моллюски, кальмар, каракатица, устрицы, устрица, ракушки, гребешок, улитка, улитки diff --git a/taxonomies/categories.txt b/taxonomies/categories.txt index 873f32adfc1fa..40087be32f97f 100644 --- a/taxonomies/categories.txt +++ b/taxonomies/categories.txt @@ -7650,6 +7650,7 @@ he:משקאות צמחים, משקה צמחים it:Bevande a base di verdure lt:Daržovių gėrimai nl:Groentedranken, Groentedrank, Groentendrank +pl:Napoje na bazie warzyw #Duplicate #he:משקאות צמחים, משקאות מבוססי צמחים, משקה צמחים @@ -7699,6 +7700,7 @@ it:Succo di carota ja:にんじんジュース lt:Morkų sultys nl:Wortelsappen, Wortelsap +pl:Soki marchewkowe ru:Морковные соки tr:Havuç suyu zh:胡萝卜汁 @@ -7786,6 +7788,7 @@ de:Passierte Tomaten fr:Coulis de tomates, coulis de tomate, Coulis de tomate appertisé mi-réduit à 11%, Purée de tomates appertisée mi-réduite à 11% it:Passata nl:Gezeefde tomaten, Passata, Tomatencoulis +pl:Przetarte pomidory, Passata agribalyse_food_code:en:20260 ciqual_food_code:en:20260 ciqual_food_name:en:Tomato coulis, canned (tomato puree semi-reduced 11%) @@ -7831,6 +7834,7 @@ it:Tisane, Infusi ja:ハーブティ, ハーブティー lt:Žolelių arbatos, Žolelių užpilai, užpilai nl:Kruidentheeën, Kruidenthee +pl:Herbaty ziołowe pt:Chás de ervas, infusões de ervas, infusões ru:Травяные чаи и настойки wikidata:en:Q379932 @@ 
-9520,6 +9524,7 @@ it:Caffè istantaneo ja:インスタントコーヒー lt:Tirpios kavos, tirpi kava, kavos milteliai, tirpios kavos milteliai nl:Oploskoffies, Oploskoffie +pl:Kawa rozpuszczalna, Kawa instant, Kawy rozpuszczalne, Kawy instant pt:Cafés instantâneos ru:Растворимый кофе, кофе растворимый tr:Kahve tozu, Suda çözünür kahve tozu, Hazır kahve tozu @@ -9537,6 +9542,7 @@ gpc_category_description:en:Definition: Includes any products that can be descri en:Coffee without sugar bg:Кафе без захар fr:Café non sucré +pl:Kawa bez cukru pt:Café sem açúcar agribalyse_food_code:en:18004 ciqual_food_code:en:18004 @@ -9607,6 +9613,42 @@ ciqual_food_code:en:18151 ciqual_food_name:en:Coffee with milk or white coffee or cappuccino, instant coffee or not, without sugar, ready-to-drink ciqual_food_name:fr:Café au lait, café crème ou cappuccino, instantané ou non, non sucré, prêt à boire + [% field.label %] + [% IF server_options_producers_platform %] +
🚨 [% lang('email_warning') %]
+ [% END %] [% ELSIF field.type == 'select' %]