diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4ef239eb..d817a231 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,6 +52,7 @@ jobs: environment-name: DEVELOP channels: conda-forge cache-env: true + cache-env-key: ubuntu-latest-3.10 extra-specs: | python=3.10 - name: Install package @@ -61,6 +62,34 @@ jobs: run: | make unit-tests + unit-tests-no-eccodes: + name: unit-tests (3.10) + if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha || github.ref }} + - name: Install Conda environment with Micromamba + uses: mamba-org/provision-with-micromamba@v14 + with: + environment-file: tests/environment-unit-tests.yml + environment-name: DEVELOP + channels: conda-forge + cache-env: true + cache-env-key: ubuntu-latest-3.10-no-eccodes + extra-specs: | + python=3.10 + - name: Install package + run: | + python -m pip install --no-deps -e . 
+ micromamba remove eccodes + - name: Run tests without eccodes + run: | + python -m pytest -v -m 'no_eccodes' + + type-check: needs: [unit-tests] if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 48052f9d..78746069 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: hooks: - id: isort - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/keewis/blackdoc diff --git a/.readthedocs.yml b/.readthedocs.yml index 2498d970..326f8821 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -1,6 +1,9 @@ version: 2 formats: [] +build: + os: "ubuntu-22.04" + tools: + python: "3.9" python: - version: "3.8" install: - requirements: docs/requirements.txt diff --git a/README.md b/README.md index 51576d26..c74d27a8 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,17 @@ # earthkit-data -![earthkit-data](docs/_static/earthkit-data.png) + +[![PyPI version fury.io](https://badge.fury.io/py/earthkit-data.svg)](https://pypi.python.org/pypi/earthkit-data/) [![PyPI pyversions](https://img.shields.io/pypi/pyversions/earthkit-data.svg)](https://pypi.python.org/pypi/earthkit-data/) +**DISCLAIMER** + +> This project is in the **BETA** stage of development. Please be aware that interfaces and functionality may change as the project develops. If this software is to be used in operational systems you are **strongly advised to use a released tag in your system configuration**, and you should be willing to accept incoming changes and bug fixes that require adaptations on your part. ECMWF **does use** this software in operations and abides by the same caveats. + A format-agnostic interface for geospatial data with a focus on meteorology and climate science. 
-> :warning: **DISCLAIMER** -> -> This project is **BETA** and will be **Experimental** for the foreseeable future. -> Interfaces and functionality are likely to change, and the project itself may be scrapped. -> **DO NOT** use this software in any project/software that is operational. - ## Documentation The documentation can be found at https://earthkit-data.readthedocs.io/. diff --git a/docs/api.rst b/docs/api.rst index ce2f0a4e..16fc7ea3 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -30,6 +30,11 @@ BUFR - :py:class:`~data.readers.bufr.bufr.BUFRList` - :py:class:`~data.readers.bufr.bufr.BUFRMessage` +CSV +---- + +- :py:class:`~data.readers.csv.CSVReader` + Other -------- diff --git a/docs/conf.py b/docs/conf.py index c587a323..ad0e4776 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -35,6 +35,7 @@ "nbsphinx", "sphinx.ext.autodoc", "sphinx.ext.napoleon", + "sphinx.ext.intersphinx", "autoapi.extension", "earthkit.data.sphinxext.xref", "earthkit.data.sphinxext.module_output", @@ -109,6 +110,9 @@ } +intersphinx_mapping = {"pandas": ("https://pandas.pydata.org/docs/", None)} + + def setup(app): from skip_api_rules import _skip_api_items diff --git a/docs/examples.rst b/docs/examples.rst index 8502e358..577fe3a6 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -15,6 +15,9 @@ Here is a list of example notebooks to illustrate how to use earthkit-data. examples/cds.ipynb examples/ecmwf_open_data.ipynb examples/fdb.ipynb + examples/mars.ipynb + examples/polytope.ipynb + examples/wekeo.ipynb .. toctree:: @@ -33,6 +36,7 @@ Here is a list of example notebooks to illustrate how to use earthkit-data. examples/grib_tar.ipynb examples/grib_url.ipynb examples/grib_to_netcdf.ipynb + examples/numpy_fieldlist.ipynb .. toctree:: :maxdepth: 1 @@ -73,4 +77,3 @@ Here is a list of example notebooks to illustrate how to use earthkit-data. 
examples/cache.ipynb examples/projection.ipynb examples/metadata.ipynb - examples/numpy_fieldlist.ipynb diff --git a/docs/examples/cds.ipynb b/docs/examples/cds.ipynb index a2149bd5..2b82c0e1 100644 --- a/docs/examples/cds.ipynb +++ b/docs/examples/cds.ipynb @@ -211,7 +211,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.8.18" } }, "nbformat": 4, diff --git a/docs/examples/grib_lat_lon_value.ipynb b/docs/examples/grib_lat_lon_value.ipynb index 560b5738..eb79d523 100644 --- a/docs/examples/grib_lat_lon_value.ipynb +++ b/docs/examples/grib_lat_lon_value.ipynb @@ -13,7 +13,7 @@ "id": "c74a86a6-79ae-44bc-a9c9-8cc8a3aaa484", "metadata": {}, "source": [ - "We read a GRIB file containing pressure level data. First we ensure the example file is available." + "In this example we will work with pressure level GRIB data. First we ensure the example file is available." ] }, { @@ -32,7 +32,7 @@ "id": "ae4ff8c4-d87c-4d57-83a5-38e1e2545556", "metadata": {}, "source": [ - "We create a fieldlist with the temperature fields only." + "We will only use the temperature fields, so we extract them from the data." ] }, { @@ -198,7 +198,7 @@ "id": "c7ec94f3-4e68-4380-a88d-128b2fb46cd8", "metadata": {}, "source": [ - "The simplest way to access the latitudes, longitudes and values is to use the *data()* method. It works both on fields and fieldlists." + "The simplest way to access the latitudes, longitudes and values is using the *data()* method. It works both on fields and fieldlists." ] }, { @@ -311,7 +311,7 @@ "id": "a21f46b4-6c79-4306-a005-390ef2a02173", "metadata": {}, "source": [ - "*data()* only works on a fieldlist if all the fields has the same grid. The first two elements of the resulting ndarray are the latitude and longitude arrays (shared between fields), while the rest of the elements are the value arrays per field. 
Since we have six fields in our data the size of the first axis of the resulting ndarray is 2+6=8." + "*data()* only works on a fieldlist if all the fields have the same grid. The first two elements of the resulting ndarray are the latitude and longitude arrays (shared between fields), while the rest of the elements are the value arrays per field. Since we have six fields in our data the size of the first axis of the resulting ndarray is 2+6=8." ] }, { @@ -582,7 +582,7 @@ "id": "392bdc90-0e15-415f-892b-7ebbf86cbc8c", "metadata": {}, "source": [ - "*to_latlon()* only works on a fieldlist if all the fields has the same grid. In this case it returns the same dict that we would get for any of the fields:" + "*to_latlon()* only works on a fieldlist if all the fields have the same grid. In this case it returns the same dict that we would get for any of the fields:" ] }, { @@ -679,7 +679,7 @@ "id": "3bbbaad8-f9b7-4faa-b9ad-7a696f391b40", "metadata": {}, "source": [ - "For *to_numpy()* we can set the array type with the *dtype* keyword both for fields and fieldlists:" + "For all the methods above we can set the array type with the *dtype* keyword both for fields and fieldlists:" ] }, { @@ -708,11 +708,35 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "c89a473d-c6fd-4400-bfbc-da35bdcf8dc3", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "llv = ds.data(dtype=np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1e888bd9-48ec-4d18-8ef3-ee6d4fd107c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 90. , 0. 
, 272.56418, 272.53918, 271.26532, 255.84306,\n", + " 244.00323, 226.65315], dtype=float32)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "llv[:,0,0]" + ] } ], "metadata": { diff --git a/docs/examples/numpy_fieldlist.ipynb b/docs/examples/numpy_fieldlist.ipynb index cbb49384..99c3256a 100644 --- a/docs/examples/numpy_fieldlist.ipynb +++ b/docs/examples/numpy_fieldlist.ipynb @@ -196,7 +196,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 7, @@ -913,6 +913,273 @@ "ds1 = earthkit.data.from_source(\"file\", path)\n", "ds1.ls()" ] + }, + { + "cell_type": "markdown", + "id": "3f4d3b46-e905-4ac6-88c2-bd514101d61b", + "metadata": {}, + "source": [ + "#### Performing the computations in a loop" + ] + }, + { + "cell_type": "markdown", + "id": "0963540f-3a6d-4f0d-a046-3806c5f65e30", + "metadata": {}, + "source": [ + "In this example we create an **empty fieldlist** and add the results of the computations to it in a loop." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "6c6e8651-f5a3-4534-9227-812e8e69407d", + "metadata": {}, + "outputs": [], + "source": [ + "fs = ds.sel(shortName=\"t\", level=[850, 700, 500])\n", + "\n", + "# create an empty fieldlist\n", + "ds_r = FieldList()\n", + "\n", + "for f in fs:\n", + " p = f.metadata(\"level\")*100. # hPa -> Pa\n", + " t_new = potential_temperature(f.values, p)\n", + " md_new = f.metadata().override(shortName=\"pt\")\n", + " \n", + " # create new numpy fieldlist with a single field\n", + " ds_new = FieldList.from_numpy(t_new, md_new)\n", + "\n", + " # add it to the resulting fieldlist\n", + " ds_r += ds_new" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "32c53494-e8f4-4a22-aa93-e16b5188483a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmfptisobaricInhPa8502018080112000an0regular_ll
1ecmfptisobaricInhPa7002018080112000an0regular_ll
2ecmfptisobaricInhPa5002018080112000an0regular_ll
\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", + "0 ecmf pt isobaricInhPa 850 20180801 1200 0 \n", + "1 ecmf pt isobaricInhPa 700 20180801 1200 0 \n", + "2 ecmf pt isobaricInhPa 500 20180801 1200 0 \n", + "\n", + " dataType number gridType \n", + "0 an 0 regular_ll \n", + "1 an 0 regular_ll \n", + "2 an 0 regular_ll " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds_r.ls()" + ] + }, + { + "cell_type": "markdown", + "id": "b76eaf59-868c-4a59-8823-8d4bc53376f3", + "metadata": {}, + "source": [ + "We can save the NumpyFieldList into a GRIB file:" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "a149caa0-b1ba-4b8e-b2d0-6aad7ee224a6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmfptisobaricInhPa8502018080112000an0regular_ll
1ecmfptisobaricInhPa7002018080112000an0regular_ll
2ecmfptisobaricInhPa5002018080112000an0regular_ll
\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", + "0 ecmf pt isobaricInhPa 850 20180801 1200 0 \n", + "1 ecmf pt isobaricInhPa 700 20180801 1200 0 \n", + "2 ecmf pt isobaricInhPa 500 20180801 1200 0 \n", + "\n", + " dataType number gridType \n", + "0 an 0 regular_ll \n", + "1 an 0 regular_ll \n", + "2 an 0 regular_ll " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path = \"_pt_from_loop.grib\"\n", + "ds_r.save(path)\n", + "ds1 = earthkit.data.from_source(\"file\", path)\n", + "ds1.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8ce30259-bb20-492e-b90b-244636822dea", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/docs/examples/polytope.ipynb b/docs/examples/polytope.ipynb new file mode 100644 index 00000000..0108e524 --- /dev/null +++ b/docs/examples/polytope.ipynb @@ -0,0 +1,188 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "efdd065e-e9fc-494d-9f73-9cb3d525dc4a", + "metadata": {}, + "source": [ + "## Retrieving data with polytope" + ] + }, + { + "cell_type": "markdown", + "id": "75079b34-78ba-4536-8498-4dc3d0c3e646", + "metadata": {}, + "source": [ + "The “polytope” data source provides access to the [Polytope web services](https://polytope-client.readthedocs.io/en/latest/)." + ] + }, + { + "cell_type": "markdown", + "id": "621c0aa7-db93-441a-ab55-743fa6fbcd51", + "metadata": {}, + "source": [ + "The following example retrieves data from the ECMWF MARS archive using polytope. The dataset was prepared for the OGC GeoDataCubes working group, see details [here](https://github.com/ecmwf/ogc-gdc-usecase/tree/main)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7910ac60-a503-4392-a719-0b780625346f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-29 18:03:52 - INFO - Sending request...\n", + "{'request': 'class: rd\\n'\n", + " \"date: '20200915'\\n\"\n", + " 'domain: g\\n'\n", + " 'expver: hsvs\\n'\n", + " \"levellist: '1'\\n\"\n", + " 'levtype: pl\\n'\n", + " \"param: '129.128'\\n\"\n", + " 'step: 0/12\\n'\n", + " 'stream: oper\\n'\n", + " 'time: 00:00:00\\n'\n", + " 'type: fc\\n',\n", + " 'verb': 'retrieve'}\n", + "2023-09-29 18:03:53 - INFO - Request accepted. Please poll http://polytope.ecmwf.int/api/v1/requests/5af79420-5e06-477d-8167-a54e0de84fe1 for status\n", + "2023-09-29 18:03:53 - INFO - Checking request status (5af79420-5e06-477d-8167-a54e0de84fe1)...\n", + "2023-09-29 18:03:54 - INFO - The current status of the request is 'processing'\n", + "2023-09-29 18:03:58 - INFO - The current status of the request is 'processed'\n", + " \r" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmfzisobaricInhPa12020091500fc0sh
1ecmfzisobaricInhPa120200915012fc0sh
\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange \\\n", + "0 ecmf z isobaricInhPa 1 20200915 0 0 \n", + "1 ecmf z isobaricInhPa 1 20200915 0 12 \n", + "\n", + " dataType number gridType \n", + "0 fc 0 sh \n", + "1 fc 0 sh " + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data\n", + "\n", + "request = {\n", + " 'stream': 'oper',\n", + " 'levtype': 'pl',\n", + " 'levellist': '1',\n", + " 'param': '129.128',\n", + " 'step': '0/12',\n", + " 'time': '00:00:00',\n", + " 'date': '20200915',\n", + " 'type': 'fc',\n", + " 'class': 'rd',\n", + " 'expver': 'hsvs',\n", + " 'domain': 'g'\n", + "}\n", + "\n", + "\n", + "ds = earthkit.data.from_source(\"polytope\", \"ecmwf-mars\", request)\n", + "ds.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32cb0e10-7545-4758-b2f0-99984f01d71f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/examples/wekeo.ipynb b/docs/examples/wekeo.ipynb new file mode 100644 index 00000000..3a4826ec --- /dev/null +++ b/docs/examples/wekeo.ipynb @@ -0,0 +1,711 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3599bcd8-a5ee-4911-96c9-976b849138fb", + "metadata": {}, + "source": [ + "## Retieving data from WEkEO" + ] + }, + { + "cell_type": "markdown", + "id": "1491dd53-2993-48c1-9118-72b1ffdd2b91", + "metadata": {}, + "source": [ + "The “wekeo” and \"wekeocds\" data sources provide access to [WEkEO](https://www.wekeo.eu/)." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e32b8f13-137a-436e-8523-fcee759dfb02", + "metadata": {}, + "source": [ + "#### Using the WEkEO grammar" + ] + }, + { + "cell_type": "markdown", + "id": "c9d0a291-c9b9-480d-a70a-8be0a4be423f", + "metadata": {}, + "source": [ + "When we want access data using the WEkEO grammar (see the [hda](https://hda.readthedocs.io/) documentation) we need to use the \"wekeo\" source. The following example retrieves Normalized Difference Vegetation Index data derived from EO satellite imagery in NetCDF format:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "15d5599f-1242-4238-8d5d-e459694bc296", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:  (lon: 120960, lat: 47040)\n",
+       "Coordinates:\n",
+       "  * lon      (lon) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0\n",
+       "  * lat      (lat) float64 80.0 80.0 79.99 79.99 ... -59.99 -59.99 -59.99 -60.0\n",
+       "Data variables:\n",
+       "    crs      |S1 ...\n",
+       "    NDVI     (lat, lon) float32 dask.array<chunksize=(47040, 120960), meta=np.ndarray>\n",
+       "Attributes: (12/19)\n",
+       "    Conventions:          CF-1.6\n",
+       "    parent_identifier:    urn:cgls:global:ndvi300_v1_333m\n",
+       "    platform:             Proba-V\n",
+       "    copyright:            Copernicus Service information 2016\n",
+       "    time_coverage_end:    2014-01-10T23:59:59Z\n",
+       "    title:                10-daily Normalized Difference Vegetation Index 333...\n",
+       "    ...                   ...\n",
+       "    archive_facility:     VITO\n",
+       "    identifier:           urn:cgls:global:ndvi300_v1_333m:NDVI300_20140101000...\n",
+       "    sensor:               VEGETATION\n",
+       "    institution:          VITO NV\n",
+       "    processing_level:     L3\n",
+       "    history:              Processing line NDVI: 2016-04-26
" + ], + "text/plain": [ + "\n", + "Dimensions: (lon: 120960, lat: 47040)\n", + "Coordinates:\n", + " * lon (lon) float64 -180.0 -180.0 -180.0 -180.0 ... 180.0 180.0 180.0\n", + " * lat (lat) float64 80.0 80.0 79.99 79.99 ... -59.99 -59.99 -59.99 -60.0\n", + "Data variables:\n", + " crs |S1 ...\n", + " NDVI (lat, lon) float32 dask.array\n", + "Attributes: (12/19)\n", + " Conventions: CF-1.6\n", + " parent_identifier: urn:cgls:global:ndvi300_v1_333m\n", + " platform: Proba-V\n", + " copyright: Copernicus Service information 2016\n", + " time_coverage_end: 2014-01-10T23:59:59Z\n", + " title: 10-daily Normalized Difference Vegetation Index 333...\n", + " ... ...\n", + " archive_facility: VITO\n", + " identifier: urn:cgls:global:ndvi300_v1_333m:NDVI300_20140101000...\n", + " sensor: VEGETATION\n", + " institution: VITO NV\n", + " processing_level: L3\n", + " history: Processing line NDVI: 2016-04-26" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import earthkit.data \n", + "\n", + "ds = earthkit.data.from_source(\n", + " \"wekeo\",\n", + " \"EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M\",\n", + " request={\n", + " \"datasetId\": \"EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M\",\n", + " \"dateRangeSelectValues\": [\n", + " {\n", + " \"name\": \"dtrange\",\n", + " \"start\": \"2014-01-01T00:00:00.000Z\",\n", + " \"end\": \"2014-01-01T23:59:59.999Z\"\n", + " }\n", + " ]\n", + " }\n", + " )\n", + "\n", + "ds.to_xarray()" + ] + }, + { + "cell_type": "markdown", + "id": "23cd1784-1478-4ac0-8ad8-cf6d8d5358f8", + "metadata": {}, + "source": [ + "#### Using the CDSAPI grammar" + ] + }, + { + "cell_type": "markdown", + "id": "e50860ec-4da6-4fdb-b65c-af876ad5c6df", + "metadata": {}, + "source": [ + "We can use the \"wekeocds\" source to access [Copernicus Climate Data Store (CDS)](https://cds.climate.copernicus.eu) datasets served on [WEkEO](https://www.wekeo.eu/) using the [cdsapi](https://pypi.org/project/cdsapi) standard 
grammar." + ] + }, + { + "cell_type": "markdown", + "id": "777ec204-fbff-41f3-8ae2-3f60c6acb39f", + "metadata": {}, + "source": [ + "The following example retrieves ERA5 surface data in GRIB format:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "eff2587c-eb03-4de5-b3c4-f894d31027be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
centreshortNametypeOfLevelleveldataDatedataTimestepRangedataTypenumbergridType
0ecmf2tsurface02012121212000an0regular_ll
1ecmfmslsurface02012121212000an0regular_ll
\n", + "
" + ], + "text/plain": [ + " centre shortName typeOfLevel level dataDate dataTime stepRange dataType \\\n", + "0 ecmf 2t surface 0 20121212 1200 0 an \n", + "1 ecmf msl surface 0 20121212 1200 0 an \n", + "\n", + " number gridType \n", + "0 0 regular_ll \n", + "1 0 regular_ll " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = earthkit.data.from_source(\n", + " \"wekeocds\",\n", + " \"EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS\",\n", + " variable=[\"2m_temperature\", \"mean_sea_level_pressure\"],\n", + " product_type=[\"reanalysis\"],\n", + " year=\"2012\",\n", + " month=\"12\",\n", + " day=\"12\",\n", + " time=\"12:00\",\n", + " format=\"grib\"\n", + " )\n", + "\n", + "ds.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fffe1e96-03da-434d-a3cd-fa51ed847ac6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dev", + "language": "python", + "name": "dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/guide/caching.rst b/docs/guide/caching.rst index f9c315f4..c6decdeb 100644 --- a/docs/guide/caching.rst +++ b/docs/guide/caching.rst @@ -11,8 +11,8 @@ Caching Purpose ------- -eartkit-data caches most of the remote data access on a local cache. Running again -``earthkit.data.from_source`` will use the cached data instead of +earthkit-data caches most of the remote data access on a local cache. Running again +:func:`from_source` will use the cached data instead of downloading it again. When the cache is full, cached data is deleted according it cache policy (i.e. oldest data is deleted first). 
earthkit-data cache configuration is managed through the :doc:`settings`. @@ -26,49 +26,94 @@ earthkit-data cache configuration is managed through the :doc:`settings`. through using mirrors. .. _cache_location: +.. _cache_policies: -Cache location --------------- +Cache policies and locations +------------------------------ - The cache location is defined by the ``cache‑directory`` setting. Its default - value depends on your system: +The primary key to control the cache in the settings is ``cache‑policy``, which can take the following values: - - ``/tmp/earthkit-data-$USER`` for Linux, - - ``C:\\Users\\$USER\\AppData\\Local\\Temp\\earthkit-data-$USER`` for Windows - - ``/tmp/.../earthkit-data-$USER`` for MacOS + - "user" (default) + - "temporary" + - "off" +The cache location can be read and modified with Python (see the details below). - The cache location can be read and modified either with shell command or within python. +.. tip:: - .. note:: + See the :ref:`/examples/cache.ipynb` notebook for examples. - It is recommended to restart your Jupyter kernels after changing - the cache location. +.. note:: + + It is recommended to restart your Jupyter kernels after changing + the cache location. + +User cache policy ++++++++++++++++++++ + +When the ``cache‑policy`` is "user" the cache is created in the directory defined by the ``user-cache-directory`` settings. The user cache directory is not cleaned up on exit. So next time you start earthkit-data it will (probably) be there again. Also, when you run multiple sessions of earthkit-data under the same user they will share the same cache. + +The default value of the cache directory depends on your system: + + - ``/tmp/earthkit-data-$USER`` for Linux, + - ``C:\\Users\\$USER\\AppData\\Local\\Temp\\earthkit-data-$USER`` for Windows + - ``/tmp/.../earthkit-data-$USER`` for MacOS + + +The following code shows how to change the ``user-cache-directory`` settings: + +.. 
code:: python + + >>> from earthkit.data import settings + >>> settings.get("user-cache-directory") # Find the current cache directory + /tmp/earthkit-data-$USER + >>> # Change the value of the setting + >>> settings.set("user-cache-directory", "/big-disk/earthkit-data-cache") + # Python kernel restarted - From Python: + >>> from earthkit.data import settings + >>> settings.get("user-cache-directory") # Cache directory has been modified + /big-disk/earthkit-data-cache - .. code:: python +More generally, the earthkit-data settings can be read, modified, reset +to their default values from Python, +see the :doc:`Settings documentation `. - >>> import earthkit.data - >>> earthkit.data.settings.get( - ... "cache-directory" - ... ) # Find the current cache directory - /tmp/earthkit-data-$USER - >>> # Change the value of the setting - >>> earthkit.data.settings.set("cache-directory", "/big-disk/earthkit-data-cache") - # Python kernel restarted +Temporary cache policy +++++++++++++++++++++++++ - >>> import earthkit.data - >>> earthkit.data.settings.get( - ... "cache-directory" - ... ) # Cache directory has been modified - /big-disk/earthkit-data-cache +When the ``cache‑policy`` is "temporary" the cache will be located in a temporary directory created by ``tempfile.TemporaryDirectory``. This directory will be unique for each earthkit-data session. When the directory object goes out of scope (at the latest on exit) the cache is cleaned up. Due to the temporary nature of this directory path it cannot be queried via the :doc:`settings`, but we need to use :meth:`cache_directory` on the ``cache`` object. + +.. code-block:: python + + >>> from earthkit.data import cache, settings + >>> settings.set("cache-policy", "temporary") + >>> cache.cache_directory() + '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/tmp_5bf5kq8' + +We can specify the parent directory for the temporary cache by using the ``temporary-cache-directory-root`` settings.
By default it is set to None (no parent directory specified). + +.. code-block:: python + + >>> from earthkit.data import cache, settings + >>> s = { + ... "cache-policy": "temporary", + ... "temporary-cache-directory-root": "~/my_demo_cache", + ... } + >>> settings.set(s) + >>> cache.cache_directory() + '~/my_demo_cache/tmp0iiuvsz5' + +Off cache policy +++++++++++++++++++++++++ + +It is also possible to turn caching off completely by setting the ``cache-policy`` to “off”. + +.. warning:: - More generally, the earthkit-data settings can be read, modified, reset - to their default values from python, - see the :doc:`Settings documentation `. + At the moment, when the cache is disabled none of the sources downloading data (e.g. :ref:`data-sources-mars`) will work. On top of that the :ref:`data-sources-file` source will not be able to handle archive input (e.g. tar, zip). Cache limits ------------ @@ -100,9 +145,9 @@ Maximum-cache-disk-usage and ``maximum-cache-disk-usage`` to ``None``. -Caching settings default values +Caching settings parameters ------------------------------- -.. module-output:: generate_settings_rst .*-cache-.* cache-.* +.. module-output:: generate_settings_rst .*-cache-.* cache-.* .*-cache Other earthkit-data settings can be found :ref:`here `. diff --git a/docs/guide/data_format/bufr.rst b/docs/guide/data_format/bufr.rst index a01db1c3..118282ff 100644 --- a/docs/guide/data_format/bufr.rst +++ b/docs/guide/data_format/bufr.rst @@ -9,12 +9,12 @@ BUFR (Binary Universal Form for Representation of meteorological data) is a bina BUFRList +++++++++++ -We can read/retrieve BUFR data with :func:`from_source `. The resulting object will be a :obj:`BUFRList ` representing a list of :obj:`BUFRMessage `. +We can read/retrieve BUFR data with :func:`from_source `. The resulting object will be a :obj:`BUFRList ` representing a list of :obj:`BUFRMessage `\ s.
The structure of a BUFR message is typically hierarchical and can be rather complex, so the recommended way to deal with BUFR data is to extract the required data with :meth:`to_pandas() ` into a pandas DataFrame, which is much easier to work with. -The following table gives an overview of the :obj:`BUFRList API`: +The following table gives us an overview of the :obj:`BUFRList API`: .. list-table:: Highlights of the BUFRList API :header-rows: 1 diff --git a/docs/guide/data_format/grib.rst b/docs/guide/data_format/grib.rst index 3a6dd88e..818f79a3 100644 --- a/docs/guide/data_format/grib.rst +++ b/docs/guide/data_format/grib.rst @@ -9,7 +9,7 @@ GRIB is the WMO's format for binary gridded data consisting of GRIB messages, wh Fieldlists +++++++++++ -We can read/retrieve GRIB data with :func:`from_source`. The resulting object will be a :class:`~data.readers.grib.index.GribFieldList` representing a list of :class:`~data.readers.grib.codes.GribField`, which we can iterate through: +We can read/retrieve GRIB data with :func:`from_source`. The resulting object will be a :class:`~data.readers.grib.index.GribFieldList` representing a list of :class:`~data.readers.grib.codes.GribField`\ s, which we can iterate through: .. code-block:: python @@ -26,7 +26,7 @@ We can read/retrieve GRIB data with :func:`from_source`. The resulting object wi GribField(msl,1000,20200513,1200,0,0) -The following table gives an overview of the GRIB :class:`~data.readers.grib.index.FieldList` API: +The following table gives us an overview of the GRIB :class:`~data.readers.grib.index.FieldList` API: .. 
list-table:: Highlights of the GRIB FieldList API :header-rows: 1 diff --git a/docs/guide/include/settings-2-set.py b/docs/guide/include/settings-2-set.py deleted file mode 100644 index e87945e4..00000000 --- a/docs/guide/include/settings-2-set.py +++ /dev/null @@ -1,8 +0,0 @@ -import earthkit.data - -# Change the location of the cache: -earthkit.data.settings.set("user-cache-directory", "/big-disk/earthkit-data-cache") - -# Set some default plotting options (e.g. all maps will -# be 400 pixels wide by default): -earthkit.data.settings.set("plotting-options", width=400) diff --git a/docs/guide/include/settings-1-get.py b/docs/guide/include/settings-get.py similarity index 100% rename from docs/guide/include/settings-1-get.py rename to docs/guide/include/settings-get.py diff --git a/docs/guide/include/settings-3-reset.py b/docs/guide/include/settings-reset.py similarity index 100% rename from docs/guide/include/settings-3-reset.py rename to docs/guide/include/settings-reset.py diff --git a/docs/guide/include/settings-set.py b/docs/guide/include/settings-set.py new file mode 100644 index 00000000..880d96cd --- /dev/null +++ b/docs/guide/include/settings-set.py @@ -0,0 +1,18 @@ +import earthkit.data + +# Change the location of the user defined cache: +earthkit.data.settings.set("user-cache-directory", "/big-disk/earthkit-data-cache") + +# Change number of download threads +earthkit.data.settings.set("number-of-download-threads", 7) + +# Multiple values can be set together. The argument list +# can be a dictionary: +earthkit.data.settings.set( + {"number-of-download-threads": 7, "url-download-timeout": "1m"} +) + +# Alternatively, we can use keyword arguments. 
However, because +# the “-” character is not allowed in variable names in Python we have +# to replace “-” with “_” in all the keyword arguments: +earthkit.data.settings.set(number_of_download_threads=8, url_download_timeout="2m") diff --git a/docs/guide/include/settings-temporary.py b/docs/guide/include/settings-temporary.py new file mode 100644 index 00000000..b738691e --- /dev/null +++ b/docs/guide/include/settings-temporary.py @@ -0,0 +1,11 @@ +import earthkit.data + +print(earthkit.data.settings.get("number-of-download-threads")) + +with earthkit.data.settings.temporary(): + earthkit.data.settings.set("number-of-download-threads", 12) + print(earthkit.data.settings.get("number-of-download-threads")) + +# Temporary settings can also be created with arguments: +with earthkit.data.settings.temporary("number-of-download-threads", 11): + print(earthkit.data.settings.get("number-of-download-threads")) diff --git a/docs/guide/settings.rst b/docs/guide/settings.rst index 936506cc..95c7fe1a 100644 --- a/docs/guide/settings.rst +++ b/docs/guide/settings.rst @@ -6,19 +6,24 @@ Settings *earthkit-data* is maintaining a set of global settings which control its behaviour. -The settings are saved in ``~/.earthkit-data/settings.yaml``. They can -be accessed from Python. +The settings are automatically saved in ``~/.earthkit-data/settings.yaml`` and they can +be accessed and modified from Python. + +.. tip:: + + See the :ref:`/examples/settings.ipynb` notebook for examples. + Accessing settings ------------------ earthkit-data settings can be accessed using the python API: -.. literalinclude:: include/settings-1-get.py +.. literalinclude:: include/settings-get.py Changing settings ------------------ +------------------ .. note:: @@ -27,7 +32,21 @@ Changing settings earthkit-data settings can be modified using the python API: -.. literalinclude:: include/settings-2-set.py +.. 
literalinclude:: include/settings-set.py + + +Temporary settings +------------------ + +We can create a temporary settings (as a context manager) as a copy of the original settings. We will still refer to it as “settings”, but it is completely independent from the original object and changes are not saved into the yaml file (even when ``settings.auto_save_settings`` is True). + +.. literalinclude:: include/settings-temporary.py + +Output:: + + 8 + 12 + 11 Resetting settings @@ -40,12 +59,14 @@ Resetting settings earthkit-data settings can be reset using the python API: -.. literalinclude:: include/settings-3-reset.py +.. literalinclude:: include/settings-reset.py .. _settings_table: -Default values --------------- +Settings parameters +------------------- + +This is the list of all the settings parameters: .. module-output:: generate_settings_rst diff --git a/docs/guide/sources.rst b/docs/guide/sources.rst index e8234759..2b3cc4b9 100644 --- a/docs/guide/sources.rst +++ b/docs/guide/sources.rst @@ -36,17 +36,22 @@ We can get data from a given source by using :func:`from_source`: - read data from a stream * - :ref:`data-sources-memory` - read data from a memory buffer - * - :ref:`data-sources-mars` - - retrieve data from the ECMWF `MARS archive `_ - * - :ref:`data-sources-cds` - - retrieve data from the `Copernicus Climate Data Store `_ (CDS) * - :ref:`data-sources-ads` - retrieve data from the `Copernicus Atmosphere Data Store `_ (ADS) + * - :ref:`data-sources-cds` + - retrieve data from the `Copernicus Climate Data Store `_ (CDS) * - :ref:`data-sources-eod` - retrieve `ECMWF open data `_ * - :ref:`data-sources-fdb` - retrieve data from the `Fields DataBase `_ (FDB) - + * - :ref:`data-sources-mars` + - retrieve data from the ECMWF `MARS archive `_ + * - :ref:`data-sources-polytope` + - retrieve data from the `Polytope services `_ + * - :ref:`data-sources-wekeo` + - retrieve data from `WEkEO`_ using the WEkEO grammar + * - :ref:`data-sources-wekeocds` + - retrieve 
`CDS `_ data stored on `WEkEO`_ using the `cdsapi`_ grammar ---------------------------------- @@ -285,47 +290,43 @@ memory print(f.metadata("param")) -.. _data-sources-mars: - -mars --------------- -.. py:function:: from_source("mars", *args, **kwargs) - :noindex: +.. _data-sources-ads: - The ``mars`` source will retrieve data from the ECMWF MARS (Meteorological Archival and Retrieval System) archive. In addition - to data retrieval, the request specified as ``*args`` and/or ``**kwargs`` also has GRIB post-processing options such as ``grid`` and ``area`` for regridding and - sub-area extraction, respectively. +ads +--- - To figure out which data you need, or discover relevant data available in MARS, see the publicly accessible `MARS catalog`_ (or this `access restricted catalog `_). +.. py:function:: from_source("ads", dataset, *args, **kwargs) + :noindex: - The MARS access is direct when the MARS client is installed (as at ECMWF), otherwise it will use the `web API`_. In order to use the `web API`_ you will need to register and retrieve an access token. For a more extensive documentation about MARS, please refer to the `MARS user documentation`_. + The ``ads`` source accesses the `Copernicus Atmosphere Data Store`_ (ADS), using the cdsapi_ package. In addition to data retrieval, ``request`` also has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively. - :param tuple *args: positional arguments specifying the request as a dict + :param str dataset: the name of the ADS dataset + :param tuple *args: specifies the request as a dict :param dict **kwargs: other keyword arguments specifying the request - The following example retrieves analysis GRIB data for a subarea for 2 surface parameters: + The following example retrieves CAMS global reanalysis GRIB data for 2 parameters: .. 
code-block:: python import earthkit.data ds = earthkit.data.from_source( - "mars", - { - "param": ["2t", "msl"], - "levtype": "sfc", - "area": [50, -50, 20, 50], - "grid": [2, 2], - "date": "2023-05-10", - }, + "ads", + "cams-global-reanalysis-eac4", + variable=["particulate_matter_10um", "particulate_matter_1um"], + area=[50, -50, 20, 50], # N,W,S,E + date="2012-12-12", + time="12:00", ) - Data downloaded from MARS is stored in the :ref:`cache `. + Data downloaded from the ADS is stored in the :ref:`cache `. + + To access data from the ADS, you will need to register and retrieve an access token. The process is described `here `__. For more information, see the `ADS_knowledge base`_. Further examples: - - :ref:`/examples/mars.ipynb` + - :ref:`/examples/ads.ipynb` .. _data-sources-cds: @@ -368,44 +369,6 @@ cds - :ref:`/examples/cds.ipynb` -.. _data-sources-ads: - -ads ---- - -.. py:function:: from_source("ads", dataset, *args, **kwargs) - :noindex: - - The ``ads`` source accesses the `Copernicus Atmosphere Data Store`_ (ADS), using the cdsapi_ package. In addition to data retrieval, ``request`` also has post-processing options such as ``grid`` and ``area`` for regridding and sub-area extraction respectively. - - :param str dataset: the name of the ADS dataset - :param tuple *args: specifies the request as a dict - :param dict **kwargs: other keyword arguments specifying the request - - The following example retrieves CAMS global reanalysis GRIB data for 2 parameters: - - .. code-block:: python - - import earthkit.data - - ds = earthkit.data.from_source( - "ads", - "cams-global-reanalysis-eac4", - variable=["particulate_matter_10um", "particulate_matter_1um"], - area=[50, -50, 20, 50], # N,W,S,E - date="2012-12-12", - time="12:00", - ) - - Data downloaded from the ADS is stored in the the :ref:`cache `. - - To access data from the ADS, you will need to register and retrieve an access token. The process is described `here `__. 
For more information, see the `ADS_knowledge base`_. - - Further examples: - - - :ref:`/examples/ads.ipynb` - - .. _data-sources-eod: ecmwf-open-data @@ -525,6 +488,182 @@ fdb - :ref:`/examples/fdb.ipynb` +.. _data-sources-mars: + +mars +-------------- + +.. py:function:: from_source("mars", *args, **kwargs) + :noindex: + + The ``mars`` source will retrieve data from the ECMWF MARS (Meteorological Archival and Retrieval System) archive. In addition + to data retrieval, the request specified as ``*args`` and/or ``**kwargs`` also has GRIB post-processing options such as ``grid`` and ``area`` for regridding and + sub-area extraction, respectively. + + To figure out which data you need, or discover relevant data available in MARS, see the publicly accessible `MARS catalog`_ (or this `access restricted catalog `_). + + The MARS access is direct when the MARS client is installed (as at ECMWF), otherwise it will use the `web API`_. In order to use the `web API`_ you will need to register and retrieve an access token. For a more extensive documentation about MARS, please refer to the `MARS user documentation`_. + + :param tuple *args: positional arguments specifying the request as a dict + :param dict **kwargs: other keyword arguments specifying the request + + The following example retrieves analysis GRIB data for a subarea for 2 surface parameters: + + .. code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "mars", + { + "param": ["2t", "msl"], + "levtype": "sfc", + "area": [50, -50, 20, 50], + "grid": [2, 2], + "date": "2023-05-10", + }, + ) + + Data downloaded from MARS is stored in the :ref:`cache `. + + Further examples: + + - :ref:`/examples/mars.ipynb` + + +.. _data-sources-polytope: + +polytope +-------- + +.. py:function:: from_source("polytope", collection, *args, **kwargs) + :noindex: + + The ``polytope`` source accesses the `Polytope web services `_ , using the polytope-client_ package. 
+ + :param str collection: the name of the polytope collection + :param tuple *args: specifies the request as a dict + :param dict **kwargs: other keyword arguments specifying the request + + The following example retrieves GRIB data from the "ecmwf-mars" polytope collection: + + .. code-block:: python + + import earthkit.data + + request = { + "stream": "oper", + "levtype": "pl", + "levellist": "1", + "param": "130.128", + "step": "0/12", + "time": "00:00:00", + "date": "20200915", + "type": "fc", + "class": "rd", + "expver": "hsvs", + "domain": "g", + } + + ds = earthkit.data.from_source("polytope", "ecmwf-mars", request) + + Data downloaded from the polytope service is stored in the the :ref:`cache `. However, + please note that, in the current version, each call to :func:`from_source` will download the data again. + + To access data from polytope, you will need to register and retrieve an access token. + + Further examples: + + - :ref:`/examples/polytope.ipynb` + + + +.. _data-sources-wekeo: + +wekeo +----- + +.. py:function:: from_source("wekeo", dataset, *args, **kwargs) + :noindex: + + `WEkEO`_ is the Copernicus DIAS reference service for environmental data and virtual processing environments. The ``wekeo`` source provides access to `WEkEO`_ using the WEkEO grammar. The retrieval is based on the hda_ Python API. + + :param str dataset: the name of the WEkEO dataset + :param tuple *args: specifies the request as a dict + :param dict **kwargs: other keyword arguments specifying the request + + The following example retrieves Normalized Difference Vegetation Index data derived from EO satellite imagery in NetCDF format: + + .. 
code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "wekeo", + "EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M", + request={ + "datasetId": "EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M", + "dateRangeSelectValues": [ + { + "name": "dtrange", + "start": "2014-01-01T00:00:00.000Z", + "end": "2014-01-01T23:59:59.999Z", + } + ], + }, + ) + + + Data downloaded from WEkEO is stored in the the :ref:`cache `. + + To access data from WEkEO, you will need to register and set up the Harmonized Data Access (HDA) API client. The process is described `here `_. + + Further examples: + + - :ref:`/examples/wekeo.ipynb` + + +.. _data-sources-wekeocds: + +wekeocds +-------- + +.. py:function:: from_source("wekeocds", dataset, *args, **kwargs) + :noindex: + + `WEkEO`_ is the Copernicus DIAS reference service for environmental data and virtual processing environments. The ``wekeocds`` source provides access to `Copernicus Climate Data Store`_ (CDS) datasets served on `WEkEO`_ using the `cdsapi`_ grammar. The retrieval is based on the hda_ Python API. + + :param str dataset: the name of the WEkEO dataset + :param tuple *args: specifies the request as a dict + :param dict **kwargs: other keyword arguments specifying the request + + The following example retrieves ERA5 surface data for multiple days in GRIB format: + + .. code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "wekeocds", + "EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS", + variable=["2m_temperature", "mean_sea_level_pressure"], + product_type=["reanalysis"], + year=["2012"], + month=["12"], + day=["12", "13", "14", "15"], + time=["11:00"], + format="grib", + ) + + Data downloaded from WEkEO is stored in the the :ref:`cache `. + + To access data from WEkEO, you will need to register and set up the Harmonized Data Access (HDA) API client. The process is described `here `_. + + Further examples: + + - :ref:`/examples/wekeo.ipynb` + + .. 
_MARS catalog: https://apps.ecmwf.int/archive-catalogue/ .. _MARS user documentation: https://confluence.ecmwf.int/display/UDOC/MARS+user+documentation .. _web API: https://www.ecmwf.int/en/forecasts/access-forecasts/ecmwf-web-api @@ -536,3 +675,8 @@ fdb .. _ADS_knowledge base: https://confluence.ecmwf.int/pages/viewpage.action?pageId=151530675 .. _ECMWF open data: https://www.ecmwf.int/en/forecasts/datasets/open-data + +.. _WEkEO: https://www.wekeo.eu/ +.. _hda: https://pypi.org/project/hda + +.. _polytope-client: https://pypi.org/project/polytope-client diff --git a/docs/howtos.rst b/docs/howtos.rst new file mode 100644 index 00000000..1f192014 --- /dev/null +++ b/docs/howtos.rst @@ -0,0 +1,27 @@ +.. _howtos: + + +Howtos +============ + + +How to save results from a retrieval into a file? +-------------------------------------------------------------- + +You need to use the :func:`save` method on the resulting object. For example, this is how to +save the results of a :ref:`MARS retrieval ` into a file: + +.. code-block:: python + + import earthkit.data + + ds = earthkit.data.from_source( + "mars", + param=["2t", "msl"], + levtype="sfc", + area=[50, -10, 40, 10], # N,W,S,E + grid=[2, 2], + date="2023-05-10", + ) + + ds.save("my_data.grib") diff --git a/docs/index.rst b/docs/index.rst index 937d8b91..2fee62f7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -3,12 +3,7 @@ Welcome to earthkit-data's documentation .. warning:: - This project is **BETA** and will be **Experimental** for the foreseeable future. Interfaces and functionality are likely to change, and the project itself may be scrapped. **DO NOT** use this software in any project/software that is operational. - -.. warning:: - - This documentation is still work in progress and can only be regarded as a **DRAFT**. - + This project is in the **BETA** stage of development. Please be aware that interfaces and functionality may change as the project develops. 
If this software is to be used in operational systems you are **strongly advised to use a released tag in your system configuration**, and you should be willing to accept incoming changes and bug fixes that require adaptations on your part. ECMWF **does use** this software in operations and abides by the same caveats. **earthkit-data** is a format-agnostic Python interface for geospatial data with a focus on meteorology and climate science. @@ -40,6 +35,7 @@ reference systems and bounding boxes. :maxdepth: 1 :caption: Documentation + howtos guide/index api diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index 735948d5..31d83e5c 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,5 +4,6 @@ Release notes .. toctree:: :maxdepth: 1 + version_0.4_updates version_0.3_updates version_0.2_updates diff --git a/docs/release_notes/version_0.4_updates.rst b/docs/release_notes/version_0.4_updates.rst new file mode 100644 index 00000000..890f5eb4 --- /dev/null +++ b/docs/release_notes/version_0.4_updates.rst @@ -0,0 +1,30 @@ +Version 0.4 Updates +///////////////////////// + +Version 0.4.0 +=============== + +New features +++++++++++++++++ + +- added new sources :ref:`data-sources-wekeo` and :ref:`data-sources-wekeocds` to retrieve data from `WEkEO `_. See the :ref:`/examples/wekeo.ipynb` notebook example. +- added new source :ref:`data-sources-polytope` to retrieve data from the `Polytope web services `_. See the :ref:`/examples/polytope.ipynb` notebook example. +- added the ``append`` option to :meth:`FieldList.save() `. +- added the ``dtype`` option to the ``to_data()``, ``to_latlon()`` and ``to_points()`` methods both on a :class:`~data.core.fieldlist.Field` or :class:`~data.core.fieldlist.FieldList`. +- allowed access to 32-bit GRIB data values without requiring a cast in Python from 64 to 32 bits. Only works with a recent ecCodes version (ecCodes >= 2.31.0 and eccodes-python >= 1.6.0 required). 
In order to use this feature set ``dtype=np.float32`` in the ``to_numpy()``, ``to_data()``, ``to_latlon()`` or ``to_points()`` methods on either a :class:`~data.core.fieldlist.Field` or :class:`~data.core.fieldlist.FieldList`. +- implemented :class:`~data.core.fieldlist.FieldList` for xarray data +- added :meth:`~data.core.readers.csv.CSVReader.to_xarray` to csv data +- no ecCodes installation required any longer when we do not want to access GRIB or BUFR data + + +Fixes +++++++ + +- fixed issue when concatenation for :class:`~data.core.readers.numpy_list.NumpyFieldList` did not work +- fixed issue when concatenation to an empty Fieldlist did not work +- fixed issue when could not get values from a :class:`~data.core.readers.numpy_list.NumpyFieldList` +- fixed issue when could not retrieve data from the :ref:`CDS ` because the ``month`` and ``day`` request parameters were pre-filled by earthkit-data. These parameters are not pre-filled any longer. +- fixed issue when missing values were not correctly written to GRIB output +- fixed issue when could not read non-fieldlist type NetCDF data with :func:`from_source` +- fixed issue when could not save NetCDF data to disk +- fixed issue when after modifying an archive file (tar/zip) still the original cached contents was read diff --git a/docs/skip_api_rules.py b/docs/skip_api_rules.py index 71de5b79..3e9f4fb9 100644 --- a/docs/skip_api_rules.py +++ b/docs/skip_api_rules.py @@ -65,6 +65,24 @@ "statistics", "xarray_open_dataset_kwargs", ], + "data.readers.csv.CSVReader": [ + "bounding_box", + "cache_file", + "to_numpy", + "ignore", + "datetime", + "index_content", + "isel", + "merge", + "metadata", + "mutate", + "mutate_source", + "order_by", + "sel", + "filter", + "merger", + "source", + ], } @@ -81,6 +99,7 @@ def _skip_api_items(app, what, name, obj, skip, options): "data.readers.bufr.bufr", "data.readers.grib.codes", "data.readers.grib.index", + "data.readers.csv", "data.utils", "data.utils.bbox", ]: @@ -93,6 +112,7 
@@ def _skip_api_items(app, what, name, obj, skip, options): "data.readers.bufr.bufr", "data.readers.grib", "data.readers.grib.index", + "data.readers.csv", "data.utils", "data.utils.bbox", ]: @@ -107,6 +127,7 @@ def _skip_api_items(app, what, name, obj, skip, options): "data.readers.grib.codes.GribField", "data.readers.grib.index.GribFieldList", "data.readers.grib.metadata.GribMetadata", + "data.readers.csv.CSVReader", "data.utils.bbox.BoundingBox", ]: skip = True diff --git a/earthkit/data/core/fieldlist.py b/earthkit/data/core/fieldlist.py index e1140d3b..2fd9620e 100644 --- a/earthkit/data/core/fieldlist.py +++ b/earthkit/data/core/fieldlist.py @@ -22,11 +22,29 @@ class Field(Base): def __init__(self, metadata=None): self.__metadata = metadata - @property @abstractmethod + def _values(self, dtype=None): + r"""Return the values stored in the field as a 1D ndarray. + + Parameters + ---------- + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. + + Returns + ------- + ndarray + Field values + + """ + self._not_implemented() + + @property def values(self): r"""ndarray: Get the values stored in the field as a 1D ndarray.""" - self._not_implemented() + return self._values() def _make_metadata(self): r"""Create a field metadata object.""" @@ -47,8 +65,9 @@ def to_numpy(self, flatten=False, dtype=None): flatten: bool When it is True a flat ndarray is returned. Otherwise an ndarray with the field's :obj:`shape` is returned. - dtype: str or dtype - Typecode or data-type to which the array is cast. + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is ``np.float64``. 
Returns ------- @@ -58,12 +77,13 @@ def to_numpy(self, flatten=False, dtype=None): """ values = self.values if not flatten: - values = self.values.reshape(self.shape) + # values = self.values.reshape(self.shape) + values = self._values(dtype=dtype).reshape(self.shape) if dtype is not None: values = values.astype(dtype) return values - def data(self, keys=("lat", "lon", "value"), flatten=False): + def data(self, keys=("lat", "lon", "value"), flatten=False, dtype=None): r"""Return the values and/or the geographical coordinates for each grid point. Parameters @@ -74,6 +94,10 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): flatten: bool When it is True a flat ndarray per key is returned. Otherwise an ndarray with the field's :obj:`shape` is returned for each key. + dtype: str, numpy.dtype or None + Typecode or data-type of the arrays. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is ``np.float64``. + Returns ------- @@ -115,7 +139,7 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): _keys = dict( lat=self._metadata.geography.latitudes, lon=self._metadata.geography.longitudes, - value=lambda: self.values, + value=self._values, ) if isinstance(keys, str): @@ -125,7 +149,7 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): if k not in _keys: raise ValueError(f"data: invalid argument: {k}") - r = [_keys[k]() for k in keys] + r = [_keys[k](dtype=dtype) for k in keys] if not flatten: shape = self.shape r = [x.reshape(shape) for x in r] @@ -136,7 +160,7 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): return np.array(r) - def to_points(self, flatten=False): + def to_points(self, flatten=False, dtype=None): r"""Return the geographical coordinates in the data's original Coordinate Reference System (CRS). @@ -145,6 +169,10 @@ def to_points(self, flatten=False): flatten: bool When it is True 1D ndarrays are returned. 
Otherwise ndarrays with the field's :obj:`shape` are returned. + dtype: str, numpy.dtype or None + Typecode or data-type of the arrays. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. Returns ------- @@ -162,8 +190,8 @@ def to_points(self, flatten=False): to_latlon """ - x = self._metadata.geography.x() - y = self._metadata.geography.y() + x = self._metadata.geography.x(dtype=dtype) + y = self._metadata.geography.y(dtype=dtype) if x is not None and y is not None: if not flatten: shape = self.shape @@ -171,14 +199,14 @@ def to_points(self, flatten=False): y = y.reshape(shape) return dict(x=x, y=y) elif self.projection().CARTOPY_CRS == "PlateCarree": - lon, lat = self.data(("lon", "lat"), flatten=flatten) + lon, lat = self.data(("lon", "lat"), flatten=flatten, dtype=dtype) return dict(x=lon, y=lat) else: raise ValueError( "to_points(): geographical coordinates in original CRS are not available" ) - def to_latlon(self, flatten=False): + def to_latlon(self, flatten=False, dtype=None): r"""Return the latitudes/longitudes of all the gridpoints in the field. Parameters @@ -186,6 +214,10 @@ def to_latlon(self, flatten=False): flatten: bool When it is True 1D ndarrays are returned. Otherwise ndarrays with the field's :obj:`shape` are returned. + dtype: str, numpy.dtype or None + Typecode or data-type of the arrays. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. Returns ------- @@ -198,7 +230,7 @@ def to_latlon(self, flatten=False): to_points """ - lon, lat = self.data(("lon", "lat"), flatten=flatten) + lon, lat = self.data(("lon", "lat"), flatten=flatten, dtype=dtype) return dict(lat=lat, lon=lon) @property @@ -505,6 +537,15 @@ def from_numpy(array, metadata): return NumpyFieldList(array, metadata) + def ignore(self): + # When the concrete type is Fieldlist we assume the object was + # created with Fieldlist() i.e. it is empty. 
We ignore it from + # all the merge operations. + if type(self) is FieldList: + return True + else: + return False + @cached_method def _default_index_keys(self): if len(self) > 0: @@ -544,7 +585,7 @@ def indices(self, squeeze=False): ------- dict Unique, sorted metadata values from all the - :obj:`GribField `\ s. + :obj:`Field`\ s. See Also -------- @@ -590,7 +631,7 @@ def index(self, key): ------- list Unique, sorted values of ``key`` from all the - :obj:`GribField `\ s. + :obj:`Field`\ s. See Also -------- @@ -660,7 +701,7 @@ def values(self): return np.array([f.values for f in self]) - def data(self, keys=("lat", "lon", "value"), flatten=False): + def data(self, keys=("lat", "lon", "value"), flatten=False, dtype=None): r"""Return the values and/or the geographical coordinates. Only works when all the fields have the same grid geometry. @@ -673,6 +714,10 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): flatten: bool When it is True the "lat", "lon" arrays and the "value" arrays per field will all be flattened. Otherwise they will preserve the field's :obj:`shape`. + dtype: str, numpy.dtype or None + Typecode or data-type of the arrays. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. 
Returns ------- @@ -729,7 +774,7 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): keys = [keys] if "lat" in keys or "lon" in keys: - latlon = self[0].to_latlon(flatten=flatten) + latlon = self[0].to_latlon(flatten=flatten, dtype=dtype) r = [] for k in keys: @@ -738,7 +783,7 @@ def data(self, keys=("lat", "lon", "value"), flatten=False): elif k == "lon": r.append(latlon["lon"]) elif k == "value": - r.extend([f.to_numpy(flatten=flatten) for f in self]) + r.extend([f.to_numpy(flatten=flatten, dtype=dtype) for f in self]) else: raise ValueError(f"data: invalid argument: {k}") @@ -977,7 +1022,7 @@ def to_points(self, **kwargs): ---------- **kwargs: dict, optional Keyword arguments passed to - :obj:`GribField.to_points() ` + :obj:`Field.to_points() ` Returns ------- @@ -1004,7 +1049,7 @@ def to_latlon(self, **kwargs): ---------- **kwargs: dict, optional Keyword arguments passed to - :obj:`GribField.to_latlon() ` + :meth:`Field.to_latlon() ` Returns ------- @@ -1094,7 +1139,7 @@ def bounding_box(self): ------- list List with one :obj:`BoundingBox ` per - :obj:`GribField ` + :obj:`Field` """ return [s.bounding_box() for s in self] @@ -1108,16 +1153,19 @@ def _is_shared_grid(self): ) return False - def save(self, filename): - r"""Write all the fields into a file. The target file will be overwritten if - already exists. + def save(self, filename, append=False): + r"""Write all the fields into a file. Parameters ---------- filename: str The target file path. + append: bool + When it is true, append data to the target file. Otherwise + the target file will be overwritten if it already exists. 
""" - with open(filename, "wb") as f: + flag = "wb" if not append else "ab" + with open(filename, flag) as f: self.write(f) def write(self, f): diff --git a/earthkit/data/core/geography.py b/earthkit/data/core/geography.py index 75788ee2..7e9db2b4 100644 --- a/earthkit/data/core/geography.py +++ b/earthkit/data/core/geography.py @@ -15,9 +15,16 @@ class Geography(metaclass=ABCMeta): """Geographical information about a field or data unit""" @abstractmethod - def latitudes(self): + def latitudes(self, dtype=None): r"""Return the latitudes. + Parameters + ---------- + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. + Returns ------- ndarray @@ -25,9 +32,16 @@ def latitudes(self): pass @abstractmethod - def longitudes(self): + def longitudes(self, dtype=None): r"""Return the longitudes. + Parameters + ---------- + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. + Returns ------- ndarray @@ -35,9 +49,16 @@ def longitudes(self): pass @abstractmethod - def x(self): + def x(self, dtype=None): r"""Return the x coordinates in the original CRS. + Parameters + ---------- + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. + Returns ------- ndarray @@ -45,9 +66,16 @@ def x(self): pass @abstractmethod - def y(self): + def y(self, dtype=None): r"""Return the y coordinates in the original CRS. + Parameters + ---------- + dtype: str, numpy.dtype or None + Typecode or data-type of the array. When it is :obj:`None` the default + type used by the underlying data accessor is used. For GRIB it is + ``np.float64``. 
+ Returns ------- ndarray diff --git a/earthkit/data/core/settings.py b/earthkit/data/core/settings.py index 8d96e655..e8b52fe4 100644 --- a/earthkit/data/core/settings.py +++ b/earthkit/data/core/settings.py @@ -112,7 +112,7 @@ def kind(self): return type(self.default) def save(self, name, value, f): - for n in self.description.split("\n"): + for n in self.docs_description.split("\n"): print(f"# {n.strip()}", file=f) print(file=f) comment = yaml.dump({name: self.default}, default_flow_style=False) @@ -162,7 +162,8 @@ def validate(self, name, value): ), "cache-policy": _( "user", - """Caching policy. {validator} See :doc:`/guide/caching` for more information. """, + """Caching policy. {validator} + See :doc:`/guide/caching` for more information. """, validator=ListValidator(["off", "temporary", "user"]), ), "use-message-position-index-cache": _( @@ -201,7 +202,8 @@ def validate(self, name, value): ), "reader-type-check-bytes": _( 64, - "Number of bytes read from the beginning of a source to identify its type. {validator}", + """Number of bytes read from the beginning of a source to identify its type. 
+ {validator}""", validator=IntervalValidator(Interval(8, 4096)), ), } @@ -250,7 +252,7 @@ def save_settings(path, settings): print(file=f) print("#", "-" * 76, file=f) - print("# Version of CliMetLab", file=f) + print("# Version of earthkit-data", file=f) print(file=f) yaml.dump({"version": VERSION}, f, default_flow_style=False) print(file=f) diff --git a/earthkit/data/readers/archive.py b/earthkit/data/readers/archive.py index 64a8967b..491df5b7 100644 --- a/earthkit/data/readers/archive.py +++ b/earthkit/data/readers/archive.py @@ -66,9 +66,17 @@ def unpack(target, args): continue archive.extract(member=member, path=target, **kwargs) + try: + r = os.stat(self.path) + fsize = r.st_size + mtime = r.st_mtime_ns + except Exception: + fsize = 0 + mtime = 0 + self.path = self.cache_file( unpack, - self.path, + [self.path, fsize, mtime], extension=".d", replace=self.path, ) diff --git a/earthkit/data/readers/bufr/bufr.py b/earthkit/data/readers/bufr/bufr.py index e4557244..87ae8b88 100644 --- a/earthkit/data/readers/bufr/bufr.py +++ b/earthkit/data/readers/bufr/bufr.py @@ -631,14 +631,12 @@ def __init__(self, *args, **kwargs): class BUFRInFiles(BUFRList): # Remote BUFRLists (with urls) are also here, # as the actual fieldlist is accessed on a file in cache. - # This class changes the interface (_getitem__ and __len__) + # This class changes the interface (__getitem__ and __len__) # into the interface (part and number_of_parts). 
- def __getitem__(self, n): + def _getitem(self, n): if isinstance(n, int): part = self.part(n if n >= 0 else len(self) + n) return BUFRMessage(part.path, part.offset, part.length) - else: - return super().__getitem__(n) def __len__(self): return self.number_of_parts() diff --git a/earthkit/data/readers/csv.py b/earthkit/data/readers/csv.py index 3a9a9277..61b7e35b 100644 --- a/earthkit/data/readers/csv.py +++ b/earthkit/data/readers/csv.py @@ -127,15 +127,38 @@ def is_csv(path, probe_size=4096, compression=None): class CSVReader(Reader): + r"""Class representing CSV data""" + def __init__(self, source, path, compression=None): super().__init__(source, path) self.compression = compression self.dialect, self.has_header = probe_csv(path, compression=compression) - def to_pandas(self, **kwargs): + def to_pandas(self, pandas_read_csv_kwargs=None): + """Convert CSV data into a :py:class:`pandas.DataFrame` using :py:func:`pandas.read_csv`. + + Parameters + ---------- + pandas_read_csv_kwargs: dict + kwargs passed to :func:`pandas.read_csv`. + + Returns + ------- + :py:class:`pandas.DataFrame` + + + Examples + -------- + >>> import earthkit.data + >>> ds = earthkit.data.from_source("file", "data_with_comments.csv") + >>> df = ds.to_pandas(pandas_read_csv_kwargs={"comment": "#"}) + + """ import pandas - pandas_read_csv_kwargs = kwargs.get("pandas_read_csv_kwargs", {}) + if pandas_read_csv_kwargs is None: + pandas_read_csv_kwargs = {} + if self.compression is not None: pandas_read_csv_kwargs = dict(**pandas_read_csv_kwargs) pandas_read_csv_kwargs["compression"] = self.compression @@ -143,6 +166,27 @@ def to_pandas(self, **kwargs): LOG.debug("pandas.read_csv(%s,%s)", self.path, pandas_read_csv_kwargs) return pandas.read_csv(self.path, **pandas_read_csv_kwargs) + def to_xarray(self, pandas_read_csv_kwargs=None): + """Convert CSV data into an xarray object`. 
+ + First, the data is converted into a :py:class:`pandas.DataFrame` with :py:func:`pandas.read_csv`, + then :py:meth:`pandas.DataFrame.to_xarray` is called to generate the xarray object. + + Parameters + ---------- + pandas_read_csv_kwargs: dict + kwargs passed to :py:func:`pandas.read_csv`. + + Returns + ------- + Xarray object + + """ + if pandas_read_csv_kwargs is None: + pandas_read_csv_kwargs = {} + + return self.to_pandas(pandas_read_csv_kwargs=pandas_read_csv_kwargs).to_xarray() + def reader(source, path, magic, deeper_check, fwf=False): kind, compression = mimetypes.guess_type(path) diff --git a/earthkit/data/readers/grib/codes.py b/earthkit/data/readers/grib/codes.py index eb876fee..4dcf21a3 100644 --- a/earthkit/data/readers/grib/codes.py +++ b/earthkit/data/readers/grib/codes.py @@ -33,6 +33,56 @@ def missing_is_none(x): return None if x == 2147483647 else x +class GribCodesFloatArrayAccessor: + HAS_FLOAT_SUPPORT = None + KEY = None + + def __init__(self): + if GribCodesFloatArrayAccessor.HAS_FLOAT_SUPPORT is None: + GribCodesFloatArrayAccessor.HAS_FLOAT_SUPPORT = hasattr( + eccodes, "codes_get_float_array" + ) + + def get(self, handle, dtype=None): + v = eccodes.codes_get_array(handle, self.KEY) + if dtype is not None: + return v.astype(dtype) + else: + return v + + +class GribCodesValueAccessor(GribCodesFloatArrayAccessor): + KEY = "values" + + def __init__(self): + super().__init__() + + def get(self, handle, dtype=None): + if dtype is np.float32 and self.HAS_FLOAT_SUPPORT: + return eccodes.codes_get_array(handle, self.KEY, ktype=dtype) + else: + return super().get(handle, dtype=dtype) + + +class GribCodesLatitudeAccessor(GribCodesFloatArrayAccessor): + KEY = "latitudes" + + def __init__(self): + super().__init__() + + +class GribCodesLongitudeAccessor(GribCodesFloatArrayAccessor): + KEY = "longitudes" + + def __init__(self): + super().__init__() + + +VALUE_ACCESSOR = GribCodesValueAccessor() +LATITUDE_ACCESSOR = GribCodesLatitudeAccessor() 
+LONGITUDE_ACCESSOR = GribCodesLongitudeAccessor() + + class GribCodesMessagePositionIndex(CodesMessagePositionIndex): # This does not belong here, should be in the C library def _get_message_positions(self, path): @@ -153,18 +203,18 @@ def as_namespace(self, namespace, param="shortName"): # TODO: once missing value handling is implemented in the base class this method # can be removed - def get_values(self): + def get_values(self, dtype=None): eccodes.codes_set(self._handle, "missingValue", CodesHandle.MISSING_VALUE) - vals = eccodes.codes_get_values(self._handle) + vals = VALUE_ACCESSOR.get(self._handle, dtype=dtype) if self.get_long("bitmapPresent"): vals[vals == CodesHandle.MISSING_VALUE] = np.nan return vals - def get_latitudes(self): - return self.get("latitudes") + def get_latitudes(self, dtype=None): + return LATITUDE_ACCESSOR.get(self._handle, dtype=dtype) - def get_longitudes(self): - return self.get("longitudes") + def get_longitudes(self, dtype=None): + return LONGITUDE_ACCESSOR.get(self._handle, dtype=dtype) def get_data_points(self): return eccodes.codes_grib_get_data(self._handle) @@ -214,10 +264,13 @@ def handle(self): self._handle = GribCodesReader.from_cache(self.path).at_offset(self._offset) return self._handle - @property - def values(self): - r"""ndarray: Gets the values stored in the GRIB field as a 1D ndarray.""" - return self.handle.get_values() + def _values(self, dtype=None): + return self.handle.get_values(dtype=dtype) + + # @property + # def values(self): + # r"""ndarray: Gets the values stored in the GRIB field as a 1D ndarray.""" + # return self.handle.get_values() @property def offset(self): diff --git a/earthkit/data/readers/grib/index/__init__.py b/earthkit/data/readers/grib/index/__init__.py index ebe05fbc..4b7cf6c7 100644 --- a/earthkit/data/readers/grib/index/__init__.py +++ b/earthkit/data/readers/grib/index/__init__.py @@ -193,14 +193,12 @@ def __init__(self, *args, **kwargs): class GribFieldListInFiles(GribFieldList): # Remote 
FieldLists (with urls) are also here, # as the actual fieldlist is accessed on a file in cache. - # This class changes the interface (_getitem__ and __len__) + # This class changes the interface (__getitem__ and __len__) # into the interface (part and number_of_parts). - def __getitem__(self, n): + def _getitem(self, n): if isinstance(n, int): part = self.part(n if n >= 0 else len(self) + n) return GribField(part.path, part.offset, part.length) - else: - return super().__getitem__(n) def __len__(self): return self.number_of_parts() diff --git a/earthkit/data/readers/grib/metadata.py b/earthkit/data/readers/grib/metadata.py index 1b5520ac..14411502 100644 --- a/earthkit/data/readers/grib/metadata.py +++ b/earthkit/data/readers/grib/metadata.py @@ -24,25 +24,25 @@ class GribFieldGeography(Geography): def __init__(self, metadata): self.metadata = metadata - def latitudes(self): + def latitudes(self, dtype=None): r"""Return the latitudes of the field. Returns ------- ndarray """ - return self.metadata._handle.get_latitudes() + return self.metadata._handle.get_latitudes(dtype=dtype) - def longitudes(self): + def longitudes(self, dtype=None): r"""Return the longitudes of the field. Returns ------- ndarray """ - return self.metadata._handle.get_longitudes() + return self.metadata._handle.get_longitudes(dtype=dtype) - def x(self): + def x(self, dtype=None): r"""Return the x coordinates in the field's original CRS. Returns @@ -51,9 +51,9 @@ def x(self): """ grid_type = self.metadata.get("gridType", None) if grid_type in ["regular_ll", "reduced_gg", "regular_gg"]: - return self.longitudes() + return self.longitudes(dtype=dtype) - def y(self): + def y(self, dtype=None): r"""Return the y coordinates in the field's original CRS. 
Returns @@ -62,7 +62,7 @@ def y(self): """ grid_type = self.metadata.get("gridType", None) if grid_type in ["regular_ll", "reduced_gg", "regular_gg"]: - return self.latitudes() + return self.latitudes(dtype=dtype) def shape(self): r"""Get the shape of the field. diff --git a/earthkit/data/readers/netcdf.py b/earthkit/data/readers/netcdf.py index a4bac3fb..a94b64fa 100644 --- a/earthkit/data/readers/netcdf.py +++ b/earthkit/data/readers/netcdf.py @@ -144,20 +144,136 @@ def bbox(self, variable): return self._bbox[(lat, lon)] -class NetCDFFieldGeography(Geography): +def get_fields_from_ds( + ds, + field_type=None, + check_only=False, +): # noqa C901 + # Select only geographical variables + has_lat = False + has_lon = False + + fields = [] + + skip = set() + + for name in ds.data_vars: + v = ds[name] + skip.update(getattr(v, "coordinates", "").split(" ")) + skip.update(getattr(v, "bounds", "").split(" ")) + skip.update(getattr(v, "grid_mapping", "").split(" ")) + + for name in ds.data_vars: + if name in skip: + continue + + v = ds[name] + + coordinates = [] + + # self.log.info('Scanning file: %s var=%s coords=%s', self.path, name, v.coords) + + info = [value for value in v.coords if value not in v.dims] + non_dim_coords = {} + for coord in v.coords: + if coord not in v.dims: + non_dim_coords[coord] = ds[coord].values + continue + + c = ds[coord] + + # self.log.info("COORD %s %s %s %s", coord, type(coord), hasattr(c, 'calendar'), c) + + standard_name = getattr(c, "standard_name", "") + axis = getattr(c, "axis", "") + long_name = getattr(c, "long_name", "") + + use = False + + if ( + standard_name.lower() in GEOGRAPHIC_COORDS["x"] + or (long_name == "longitude") + or (axis == "X") + ): + has_lon = True + use = True + + if ( + standard_name.lower() in GEOGRAPHIC_COORDS["y"] + or (long_name == "latitude") + or (axis == "Y") + ): + has_lat = True + use = True + + # print(f" standard_name={standard_name}") + + # Of course, not every one sets the standard_name + if ( + 
standard_name in ["time", "forecast_reference_time"] + or long_name in ["time"] + or axis == "T" + ): + # we might not be able to convert time to datetime + try: + coordinates.append(TimeCoordinate(c, coord in info)) + use = True + except ValueError: + break + + # TODO: Support other level types + if standard_name in [ + "air_pressure", + "model_level_number", + "altitude", + ] or long_name in [ + "pressure_level" + ]: # or axis == 'Z': + coordinates.append(LevelCoordinate(c, coord in info)) + use = True + + if axis in ("X", "Y"): + use = True + + if not use: + coordinates.append(OtherCoordinate(c, coord in info)) + + if not (has_lat and has_lon): + # self.log.info("NetCDFReader: skip %s (Not a 2 field)", name) + continue + + for values in product(*[c.values for c in coordinates]): + slices = [] + for value, coordinate in zip(values, coordinates): + slices.append(coordinate.make_slice(value)) + + if check_only: + return True + + fields.append(field_type(ds, name, slices, non_dim_coords)) + + # if not fields: + # raise Exception("NetCDFReader no 2D fields found in %s" % (self.path,)) + + if check_only: + return False + return fields + + +class XArrayFieldGeography(Geography): def __init__(self, metadata, da, ds, variable): self.metadata = metadata self._da = da self._ds = ds self.north, self.west, self.south, self.east = self._ds.bbox(variable) - def latitudes(self): - return self.x() + def latitudes(self, dtype=None): + return self.x(dtype=dtype) - def longitudes(self): - return self.y() + def longitudes(self, dtype=None): + return self.y(dtype=dtype) - def _get_xy(self, axis, flatten=False): + def _get_xy(self, axis, flatten=False, dtype=None): if axis not in ("x", "y"): raise ValueError(f"Invalid axis={axis}") @@ -177,13 +293,16 @@ def _get_xy(self, axis, flatten=False): points["x"], points["y"] = np.meshgrid(points["x"], points["y"]) if flatten: points[axis] = points[axis].flatten() - return points[axis] + if dtype is not None: + return 
points[axis].astype(dtype) + else: + return points[axis] - def x(self): - return self._get_xy("x", flatten=True) + def x(self, dtype=None): + return self._get_xy("x", flatten=True, dtype=dtype) - def y(self): - return self._get_xy("y", flatten=True) + def y(self, dtype=None): + return self._get_xy("y", flatten=True, dtype=dtype) def shape(self): return self._da.shape[-2:] @@ -209,11 +328,11 @@ def _grid_mapping(self): return grid_mapping -class NetCDFMetadata(RawMetadata): +class XArrayMetadata(RawMetadata): def __init__(self, field): - if not isinstance(field, NetCDFField): + if not isinstance(field, XArrayField): raise TypeError( - f"NetCDFMetadata: expected field type NetCDFField, got {type(field)}" + f"XArrayMetadata: expected field type XArrayField, got {type(field)}" ) self._field = field self._geo = None @@ -233,7 +352,7 @@ def override(self, *args, **kwargs): @property def geography(self): if self._geo is None: - self._geo = NetCDFFieldGeography( + self._geo = XArrayFieldGeography( self, self._field._da, self._field._ds, self._field.variable ) return self._geo @@ -247,10 +366,9 @@ def ls_keys(self): return LS_KEYS -class NetCDFField(Field): +class XArrayField(Field): def __init__(self, ds, variable, slices, non_dim_coords): super().__init__() - self._ds = ds self._da = ds[variable] @@ -288,13 +406,13 @@ def __init__(self, ds, variable, slices, non_dim_coords): def __repr__(self): return ( - f"NetCDFField({self.variable}," + f"{self.__class__.__name__}({self.variable}," + ",".join([f"{s.name}={s.value}" for s in self.slices]) + ")" ) def _make_metadata(self): - return NetCDFMetadata(self) + return XArrayMetadata(self) def to_xarray(self): dims = self._da.dims @@ -311,8 +429,7 @@ def to_pandas(self): def _to_numpy(self): return self.to_xarray().to_numpy() - @property - def values(self): + def _values(self, dtype=None): return self._to_numpy().flatten() def to_numpy(self, flatten=False, dtype=None): @@ -327,9 +444,11 @@ def to_numpy(self, flatten=False, 
dtype=None): return values -class NetCDFFieldList(FieldList): - def __init__(self, path, *args, **kwargs): - self.path = path +class XArrayFieldListCore(FieldList): + FIELD_TYPE = None + + def __init__(self, ds, *args, **kwargs): + self.ds = ds self._fields = None Index.__init__(self, *args, **kwargs) @@ -339,123 +458,114 @@ def fields(self): self._scan() return self._fields + def has_fields(self): + if self._fields is None: + return get_fields_from_ds( + DataSet(self.ds), field_type=self.FIELD_TYPE, check_only=True + ) + else: + return len(self._fields) + def _scan(self): if self._fields is None: self._fields = self._get_fields() def _get_fields(self): - import xarray as xr + return get_fields_from_ds(DataSet(self.ds), field_type=self.FIELD_TYPE) - with closing( - xr.open_mfdataset(self.path, combine="by_coords") - ) as ds: # or nested - return self._get_fields_from_ds(DataSet(ds)) + def to_pandas(self): + return self.to_xarray().to_pandas() + + def to_xarray(self, **kwargs): + return self.ds + + def to_netcdf(self, *args, **kwargs): + """ + Save the data to a netCDF file. - def _get_fields_from_ds(self, ds): # noqa C901 - # Select only geographical variables - has_lat = False - has_lon = False + Parameters + ---------- + See `xarray.DataArray.to_netcdf`. 
+ """ + return self.ds.to_netcdf(*args, **kwargs) - fields = [] + @classmethod + def merge(cls, sources): + assert all(isinstance(_, XArrayFieldList) for _ in sources) + return XArrayMultiFieldList(sources) - skip = set() + @classmethod + def new_mask_index(cls, *args, **kwargs): + return XArrayMaskFieldList(*args, **kwargs) - for name in ds.data_vars: - v = ds[name] - skip.update(getattr(v, "coordinates", "").split(" ")) - skip.update(getattr(v, "bounds", "").split(" ")) - skip.update(getattr(v, "grid_mapping", "").split(" ")) - for name in ds.data_vars: - if name in skip: - continue +class XArrayFieldList(XArrayFieldListCore): + VERSION = 1 - v = ds[name] + def __init__(self, ds, **kwargs): + self.FIELD_TYPE = XArrayField + super().__init__(ds, **kwargs) - coordinates = [] + def _getitem(self, n): + if isinstance(n, int): + return self.fields[n] - # self.log.info('Scanning file: %s var=%s coords=%s', self.path, name, v.coords) + def __len__(self): + return len(self.fields) - info = [value for value in v.coords if value not in v.dims] - non_dim_coords = {} - for coord in v.coords: - if coord not in v.dims: - non_dim_coords[coord] = ds[coord].values - continue - c = ds[coord] +class XArrayMaskFieldList(XArrayFieldListCore, MaskIndex): + def __init__(self, *args, **kwargs): + MaskIndex.__init__(self, *args, **kwargs) - # self.log.info("COORD %s %s %s %s", coord, type(coord), hasattr(c, 'calendar'), c) - standard_name = getattr(c, "standard_name", "") - axis = getattr(c, "axis", "") - long_name = getattr(c, "long_name", "") +class XArrayMultiFieldList(XArrayFieldListCore, MultiIndex): + def __init__(self, *args, **kwargs): + MultiIndex.__init__(self, *args, **kwargs) - use = False + def to_xarray(self, **kwargs): + import xarray as xr - if ( - standard_name.lower() in GEOGRAPHIC_COORDS["x"] - or (long_name == "longitude") - or (axis == "X") - ): - has_lon = True - use = True + return xr.merge([x.ds for x in self.indexes], **kwargs) - if ( - standard_name.lower() in 
GEOGRAPHIC_COORDS["y"] - or (long_name == "latitude") - or (axis == "Y") - ): - has_lat = True - use = True - # print(f" standard_name={standard_name}") - - # Of course, not every one sets the standard_name - if ( - standard_name in ["time", "forecast_reference_time"] - or long_name in ["time"] - or axis == "T" - ): - # we might not be able to convert time to datetime - try: - coordinates.append(TimeCoordinate(c, coord in info)) - use = True - except ValueError: - break +class NetCDFMetadata(XArrayMetadata): + pass - # TODO: Support other level types - if standard_name in [ - "air_pressure", - "model_level_number", - "altitude", - ] or long_name in [ - "pressure_level" - ]: # or axis == 'Z': - coordinates.append(LevelCoordinate(c, coord in info)) - use = True - if axis in ("X", "Y"): - use = True +class NetCDFField(XArrayField): + def _make_metadata(self): + return NetCDFMetadata(self) - if not use: - coordinates.append(OtherCoordinate(c, coord in info)) - if not (has_lat and has_lon): - # self.log.info("NetCDFReader: skip %s (Not a 2 field)", name) - continue +class NetCDFFieldList(XArrayFieldListCore): + FIELD_TYPE = NetCDFField - for values in product(*[c.values for c in coordinates]): - slices = [] - for value, coordinate in zip(values, coordinates): - slices.append(coordinate.make_slice(value)) + def __init__(self, path, *args, **kwargs): + self.path = path + # self._fields = None + super().__init__(None, *args, **kwargs) - fields.append(NetCDFField(ds, name, slices, non_dim_coords)) + def _get_fields(self): + import xarray as xr - # if not fields: - # raise Exception("NetCDFReader no 2D fields found in %s" % (self.path,)) + with closing( + xr.open_mfdataset(self.path, combine="by_coords") + ) as ds: # or nested + return get_fields_from_ds(DataSet(ds), field_type=self.FIELD_TYPE) - return fields + def has_fields(self): + if self._fields is None: + import xarray as xr + + with closing( + xr.open_mfdataset(self.path, combine="by_coords") + ) as ds: # or nested + 
return get_fields_from_ds( + DataSet(ds), field_type=self.FIELD_TYPE, check_only=True + ) + else: + return len(self._fields) @classmethod def merge(cls, sources): @@ -466,12 +576,19 @@ def merge(cls, sources): def new_mask_index(self, *args, **kwargs): return NetCDFMaskFieldList(*args, **kwargs) - def to_pandas(self): - return self.to_xarray().to_pandas() - def to_xarray(self, **kwargs): return type(self).to_xarray_multi_from_paths(self.path, **kwargs) + def to_netcdf(self, *args, **kwargs): + """ + Save the data to a netCDF file. + + Parameters + ---------- + See `xarray.DataArray.to_netcdf`. + """ + return self.to_xarray().to_netcdf(*args, **kwargs) + @classmethod def to_xarray_multi_from_paths(cls, paths, **kwargs): import xarray as xr @@ -487,6 +604,9 @@ def to_xarray_multi_from_paths(cls, paths, **kwargs): **options, ) + def write(self, *args, **kwargs): + return self.to_netcdf(*args, **kwargs) + class NetCDFFieldListInFiles(NetCDFFieldList): pass @@ -499,11 +619,9 @@ def __init__(self, path, **kwargs): assert isinstance(path, str), path super().__init__(path, **kwargs) - def __getitem__(self, n): + def _getitem(self, n): if isinstance(n, int): return self.fields[n] - else: - return super().__getitem__(n) def __len__(self): return len(self.fields) @@ -513,15 +631,24 @@ class NetCDFMaskFieldList(NetCDFFieldList, MaskIndex): def __init__(self, *args, **kwargs): MaskIndex.__init__(self, *args, **kwargs) + # TODO: Implement this, but discussion required + def to_xarray(self, *args, **kwargs): + self._not_implemented() + class NetCDFMultiFieldList(NetCDFFieldList, MultiIndex): def __init__(self, *args, **kwargs): MultiIndex.__init__(self, *args, **kwargs) def to_xarray(self, **kwargs): - return NetCDFFieldList.to_xarray_multi_from_paths( - [x.path for x in self.indexes], **kwargs - ) + try: + return NetCDFFieldList.to_xarray_multi_from_paths( + [x.path for x in self.indexes], **kwargs + ) + except AttributeError: + # TODO: Implement this, but discussion required + 
# This catches Multi-MaskFieldLists which cannot be openned in xarray + self._not_implemented() class NetCDFFieldListReader(NetCDFFieldListInOneFile, Reader): @@ -532,43 +659,14 @@ def __init__(self, source, path): def __repr__(self): return "NetCDFFieldListReader(%s)" % (self.path,) - # @classmethod - # def merge(cls, readers): - # assert all(isinstance(s, NetCDFFieldListReader) for s in readers), readers - # assert len(readers) > 1 - - # return NetCDFFieldListReader(readers[0], [s.path for s in readers]) - def mutate_source(self): # A NetCDFReader is a source itself return self - # def to_pandas(self): - # return self.to_xarray().to_pandas() - - # def to_xarray(self, **kwargs): - # return type(self).to_xarray_multi_from_paths(self.path, **kwargs) - - # @classmethod - # def to_xarray_multi_from_paths(cls, paths, **kwargs): - # import xarray as xr - - # if not isinstance(paths, list): - # paths = [paths] - - # options = dict() - # options.update(kwargs.get("xarray_open_mfdataset_kwargs", {})) - - # return xr.open_mfdataset( - # paths, - # **options, - # ) - class NetCDFReader(Reader): def __init__(self, source, path): Reader.__init__(self, source, path) - NetCDFFieldList.__init__(self, path) def __repr__(self): return "NetCDFReader(%s)" % (self.path,) @@ -610,8 +708,8 @@ def _match_magic(magic, deeper_check): def reader(source, path, magic=None, deeper_check=False): if _match_magic(magic, deeper_check): - r = NetCDFFieldListReader(source, path) - if len(r) > 0: - return r + fs = NetCDFFieldListReader(source, path) + if fs.has_fields(): + return fs else: return NetCDFReader(source, path) diff --git a/earthkit/data/sources/__init__.py b/earthkit/data/sources/__init__.py index d67f57d2..55f59820 100644 --- a/earthkit/data/sources/__init__.py +++ b/earthkit/data/sources/__init__.py @@ -44,6 +44,12 @@ def mutate(self): return self def ignore(self): + """Indicates to ignore this source in concatenation/merging. 
+ + Returns + ------- + bool + """ # Used by multi-source return False diff --git a/earthkit/data/sources/cds.py b/earthkit/data/sources/cds.py index 778cd36a..aafec3ce 100644 --- a/earthkit/data/sources/cds.py +++ b/earthkit/data/sources/cds.py @@ -110,13 +110,6 @@ def retrieve(target, args): @normalize("date", "date-list(%Y-%m-%d)") @normalize("area", "bounding-box(list)") def requests(self, **kwargs): - # TODO: move these 5 lines into @normalize - if "year" in kwargs: - if "month" not in kwargs: - kwargs["month"] = [f"{i+1:02}" for i in range(0, 12)] - if "day" not in kwargs: - kwargs["day"] = [f"{i+1:02}" for i in range(0, 31)] - split_on = kwargs.pop("split_on", None) if split_on is None or not isinstance(kwargs.get(split_on), (list, tuple)): return [kwargs] diff --git a/earthkit/data/sources/dummy_source.py b/earthkit/data/sources/dummy_source.py index 3c826a6c..1a672b52 100644 --- a/earthkit/data/sources/dummy_source.py +++ b/earthkit/data/sources/dummy_source.py @@ -155,10 +155,16 @@ def generate_csv( separator=",", quote_strings=None, none_are_empty=True, + comment="#", + comment_line=None, **kwargs, ): assert none_are_empty + with open(target, "w") as f: + if comment_line is not None: + print(f"{comment} {comment_line}", file=f) + if headers: print(separator.join(headers), file=f) diff --git a/earthkit/data/sources/file.py b/earthkit/data/sources/file.py index 842007c4..cd80e48a 100644 --- a/earthkit/data/sources/file.py +++ b/earthkit/data/sources/file.py @@ -15,7 +15,6 @@ from earthkit.data import from_source from earthkit.data.core.caching import CACHE from earthkit.data.readers import reader -from earthkit.data.sources.file_indexed import FileIndexedSource from . 
import Source @@ -53,6 +52,8 @@ def mutate(self): # here we must have a file or a directory if self._kwargs.get("indexing", False): + from earthkit.data.sources.file_indexed import FileIndexedSource + kw = dict(self._kwargs) kw.pop("indexing", None) return FileIndexedSource(self.path, filter=filter, merger=self.merger, **kw) diff --git a/earthkit/data/sources/list_of_dicts.py b/earthkit/data/sources/list_of_dicts.py index b0b2be2d..3417d6c8 100644 --- a/earthkit/data/sources/list_of_dicts.py +++ b/earthkit/data/sources/list_of_dicts.py @@ -98,21 +98,29 @@ def ls_keys(self): def namespaces(self): return [] - def latitudes(self): - return self.get("latitudes") - - def longitudes(self): - return self.get("longitudes") - - def x(self): + def latitudes(self, dtype=None): + v = self.get("latitudes") + if dtype is None: + return v + else: + return v.astype(dtype) + + def longitudes(self, dtype=None): + v = self.get("longitudes") + if dtype is None: + return v + else: + return v.astype(dtype) + + def x(self, dtype=None): grid_type = self.get("gridType", None) if grid_type in ["regular_ll", "reduced_gg", "regular_gg"]: - return self.longitudes() + return self.longitudes(dtype=dtype) - def y(self): + def y(self, dtype=None): grid_type = self.get("gridType", None) if grid_type in ["regular_ll", "reduced_gg", "regular_gg"]: - return self.latitudes() + return self.latitudes(dtype=dtype) def _unique_grid_id(self): return self.get("md5GridSection", None) @@ -156,9 +164,12 @@ class VirtualGribField(Field): def __init__(self, d): super().__init__(metadata=VirtualGribMetadata(d)) - @property - def values(self): - return self._metadata["values"] + def _values(self, dtype=None): + v = self._metadata["values"] + if dtype is None: + return v + else: + return v.astype(dtype) def _make_metadata(self): pass diff --git a/earthkit/data/sources/numpy_list.py b/earthkit/data/sources/numpy_list.py index bbe5de70..3502a84d 100644 --- a/earthkit/data/sources/numpy_list.py +++ 
b/earthkit/data/sources/numpy_list.py @@ -14,6 +14,8 @@ from earthkit.data.core.fieldlist import Field, FieldList from earthkit.data.core.index import MaskIndex, MultiIndex from earthkit.data.core.metadata import Metadata +from earthkit.data.readers.grib.pandas import PandasMixIn +from earthkit.data.readers.grib.xarray import XarrayMixIn LOG = logging.getLogger(__name__) @@ -26,17 +28,22 @@ def __init__(self, array, metadata): def _make_metadata(self): pass - @property - def values(self): - return self._array + def _values(self, dtype=None): + if dtype is None: + return self._array + else: + return self._array.astype(dtype) + + def __repr__(self): + return f"{self.__class__.__name__}()" def write(self, f): from earthkit.data.writers import write - write(f, self.values, self._metadata, check_nans=False) + write(f, self.values, self._metadata, check_nans=True) -class NumpyFieldList(FieldList): +class NumpyFieldListCore(PandasMixIn, XarrayMixIn, FieldList): def __init__(self, array, metadata, *args, **kwargs): self._array = array self._metadata = metadata @@ -48,29 +55,39 @@ def __init__(self, array, metadata, *args, **kwargs): if not isinstance(md, Metadata): raise TypeError("metadata must be a subclass of MetaData") - if self._array.shape[0] != len(self._metadata): - import numpy as np - - # we have a single array and a single metadata - if len(self._metadata) == 1 and self._shape_match( - self._array.shape, self._metadata[0].geography.shape() - ): - self._array = np.array([self._array]) - else: + if isinstance(self._array, np.ndarray): + if self._array.shape[0] != len(self._metadata): + # we have a single array and a single metadata + if len(self._metadata) == 1 and self._shape_match( + self._array.shape, self._metadata[0].geography.shape() + ): + self._array = np.array([self._array]) + else: + raise ValueError( + ( + f"first array dimension ({self._array.shape[0]}) differs " + f"from number of metadata objects ({len(self._metadata)})" + ) + ) + elif 
isinstance(self._array, list): + if len(self._array) != len(self._metadata): raise ValueError( ( - f"first array dimension ({self._array.shape[0]}) differs " + f"array len ({len(self._array)}) differs " f"from number of metadata objects ({len(self._metadata)})" ) ) - super().__init__(*args, **kwargs) + for i, a in enumerate(self._array): + if not isinstance(a, np.ndarray): + raise ValueError( + f"All array element must be an ndarray. Type at position={i} is {type(a)}" + ) - def __getitem__(self, n): - return NumpyField(self._array[n], self._metadata[n]) + else: + raise TypeError("array must be an ndarray or a list of ndarrays") - def __len__(self): - return self._array.shape[0] + super().__init__(*args, **kwargs) def _shape_match(self, shape1, shape2): if shape1 == shape2: @@ -79,12 +96,69 @@ def _shape_match(self, shape1, shape2): return True return False + @classmethod + def new_mask_index(self, *args, **kwargs): + return NumpyMaskFieldList(*args, **kwargs) + + @classmethod + def merge(cls, sources): + assert all(isinstance(_, NumpyFieldListCore) for _ in sources) + merger = ListMerger(sources) + # merger = MultiUnwindMerger(sources) + return merger.to_fieldlist() + + def __repr__(self): + return f"{self.__class__.__name__}(fields={len(self)})" + + +class MultiUnwindMerger: + def __init__(self, sources): + self.sources = list(self._flatten(sources)) + + def _flatten(self, sources): + if isinstance(sources, NumpyMultiFieldList): + for s in sources.indexes: + yield from self._flatten(s) + elif isinstance(sources, list): + for s in sources: + yield from self._flatten(s) + else: + yield sources + + def to_fieldlist(self): + return NumpyMultiFieldList(self.sources) + + +class ListMerger: + def __init__(self, sources): + self.sources = sources + + def to_fieldlist(self): + array = [] + metadata = [] + for s in self.sources: + for f in s: + array.append(f._array) + metadata.append(f._metadata) + return NumpyFieldList(array, metadata) + + +class 
NumpyFieldList(NumpyFieldListCore): + def _getitem(self, n): + if isinstance(n, int): + return NumpyField(self._array[n], self._metadata[n]) + + def __len__(self): + return ( + len(self._array) if isinstance(self._array, list) else self._array.shape[0] + ) + -class NumpyMaskFieldList(NumpyFieldList, MaskIndex): +class NumpyMaskFieldList(NumpyFieldListCore, MaskIndex): def __init__(self, *args, **kwargs): MaskIndex.__init__(self, *args, **kwargs) -class NumpyMultiFieldList(NumpyFieldList, MultiIndex): +class NumpyMultiFieldList(NumpyFieldListCore, MultiIndex): def __init__(self, *args, **kwargs): MultiIndex.__init__(self, *args, **kwargs) diff --git a/earthkit/data/sources/polytope.py b/earthkit/data/sources/polytope.py new file mode 100644 index 00000000..2a03fc11 --- /dev/null +++ b/earthkit/data/sources/polytope.py @@ -0,0 +1,89 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import logging + +from . import Source +from .multi_url import MultiUrl +from .prompt import APIKeyPrompt + +LOG = logging.getLogger(__name__) + + +class PolytopeWebKeyPrompt(APIKeyPrompt): + register_or_sign_in_url = ("",) + retrieve_api_key_url = ("",) + prompts = [ + dict( + name="user_email", + title="Your email", + ), + dict( + name="user_key", + example="b295aad8af30332fad2fa8c963ab7900", + title="API key", + hidden=True, + validate="[0-9a-z]{32}", + ), + ] + + rcfile = "~/.polytopeapirc" + + +class Polytope(Source): + """ + Retrieve data using the Polytope Web API. + See polytope-client.readthedocs.io for more information. + + Parameters + ---------- + dataset : str + The name of the dataset to query. 
+ request: dict[str, str] + A collection of key : value pairs specifying the dataset. + + Examples + -------- + >>> src = earthkit.data.from_source("polytope", "ecmwf-mars", request) + >>> src.to_pandas() # if tabular data + >>> src.to_xarray() # if datacube + """ + + def __init__(self, dataset, request) -> None: + try: + import polytope + except ImportError: + raise ImportError( + "Polytope Web Client must be installed with 'pip install polytope-client'" + ) + + super().__init__() + assert isinstance(dataset, str) + + self.request = dict(dataset=dataset, request=request) + + credentials = PolytopeWebKeyPrompt().check(load=True) + self.client = polytope.api.Client(**credentials) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.request['dataset']}, {self.request['request']})" + + def mutate(self) -> Source: + pointers = self.client.retrieve( + self.request["dataset"], + self.request["request"], + pointer=True, + asynchronous=False, + ) + + urls = [p["location"] for p in pointers] + return MultiUrl(urls) + + +source = Polytope diff --git a/earthkit/data/sources/wekeo.py b/earthkit/data/sources/wekeo.py new file mode 100644 index 00000000..b4bddcf5 --- /dev/null +++ b/earthkit/data/sources/wekeo.py @@ -0,0 +1,160 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import os + +import hda +import yaml +from hda.api import DataOrderRequest + +from earthkit.data.core.thread import SoftThreadPool +from earthkit.data.utils import tqdm + +from .file import FileSource +from .prompt import APIKeyPrompt + + +class HDAAPIKeyPrompt(APIKeyPrompt): + register_or_sign_in_url = "https://www.wekeo.eu" + retrieve_api_key_url = "https://www.wekeo.eu" + + prompts = [ + dict( + name="url", + default="https://wekeo-broker.apps.mercator.dpi.wekeo.eu/databroker", + title="API url", + validate=r"http.?://.*", + ), + dict( + name="user", + example="name", + title="User name", + hidden=False, + validate=r"[0-9a-z]+", + ), + dict( + name="password", + example="secretpassword", + title="Password", + hidden=True, + validate=r"[0-9A-z\!\@\#\$\%\&\*]{5,30}", + ), + ] + + rcfile = "~/.hdarc" + + def save(self, input, file): + yaml.dump(input, file, default_flow_style=False) + + +class ApiClient(hda.Client): + name = "wekeo" + + def __int__(self, *args, **kwargs): + super().__init__(self, *args, **kwargs) + + def retrieve(self, name, request, target=None): + matches = self.search(request["request"]) + out = [] + for result in matches.results: + query = {"jobId": matches.job_id, "uri": result["url"]} + url = DataOrderRequest(self).run(query) + out.append( + os.path.abspath( + self.stream( + result.get("filename"), result.get("size"), target, *url + ) + ) + ) + return out + + def download(self, download_dir: str = "."): + for result in self.results: + query = {"jobId": self.jobId, "uri": result["url"]} + self.debug(result) + url = DataOrderRequest(self.client).run(query) + self.stream(result.get("filename"), result.get("size"), download_dir, *url) + + +EXTENSIONS = { + "grib": ".grib", + "netcdf": ".nc", +} + + +class WekeoRetriever(FileSource): + sphinxdoc = """ + WekeoRetriever + """ + + @staticmethod + def client(): + prompt = HDAAPIKeyPrompt() + prompt.check() + + try: + return ApiClient() + except Exception as e: + if ".hdarc" in str(e): + 
prompt.ask_user_and_save() + return ApiClient() + raise + + def __init__(self, dataset, *args, **kwargs): + super().__init__() + + assert isinstance(dataset, str) + if len(args): + assert len(args) == 1 + assert isinstance(args[0], dict) + assert not kwargs + kwargs = args[0] + + requests = self.requests(**kwargs) + + self.client() # Trigger password prompt before thraeding + + nthreads = min(self.settings("number-of-download-threads"), len(requests)) + + if nthreads < 2: + self.path = [self._retrieve(dataset, r) for r in requests] + else: + with SoftThreadPool(nthreads=nthreads) as pool: + futures = [pool.submit(self._retrieve, dataset, r) for r in requests] + + iterator = (f.result() for f in futures) + self.path = list(tqdm(iterator, leave=True, total=len(requests))) + + def _retrieve(self, dataset, request): + def retrieve(target, args): + self.client().retrieve(args[0], args[1], target) + + return self.cache_file( + retrieve, + (dataset, request), + extension=EXTENSIONS.get(request.get("format"), ".cache"), + ) + + @staticmethod + def requests(**kwargs): + split_on = kwargs.pop("split_on", None) + if split_on is None or not isinstance(kwargs.get(split_on), (list, tuple)): + return [kwargs] + + result = [] + + for v in kwargs[split_on]: + r = dict(**kwargs) + r[split_on] = v + result.append(r) + + return result + + +source = WekeoRetriever diff --git a/earthkit/data/sources/wekeocds.py b/earthkit/data/sources/wekeocds.py new file mode 100644 index 00000000..d86b2caa --- /dev/null +++ b/earthkit/data/sources/wekeocds.py @@ -0,0 +1,131 @@ +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import os + +from hda.api import DataOrderRequest + +from earthkit.data.core.thread import SoftThreadPool +from earthkit.data.decorators import normalize +from earthkit.data.utils import tqdm + +from .file import FileSource +from .wekeo import EXTENSIONS +from .wekeo import ApiClient as WekeoClient +from .wekeo import HDAAPIKeyPrompt + + +class ApiClient(WekeoClient): + name = "wekeocds" + + def __int__(self, *args, **kwargs): + super().__init__(self, *args, **kwargs) + + def retrieve(self, name, request, target=None): + rq = { + "datasetId": name, + "multiStringSelectValues": [ + { + "name": _name, + "value": _value if isinstance(_value, list) else [_value], + } + for _name, _value in request.items() + ], + } + if "area" in request: + rq.update({"boundingBoxValues": {"name": "area", "bbox": request["area"]}}) + matches = self.search(rq) + out = [] + for result in matches.results: + query = {"jobId": matches.job_id, "uri": result["url"]} + # matches.debug(result) + url = DataOrderRequest(self).run(query) + out.append( + self.stream(result.get("filename"), result.get("size"), target, *url) + ) + return [os.path.abspath(_) for _ in out] + + +class WekeoCdsRetriever(FileSource): + sphinxdoc = """ + WekeoCdsRetriever + """ + + @staticmethod + def client(): + prompt = HDAAPIKeyPrompt() + prompt.check() + try: + return ApiClient() + except Exception as e: + if ".hdarc" in str(e): + prompt.ask_user_and_save() + return ApiClient() + + raise + + def __init__(self, dataset, *args, **kwargs): + super().__init__() + + assert isinstance(dataset, str) + if len(args): + assert len(args) == 1 + assert isinstance(args[0], dict) + assert not kwargs + kwargs = args[0] + + requests = self.requests(**kwargs) + + self.client() # Trigger password prompt before thraeding + + nthreads = min(self.settings("number-of-download-threads"), len(requests)) + + if nthreads < 2: + self.path = [self._retrieve(dataset, r) for r in requests] + else: + with SoftThreadPool(nthreads=nthreads) as 
pool: + futures = [pool.submit(self._retrieve, dataset, r) for r in requests] + + iterator = (f.result() for f in futures) + self.path = list(tqdm(iterator, leave=True, total=len(requests))) + + def _retrieve(self, dataset, request): + def retrieve(target, args): + self.client().retrieve(args[0], args[1], target) + + return self.cache_file( + retrieve, + (dataset, request), + extension=EXTENSIONS.get(request.get("format"), ".cache"), + ) + + @normalize("date", "date-list(%Y-%m-%d)") + @normalize("area", "bounding-box(list)") + def requests(self, **kwargs): + if "year" in kwargs: + if "month" not in kwargs: + kwargs["month"] = [f"{i+1:02}" for i in range(0, 12)] + if "day" not in kwargs: + kwargs["day"] = [f"{i+1:02}" for i in range(0, 31)] + + split_on = kwargs.pop("split_on", None) + if split_on is None or not isinstance(kwargs.get(split_on), (list, tuple)): + return [kwargs] + + result = [] + + for v in kwargs[split_on]: + r = dict(**kwargs) + r[split_on] = v + result.append(r) + + return result + + +source = WekeoCdsRetriever diff --git a/earthkit/data/testing.py b/earthkit/data/testing.py index 376818f3..013f3ff9 100644 --- a/earthkit/data/testing.py +++ b/earthkit/data/testing.py @@ -14,7 +14,7 @@ from importlib import import_module from unittest.mock import patch -from earthkit.data import from_source +from earthkit.data import from_object, from_source from earthkit.data.readers.text import TextReader from earthkit.data.sources.empty import EmptySource @@ -80,6 +80,7 @@ def modules_installed(*modules): NO_MARS = not os.path.exists(os.path.expanduser("~/.ecmwfapirc")) NO_CDS = not os.path.exists(os.path.expanduser("~/.cdsapirc")) +NO_HDA = not os.path.exists(os.path.expanduser("~/.hdarc")) IN_GITHUB = os.environ.get("GITHUB_WORKFLOW") is not None try: import ecmwf.opendata # noqa @@ -96,6 +97,8 @@ def modules_installed(*modules): except Exception: NO_FDB = True +NO_POLYTOPE = not os.path.exists(os.path.expanduser("~/.polytopeapirc")) + def MISSING(*modules): 
return not modules_installed(*modules) @@ -134,6 +137,15 @@ def check_unsafe_archives(extension): check(ds) +def load_nc_or_xr_source(path, mode): + if mode == "nc": + return from_source("file", path) + else: + import xarray + + return from_object(xarray.open_dataset(path)) + + def main(path): import sys diff --git a/earthkit/data/wrappers/xarray.py b/earthkit/data/wrappers/xarray.py index 0cfa2022..189e7c6d 100644 --- a/earthkit/data/wrappers/xarray.py +++ b/earthkit/data/wrappers/xarray.py @@ -9,6 +9,7 @@ # from emohawk.metadata import AXES, COMPONENTS +from earthkit.data.readers import netcdf from earthkit.data.wrappers import Wrapper @@ -133,13 +134,20 @@ def to_numpy(self): def wrapper(data, *args, **kwargs): import xarray as xr + ds = None if isinstance(data, xr.Dataset): - return XArrayDatasetWrapper(data, *args, **kwargs) - - if isinstance(data, xr.DataArray): + ds = data + elif isinstance(data, xr.DataArray): try: - return XArrayDatasetWrapper(data.to_dataset(), *args, **kwargs) + ds = data.to_dataset() except ValueError: return XArrayDataArrayWrapper(data, *args, **kwargs) + if ds is not None: + fs = netcdf.XArrayFieldList(ds, **kwargs) + if fs.has_fields(): + return fs + else: + return XArrayDatasetWrapper(ds, *args, **kwargs) + return None diff --git a/earthkit/data/writers/grib.py b/earthkit/data/writers/grib.py index e4fc547f..9ca6b792 100644 --- a/earthkit/data/writers/grib.py +++ b/earthkit/data/writers/grib.py @@ -13,14 +13,13 @@ class GribWriter(Writer): METADATA_TYPE = "GribMetadata" - def write(self, f, values, metadata, check_nans=False): + def write(self, f, values, metadata, check_nans=True): handle = metadata._handle if check_nans: import numpy as np if np.isnan(values).any(): - # missing_value = np.finfo(values.dtype).max - missing_value = 9999 + missing_value = handle.MISSING_VALUE values = np.nan_to_num(values, nan=missing_value) handle.set_double("missingValue", missing_value) handle.set_long("bitmapPresent", 1) diff --git 
a/environment.yml b/environment.yml index 48bb9e4d..148c41a4 100644 --- a/environment.yml +++ b/environment.yml @@ -20,10 +20,12 @@ dependencies: - jupyterlab - ecmwf-api-client>=1.6.1 - cdsapi +- hda - pip: - git+https://github.com/ecmwf/multiurl - git+https://github.com/ecmwf/pyfdb - ecmwf-opendata>=0.1.2 + - polytope-client>=0.7.1 - tqdm - markdown - make @@ -35,8 +37,9 @@ dependencies: - pytest-cov - pytest-forked - pytest-timeout -- sphinx -- sphinx-autoapi +- sphinx>=7.2.6 +- pip: + - sphinx-autoapi>=3.0.0 - sphinx_rtd_theme - sphinxcontrib-apidoc - nbformat diff --git a/pytest.ini b/pytest.ini index c2cb6780..09ce5741 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,4 +6,5 @@ markers = ftp: test that used FTP. FTP is an old protocol and is not supported by most recent firewalls. notebook: testing notebooks can be slow. But needs to be performed to ensure that the documention is tested. no_cache_init: a test where the cache is not initialised. Must be run with --forked. + no_eccodes: a test which should pass when ecCodes is not installed testpaths = tests diff --git a/setup.cfg b/setup.cfg index ba175aad..15ec8d1c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,9 +26,11 @@ install_requires = eccodes>=1.5.0 ecmwf-api-client>=1.6.1 ecmwf-opendata>=0.1.2 + polytope-client>=0.7.1 dask entrypoints filelock + hda jinja2 markdown multiurl diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py index 0da99685..949611ad 100644 --- a/tests/core/test_cache.py +++ b/tests/core/test_cache.py @@ -198,6 +198,69 @@ def __repr__(self): # the problem still occurs! 
+def test_cache_zip_file_overwritten_1(): + with temp_directory() as tmp_dir: + import shutil + import zipfile + + # copy input data to work dir + grb1_path = os.path.join(tmp_dir, "test.grib") + shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path) + + grb2_path = os.path.join(tmp_dir, "test6.grib") + shutil.copyfile(earthkit_examples_file("test6.grib"), grb2_path) + + # first pass + zip_path = os.path.join(tmp_dir, "test.zip") + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb1_path) + + ds = from_source("file", zip_path) + assert len(ds) == 2 + ds_path = ds.path + + # second pass - same zip file, the grib should be read + # from the cache + ds1 = from_source("file", zip_path) + assert len(ds1) == 2 + assert ds1.path == ds_path + + # third pass - same zipfile path with different contents + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb2_path) + + ds2 = from_source("file", zip_path) + assert len(ds2) == 6 + assert ds2.path != ds_path + + +def test_cache_zip_file_changed_modtime(): + with temp_directory() as tmp_dir: + import shutil + import zipfile + + # copy input data to work dir + grb1_path = os.path.join(tmp_dir, "test.grib") + shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path) + + # first pass + zip_path = os.path.join(tmp_dir, "test.zip") + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb1_path) + + ds = from_source("file", zip_path) + assert len(ds) == 2 + ds_path = ds.path + + # second pass - changed modtime + # TODO: here we have to assume more than 1 ns passed since the + # zip file was created. 
+ os.utime(zip_path, None) + ds2 = from_source("file", zip_path) + assert len(ds2) == 2 + assert ds2.path != ds_path + + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/documentation/test_examples.py b/tests/documentation/test_examples.py index acd09ee3..e5d21121 100644 --- a/tests/documentation/test_examples.py +++ b/tests/documentation/test_examples.py @@ -20,7 +20,9 @@ "xml2rst.py", "actions.py", "generate-examples-maps.py", - "settings-2-set.py", + "settings-set.py", + "settings-reset.py", + "settings-temporary.py", "xref.py", ] diff --git a/tests/documentation/test_notebooks.py b/tests/documentation/test_notebooks.py index fcaba58e..2b4f9727 100644 --- a/tests/documentation/test_notebooks.py +++ b/tests/documentation/test_notebooks.py @@ -22,7 +22,14 @@ EXAMPLES = earthkit_file("docs", "examples") -SKIP = ["fdb.ipynb", "mars.ipynb", "cds.ipynb", "ads.ipynb"] +SKIP = [ + "fdb.ipynb", + "mars.ipynb", + "cds.ipynb", + "ads.ipynb", + "wekeo.ipynb", + "polytope.ipynb", +] def notebooks_list(): diff --git a/tests/environment-unit-tests.yml b/tests/environment-unit-tests.yml index 0822c067..8dde1b22 100644 --- a/tests/environment-unit-tests.yml +++ b/tests/environment-unit-tests.yml @@ -21,10 +21,12 @@ dependencies: - jupyterlab - ecmwf-api-client>=1.6.1 - cdsapi +- hda - pip: - git+https://github.com/ecmwf/multiurl - git+https://github.com/ecmwf/pyfdb - ecmwf-opendata>=0.1.2 + - polytope-client>=0.7.1 - tqdm - markdown - make diff --git a/tests/grib/grib_fixtures.py b/tests/grib/grib_fixtures.py new file mode 100644 index 00000000..c923cc52 --- /dev/null +++ b/tests/grib/grib_fixtures.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + + +from earthkit.data import from_source +from earthkit.data.core.fieldlist import FieldList +from earthkit.data.testing import earthkit_examples_file, earthkit_test_data_file + + +def load_numpy_fieldlist(path): + ds = from_source("file", path) + return FieldList.from_numpy( + ds.values, [m.override(generatingProcessIdentifier=120) for m in ds.metadata()] + ) + + +def load_file_or_numpy_fs(filename, mode, folder="example"): + if folder == "example": + path = earthkit_examples_file(filename) + elif folder == "data": + path = earthkit_test_data_file(filename) + else: + raise ValueError("Invalid folder={folder}") + + if mode == "file": + return from_source("file", path) + else: + return load_numpy_fieldlist(path) diff --git a/tests/grib/test_grib_concat.py b/tests/grib/test_grib_concat.py index 2c1630bf..01bd0260 100644 --- a/tests/grib/test_grib_concat.py +++ b/tests/grib/test_grib_concat.py @@ -9,19 +9,130 @@ # nor does it submit to any jurisdiction. 
# +import os + +import pytest + from earthkit.data import from_source +from earthkit.data.core.fieldlist import FieldList +from earthkit.data.core.temporary import temp_file from earthkit.data.testing import earthkit_examples_file -def test_grib_concat(): +def _check_save_to_disk(ds, len_ref, meta_ref): + # save to disk + tmp = temp_file() + ds.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == len_ref + assert r_tmp.metadata("shortName") == meta_ref + r_tmp = None + + +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_grib_concat(mode): ds1 = from_source("file", earthkit_examples_file("test.grib")) ds2 = from_source("file", earthkit_examples_file("test6.grib")) - ds = ds1 + ds2 + if mode == "oper": + ds = ds1 + ds2 + else: + ds = from_source("multi", ds1, ds2) + + # check metadata assert len(ds) == 8 md = ds1.metadata("param") + ds2.metadata("param") assert ds.metadata("param") == md + # check slice + r = ds[1] + assert r.metadata("param") == "msl" + + r = ds[1:3] + assert len(r) == 2 + assert r.metadata("param") == ["msl", "t"] + assert r[0].metadata("param") == "msl" + assert r[1].metadata("param") == "t" + + # check sel + r = ds.sel(param="2t") + assert len(r) == 1 + assert r.metadata("param") == ["2t"] + assert r[0].metadata("param") == "2t" + + # check saving to disk + _check_save_to_disk(ds, 8, md) + + +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_grib_concat_3a(mode): + ds1 = from_source("file", earthkit_examples_file("test.grib")) + ds2 = from_source("file", earthkit_examples_file("test6.grib")) + ds3 = from_source("file", earthkit_examples_file("tuv_pl.grib")) + md = ds1.metadata("param") + ds2.metadata("param") + ds3.metadata("param") + + if mode == "oper": + ds = ds1 + ds2 + ds = ds + ds3 + else: + ds = from_source("multi", ds1, ds2) + ds = from_source("multi", ds, ds3) + + assert len(ds) == 26 + assert ds.metadata("param") == md + _check_save_to_disk(ds, 26, md) + 
+ +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_grib_concat_3b(mode): + ds1 = from_source("file", earthkit_examples_file("test.grib")) + ds2 = from_source("file", earthkit_examples_file("test6.grib")) + ds3 = from_source("file", earthkit_examples_file("tuv_pl.grib")) + md = ds1.metadata("param") + ds2.metadata("param") + ds3.metadata("param") + + if mode == "oper": + ds = ds1 + ds2 + ds3 + else: + ds = from_source("multi", ds1, ds2, ds3) + + assert len(ds) == 26 + assert ds.metadata("param") == md + _check_save_to_disk(ds, 26, md) + + +def test_grib_from_empty_1(): + ds_e = FieldList() + ds = from_source("file", earthkit_examples_file("test.grib")) + md = ds.metadata("param") + + ds1 = ds_e + ds + assert id(ds1) == id(ds) + assert len(ds1) == 2 + _check_save_to_disk(ds1, 2, md) + + +def test_grib_from_empty_2(): + ds_e = FieldList() + ds = from_source("file", earthkit_examples_file("test.grib")) + md = ds.metadata("param") + + ds1 = ds + ds_e + assert id(ds1) == id(ds) + assert len(ds1) == 2 + _check_save_to_disk(ds1, 2, md) + + +def test_grib_from_empty_3(): + ds_e = FieldList() + ds1 = from_source("file", earthkit_examples_file("test.grib")) + ds2 = from_source("file", earthkit_examples_file("test6.grib")) + md = ds1.metadata("param") + ds2.metadata("param") + + ds3 = ds_e + ds1 + ds2 + assert len(ds3) == 8 + _check_save_to_disk(ds3, 8, md) + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/grib/test_grib_convert.py b/tests/grib/test_grib_convert.py index dda1409f..251f3855 100644 --- a/tests/grib/test_grib_convert.py +++ b/tests/grib/test_grib_convert.py @@ -9,15 +9,22 @@ # nor does it submit to any jurisdiction. 
# +import os +import sys + import numpy as np +import pytest -from earthkit.data import from_source -from earthkit.data.testing import earthkit_test_data_file +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 -def test_icon_to_xarray(): +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_icon_to_xarray(mode): # test the conversion to xarray for an icon (unstructured grid) grib file. - g = from_source("file", earthkit_test_data_file("test_icon.grib")) + g = load_file_or_numpy_fs("test_icon.grib", mode, folder="data") + ds = g.to_xarray() assert len(ds.data_vars) == 1 # Dataset contains 9 levels and 9 grid points per level @@ -26,8 +33,9 @@ def test_icon_to_xarray(): assert ds["pres"].sizes["values"] == 6 -def test_grib_to_pandas(): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_pandas(mode): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") # all points df = f.to_pandas() @@ -60,3 +68,9 @@ def test_grib_to_pandas(): assert np.isclose(df["lat"][0], 90) assert np.isclose(df["lon"][0], 30) assert np.isclose(df["value"][0], 260.435608) + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main() diff --git a/tests/grib/test_grib_geography.py b/tests/grib/test_grib_geography.py index 7d735f0c..d589a858 100644 --- a/tests/grib/test_grib_geography.py +++ b/tests/grib/test_grib_geography.py @@ -9,13 +9,18 @@ # nor does it submit to any jurisdiction. 
# +import os +import sys + import numpy as np import pytest -from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file, earthkit_test_data_file from earthkit.data.utils import projections +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 + def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert v.shape == shape @@ -24,9 +29,10 @@ def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert np.isclose(v.mean(), meanv, eps) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("index", [0, None]) -def test_grib_to_latlon_single(index): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_to_latlon_single(mode, index): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") eps = 1e-5 g = f[index] if index is not None else f @@ -34,6 +40,8 @@ def test_grib_to_latlon_single(index): assert isinstance(v, dict) assert isinstance(v["lon"], np.ndarray) assert isinstance(v["lat"], np.ndarray) + assert v["lon"].dtype == np.float64 + assert v["lat"].dtype == np.float64 check_array( v["lon"], (84,), @@ -52,9 +60,10 @@ def test_grib_to_latlon_single(index): ) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("index", [0, None]) -def test_grib_to_latlon_single_shape(index): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_to_latlon_single_shape(mode, index): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") g = f[index] if index is not None else f v = g.to_latlon() @@ -64,40 +73,48 @@ def test_grib_to_latlon_single_shape(index): # x assert v["lon"].shape == (7, 12) + assert v["lon"].dtype == np.float64 for x in v["lon"]: assert np.allclose(x, np.linspace(0, 330, 12)) # y assert v["lat"].shape == (7, 12) + assert v["lon"].dtype == np.float64 for i, y in 
enumerate(v["lat"]): assert np.allclose(y, np.ones(12) * (90 - i * 30)) -def test_grib_to_latlon_multi(): - f = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_grib_to_latlon_multi(mode, dtype): + f = load_file_or_numpy_fs("test.grib", mode) - v_ref = f[0].to_latlon(flatten=True) - v = f.to_latlon(flatten=True) + v_ref = f[0].to_latlon(flatten=True, dtype=dtype) + v = f.to_latlon(flatten=True, dtype=dtype) assert isinstance(v, dict) assert v.keys() == v_ref.keys() assert isinstance(v, dict) assert np.allclose(v["lat"], v_ref["lat"]) assert np.allclose(v["lon"], v_ref["lon"]) + assert v["lat"].dtype == dtype + assert v["lon"].dtype == dtype -def test_grib_to_latlon_multi_non_shared_grid(): - f1 = from_source("file", earthkit_examples_file("test.grib")) - f2 = from_source("file", earthkit_examples_file("test4.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_latlon_multi_non_shared_grid(mode): + f1 = load_file_or_numpy_fs("test.grib", mode) + f2 = load_file_or_numpy_fs("test4.grib", mode) f = f1 + f2 with pytest.raises(ValueError): f.to_latlon() +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("index", [0, None]) -def test_grib_to_points_single(index): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_to_points_single(mode, index): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") eps = 1e-5 g = f[index] if index is not None else f @@ -105,6 +122,8 @@ def test_grib_to_points_single(index): assert isinstance(v, dict) assert isinstance(v["x"], np.ndarray) assert isinstance(v["y"], np.ndarray) + assert v["x"].dtype == np.float64 + assert v["y"].dtype == np.float64 check_array( v["x"], (84,), @@ -123,45 +142,53 @@ def test_grib_to_points_single(index): ) -def test_grib_to_points_unsupported_grid(): - f = 
from_source("file", earthkit_test_data_file("mercator.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_points_unsupported_grid(mode): + f = load_file_or_numpy_fs("mercator.grib", mode, folder="data") with pytest.raises(ValueError): f[0].to_points() -def test_grib_to_points_multi(): - f = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +@pytest.mark.parametrize("dtype", [np.float32, np.float64]) +def test_grib_to_points_multi(mode, dtype): + f = load_file_or_numpy_fs("test.grib", mode) - v_ref = f[0].to_points(flatten=True) - v = f.to_points(flatten=True) + v_ref = f[0].to_points(flatten=True, dtype=dtype) + v = f.to_points(flatten=True, dtype=dtype) assert isinstance(v, dict) assert v.keys() == v_ref.keys() assert isinstance(v, dict) assert np.allclose(v["x"], v_ref["x"]) assert np.allclose(v["y"], v_ref["y"]) + assert v["x"].dtype == dtype + assert v["y"].dtype == dtype -def test_grib_to_points_multi_non_shared_grid(): - f1 = from_source("file", earthkit_examples_file("test.grib")) - f2 = from_source("file", earthkit_examples_file("test4.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_points_multi_non_shared_grid(mode): + f1 = load_file_or_numpy_fs("test.grib", mode) + f2 = load_file_or_numpy_fs("test4.grib", mode) f = f1 + f2 with pytest.raises(ValueError): f.to_points() -def test_bbox(): - ds = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_bbox(mode): + ds = load_file_or_numpy_fs("test.grib", mode) bb = ds.bounding_box() assert len(bb) == 2 for b in bb: assert b.as_tuple() == (73, -27, 33, 45) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("index", [0, None]) -def test_grib_projection_ll(index): - f = from_source("file", earthkit_examples_file("test.grib")) +def test_grib_projection_ll(mode, index): + f = 
load_file_or_numpy_fs("test.grib", mode) if index is not None: g = f[index] @@ -172,8 +199,9 @@ def test_grib_projection_ll(index): ) -def test_grib_projection_mercator(): - f = from_source("file", earthkit_test_data_file("mercator.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_projection_mercator(mode): + f = load_file_or_numpy_fs("mercator.grib", mode, folder="data") projection = f[0].projection() assert isinstance(projection, projections.Mercator) assert projection.parameters == { diff --git a/tests/grib/test_grib_metadata.py b/tests/grib/test_grib_metadata.py index 7167c3e4..5c90786e 100644 --- a/tests/grib/test_grib_metadata.py +++ b/tests/grib/test_grib_metadata.py @@ -10,12 +10,18 @@ # import datetime +import os +import sys import numpy as np import pytest from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file, earthkit_test_data_file +from earthkit.data.testing import earthkit_examples_file + +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): @@ -29,6 +35,7 @@ def repeat_list_items(items, count): return sum([[x] * count for x in items], []) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,expected_value", [ @@ -46,14 +53,15 @@ def repeat_list_items(items, count): (("shortName", "level"), ("2t", 0)), ], ) -def test_grib_metadata_grib(key, expected_value): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_metadata_grib(mode, key, expected_value): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") sn = f.metadata(key) assert sn == [expected_value] sn = f[0].metadata(key) assert sn == expected_value +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,astype,expected_value", [ @@ -67,14 +75,15 @@ def 
test_grib_metadata_grib(key, expected_value): ("level", int, 0), ], ) -def test_grib_metadata_astype_1(key, astype, expected_value): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_metadata_astype_1(mode, key, astype, expected_value): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") sn = f.metadata(key, astype=astype) assert sn == [expected_value] sn = f[0].metadata(key, astype=astype) assert sn == expected_value +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,expected_value", [ @@ -86,12 +95,13 @@ def test_grib_metadata_astype_1(key, astype, expected_value): ("level:int", repeat_list_items([1000, 850, 700, 500, 400, 300], 3)), ], ) -def test_grib_metadata_18(key, expected_value): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_metadata_18(mode, key, expected_value): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) sn = f.metadata(key) assert sn == expected_value +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,astype,expected_value", [ @@ -109,12 +119,13 @@ def test_grib_metadata_18(key, expected_value): ), ], ) -def test_grib_metadata_astype_18(key, astype, expected_value): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_metadata_astype_18(mode, key, astype, expected_value): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) sn = f.metadata(key, astype=astype) assert sn == expected_value +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,expected_value", [ @@ -123,13 +134,14 @@ def test_grib_metadata_astype_18(key, astype, expected_value): ("max:float", 307.18560791015625), ], ) -def test_grib_metadata_double_1(key, expected_value): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_metadata_double_1(mode, key, expected_value): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") r = 
f.metadata(key) assert len(r) == 1 assert np.isclose(r[0], expected_value) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key", [ @@ -138,8 +150,8 @@ def test_grib_metadata_double_1(key, expected_value): ("max:float"), ], ) -def test_grib_metadata_double_18(key): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_metadata_double_18(mode, key): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) ref = [ 320.5641784667969, @@ -166,6 +178,7 @@ def test_grib_metadata_double_18(key): np.testing.assert_allclose(r, ref, 0.001) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "key,astype", [ @@ -173,8 +186,8 @@ def test_grib_metadata_double_18(key): ("max", float), ], ) -def test_grib_metadata_double_astype_18(key, astype): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_metadata_double_astype_18(mode, key, astype): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) ref = [ 320.5641784667969, @@ -201,10 +214,12 @@ def test_grib_metadata_double_astype_18(key, astype): np.testing.assert_allclose(r, ref, 0.001) -def test_grib_get_long_array_1(): - f = from_source( - "file", earthkit_test_data_file("rgg_small_subarea_cellarea_ref.grib") +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_get_long_array_1(mode): + f = load_file_or_numpy_fs( + "rgg_small_subarea_cellarea_ref.grib", mode, folder="data" ) + assert len(f) == 1 pl = f.metadata("pl") assert len(pl) == 1 @@ -217,8 +232,10 @@ def test_grib_get_long_array_1(): assert pl[72] == 312 -def test_grib_get_double_array_values_1(): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_get_double_array_values_1(mode): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") + v = f.metadata("values") assert len(v) == 1 v = v[0] @@ -234,8 +251,9 @@ def test_grib_get_double_array_values_1(): 
) -def test_grib_get_double_array_values_18(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_get_double_array_values_18(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) v = f.metadata("values") assert isinstance(v, list) assert len(v) == 18 @@ -263,8 +281,9 @@ def test_grib_get_double_array_values_18(): ) -def test_grib_get_double_array_1(): - f = from_source("file", earthkit_test_data_file("ml_data.grib"))[0] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_get_double_array_1(mode): + f = load_file_or_numpy_fs("ml_data.grib", mode, folder="data")[0] # f is now a field! v = f.metadata("pv") assert isinstance(v, np.ndarray) @@ -275,8 +294,9 @@ def test_grib_get_double_array_1(): assert np.isclose(v[275], 1.0) -def test_grib_get_double_array_18(): - f = from_source("file", earthkit_test_data_file("ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_get_double_array_18(mode): + f = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") v = f.metadata("pv") assert isinstance(v, list) assert len(v) == 36 @@ -291,8 +311,9 @@ def test_grib_get_double_array_18(): assert np.isclose(v[17][20], 316.4207458496094, eps) -def test_grib_metadata_type_qualifier(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib"))[0:4] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_type_qualifier(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode)[0:4] # to str r = f.metadata("centre:s") @@ -329,8 +350,9 @@ def test_grib_metadata_type_qualifier(): assert all(isinstance(x, float) for x in r) -def test_grib_metadata_astype(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib"))[0:4] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_astype(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode)[0:4] # to str r = f.metadata("centre", astype=None) @@ 
-362,8 +384,11 @@ def test_grib_metadata_astype(): f.metadata(["level", "cfVarName", "centre"], astype=(int, None)) -def test_grib_metadata_generic(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib"))[0:4] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_generic(mode): + f_full = load_file_or_numpy_fs("tuv_pl.grib", mode) + + f = f_full[0:4] sn = f.metadata("shortName") assert sn == ["t", "u", "v", "t"] @@ -377,8 +402,8 @@ def test_grib_metadata_generic(): assert lg == [(1000, "t"), (1000, "u"), (1000, "v"), (850, "t")] # single fieldlist - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) - f = f.sel(count=[1]) + f = f_full + f = f.sel(param="t", level=1000) lg = f.metadata(["level", "cfVarName"]) assert lg == [[1000, "t"]] @@ -388,8 +413,9 @@ def test_grib_metadata_generic(): assert lg == [1000, "t"] -def test_grib_metadata_missing_value(): - f = from_source("file", earthkit_test_data_file("ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_missing_value(mode): + f = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") with pytest.raises(KeyError): f[0].metadata("scaleFactorOfSecondFixedSurface") @@ -398,8 +424,9 @@ def test_grib_metadata_missing_value(): assert v is None -def test_grib_metadata_missing_key(): - f = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_missing_key(mode): + f = load_file_or_numpy_fs("test.grib", mode) with pytest.raises(KeyError): f[0].metadata("_badkey_") @@ -408,8 +435,9 @@ def test_grib_metadata_missing_key(): assert v == 0 -def test_grib_metadata_namespace(): - f = from_source("file", earthkit_examples_file("test6.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_metadata_namespace(mode): + f = load_file_or_numpy_fs("test6.grib", mode) r = f[0].metadata(namespace="vertical") ref = {"level": 1000, 
"typeOfLevel": "isobaricInhPa"} @@ -483,8 +511,9 @@ def test_grib_metadata_namespace(): assert "must be a str when key specified" in str(excinfo.value) -def test_grib_datetime(): - s = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_datetime(mode): + s = load_file_or_numpy_fs("test.grib", mode) ref = { "base_time": [datetime.datetime(2020, 5, 13, 12)], @@ -512,15 +541,17 @@ def test_grib_datetime(): assert s.datetime() == ref -def test_grib_valid_datetime(): - ds = from_source("file", earthkit_test_data_file("t_time_series.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_valid_datetime(mode): + ds = load_file_or_numpy_fs("t_time_series.grib", mode, folder="data") f = ds[4] assert f.metadata("valid_datetime") == datetime.datetime(2020, 12, 21, 18) -def test_message(): - f = from_source("file", earthkit_examples_file("test.grib")) +@pytest.mark.parametrize("mode", ["file"]) +def test_message(mode): + f = load_file_or_numpy_fs("test.grib", mode) v = f[0].message() assert len(v) == 526 assert v[:4] == b"GRIB" diff --git a/tests/grib/test_grib_order_by.py b/tests/grib/test_grib_order_by.py index a5a47474..414d2cbd 100644 --- a/tests/grib/test_grib_order_by.py +++ b/tests/grib/test_grib_order_by.py @@ -10,16 +10,22 @@ # import datetime +import os +import sys import pytest from earthkit.data import from_source -from earthkit.data.testing import earthkit_file + +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 # @pytest.mark.skipif(("GITHUB_WORKFLOW" in os.environ) or True, reason="Not yet ready") -def test_grib_order_by_single_message(): - s = from_source("file", earthkit_file("tests/data/test_single.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_order_by_single_message(mode): + s = load_file_or_numpy_fs("test_single.grib", mode, folder="data") r = 
s.order_by("shortName") assert len(r) == 1 @@ -47,6 +53,7 @@ def __call__(self, x, y): return -1 +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "params,expected_meta", [ @@ -93,10 +100,11 @@ def __call__(self, x, y): ], ) def test_grib_order_by_single_file_( + mode, params, expected_meta, ): - f = from_source("file", earthkit_file("docs/examples/test6.grib")) + f = load_file_or_numpy_fs("test6.grib", mode) g = f.order_by(params) assert len(g) == len(f) @@ -105,6 +113,7 @@ def test_grib_order_by_single_file_( assert g.metadata(k) == v +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "params,expected_meta", [ @@ -133,9 +142,9 @@ def test_grib_order_by_single_file_( ), ], ) -def test_grib_order_by_multi_file(params, expected_meta): - f1 = from_source("file", earthkit_file("docs/examples/test4.grib")) - f2 = from_source("file", earthkit_file("docs/examples/test6.grib")) +def test_grib_order_by_multi_file(mode, params, expected_meta): + f1 = load_file_or_numpy_fs("test4.grib", mode) + f2 = load_file_or_numpy_fs("test6.grib", mode) f = from_source("multi", [f1, f2]) g = f.order_by(params) @@ -145,8 +154,9 @@ def test_grib_order_by_multi_file(params, expected_meta): assert g.metadata(k) == v -def test_grib_order_by_with_sel(): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_order_by_with_sel(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) g = f.sel(level=500) assert len(g) == 3 @@ -161,8 +171,9 @@ def test_grib_order_by_with_sel(): assert r.metadata("shortName") == ["v", "u", "t"] -def test_grib_order_by_valid_datetime(): - f = from_source("file", earthkit_file("tests/data/t_time_series.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_order_by_valid_datetime(mode): + f = load_file_or_numpy_fs("t_time_series.grib", mode, folder="data") g = 
f.order_by(valid_datetime="descending") assert len(g) == 10 diff --git a/tests/grib/test_grib_sel.py b/tests/grib/test_grib_sel.py index f6c361b1..20690187 100644 --- a/tests/grib/test_grib_sel.py +++ b/tests/grib/test_grib_sel.py @@ -10,23 +10,31 @@ # import datetime +import os +import sys import numpy as np import pytest from earthkit.data import from_source -from earthkit.data.testing import earthkit_file +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 # @pytest.mark.skipif(("GITHUB_WORKFLOW" in os.environ) or True, reason="Not yet ready") -def test_grib_sel_single_message(): - s = from_source("file", earthkit_file("tests/data/test_single.grib")) + + +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_single_message(mode): + s = load_file_or_numpy_fs("test_single.grib", mode, folder="data") r = s.sel(shortName="2t") assert len(r) == 1 assert r[0].metadata("shortName") == "2t" +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "params,expected_meta,metadata_keys", [ @@ -54,8 +62,8 @@ def test_grib_sel_single_message(): ), ], ) -def test_grib_sel_single_file_1(params, expected_meta, metadata_keys): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +def test_grib_sel_single_file_1(mode, params, expected_meta, metadata_keys): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) g = f.sel(**params) assert len(g) == len(expected_meta) @@ -68,8 +76,9 @@ def test_grib_sel_single_file_1(params, expected_meta, metadata_keys): return -def test_grib_sel_single_file_2(): - f = from_source("file", earthkit_file("tests/data/t_time_series.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_single_file_2(mode): + f = load_file_or_numpy_fs("t_time_series.grib", mode, folder="data") g = f.sel(shortName=["t"], step=[3, 6]) assert len(g) == 2 @@ -88,8 +97,10 @@ def test_grib_sel_single_file_2(): ] -def 
test_grib_sel_single_file_as_dict(): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_single_file_as_dict(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) + g = f.sel({"shortName": "t", "level": [500, 700], "mars.type": "an"}) assert len(g) == 2 assert g.metadata(["shortName", "level:l", "mars.type"]) == [ @@ -98,6 +109,7 @@ def test_grib_sel_single_file_as_dict(): ] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "param_id,level,expected_meta", [ @@ -109,8 +121,8 @@ def test_grib_sel_single_file_as_dict(): (131, (slice(510, 520)), []), ], ) -def test_grib_sel_slice_single_file(param_id, level, expected_meta): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +def test_grib_sel_slice_single_file(mode, param_id, level, expected_meta): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) g = f.sel(paramId=param_id, level=level) assert len(g) == len(expected_meta) @@ -118,9 +130,10 @@ def test_grib_sel_slice_single_file(param_id, level, expected_meta): assert g.metadata(["paramId", "level"]) == expected_meta -def test_grib_sel_multi_file(): - f1 = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) - f2 = from_source("file", earthkit_file("tests/data/ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_multi_file(mode): + f1 = load_file_or_numpy_fs("tuv_pl.grib", mode) + f2 = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") f = from_source("multi", [f1, f2]) # single resulting field @@ -133,9 +146,11 @@ def test_grib_sel_multi_file(): assert np.allclose(d, np.zeros(len(d))) -def test_grib_sel_slice_multi_file(): - f1 = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) - f2 = from_source("file", earthkit_file("tests/data/ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_slice_multi_file(mode): + f1 = 
load_file_or_numpy_fs("tuv_pl.grib", mode) + f2 = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") + f = from_source("multi", [f1, f2]) g = f.sel(shortName="t", level=slice(56, 62)) @@ -146,9 +161,10 @@ def test_grib_sel_slice_multi_file(): ] -def test_grib_sel_date(): +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_date(mode): # date and time - f = from_source("file", earthkit_file("tests/data/t_time_series.grib")) + f = load_file_or_numpy_fs("t_time_series.grib", mode, folder="data") g = f.sel(date=20201221, time=1200, step=9) # g = f.sel(date="20201221", time="12", step="9") @@ -163,8 +179,9 @@ def test_grib_sel_date(): assert g.metadata(ref_keys) == ref -def test_grib_sel_valid_datetime(): - f = from_source("file", earthkit_file("tests/data/t_time_series.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_sel_valid_datetime(mode): + f = load_file_or_numpy_fs("t_time_series.grib", mode, folder="data") g = f.sel(valid_datetime=datetime.datetime(2020, 12, 21, 21)) assert len(g) == 2 @@ -178,14 +195,16 @@ def test_grib_sel_valid_datetime(): assert g.metadata(ref_keys) == ref -def test_grib_isel_single_message(): - s = from_source("file", earthkit_file("tests/data/test_single.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_isel_single_message(mode): + s = load_file_or_numpy_fs("test_single.grib", mode, folder="data") r = s.isel(shortName=0) assert len(r) == 1 assert r[0].metadata("shortName") == "2t" +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "params,expected_meta,metadata_keys", [ @@ -222,8 +241,8 @@ def test_grib_isel_single_message(): ), ], ) -def test_grib_isel_single_file(params, expected_meta, metadata_keys): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +def test_grib_isel_single_file(mode, params, expected_meta, metadata_keys): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) g = f.isel(**params) assert 
len(g) == len(expected_meta) @@ -235,6 +254,7 @@ def test_grib_isel_single_file(params, expected_meta, metadata_keys): assert g.metadata(keys) == expected_meta +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "param_id,level,expected_meta", [ @@ -246,8 +266,8 @@ def test_grib_isel_single_file(params, expected_meta, metadata_keys): (1, (slice(None, None, 2)), [[131, 850], [131, 500], [131, 300]]), ], ) -def test_grib_isel_slice_single_file(param_id, level, expected_meta): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +def test_grib_isel_slice_single_file(mode, param_id, level, expected_meta): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) g = f.isel(paramId=param_id, level=level) assert len(g) == len(expected_meta) @@ -255,8 +275,9 @@ def test_grib_isel_slice_single_file(param_id, level, expected_meta): assert g.metadata(["paramId", "level"]) == expected_meta -def test_grib_isel_slice_invalid(): - f = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_isel_slice_invalid(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) with pytest.raises(IndexError): f.isel(level=500) @@ -265,9 +286,10 @@ def test_grib_isel_slice_invalid(): f.isel(level="a") -def test_grib_isel_multi_file(): - f1 = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) - f2 = from_source("file", earthkit_file("tests/data/ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_isel_multi_file(mode): + f1 = load_file_or_numpy_fs("tuv_pl.grib", mode) + f2 = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") f = from_source("multi", [f1, f2]) # single resulting field @@ -280,9 +302,10 @@ def test_grib_isel_multi_file(): assert np.allclose(d, np.zeros(len(d))) -def test_grib_isel_slice_multi_file(): - f1 = from_source("file", earthkit_file("docs/examples/tuv_pl.grib")) - f2 = from_source("file", 
earthkit_file("tests/data/ml_data.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_isel_slice_multi_file(mode): + f1 = load_file_or_numpy_fs("tuv_pl.grib", mode) + f2 = load_file_or_numpy_fs("ml_data.grib", mode, folder="data") f = from_source("multi", [f1, f2]) g = f.isel(shortName=1, level=slice(20, 22)) diff --git a/tests/grib/test_grib_slice.py b/tests/grib/test_grib_slice.py index f2968591..0dbcbd5a 100644 --- a/tests/grib/test_grib_slice.py +++ b/tests/grib/test_grib_slice.py @@ -9,6 +9,8 @@ # nor does it submit to any jurisdiction. # +import os +import sys import numpy as np import pytest @@ -16,7 +18,12 @@ from earthkit.data import from_source from earthkit.data.testing import earthkit_examples_file +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 + +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "index,expected_meta", [ @@ -27,8 +34,9 @@ (-5, ["u", 400]), ], ) -def test_grib_single_index(index, expected_meta): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_single_index(mode, index, expected_meta): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) + # f = from_source("file", earthkit_examples_file("tuv_pl.grib")) r = f[index] assert r.metadata(["shortName", "level"]) == expected_meta @@ -38,12 +46,14 @@ def test_grib_single_index(index, expected_meta): # assert np.isclose(v[1088], 304.5642, eps) -def test_grib_single_index_bad(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_single_index_bad(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) with pytest.raises(IndexError): f[27] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "indexes,expected_meta", [ @@ -55,8 +65,8 @@ def test_grib_single_index_bad(): (slice(14, None), [["v", 400], ["t", 300], ["u", 300], ["v", 
300]]), ], ) -def test_grib_slice_single_file(indexes, expected_meta): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_slice_single_file(mode, indexes, expected_meta): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) r = f[indexes] assert len(r) == 4 assert r.metadata(["shortName", "level"]) == expected_meta @@ -91,12 +101,14 @@ def test_grib_slice_multi_file(indexes, expected_meta): assert f.metadata("shortName") == ["2t", "msl", "t", "z", "t", "z"] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "indexes1,indexes2", [(np.array([1, 16, 5, 9]), np.array([1, 3])), ([1, 16, 5, 9], [1, 3])], ) -def test_grib_array_indexing(indexes1, indexes2): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_array_indexing(mode, indexes1, indexes2): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) + r = f[indexes1] assert len(r) == 4 assert r.metadata("shortName") == ["u", "u", "v", "t"] @@ -106,15 +118,17 @@ def test_grib_array_indexing(indexes1, indexes2): assert r1.metadata("shortName") == ["u", "t"] +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("indexes", [(np.array([1, 19, 5, 9])), ([1, 19, 5, 9])]) -def test_grib_array_indexing_bad(indexes): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_array_indexing_bad(mode, indexes): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) with pytest.raises(IndexError): f[indexes] -def test_grib_fieldlist_iterator(): - g = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_fieldlist_iterator(mode): + g = load_file_or_numpy_fs("tuv_pl.grib", mode) sn = g.metadata("shortName") assert len(sn) == 18 iter_sn = [f.metadata("shortName") for f in g] @@ -124,11 +138,12 @@ def test_grib_fieldlist_iterator(): assert iter_sn == sn -def test_grib_fieldlist_iterator_with_zip(): - # this tests something different with the 
iterator - this does not try to +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_fieldlist_iterator_with_zip(mode): + # test something different to the iterator - does not try to # 'go off the edge' of the fieldlist, because the length is determined by # the list of levels - g = from_source("file", earthkit_examples_file("tuv_pl.grib")) + g = load_file_or_numpy_fs("tuv_pl.grib", mode) ref_levs = g.metadata("level") assert len(ref_levs) == 18 levs1 = [] @@ -140,9 +155,10 @@ def test_grib_fieldlist_iterator_with_zip(): assert levs2 == ref_levs -def test_grib_fieldlist_iterator_with_zip_multiple(): +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_fieldlist_iterator_with_zip_multiple(mode): # same as test_fieldlist_iterator_with_zip() but multiple times - g = from_source("file", earthkit_examples_file("tuv_pl.grib")) + g = load_file_or_numpy_fs("tuv_pl.grib", mode) ref_levs = g.metadata("level") assert len(ref_levs) == 18 for i in range(2): @@ -155,8 +171,9 @@ def test_grib_fieldlist_iterator_with_zip_multiple(): assert levs2 == ref_levs, i -def test_grib_fieldlist_reverse_iterator(): - g = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_fieldlist_reverse_iterator(mode): + g = load_file_or_numpy_fs("tuv_pl.grib", mode) sn = g.metadata("shortName") sn_reversed = list(reversed(sn)) assert sn_reversed[0] == "v" diff --git a/tests/grib/test_grib_summary.py b/tests/grib/test_grib_summary.py index 33261bb7..e32f09b0 100644 --- a/tests/grib/test_grib_summary.py +++ b/tests/grib/test_grib_summary.py @@ -8,14 +8,19 @@ # granted to it by virtue of its status as an intergovernmental organisation # nor does it submit to any jurisdiction. 
+import os +import sys + import pytest -from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 -def test_grib_describe(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_describe(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # full contents df = f.describe() @@ -138,11 +143,12 @@ def test_grib_describe(): assert ref[0] == df[0].to_dict() -def test_grib_ls(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_ls(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # default keys - f1 = f.sel(count=[1, 2, 3, 4]) + f1 = f[0:4] df = f1.ls() ref = { @@ -171,7 +177,7 @@ def test_grib_ls(): assert ref == df.to_dict() # extra keys - f1 = f.sel(count=[1, 2]) + f1 = f[0:2] df = f1.ls(extra_keys=["paramId"]) ref = { @@ -191,8 +197,9 @@ def test_grib_ls(): assert ref == df.to_dict() -def test_grib_ls_keys(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_ls_keys(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # default keys # positive num (=head) @@ -216,8 +223,9 @@ def test_grib_ls_keys(): assert ref == df.to_dict() -def test_grib_ls_namespace(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_ls_namespace(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) df = f.ls(n=2, namespace="vertical") ref = { @@ -236,8 +244,10 @@ def test_grib_ls_namespace(): assert ref == df.to_dict() -def test_grib_ls_invalid_num(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def 
test_grib_ls_invalid_num(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) + with pytest.raises(ValueError): f.ls(n=0) @@ -245,14 +255,16 @@ def test_grib_ls_invalid_num(): f.ls(0) -def test_grib_ls_invalid_arg(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_ls_invalid_arg(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) with pytest.raises(TypeError): f.ls(invalid=1) -def test_grib_ls_num(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_ls_num(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # default keys @@ -297,8 +309,9 @@ def test_grib_ls_num(): assert ref == df.to_dict() -def test_grib_head_num(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_head_num(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # default keys df = f.head(n=2) @@ -321,8 +334,9 @@ def test_grib_head_num(): assert ref == df.to_dict() -def test_grib_tail_num(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_tail_num(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) # default keys df = f.tail(n=2) @@ -345,8 +359,9 @@ def test_grib_tail_num(): assert ref == df.to_dict() -def test_grib_dump(): - f = from_source("file", earthkit_examples_file("test6.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_dump(mode): + f = load_file_or_numpy_fs("test6.grib", mode) namespaces = ( "default", diff --git a/tests/grib/test_grib_values.py b/tests/grib/test_grib_values.py index 3da1749c..e57fddd5 100644 --- a/tests/grib/test_grib_values.py +++ b/tests/grib/test_grib_values.py @@ -9,11 +9,15 @@ # nor does it submit to any jurisdiction. 
# +import os +import sys + import numpy as np import pytest -from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file, earthkit_test_data_file +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from grib_fixtures import load_file_or_numpy_fs # noqa: E402 def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): @@ -23,14 +27,15 @@ def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert np.isclose(v.mean(), meanv, eps) -def test_grib_values_1(): - f = from_source("file", earthkit_test_data_file("test_single.grib")) - +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_values_1(mode): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") eps = 1e-5 # whole file v = f.values assert isinstance(v, np.ndarray) + assert v.dtype == np.float64 assert v.shape == (1, 84) v = v[0].flatten() check_array( @@ -49,14 +54,15 @@ def test_grib_values_1(): assert np.allclose(v, v1, eps) -def test_grib_values_18(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) - +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_values_18(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) eps = 1e-5 # whole file v = f.values assert isinstance(v, np.ndarray) + assert v.dtype == np.float64 assert v.shape == (18, 84) vf = v[0].flatten() check_array( @@ -79,12 +85,14 @@ def test_grib_values_18(): ) -def test_grib_to_numpy_1(): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_numpy_1(mode): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") eps = 1e-5 v = f.to_numpy() assert isinstance(v, np.ndarray) + assert v.dtype == np.float64 v = v[0].flatten() check_array( v, @@ -96,6 +104,7 @@ def test_grib_to_numpy_1(): ) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "first,options, 
expected_shape", [ @@ -107,8 +116,8 @@ def test_grib_to_numpy_1(): (True, {"flatten": False}, (7, 12)), ], ) -def test_grib_to_numpy_1_shape(first, options, expected_shape): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_to_numpy_1_shape(mode, first, options, expected_shape): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") v_ref = f[0].to_numpy().flatten() eps = 1e-5 @@ -116,19 +125,22 @@ def test_grib_to_numpy_1_shape(first, options, expected_shape): data = f[0] if first else f v1 = data.to_numpy(**options) assert isinstance(v1, np.ndarray) + assert v1.dtype == np.float64 assert v1.shape == expected_shape v1 = v1.flatten() assert np.allclose(v_ref, v1, eps) -def test_grib_to_numpy_18(): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_to_numpy_18(mode): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) eps = 1e-5 # whole file v = f.to_numpy(flatten=True) assert isinstance(v, np.ndarray) + assert v.dtype == np.float64 assert v.shape == (18, 84) vf0 = v[0].flatten() check_array( @@ -151,6 +163,7 @@ def test_grib_to_numpy_18(): ) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( "options, expected_shape", [ @@ -172,14 +185,15 @@ def test_grib_to_numpy_18(): ({"flatten": False}, (18, 7, 12)), ], ) -def test_grib_to_numpy_18_shape(options, expected_shape): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_to_numpy_18_shape(mode, options, expected_shape): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) eps = 1e-5 # whole file v = f.to_numpy() assert isinstance(v, np.ndarray) + assert v.dtype == np.float64 assert v.shape == (18, 7, 12) vf0 = f[0].to_numpy().flatten() assert vf0.shape == (84,) @@ -188,6 +202,7 @@ def test_grib_to_numpy_18_shape(options, expected_shape): v1 = f.to_numpy(**options) assert isinstance(v1, np.ndarray) + assert v1.dtype == np.float64 
assert v1.shape == expected_shape vr = v1[0].flatten() assert np.allclose(vf0, vr, eps) @@ -195,9 +210,10 @@ def test_grib_to_numpy_18_shape(options, expected_shape): assert np.allclose(vf15, vr, eps) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_grib_to_numpy_1_dtype(dtype): - f = from_source("file", earthkit_test_data_file("test_single.grib")) +def test_grib_to_numpy_1_dtype(mode, dtype): + f = load_file_or_numpy_fs("test_single.grib", mode, folder="data") v = f[0].to_numpy(dtype=dtype) assert v.dtype == dtype @@ -206,9 +222,10 @@ def test_grib_to_numpy_1_dtype(dtype): assert v.dtype == dtype +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_grib_to_numpy_18_dtype(dtype): - f = from_source("file", earthkit_examples_file("tuv_pl.grib")) +def test_grib_to_numpy_18_dtype(mode, dtype): + f = load_file_or_numpy_fs("tuv_pl.grib", mode) v = f[0].to_numpy(dtype=dtype) assert v.dtype == dtype @@ -217,18 +234,28 @@ def test_grib_to_numpy_18_dtype(dtype): assert v.dtype == dtype +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( - "kwarg,expected_shape", - [({}, (11, 19)), ({"flatten": True}, (209,)), ({"flatten": False}, (11, 19))], + "kwarg,expected_shape,expected_dtype", + [ + ({}, (11, 19), np.float64), + ({"flatten": True}, (209,), np.float64), + ({"flatten": True, "dtype": np.float32}, (209,), np.float32), + ({"flatten": True, "dtype": np.float64}, (209,), np.float64), + ({"flatten": False}, (11, 19), np.float64), + ({"flatten": False, "dtype": np.float32}, (11, 19), np.float32), + ({"flatten": False, "dtype": np.float64}, (11, 19), np.float64), + ], ) -def test_grib_field_data(kwarg, expected_shape): - ds = from_source("file", earthkit_examples_file("test.grib")) +def test_grib_field_data(mode, kwarg, expected_shape, expected_dtype): + ds = load_file_or_numpy_fs("test.grib", mode) 
latlon = ds[0].to_latlon(**kwarg) v = ds[0].to_numpy(**kwarg) d = ds[0].data(**kwarg) assert isinstance(d, np.ndarray) + assert d.dtype == expected_dtype assert len(d) == 3 assert d[0].shape == expected_shape assert np.allclose(d[0], latlon["lat"]) @@ -237,36 +264,42 @@ def test_grib_field_data(kwarg, expected_shape): d = ds[0].data(keys="lat", **kwarg) assert d.shape == expected_shape + assert d.dtype == expected_dtype assert np.allclose(d, latlon["lat"]) d = ds[0].data(keys="lon", **kwarg) assert d.shape == expected_shape + assert d.dtype == expected_dtype assert np.allclose(d, latlon["lon"]) d = ds[0].data(keys="value", **kwarg) assert d.shape == expected_shape + assert d.dtype == expected_dtype assert np.allclose(d, v) d = ds[0].data(keys=("value", "lon"), **kwarg) assert isinstance(d, np.ndarray) + assert d.dtype == expected_dtype assert len(d) == 2 assert np.allclose(d[0], v) assert np.allclose(d[1], latlon["lon"]) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) @pytest.mark.parametrize( - "kwarg,expected_shape", + "kwarg,expected_shape,expected_dtype", [ - ({}, (11, 19)), - ( - {"flatten": True}, - (209,), - ), - ({"flatten": False}, (11, 19)), + ({}, (11, 19), np.float64), + ({"flatten": True}, (209,), np.float64), + ({"flatten": True, "dtype": np.float32}, (209,), np.float32), + ({"flatten": True, "dtype": np.float64}, (209,), np.float64), + ({"flatten": False}, (11, 19), np.float64), + ({"flatten": False, "dtype": np.float32}, (11, 19), np.float32), + ({"flatten": False, "dtype": np.float64}, (11, 19), np.float64), ], ) -def test_grib_fieldlist_data(kwarg, expected_shape): - ds = from_source("file", earthkit_examples_file("test.grib")) +def test_grib_fieldlist_data(mode, kwarg, expected_shape, expected_dtype): + ds = load_file_or_numpy_fs("test.grib", mode) latlon = ds.to_latlon(**kwarg) v = ds.to_numpy(**kwarg) @@ -274,6 +307,7 @@ def test_grib_fieldlist_data(kwarg, expected_shape): d = ds.data(**kwarg) assert isinstance(d, np.ndarray) assert 
d.shape == tuple([4, *expected_shape]) + assert d.dtype == expected_dtype assert np.allclose(d[0], latlon["lat"]) assert np.allclose(d[1], latlon["lon"]) assert np.allclose(d[2], v[0]) @@ -281,26 +315,31 @@ def test_grib_fieldlist_data(kwarg, expected_shape): d = ds.data(keys="lat", **kwarg) assert d.shape == tuple([1, *expected_shape]) + assert d.dtype == expected_dtype assert np.allclose(d[0], latlon["lat"]) d = ds.data(keys="lon", **kwarg) assert d.shape == tuple([1, *expected_shape]) + assert d.dtype == expected_dtype assert np.allclose(d[0], latlon["lon"]) d = ds.data(keys="value", **kwarg) assert d.shape == tuple([2, *expected_shape]) + assert d.dtype == expected_dtype assert np.allclose(d, v) d = ds.data(keys=("value", "lon"), **kwarg) assert isinstance(d, np.ndarray) assert d.shape == tuple([3, *expected_shape]) + assert d.dtype == expected_dtype assert np.allclose(d[0], v[0]) assert np.allclose(d[1], v[1]) assert np.allclose(d[2], latlon["lon"]) -def test_grib_values_with_missing(): - f = from_source("file", earthkit_test_data_file("test_single_with_missing.grib")) +@pytest.mark.parametrize("mode", ["file", "numpy_fs"]) +def test_grib_values_with_missing(mode): + f = load_file_or_numpy_fs("test_single_with_missing.grib", mode, folder="data") v = f[0].values assert isinstance(v, np.ndarray) diff --git a/tests/indexing/indexing_fixtures.py b/tests/indexing/indexing_fixtures.py index 425ac478..383435a4 100644 --- a/tests/indexing/indexing_fixtures.py +++ b/tests/indexing/indexing_fixtures.py @@ -11,10 +11,8 @@ import os import shutil -import warnings from earthkit.data.core.temporary import temp_directory, temp_file -from earthkit.data.readers.grib.index import GribFieldList from earthkit.data.testing import ( earthkit_examples_file, earthkit_file, @@ -78,36 +76,6 @@ def list_of_dicts(): ] -class GribIndexFromDicts(GribFieldList): - def __init__(self, list_of_dicts, *args, **kwargs): - self.list_of_dicts = list_of_dicts - print(f"KWARGS={kwargs}") - 
super().__init__(*args, **kwargs) - - def __getitem__(self, n): - class _VirtualGribField(dict): - def metadata(_self, n, **kwargs): - try: - if n == "level": - n = "levelist" - if n == "shortName": - n = "param" - if n == "paramId": - n = "_param_id" - return _self[n] - except KeyError: - warnings.warn("Cannot find all metadata keys.") - - @property - def values(self, n): - return self["values"] - - return _VirtualGribField(self.list_of_dicts[n]) - - def __len__(self): - return len(self.list_of_dicts) - - def get_tmp_fixture(input_mode): tmp = { "directory": unique_grib_dir, diff --git a/tests/netcdf/test_netcdf_concat.py b/tests/netcdf/test_netcdf_concat.py index c2f8cabd..0b9776f7 100644 --- a/tests/netcdf/test_netcdf_concat.py +++ b/tests/netcdf/test_netcdf_concat.py @@ -11,13 +11,15 @@ import datetime -from earthkit.data import from_source -from earthkit.data.testing import earthkit_test_data_file +import pytest +from earthkit.data.testing import earthkit_test_data_file, load_nc_or_xr_source -def test_netcdf_concat(): - ds1 = from_source("file", earthkit_test_data_file("era5_2t_1.nc")) - ds2 = from_source("file", earthkit_test_data_file("era5_2t_2.nc")) + +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_concat(mode): + ds1 = load_nc_or_xr_source(earthkit_test_data_file("era5_2t_1.nc"), mode) + ds2 = load_nc_or_xr_source(earthkit_test_data_file("era5_2t_2.nc"), mode) ds = ds1 + ds2 assert len(ds) == 2 diff --git a/tests/netcdf/test_netcdf_geography.py b/tests/netcdf/test_netcdf_geography.py index 3ad16369..ee79ea9f 100644 --- a/tests/netcdf/test_netcdf_geography.py +++ b/tests/netcdf/test_netcdf_geography.py @@ -28,14 +28,20 @@ def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert np.isclose(v.mean(), meanv, eps) -def test_netcdf_to_points_1(): +@pytest.mark.parametrize( + "dtype,expected_dtype", + [(None, np.float64), (np.float32, np.float32), (np.float64, np.float64)], +) +def test_netcdf_to_points_1(dtype, 
expected_dtype): ds = from_source("file", earthkit_test_data_file("test_single.nc")) eps = 1e-5 - v = ds[0].to_points(flatten=True) + v = ds[0].to_points(flatten=True, dtype=dtype) assert isinstance(v, dict) assert isinstance(v["x"], np.ndarray) assert isinstance(v["y"], np.ndarray) + assert v["x"].dtype == expected_dtype + assert v["y"].dtype == expected_dtype check_array( v["x"], (84,), diff --git a/tests/netcdf/test_netcdf_metadata.py b/tests/netcdf/test_netcdf_metadata.py index f604c7c3..60c206d2 100644 --- a/tests/netcdf/test_netcdf_metadata.py +++ b/tests/netcdf/test_netcdf_metadata.py @@ -14,9 +14,10 @@ import pytest from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file +from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "key,expected_value", [ @@ -34,8 +35,9 @@ (("variable", "level"), ("t", 1000)), ], ) -def test_netcdf_metadata_single_field(key, expected_value): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_metadata_single_field(mode, key, expected_value): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) + # sn = f.metadata(key) # assert sn == [expected_value] sn = f[0].metadata(key) @@ -77,8 +79,9 @@ def test_netcdf_datetime(): assert ds.datetime() == ref -def test_netcdf_valid_datetime(): - ds = from_source("file", earthkit_examples_file("test.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_valid_datetime(mode): + ds = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) assert ds[0].metadata("valid_datetime") == datetime.datetime(2020, 5, 13, 12) diff --git a/tests/netcdf/test_netcdf_output.py b/tests/netcdf/test_netcdf_output.py new file mode 100644 index 00000000..bfa58beb --- /dev/null +++ b/tests/netcdf/test_netcdf_output.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. 
+# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import os + +import pytest + +from earthkit.data import from_source +from earthkit.data.core.temporary import temp_file +from earthkit.data.testing import earthkit_examples_file + + +def test_netcdf_fieldlist_save(): + ds = from_source("file", earthkit_examples_file("test.nc")) + assert len(ds) == 2 + + tmp = temp_file() + ds.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == 2 + + +def test_netcdf_fieldlist_subset_save(): + ds = from_source("file", earthkit_examples_file("test.nc")) + assert len(ds) == 2 + r = ds[1] + + tmp = temp_file() + with pytest.raises(NotImplementedError): + r.save(tmp.path) + + +def test_netcdf_fieldlist_multi_subset_save(): + ds1 = from_source("file", earthkit_examples_file("test.nc")) + ds2 = from_source("file", earthkit_examples_file("tuv_pl.nc")) + + ds = ds1 + ds2 + assert len(ds) == 20 + + tmp = temp_file() + ds.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == 20 + + +def test_netcdf_fieldlist_multi_subset_save_bad(): + ds1 = from_source("file", earthkit_examples_file("test.nc")) + ds2 = from_source("file", earthkit_examples_file("tuv_pl.nc")) + + ds = ds1 + ds2[1:5] + assert len(ds) == 6 + + tmp = temp_file() + with pytest.raises(NotImplementedError): + ds.save(tmp.path) diff --git a/tests/netcdf/test_netcdf_sel.py b/tests/netcdf/test_netcdf_sel.py index f4112a91..e41c34e2 100644 --- a/tests/netcdf/test_netcdf_sel.py +++ b/tests/netcdf/test_netcdf_sel.py @@ -13,10 +13,10 @@ import pytest -from earthkit.data import from_source -from earthkit.data.testing 
import earthkit_examples_file +from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "params,expected_meta,metadata_keys", [ @@ -47,8 +47,8 @@ ), ], ) -def test_netcdf_sel_single_file_1(params, expected_meta, metadata_keys): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_sel_single_file_1(mode, params, expected_meta, metadata_keys): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) g = f.sel(**params) assert len(g) == len(expected_meta) diff --git a/tests/netcdf/test_netcdf_slice.py b/tests/netcdf/test_netcdf_slice.py index e5d6f058..7ee5c86d 100644 --- a/tests/netcdf/test_netcdf_slice.py +++ b/tests/netcdf/test_netcdf_slice.py @@ -13,9 +13,10 @@ import pytest from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file +from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "index,expected_meta", [ @@ -26,8 +27,8 @@ (-5, ["v", 850]), ], ) -def test_netcdf_single_index(index, expected_meta): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_single_index(mode, index, expected_meta): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) r = f[index] assert r.metadata(["variable", "level"]) == expected_meta @@ -43,6 +44,7 @@ def test_netcdf_single_index_bad(): f[27] +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "indexes,expected_meta", [ @@ -54,8 +56,8 @@ def test_netcdf_single_index_bad(): (slice(14, None), [["v", 700], ["v", 500], ["v", 400], ["v", 300]]), ], ) -def test_netcdf_slice_single_file(indexes, expected_meta): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_slice_single_file(mode, indexes, expected_meta): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), 
mode) r = f[indexes] assert len(r) == 4 assert r.metadata(["variable", "level"]) == expected_meta @@ -91,12 +93,14 @@ def test_netcdf_slice_multi_file(indexes, expected_meta): assert f.metadata("shortName") == ["2t", "msl", "t", "z", "t", "z"] +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "indexes1,indexes2", [(np.array([1, 16, 5, 9]), np.array([1, 3])), ([1, 16, 5, 9], [1, 3])], ) -def test_netcdf_array_indexing(indexes1, indexes2): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_array_indexing(mode, indexes1, indexes2): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) + r = f[indexes1] assert len(r) == 4 assert r.metadata("variable") == ["t", "v", "t", "u"] @@ -106,15 +110,17 @@ def test_netcdf_array_indexing(indexes1, indexes2): assert r1.metadata("variable") == ["v", "u"] +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize("indexes", [(np.array([1, 19, 5, 9])), ([1, 19, 5, 9])]) -def test_netcdf_array_indexing_bad(indexes): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_array_indexing_bad(mode, indexes): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) with pytest.raises(IndexError): f[indexes] -def test_netcdf_fieldlist_iterator(): - g = from_source("file", earthkit_examples_file("tuv_pl.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_fieldlist_iterator(mode): + g = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) sn = g.metadata("variable") assert len(sn) == 18 iter_sn = [f.metadata("variable") for f in g] @@ -155,8 +161,9 @@ def test_netcdf_fieldlist_iterator_with_zip_multiple(): assert levs2 == ref_levs, i -def test_netcdf_fieldlist_reverse_iterator(): - g = from_source("file", earthkit_examples_file("tuv_pl.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_fieldlist_reverse_iterator(mode): + g = 
load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) sn = g.metadata("variable") sn_reversed = list(reversed(sn)) assert sn_reversed[0] == "v" diff --git a/tests/netcdf/test_netcdf_summary.py b/tests/netcdf/test_netcdf_summary.py index c988fa8e..aef884bd 100644 --- a/tests/netcdf/test_netcdf_summary.py +++ b/tests/netcdf/test_netcdf_summary.py @@ -10,12 +10,14 @@ import datetime -from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file +import pytest +from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source -def test_netcdf_ls(): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) + +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_ls(mode): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) # default keys f1 = f[:4] diff --git a/tests/netcdf/test_netcdf_values.py b/tests/netcdf/test_netcdf_values.py index a33772b7..7944b91d 100644 --- a/tests/netcdf/test_netcdf_values.py +++ b/tests/netcdf/test_netcdf_values.py @@ -12,8 +12,7 @@ import numpy as np import pytest -from earthkit.data import from_source -from earthkit.data.testing import earthkit_examples_file +from earthkit.data.testing import earthkit_examples_file, load_nc_or_xr_source def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): @@ -23,8 +22,9 @@ def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert np.isclose(v.mean(), meanv, eps) -def test_netcdf_values_surf(): - f = from_source("file", earthkit_examples_file("test.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_values_surf(mode): + f = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) eps = 1e-5 @@ -59,8 +59,9 @@ def test_netcdf_values_surf(): assert np.allclose(v0_f, v0, eps) -def test_netcdf_values_upper(): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def 
test_netcdf_values_upper(mode): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) eps = 1e-5 @@ -89,8 +90,9 @@ def test_netcdf_values_upper(): ) -def test_netcdf_to_numpy_surf(): - f = from_source("file", earthkit_examples_file("test.nc")) +@pytest.mark.parametrize("mode", ["nc", "xr"]) +def test_netcdf_to_numpy_surf(mode): + f = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) eps = 1e-5 v = f.to_numpy() @@ -124,6 +126,7 @@ def test_netcdf_to_numpy_surf(): assert np.allclose(v0_f.flatten(), v0, eps) +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "first,options, expected_shape", [ @@ -135,8 +138,8 @@ def test_netcdf_to_numpy_surf(): (True, {"flatten": False}, (11, 19)), ], ) -def test_netcdf_to_numpy_surf_shape(first, options, expected_shape): - f = from_source("file", earthkit_examples_file("test.nc")) +def test_netcdf_to_numpy_surf_shape(mode, first, options, expected_shape): + f = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) eps = 1e-5 @@ -154,6 +157,7 @@ def test_netcdf_to_numpy_surf_shape(first, options, expected_shape): assert np.allclose(v_ref, v1, eps) +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize( "options, expected_shape", [ @@ -175,8 +179,8 @@ def test_netcdf_to_numpy_surf_shape(first, options, expected_shape): ({"flatten": False}, (18, 7, 12)), ], ) -def test_netcdf_to_numpy_upper_shape(options, expected_shape): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_to_numpy_upper_shape(mode, options, expected_shape): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) eps = 1e-5 @@ -198,9 +202,10 @@ def test_netcdf_to_numpy_upper_shape(options, expected_shape): assert np.allclose(vf15, vr, eps) +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_netcdf_to_numpy_surf_dtype(dtype): - f = from_source("file", earthkit_examples_file("test.nc")) 
+def test_netcdf_to_numpy_surf_dtype(mode, dtype): + f = load_nc_or_xr_source(earthkit_examples_file("test.nc"), mode) v = f[0].to_numpy(dtype=dtype) assert v.dtype == dtype @@ -209,9 +214,10 @@ def test_netcdf_to_numpy_surf_dtype(dtype): assert v.dtype == dtype +@pytest.mark.parametrize("mode", ["nc", "xr"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) -def test_netcdf_to_numpy_upper_dtype(dtype): - f = from_source("file", earthkit_examples_file("tuv_pl.nc")) +def test_netcdf_to_numpy_upper_dtype(mode, dtype): + f = load_nc_or_xr_source(earthkit_examples_file("tuv_pl.nc"), mode) v = f[0].to_numpy(dtype=dtype) assert v.dtype == dtype diff --git a/tests/numpy_fs/numpy_fs_fixtures.py b/tests/numpy_fs/numpy_fs_fixtures.py new file mode 100644 index 00000000..37f1e90d --- /dev/null +++ b/tests/numpy_fs/numpy_fs_fixtures.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import os + +import numpy as np + +from earthkit.data import from_source +from earthkit.data.core.fieldlist import FieldList +from earthkit.data.core.temporary import temp_file +from earthkit.data.testing import earthkit_examples_file + + +def load_numpy_fs(num): + assert num in [1, 2, 3] + files = ["test.grib", "test6.grib", "tuv_pl.grib"] + files = files[:num] + + ds_in = [] + md = [] + for fname in files: + ds_in.append(from_source("file", earthkit_examples_file(fname))) + md += ds_in[-1].metadata("param") + + ds = [] + for x in ds_in: + ds.append( + FieldList.from_numpy( + x.values, [m.override(edition=1) for m in x.metadata()] + ) + ) + + return (*ds, md) + + +def check_numpy_fs(ds, ds_input, md_full): + assert len(ds_input) in [1, 2, 3] + + assert len(ds) == len(md_full) + assert ds.metadata("param") == md_full + assert np.allclose(ds[0].values, ds_input[0][0].values) + + # check slice + r = ds[1] + assert r.metadata("param") == "msl" + + if len(ds_input) > 1: + r = ds[1:3] + assert len(r) == 2 + assert r.metadata("param") == ["msl", "t"] + assert r[0].metadata("param") == "msl" + assert r[1].metadata("param") == "t" + assert np.allclose(r[0].values, ds_input[0][1].values) + assert np.allclose(r[1].values, ds_input[1][0].values) + + # check sel + r = ds.sel(shortName="msl") + assert len(r) == 1 + assert r.metadata("shortName") == ["msl"] + assert r[0].metadata("param") == "msl" + assert np.allclose(r[0].values, ds_input[0][1].values) + + if len(ds_input) == 3: + r = ds[1:13:4] + assert len(r) == 3 + assert r.metadata("param") == ["msl", "t", "u"] + assert r[0].metadata("param") == "msl" + assert r[1].metadata("param") == "t" + assert r[2].metadata("param") == "u" + + +def check_save_to_disk(ds, len_ref, meta_ref): + tmp = temp_file() + ds.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == len_ref + assert r_tmp.metadata("shortName") == meta_ref + r_tmp = None diff --git 
a/tests/sources/test_numpy_list.py b/tests/numpy_fs/test_numpy_fs.py similarity index 60% rename from tests/sources/test_numpy_list.py rename to tests/numpy_fs/test_numpy_fs.py index 225c3df6..c5e41056 100644 --- a/tests/sources/test_numpy_list.py +++ b/tests/numpy_fs/test_numpy_fs.py @@ -9,48 +9,55 @@ # nor does it submit to any jurisdiction. # -import logging import os +import sys import numpy as np +import pytest from earthkit.data import from_source from earthkit.data.core.fieldlist import FieldList from earthkit.data.core.temporary import temp_file from earthkit.data.testing import earthkit_examples_file -LOG = logging.getLogger(__name__) +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from numpy_fs_fixtures import check_numpy_fs # noqa: E402 -def test_numpy_list_grib_single_field(): +def test_numpy_fs_grib_single_field(): ds = from_source("file", earthkit_examples_file("test.grib")) assert ds[0].metadata("shortName") == "2t" - v = ds[0].values + lat, lon, v = ds[0].data(flatten=True) v1 = v + 1 md = ds[0].metadata() md1 = md.override(shortName="msl") r = FieldList.from_numpy(v1, md1) - assert len(r) == 1 - assert np.allclose(v1, r[0].values) - assert r[0].shape == ds[0].shape - assert r[0].metadata("shortName") == "msl" + def _check_field(r): + assert len(r) == 1 + assert np.allclose(r[0].values, v1) + assert r[0].shape == ds[0].shape + assert r[0].metadata("shortName") == "msl" + _lat, _lon, _v = r[0].data(flatten=True) + assert np.allclose(_lat, lat) + assert np.allclose(_lon, lon) + assert np.allclose(_v, v1) + + _check_field(r) # save to disk tmp = temp_file() r.save(tmp.path) assert os.path.exists(tmp.path) r_tmp = from_source("file", tmp.path) - assert len(r_tmp) == 1 - assert np.allclose(v1, r_tmp[0].values) - assert r_tmp[0].shape == ds[0].shape - assert r_tmp[0].metadata("shortName") == "msl" + _check_field(r_tmp) -def test_numpy_list_grib_multi_field(): +def test_numpy_fs_grib_multi_field(): ds = from_source("file", 
earthkit_examples_file("test.grib")) assert ds[0].metadata("shortName") == "2t" @@ -81,6 +88,31 @@ def test_numpy_list_grib_multi_field(): assert f.metadata("name") == "2 metre dewpoint temperature", f"name {i}" +def test_numpy_fs_grib_from_list_of_arrays(): + ds = from_source("file", earthkit_examples_file("test.grib")) + md_full = ds.metadata("param") + assert len(ds) == 2 + + v = [ds[0].values, ds[1].values] + md = [f.metadata().override(generatingProcessIdentifier=150) for f in ds] + r = FieldList.from_numpy(v, md) + + check_numpy_fs(r, [ds], md_full) + + +def test_numpy_fs_grib_from_list_of_arrays_bad(): + ds = from_source("file", earthkit_examples_file("test.grib")) + + v = ds[0].values + md = [f.metadata().override(generatingProcessIdentifier=150) for f in ds] + + with pytest.raises(ValueError): + _ = FieldList.from_numpy(v, md) + + with pytest.raises(ValueError): + _ = FieldList.from_numpy([v], md) + + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/numpy_fs/test_numpy_fs_concat.py b/tests/numpy_fs/test_numpy_fs_concat.py new file mode 100644 index 00000000..b15b543d --- /dev/null +++ b/tests/numpy_fs/test_numpy_fs_concat.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import os +import sys + +import pytest + +from earthkit.data import from_source +from earthkit.data.core.fieldlist import FieldList + +here = os.path.dirname(__file__) +sys.path.insert(0, here) +from numpy_fs_fixtures import ( # noqa: E402 + check_numpy_fs, + check_save_to_disk, + load_numpy_fs, +) + + +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_numpy_fs_grib_concat_2a(mode): + ds1, ds2, md = load_numpy_fs(2) + + if mode == "oper": + ds = ds1 + ds2 + else: + ds = from_source("multi", ds1, ds2) + + check_numpy_fs(ds, [ds1, ds2], md) + check_save_to_disk(ds, 8, md) + + +def test_numpy_fs_grib_concat_2b(): + ds1, ds2, md = load_numpy_fs(2) + ds1_ori = ds1 + ds1 += ds2 + + check_numpy_fs(ds1, [ds1_ori, ds2], md) + check_save_to_disk(ds1, 8, md) + + +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_numpy_fs_grib_concat_3a(mode): + ds1, ds2, ds3, md = load_numpy_fs(3) + + if mode == "oper": + ds = ds1 + ds2 + ds = ds + ds3 + else: + ds = from_source("multi", ds1, ds2) + ds = from_source("multi", ds, ds3) + + check_numpy_fs(ds, [ds1, ds2, ds3], md) + check_save_to_disk(ds, 26, md) + + +@pytest.mark.parametrize("mode", ["oper", "multi"]) +def test_numpy_fs_grib_concat_3b(mode): + ds1, ds2, ds3, md = load_numpy_fs(3) + + if mode == "oper": + ds = ds1 + ds2 + ds3 + else: + ds = from_source("multi", ds1, ds2, ds3) + + check_numpy_fs(ds, [ds1, ds2, ds3], md) + check_save_to_disk(ds, 26, md) + + +def test_numpy_fs_grib_from_empty_1(): + ds_e = FieldList() + ds, md = load_numpy_fs(1) + ds1 = ds_e + ds + assert id(ds1) == id(ds) + assert len(ds1) == 2 + assert ds1.metadata("param") == md + check_save_to_disk(ds1, 2, md) + + +def test_numpy_fs_grib_from_empty_2(): + ds_e = FieldList() + ds, md = load_numpy_fs(1) + ds1 = ds + ds_e + assert id(ds1) == id(ds) + assert len(ds1) == 2 + assert ds1.metadata("param") == md + check_save_to_disk(ds1, 2, md) + + +def test_numpy_fs_grib_from_empty_3(): + ds_e = FieldList() + ds1, ds2, md = 
load_numpy_fs(2) + ds = ds_e + ds1 + ds2 + + check_numpy_fs(ds, [ds1, ds2], md) + check_save_to_disk(ds, 8, md) + + +def test_numpy_fs_grib_from_empty_4(): + ds = FieldList() + ds1, md = load_numpy_fs(1) + ds += ds1 + assert id(ds) == id(ds1) + assert len(ds) == 2 + assert ds.metadata("param") == md + check_save_to_disk(ds, 2, md) + + +def test_numpy_fs_grib_from_empty_5(): + ds = FieldList() + ds1, ds2, md = load_numpy_fs(2) + ds += ds1 + ds2 + + check_numpy_fs(ds, [ds1, ds2], md) + check_save_to_disk(ds, 8, md) + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/numpy_fs/test_numpy_fs_write.py b/tests/numpy_fs/test_numpy_fs_write.py new file mode 100644 index 00000000..acb4d7cd --- /dev/null +++ b/tests/numpy_fs/test_numpy_fs_write.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import logging +import os + +import numpy as np + +from earthkit.data import from_source +from earthkit.data.core.fieldlist import FieldList +from earthkit.data.core.temporary import temp_file +from earthkit.data.testing import earthkit_examples_file + +LOG = logging.getLogger(__name__) + + +def test_numpy_fs_grib_write_missing(): + ds = from_source("file", earthkit_examples_file("test.grib")) + + assert ds[0].metadata("shortName") == "2t" + + v = ds[0].values + v1 = v + 1 + assert not np.isnan(v1[0]) + assert not np.isnan(v1[1]) + v1[0] = np.nan + assert np.isnan(v1[0]) + assert not np.isnan(v1[1]) + + md = ds[0].metadata() + md1 = md.override(shortName="msl") + r = FieldList.from_numpy(v1, md1) + + assert np.isnan(r[0].values[0]) + assert not np.isnan(r[0].values[1]) + + # save to disk + tmp = temp_file() + r.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + v_tmp = r_tmp[0].values + assert np.isnan(v_tmp[0]) + assert not np.isnan(v_tmp[1]) + + +def test_numpy_fs_grib_write_append(): + ds = from_source("file", earthkit_examples_file("test.grib")) + + assert ds[0].metadata("shortName") == "2t" + + v = ds[0].values + v1 = v + 1 + v2 = v + 2 + + md = ds[0].metadata() + md1 = md.override(shortName="msl") + md2 = md.override(shortName="2d") + + r1 = FieldList.from_numpy(v1, md1) + r2 = FieldList.from_numpy(v2, md2) + + # save to disk + tmp = temp_file() + r1.save(tmp.path) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == 1 + assert r_tmp.metadata("shortName") == ["msl"] + r_tmp = None + + # append + r2.save(tmp.path, append=True) + assert os.path.exists(tmp.path) + r_tmp = from_source("file", tmp.path) + assert len(r_tmp) == 2 + assert r_tmp.metadata("shortName") == ["msl", "2d"] + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/readers/test_csv_reader.py b/tests/readers/test_csv_reader.py index eaa9994c..64e36154 
100644 --- a/tests/readers/test_csv_reader.py +++ b/tests/readers/test_csv_reader.py @@ -13,11 +13,11 @@ import pytest -import earthkit.data as cml +from earthkit.data import from_source def test_csv_1(): - s = cml.from_source( + s = from_source( "dummy-source", "csv", headers=["a", "b", "c"], @@ -28,11 +28,17 @@ def test_csv_1(): ], ) - print(s.to_pandas()) + df = s.to_pandas() + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] + + ds = s.to_xarray() + assert len(ds) == 3 + assert list(ds.variables) == ["index", "a", "b", "c"] def test_csv_2(): - s = cml.from_source( + s = from_source( "dummy-source", "csv", headers=["a", "b", "c"], @@ -43,11 +49,13 @@ def test_csv_2(): ], ) - print(s.to_pandas()) + df = s.to_pandas() + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] def test_csv_3(): - s = cml.from_source( + s = from_source( "dummy-source", "csv", headers=["a", "b", "c"], @@ -58,11 +66,13 @@ def test_csv_3(): ], ) - print(s.to_pandas()) + df = s.to_pandas() + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] def test_csv_4(): - s = cml.from_source( + s = from_source( "dummy-source", "csv", headers=["a", "b", "c"], @@ -74,7 +84,9 @@ def test_csv_4(): ], ) - print(s.to_pandas()) + df = s.to_pandas() + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] @pytest.mark.skipif(True, reason="Test not yet implemented") @@ -92,12 +104,12 @@ def test_csv_icoads(): "type": "ofb", } - source = cml.from_source("mars", **r) + source = from_source("mars", **r) print(source) -def test_csv_text(): - s = cml.from_source( +def test_csv_text_file(): + s = from_source( "dummy-source", "csv", headers=["a", "b", "c"], @@ -110,7 +122,32 @@ def test_csv_text(): extension=".txt", ) - print(s.to_pandas()) + df = s.to_pandas() + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] + + +def test_csv_with_comment(): + s = from_source( + "dummy-source", + "csv", + headers=["a", "b", "c"], + quote_strings=True, + lines=[ + 
[1, "x", 3], + [4, "y", 6], + [7, "z", 9], + ], + comment_line="This is a comment", + ) + + df = s.to_pandas(pandas_read_csv_kwargs={"comment": "#"}) + assert len(df) == 3 + assert list(df.columns) == ["a", "b", "c"] + + ds = s.to_xarray(pandas_read_csv_kwargs={"comment": "#"}) + assert len(ds) == 3 + assert list(ds.variables) == ["index", "a", "b", "c"] def test_csv_mimetypes(): diff --git a/tests/readers/test_netcdf_reader.py b/tests/readers/test_netcdf_reader.py index 5db8812c..1ebe5d1f 100644 --- a/tests/readers/test_netcdf_reader.py +++ b/tests/readers/test_netcdf_reader.py @@ -19,8 +19,10 @@ from earthkit.data import from_source from earthkit.data.readers.netcdf import NetCDFField from earthkit.data.testing import ( + NO_CDS, earthkit_examples_file, earthkit_file, + earthkit_remote_test_data_file, earthkit_test_data_file, ) @@ -32,16 +34,15 @@ def check_array(v, shape=None, first=None, last=None, meanv=None, eps=1e-3): assert np.isclose(v.mean(), meanv, eps) -def test_netcdf(): - for s in from_source("file", earthkit_file("docs/examples/test.nc")): - s is not None - - -def test_dummy_netcdf_reader_1(): +@pytest.mark.no_eccodes +def test_netcdf_reader(): ds = from_source("file", earthkit_file("docs/examples/test.nc")) # assert str(ds).startswith("NetCDFReader"), r assert len(ds) == 2 - assert isinstance(ds[1], NetCDFField), ds + assert isinstance(ds[0], NetCDFField) + assert isinstance(ds[1], NetCDFField) + for f in from_source("file", earthkit_file("docs/examples/test.nc")): + assert isinstance(f, NetCDFField) @pytest.mark.parametrize("attribute", ["coordinates", "bounds", "grid_mapping"]) @@ -99,9 +100,9 @@ def test_dummy_netcdf_4(): @pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_CDS, reason="No access to CDS") def test_netcdf_multi_cds(): - if not os.path.exists(os.path.expanduser("~/.cdsapirc")): - pytest.skip("No ~/.cdsapirc") s1 = from_source( "cds", "reanalysis-era5-single-levels", @@ -130,6 +131,7 @@ def test_netcdf_multi_cds(): 
source.to_xarray() +@pytest.mark.no_eccodes def test_netcdf_multi_sources(): path = earthkit_test_data_file("era5_2t_1.nc") s1 = from_source("file", path) @@ -167,6 +169,7 @@ def test_netcdf_multi_sources(): s3.to_xarray() +@pytest.mark.no_eccodes def test_netcdf_multi_files(): ds = from_source( "file", @@ -199,6 +202,7 @@ def test_netcdf_multi_files(): ds.to_xarray() +@pytest.mark.no_eccodes def test_get_fields_missing_standard_name_attr_in_coord_array(): """test _get_fields() can handle a missing 'standard_name' attr in coordinate data arrays""" @@ -221,6 +225,37 @@ def test_get_fields_missing_standard_name_attr_in_coord_array(): assert len(fs) == 2 +@pytest.mark.no_eccodes +def test_netcdf_non_fieldlist(): + ek_ch4_l2 = from_source( + "url", + earthkit_remote_test_data_file( + "test-data/20210101-C3S-L2_GHG-GHG_PRODUCTS-TANSO2-GOSAT2-SRFP-DAILY-v2.0.0.nc" + ) + # Data from this CDS request: + # "cds", + # "satellite-methane", + # { + # "processing_level": "level_2", + # "sensor_and_algorithm": "tanso2_fts2_srfp", + # "year": "2021", + # "month": "01", + # "day": "01", + # "version": "2.0.0", + # }, + ) + # TODO: add more conditions to this test when it is clear what methods it should have + ek_ch4_l2.to_xarray() + + +@pytest.mark.no_eccodes +def test_netcdf_lazy_fieldlist_scan(): + ds = from_source("file", earthkit_examples_file("test.nc")) + assert ds._fields is None + assert len(ds) == 2 + assert len(ds._fields) == 2 + + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/sources/test_cds.py b/tests/sources/test_cds.py index d3941c71..e83b34dd 100644 --- a/tests/sources/test_cds.py +++ b/tests/sources/test_cds.py @@ -81,6 +81,27 @@ def test_cds_netcdf(): assert len(s) == 2 +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_CDS, reason="No access to CDS") +def test_cds_netcdf_selection_limited(): + s = from_source( + "cds", + "satellite-albedo", + { + "variable": "albb_bh", + "satellite": "noaa_7", + 
"sensor": "avhrr", + "product_version": "v2", + "horizontal_resolution": "4km", + "year": "1983", + "month": "01", + "nominal_day": "10", + }, + ) + assert len(s) == 9 + + if __name__ == "__main__": from earthkit.data.testing import main diff --git a/tests/sources/test_polytope.py b/tests/sources/test_polytope.py new file mode 100644 index 00000000..fe3b27e5 --- /dev/null +++ b/tests/sources/test_polytope.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import sys + +import pytest + +from earthkit.data import from_source +from earthkit.data.testing import NO_POLYTOPE + + +def test_no_polytope_client(monkeypatch): + "Check that a useful message is given in the absence of the polytope-client library" + monkeypatch.setitem(sys.modules, "polytope", None) + with pytest.raises(ImportError) as excinfo: + from_source("polytope", None, None) + assert "pip install polytope-client" in str(excinfo.value) + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_POLYTOPE, reason="No access to Polytope Web API") +def test_polytope_odb(): + request = { + "database": "fdbdev", + "class": "rd", + "type": "oai", + "stream": "lwda", + "expver": "xxxx", + "obsgroup": "CONV", + "reportype": 16001, + "obstype": 15, + "date": [20150601, 20150602], + "time": "0/to/23/by/1", + "filter": "'select * where stationid=\"Ams01\"'", + "domain": "off", + } + + src = from_source("polytope", "ichange", request) + df = src.to_pandas() + assert len(df) == 52 + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_POLYTOPE, reason="No access to Polytope Web API") +def 
test_polytope_grib(): + request = { + "stream": "oper", + "levtype": "pl", + "levellist": "500", + "param": "129.128", + "step": "0/12", + "time": "00:00:00", + "date": "20200915", + "type": "fc", + "class": "rd", + "expver": "hsvs", + "domain": "g", + } + + ds = from_source("polytope", "ecmwf-mars", request) + + assert len(ds) == 2 + assert ds.metadata("level") == [500, 500] diff --git a/tests/sources/test_url.py b/tests/sources/test_url.py index 0319ddef..b0e34a70 100644 --- a/tests/sources/test_url.py +++ b/tests/sources/test_url.py @@ -15,8 +15,12 @@ import pytest from earthkit.data import from_source, settings -from earthkit.data.core.temporary import temp_directory -from earthkit.data.testing import earthkit_file, network_off +from earthkit.data.core.temporary import temp_directory, temp_file +from earthkit.data.testing import ( + earthkit_file, + earthkit_remote_test_data_file, + network_off, +) @pytest.mark.skipif( # TODO: fix @@ -129,6 +133,17 @@ def test_url_part_file_source(): assert f.read() == b"GRIB7777GRIB7777" +def test_url_netcdf_source_save(): + ds = from_source( + "url", + earthkit_remote_test_data_file("examples/test.nc"), + ) + + tmp = temp_file() + ds.save(tmp.path) + assert os.path.exists(tmp.path) + + if __name__ == "__main__": test_part_url() # from earthkit.data.testing import main diff --git a/tests/sources/test_wekeo.py b/tests/sources/test_wekeo.py new file mode 100644 index 00000000..370f837e --- /dev/null +++ b/tests/sources/test_wekeo.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import pytest + +from earthkit.data import from_source +from earthkit.data.testing import NO_HDA + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_HDA, reason="No access to WEKEO") +def test_wekeo_download(): + s = from_source( + "wekeo", + "EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M", + request={ + "datasetId": "EO:CLMS:DAT:CGLS_GLOBAL_NDVI300_V1_333M", + "dateRangeSelectValues": [ + { + "name": "dtrange", + "start": "2014-01-01T00:00:00.000Z", + "end": "2014-01-01T23:59:59.999Z", + } + ], + }, + ) + assert len(s) == 1 + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/sources/test_wekeocds.py b/tests/sources/test_wekeocds.py new file mode 100644 index 00000000..0e5e9fa8 --- /dev/null +++ b/tests/sources/test_wekeocds.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. 
+# + +import pytest + +from earthkit.data import from_source +from earthkit.data.testing import NO_HDA + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_HDA, reason="No access to WEKEO") +def test_wekeo_grib_1(): + s = from_source( + "wekeocds", + "EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS", + variable=["2m_temperature", "mean_sea_level_pressure"], + product_type=["reanalysis"], + year=["2012"], + month=["12"], + day=["12"], + time=["13:00"], + format="grib", + ) + assert len(s) == 2 + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_HDA, reason="No access to CDS") +def test_wekeo_grib_2(): + s = from_source( + "wekeocds", + "EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS", + variable=["2m_temperature", "mean_sea_level_pressure"], + product_type=["reanalysis"], + year=["2012"], + month=["12"], + day=["12"], + time=["13:00"], + format="grib", + split_on="variable", + ) + assert len(s) == 2 + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_HDA, reason="No access to CDS") +def test_wekeo_grib_3(): + s = from_source( + "wekeocds", + "EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS", + variable=["2m_temperature", "mean_sea_level_pressure"], + product_type=["reanalysis"], + year=["2012"], + month=["12"], + day=["12", "13", "14", "15"], + time=["13:00"], + format="grib", + ) + assert len(s) == 8 + + +@pytest.mark.long_test +@pytest.mark.download +@pytest.mark.skipif(NO_HDA, reason="No access to CDS") +def test_wekeo_netcdf(): + s = from_source( + "wekeocds", + "EO:ECMWF:DAT:REANALYSIS_ERA5_SINGLE_LEVELS", + variable=["2m_temperature", "mean_sea_level_pressure"], + product_type=["reanalysis"], + year=["2012"], + month=["12"], + day=["12"], + time=["13:00"], + format="netcdf", + ) + assert len(s) == 2 + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/utils/test_module_inputs_wrapper.py b/tests/utils/test_module_inputs_wrapper.py index 
3df34b81..e39c1572 100644 --- a/tests/utils/test_module_inputs_wrapper.py +++ b/tests/utils/test_module_inputs_wrapper.py @@ -28,7 +28,6 @@ TEST_DS = TEST_DA.to_dataset() TEST_DS["test2"] = TEST_DA2 -EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") EK_XARRAY_WRAPPER = from_object(TEST_DS) EK_NUMPY_WRAPPER = from_object(TEST_NP) @@ -59,6 +58,7 @@ def test_transform_function_inputs_reader_to_xarray(): # Check EK GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") ek_reader_result = WRAPPED_XR_ONES_LIKE(EK_GRIB_READER) # Will return a DataSet becuase that is first value in kwarg_types assert isinstance(ek_reader_result, xr.Dataset) @@ -67,6 +67,7 @@ def test_transform_function_inputs_reader_to_xarray(): def test_transform_function_inputs_reader_to_xarray_typesetting(): # Check EK GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") ek_reader_result = WRAPPED_XR_ONES_LIKE_TYPE_SETTING(EK_GRIB_READER) # Will return a dataarray because that is first value in type-set Union assert isinstance(ek_reader_result, xr.DataArray) @@ -75,6 +76,7 @@ def test_transform_function_inputs_reader_to_xarray_typesetting(): def test_transform_module_inputs_reader_to_xarray(): # Check EK GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") ek_reader_result = WRAPPED_DUMMY_MODULE.xarray_ones_like(EK_GRIB_READER) # Data array because type-setting of function has dataarray first assert isinstance(ek_reader_result, xr.DataArray) @@ -106,12 +108,14 @@ def test_transform_module_inputs_wrapper_to_xarray(): def test_transform_function_inputs_reader_to_numpy(): # Test with Earthkit.data GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") assert WRAPPED_NP_MEAN(EK_GRIB_READER) == np.mean(EK_GRIB_READER.to_numpy()) assert isinstance(WRAPPED_NP_MEAN(EK_GRIB_READER), np.float64) def test_transform_function_inputs_reader_to_numpy_typesetting(): # 
Test with Earthkit.data GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") result = WRAPPED_NP_MEAN_TYPE_SETTING(EK_GRIB_READER) assert result == np.mean(EK_GRIB_READER.to_numpy()) assert isinstance(result, np.float64) @@ -119,6 +123,7 @@ def test_transform_function_inputs_reader_to_numpy_typesetting(): def test_transform_module_inputs_reader_to_numpy(): # Test with Earthkit.data GribReader object + EK_GRIB_READER = from_source("file", "tests/data/test_single.grib") result = WRAPPED_DUMMY_MODULE.numpy_mean(EK_GRIB_READER) assert result == np.mean(EK_GRIB_READER.to_numpy()) assert isinstance(result, np.float64) diff --git a/tests/wrappers/test_xarray.py b/tests/wrappers/test_xarray.py index fc8e7398..3088132b 100644 --- a/tests/wrappers/test_xarray.py +++ b/tests/wrappers/test_xarray.py @@ -9,15 +9,14 @@ # nor does it submit to any jurisdiction. # - -import logging +import pytest from earthkit.data import from_object, wrappers +from earthkit.data.testing import earthkit_examples_file from earthkit.data.wrappers import xarray as xr_wrapper -LOG = logging.getLogger(__name__) - +@pytest.mark.no_eccodes def test_dataset_wrapper(): import xarray as xr @@ -29,6 +28,7 @@ def test_dataset_wrapper(): assert isinstance(_wrapper, xr_wrapper.XArrayDatasetWrapper) +@pytest.mark.no_eccodes def test_dataarray_wrapper(): import xarray as xr @@ -38,3 +38,13 @@ def test_dataarray_wrapper(): assert isinstance(_wrapper, xr_wrapper.XArrayDataArrayWrapper) _wrapper = from_object(xr.DataArray()) assert isinstance(_wrapper, xr_wrapper.XArrayDataArrayWrapper) + + +@pytest.mark.no_eccodes +def test_xarray_lazy_fieldlist_scan(): + import xarray as xr + + ds = from_object(xr.open_dataset(earthkit_examples_file("test.nc"))) + assert ds._fields is None + assert len(ds) == 2 + assert len(ds._fields) == 2