From 5ef47442c2028ff7c945de2081e542f8ea7b4450 Mon Sep 17 00:00:00 2001 From: Luke Campbell Date: Thu, 10 Nov 2022 11:32:43 -0500 Subject: [PATCH] [OSRA-126] Adds source for GridDAP This commit adds a specific source for GridDAP datasets available from ERDDAP. --- .pre-commit-config.yaml | 2 +- dev-environment.yml | 1 + environment.yml | 2 + intake_erddap/erddap.py | 302 +++++++++++++++++++++++-- intake_erddap/erddap_cat.py | 40 +++- pyproject.toml | 2 +- setup.py | 1 + tests/test_data/tabledap_metadata.json | 116 ++++++++++ tests/test_erddap_cat.py | 26 +++ tests/test_erddap_source.py | 108 ++++++++- 10 files changed, 570 insertions(+), 30 deletions(-) create mode 100644 tests/test_data/tabledap_metadata.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79faa14..05932db 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: rev: v0.982 hooks: - id: mypy - additional_dependencies: [types-setuptools] + additional_dependencies: [types-setuptools, types-requests] exclude: docs/source/conf.py args: [--ignore-missing-imports] diff --git a/dev-environment.yml b/dev-environment.yml index a36cc2d..b52b132 100644 --- a/dev-environment.yml +++ b/dev-environment.yml @@ -8,4 +8,5 @@ dependencies: - flake8 - pre-commit - types-setuptools + - types-requests - mypy diff --git a/environment.yml b/environment.yml index 259c4a4..83d4058 100644 --- a/environment.yml +++ b/environment.yml @@ -3,10 +3,12 @@ channels: - conda-forge dependencies: - python + - numpy - pandas - erddapy - panel - intake + - intake-xarray>=0.6.1 - pip - pip: - cf-pandas diff --git a/intake_erddap/erddap.py b/intake_erddap/erddap.py index 4b468b3..70c433d 100644 --- a/intake_erddap/erddap.py +++ b/intake_erddap/erddap.py @@ -1,18 +1,28 @@ """Source implementations for intake-erddap.""" -from typing import List, Optional, Type +import typing +from typing import List, Optional, Tuple, Type, Union + +import numpy as np import pandas as pd +import requests +import xarray as xr from erddapy import ERDDAP from intake.source import base +from intake_xarray.xarray_container import serialize_zarr_ds from .version import __version__ +if typing.TYPE_CHECKING: # pragma: no cover + # numpy typing is only available after version 1.21 + from numpy.typing import ArrayLike + + class ERDDAPSource(base.DataSource): """ One-shot ERDDAP to dataframe reader (no partitioning) - (TableDAP only) Caches entire dataframe in memory. 
@@ -21,9 +31,7 @@ class ERDDAPSource(base.DataSource):
     server: str
         URI to ERDDAP server
     dataset_id: str
-    variables: list
-    constraints: dict
 
     """
 
@@ -35,20 +43,19 @@ class ERDDAPSource(base.DataSource):
 
     def __init__(
         self,
-        server: str,
         dataset_id: str,
-        protocol: str = "tabledap",
+        protocol: str,
         variables: List[str] = None,
         constraints: dict = None,
         metadata: dict = None,
         erddap_client: Optional[Type[ERDDAP]] = None,
+        http_client: Optional[Type] = None,
     ):
         variables = variables or []
         constraints = constraints or {}
         metadata = metadata or {}
 
         self._init_args = {
-            "server": server,
             "dataset_id": dataset_id,
             "protocol": protocol,
             "variables": variables,
@@ -56,13 +63,12 @@ def __init__(
             "metadata": metadata,
         }
 
-        self._server = server
         self._dataset_id = dataset_id
         self._protocol = protocol
         self._variables = variables
         self._constraints = constraints
-        self._dataframe: Optional[pd.DataFrame] = None
         self._erddap_client = erddap_client or ERDDAP
+        self._http = http_client or requests
 
         super(ERDDAPSource, self).__init__(metadata=metadata)
 
@@ -75,15 +81,81 @@ def get_client(self) -> ERDDAP:
         e.constraints = self._constraints
         return e
 
-    def _load(self):
-        e = self.get_client()
-        self._dataframe: pd.DataFrame = e.to_pandas()
+
+class TableDAPSource(ERDDAPSource):
+    """Creates a Data Source for an ERDDAP TableDAP Dataset.
+
+    Parameters
+    ----------
+    server : str
+        URL to the ERDDAP service. Example:
+        "https://coastwatch.pfeg.noaa.gov/erddap". Do not include a trailing
+        slash.
+    dataset_id : str
+        The dataset identifier from ERDDAP.
+    variables : list of str, optional
+        A list of variables to retrieve from the dataset.
+    constraints : dict, optional
+        A mapping of conditions and constraints. Example:
+        {"time>=": "2022-01-02T12:00:00Z", "lon>": -140, "lon<": 0}
+    metadata : dict, optional
+        Additional metadata to include with the source passed from the catalog.
+    erddap_client : type, optional
+        A class that implements an interface like erddapy's ERDDAP class. The
+        source will rely on this client to interface with ERDDAP for most
+        requests.
+    http_client : module or object, optional
+        An object or module that implements an HTTP client similar to the
+        requests library's interface. The source will use this object to make
+        HTTP requests to ERDDAP in some cases.
+
+    Examples
+    --------
+    Sources are normally returned from a catalog object, but a source can be
+    instantiated directly:
+
+    >>> source = TableDAPSource("https://erddap.sensors.axds.co/erddap",
+    ...     "gov_usgs_waterdata_441759103261203")
+
+    Getting a pandas DataFrame from the source:
+
+    >>> df = source.read()
+
+    Once the source has loaded the dataset, the dataset's full metadata is
+    available from the source:
+
+    >>> source.metadata
+    {'info_url': 'https://erddap.sensors.axds.co/erddap/info/gov_usgs_waterdata_404513098181201...',
+     'catalog_dir': '',
+     'variables': {'time': {'_CoordinateAxisType': 'Time',
+       'actual_range': [1430828100.0, 1668079800.0],
+       'axis': 'T',
+       'ioos_category': 'Time',
+       'long_name': 'Time',
+       'standard_name': 'time',
+       'time_origin': '01-JAN-1970 00:00:00',
+       'units': 'seconds since 1970-01-01T00:00:00Z'},
+    ...
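+
+    A hypothetical subset request combining the variables and constraints
+    parameters documented above; the variable names and time bound here are
+    illustrative, not a recipe for a specific dataset:
+
+    >>> source = TableDAPSource(
+    ...     "https://erddap.sensors.axds.co/erddap",
+    ...     "gov_usgs_waterdata_441759103261203",
+    ...     variables=["time", "depth_to_water_level"],
+    ...     constraints={"time>=": "2022-01-01T00:00:00Z"},
+    ... )
+    >>> df = source.read()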
+    """
+
+    name = "tabledap"
+    version = __version__
+    container = "dataframe"
+    partition_access = True
+
+    def __init__(self, server: str, *args, **kwargs):
+        self._server = server
+        self._dataframe: Optional[pd.DataFrame] = None
+        self._dataset_metadata: Optional[dict] = None
+        kwargs.pop("protocol", None)
+        # https://github.com/python/mypy/issues/6799
+        super().__init__(*args, protocol="tabledap", **kwargs)  # type: ignore
 
     def _get_schema(self) -> base.Schema:
         if self._dataframe is None:
             # TODO: could do partial read with chunksize to get likely schema from
             # first few records, rather than loading the whole thing
             self._load()
+            self._dataset_metadata = self._get_dataset_metadata()
         # make type checker happy
         assert self._dataframe is not None
         return base.Schema(
@@ -91,7 +163,7 @@ def _get_schema(self) -> base.Schema:
             dtype=self._dataframe.dtypes,
             shape=self._dataframe.shape,
             npartitions=1,
-            extra_metadata={},
+            extra_metadata=self._dataset_metadata,
         )
 
     def _get_partition(self) -> pd.DataFrame:
@@ -105,3 +177,207 @@ def read(self) -> pd.DataFrame:
 
     def _close(self):
         self._dataframe = None
+
+    def _load(self):
+        e = self.get_client()
+        self._dataframe: pd.DataFrame = e.to_pandas()
+
+    def _get_dataset_metadata(self) -> dict:
+        """Fetch and return the metadata document for the dataset."""
+        url = f"{self._server}/info/{self._dataset_id}/index.json"
+        resp = self._http.get(url)
+        resp.raise_for_status()
+        metadata: dict = {"variables": {}}
+        for rowtype, varname, attrname, dtype, value in resp.json()["table"]["rows"]:
+            if rowtype != "attribute":
+                continue
+            try:
+                value = self._parse_metadata_value(value=value, dtype=dtype)
+            except ValueError:
+                # TODO: Convert to logging statement
+                print(
+                    f"WARNING: could not convert {dtype} {varname}:{attrname} = {value}"
+                )
+                continue
+
+            if varname == "NC_GLOBAL":
+                metadata[attrname] = value
+            else:
+                if varname not in metadata["variables"]:
+                    metadata["variables"][varname] = {}
+                metadata["variables"][varname][attrname] = value
+        return metadata
+
+    def _parse_metadata_value(
+        self, value: str, dtype: str
+    ) -> Union[int, float, str, List[int], List[float]]:
+        """Return the value from ERDDAP's metadata table parsed into a Python type."""
+        newvalue: Union[int, float, str, List[int], List[float]] = value
+        if dtype in ("int", "double", "float") and "," in value:
+            tmp = [i.strip() for i in value.split(",")]
+            if dtype == "int":
+                newvalue = [int(i) for i in tmp]
+            if dtype in ("float", "double"):
+                newvalue = [float(i) for i in tmp]
+        elif dtype == "int":
+            newvalue = int(value)
+        elif dtype in ("float", "double"):
+            newvalue = float(value)
+        return newvalue
+
+
+class GridDAPSource(ERDDAPSource):
+    """Creates a Data Source for an ERDDAP GridDAP Dataset.
+
+    Parameters
+    ----------
+    server : str
+        URL to the ERDDAP service. Example:
+        "https://coastwatch.pfeg.noaa.gov/erddap". Do not include a trailing
+        slash.
+    dataset_id : str
+        The dataset identifier from ERDDAP.
+    constraints : dict, optional
+        A mapping of conditions and constraints.
+    chunks : None or int or dict or str, optional
+        If chunks is provided, it is used to load the new dataset into dask
+        arrays. chunks=-1 loads the dataset with dask using a single chunk for
+        all arrays. chunks={} loads the dataset with dask using engine preferred
+        chunks if exposed by the backend, otherwise with a single chunk for all
+        arrays. chunks='auto' will use dask auto chunking taking into account
+        the engine preferred chunks. See dask chunking for more details.
+    xarray_kwargs : dict, optional
+        Arguments to be passed to the xarray open_dataset function.
+
+    Examples
+    --------
+    Sources are normally returned from a catalog object, but a source can be
+    instantiated directly:
+
+    >>> source = GridDAPSource("https://coastwatch.pfeg.noaa.gov/erddap", "charmForecast1day",
+    ...     chunks={"time": 1})
+
+    Getting an xarray dataset from the source object:
+
+    >>> ds = source.to_dask()
+
+    Once the source has opened the dataset, the dataset's full metadata is
+    available from the source:
+
+    >>> source.metadata
+    {'catalog_dir': '',
+     'dims': {'time': 1182, 'latitude': 391, 'longitude': 351},
+     'data_vars': {'pseudo_nitzschia': ['time', 'latitude', 'longitude'],
+      'particulate_domoic': ['time', 'latitude', 'longitude'],
+      'cellular_domoic': ['time', 'latitude', 'longitude'],
+      'chla_filled': ['time', 'latitude', 'longitude'],
+      'r555_filled': ['time', 'latitude', 'longitude'],
+      'r488_filled': ['time', 'latitude', 'longitude']},
+     'coords': ('time', 'latitude', 'longitude'),
+     'acknowledgement':
+    ...
+    """
+
+    name = "griddap"
+    version = __version__
+    container = "xarray"
+    partition_access = True
+
+    def __init__(
+        self,
+        server: str,
+        dataset_id: str,
+        constraints: dict = None,
+        chunks: Union[None, int, dict, str] = None,
+        xarray_kwargs: dict = None,
+        **kwargs,
+    ):
+        self._ds: Optional[xr.Dataset] = None
+        self._chunks = chunks
+        self._constraints = constraints or {}
+        self._xarray_kwargs = xarray_kwargs or {}
+        # Initialized by the private getter _get_schema
+        self._schema: Optional[base.Schema] = None
+        self.urlpath = f"{server}/griddap/{dataset_id}"
+        # https://github.com/python/mypy/issues/6799
+        kwargs.pop("protocol", None)
+        super().__init__(dataset_id=dataset_id, protocol="griddap", **kwargs)  # type: ignore
 
+    def _get_schema(self) -> base.Schema:
+        self.urlpath = self._get_cache(self.urlpath)[0]
+
+        if self._ds is None:
+            # Sets self._ds
+            self._open_dataset()
+            # Make mypy happy
+            assert self._ds is not None
+            metadata = {
+                "dims": dict(self._ds.dims),
+                "data_vars": {
+                    k: list(self._ds[k].coords) for k in self._ds.data_vars.keys()
+                },
+                "coords": tuple(self._ds.coords.keys()),
+            }
+            if getattr(self, "on_server", False):
+                metadata["internal"] = serialize_zarr_ds(self._ds)
+            metadata.update(self._ds.attrs)
+            metadata["variables"] = {}
+            for varname in self._ds.variables:
+                metadata["variables"][varname] = self._ds[varname].attrs
+            self._schema = base.Schema(
+                datashape=None,
+                dtype=None,
+                shape=None,
+                npartitions=None,
+                extra_metadata=metadata,
+            )
+
+        return self._schema
+
+    def _open_dataset(self):
+        self._ds = xr.open_dataset(
+            self.urlpath, chunks=self._chunks, **self._xarray_kwargs
+        )
+        # _NCProperties is an internal property which xarray does not yet deal
+        # with specially, so we remove it here to prevent it from causing
+        # problems for clients.
+        if "_NCProperties" in self._ds.attrs:
+            del self._ds.attrs["_NCProperties"]
+
+    def read(self):
+        raise NotImplementedError(
+            "GridDAPSource.read is not implemented because ds.load() on grids from ERDDAP is "
+            "strongly discouraged. Use to_dask() instead."
+ ) + + def read_chunked(self) -> xr.Dataset: + """Return an xarray dataset (optionally chunked).""" + self._load_metadata() + return self._ds + + def read_partition(self, i: Tuple[str, ...]) -> "ArrayLike": + """Fetch one chunk of the array for a variable.""" + self._load_metadata() + if not isinstance(i, (tuple, list)): + raise TypeError("For Xarray sources, must specify partition as tuple") + if isinstance(i, list): + i = tuple(i) + # Make mypy happy + assert self._ds is not None + if hasattr(self._ds, "variables") or i[0] in self._ds.coords: + arr = self._ds[i[0]].data + idx = i[1:] + else: + arr = self._ds.data + if isinstance(arr, np.ndarray): + return arr + # dask array + return arr.blocks[idx].compute() + + def to_dask(self) -> xr.Dataset: + """Return an xarray dataset (optionally chunked).""" + return self.read_chunked() + + def close(self): + """Close open descriptors.""" + self._ds = None + self._schema = None diff --git a/intake_erddap/erddap_cat.py b/intake_erddap/erddap_cat.py index dadb742..cdaadbc 100644 --- a/intake_erddap/erddap_cat.py +++ b/intake_erddap/erddap_cat.py @@ -8,7 +8,7 @@ from intake.catalog.base import Catalog from intake.catalog.local import LocalCatalogEntry -from .erddap import ERDDAPSource +from .erddap import GridDAPSource, TableDAPSource from .utils import match_key_to_category from .version import __version__ @@ -37,6 +37,7 @@ def __init__( category_search: Optional[Tuple[str, str]] = None, erddap_client: Optional[Type[ERDDAP]] = None, use_source_constraints: bool = True, + protocol: str = "tabledap", **kwargs, ): """ERDDAPCatalog initialization @@ -61,11 +62,16 @@ def __init__( use_source_constraints : bool, default True Any relevant search parameter defined in kwargs_search will be passed to the source objects as contraints. + protocol : str, default "tabledap" + One of the two supported ERDDAP Data Access Protocols: "griddap", or + "tabledap". "tabledap" will present tabular datasets using pandas, + meanwhile "griddap" will use xarray. 
""" self._erddap_client = erddap_client or ERDDAP self._entries: Dict[str, LocalCatalogEntry] = {} self._use_source_contraints = use_source_constraints + self._protocol = protocol self.server = server self.search_url = None @@ -117,7 +123,7 @@ def get_search_url(self) -> str: def get_client(self) -> ERDDAP: """Return an initialized ERDDAP Client.""" e = self._erddap_client(self.server) - e.protocol = "tabledap" + e.protocol = self._protocol e.dataset_id = "allDatasets" return e @@ -137,13 +143,11 @@ def _load(self): args = { "server": self.server, "dataset_id": dataset_id, - "protocol": "tabledap", + "protocol": self._protocol, "constraints": {}, } - if self._use_source_contraints and "min_time" in self.kwargs_search: - args["constraints"]["time>="] = self.kwargs_search["min_time"] - if self._use_source_contraints and "max_time" in self.kwargs_search: - args["constraints"]["time<="] = self.kwargs_search["max_time"] + if self._protocol == "tabledap": + args["constraints"].update(self._get_tabledap_constraints()) entry = LocalCatalogEntry( dataset_id, @@ -158,9 +162,23 @@ def _load(self): getenv=False, getshell=False, ) - entry._metadata = { - "info_url": e.get_info_url(response="csv", dataset_id=dataset_id), - } - entry._plugin = [ERDDAPSource] + if self._protocol == "tabledap": + entry._metadata = { + "info_url": e.get_info_url(response="csv", dataset_id=dataset_id), + } + entry._plugin = [TableDAPSource] + elif self._protocol == "griddap": + entry._plugin = [GridDAPSource] + else: + raise ValueError(f"Unsupported protocol: {self._protocol}") self._entries[dataset_id] = entry + + def _get_tabledap_constraints(self) -> Dict[str, Union[str, int, float]]: + """Return the constraints dictionary for a tabledap source.""" + result = {} + if self._use_source_contraints and "min_time" in self.kwargs_search: + result["time>="] = self.kwargs_search["min_time"] + if self._use_source_contraints and "max_time" in self.kwargs_search: + result["time<="] = self.kwargs_search["max_time"] + return result diff --git a/pyproject.toml b/pyproject.toml index f05c564..dc5c97e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ quiet = false color = true [tool.isort] -known_third_party = ["cf_pandas", "erddapy", "intake", "pandas", "pkg_resources", "pytest", "setuptools"] +known_third_party = ["cf_pandas", "erddapy", "intake", "intake_xarray", "numpy", "pandas", "pkg_resources", "pytest", "requests", "setuptools", "xarray"] skip_glob = ["docs/*", "docs/**/*.py"] [tool.pytest.ini_options] diff --git a/setup.py b/setup.py index 823c25a..ac847bf 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ entry_points={ "intake.drivers": [ "erddap = intake_erddap.erddap:ERDDAPSource", + "tabledap = intake_erddap.erddap:TableDAPSource", "erddap_cat = intake_erddap.erddap_cat:ERDDAPCatalog", ] }, diff --git a/tests/test_data/tabledap_metadata.json b/tests/test_data/tabledap_metadata.json new file mode 100644 index 0000000..ca90754 --- /dev/null +++ b/tests/test_data/tabledap_metadata.json @@ -0,0 +1,116 @@ +{ + "table": { + "columnNames": ["Row Type", "Variable Name", "Attribute Name", "Data Type", "Value"], + "columnTypes": ["String", "String", "String", "String", "String"], + "rows": [ + ["attribute", "NC_GLOBAL", "cdm_data_type", "String", "TimeSeries"], + ["attribute", "NC_GLOBAL", "cdm_timeseries_variables", "String", "station,longitude,latitude"], + ["attribute", "NC_GLOBAL", "contributor_email", "String", "feedback@axiomdatascience.com"], + ["attribute", "NC_GLOBAL", "contributor_name", "String", 
"Axiom Data Science"], + ["attribute", "NC_GLOBAL", "contributor_role", "String", "processor"], + ["attribute", "NC_GLOBAL", "contributor_role_vocabulary", "String", "NERC"], + ["attribute", "NC_GLOBAL", "contributor_url", "String", "https://www.axiomdatascience.com"], + ["attribute", "NC_GLOBAL", "Conventions", "String", "IOOS-1.2, CF-1.6, ACDD-1.3"], + ["attribute", "NC_GLOBAL", "creator_country", "String", "USA"], + ["attribute", "NC_GLOBAL", "creator_email", "String", "MAPSManager@alaskageographic.org"], + ["attribute", "NC_GLOBAL", "creator_institution", "String", "USGS National Water Information System (NWIS)"], + ["attribute", "NC_GLOBAL", "creator_name", "String", "USGS National Water Information System (NWIS)"], + ["attribute", "NC_GLOBAL", "creator_sector", "String", "gov_federal"], + ["attribute", "NC_GLOBAL", "creator_type", "String", "institution"], + ["attribute", "NC_GLOBAL", "creator_url", "String", "http://waterdata.usgs.gov/nwis"], + ["attribute", "NC_GLOBAL", "defaultDataQuery", "String", "depth_to_water_level,z,time&time>=max(time)-3days"], + ["attribute", "NC_GLOBAL", "Easternmost_Easting", "double", "-103.43675"], + ["attribute", "NC_GLOBAL", "featureType", "String", "TimeSeries"], + ["attribute", "NC_GLOBAL", "geospatial_lat_max", "double", "44.2999722"], + ["attribute", "NC_GLOBAL", "geospatial_lat_min", "double", "44.2999722"], + ["attribute", "NC_GLOBAL", "geospatial_lat_units", "String", "degrees_north"], + ["attribute", "NC_GLOBAL", "geospatial_lon_max", "double", "-103.43675"], + ["attribute", "NC_GLOBAL", "geospatial_lon_min", "double", "-103.43675"], + ["attribute", "NC_GLOBAL", "geospatial_lon_units", "String", "degrees_east"], + ["attribute", "NC_GLOBAL", "geospatial_vertical_max", "double", "0.0"], + ["attribute", "NC_GLOBAL", "geospatial_vertical_min", "double", "0.0"], + ["attribute", "NC_GLOBAL", "geospatial_vertical_positive", "String", "up"], + ["attribute", "NC_GLOBAL", "geospatial_vertical_units", "String", "m"], + ["attribute", "NC_GLOBAL", "history", "String", "Downloaded from USGS National Water Information System (NWIS) at "], + ["attribute", "NC_GLOBAL", "id", "int", "27433"], + ["attribute", "NC_GLOBAL", "infoUrl", "String", "https://sensors.ioos.us/#metadata/27433/station"], + ["attribute", "NC_GLOBAL", "institution", "String", "USGS National Water Information System (NWIS)"], + ["attribute", "NC_GLOBAL", "license", "String", "The data may be used and redistributed for free but is not intended\nfor legal use, since it may contain inaccuracies. 
Neither the data\nContributor, ERD, NOAA, nor the United States Government, nor any\nof their employees or contractors, makes any warranty, express or\nimplied, including warranties of merchantability and fitness for a\nparticular purpose, or assumes any legal liability for the accuracy,\ncompleteness, or usefulness, of this information."], + ["attribute", "NC_GLOBAL", "naming_authority", "String", "com.axiomdatascience"], + ["attribute", "NC_GLOBAL", "Northernmost_Northing", "double", "44.2999722"], + ["attribute", "NC_GLOBAL", "platform", "String", "fixed"], + ["attribute", "NC_GLOBAL", "platform_name", "String", " 4N 6E19AABA3"], + ["attribute", "NC_GLOBAL", "platform_vocabulary", "String", "http://mmisw.org/ont/ioos/platform"], + ["attribute", "NC_GLOBAL", "processing_level", "String", "Level 2"], + ["attribute", "NC_GLOBAL", "publisher_country", "String", "USA"], + ["attribute", "NC_GLOBAL", "publisher_email", "String", "MAPSManager@alaskageographic.org"], + ["attribute", "NC_GLOBAL", "publisher_institution", "String", "USGS National Water Information System (NWIS)"], + ["attribute", "NC_GLOBAL", "publisher_name", "String", "USGS National Water Information System (NWIS)"], + ["attribute", "NC_GLOBAL", "publisher_sector", "String", "gov_federal"], + ["attribute", "NC_GLOBAL", "publisher_type", "String", "institution"], + ["attribute", "NC_GLOBAL", "publisher_url", "String", "http://waterdata.usgs.gov/nwis"], + ["attribute", "NC_GLOBAL", "references", "String", "https://waterdata.usgs.gov/usa/nwis/uv?site_no=441759103261203,,"], + ["attribute", "NC_GLOBAL", "sourceUrl", "String", "https://sensors.axds.co/api/"], + ["attribute", "NC_GLOBAL", "Southernmost_Northing", "double", "44.2999722"], + ["attribute", "NC_GLOBAL", "standard_name_vocabulary", "String", "CF Standard Name Table v72"], + ["attribute", "NC_GLOBAL", "summary", "String", "Timeseries data from ' 4N 6E19AABA3' (urn:ioos:station:gov.usgs.waterdata:441759103261203)"], + ["attribute", "NC_GLOBAL", "time_coverage_end", "String", "2022-11-10T12:00:00Z"], + ["attribute", "NC_GLOBAL", "time_coverage_start", "String", "2015-05-05T12:00:00Z"], + ["attribute", "NC_GLOBAL", "title", "String", " 4N 6E19AABA3"], + ["attribute", "NC_GLOBAL", "Westernmost_Easting", "double", "-103.43675"], + ["variable", "time", "", "double", ""], + ["attribute", "time", "_CoordinateAxisType", "String", "Time"], + ["attribute", "time", "actual_range", "double", "1.4308272E9, 1.6680816E9"], + ["attribute", "time", "axis", "String", "T"], + ["attribute", "time", "ioos_category", "String", "Time"], + ["attribute", "time", "long_name", "String", "Time"], + ["attribute", "time", "standard_name", "String", "time"], + ["attribute", "time", "time_origin", "String", "01-JAN-1970 00:00:00"], + ["attribute", "time", "units", "String", "seconds since 1970-01-01T00:00:00Z"], + ["variable", "latitude", "", "double", ""], + ["attribute", "latitude", "_CoordinateAxisType", "String", "Lat"], + ["attribute", "latitude", "actual_range", "double", "44.2999722, 44.2999722"], + ["attribute", "latitude", "axis", "String", "Y"], + ["attribute", "latitude", "ioos_category", "String", "Location"], + ["attribute", "latitude", "long_name", "String", "Latitude"], + ["attribute", "latitude", "standard_name", "String", "latitude"], + ["attribute", "latitude", "units", "String", "degrees_north"], + ["variable", "longitude", "", "double", ""], + ["attribute", "longitude", "_CoordinateAxisType", "String", "Lon"], + ["attribute", "longitude", "actual_range", "double", "-103.43675, 
-103.43675"], + ["attribute", "longitude", "axis", "String", "X"], + ["attribute", "longitude", "ioos_category", "String", "Location"], + ["attribute", "longitude", "long_name", "String", "Longitude"], + ["attribute", "longitude", "standard_name", "String", "longitude"], + ["attribute", "longitude", "units", "String", "degrees_east"], + ["variable", "z", "", "double", ""], + ["attribute", "z", "_CoordinateAxisType", "String", "Height"], + ["attribute", "z", "_CoordinateZisPositive", "String", "up"], + ["attribute", "z", "actual_range", "double", "0.0, 0.0"], + ["attribute", "z", "axis", "String", "Z"], + ["attribute", "z", "ioos_category", "String", "Location"], + ["attribute", "z", "long_name", "String", "Altitude"], + ["attribute", "z", "positive", "String", "up"], + ["attribute", "z", "standard_name", "String", "altitude"], + ["attribute", "z", "units", "String", "m"], + ["variable", "depth_to_water_level", "", "double", ""], + ["attribute", "depth_to_water_level", "_FillValue", "double", "-9999.99"], + ["attribute", "depth_to_water_level", "id", "String", "323912"], + ["attribute", "depth_to_water_level", "ioos_category", "String", "Other"], + ["attribute", "depth_to_water_level", "long_name", "String", "Depth to Water Level"], + ["attribute", "depth_to_water_level", "missing_value", "double", "-9999.99"], + ["attribute", "depth_to_water_level", "platform", "String", "station"], + ["attribute", "depth_to_water_level", "status_flags", "int", "1, 2, 3, 4, 9"], + ["attribute", "depth_to_water_level", "standard_name", "String", "depth_to_water_level"], + ["attribute", "depth_to_water_level", "standard_name_url", "String", "http://mmisw.org/ont/unknown/parameter/depth_to_water_level"], + ["attribute", "depth_to_water_level", "units", "String", "m"], + ["variable", "station", "", "String", ""], + ["attribute", "station", "cf_role", "String", "timeseries_id"], + ["attribute", "station", "ioos_category", "String", "Identifier"], + ["attribute", "station", "ioos_code", "String", "urn:ioos:station:com.axiomdatascience:27433"], + ["attribute", "station", "long_name", "String", " 4N 6E19AABA3"], + ["attribute", "station", "short_name", "String", "urn:ioos:station:gov.usgs.waterdata:441759103261203"], + ["attribute", "station", "type", "String", "fixed"] + ] + } +} diff --git a/tests/test_erddap_cat.py b/tests/test_erddap_cat.py index 6ad0f89..5facd60 100644 --- a/tests/test_erddap_cat.py +++ b/tests/test_erddap_cat.py @@ -10,6 +10,7 @@ from erddapy import ERDDAP +from intake_erddap.erddap import GridDAPSource from intake_erddap.erddap_cat import ERDDAPCatalog @@ -196,3 +197,28 @@ def test_constraints_present_in_source(mock_read_csv, single_dataset_catalog): ) source = next(cat.values()) assert len(source._constraints) == 0 + + +@mock.patch("pandas.read_csv") +def test_catalog_with_griddap(mock_read_csv, single_dataset_catalog): + mock_read_csv.return_value = single_dataset_catalog + server = "https://erddap.invalid/erddap" + search = { + "min_time": "2022-01-01", + "max_time": "2022-11-07", + } + cat = ERDDAPCatalog(server=server, kwargs_search=search, protocol="griddap") + source = next(cat.values()) + assert isinstance(source, GridDAPSource) + + +@mock.patch("pandas.read_csv") +def test_catalog_with_unsupported_protocol(mock_read_csv, single_dataset_catalog): + server = "https://erddap.invalid/erddap" + search = { + "min_time": "2022-01-01", + "max_time": "2022-11-07", + } + mock_read_csv.return_value = single_dataset_catalog + with pytest.raises(ValueError): + ERDDAPCatalog(server=server, 
kwargs_search=search, protocol="fakedap") diff --git a/tests/test_erddap_source.py b/tests/test_erddap_source.py index cdd7baf..78659bb 100644 --- a/tests/test_erddap_source.py +++ b/tests/test_erddap_source.py @@ -1,26 +1,126 @@ #!/usr/bin/env pytest # -*- coding: utf-8 -*- """Unit tests for the ERDDAP Source object.""" +import json - +from pathlib import Path from unittest import mock +import numpy as np import pandas as pd +import pytest +import xarray as xr + +from intake_erddap.erddap import GridDAPSource, TableDAPSource + + +@pytest.fixture +def fake_grid() -> xr.Dataset: + """Return a fake grid for testing purposes.""" + + time = xr.DataArray( + data=np.array(["2022-01-01T00:00:00"], dtype="