Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No units attribute for single variable in xarray engine #592

Merged
merged 1 commit into from
Jan 22, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions src/earthkit/data/readers/grib/xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,12 @@ def to_xarray(self, engine=None, xarray_open_dataset_kwargs=None, **kwargs):
- "fixed": Use the attributes defined in ``variable_attrs`` as variables
attributes and ``global_attrs`` as global attributes.
- "unique": Use all the attributes defined in ``attrs``, ``variable_attrs``
and ``global_attrs``. When an attribute has unique a value for a dataset
and ``global_attrs``. When an attribute has a unique value for a dataset
it will be a global attribute, otherwise it will be a variable attribute.
However keys in ``variable_attrs`` are always used as variable attributes,
while keys in ``global_attrs`` are always used as global attributes.
However, an attribute with a unique value is only promoted to a global attribute
if Appendix A of the CF conventions allows it to be global (e.g. "units" cannot
be a global attribute). Additionally, keys in ``variable_attrs`` are always used as
variable attributes, while keys in ``global_attrs`` are always used as global attributes.
* attrs: str, number, callable, dict or list of these, None
Attribute or list of attributes. Only used when ``attrs_mode`` is ``unique``.
Its default value (None) expands to [] unless the ``profile`` overwrites it.
Expand Down
49 changes: 46 additions & 3 deletions src/earthkit/data/utils/xarray/attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,48 @@
#

import logging
import os
from abc import ABCMeta
from abc import abstractmethod
from collections import defaultdict
from functools import cached_property

from earthkit.data.utils import ensure_dict
from earthkit.data.utils import ensure_iterable

LOG = logging.getLogger(__name__)


class CFAttrs:
    """Lazily loaded table of CF attribute metadata read from ``cf_attrs.yaml``.

    The table maps an attribute name to a dict that contains (at least) a
    ``use`` entry. ``use`` is either a single code (e.g. ``"G"``) or a list of
    codes (e.g. ``["C", "D"]``); the code ``"G"`` marks attributes that are
    allowed to be global.
    """

    def _load(self):
        """Parse the ``cf_attrs.yaml`` file located next to this module.

        Returns
        -------
        object
            Whatever ``yaml.safe_load`` produces for the file contents.

        Raises
        ------
        ValueError
            When the file does not exist.
        Exception
            Any error raised while opening or parsing the file is logged
            and re-raised unchanged.
        """
        here = os.path.dirname(__file__)
        path = os.path.join(here, "cf_attrs.yaml")
        if not os.path.exists(path):
            raise ValueError(f"CF attributes file not found! path={path}")

        # Imported lazily so that yaml is only required when the table is used.
        import yaml

        try:
            with open(path, "r", encoding="utf-8") as f:
                return yaml.safe_load(f)
        except Exception:
            # LOG.exception already records the active exception and traceback.
            LOG.exception("Failed to read CF attributes file %s", path)
            raise

    @cached_property
    def attrs(self):
        """Return the parsed attribute table; loaded on first access, then cached."""
        return self._load()

    def can_be_global(self, name):
        """Tell whether the attribute ``name`` may be used as a global attribute.

        Parameters
        ----------
        name: str
            Attribute name to look up in the table.

        Returns
        -------
        bool
            True when ``name`` is not in the table (or its entry is empty), or
            when its ``use`` entry contains the code ``"G"``. False otherwise.
        """
        item = self.attrs.get(name)
        if not item:
            # Attributes missing from the table are not restricted.
            return True

        use = item["use"]
        # A scalar ``use`` must be compared as a whole code: a plain ``in`` on a
        # string is a substring test and would accept e.g. "Gr" as "G".
        if isinstance(use, str):
            use = [use]
        return "G" in use


# Shared module-level instance; the YAML file is only read on first use.
CF_ATTRS = CFAttrs()


class Attr:
"""Generic attribute class.

Expand Down Expand Up @@ -238,7 +270,7 @@ def _id(x):
global_attrs[item.name] = item.value()

# TODO: make it optional
global_attrs.pop("units", None)
# global_attrs.pop("units", None)

return global_attrs

Expand All @@ -257,13 +289,24 @@ def _build(self, ds, t_vars, rename=None):
if len(v) == 1 and k not in self.attrs.variable_attrs:
global_attrs[k] = list(v)[0]

for var_obj in t_vars.values():
var_obj.adjust_attrs(drop_keys=global_attrs.keys(), rename=rename)
# Some attrs cannot be global according to the CF convention.
# These are removed from global attrs and kept as variable attrs.
global_attrs_keys = list(global_attrs.keys())
global_attrs_renamed_keys = global_attrs_keys
if rename:
global_attrs_renamed_keys = list(rename(global_attrs).keys())

for k1, k2 in zip(global_attrs_keys, global_attrs_renamed_keys):
if not CF_ATTRS.can_be_global(k1) or not CF_ATTRS.can_be_global(k2):
global_attrs.pop(k1)

for k in self.attrs.variable_attrs:
if k in global_attrs:
global_attrs.pop(k)

for var_obj in t_vars.values():
var_obj.adjust_attrs(drop_keys=global_attrs.keys(), rename=rename)

global_attrs = {k: v for k, v in global_attrs.items() if v is not None}

return global_attrs
Expand Down
195 changes: 195 additions & 0 deletions src/earthkit/data/utils/xarray/cf_attrs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
# Based on CF Conventions Appendix A
# All CF attributes are listed here except for those that are used to describe grid mappings. See Appendix F for the grid mapping attributes.
# The 'Type' values are S for string, N for numeric, and D for the type of the data variable.
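# The 'Use' values are G for global, C for variables containing coordinate data, and D for variables containing non-coordinate data.
# Some entries below also use the values M, Gr and "-"; see CF Conventions Appendix A for their meaning.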
Conventions:
type: S
use: G
_FillValue:
type: D
use:
- C
- D
actual_range:
type: N
use:
- C
- D
add_offset:
type: N
use:
- C
- D
ancillary_variables:
type: S
use: D
axis:
type: S
use: C
bounds:
type: S
use: C
calendar:
type: S
use: C
cell_measures:
type: S
use: D
cell_methods:
type: S
use: D
cf_role:
type: S
use: C
climatology:
type: S
use: C
comment:
type: S
use:
- G
- C
- D
compress:
type: S
use: C
computed_standard_name:
type: S
use: C
coordinates:
type: S
use:
- D
- M
external_variables:
type: S
use: G
featureType:
type: S
use: G
flag_masks:
type: D
use: D
flag_meanings:
type: S
use: D
flag_values:
type: D
use: D
formula_terms:
type: S
use: C
geometry:
type: S
use:
- C
- D
geometry_type:
type: S
use: M
grid_mapping:
type: S
use:
- D
- M
history:
type: S
use:
- G
- Gr
instance_dimension:
type: S
use: "-"
institution:
type: S
use:
- G
- D
interior_ring:
type: S
use: M
leap_month:
type: N
use: C
leap_year:
type: N
use: C
long_name:
type: S
use:
- C
- D
missing_value:
type: D
use:
- C
- D
month_lengths:
type: N
use: C
node_coordinates:
type: S
use: M
node_count:
type: S
use: M
nodes:
type: S
use: C
part_node_count:
type: S
use: M
positive:
type: S
use: C
references:
type: S
use:
- G
- D
sample_dimension:
type: S
use: "-"
scale_factor:
type: N
use:
- C
- D
source:
type: S
use:
- G
- D
standard_error_multiplier:
type: N
use: D
standard_name:
type: S
use:
- C
- D
title:
type: S
use:
- G
- Gr
units:
type: S
use:
- C
- D
valid_max:
type: N
use:
- C
- D
valid_min:
type: N
use:
- C
- D
valid_range:
type: N
use:
- C
- D
8 changes: 5 additions & 3 deletions src/earthkit/data/utils/xarray/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,12 @@ def open_dataset(
- "fixed": Use the attributes defined in ``variable_attrs`` as variables
attributes and ``global_attrs`` as global attributes.
- "unique": Use all the attributes defined in ``attrs``, ``variable_attrs``
and ``global_attrs``. When an attribute has unique a value for a dataset
and ``global_attrs``. When an attribute has a unique value for a dataset
it will be a global attribute, otherwise it will be a variable attribute.
However keys in ``variable_attrs`` are always used as variable attributes,
while keys in ``global_attrs`` are always used as global attributes.
However, an attribute with a unique value is only promoted to a global attribute
if Appendix A of the CF conventions allows it to be global (e.g. "units" cannot
be a global attribute). Additionally, keys in ``variable_attrs`` are always used as
variable attributes, while keys in ``global_attrs`` are always used as global attributes.
attrs: str, number, callable, dict or list of these, None
Attribute or list of attributes. Only used when ``attrs_mode`` is ``unique``.
Its default value (None) expands to [] unless the ``profile`` overwrites it.
Expand Down
13 changes: 10 additions & 3 deletions tests/xr_engine/test_xr_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,10 +469,14 @@ def test_xr_engine_single_field():
lats = np.linspace(90, -90, 19)
lons = np.linspace(0, 350, 36)

attrs_ref = {
"param": "t",
var_attrs_ref = {
"standard_name": "air_temperature",
"long_name": "Temperature",
"units": "K",
}

global_attrs_ref = {
"param": "t",
"paramId": 130,
"class": "od",
"stream": "oper",
Expand All @@ -488,7 +492,7 @@ def test_xr_engine_single_field():
"institution": "ECMWF",
}

assert ds.attrs == attrs_ref
assert ds.attrs == global_attrs_ref

data_vars = ["t"]

Expand All @@ -510,6 +514,9 @@ def test_xr_engine_single_field():

da = ds["t"]

for k, v in var_attrs_ref.items():
assert da.attrs[k] == v

r = da[:, :]
r.shape == (19, 36)
assert np.allclose(r.values, vals_ref)
Expand Down
Loading