Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix qc attrs #143

Merged
merged 3 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pydropsonde/helper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,6 @@ def calc_rh_from_q(ds):
w_s = mtf.partial_pressure_to_mixing_ratio(e_s, ds.p.values)
w = physics.q2mr(ds.q.values)
rh = w / w_s

try:
rh_attrs = ds.rh.attrs
rh_attrs.update(
Expand Down
114 changes: 45 additions & 69 deletions pydropsonde/helper/quality.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,22 @@ class QualityControl:
def __init__(
    self,
) -> None:
    """
    Create an empty quality-control container.

    Attributes
    ----------
    qc_vars : dict
        variables under quality control, of the form {<var>:<unit>}
    qc_flags : dict
        qc flag values keyed by flag name
    qc_details : dict
        detailed qc values keyed by detail name
    qc_by_var : dict
        per-variable dictionaries holding ``qc_flags`` and ``qc_details``
    alt_dim : str
        default dimension name ("time")
        NOTE(review): presumably the dimension the profile checks run along;
        confirm against the methods that read it.
    """
    # qc_vars is a mapping {<var>: <unit>}; it is initialised exactly once
    # (a stale list initialisation that was immediately overwritten is gone).
    self.qc_vars = {}
    self.qc_flags = {}
    self.qc_details = {}
    self.qc_by_var = {}
    self.alt_dim = "time"

def set_qc_variables(self, qc_variables):
    """
    Register the variables that quality control is applied to.

    The given variables are merged into ``self.qc_vars`` and every one of
    them gets a fresh, empty entry in ``self.qc_by_var``.

    Parameters
    ----------
    qc_variables : dictionary of the form {<var>:<unit>}
        variables to add; an already registered variable has its unit
        replaced and its ``qc_by_var`` entry reset.
    """
    self.qc_vars.update(qc_variables)

    # Only touch the variables passed in this call, and give each one its
    # own pair of dictionaries so results are never shared between variables.
    for variable in qc_variables:
        self.qc_by_var[variable] = dict(qc_flags={}, qc_details={})

def get_is_floater(
Expand Down Expand Up @@ -149,8 +156,8 @@ def profile_sparsity(

"""
var_keys = set(variable_dict.keys())
if set(var_keys) != set(self.qc_vars):
var_keys = set(var_keys) & set(self.qc_vars)
if set(var_keys) != set(self.qc_vars.keys()):
var_keys = set(var_keys) & set(self.qc_vars.keys())
warnings.warn(
f"variables for which frequency is given do not match the qc_variables. Continue for the intersection {var_keys}"
)
Expand Down Expand Up @@ -214,7 +221,7 @@ def near_surface_coverage(
f"{ds.attrs['SondeId']} has not been checked for being a floater. Please run is_floater first."
)

for variable in self.qc_vars:
for variable in self.qc_vars.keys():
dataset = ds.where(
(ds[alt_dim] > alt_bounds[0]) & (ds[alt_dim] < alt_bounds[1]), drop=True
)
Expand Down Expand Up @@ -278,15 +285,15 @@ def low_physics(self, ds, rh_min=0.3, ta_min=293.15, alt_dim="gpsalt"):
"""
ds_check = ds.where(ds[alt_dim] < 100, drop=True)
if ds_check.sizes["time"] == 0:
self.qc_flags["low_physics"] = False
self.qc_details["low_physics_rh_min"] = np.nan
self.qc_details["low_physics_ta_min"] = np.nan
self.qc_flags["rh_low_physics"] = False
self.qc_flags["ta_low_physics"] = False
self.qc_details["rh_low_physics_min"] = np.nan
self.qc_details["ta_low_physics_min"] = np.nan
else:
self.qc_flags["low_physics"] = (ds_check.rh.min() > float(rh_min)) and (
ds_check.ta.min() > float(ta_min)
)
self.qc_details["low_physics_rh_min"] = ds_check.rh.min().values
self.qc_details["low_physics_ta_min"] = ds_check.ta.min().values
self.qc_flags["ta_low_physics"] = ds_check.ta.min() > float(ta_min)
self.qc_flags["rh_low_physics"] = ds_check.rh.min() > float(rh_min)
self.qc_details["rh_low_physics_min"] = ds_check.rh.min().values
self.qc_details["ta_low_physics_min"] = ds_check.ta.min().values

def check_qc(self, used_flags=None, check_ugly=True):
"""
Expand Down Expand Up @@ -344,7 +351,7 @@ def get_qc_by_var(self):
be filtered and organized by variable.

"""
for variable in self.qc_vars:
for variable in self.qc_vars.keys():
self.qc_by_var[variable]["qc_flags"].update(
{
key: self.qc_flags.get(key)
Expand Down Expand Up @@ -406,7 +413,27 @@ def get_byte_array(self, variable):
)
return np.byte(qc_val), attrs

def get_details(self, variable):
def get_unit_for_qc(self, qc_name, var_name=None):
    """
    Get the unit of a detailed qc value from the ending of its name.

    Parameters
    ----------
    qc_name : str
        name of the qc detail; only its ending is inspected
    var_name : str, optional
        key into ``self.qc_vars``. Only needed when the ending is
        ``diff``, ``min`` or ``max``, because those values carry the unit
        of the variable itself.

    Returns
    -------
    str or None
        the variable's unit for ``diff``/``min``/``max`` endings, ``"1"``
        for ``count``/``fraction``/``ratio`` endings, and ``None`` (after a
        warning) for any other ending.

    Raises
    ------
    KeyError
        if the ending requires the variable's unit and ``var_name`` is not
        a key of ``self.qc_vars``.
    """
    if qc_name.endswith(("diff", "min", "max")):
        # Look the unit up only here, so dimensionless details can be
        # resolved without a (valid) var_name.
        return self.qc_vars[var_name]
    if qc_name.endswith(("count", "fraction", "ratio")):
        return "1"
    warnings.warn("qc ending not specified. can't return a unit.")
    return None

def get_details_var(self, variable):
"""
Retrieve quality control details and attributes for a specified variable.

Expand All @@ -431,7 +458,7 @@ def get_details(self, variable):
{
key: dict(
long_name=f"value for qc {variable} " + name.replace("_", " "),
units="1",
units=self.get_unit_for_qc(key, variable),
)
}
)
Expand All @@ -447,7 +474,7 @@ def add_variable_flags_to_ds(self, ds, variable, add_to=None, details=True):
ds = hx.add_ancillary_var(ds, add_to, name)
# get detail
if details:
qc_dict, attrs = self.get_details(variable)
qc_dict, attrs = self.get_details_var(variable)
for key in list(qc_dict.keys()):
ds = ds.assign({key: qc_dict.get(key)})
ds[key].attrs.update(attrs.get(key))
Expand Down Expand Up @@ -487,56 +514,6 @@ def add_alt_near_gpsalt_to_ds(self, ds):
)
return ds

def add_low_physic_flags_to_ds(self, ds):
    """
    Add the low-physics flag and its detail values to a dataset.

    Nothing is added when ``self.qc_flags`` has no ``"low_physics"`` entry.
    Otherwise the dataset gains

    - ``low_physics``: byte flag, 0 when the stored flag is truthy, else 1,
      registered as ancillary variable of ``sonde_id``
    - ``low_physics_rh_min`` / ``low_physics_ta_min``: the stored detail
      values, registered as ancillary variables of ``rh`` and ``ta``

    Parameters
    ----------
    ds : dataset the variables are assigned to
        NOTE(review): presumably an xarray.Dataset (uses ``assign`` and
        ``attrs``) — confirm against the caller.

    Returns
    -------
    the dataset with the variables added, or ``ds`` unchanged when the
    low-physics check has not been stored.
    """
    low_physics = self.qc_flags.get("low_physics")
    if low_physics is None:
        return ds

    # The stored flag is True for a passing sonde, the byte flag uses 0 = GOOD.
    ds = ds.assign({"low_physics": np.byte(not low_physics)})
    # NOTE(review): CF conventions spell the attribute ``flag_meanings``; the
    # key is left as found because its readers are not visible here — confirm.
    ds["low_physics"].attrs.update(
        dict(
            long_name="low physics",
            flag_values="0 1 ",
            flag_meaning="GOOD BAD",
        )
    )

    # Units follow the data the minima are taken from: rh is a fraction
    # (checked against 0.3) and ta is in kelvin (checked against 293.15),
    # so the details are "1" and "K" rather than "%" and "degreeC".
    detail_attrs = (
        ("low_physics_rh_min", "minimal relative humidity below 100m", "1"),
        ("low_physics_ta_min", "minimal temperature below 100m", "K"),
    )
    for detail_name, long_name, units in detail_attrs:
        ds = ds.assign({detail_name: self.qc_details.get(detail_name)})
        ds[detail_name].attrs.update(dict(long_name=long_name, units=units))

    ancillary_links = (
        ("sonde_id", "low_physics"),
        ("rh", "low_physics_rh_min"),
        ("ta", "low_physics_ta_min"),
    )
    for parent, ancillary in ancillary_links:
        ds = hx.add_ancillary_var(ds, parent, ancillary)
    return ds

def replace_alt_var(self, ds, alt_var):
"""
Replace the altitude variable in a dataset with its counterpart.
Expand Down Expand Up @@ -618,7 +595,6 @@ def add_non_var_qc_to_ds(self, ds):
"""
ds_out = self.add_alt_near_gpsalt_to_ds(ds)
ds_out = self.add_replace_alt_var_to_ds(ds_out)
ds_out = self.add_low_physic_flags_to_ds(ds_out)

return ds_out

Expand Down
37 changes: 25 additions & 12 deletions pydropsonde/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ def set_qc_vars(self, qc_vars=None):
"""

if qc_vars is None:
qc_vars = ["u", "v", "rh", "ta", "p"]
qc_vars = {"u": "m s-1", "v": "m s-1", "rh": "1", "ta": "K", "p": "Pa"}
self.qc.set_qc_variables(qc_vars)
return self

Expand Down Expand Up @@ -1511,20 +1511,33 @@ def add_qc_to_interim_l3(self, keep=None):
self: The instance with updated `interim_l3_ds` including quality control flags.
"""
ds = self.interim_l3_ds

for var in ds.variables:
if var != "sonde_id":
ds[var].attrs.pop("ancillary_variables", None)
if keep is None:
keep = []
elif keep == "all":
keep = (
[f"{var}_qc" for var in list(self.qc.qc_by_var.keys())]
+ list(self.qc.qc_details.keys())
+ ["low_physics", "alt_near_gpsalt"]
)
else:
for var in ds.variables:
if var != "sonde_id":
ds[var].attrs.pop("ancillary_variables", None)
if keep is None:
keep = []
if keep == "all":
keep = (
[f"{var}_qc" for var in list(self.qc.qc_by_var.keys())]
+ list(self.qc.qc_details.keys())
+ ["alt_near_gpsalt"]
)
for variable in self.qc.qc_vars:
ds = self.qc.add_variable_flags_to_ds(ds, variable, details=True)
if (not np.isin("q", self.qc.qc_vars)) and np.isin(
"rh", self.qc.qc_vars
):
ds = self.qc.add_variable_flags_to_ds(
ds, "rh", add_to="q", details=True
)
if (not np.isin("theta", self.qc.qc_vars)) and np.isin(
"ta", self.qc.qc_vars
):
ds = self.qc.add_variable_flags_to_ds(
ds, "ta", add_to="theta", details=True
)
elif keep == "var_flags":
keep = [f"{var}_qc" for var in list(self.qc.qc_by_var.keys())] + [
"sonde_qc"
Expand Down
16 changes: 8 additions & 8 deletions tests/test_qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def qc():

@pytest.fixture
def qc_vars(qc):
qc.set_qc_variables(["q", "p", "rh"])
assert qc.qc_vars == ["q", "p", "rh"]
qc.set_qc_variables({"q": "m s-1", "p": "Pa", "rh": "1"})
assert qc.qc_vars == {"q": "m s-1", "p": "Pa", "rh": "1"}
return qc


Expand Down Expand Up @@ -113,12 +113,12 @@ def test_add_variable_flag_to_ds(qc_vars, varname, output):
@pytest.mark.parametrize(
"variables,output",
[
(["p"], 0), # GOOD
(["q", "p"], 2), # UGLY
(["q"], 1), # BAD
(["rh"], 2), # UGLY
(["rh", "q"], 2), # UGLY
(["rh", "p"], 2), # UGLY
({"p": "Pa"}, 0), # GOOD
({"q": "1", "p": "Pa"}, 2), # UGLY
({"q": "1"}, 1), # BAD
({"rh": "1"}, 2), # UGLY
({"rh": "1", "q": "1"}, 2), # UGLY
({"rh": "1", "p": "1"}, 2), # UGLY
],
)
def test_sonde_qc(qc_vars, variables, output):
Expand Down