diff --git a/pydropsonde/helper/__init__.py b/pydropsonde/helper/__init__.py
index d70cf4aa..82d66d0b 100644
--- a/pydropsonde/helper/__init__.py
+++ b/pydropsonde/helper/__init__.py
@@ -297,7 +297,6 @@ def calc_rh_from_q(ds):
     w_s = mtf.partial_pressure_to_mixing_ratio(e_s, ds.p.values)
     w = physics.q2mr(ds.q.values)
     rh = w / w_s
-
     try:
         rh_attrs = ds.rh.attrs
         rh_attrs.update(
diff --git a/pydropsonde/helper/quality.py b/pydropsonde/helper/quality.py
index b4c62b92..fea90f1f 100644
--- a/pydropsonde/helper/quality.py
+++ b/pydropsonde/helper/quality.py
@@ -12,15 +12,22 @@ class QualityControl:
     def __init__(
         self,
     ) -> None:
-        self.qc_vars = []
+        self.qc_vars = {}
         self.qc_flags = {}
         self.qc_details = {}
         self.qc_by_var = {}
         self.alt_dim = "time"
 
     def set_qc_variables(self, qc_variables):
-        self.qc_vars = self.qc_vars + list(qc_variables)
-        for variable in self.qc_vars:
+        """
+        Set the qc variables and their units.
+        Parameters
+        ----------
+        qc_variables : dictionary of the form {<variable name>: <unit>}
+        """
+        self.qc_vars.update(qc_variables)
+
+        for variable in qc_variables.keys():
             self.qc_by_var.update({variable: dict(qc_flags={}, qc_details={})})
 
     def get_is_floater(
@@ -149,8 +156,8 @@ def profile_sparsity(
 
         """
         var_keys = set(variable_dict.keys())
-        if set(var_keys) != set(self.qc_vars):
-            var_keys = set(var_keys) & set(self.qc_vars)
+        if set(var_keys) != set(self.qc_vars.keys()):
+            var_keys = set(var_keys) & set(self.qc_vars.keys())
             warnings.warn(
                 f"variables for which frequency is given do not match the qc_variables. Continue for the intersection {var_keys}"
             )
@@ -214,7 +221,7 @@ def near_surface_coverage(
                 f"{ds.attrs['SondeId']} has not been checked for being a floater. Please run is_floater first."
             )
 
-        for variable in self.qc_vars:
+        for variable in self.qc_vars.keys():
             dataset = ds.where(
                 (ds[alt_dim] > alt_bounds[0]) & (ds[alt_dim] < alt_bounds[1]), drop=True
             )
@@ -278,15 +285,15 @@ def low_physics(self, ds, rh_min=0.3, ta_min=293.15, alt_dim="gpsalt"):
         """
         ds_check = ds.where(ds[alt_dim] < 100, drop=True)
         if ds_check.sizes["time"] == 0:
-            self.qc_flags["low_physics"] = False
-            self.qc_details["low_physics_rh_min"] = np.nan
-            self.qc_details["low_physics_ta_min"] = np.nan
+            self.qc_flags["rh_low_physics"] = False
+            self.qc_flags["ta_low_physics"] = False
+            self.qc_details["rh_low_physics_min"] = np.nan
+            self.qc_details["ta_low_physics_min"] = np.nan
         else:
-            self.qc_flags["low_physics"] = (ds_check.rh.min() > float(rh_min)) and (
-                ds_check.ta.min() > float(ta_min)
-            )
-            self.qc_details["low_physics_rh_min"] = ds_check.rh.min().values
-            self.qc_details["low_physics_ta_min"] = ds_check.ta.min().values
+            self.qc_flags["ta_low_physics"] = ds_check.ta.min() > float(ta_min)
+            self.qc_flags["rh_low_physics"] = ds_check.rh.min() > float(rh_min)
+            self.qc_details["rh_low_physics_min"] = ds_check.rh.min().values
+            self.qc_details["ta_low_physics_min"] = ds_check.ta.min().values
 
     def check_qc(self, used_flags=None, check_ugly=True):
         """
@@ -344,7 +351,7 @@ def get_qc_by_var(self):
         be filtered and organized by variable.
 
         """
-        for variable in self.qc_vars:
+        for variable in self.qc_vars.keys():
             self.qc_by_var[variable]["qc_flags"].update(
                 {
                     key: self.qc_flags.get(key)
@@ -406,7 +413,27 @@ def get_byte_array(self, variable):
         )
         return np.byte(qc_val), attrs
 
-    def get_details(self, variable):
+    def get_unit_for_qc(self, qc_name, var_name=None):
+        """
+        Return the unit of a detailed qc value based on the name's suffix.
+
+        Names ending in diff/min/max use the unit stored for var_name in
+        qc_vars (KeyError if var_name is not a qc variable). Names ending
+        in count/fraction/ratio are dimensionless and return "1".
+        Any other ending triggers a warning and returns None.
+        """
+        if qc_name.endswith(("diff", "min", "max")):
+            # look the unit up only here, so the default var_name=None
+            # still works for details that do not need a variable unit
+            return self.qc_vars[var_name]
+        # these details are plain numbers without a physical unit
+        if qc_name.endswith(("count", "fraction", "ratio")):
+            return "1"
+        # unknown suffix: warn, then return None explicitly
+        warnings.warn("qc ending not specified. can't return a unit.")
+        return None
+
+    def get_details_var(self, variable):
         """
         Retrieve quality control details and attributes for a specified variable.
 
@@ -431,7 +458,7 @@ def get_details(self, variable):
                 {
                     key: dict(
                         long_name=f"value for qc {variable} " + name.replace("_", " "),
-                        units="1",
+                        units=self.get_unit_for_qc(key, variable),
                     )
                 }
             )
@@ -447,7 +474,7 @@ def add_variable_flags_to_ds(self, ds, variable, add_to=None, details=True):
         ds = hx.add_ancillary_var(ds, add_to, name)
         # get detail
         if details:
-            qc_dict, attrs = self.get_details(variable)
+            qc_dict, attrs = self.get_details_var(variable)
             for key in list(qc_dict.keys()):
                 ds = ds.assign({key: qc_dict.get(key)})
                 ds[key].attrs.update(attrs.get(key))
@@ -487,56 +514,6 @@ def add_alt_near_gpsalt_to_ds(self, ds):
             )
         return ds
 
-    def add_low_physic_flags_to_ds(self, ds):
-        if self.qc_flags.get("low_physics") is not None:
-            ds = ds.assign(
-                {"low_physics": np.byte(not self.qc_flags.get("low_physics"))}
-            )
-            ds["low_physics"].attrs.update(
-                dict(
-                    long_name="low physics",
-                    flag_values="0 1 ",
-                    flag_meaning="GOOD BAD",
-                )
-            )
-
-            ds = ds.assign(
-                {"low_physics_rh_min": self.qc_details.get("low_physics_rh_min")}
-            )
-            ds["low_physics_rh_min"].attrs.update(
-                dict(
-                    long_name="minimal relative humidity below 100m",
-                    units="%",
-                )
-            )
-
-            ds = ds.assign(
-                {"low_physics_ta_min": self.qc_details.get("low_physics_ta_min")}
-            )
-            ds["low_physics_ta_min"].attrs.update(
-                dict(
-                    long_name="minimal temperature below 100m",
-                    units="degreeC",
-                )
-            )
-
-            ds = hx.add_ancillary_var(
-                ds,
-                "sonde_id",
-                "low_physics",
-            )
-            ds = hx.add_ancillary_var(
-                ds,
-                "rh",
-                "low_physics_rh_min",
-            )
-            ds = hx.add_ancillary_var(
-                ds,
-                "ta",
-                "low_physics_ta_min",
-            )
-        return ds
-
     def replace_alt_var(self, ds, alt_var):
         """
         Replace the altitude variable in a dataset with its counterpart.
@@ -618,7 +595,6 @@ def add_non_var_qc_to_ds(self, ds):
         """
         ds_out = self.add_alt_near_gpsalt_to_ds(ds)
         ds_out = self.add_replace_alt_var_to_ds(ds_out)
-        ds_out = self.add_low_physic_flags_to_ds(ds_out)
 
         return ds_out
 
diff --git a/pydropsonde/processor.py b/pydropsonde/processor.py
index f6d273e4..176ef90b 100644
--- a/pydropsonde/processor.py
+++ b/pydropsonde/processor.py
@@ -450,7 +450,7 @@ def set_qc_vars(self, qc_vars=None):
 
         """
         if qc_vars is None:
-            qc_vars = ["u", "v", "rh", "ta", "p"]
+            qc_vars = {"u": "m s-1", "v": "m s-1", "rh": "1", "ta": "K", "p": "Pa"}
         self.qc.set_qc_variables(qc_vars)
         return self
 
@@ -1511,20 +1511,33 @@ def add_qc_to_interim_l3(self, keep=None):
             self: The instance with updated `interim_l3_ds` including quality control flags.
         """
         ds = self.interim_l3_ds
+
+        for var in ds.variables:
+            if var != "sonde_id":
+                ds[var].attrs.pop("ancillary_variables", None)
         if keep is None:
             keep = []
-        elif keep == "all":
-            keep = (
-                [f"{var}_qc" for var in list(self.qc.qc_by_var.keys())]
-                + list(self.qc.qc_details.keys())
-                + ["low_physics", "alt_near_gpsalt"]
-            )
         else:
-            for var in ds.variables:
-                if var != "sonde_id":
-                    ds[var].attrs.pop("ancillary_variables", None)
-            if keep is None:
-                keep = []
+            if keep == "all":
+                keep = (
+                    [f"{var}_qc" for var in list(self.qc.qc_by_var.keys())]
+                    + list(self.qc.qc_details.keys())
+                    + ["alt_near_gpsalt"]
+                )
+                for variable in self.qc.qc_vars:
+                    ds = self.qc.add_variable_flags_to_ds(ds, variable, details=True)
+                # np.isin would treat the qc_vars dict as one object and
+                # always be False, so test key membership with `in`.
+                if "q" not in self.qc.qc_vars and "rh" in self.qc.qc_vars:
+                    ds = self.qc.add_variable_flags_to_ds(
+                        ds, "rh", add_to="q", details=True
+                    )
+                # same key-membership test for theta, which reuses the ta
+                # flags when it is not itself a qc variable
+                if "theta" not in self.qc.qc_vars and "ta" in self.qc.qc_vars:
+                    ds = self.qc.add_variable_flags_to_ds(
+                        ds, "ta", add_to="theta", details=True
+                    )
             elif keep == "var_flags":
                 keep = [f"{var}_qc" for var in list(self.qc.qc_by_var.keys())] + [
                     "sonde_qc"
diff --git a/tests/test_qc.py b/tests/test_qc.py
index 050ce108..5d7674b3 100644
--- a/tests/test_qc.py
+++ b/tests/test_qc.py
@@ -41,8 +41,8 @@ def qc():
 
 @pytest.fixture
 def qc_vars(qc):
-    qc.set_qc_variables(["q", "p", "rh"])
-    assert qc.qc_vars == ["q", "p", "rh"]
+    qc.set_qc_variables({"q": "m s-1", "p": "Pa", "rh": "1"})
+    assert qc.qc_vars == {"q": "m s-1", "p": "Pa", "rh": "1"}
     return qc
 
 
@@ -113,12 +113,12 @@ def test_add_variable_flag_to_ds(qc_vars, varname, output):
 @pytest.mark.parametrize(
     "variables,output",
     [
-        (["p"], 0),  # GOOD
-        (["q", "p"], 2),  # UGLY
-        (["q"], 1),  # BAD
-        (["rh"], 2),  # UGLY
-        (["rh", "q"], 2),  # UGLY
-        (["rh", "p"], 2),  # UGLY
+        ({"p": "Pa"}, 0),  # GOOD
+        ({"q": "1", "p": "Pa"}, 2),  # UGLY
+        ({"q": "1"}, 1),  # BAD
+        ({"rh": "1"}, 2),  # UGLY
+        ({"rh": "1", "q": "1"}, 2),  # UGLY
+        ({"rh": "1", "p": "1"}, 2),  # UGLY
     ],
 )
 def test_sonde_qc(qc_vars, variables, output):