
Commit

Merge pull request #66 from Geet-George/skip-qc
add skip param to bypass QC checks
Geet-George authored Nov 15, 2023
2 parents 2b244d6 + c933a45 commit df661f0
Showing 2 changed files with 54 additions and 29 deletions.
17 changes: 17 additions & 0 deletions src/halodrops/helper/__init__.py
@@ -0,0 +1,17 @@
def get_bool(s):
    if isinstance(s, bool):
        return s
    elif isinstance(s, int):
        return bool(s)
    elif isinstance(s, str):
        lower_s = s.lower()
        if lower_s == "true":
            return True
        elif lower_s == "false":
            return False
        elif lower_s in ["0", "1"]:
            return bool(int(lower_s))
        else:
            raise ValueError(f"Cannot convert {s} to boolean")
    else:
        raise ValueError(f"Cannot convert {s} to boolean")
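As an aside (not part of the commit), a few illustrative calls showing the conversions the new get_bool helper performs, assuming halodrops is importable:

from halodrops.helper import get_bool

get_bool(True)    # booleans pass through unchanged -> True
get_bool(0)       # ints are cast with bool() -> False
get_bool("TRUE")  # strings are lower-cased before comparison -> True
get_bool("1")     # "0"/"1" are cast via int() then bool() -> True
get_bool("yes")   # any other string raises ValueError: Cannot convert yes to boolean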
66 changes: 37 additions & 29 deletions src/halodrops/sonde.py
Expand Up @@ -6,6 +6,7 @@
import xarray as xr

from halodrops.helper import rawreader as rr
import halodrops.helper as hh

_no_default = object()

@@ -209,6 +210,7 @@ def weighted_fullness(
        variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2},
        time_dimension="time",
        timestamp_frequency=4,
        skip=False,
    ):
        """Return profile-coverage for variable weighed for sampling frequency
@@ -232,24 +234,27 @@ def weighted_fullness(
        float
            Fraction of non-nan variable values along time_dimension weighed for sampling frequency
        """

        for variable, sampling_frequency in variable_dict.items():
            dataset = self.aspen_ds[variable]
            weighed_time_size = len(dataset[time_dimension]) / (
                timestamp_frequency / sampling_frequency
            )
            object.__setattr__(
                self.qc,
                f"profile_fullness_{variable}",
                np.sum(~np.isnan(dataset.values)) / weighed_time_size,
            )
        return self
        if hh.get_bool(skip):
            return self
        else:
            for variable, sampling_frequency in variable_dict.items():
                dataset = self.aspen_ds[variable]
                weighed_time_size = len(dataset[time_dimension]) / (
                    timestamp_frequency / sampling_frequency
                )
                object.__setattr__(
                    self.qc,
                    f"profile_fullness_{variable}",
                    np.sum(~np.isnan(dataset.values)) / weighed_time_size,
                )
            return self

    def near_surface_coverage(
        self,
        variables=["u_wind", "v_wind", "rh", "tdry", "pres"],
        alt_bounds=[0, 1000],
        alt_dimension_name="alt",
        skip=False,
    ):
        """Return fraction of non-nan values in variables near surface
@@ -272,24 +277,27 @@ def near_surface_coverage(
        ValueError
            If the attribute `aspen_ds` does not exist.
        """
        if not hasattr(self, "aspen_ds"):
            raise ValueError(
                "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first."
            )
        if hh.get_bool(skip):
            return self
        else:
            if not hasattr(self, "aspen_ds"):
                raise ValueError(
                    "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first."
                )

        for variable in variables:
            dataset = self.aspen_ds[[variable, alt_dimension_name]]
            near_surface = dataset.where(
                (dataset[alt_dimension_name] > alt_bounds[0])
                & (dataset[alt_dimension_name] < alt_bounds[1]),
                drop=True,
            )
            object.__setattr__(
                self.qc,
                f"near_surface_coverage_{variable}",
                np.sum(~np.isnan(near_surface[variable].values)),
            )
        return self
            for variable in variables:
                dataset = self.aspen_ds[[variable, alt_dimension_name]]
                near_surface = dataset.where(
                    (dataset[alt_dimension_name] > alt_bounds[0])
                    & (dataset[alt_dimension_name] < alt_bounds[1]),
                    drop=True,
                )
                object.__setattr__(
                    self.qc,
                    f"near_surface_coverage_{variable}",
                    np.sum(~np.isnan(near_surface[variable].values)),
                )
            return self

    def qc_filter(self, filter_flags=None):
        """
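As a usage sketch (not from the commit; the Sonde class name, its constructor argument, and the add_aspen_ds argument are assumptions for illustration): the new skip flag lets either QC method be bypassed, and because it is passed through hh.get_bool, string values such as "true"/"false"/"0"/"1" (e.g. read from a config file) work as well as plain booleans.

from halodrops.sonde import Sonde

sonde = Sonde("sonde-serial-id")                  # constructor argument is a placeholder
sonde = sonde.add_aspen_ds(aspen_dataset)         # attach the ASPEN dataset first (argument name assumed)

sonde = sonde.weighted_fullness()                 # runs the QC check as before (skip defaults to False)
sonde = sonde.near_surface_coverage(skip="true")  # bypasses the check and returns self unchanged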
