Skip to content

Commit

Permalink
Merge pull request #118 from Geet-George/lenient_qc
Browse files Browse the repository at this point in the history
Lenient qc as default behaviour

Apart from a few small formatting changes, this PR mainly makes a more lenient QC the default behaviour of halodrops.

This means that, instead of filtering out sondes that do not pass the QC thresholds for profile fullness or near-surface coverage, the QC now allows all sondes to go from Level-1 to Level-2. However, the profile fullness and near-surface coverage values for all variables are added as global attributes to the NC file, so that such sondes can easily be filtered out later during analyses if needed.
  • Loading branch information
Geet-George authored Aug 6, 2024
2 parents 371e731 + 417cbbe commit 818ec96
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 56 deletions.
2 changes: 2 additions & 0 deletions src/halodrops/helper/rawreader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Module to read from raw files, mostly to gather metadata from A files
"""

from datetime import datetime
import logging
from typing import List
Expand Down Expand Up @@ -31,6 +32,7 @@ def check_launch_detect_in_afile(a_file: "str") -> bool:
"""

with open(a_file, "r") as f:

module_logger.debug(f"Opened File: {a_file=}")
lines = f.readlines()

Expand Down
88 changes: 44 additions & 44 deletions src/halodrops/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,48 +403,48 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser):
"output": "sondes",
"comment": "This steps creates the L2 files after the QC (user says how QC flags are used to go from L1 to L2) and then saves these as L2 NC datasets.",
},
"read_and_process_L2": {
"intake": "sondes",
"apply": iterate_Sonde_method_over_dict_of_Sondes_objects,
"functions": [],
"output": "sondes",
"comment": "This step reads from the saved L2 files and prepares individual sonde datasets before they can be concatenated to create L3.",
},
"concatenate_L2": {
"intake": "sondes",
"apply": sondes_to_gridded,
"output": "gridded",
"comment": "This step concatenates the individual sonde datasets to create the L3 dataset and saves it as an NC file.",
},
"create_L3": {
"intake": "gridded",
"apply": iterate_method_over_dataset,
"functions": [],
"output": "gridded",
"comment": "This step creates the L3 dataset after adding additional products.",
},
"create_patterns": {
"intake": "gridded",
"apply": gridded_to_pattern,
"output": "pattern",
"comment": "This step creates a dataset with the pattern-wide variables by creating the pattern with the flight-phase segmentation file.",
},
"create_L4": {
"intake": "pattern",
"apply": iterate_method_over_dataset,
"functions": [],
"output": "pattern",
"comment": "This step creates the L4 dataset after adding additional products and saves the L4 dataset.",
},
"quicklooks": {
"intake": ["sondes", "gridded", "pattern"],
"apply": [
iterate_Sonde_method_over_dict_of_Sondes_objects,
iterate_method_over_dataset,
iterate_method_over_dataset,
],
"functions": [[], [], []],
"output": "plots",
"comment": "This step creates quicklooks from the L3 & L4 dataset.",
},
# "read_and_process_L2": {
# "intake": "sondes",
# "apply": iterate_Sonde_method_over_dict_of_Sondes_objects,
# "functions": [],
# "output": "sondes",
# "comment": "This step reads from the saved L2 files and prepares individual sonde datasets before they can be concatenated to create L3.",
# },
# "concatenate_L2": {
# "intake": "sondes",
# "apply": sondes_to_gridded,
# "output": "gridded",
# "comment": "This step concatenates the individual sonde datasets to create the L3 dataset and saves it as an NC file.",
# },
# "create_L3": {
# "intake": "gridded",
# "apply": iterate_method_over_dataset,
# "functions": [],
# "output": "gridded",
# "comment": "This step creates the L3 dataset after adding additional products.",
# },
# "create_patterns": {
# "intake": "gridded",
# "apply": gridded_to_pattern,
# "output": "pattern",
# "comment": "This step creates a dataset with the pattern-wide variables by creating the pattern with the flight-phase segmentation file.",
# },
# "create_L4": {
# "intake": "pattern",
# "apply": iterate_method_over_dataset,
# "functions": [],
# "output": "pattern",
# "comment": "This step creates the L4 dataset after adding additional products and saves the L4 dataset.",
# },
# "quicklooks": {
# "intake": ["sondes", "gridded", "pattern"],
# "apply": [
# iterate_Sonde_method_over_dict_of_Sondes_objects,
# iterate_method_over_dataset,
# iterate_method_over_dataset,
# ],
# "functions": [[], [], []],
# "output": "plots",
# "comment": "This step creates quicklooks from the L3 & L4 dataset.",
# },
}
38 changes: 26 additions & 12 deletions src/halodrops/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@ def profile_fullness(
time_dimension="time",
timestamp_frequency=4,
fullness_threshold=0.75,
add_fullness_fraction_attribute=False,
add_fullness_fraction_attribute=True,
skip=False,
):
"""
Expand Down Expand Up @@ -371,7 +371,7 @@ def profile_fullness(
The threshold for the fullness fraction. If the calculated fullness fraction is less than this threshold,
the profile is considered not full. Default is 0.75.
add_fullness_fraction_attribute : bool or str, optional
If True, the function will add the fullness fraction as an attribute to the object. Default is False.
If True, the function will add the fullness fraction as an attribute to the object. Default is True.
If provided as string, it should be possible to convert it to a boolean value with the helper get_bool function.
skip : bool, optional
If True, the function will return the object without performing any operations. Default is False.
Expand Down Expand Up @@ -427,7 +427,7 @@ def near_surface_coverage(
alt_bounds=[0, 1000],
alt_dimension_name="alt",
count_threshold=50,
add_near_surface_count_attribute=False,
add_near_surface_count_attribute=True,
skip=False,
):
"""
Expand All @@ -444,7 +444,7 @@ def near_surface_coverage(
count_threshold : int, optional
The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50.
add_near_surface_count_attribute : bool, optional
If True, adds the count of non-null values as an attribute for every variable to the object. Defaults to False.
If True, adds the count of non-null values as an attribute for every variable to the object. Defaults to True.
skip : bool, optional
If True, skips the calculation and returns the object as is. Defaults to False.
Expand Down Expand Up @@ -519,7 +519,7 @@ def filter_qc_fail(self, filter_flags=None):
Parameters
----------
filter_flags : str or list, optional
Comma-separated string or list of QC-related attribute names to be checked. Each item can be a specific attribute name or a prefix to include all attributes starting with that prefix. You can also provide 'all_except_<prefix>' to filter all flags except those starting with '<prefix>'. If 'all_except_<prefix>' is provided, it should be the only value in filter_flags. If not provided, all QC attributes will be checked.
Comma-separated string or list of QC-related attribute names to be checked. Each item can be a specific attribute name or a prefix to include all attributes starting with that prefix. You can also provide 'all_except_<prefix>' to filter all flags except those starting with '<prefix>'. If 'all_except_<prefix>' is provided, it should be the only value in filter_flags. If not provided, no sondes will be filtered.
Returns
-------
Expand All @@ -534,7 +534,7 @@ def filter_qc_fail(self, filter_flags=None):
all_qc_attributes = [attr for attr in dir(self.qc) if not attr.startswith("__")]

if filter_flags is None:
filter_flags = all_qc_attributes
filter_flags = []
elif isinstance(filter_flags, str):
filter_flags = filter_flags.split(",")
elif isinstance(filter_flags, list):
Expand Down Expand Up @@ -775,15 +775,19 @@ def get_other_global_attributes(self):
# "instrument_id": "Vaisala RD-41",
"product_id": "Level-2",
# "AVAPS_Software_version": "Version 4.1.2",
"ASPEN_version": self.aspen_ds.AspenVersion
if hasattr(self.aspen_ds, "AspenVersion")
else self.aspen_ds.AvapsEditorVersion,
"ASPEN_version": (
self.aspen_ds.AspenVersion
if hasattr(self.aspen_ds, "AspenVersion")
else self.aspen_ds.AvapsEditorVersion
),
"ASPEN_processing_time": self.aspen_ds.ProcessingTime,
# "JOANNE_version": joanne.__version__,
# "launch_date": str(pd.to_datetime(self.launch_time).date()),
"launch_time_(UTC)": str(self.aspen_ds.launch_time.values)
if hasattr(self.aspen_ds, "launch_time")
else str(self.aspen_ds.base_time.values),
"launch_time_(UTC)": (
str(self.aspen_ds.launch_time.values)
if hasattr(self.aspen_ds, "launch_time")
else str(self.aspen_ds.base_time.values)
),
"is_floater": self.is_floater.__str__(),
"sonde_serial_ID": self.serial_id,
"author": "Geet George",
Expand All @@ -793,6 +797,16 @@ def get_other_global_attributes(self):
"creation_time": str(datetime.datetime.utcnow()) + " UTC",
}

for attr in dir(self):
if attr.startswith("near_surface_count_"):
nc_global_attrs[attr] = getattr(self, attr)
if attr.startswith("profile_fullness_fraction_"):
nc_global_attrs[attr] = getattr(self, attr)

for attr in dir(self.qc):
if not attr.startswith("__"):
nc_global_attrs[f"qc_{attr}"] = int(getattr(self.qc, attr))

object.__setattr__(self, "nc_global_attrs", nc_global_attrs)

return self
Expand Down

0 comments on commit 818ec96

Please sign in to comment.