Lenient qc as default behaviour #118

Merged · 7 commits · Aug 6, 2024
2 changes: 2 additions & 0 deletions src/halodrops/helper/rawreader.py
@@ -1,6 +1,7 @@
"""
Module to read from raw files, mostly to gather metadata from A files
"""

from datetime import datetime
import logging
from typing import List
@@ -31,6 +32,7 @@ def check_launch_detect_in_afile(a_file: "str") -> bool:
"""

with open(a_file, "r") as f:

module_logger.debug(f"Opened File: {a_file=}")
lines = f.readlines()

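For context on what `check_launch_detect_in_afile` does with those lines, here is a minimal sketch of the idea, assuming a hypothetical A-file field such as `Launch Obs Done? = 1` (the real field name and format come from the AVAPS A-file layout and may differ):

```python
import logging

module_logger = logging.getLogger(__name__)


def check_launch_detect_in_afile(a_file: str) -> bool:
    """Return True if the A-file records a successful launch detection.

    Sketch only: assumes a line like 'Launch Obs Done? = 1' marks a
    detected launch (hypothetical format).
    """
    with open(a_file, "r") as f:
        module_logger.debug(f"Opened File: {a_file=}")
        lines = f.readlines()

    for line in lines:
        if "Launch Obs Done?" in line:
            # assumed format: the value after '=' is 1 when launch was detected
            return line.split("=")[-1].strip() == "1"
    return False
```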
88 changes: 44 additions & 44 deletions src/halodrops/pipeline.py
@@ -403,48 +403,48 @@ def run_pipeline(pipeline: dict, config: configparser.ConfigParser):
"output": "sondes",
"comment": "This steps creates the L2 files after the QC (user says how QC flags are used to go from L1 to L2) and then saves these as L2 NC datasets.",
},
"read_and_process_L2": {
"intake": "sondes",
"apply": iterate_Sonde_method_over_dict_of_Sondes_objects,
"functions": [],
"output": "sondes",
"comment": "This step reads from the saved L2 files and prepares individual sonde datasets before they can be concatenated to create L3.",
},
"concatenate_L2": {
"intake": "sondes",
"apply": sondes_to_gridded,
"output": "gridded",
"comment": "This step concatenates the individual sonde datasets to create the L3 dataset and saves it as an NC file.",
},
"create_L3": {
"intake": "gridded",
"apply": iterate_method_over_dataset,
"functions": [],
"output": "gridded",
"comment": "This step creates the L3 dataset after adding additional products.",
},
"create_patterns": {
"intake": "gridded",
"apply": gridded_to_pattern,
"output": "pattern",
"comment": "This step creates a dataset with the pattern-wide variables by creating the pattern with the flight-phase segmentation file.",
},
"create_L4": {
"intake": "pattern",
"apply": iterate_method_over_dataset,
"functions": [],
"output": "pattern",
"comment": "This step creates the L4 dataset after adding additional products and saves the L4 dataset.",
},
"quicklooks": {
"intake": ["sondes", "gridded", "pattern"],
"apply": [
iterate_Sonde_method_over_dict_of_Sondes_objects,
iterate_method_over_dataset,
iterate_method_over_dataset,
],
"functions": [[], [], []],
"output": "plots",
"comment": "This step creates quicklooks from the L3 & L4 dataset.",
},
# "read_and_process_L2": {
# "intake": "sondes",
# "apply": iterate_Sonde_method_over_dict_of_Sondes_objects,
# "functions": [],
# "output": "sondes",
# "comment": "This step reads from the saved L2 files and prepares individual sonde datasets before they can be concatenated to create L3.",
# },
# "concatenate_L2": {
# "intake": "sondes",
# "apply": sondes_to_gridded,
# "output": "gridded",
# "comment": "This step concatenates the individual sonde datasets to create the L3 dataset and saves it as an NC file.",
# },
# "create_L3": {
# "intake": "gridded",
# "apply": iterate_method_over_dataset,
# "functions": [],
# "output": "gridded",
# "comment": "This step creates the L3 dataset after adding additional products.",
# },
# "create_patterns": {
# "intake": "gridded",
# "apply": gridded_to_pattern,
# "output": "pattern",
# "comment": "This step creates a dataset with the pattern-wide variables by creating the pattern with the flight-phase segmentation file.",
# },
# "create_L4": {
# "intake": "pattern",
# "apply": iterate_method_over_dataset,
# "functions": [],
# "output": "pattern",
# "comment": "This step creates the L4 dataset after adding additional products and saves the L4 dataset.",
# },
# "quicklooks": {
# "intake": ["sondes", "gridded", "pattern"],
# "apply": [
# iterate_Sonde_method_over_dict_of_Sondes_objects,
# iterate_method_over_dataset,
# iterate_method_over_dataset,
# ],
# "functions": [[], [], []],
# "output": "plots",
# "comment": "This step creates quicklooks from the L3 & L4 dataset.",
# },
}
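For readers following the step table above: each entry names an `intake` key, a callable under `apply`, an optional list of `functions`, and an `output` key. Below is a minimal sketch of how such a table could be driven; it is not the actual `run_pipeline` body, and it ignores list-valued `intake`/`apply` entries such as the (now commented-out) quicklooks step:

```python
def run_steps(pipeline: dict, config, data: dict) -> dict:
    """Drive a step table like the one above (illustrative sketch)."""
    for name, step in pipeline.items():
        intake = data.get(step["intake"])  # e.g. the dict of Sonde objects
        if "functions" in step:
            # steps that iterate methods over objects take a function list
            result = step["apply"](intake, step["functions"], config)
        else:
            result = step["apply"](intake, config)
        data[step["output"]] = result  # make the result available downstream
    return data
```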
38 changes: 26 additions & 12 deletions src/halodrops/processor.py
Expand Up @@ -338,7 +338,7 @@ def profile_fullness(
time_dimension="time",
timestamp_frequency=4,
fullness_threshold=0.75,
add_fullness_fraction_attribute=False,
add_fullness_fraction_attribute=True,
skip=False,
):
"""
@@ -371,7 +371,7 @@ def profile_fullness(
The threshold for the fullness fraction. If the calculated fullness fraction is less than this threshold,
the profile is considered not full. Default is 0.75.
add_fullness_fraction_attribute : bool or str, optional
If True, the function will add the fullness fraction as an attribute to the object. Default is False.
If True, the function will add the fullness fraction as an attribute to the object. Default is True.
If provided as string, it should be possible to convert it to a boolean value with the helper get_bool function.
skip : bool, optional
If True, the function will return the object without performing any operations. Default is False.
Expand Down Expand Up @@ -427,7 +427,7 @@ def near_surface_coverage(
alt_bounds=[0, 1000],
alt_dimension_name="alt",
count_threshold=50,
add_near_surface_count_attribute=False,
add_near_surface_count_attribute=True,
skip=False,
):
"""
@@ -444,7 +444,7 @@
count_threshold : int, optional
The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50.
add_near_surface_count_attribute : bool, optional
If True, adds the count of non-null values as an attribute for every variable to the object. Defaults to False.
If True, adds the count of non-null values as an attribute for every variable to the object. Defaults to True.
skip : bool, optional
If True, skips the calculation and returns the object as is. Defaults to False.

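The underlying count is straightforward; a sketch under the documented defaults (hypothetical helper name, not the halodrops source):

```python
import xarray as xr


def near_surface_count(ds: xr.Dataset, variable: str,
                       alt_bounds=(0, 1000),
                       alt_dimension_name: str = "alt") -> int:
    """Count non-null values of `variable` between the altitude bounds."""
    near_surface = ds[variable].where(
        (ds[alt_dimension_name] >= alt_bounds[0])
        & (ds[alt_dimension_name] <= alt_bounds[1])
    )
    # the profile passes QC when this count >= count_threshold (50 by default)
    return int(near_surface.notnull().sum())
```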
@@ -519,7 +519,7 @@ def filter_qc_fail(self, filter_flags=None):
Parameters
----------
filter_flags : str or list, optional
Comma-separated string or list of QC-related attribute names to be checked. Each item can be a specific attribute name or a prefix to include all attributes starting with that prefix. You can also provide 'all_except_<prefix>' to filter all flags except those starting with '<prefix>'. If 'all_except_<prefix>' is provided, it should be the only value in filter_flags. If not provided, all QC attributes will be checked.
Comma-separated string or list of QC-related attribute names to be checked. Each item can be a specific attribute name or a prefix to include all attributes starting with that prefix. You can also provide 'all_except_<prefix>' to filter all flags except those starting with '<prefix>'. If 'all_except_<prefix>' is provided, it should be the only value in filter_flags. If not provided, no sondes will be filtered.

Returns
-------
@@ -534,7 +534,7 @@ def filter_qc_fail(self, filter_flags=None):
all_qc_attributes = [attr for attr in dir(self.qc) if not attr.startswith("__")]

if filter_flags is None:
filter_flags = all_qc_attributes
filter_flags = []
elif isinstance(filter_flags, str):
filter_flags = filter_flags.split(",")
elif isinstance(filter_flags, list):
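The visible hunk stops before the prefix handling, so here is a sketch of the full documented semantics in one hypothetical helper (`resolve_filter_flags` is not a real halodrops function), including the new lenient default where `None` filters nothing:

```python
def resolve_filter_flags(filter_flags, all_qc_attributes):
    """Expand filter_flags into concrete QC attribute names (sketch)."""
    if filter_flags is None:
        return []  # lenient default: no sondes are filtered
    if isinstance(filter_flags, str):
        filter_flags = filter_flags.split(",")
    if len(filter_flags) == 1 and filter_flags[0].startswith("all_except_"):
        prefix = filter_flags[0][len("all_except_"):]
        return [a for a in all_qc_attributes if not a.startswith(prefix)]
    # each entry is an exact attribute name or a prefix matching several
    return [a for a in all_qc_attributes
            if any(a.startswith(flag) for flag in filter_flags)]
```

For example, passing `"profile_fullness"` would keep only the `profile_fullness_*` flags for checking, while everything else is ignored.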
@@ -775,15 +775,19 @@ def get_other_global_attributes(self):
# "instrument_id": "Vaisala RD-41",
"product_id": "Level-2",
# "AVAPS_Software_version": "Version 4.1.2",
"ASPEN_version": self.aspen_ds.AspenVersion
if hasattr(self.aspen_ds, "AspenVersion")
else self.aspen_ds.AvapsEditorVersion,
"ASPEN_version": (
self.aspen_ds.AspenVersion
if hasattr(self.aspen_ds, "AspenVersion")
else self.aspen_ds.AvapsEditorVersion
),
"ASPEN_processing_time": self.aspen_ds.ProcessingTime,
# "JOANNE_version": joanne.__version__,
# "launch_date": str(pd.to_datetime(self.launch_time).date()),
"launch_time_(UTC)": str(self.aspen_ds.launch_time.values)
if hasattr(self.aspen_ds, "launch_time")
else str(self.aspen_ds.base_time.values),
"launch_time_(UTC)": (
str(self.aspen_ds.launch_time.values)
if hasattr(self.aspen_ds, "launch_time")
else str(self.aspen_ds.base_time.values)
),
"is_floater": self.is_floater.__str__(),
"sonde_serial_ID": self.serial_id,
"author": "Geet George",
@@ -793,6 +797,16 @@
"creation_time": str(datetime.datetime.utcnow()) + " UTC",
}

for attr in dir(self):
if attr.startswith("near_surface_count_"):
nc_global_attrs[attr] = getattr(self, attr)
if attr.startswith("profile_fullness_fraction_"):
nc_global_attrs[attr] = getattr(self, attr)

for attr in dir(self.qc):
if not attr.startswith("__"):
nc_global_attrs[f"qc_{attr}"] = int(getattr(self.qc, attr))

object.__setattr__(self, "nc_global_attrs", nc_global_attrs)

return self
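The new loops surface the per-variable diagnostics (enabled by the `True` defaults above) and the boolean QC flags, cast to ints, as global attributes of the L2 file. Illustratively, with hypothetical variable names and values:

```python
# Hypothetical excerpt of nc_global_attrs after the loops above run:
{
    "near_surface_count_rh": 212,           # from near_surface_coverage
    "profile_fullness_fraction_ta": 0.93,   # from profile_fullness
    "qc_profile_fullness": 1,               # bool flags stored as 0/1 ints
    "qc_near_surface_coverage": 0,
}
```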