diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 0288349..8c974a3 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -1,4 +1,4 @@
-# This workflow will install Python dependencies using conda, run tests and lint with a variety of Python versions
+# This workflow will install Python dependencies using mamba, run tests and check pre-commit

 name: Python package

@@ -14,30 +14,23 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       fail-fast: false
-      matrix:
-        python-version: ["3.10","3.11"]

     steps:
       - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+      - name: Set Swap Space
+        uses: pierotofy/set-swap-space@master
         with:
-          python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
-        run: |
-          $CONDA/bin/conda env update --file environment.yml --name base
-      - name: Lint with flake8
-        run: |
-          $CONDA/bin/conda install flake8
-          # stop the build if there are Python syntax errors or undefined names
-          $CONDA/bin/flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          $CONDA/bin/flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      - name: Test with pytest
+          swap-size-gb: 10
+      - uses: mamba-org/setup-micromamba@v1
+        with:
+          micromamba-version: 'latest'
+          init-shell: bash
+          generate-run-shell: true
+          environment-name: testenv
+          environment-file: environment.yml
+      - name: Run pytest in micromamba environment
         run: |
-          $CONDA/bin/conda install conda-build
-          $CONDA/bin/conda develop ./src
-          $CONDA/bin/conda install pytest
-          $CONDA/bin/pytest --import-mode=importlib .
+          micromamba install -n testenv pytest
+          micromamba run -n testenv pytest --import-mode=importlib .
       - name: pre-commit
         uses: pre-commit/action@v3.0.0
diff --git a/docs/source/explanation/for_developers.md b/docs/source/explanation/for_developers.md
new file mode 100644
index 0000000..38072f2
--- /dev/null
+++ b/docs/source/explanation/for_developers.md
@@ -0,0 +1,52 @@
+# For Developers
+
+## How does `halodrops.__init__` work?
+
+The idea is to minimize the decision-making on the user's part. The user should be able to run the package with minimal configuration; the package handles the rest. For this, all functions in the package should ideally have default values for all of their arguments. The user can override these default values by providing non-default values in a configuration file.
+
+However, some arguments cannot have default values (e.g. `data_directory` or `flight_id`). These arguments are mandatory and must be provided by the user within a `MANDATORY` section in the configuration file. This means that functions that share a mandatory argument must use the same argument name across the whole package (e.g. `data_directory` must not appear in another function as `data_dir`).
+
+The package handles these non-default and mandatory values and executes the functions with the provided arguments.
+
+This `__init__` script thus retrieves mandatory and non-default values from the configuration file for functions within the package and its subpackages, and executes those functions with the retrieved arguments.
+
+### Functions
+`__init__` defines several functions:
+
+`get_all_defaults(package)`: This function retrieves the default values of all functions within a package and its subpackages. It returns a dictionary where the keys are the fully qualified names of the functions, and the values are dictionaries containing the default values of the function parameters.
+
+`nondefault_values_from_config(config, default_dict)`: This function retrieves non-default argument values from a configuration file. It returns a dictionary containing the non-default arguments and their corresponding values based on the config file.
+
+`get_mandatory_args(function)`: This function retrieves a list of all arguments that do not have a default value for a given function.
+
+`get_mandatory_values_from_config(config, mandatory_args)`: This function extracts mandatory values from the `MANDATORY` section of a configuration file. It returns a dictionary where the keys are the argument names and the values are the corresponding values from the config file.
+
+### Main Execution
+The script's main execution begins by parsing command-line arguments to get the path to a configuration file. It then checks if the provided directory and file exist. If they do, it reads the configuration file.
+
+Next, it retrieves the default values for all functions within the halodrops package using the `get_all_defaults` function. It then retrieves the non-default values from the configuration file using the `nondefault_values_from_config` function.
+
+The script then defines a list of functions to execute. For each function, it retrieves the non-default arguments from the previously retrieved non-default values. If the function has mandatory arguments, it retrieves their values from the configuration file using the `get_mandatory_values_from_config` function.
+
+Finally, the script executes each function with the retrieved arguments.
+
+### Usage
+To use this script, you need to provide a configuration file that contains the non-default and mandatory values for the functions you want to execute. The configuration file should have a `MANDATORY` section and a separate section for each function for which non-default values are provided; the section name is the fully qualified name of the function (e.g. `api.qc.run`). Each section should contain options for the function arguments, where the option names are the argument names and the option values are the argument values.
+
+An example config file would look like this:
+
+```ini
+[MANDATORY]
+data_directory = /path/to/data
+flight_id = 20220401
+
+[api.qc.run]
+arg1 = nondefault1
+arg2 = nondefault2
+```
+
+You can run the script from the command line simply by running `halodrops`, or optionally with the `-c` or `--config_file_path` option followed by the path to your configuration file. For example:
+
+```bash
+halodrops -c /path/to/config/file
+```
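+
+For illustration, here is a rough sketch (not the package's exact implementation) of how default values can be collected from a function's signature and overridden from a matching config section. The function `example` and the section name are hypothetical:
+
+```python
+import configparser
+import inspect
+
+
+def example(data_directory, arg1="default1", arg2="default2"):
+    pass
+
+
+config = configparser.ConfigParser()
+config.read("halodrops.cfg")
+
+# Defaults come from the function signature ...
+defaults = {
+    name: param.default
+    for name, param in inspect.signature(example).parameters.items()
+    if param.default is not inspect.Parameter.empty
+}
+
+# ... and are overridden by the options of the function's config section.
+section = "api.qc.run"
+if config.has_section(section):
+    defaults.update(dict(config[section]))
+```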
diff --git a/docs/source/explanation/index.md b/docs/source/explanation/index.md
index 81613be..98a4dd8 100644
--- a/docs/source/explanation/index.md
+++ b/docs/source/explanation/index.md
@@ -6,4 +6,5 @@
 performance_of_sondes
 reconditioning
+for_developers
 ```
diff --git a/environment.yml b/environment.yml
index 1ba13a7..c3cc7bf 100644
--- a/environment.yml
+++ b/environment.yml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - python=3.10
+  - python>=3.10
   - sphinx
   - ca-certificates
   - openssl
@@ -22,4 +22,3 @@ dependencies:
   - sphinx-book-theme
   - sphinx-last-updated-by-git
   - -e .
-  - git+https://github.com/Geet-George/gogoesgone.git@main
diff --git a/halodrops.cfg b/halodrops.cfg
index 25f857a..d934760 100644
--- a/halodrops.cfg
+++ b/halodrops.cfg
@@ -1,2 +1,7 @@
-[PATHS]
-data-directory = "./Data/"
+[MANDATORY]
+data_directory = /Users/geetgeorge/Documents/Work/Research/Projects/Pre-TUD/AC3/Dropsondes/Data/Data
+flight_id = 20220401
+
+[api.qc.run2]
+arg1 = nondefault1
+arg2 = nondefault2
diff --git a/src/halodrops/__init__.py b/src/halodrops/__init__.py
index 763471f..7454dfe 100644
--- a/src/halodrops/__init__.py
+++ b/src/halodrops/__init__.py
@@ -1,4 +1,5 @@
 import logging
+from pathlib import Path

 # create halodrops logger
 logger = logging.getLogger("halodrops")
@@ -27,9 +28,13 @@
 logger.addHandler(fh_debug)
 logger.addHandler(ch)

+import configparser
+from . import pipeline as pi
+

 def main():
     import argparse
+    import halodrops

     parser = argparse.ArgumentParser("Arguments")
@@ -63,3 +68,5 @@

     config = configparser.ConfigParser()
     config.read(config_file_path)
+
+    pi.run_pipeline(pi.pipeline, config)
diff --git a/src/halodrops/helper/__init__.py b/src/halodrops/helper/__init__.py
index e69de29..33a4891 100644
--- a/src/halodrops/helper/__init__.py
+++ b/src/halodrops/helper/__init__.py
@@ -0,0 +1,153 @@
+import numpy as np
+
+# Keys in l2_variables should be variable names in aspen_ds attribute of Sonde object
+l2_variables = {
+    "u_wind": {
+        "rename_to": "u",
+        "attributes": {
+            "standard_name": "eastward_wind",
+            "long_name": "u component of winds",
+            "units": "m s-1",
+            "coordinates": "time lon lat alt",
+        },
+    },
+    "v_wind": {
+        "rename_to": "v",
+        "attributes": {
+            "standard_name": "northward_wind",
+            "long_name": "v component of winds",
+            "units": "m s-1",
+            "coordinates": "time lon lat alt",
+        },
+    },
+    "tdry": {
+        "rename_to": "ta",
+        "attributes": {
+            "standard_name": "air_temperature",
+            "long_name": "air temperature",
+            "units": "K",
+            "coordinates": "time lon lat alt",
+        },
+    },
+    "pres": {
+        "rename_to": "p",
+        "attributes": {
+            "standard_name": "air_pressure",
+            "long_name": "atmospheric pressure",
+            "units": "Pa",
+            "coordinates": "time lon lat alt",
+        },
+    },
+    "rh": {
+        "attributes": {
+            "standard_name": "relative_humidity",
+            "long_name": "relative humidity",
+            "units": "",
+            "coordinates": "time lon lat alt",
+        }
+    },
+    "lat": {
+        "attributes": {
+            "standard_name": "latitude",
+            "long_name": "latitude",
+            "units": "degree_north",
+            "axis": "Y",
+        }
+    },
+    "lon": {
+        "attributes": {
+            "standard_name": "longitude",
+            "long_name": "longitude",
+            "units": "degree_east",
+            "axis": "X",
+        }
+    },
+    "time": {
+        "attributes": {
+            "standard_name": "time",
+            "long_name": "time of recorded measurement",
+            "axis": "T",
+        }
+    },
+    "gpsalt": {
+        "attributes": {
+            "standard_name": "altitude",
+            "long_name": "gps reported altitude above MSL",
+            "units": "m",
+            "axis": "Z",
+            "positive": "up",
+        }
+    },
+}
+
+encoding_variables = {
+    "time": {"units": "seconds since 1970-01-01", "dtype": "float"},
+}
+
+variable_compression_properties = dict(
+    zlib=True,
+    complevel=4,
+    fletcher32=True,
+    _FillValue=np.finfo("float32").max,
+)
+
+l2_flight_attributes_map = {
+    "True Air Speed (m/s)": "true_air_speed_(ms-1)",
+    "Ground Speed (m/s)": "ground_speed_(ms-1)",
+    "Software Notes": "AVAPS_software_notes",
+    "Format Notes": "AVAPS_format_notes",
+    "True Heading (deg)": "true_heading_(deg)",
+    "Ground Track (deg)": "ground_track_(deg)",
+    "Longitude (deg)": "aircraft_longitude_(deg_E)",
+    "Latitude (deg)": "aircraft_latitude_(deg_N)",
+    "MSL Altitude (m)": "aircraft_msl_altitude_(m)",
+    "Geopotential Altitude (m)": "aircraft_geopotential_altitude_(m)",
+}
+
+
+l2_filename_template = (
+    "HALO-(AC)3_{platform}_{launch_time}_{flight_id}_{serial_id}_Level_2.nc"
+)
+
+
+def get_bool(s):
+    if isinstance(s, bool):
+        return s
+    elif isinstance(s, int):
+        return bool(s)
+    elif isinstance(s, str):
+        lower_s = s.lower()
+        if lower_s == "true":
+            return True
+        elif lower_s == "false":
+            return False
+        elif lower_s in ["0", "1"]:
+            return bool(int(lower_s))
+        else:
+            raise ValueError(f"Cannot convert {s} to boolean")
+    else:
+        raise ValueError(f"Cannot convert {s} to boolean")
+
+
+def convert_rh_to_si(value):
+    """convert RH from % to fraction"""
+    return value / 100
+
+
+def convert_pres_to_si(value):
+    """convert pressure from hPa to Pa"""
+    return value * 100
+
+
+def convert_tdry_to_si(value):
+    """convert temperature from C to K"""
+    return value + 273.15
+
+
+def get_si_converter_function_based_on_var(var_name):
+    """get the function to convert a variable to SI units based on its name"""
+    func_name = f"convert_{var_name}_to_si"
+    func = globals().get(func_name, None)
+    if func is None:
+        raise ValueError(f"No function named {func_name} found in the module")
+    return func
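+
+
+# Illustrative usage (values are hypothetical):
+#   get_bool("true")                                        -> True
+#   get_si_converter_function_based_on_var("rh")(50)        -> 0.5
+#   get_si_converter_function_based_on_var("pres")(1013.25) -> 101325.0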
"aircraft_msl_altitude_(m)", + "Geopotential Altitude (m)": "aircraft_geopotential_altitude_(m)", +} + + +l2_filename_template = ( + "HALO-(AC)3_{platform}_{launch_time}_{flight_id}_{serial_id}_Level_2.nc" +) + + +def get_bool(s): + if isinstance(s, bool): + return s + elif isinstance(s, int): + return bool(s) + elif isinstance(s, str): + lower_s = s.lower() + if lower_s == "true": + return True + elif lower_s == "false": + return False + elif lower_s in ["0", "1"]: + return bool(int(lower_s)) + else: + raise ValueError(f"Cannot convert {s} to boolean") + else: + raise ValueError(f"Cannot convert {s} to boolean") + + +def convert_rh_to_si(value): + """convert RH from % to fraction""" + return value / 100 + + +def convert_pres_to_si(value): + """convert pressure from hPa to Pa""" + return value * 100 + + +def convert_tdry_to_si(value): + """convert temperature from C to K""" + return value + 273.15 + + +def get_si_converter_function_based_on_var(var_name): + """get the function to convert a variable to SI units based on its name""" + func_name = f"convert_{var_name}_to_si" + func = globals().get(func_name, None) + if func is None: + raise ValueError(f"No function named {func_name} found in the module") + return func diff --git a/src/halodrops/helper/paths.py b/src/halodrops/helper/paths.py index 6c6cb87..ee7ccef 100644 --- a/src/halodrops/helper/paths.py +++ b/src/halodrops/helper/paths.py @@ -11,7 +11,39 @@ module_logger = logging.getLogger("halodrops.helper.paths") -class Paths: +class Platform: + """ + Deriving flight paths from the provided platform directory + + The input should align in terms of hierarchy and nomenclature + with the {doc}`Directory Structure ` that `halodrops` expects. + """ + + def __init__( + self, data_directory, platform_id, platform_directory_name=None + ) -> None: + self.platform_id = platform_id + self.platform_directory_name = platform_directory_name + self.data_directory = data_directory + self.flight_ids = self.get_flight_ids() + + def get_flight_ids(self): + """Returns a list of flight IDs for the given platform directory""" + if self.platform_directory_name is None: + platform_dir = os.path.join(self.data_directory, self.platform_id) + else: + platform_dir = os.path.join( + self.data_directory, self.platform_directory_name + ) + + flight_ids = [] + for flight_dir in os.listdir(platform_dir): + if os.path.isdir(os.path.join(platform_dir, flight_dir)): + flight_ids.append(flight_dir) + return flight_ids + + +class Flight: """ Deriving paths from the provided directory @@ -19,36 +51,46 @@ class Paths: with the {doc}`Directory Structure ` that `halodrops` expects. 
""" - def __init__(self, directory, flightdir): + def __init__( + self, data_directory, flight_id, platform_id, platform_directory_name=None + ): """Creates an instance of Paths object for a given flight Parameters ---------- - `directory` : `str` + `data_directory` : `str` Main data directory - `flightdir` : `str` + `flight_id` : `str` Individual flight directory name + `platform_id` : `str` + Platform name + Attributes ---------- - `flightdir` + `flight_idpath` Path to flight data directory - `flightdirname` + `flight_id` Name of flight data directory `l1dir` Path to Level-1 data directory """ self.logger = logging.getLogger("halodrops.helper.paths.Paths") - self.flightdir = os.path.join(directory, flightdir) - self.flightdirname = flightdir - self.l0dir = os.path.join(directory, flightdir, "Level_0") - self.l1dir = os.path.join(directory, flightdir, "Level_1") + if platform_directory_name is None: + platform_directory_name = platform_id + self.flight_idpath = os.path.join( + data_directory, platform_directory_name, flight_id + ) + self.flight_id = flight_id + self.platform_id = platform_id + self.l1dir = os.path.join(self.flight_idpath, "Level_1") + self.l0dir = os.path.join(self.flight_idpath, "Level_0") self.logger.info( - f"Created Path Instance: {self.flightdir=}; {self.flightdirname=}; {self.l1dir=}" + f"Created Path Instance: {self.flight_idpath=}; {self.flight_id=}; {self.l1dir=}" ) def get_all_afiles(self): @@ -69,7 +111,7 @@ def quicklooks_path(self): `str` Path to quicklooks directory """ - quicklooks_path_str = os.path.join(self.flightdir, "Quicklooks") + quicklooks_path_str = os.path.join(self.flight_idpath, "Quicklooks") if pp(quicklooks_path_str).exists(): self.logger.info(f"Path exists: {quicklooks_path_str=}") else: @@ -80,7 +122,7 @@ def quicklooks_path(self): return quicklooks_path_str def populate_sonde_instances(self) -> Dict: - """Returns a dictionary of `Sonde` class instances for all A-files found in `flightdir` + """Returns a dictionary of `Sonde` class instances for all A-files found in `flight_idpath` and also sets the dictionary as value of `Sondes` attribute """ afiles = self.get_all_afiles() @@ -94,9 +136,12 @@ def populate_sonde_instances(self) -> Dict: Sondes[sonde_id] = Sonde(sonde_id, launch_time=launch_time) Sondes[sonde_id].add_launch_detect(launch_detect) + Sondes[sonde_id].add_flight_id(self.flight_id) + Sondes[sonde_id].add_platform_id(self.platform_id) Sondes[sonde_id].add_afile(a_file) - Sondes[sonde_id].add_postaspenfile() - Sondes[sonde_id].add_aspen_ds() + if launch_detect: + Sondes[sonde_id].add_postaspenfile() + Sondes[sonde_id].add_aspen_ds() object.__setattr__(self, "Sondes", Sondes) diff --git a/src/halodrops/helper/rd41.CONFIG b/src/halodrops/helper/rd41.CONFIG deleted file mode 100644 index af41ec4..0000000 --- a/src/halodrops/helper/rd41.CONFIG +++ /dev/null @@ -1,10 +0,0 @@ -[sampling - frequencies] -pres = 2 -tdry = 2 -dp = 2 -rh = 2 -u_wind = 4 -v_wind = 4 -wspd = 4 -wdir = 4 -mr = 2 diff --git a/src/halodrops/pipeline.py b/src/halodrops/pipeline.py new file mode 100644 index 0000000..5800ec4 --- /dev/null +++ b/src/halodrops/pipeline.py @@ -0,0 +1,445 @@ +from .helper.paths import Platform, Flight +from .sonde import Sonde +import configparser +import inspect +import os +import xarray as xr + + +def get_mandatory_args(function): + """ + Get a list of all arguments that do not have a default value for each function in the list. + + Parameters + ---------- + list_of_functions : list + A list of functions to inspect. 
+
+    Returns
+    -------
+    list
+        A list of argument names that do not have a default value.
+
+    Examples
+    --------
+    >>> def func1(a, b=2):
+    ...     pass
+    >>> def func2(c, d=4, e=5):
+    ...     pass
+    >>> get_mandatory_args(func1)
+    ['a']
+    >>> get_mandatory_args(func2)
+    ['c']
+    """
+    mandatory_args = []
+    sig = inspect.signature(function)
+    for name, param in sig.parameters.items():
+        if param.default == inspect.Parameter.empty and name != "self":
+            mandatory_args.append(name)
+    return mandatory_args
+
+
+def get_mandatory_values_from_config(config, mandatory_args):
+    """
+    Extracts mandatory values from the 'MANDATORY' section of a configuration file.
+
+    Parameters
+    ----------
+    config : ConfigParser
+        The configuration file parser.
+    mandatory_args : list
+        A list of argument names that are expected to be in the 'MANDATORY' section of the config file.
+
+    Returns
+    -------
+    dict
+        A dictionary where the keys are the argument names and the values are the corresponding values from the config file.
+
+    Raises
+    ------
+    ValueError
+        If the 'MANDATORY' section is not found in the config file or if a mandatory argument is not found in the 'MANDATORY' section.
+
+    Examples
+    --------
+    >>> import configparser
+    >>> config = configparser.ConfigParser()
+    >>> config.read_string('[MANDATORY]\\narg1=value1\\narg2=value2')
+    >>> get_mandatory_values_from_config(config, ['arg1', 'arg2'])
+    {'arg1': 'value1', 'arg2': 'value2'}
+    """
+    if not config.has_section("MANDATORY"):
+        raise ValueError("MANDATORY section not found in config file")
+    else:
+        mandatory_dict = {}
+        for arg in mandatory_args:
+            if config.has_option("MANDATORY", arg):
+                mandatory_dict[arg] = config.get("MANDATORY", arg)
+            else:
+                raise ValueError(f"Mandatory argument {arg} not found in config file")
+    return mandatory_dict
+
+
+def get_nondefaults_from_config(
+    config: configparser.ConfigParser, obj: callable
+) -> dict:
+    """
+    Get the non-default arguments for a given function from a ConfigParser object.
+
+    Parameters
+    ----------
+    config : configparser.ConfigParser
+        A ConfigParser object containing configuration settings.
+    obj : callable
+        The function for which to get the non-default arguments.
+
+    Returns
+    -------
+    dict
+        A dictionary of non-default arguments for the function.
+    """
+
+    section_name = f"{obj.__module__}.{obj.__qualname__}".split("halodrops.")[1]
+
+    if section_name in config.sections():
+        nondefault_args = config[section_name]
+    else:
+        nondefault_args = {}
+    return nondefault_args
+
+
+def get_args_for_function(config, function):
+    """
+    Get the arguments for a given function.
+
+    This function first looks for a config section named after the function's
+    qualified name. If such a section exists, its options are used as the
+    non-default arguments for the function; otherwise an empty dictionary is used.
+
+    Then, it gets the list of mandatory arguments for the function.
+    If there are any mandatory arguments, it gets their values from the config and updates the arguments dictionary with them.
+
+    Parameters
+    ----------
+    config : configparser.ConfigParser
+        A ConfigParser object containing configuration settings.
+    function : function
+        The function for which to get the arguments.
+
+    Returns
+    -------
+    dict
+        A dictionary of arguments for the function.
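+
+    Examples
+    --------
+    For a hypothetical function ``halodrops.pipeline.func(data_directory, b=2)``,
+    a config with a ``[pipeline.func]`` section setting ``b = 3`` and a
+    ``[MANDATORY]`` section setting ``data_directory`` would yield
+    ``{'b': '3', 'data_directory': '...'}``.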
+ """ + args = get_nondefaults_from_config(config, function) + mandatory = get_mandatory_args(function) + if mandatory: + mandatory_args = get_mandatory_values_from_config(config, mandatory) + args.update(mandatory_args) + + return args + + +def get_platforms(config): + """ + Get platforms based on the directory names in `data_directory` or the user-provided `platforms` values. + + Parameters + ---------- + config : ConfigParser instance + The configuration file parser. + + Returns + ------- + dict + A dictionary where keys are platform names and values are `Platform` objects. + + Raises + ------ + ValueError + If `platforms` is specified in the config file but `platform_directory_names` is not, or + if a value in `platform_directory_names` does not correspond to a directory in `data_directory`. + + """ + data_directory = config.get("MANDATORY", "data_directory") + if config.has_option("MANDATORY", "platforms"): + if not config.has_option("MANDATORY", "platform_directory_names"): + raise ValueError( + "platform_directory_names must be provided in the config file when platforms is specified" + ) + platforms = config.get("MANDATORY", "platforms").split(",") + platform_directory_names = config.get( + "MANDATORY", "platform_directory_names" + ).split(",") + platforms = dict(zip(platforms, platform_directory_names)) + for directory_name in platform_directory_names: + if not os.path.isdir(os.path.join(data_directory, directory_name)): + raise ValueError( + f"No directory found for {directory_name} in data_directory" + ) + platform_objects = {} + for platform, platform_directory_name in platforms.items(): + platform_objects[platform] = Platform( + data_directory=data_directory, + platform_id=platform, + platform_directory_name=platform_directory_name, + ) + else: + platforms = [ + name + for name in os.listdir(data_directory) + if os.path.isdir(os.path.join(data_directory, name)) + ] + platform_objects = {} + for platform in platforms: + platform_objects[platform] = Platform( + data_directory=data_directory, platform_id=platform + ) + return platform_objects + + +def create_and_populate_flight_object(config: configparser.ConfigParser) -> Flight: + """ + Creates a Flight object and populates it with A-files. + + Parameters + ---------- + config : configparser.ConfigParser + A ConfigParser object containing configuration settings. + + Returns + ------- + Flight + A Flight object. + """ + platform_objects = get_platforms(config) + output = {} + output["sondes"] = {} + for platform in platform_objects: + for flight_id in platform_objects[platform].flight_ids: + flight = Flight( + platform_objects[platform].data_directory, + flight_id, + platform, + platform_objects[platform].platform_directory_name, + ) + output["sondes"].update(flight.populate_sonde_instances()) + return output["sondes"] + + +def iterate_Sonde_method_over_dict_of_Sondes_objects( + obj: dict, functions: list, config: configparser.ConfigParser +) -> dict: + """ + Iterates over a dictionary of Sonde objects and applies a list of methods to each Sonde. + + For each Sonde object in the dictionary, this function + applies each method listed in the 'functions' key of the substep dictionary. + If the method returns a value, it stores the value in a new dictionary. + If the method returns None, it does not store the value in the new dictionary. + + The arguments for each method are determined by the `get_args_for_function` function, + which uses the nondefaults dictionary and the config object. 
+
+    Parameters
+    ----------
+    obj : dict
+        A dictionary of Sonde objects.
+    functions : list
+        A list of method names.
+    config : configparser.ConfigParser
+        A ConfigParser object containing configuration settings.
+
+    Returns
+    -------
+    dict
+        A dictionary of Sonde objects with the results of the methods applied to them (keys where results are None are not included).
+    """
+    my_dict = obj
+
+    for function_name in functions:
+        new_dict = {}
+        for key, value in my_dict.items():
+            function = getattr(Sonde, function_name)
+            result = function(value, **get_args_for_function(config, function))
+            if result is not None:
+                new_dict[key] = result
+        my_dict = new_dict
+
+    return my_dict
+
+
+def sondes_to_gridded(sondes: dict) -> xr.Dataset:
+    pass
+
+
+def iterate_method_over_dataset(dataset: xr.Dataset, functions: list) -> xr.Dataset:
+    pass
+
+
+def gridded_to_pattern(
+    gridded: xr.Dataset, config: configparser.ConfigParser
+) -> xr.Dataset:
+    """
+    The flight-phase segmentation file must be provided via the config file.
+    """
+    pass
+
+
+def run_substep(
+    previous_substep_output, substep: dict, config: configparser.ConfigParser
+):
+    """
+    This function applies a specified function to the input data and stores the output for use in subsequent steps.
+
+    Parameters
+    ----------
+    previous_substep_output : dict
+        A dictionary storing the output data from previous steps. The input data for this step is retrieved from this dictionary using the key specified in substep['intake'], and the output of this step is stored in this dictionary under the key(s) specified in substep['output'].
+    substep : dict
+        A dictionary containing information about the current processing step. It should have the following keys:
+        - 'apply': a function to apply to the input data.
+        - 'intake': the key in the previous_substep_output dictionary that corresponds to the input data for this step.
+        - 'output': the key(s) under which to store the output of this step in the previous_substep_output dictionary. If this is a list, the function should return a list of outputs of the same length.
+        - 'functions' (optional): a list of functions to apply to the input data. If this key is present, the 'apply' function should take this list as an additional argument.
+    config : object
+        A configuration object used by the function.
+
+    Returns
+    -------
+    dict
+        The updated dictionary with the output data from the current step. The output data is stored under the key(s) specified in substep['output'].
+
+    Notes
+    -----
+    This function assumes that the 'apply' function returns a list of outputs if substep['output'] is a list, and a single output otherwise. If substep['output'] is a list but the 'apply' function does not return a list of outputs, or if the lengths of the two lists do not match, this function will raise an exception.
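+
+    Examples
+    --------
+    A single hypothetical substep (the first step of the default pipeline)::
+
+        substep = {
+            "intake": None,
+            "apply": create_and_populate_flight_object,
+            "output": "sondes",
+        }
+        output = run_substep(None, substep, config)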
+ """ + function = substep["apply"] + intake = substep["intake"] + if "functions" not in substep: + if intake is None: + previous_substep_output = {} + output = function(config) + else: + output = function(previous_substep_output[intake], config) + else: + output = function(previous_substep_output[intake], substep["functions"], config) + + if isinstance(substep["output"], list): + for i, key in enumerate(substep["output"]): + previous_substep_output[key] = output[i] + else: + previous_substep_output[substep["output"]] = output + + return previous_substep_output + + +def run_pipeline(pipeline: dict, config: configparser.ConfigParser): + """ + Executes a pipeline of processing steps. + + Parameters: + ---------- + pipeline : dict + A dictionary representing the pipeline + where each key is a substep and the value is a dictionary with the configurations of that substep. + config : configparser.ConfigParser + Configuration settings for the package. + + Returns: + ---------- + dict: + The output of the last substep in the pipeline. + """ + previous_substep_output = None + for step in pipeline: + print(f"Running {step}...") + substep = pipeline[step] + if previous_substep_output is None: + previous_substep_output = run_substep(None, substep, config) + else: + previous_substep_output = run_substep( + previous_substep_output, substep, config + ) + return previous_substep_output + + +pipeline = { + "create_flight": { + "intake": None, + "apply": create_and_populate_flight_object, + "output": "sondes", + }, + "qc": { + "intake": "sondes", + "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, + "functions": [ + "filter_no_launch_detect", + "profile_fullness", + "near_surface_coverage", + "filter_qc_fail", + ], + "output": "sondes", + }, + "create_L2": { + "intake": "sondes", + "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, + "functions": [ + "convert_to_si", + "get_l2_variables", + "add_compression_and_encoding_properties", + "get_flight_attributes", + "get_other_global_attributes", + "add_global_attributes_to_interim_l2_ds", + "add_sonde_id_variable", + "get_l2_filename", + "write_l2", + ], + "output": "sondes", + "comment": "This steps creates the L2 files after the QC (user says how QC flags are used to go from L1 to L2) and then saves these as L2 NC datasets.", + }, + "read_and_process_L2": { + "intake": "sondes", + "apply": iterate_Sonde_method_over_dict_of_Sondes_objects, + "functions": [], + "output": "sondes", + "comment": "This step reads from the saved L2 files and prepares individual sonde datasets before they can be concatenated to create L3.", + }, + "concatenate_L2": { + "intake": "sondes", + "apply": sondes_to_gridded, + "output": "gridded", + "comment": "This step concatenates the individual sonde datasets to create the L3 dataset and saves it as an NC file.", + }, + "create_L3": { + "intake": "gridded", + "apply": iterate_method_over_dataset, + "functions": [], + "output": "gridded", + "comment": "This step creates the L3 dataset after adding additional products.", + }, + "create_patterns": { + "intake": "gridded", + "apply": gridded_to_pattern, + "output": "pattern", + "comment": "This step creates a dataset with the pattern-wide variables by creating the pattern with the flight-phase segmentation file.", + }, + "create_L4": { + "intake": "pattern", + "apply": iterate_method_over_dataset, + "functions": [], + "output": "pattern", + "comment": "This step creates the L4 dataset after adding additional products and saves the L4 dataset.", + }, + "quicklooks": { + 
"intake": ["sondes", "gridded", "pattern"], + "apply": [ + iterate_Sonde_method_over_dict_of_Sondes_objects, + iterate_method_over_dataset, + iterate_method_over_dataset, + ], + "functions": [[], [], []], + "output": "plots", + "comment": "This step creates quicklooks from the L3 & L4 dataset.", + }, +} diff --git a/src/halodrops/qc/__init__.py b/src/halodrops/qc/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/halodrops/qc/profile.py b/src/halodrops/qc/profile.py deleted file mode 100644 index 6b07068..0000000 --- a/src/halodrops/qc/profile.py +++ /dev/null @@ -1,90 +0,0 @@ -import logging -import numpy as np - -# create logger -module_logger = logging.getLogger("halodrops.qc.fullness") - - -def fullness(dataset, variable, time_dimension="time"): - """Return the profile-coverage for variable - - Parameters - ---------- - dataset : xarray.Dataset - Dataset containing variable along time_dimension - variable : str - Variable in xr.Dataset whose profile-coverage is to be estimated - time_dimension : str, optional - Name of independent dimension of profile, by default "time" - - Returns - ------- - float - Fraction of non-nan variable values along time_dimension - """ - return np.sum(~np.isnan(dataset[variable].values)) / len(dataset[time_dimension]) - - -def weighted_fullness(dataset, variable, sampling_frequency, time_dimension="time"): - """Return profile-coverage for variable weighed for sampling frequency - - The assumption is that the time_dimension has coordinates spaced over 0.25 seconds, - which is true for ASPEN-processed QC and PQC files at least for RD41. - - Parameters - ---------- - dataset : xarray.Dataset - Dataset containing variable along time_dimension - variable : str - Variable in xr.Dataset whose weighted profile-coverage is to be estimated - sampling_frequency : numeric - Sampling frequency of `variable` in hertz - time_dimension : str, optional - Name of independent dimension of profile, by default "time" - - Returns - ------- - float - Fraction of non-nan variable values along time_dimension weighed for sampling frequency - """ - # 4 is the number of timestamps every second, read assumption in description - weighed_time_size = len(dataset[time_dimension]) / (4 / sampling_frequency) - return np.sum(~np.isnan(dataset[variable].values)) / weighed_time_size - - -def weighted_fullness_for_config_vars(dataset, config_file_path, add_to_dataset=True): - """Return weighted fullness for all variables in a provided config file - - Parameters - ---------- - dataset : xarray.Dataset - Dataset containing variable along time_dimension - config_file : str - Path to config file - add_to_dataset : bool, optional - Should values be added to the provided dataset? 
diff --git a/src/halodrops/qc/__init__.py b/src/halodrops/qc/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/src/halodrops/qc/profile.py b/src/halodrops/qc/profile.py
deleted file mode 100644
index 6b07068..0000000
--- a/src/halodrops/qc/profile.py
+++ /dev/null
@@ -1,90 +0,0 @@
-import logging
-import numpy as np
-
-# create logger
-module_logger = logging.getLogger("halodrops.qc.fullness")
-
-
-def fullness(dataset, variable, time_dimension="time"):
-    """Return the profile-coverage for variable
-
-    Parameters
-    ----------
-    dataset : xarray.Dataset
-        Dataset containing variable along time_dimension
-    variable : str
-        Variable in xr.Dataset whose profile-coverage is to be estimated
-    time_dimension : str, optional
-        Name of independent dimension of profile, by default "time"
-
-    Returns
-    -------
-    float
-        Fraction of non-nan variable values along time_dimension
-    """
-    return np.sum(~np.isnan(dataset[variable].values)) / len(dataset[time_dimension])
-
-
-def weighted_fullness(dataset, variable, sampling_frequency, time_dimension="time"):
-    """Return profile-coverage for variable weighed for sampling frequency
-
-    The assumption is that the time_dimension has coordinates spaced over 0.25 seconds,
-    which is true for ASPEN-processed QC and PQC files at least for RD41.
-
-    Parameters
-    ----------
-    dataset : xarray.Dataset
-        Dataset containing variable along time_dimension
-    variable : str
-        Variable in xr.Dataset whose weighted profile-coverage is to be estimated
-    sampling_frequency : numeric
-        Sampling frequency of `variable` in hertz
-    time_dimension : str, optional
-        Name of independent dimension of profile, by default "time"
-
-    Returns
-    -------
-    float
-        Fraction of non-nan variable values along time_dimension weighed for sampling frequency
-    """
-    # 4 is the number of timestamps every second, read assumption in description
-    weighed_time_size = len(dataset[time_dimension]) / (4 / sampling_frequency)
-    return np.sum(~np.isnan(dataset[variable].values)) / weighed_time_size
-
-
-def weighted_fullness_for_config_vars(dataset, config_file_path, add_to_dataset=True):
-    """Return weighted fullness for all variables in a provided config file
-
-    Parameters
-    ----------
-    dataset : xarray.Dataset
-        Dataset containing variable along time_dimension
-    config_file : str
-        Path to config file
-    add_to_dataset : bool, optional
-        Should values be added to the provided dataset? by default True
-
-    Returns
-    -------
-    xr.Dataset or dictionary
-        if True, returns weighted fullness as variables in provided dataset, else returns them as a dictionary,
-    """
-    # Reading the CONFIG file
-    import configparser
-
-    config = configparser.ConfigParser()
-    config.read(config_file_path)
-
-    vars = [
-        (var, int(config["sampling - frequencies"][var]))
-        for var in config["sampling - frequencies"].keys()
-    ]
-
-    dict = {}
-    for var in vars:
-        dict[f"{var[0]}_weighted_fullness"] = weighted_fullness(dataset, var[0], var[1])
-
-    if add_to_dataset:
-        return dataset.assign(dict)
-    else:
-        return dict
diff --git a/src/halodrops/sonde.py b/src/halodrops/sonde.py
index bf4bc70..5835ae0 100644
--- a/src/halodrops/sonde.py
+++ b/src/halodrops/sonde.py
@@ -1,10 +1,15 @@
-from dataclasses import dataclass, field, InitVar, KW_ONLY
+import ast
+from dataclasses import dataclass, field, KW_ONLY
+import datetime
 from typing import Any, Optional, List
 import os

 import numpy as np
 import xarray as xr

+from halodrops.helper import rawreader as rr
+import halodrops.helper as hh
+
 _no_default = object()

@@ -30,10 +35,35 @@
     launch_time: Optional[Any] = None

     def __post_init__(self):
-        """The `sort_index` attribute is only applicable when `launch_time` is available."""
+        """
+        Initializes the 'qc' attribute as an empty object and sets the 'sort_index' attribute based on 'launch_time'.
+
+        The 'sort_index' attribute is only applicable when 'launch_time' is available. If 'launch_time' is None, 'sort_index' will not be set.
+        """
+        object.__setattr__(self, "qc", type("", (), {})())
         if self.launch_time is not None:
             object.__setattr__(self, "sort_index", self.launch_time)

+    def add_flight_id(self, flight_id: str) -> None:
+        """Sets attribute of flight ID
+
+        Parameters
+        ----------
+        flight_id : str
+            The flight ID of the flight during which the sonde was launched
+        """
+        object.__setattr__(self, "flight_id", flight_id)
+
+    def add_platform_id(self, platform_id: str) -> None:
+        """Sets attribute of platform ID
+
+        Parameters
+        ----------
+        platform_id : str
+            The platform ID of the flight during which the sonde was launched
+        """
+        object.__setattr__(self, "platform_id", platform_id)
+
     def add_spatial_coordinates_at_launch(self, launch_coordinates: List) -> None:
         """Sets attributes of spatial coordinates at launch

@@ -74,6 +104,7 @@
             Path to the sonde's A-file
         """
         object.__setattr__(self, "afile", path_to_afile)
+        return self

     def add_postaspenfile(self, path_to_postaspenfile: str = None) -> None:
         """Sets attribute with path to post-ASPEN file of the sonde
@@ -81,15 +112,27 @@
         If the A-file path is known for the sonde, i.e. if the attribute `path_to_afile` exists,
         then the function will attempt to look for a post-ASPEN file of the same date-time as in the A-file's name.
         Sometimes, the post-ASPEN file might not exist (e.g. because launch was not detected), and in
-        such cases, an exception will be printed.
+        such cases, an exception will be raised.

         If the A-file path is not known for the sonde, the function will expect the argument
         `path_to_postaspenfile` to be not empty.

         Parameters
         ----------
-        path_to_postaspenfile : str
-            Path to the sonde's post-ASPEN file
+        path_to_postaspenfile : str, optional
+            The path to the post-ASPEN file. If not provided, the function will attempt to construct the path from the `afile` attribute.
+
+        Raises
+        ------
+        ValueError
+            If the `afile` attribute does not exist when `path_to_postaspenfile` is not provided.
+            If the post-ASPEN file does not exist at the constructed or provided path, and launch was detected in the A-file.
+            If the launch was not detected in the A-file.
+
+        Attributes Set
+        --------------
+        postaspenfile : str
+            The path to the post-ASPEN file. This attribute is set if the file exists at the constructed or provided path.
         """

         if path_to_postaspenfile is None:
@@ -102,19 +145,25 @@
             if os.path.exists(path_to_postaspenfile):
                 object.__setattr__(self, "postaspenfile", path_to_postaspenfile)
             else:
-                print(
-                    f"The post-ASPEN file for {self.serial_id} with filename {postaspenfile} does not exist. Therefore, I am not setting the `postaspenfile` attribute."
-                )
+                if rr.check_launch_detect_in_afile(self.afile):
+                    raise ValueError(
+                        f"The post-ASPEN file for {self.serial_id} with filename {postaspenfile} does not exist. Therefore, I am not setting the `postaspenfile` attribute. I checked and found that launch was detected for {self.serial_id}."
+                    )
+                else:
+                    raise ValueError(
+                        f"Launch not detected for {self.serial_id}. Therefore, {postaspenfile} does not exist and I am not setting the `postaspenfile` attribute."
+                    )
         else:
-            print("The attribute `path_to_afile` doesn't exist.")
+            raise ValueError("The attribute `path_to_afile` doesn't exist.")
         else:
             if os.path.exists(path_to_postaspenfile):
                 object.__setattr__(self, "postaspenfile", path_to_postaspenfile)
             else:
-                print(
+                raise ValueError(
                     f"The post-ASPEN file for your provided {path_to_postaspenfile=} does not exist. Therefore, I am not setting the `postaspenfile` attribute."
                 )
+        return self

     def add_aspen_ds(self) -> None:
         """Sets attribute with an xarray Dataset read from post-ASPEN file
@@ -127,37 +176,599 @@

         if hasattr(self, "postaspenfile"):
             ds = xr.open_dataset(self.postaspenfile)
-            if ds.attrs["SondeId"] == self.serial_id:
+            if "SondeId" not in ds.attrs:
+                if ds.attrs["SoundingDescription"].split(" ")[1] == self.serial_id:
+                    object.__setattr__(self, "aspen_ds", ds)
+                else:
+                    raise ValueError(
+                        f"I didn't find the `SondeId` attribute, so checked the `SoundingDescription` attribute. I found the ID in the `SoundingDescription` global attribute ({ds.attrs['SoundingDescription'].split(' ')[1]}) to not match with this instance's `serial_id` attribute ({self.serial_id}). Therefore, I am not storing the xarray dataset as an attribute."
+                    )
+            elif ds.attrs["SondeId"] == self.serial_id:
                 object.__setattr__(self, "aspen_ds", ds)
             else:
-                print(
-                    "I found the `SondeId` global attribute to not match with this instance's `serial_id` attribute. I am not storing the xarray dataset as an attribute."
-                )
+                raise ValueError(
+                    f"I found the `SondeId` global attribute ({ds.attrs['SondeId']}) to not match with this instance's `serial_id` attribute ({self.serial_id}). Therefore, I am not storing the xarray dataset as an attribute."
+                )
         else:
-            print(
-                "I didn't find the `postaspenfile` attribute, therefore I am not storing the xarray dataset as an attribute"
-            )
+            raise ValueError(
+                f"I didn't find the `postaspenfile` attribute for Sonde {self.serial_id}, therefore I can't store the xarray dataset as an attribute"
+            )
+        return self
+
+    def filter_no_launch_detect(self) -> None:
+        """
+        Filter out sondes that did not detect a launch
+
+        The function will check if the `launch_detect` attribute exists and if it is False.
+        If the attribute doesn't exist, the function will raise an error.
+        If the attribute exists and is False, the function will print a no-launch detected message.
+        If the attribute exists and is True, the function will return the object.
+
+        This function serves as a checkpoint for filtering out sondes
+        that did not detect a launch before running functions
+        that will require `aspen_ds`, e.g. the QC functions.
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        self : Sonde object
+            The Sonde object itself, if the launch was detected, else None
+
+        Raises
+        ------
+        ValueError
+            If the `launch_detect` attribute does not exist.
+        """
+        if hasattr(self, "launch_detect"):
+            if not self.launch_detect:
+                print(
+                    f"No launch detected for Sonde {self.serial_id}. I am not running QC checks for this Sonde."
+                )
+            else:
+                return self
+        else:
+            raise ValueError(
+                f"The attribute `launch_detect` does not exist for Sonde {self.serial_id}."
+            )

-
-@dataclass(frozen=True)
-class SondeData(Sonde):
-    """Class containing data of a sonde
-
-    Parameters
-    ----------
-    Sonde : class
-        parent class
-
-    Raises
-    ------
-    TypeError
-        If data is not provided while initializing the instance, a TypeError will be raised
-    """
-
-    data: Any = _no_default
-
-    def __post_init__(self):
-        if self.data is _no_default:
-            raise TypeError(
-                "No data provided! __init__ missing 1 required argument: 'data'"
-            )
+    def profile_fullness(
+        self,
+        variable_dict={"u_wind": 4, "v_wind": 4, "rh": 2, "tdry": 2, "pres": 2},
+        time_dimension="time",
+        timestamp_frequency=4,
+        fullness_threshold=0.8,
+        add_fullness_fraction_attribute=False,
+        skip=False,
+    ):
+        """
+        Calculates the profile coverage for a given set of variables, considering their sampling frequency.
+
+        This function assumes that the time_dimension coordinates are spaced over 0.25 seconds,
+        implying a timestamp_frequency of 4 hertz. This is applicable for ASPEN-processed QC and PQC files,
+        specifically for RD41.
+
+        For each variable in the variable_dict, the function calculates the fullness fraction. If the fullness
+        fraction is less than the fullness_threshold, it sets an attribute in `self.qc` named
+        "profile_fullness_{variable}" to False. Otherwise, it sets this attribute to True.
+
+        If add_fullness_fraction_attribute is True, the function also sets an attribute in `self` named
+        "profile_fullness_fraction_{variable}" to the calculated fullness fraction.
+
+        Parameters
+        ----------
+        variable_dict : dict, optional
+            Dictionary containing the variables in `self.aspen_ds` and their respective sampling frequencies.
+            The function will estimate the weighted profile-coverage for these variables.
+            Default is {'u_wind':4,'v_wind':4,'rh':2,'tdry':2,'pres':2}.
+        time_dimension : str, optional
+            The independent dimension of the profile. Default is "time".
+        timestamp_frequency : numeric, optional
+            The sampling frequency of `time_dimension` in hertz. Default is 4.
+        fullness_threshold : float or str, optional
+            The threshold for the fullness fraction. If the calculated fullness fraction is less than this threshold,
+            the profile is considered not full. Default is 0.8.
+        add_fullness_fraction_attribute : bool or str, optional
+            If True, the function will add the fullness fraction as an attribute to the object. Default is False.
+            If provided as a string, it should be possible to convert it to a boolean value with the helper `get_bool` function.
+        skip : bool, optional
+            If True, the function will return the object without performing any operations. Default is False.
+
+        Returns
+        -------
+        self
+            The object itself, possibly with new attributes added based on the function parameters.
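+
+        Examples
+        --------
+        For a hypothetical sonde whose `aspen_ds` attribute is already set
+        (results depend on the data)::
+
+            sonde = sonde.profile_fullness(fullness_threshold=0.9)
+            sonde.qc.profile_fullness_tdry  # True or False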
+ """ + if hh.get_bool(skip): + return self + else: + if isinstance(fullness_threshold, str): + fullness_threshold = float(fullness_threshold) - def __post_init__(self): - if self.data is _no_default: - raise TypeError( - "No data provided! __init__ missing 1 required argument: 'data'" + for variable, sampling_frequency in variable_dict.items(): + dataset = self.aspen_ds[variable] + weighed_time_size = len(dataset[time_dimension]) / ( + timestamp_frequency / sampling_frequency + ) + fullness_fraction = ( + np.sum(~np.isnan(dataset.values)) / weighed_time_size + ) + if fullness_fraction < fullness_threshold: + object.__setattr__( + self.qc, + f"profile_fullness_{variable}", + False, + ) + else: + object.__setattr__( + self.qc, + f"profile_fullness_{variable}", + True, + ) + if hh.get_bool(add_fullness_fraction_attribute): + object.__setattr__( + self, + f"profile_fullness_fraction_{variable}", + fullness_fraction, + ) + return self + + def near_surface_coverage( + self, + variables=["u_wind", "v_wind", "rh", "tdry", "pres"], + alt_bounds=[0, 1000], + alt_dimension_name="alt", + count_threshold=50, + add_near_surface_count_attribute=False, + skip=False, + ): + """ + Calculates the fraction of non-null values in specified variables near the surface. + + Parameters + ---------- + variables : list, optional + The variables to consider for the calculation. Defaults to ["u_wind","v_wind","rh","tdry","pres"]. + alt_bounds : list, optional + The lower and upper bounds of altitude in meters to consider for the calculation. Defaults to [0,1000]. + alt_dimension_name : str, optional + The name of the altitude dimension. Defaults to "alt". + count_threshold : int, optional + The minimum count of non-null values required for a variable to be considered as having near surface coverage. Defaults to 50. + add_near_surface_count_attribute : bool, optional + If True, adds the count of non-null values as an attribute for every variable to the object. Defaults to False. + skip : bool, optional + If True, skips the calculation and returns the object as is. Defaults to False. + + Returns + ------- + self + The object with updated attributes. + + Raises + ------ + ValueError + If the attribute `aspen_ds` does not exist. The `add_aspen_ds` method should be run first. + """ + if hh.get_bool(skip): + return self + else: + if not hasattr(self, "aspen_ds"): + raise ValueError( + "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first." 
+        """
+        if hh.get_bool(skip):
+            return self
+        else:
+            if not hasattr(self, "aspen_ds"):
+                raise ValueError(
+                    "The attribute `aspen_ds` does not exist. Please run `add_aspen_ds` method first."
+                )
+
+            if isinstance(alt_bounds, str):
+                alt_bounds = alt_bounds.split(",")
+                alt_bounds = [float(alt_bound) for alt_bound in alt_bounds]
+            if isinstance(count_threshold, str):
+                count_threshold = int(count_threshold)
+            if isinstance(variables, str):
+                variables = variables.split(",")
+
+            for variable in variables:
+                dataset = self.aspen_ds[[variable, alt_dimension_name]]
+                near_surface = dataset.where(
+                    (dataset[alt_dimension_name] > alt_bounds[0])
+                    & (dataset[alt_dimension_name] < alt_bounds[1]),
+                    drop=True,
+                )
+
+                near_surface_count = np.sum(~np.isnan(near_surface[variable].values))
+                if near_surface_count < count_threshold:
+                    object.__setattr__(
+                        self.qc,
+                        f"near_surface_coverage_{variable}",
+                        False,
+                    )
+                else:
+                    object.__setattr__(
+                        self.qc,
+                        f"near_surface_coverage_{variable}",
+                        True,
+                    )
+                if hh.get_bool(add_near_surface_count_attribute):
+                    object.__setattr__(
+                        self,
+                        f"near_surface_count_{variable}",
+                        near_surface_count,
+                    )
+            return self
+
+    def filter_qc_fail(self, filter_flags=None):
+        """
+        Filters the sonde based on a list of QC flags. If any of the flags are False, the sonde will be filtered out from creating L2.
+        If the sonde passes all the QC checks, the attributes listed in filter_flags will be removed from the sonde object.
+
+        Parameters
+        ----------
+        filter_flags : str or list, optional
+            Comma-separated string or list of QC-related attribute names to be checked. Each item can be a specific attribute name or a prefix to include all attributes starting with that prefix. You can also provide 'all_except_<prefix>' to check all flags except those starting with '<prefix>'. If an 'all_except_<prefix>' value is provided, it should be the only value in filter_flags. If not provided, all QC attributes will be checked.
+
+        Returns
+        -------
+        self : object
+            The sonde object itself, with the attributes listed in filter_flags removed if it passes all the QC checks.
+
+        Raises
+        ------
+        ValueError
+            If a flag in filter_flags does not exist as an attribute of the sonde object, or if an 'all_except_<prefix>' value is provided in filter_flags along with other values. Please ensure that the flag names provided in 'filter_flags' correspond to existing QC attributes. If you're using a prefix to filter attributes, make sure the prefix is correct. Check your skipped QC functions or your provided list of filter flags.
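+
+        Examples
+        --------
+        Hypothetical calls (flag names depend on which QC checks were run)::
+
+            sonde.filter_qc_fail(filter_flags="profile_fullness,near_surface_coverage")
+            sonde.filter_qc_fail(filter_flags="all_except_profile_fullness")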
+        """
+        all_qc_attributes = [attr for attr in dir(self.qc) if not attr.startswith("__")]
+
+        if filter_flags is None:
+            filter_flags = all_qc_attributes
+        elif isinstance(filter_flags, str):
+            filter_flags = filter_flags.split(",")
+        elif isinstance(filter_flags, list):
+            pass
+        else:
+            raise ValueError(
+                "Invalid type for filter_flags. It must be one of the following:\n"
+                "- None: If you want to filter against all QC attributes.\n"
+                "- A string: If you want to provide a comma-separated list of individual flag values or prefixes of flag values.\n"
+                "- A list: If you want to provide individual flag values or prefixes of flag values."
+            )
+
+        if (
+            any(flag.startswith("all_except_") for flag in filter_flags)
+            and len(filter_flags) > 1
+        ):
+            raise ValueError(
+                "If 'all_except_<prefix>' is provided in filter_flags, it should be the only value."
+            )
+
+        new_filter_flags = []
+        for flag in filter_flags:
+            if flag.startswith("all_except_"):
+                prefix = flag.replace("all_except_", "")
+                new_filter_flags.extend(
+                    [attr for attr in all_qc_attributes if not attr.startswith(prefix)]
+                )
+            else:
+                new_filter_flags.extend(
+                    [attr for attr in all_qc_attributes if attr.startswith(flag)]
+                )
+
+        filter_flags = new_filter_flags
+
+        for flag in filter_flags:
+            if not hasattr(self.qc, flag):
+                raise ValueError(
+                    f"The attribute '{flag}' does not exist in the QC attributes of the sonde object. "
+                    "Please ensure that the flag names provided in 'filter_flags' correspond to existing QC attributes. "
+                    "If you're using a prefix to filter attributes, make sure the prefix is correct. "
+                    "Check your skipped QC functions or your provided list of filter flags."
+                )
+            if not bool(getattr(self.qc, flag)):
+                print(
+                    f"{flag} returned False. Therefore, filtering this sonde ({self.serial_id}) out from L2"
+                )
+                return None
+
+        # If the sonde passes all the QC checks, remove all attributes listed in filter_flags
+        for flag in filter_flags:
+            delattr(self.qc, flag)
+
+        return self
+
+    def convert_to_si(self, variables=["rh", "pres", "tdry"], skip=False):
+        """
+        Converts variables to SI units.
+
+        Parameters
+        ----------
+        variables : list or str, optional
+            The variables to convert to SI units. If a string is provided, it should be a comma-separated list of variables.
+            The default variables are 'rh', 'pres', and 'tdry'.
+        skip : bool, optional
+            If set to True, the function will skip the conversion process but will still ensure that the '_interim_l2_ds' attribute is set.
+            If '_interim_l2_ds' is not already an attribute of the object, it will be set to 'aspen_ds'.
+            Default is False.
+
+        Returns
+        -------
+        self : object
+            Returns the sonde object with the specified variables in aspen_ds converted to SI units.
+            If 'skip' is set to True, it returns the sonde object with '_interim_l2_ds' set to 'aspen_ds' if it wasn't already present.
+        """
+        if hh.get_bool(skip):
+            if hasattr(self, "_interim_l2_ds"):
+                return self
+            else:
+                object.__setattr__(self, "_interim_l2_ds", self.aspen_ds)
+                return self
+        else:
+            if isinstance(variables, str):
+                variables = variables.split(",")
+
+            if hasattr(self, "_interim_l2_ds"):
+                ds = self._interim_l2_ds
+            else:
+                ds = self.aspen_ds
+
+            for variable in variables:
+                func = hh.get_si_converter_function_based_on_var(variable)
+                ds = ds.assign({f"{variable}": func(self.aspen_ds[variable])})
+
+            object.__setattr__(self, "_interim_l2_ds", ds)
+
+            return self
+
+    def get_l2_variables(self, l2_variables: dict = hh.l2_variables):
+        """
+        Gets the variables from aspen_ds to create L2.
+
+        Parameters
+        ----------
+        l2_variables : dict or str, optional
+            A dictionary where the keys are the variables in aspen_ds to keep for L2.
+            If dictionary items contain a 'rename_to' key, the variable will be renamed to the value of 'rename_to'.
+            If dictionary items contain an 'attributes' key, the variable will be assigned the attributes in the value of 'attributes'.
+            The default is the l2_variables dictionary from the helper module.
+
+        Returns
+        -------
+        self : object
+            Returns the sonde object with only the specified variables (renamed if the dictionary has a 'rename_to' key, and with attributes added if the dictionary has an 'attributes' key) in the _interim_l2_ds attribute.
+            If '_interim_l2_ds' is not already an attribute of the object, it will first be set to 'aspen_ds' before reducing to the variables and renaming.
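+
+        Examples
+        --------
+        Keeping the default variable set, or a reduced hypothetical one::
+
+            sonde = sonde.get_l2_variables()
+            sonde = sonde.get_l2_variables({"tdry": {"rename_to": "ta"}})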
+ """ + if isinstance(l2_variables, str): + l2_variables = ast.literal_eval(l2_variables) + + l2_variables_list = list(l2_variables.keys()) + + if hasattr(self, "_interim_l2_ds"): + ds = self._interim_l2_ds + else: + ds = self.aspen_ds + + ds = ds[l2_variables_list] + + for variable, variable_dict in l2_variables.items(): + if "attributes" in variable_dict: + ds[variable].attrs = variable_dict["attributes"] + if "rename_to" in variable_dict: + ds = ds.rename({variable: variable_dict["rename_to"]}) + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + + def add_sonde_id_variable(self, variable_name="sonde_id"): + """ + Adds a variable and related attributes to the sonde object with the Sonde object (self)'s serial_id attribute. + + Parameters + ---------- + variable_name : str, optional + The name of the variable to be added. Default is 'sonde_id'. + + Returns + ------- + self : object + Returns the sonde object with a variable containing serial_id. Name of the variable provided by 'variable_name'. + """ + if hasattr(self, "_interim_l2_ds"): + ds = self._interim_l2_ds + else: + ds = self.aspen_ds + + ds = ds.assign({variable_name: self.serial_id}) + ds[variable_name].attrs = { + "descripion": "unique sonde ID", + "long_name": "sonde identifier", + "cf_role": "trajectory_id", + } + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + + def get_flight_attributes( + self, l2_flight_attributes_map: dict = hh.l2_flight_attributes_map + ) -> None: + """ + Gets flight attributes from the A-file and adds them to the sonde object. + + Parameters + ---------- + l2_flight_attributes_map : dict or str, optional + A dictionary where the keys are the flight attributes in the A-file + and the values are the corresponding (renamed) attribute names to be used for the L2 file. + The default is the l2_flight_attributes_map dictionary from the helper module. + + Returns + ------- + self : object + Returns the sonde object with the flight attributes added as attributes. 
+ """ + flight_attrs = {} + + with open(self.afile, "r") as f: + lines = f.readlines() + + for attr in l2_flight_attributes_map.keys(): + for line_id, line in enumerate(lines): + if attr in line: + break + + attr = l2_flight_attributes_map.get(attr, attr) + + value = lines[line_id].split("= ")[1] + flight_attrs[attr] = float(value) if "AVAPS" not in attr else value + + object.__setattr__(self, "flight_attrs", flight_attrs) + + return self + + def get_other_global_attributes(self): + nc_global_attrs = { + # "title": "Level-2", + # "doi": f"{halodrops.data_doi}", + # "created with": f"pipeline.py doi:{halodrops.software_doi}", + "Conventions": "CF-1.8", + "campaign_id": "HALO-(AC)3", + "platform_id": self.platform_id, + # "instrument_id": "Vaisala RD-41", + "product_id": "Level-2", + # "AVAPS_Software_version": "Version 4.1.2", + "ASPEN_version": self.aspen_ds.AspenVersion + if hasattr(self.aspen_ds, "AspenVersion") + else self.aspen_ds.AvapsEditorVersion, + "ASPEN_processing_time": self.aspen_ds.ProcessingTime, + # "JOANNE_version": joanne.__version__, + # "launch_date": str(pd.to_datetime(self.launch_time).date()), + "launch_time_(UTC)": str(self.aspen_ds.launch_time.values) + if hasattr(self.aspen_ds, "launch_time") + else str(self.aspen_ds.base_time.values), + "sonde_serial_ID": self.serial_id, + "author": "Geet George", + "author_email": "g.george@tudelft.nl", + "featureType": "trajectory", + # "reference": halodrops.reference_study, + "creation_time": str(datetime.datetime.utcnow()) + " UTC", + } + + object.__setattr__(self, "nc_global_attrs", nc_global_attrs) + + return self + + def add_global_attributes_to_interim_l2_ds(self): + """ + Adds global attributes to _interim_l2_ds. + + Parameters + ---------- + None + + Returns + ------- + self : object + Returns the sonde object with global attributes added to _interim_l2_ds. + """ + ds = self._interim_l2_ds + + attrs_to_del = [] + for attr in ds.attrs.keys(): + attrs_to_del.append(attr) + + for attr in attrs_to_del: + del ds.attrs[attr] + + if hasattr(self, "flight_attrs"): + for attr, value in self.flight_attrs.items(): + ds.attrs[attr] = value + if hasattr(self, "nc_global_attrs"): + for attr, value in self.nc_global_attrs.items(): + ds.attrs[attr] = value + + object.__setattr__(self, "_interim_l2_ds", ds) + + return self + + def add_compression_and_encoding_properties( + self, + encoding_variables: dict = hh.encoding_variables, + default_variable_compression_properties: dict = hh.variable_compression_properties, + ): + """ + Adds compression and encoding properties to _interim_l2_ds. + + Parameters + ---------- + comp : dict or str, optional + A dictionary containing the compression properties to be used for the L2 file. + The default is the comp dictionary from the helper module. + + Returns + ------- + self : object + Returns the sonde object with compression and encoding properties added to _interim_l2_ds. + """ + + for var in encoding_variables: + self._interim_l2_ds[var].encoding = encoding_variables[var] + + for var in self._interim_l2_ds.data_vars: + if not encoding_variables.get(var): + self._interim_l2_ds[ + var + ].encoding = default_variable_compression_properties + + return self + + def get_l2_filename(self, l2_filename: str = None): + """ + Gets the L2 filename from the template provided. + + Parameters + ---------- + l2_filename : str, optional + The L2 filename. The default is the l2_filename_template from the helper module. 
+
+        Returns
+        -------
+        self : object
+            Returns the sonde object with the L2 filename added as an attribute.
+        """
+        if l2_filename is None:
+            l2_filename = hh.l2_filename_template.format(
+                platform=self.platform_id,
+                serial_id=self.serial_id,
+                flight_id=self.flight_id,
+                launch_time=self.launch_time,
+            )
+
+        object.__setattr__(self, "l2_filename", l2_filename)
+
+        return self
+
+    def write_l2(self, l2_dir: str = None):
+        """
+        Writes the L2 file to the specified directory.
+
+        Parameters
+        ----------
+        l2_dir : str, optional
+            The directory to write the L2 file to. The default is the directory of the A-file with '0' replaced by '2'.
+
+        Returns
+        -------
+        self : object
+            Returns the sonde object after writing the L2 file to the specified directory, using the l2_filename attribute as the file name.
+        """
+
+        if l2_dir is None:
+            l2_dir = os.path.dirname(self.afile)[:-1] + "2"
+
+        if not os.path.exists(l2_dir):
+            os.makedirs(l2_dir)
+
+        self._interim_l2_ds.to_netcdf(os.path.join(l2_dir, self.l2_filename))
+
+        return self
diff --git a/tests/test_inspect_function.py b/tests/test_inspect_function.py
new file mode 100644
index 0000000..97c32a5
--- /dev/null
+++ b/tests/test_inspect_function.py
@@ -0,0 +1,77 @@
+import configparser
+import pytest
+from halodrops.pipeline import (
+    get_mandatory_args,
+    get_mandatory_values_from_config,
+    get_nondefaults_from_config,
+    get_args_for_function,
+)
+
+# Function used as a test subject; the fixture decorator keeps pytest from collecting it as a test
+@pytest.fixture
+def test_func(a, b=2):
+    pass
+
+
+@pytest.fixture
+def config_and_function():
+    # Create a ConfigParser object and add a section for the test function
+    config = configparser.ConfigParser()
+    config.add_section("tests.test_inspect_function.test_func")
+    config.set("tests.test_inspect_function.test_func", "b", "3")
+    config.add_section("MANDATORY")
+    config.set("MANDATORY", "a", "1")
+
+    if "halodrops" not in test_func.__module__:
+        test_func.__module__ = f"halodrops.{test_func.__module__}"
+
+    return config, test_func
+
+
+def test_get_mandatory_args(config_and_function):
+    _, test_func = config_and_function
+    result = get_mandatory_args(test_func)
+    assert result == ["a"]
+
+
+def test_get_mandatory_values_from_config(config_and_function):
+    config, _ = config_and_function
+    result = get_mandatory_values_from_config(config, ["a"])
+    assert result == {"a": "1"}
+
+
+def test_get_nondefaults_from_config(config_and_function):
+    config, test_func = config_and_function
+    result = get_nondefaults_from_config(config, test_func)
+    assert result == {"b": "3"}
+
+
+def test_get_args_for_function(config_and_function):
+    config, test_func = config_and_function
+    result = get_args_for_function(config, test_func)
+    assert result == {"a": "1", "b": "3"}
+
+
+def test_get_mandatory_values_from_config_no_section(config_and_function):
+    config, _ = config_and_function
+    config.remove_section("MANDATORY")
+    with pytest.raises(ValueError, match="MANDATORY section not found in config file"):
+        get_mandatory_values_from_config(config, ["a"])
+
+
+def test_get_mandatory_values_from_config_no_arg(config_and_function):
+    config, _ = config_and_function
+    config.remove_option("MANDATORY", "a")
+    with pytest.raises(
+        ValueError, match="Mandatory argument a not found in config file"
+    ):
+        get_mandatory_values_from_config(config, ["a"])
+
+
+def test_get_args_for_function_no_mandatory(config_and_function):
+    config, test_func = config_and_function
+    config.remove_option("MANDATORY", "a")
+    with pytest.raises(
+        ValueError, match="Mandatory argument a not found in config file"
+    ):
+        get_args_for_function(config, test_func)
diff --git a/tests/test_paths.py b/tests/test_paths.py
index 37867b7..4b06c26 100644
--- a/tests/test_paths.py
+++ b/tests/test_paths.py
@@ -2,11 +2,12 @@ import os
 
 main_data_directory = "../sample"
+platform = "HALO"
 flightdate = "20200101"
 
-l1_path = os.path.join(main_data_directory, flightdate, "Level_1")
-quicklooks_path = os.path.join(main_data_directory, flightdate, "Quicklooks")
+l1_path = os.path.join(main_data_directory, platform, flightdate, "Level_1")
+quicklooks_path = os.path.join(main_data_directory, platform, flightdate, "Quicklooks")
 
-object = paths.Paths(main_data_directory, flightdate)
+object = paths.Flight(main_data_directory, flightdate, platform)
 
 
 def test_l1_path():
diff --git a/tests/test_sonde.py b/tests/test_sonde.py
index 0eba693..e08fdbf 100644
--- a/tests/test_sonde.py
+++ b/tests/test_sonde.py
@@ -1,14 +1,19 @@
-from halodrops import sonde
+import pytest
+import os
+import xarray as xr
+from halodrops.sonde import Sonde
 
 s_id = "test_this_id"
-launch_time = "2020-02-02 12:30:12"
-data = ["placeholder", "data", 42]
+launch_time = "2020-02-02 20:22:02"
+file_name_nolaunch = "test_file_nolaunch"
+file_name_launch = "test_file_launch"
+postaspenfile_name = f"D{file_name_launch[1:]}QC.nc"
 
 
 def test_Sonde_attrs():
-    TestSonde_nolaunchtime = sonde.Sonde(s_id)
-    TestSonde_withlaunchtime = sonde.Sonde(s_id, launch_time=launch_time)
+    TestSonde_nolaunchtime = Sonde(s_id)
+    TestSonde_withlaunchtime = Sonde(s_id, launch_time=launch_time)
 
     assert TestSonde_nolaunchtime.serial_id == s_id
     assert TestSonde_nolaunchtime.launch_time is None
@@ -16,14 +21,115 @@ def test_Sonde_attrs():
     assert TestSonde_withlaunchtime.launch_time == launch_time
 
 
-def test_SondeData_attrs():
+@pytest.fixture
+def tmp_data_directory(tmp_path):
+    """
+    Create a temporary directory for testing.
+    """
+    data_directory = tmp_path / "data"
+    data_directory.mkdir()
+    return str(data_directory)
 
-    TestSonde_nolaunchtime = sonde.SondeData(s_id, data)
-    TestSonde_withlaunchtime = sonde.SondeData(s_id, launch_time=launch_time, data=data)
 
-    assert TestSonde_nolaunchtime.serial_id == s_id
-    assert TestSonde_nolaunchtime.launch_time is None
-    assert TestSonde_nolaunchtime.data == data
-    assert TestSonde_withlaunchtime.serial_id == s_id
-    assert TestSonde_withlaunchtime.launch_time == launch_time
-    assert TestSonde_withlaunchtime.data == data
+@pytest.fixture
+def temp_afile_dir(tmp_data_directory):
+    """
+    Create a temporary A-file directory for testing.
+    """
+    afile_dir = os.path.join(tmp_data_directory, "Level_0")
+    os.mkdir(afile_dir)
+    return str(afile_dir)
+
+
+@pytest.fixture
+def temp_afile_nolaunchdetected(temp_afile_dir):
+    """
+    Create a temporary A-file without a detected launch for testing.
+    """
+    afile = os.path.join(temp_afile_dir, file_name_nolaunch)
+    with open(afile, "w") as f:
+        f.write("This is a temporary A-file.\nLaunch Obs Done? = 0")
+    return str(afile)
+
+
+@pytest.fixture
+def temp_afile_launchdetected(temp_afile_dir):
+    """
+    Create a temporary A-file with a detected launch for testing.
+    """
+    afile = os.path.join(temp_afile_dir, file_name_launch)
+    with open(afile, "w") as f:
+        f.write("This is a temporary A-file.\nLaunch Obs Done? = 1")
+    return str(afile)
+
+
+@pytest.fixture
+def temp_postaspenfile(tmp_data_directory):
+    """
+    Create a temporary post-ASPEN file for testing.
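+
+    The file follows the ``D<...>QC.nc`` naming pattern (``postaspenfile_name``
+    above), which is presumably what ``add_postaspenfile`` derives from the
+    A-file name so that the file can be located without an explicit path.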
+ """ + postaspenfile_dir = os.path.join(tmp_data_directory, "Level_1") + os.mkdir(postaspenfile_dir) + postaspenfile = os.path.join(postaspenfile_dir, postaspenfile_name) + ds = xr.Dataset(dict(foo=("bar", [4, 2]))) + ds.attrs["SondeId"] = s_id + ds.to_netcdf(postaspenfile) + return str(postaspenfile) + + +def test_sonde_add_afile(temp_afile_launchdetected, temp_afile_nolaunchdetected): + """ + Test the addition of an A-file. + """ + sonde = Sonde(serial_id=s_id) + sonde.add_afile(temp_afile_launchdetected) + assert sonde.afile == temp_afile_launchdetected + sonde.add_afile(temp_afile_nolaunchdetected) + assert sonde.afile == temp_afile_nolaunchdetected + + +def test_sonde_add_postaspenfile_without_launch(temp_afile_nolaunchdetected): + """ + Test the addition of a post-ASPEN file when a launch has not been detected. + """ + sonde = Sonde(serial_id=s_id) + sonde.add_afile(temp_afile_nolaunchdetected) + with pytest.raises(ValueError): + sonde.add_postaspenfile() + + +def test_sonde_add_postaspenfile_with_only_afile( + temp_afile_launchdetected, temp_postaspenfile +): + """ + Test the addition of a post-ASPEN file when an A-file has been added. + """ + sonde = Sonde(serial_id=s_id) + sonde.add_afile(temp_afile_launchdetected) + sonde.add_postaspenfile() + assert sonde.postaspenfile == temp_postaspenfile + + +def test_sonde_add_aspen_ds(temp_afile_launchdetected, temp_postaspenfile): + """ + Test the addition of an ASPEN dataset. + """ + sonde = Sonde(serial_id=s_id) + sonde.add_afile(temp_afile_launchdetected) + sonde.add_postaspenfile(temp_postaspenfile) + sonde.add_aspen_ds() + assert isinstance(sonde.aspen_ds, xr.Dataset) + assert sonde.aspen_ds.attrs["SondeId"] == s_id + + +def test_sonde_add_aspen_ds_with_mismatched_sonde_id( + temp_afile_launchdetected, temp_postaspenfile +): + """ + Test the addition of an ASPEN dataset with a mismatched SondeId. + """ + sonde = Sonde(serial_id=s_id[:-1]) + sonde.add_afile(temp_afile_launchdetected) + sonde.add_postaspenfile(temp_postaspenfile) + with pytest.raises(ValueError): + sonde.add_aspen_ds()
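For orientation, here is a minimal sketch (not part of the diff) of how the Level-2 methods added above might be chained for a single sonde. It assumes `platform_id` and `flight_id` are set on the sonde elsewhere in the package; the serial ID, launch time, and A-file path below are hypothetical.

```python
# Sketch only: exercises the methods introduced in this diff.
# Assumes platform_id and flight_id are already set on the sonde.
from halodrops.sonde import Sonde

sonde = Sonde(serial_id="221234567", launch_time="2020-02-02 20:22:02")  # hypothetical IDs
sonde.add_afile("/data/HALO/20200202/Level_0/A20200202_1")  # hypothetical path
sonde.add_postaspenfile()  # finds the D...QC.nc file; requires a detected launch
sonde.add_aspen_ds()  # opens the post-ASPEN file as an xarray Dataset
sonde.add_sonde_id_variable()  # also creates _interim_l2_ds from aspen_ds if absent
sonde.get_flight_attributes()  # parses attributes out of the A-file
sonde.get_other_global_attributes()
sonde.add_global_attributes_to_interim_l2_ds()
sonde.add_compression_and_encoding_properties()
sonde.get_l2_filename()  # fills hh.l2_filename_template
sonde.write_l2()  # default directory: the A-file directory with '0' replaced by '2'
```

Each method mutates the sonde in place and returns `self`, so the calls above could equally be chained in builder style.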