From 7f9e2f8ddbd339d8b3680e3eb96e1c9c90ebbfde Mon Sep 17 00:00:00 2001 From: Peter Andrew Bogenschutz Date: Fri, 31 Jan 2025 17:58:25 -0800 Subject: [PATCH 1/3] add draft to start OBS time correction --- IOP_diagnostics/iop_diagnostics.py | 44 ++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/IOP_diagnostics/iop_diagnostics.py b/IOP_diagnostics/iop_diagnostics.py index 82042a8..9fae2c8 100644 --- a/IOP_diagnostics/iop_diagnostics.py +++ b/IOP_diagnostics/iop_diagnostics.py @@ -5,6 +5,8 @@ import tarfile from jinja2 import Template from scipy.interpolate import interp1d +from datetime import datetime +import sys def compute_y_coord(ds, time_indices, height_cord, var_name): """ @@ -70,6 +72,42 @@ def compute_y_coord(ds, time_indices, height_cord, var_name): return y_coord +import netCDF4 +from datetime import datetime + +# get formatted date and time of start of dataset +def extract_time_info(ds): + + try: + # For xarray, you can access the time variable's attributes as follows: + time_units = ds['time'].attrs['units'] # Expected format: "days since YYYY-MM-DD HH:MM:SS" + except (KeyError, AttributeError): + print("Warning: 'time' variable or its 'units' attribute is not available.") + return -999, -999 + + try: + # Extract the date and time portions from the units string. + _, ref_str = time_units.split("since") + ref_str = ref_str.strip() # "YYYY-MM-DD HH:MM:SS" + date_str, time_str = ref_str.split(" ") + except ValueError: + print("Warning: The 'time:units' attribute is not in the expected format 'days since YYYY-MM-DD HH:MM:SS'.") + return -999, -999 + + # Format the date to "yyyymmdd" + formatted_date = date_str.replace("-", "") + + try: + from datetime import datetime + time_obj = datetime.strptime(time_str, "%H:%M:%S") + time_in_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second + except ValueError: + print("Warning: The time portion in 'time:units' is not in the expected format (HH:MM:SS).") + return -999, -999 + + return formatted_date, time_in_seconds + + def run_diagnostics( output_dir, general_id, @@ -148,6 +186,12 @@ def run_diagnostics( if 'PRECC' in ds.data_vars and 'PRECL' in ds.data_vars: # Add 'PRECT' to all_vars if it's not already there all_vars.add('PRECT') + + # Determine the formatted date and time in seconds for model runs; + # only test the first file since all model runs should have the same start time + start_date, start_seconds = extract_time_info(datasets[0]) + print(start_date, start_seconds) + sys.exit() ############################################################################################################# # Plot profile variables with three dimensions (e.g., time, ncol, lev or ilev) From c237e5bf7a5a9bd5976872eafef1eb1f0893f9b4 Mon Sep 17 00:00:00 2001 From: Peter Bogenschutz Date: Fri, 7 Feb 2025 11:30:10 -0800 Subject: [PATCH 2/3] read in time units for each file to ensure that all plotted times are coordinated --- IOP_diagnostics/iop_diagnostics.py | 71 ++++++++++++++++++++++++++---- 1 file changed, 62 insertions(+), 9 deletions(-) diff --git a/IOP_diagnostics/iop_diagnostics.py b/IOP_diagnostics/iop_diagnostics.py index 05e22d3..70ebd7f 100644 --- a/IOP_diagnostics/iop_diagnostics.py +++ b/IOP_diagnostics/iop_diagnostics.py @@ -5,7 +5,7 @@ import tarfile from jinja2 import Template from scipy.interpolate import interp1d -from datetime import datetime +from datetime import datetime, timedelta import sys def compute_y_coord(ds, time_indices, height_cord, var_name): @@ -72,6 +72,8 @@ def compute_y_coord(ds, time_indices, height_cord, var_name): return y_coord +############################# + import netCDF4 from datetime import datetime @@ -107,6 +109,46 @@ def extract_time_info(ds): return formatted_date, time_in_seconds +############################# + +def compute_date_time_difference(date1, time1_seconds, date2, time2_seconds): + """ + Compute the difference in days (as a floating-point value) between two dates and times. + + Parameters: + date1 (str): First date in 'yyyymmdd' format. + time1_seconds (int): Time of the first date in seconds (0 to 86400). + date2 (str): Second date in 'yyyymmdd' format. + time2_seconds (int): Time of the second date in seconds (0 to 86400). + + Returns: + float: Difference in days, including the fractional day component. + """ + try: + # Convert date strings to datetime objects + date_obj1 = datetime.strptime(date1, "%Y%m%d") + date_obj2 = datetime.strptime(date2, "%Y%m%d") + + # Validate time in seconds (must be between 0 and 86400) + if not (0 <= time1_seconds <= 86400 and 0 <= time2_seconds <= 86400): + print("Error: Time in seconds must be between 0 and 86400.") + return -999.0 # Error flag + + # Add time offset to the datetime objects + datetime1 = date_obj1 + timedelta(seconds=time1_seconds) + datetime2 = date_obj2 + timedelta(seconds=time2_seconds) + + # Compute the difference in days as a float + day_difference = abs((datetime2 - datetime1).total_seconds()) / 86400.0 + + return round(day_difference, 6) # Rounded for better precision + + except ValueError: + print("Error: One or both dates are not in the correct 'yyyymmdd' format.") + return -999.0 # Error flag + +################################# +###### Main program def run_diagnostics( output_dir, @@ -189,9 +231,20 @@ def run_diagnostics( # Determine the formatted date and time in seconds for model runs; # only test the first file since all model runs should have the same start time - start_date, start_seconds = extract_time_info(datasets[0]) - print(start_date, start_seconds) - sys.exit() + start_date_base, start_seconds_base = extract_time_info(datasets[0]) + print(start_date_base, start_seconds_base) + + time_offset = [] + for ds in datasets: + test_date, test_seconds = extract_time_info(ds) + if (test_date == -999): + offset=0.0 + else: + offset = compute_date_time_difference(start_date_base, start_seconds_base, + test_date, test_seconds) + time_offset.append(offset) + + print(time_offset) ############################################################################################################# # Plot profile variables with three dimensions (e.g., time, ncol, lev or ilev) @@ -208,7 +261,7 @@ def run_diagnostics( valid_plot = False # Track if any data was valid for this variable # Loop over each dataset, using short_ids for the legend - for ds, short_id in zip(datasets, short_ids): + for idx, (ds, short_id) in enumerate(zip(datasets, short_ids)): if var_name not in ds.data_vars: print(f"Warning: Variable '{var_name}' not found in case '{short_id}'. Skipping for this case.") continue # Skip this case if variable is missing @@ -216,7 +269,7 @@ def run_diagnostics( valid_plot = True # At least one dataset has the variable # Convert `time` to a numeric array in days since the start - time_in_days = ds['time'] + time_in_days = ds['time'] - time_offset[idx] # Determine indices for the specified range start_time = start_time if start_time != "end" else time_in_days[0] @@ -298,7 +351,7 @@ def run_diagnostics( valid_plot = False # Track if any data was valid for this variable # Loop over each dataset, using short_ids for the legend - for ds, short_id in zip(datasets, short_ids): + for idx, (ds, short_id) in enumerate(zip(datasets, short_ids)): if var_name not in ds.data_vars: if var_name == 'PRECT': if 'PRECC' and 'PRECL' in ds.data_vars: @@ -314,7 +367,7 @@ def run_diagnostics( valid_plot = True # At least one dataset has the variable # Convert `time` to a numeric array in days since the start - time_in_days = ds['time'] + time_in_days = ds['time'] - time_offset[idx] # Determine indices for the specified range start_time = time_series_time_s if time_series_time_s is not None else time_in_days[0] @@ -386,7 +439,7 @@ def run_diagnostics( # Find global min and max values for consistent color scale across cases global_min, global_max = float('inf'), float('-inf') num_plots = 0 - for ds in datasets: + for idx, ds in enumerate(datasets): if var_name in ds.data_vars: # Convert `time` to a numeric array in days since the start From 8c99b9f8190e9f921d9128098cc73ba7c877322a Mon Sep 17 00:00:00 2001 From: Peter Bogenschutz Date: Fri, 7 Feb 2025 11:32:09 -0800 Subject: [PATCH 3/3] remove print statements --- IOP_diagnostics/iop_diagnostics.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/IOP_diagnostics/iop_diagnostics.py b/IOP_diagnostics/iop_diagnostics.py index 70ebd7f..fac57a2 100644 --- a/IOP_diagnostics/iop_diagnostics.py +++ b/IOP_diagnostics/iop_diagnostics.py @@ -244,8 +244,6 @@ def run_diagnostics( test_date, test_seconds) time_offset.append(offset) - print(time_offset) - ############################################################################################################# # Plot profile variables with three dimensions (e.g., time, ncol, lev or ilev) for var_name in all_vars: