Correct decumulation filtering
gnrgomes committed Feb 3, 2025
1 parent 96b66aa commit 4513716
Showing 5 changed files with 295 additions and 112 deletions.
41 changes: 37 additions & 4 deletions src/lisfloodutilities/gridding/decumulate_daily_grids.py
@@ -27,6 +27,7 @@
import pandas as pd
from collections import OrderedDict
from scipy.spatial import cKDTree
import importlib
from lisfloodutilities.gridding.lib.utils import Config, FileUtils
from lisfloodutilities.gridding.lib.filters import KiwisFilter, DowgradedDailyTo6HourlyObservationsKiwisFilter

@@ -53,6 +54,34 @@ def get_filter_class(conf: Config) -> KiwisFilter:
return filter_class


def get_filter_classes(conf: Config) -> list:
'''
Create and instantiate the Filter classes to process the kiwis files
'''
global quiet_mode
plugins_array = []
# Load the class dynamically
plugins_config = conf.get_config_field('PROPERTIES', 'KIWIS_FILTER_PLUGIN_CLASSES')
plugins_dic = eval(plugins_config)
plugins = OrderedDict(plugins_dic)
plugins_columns_def = conf.get_config_field('PROPERTIES', 'KIWIS_FILTER_COLUMNS')
plugins_columns_dic = eval(plugins_columns_def)
plugins_columns = OrderedDict(plugins_columns_dic)
module_name = 'lisfloodutilities.gridding.lib.filters'
try:
for plugin in plugins:
class_name = plugin
class_args = plugins[plugin]
module = importlib.import_module(module_name)
class_instance = getattr(module, class_name)(plugins_columns, class_args, conf.var_code, quiet_mode)
plugins_array.append(class_instance)
except ImportError:
print(f"Error: Could not import module '{module_name}'")
except AttributeError:
print(f"Error: Could not find class '{class_name}' in module '{module_name}'")
return plugins_array


def get_timestamp_from_filename(conf: Config, filename: Path) -> datetime:
file_timestamp = datetime.strptime(filename.name, conf.input_timestamp_pattern)
if conf.force_time is not None:
@@ -62,8 +91,7 @@ def get_timestamp_from_filename(conf: Config, filename: Path) -> datetime:


def get_dataframes(conf: Config, kiwis_files: List[Path]) -> Tuple[List[pd.DataFrame], List[datetime], List[str], str]:
filter_class = get_filter_class(conf)
print_msg(f'Filter class: {filter_class.get_class_description()}')
filter_classes = get_filter_classes(conf)
filepath_kiwis = []
kiwis_timestamps = []
kiwis_str_timestamps = []
@@ -73,8 +101,13 @@ def get_dataframes(conf: Config, kiwis_files: List[Path]) -> Tuple[List[pd.DataFrame], List[datetime], List[str], str]:
filepath_kiwis.append(filename_kiwis)
kiwis_str_timestamps.append(kiwis_timestamp_str)
kiwis_timestamps.append(kiwis_timestamp)
df_kiwis_array = filter_class.filter(filepath_kiwis, kiwis_str_timestamps, [])
return df_kiwis_array, kiwis_timestamps, kiwis_str_timestamps, filter_class.COL_PROVIDER_ID
df_kiwis_array = []
last_filter_class = KiwisFilter()
for filter_class in filter_classes:
last_filter_class = filter_class
print_msg(f'{filter_class.get_class_description()}')
df_kiwis_array = filter_class.filter(filepath_kiwis, kiwis_str_timestamps, df_kiwis_array)
return df_kiwis_array, kiwis_timestamps, kiwis_str_timestamps, last_filter_class.COL_PROVIDER_ID

def get_providers_radius(conf: Config, kiwis_timestamp_24h: datetime) -> dict:
"""
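
For illustration, a minimal sketch of how the new plugin chain is resolved and applied; the configuration strings, variable code and plugin arguments below are placeholders, not values taken from the repository configuration:

from collections import OrderedDict
import importlib

# Hypothetical configuration values; the real ones come from the PROPERTIES fields
# KIWIS_FILTER_PLUGIN_CLASSES and KIWIS_FILTER_COLUMNS of the gridding configuration.
plugins_config = "{'KiwisFilter': {}, 'DowgradedDailyTo6HourlyObservationsKiwisFilter': {}}"
plugins_columns_def = "{}"
var_code = 'pr6'
quiet_mode = True

plugins = OrderedDict(eval(plugins_config))              # class name -> class arguments
plugins_columns = OrderedDict(eval(plugins_columns_def)) # shared column definitions

module = importlib.import_module('lisfloodutilities.gridding.lib.filters')
filter_classes = [getattr(module, name)(plugins_columns, args, var_code, quiet_mode)
                  for name, args in plugins.items()]

for filter_class in filter_classes:
    print(filter_class.get_class_description())
# get_dataframes() then applies the filters in this order: the first filter starts from
# an empty list and each subsequent filter refines the dataframes returned by the previous one.
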
34 changes: 34 additions & 0 deletions src/lisfloodutilities/gridding/tools/check_netcdf.py
@@ -0,0 +1,34 @@
from netCDF4 import Dataset
import sys
import argparse

def check_netcdf(filename: str, var_id: str):
"""
Check if a NetCDF file can be opened and contains grids in all timesteps.
Parameters:
- filename (str): The name of the NetCDF file.
- var_id (str): The current ID of the variable.
"""
try:
# Open the NetCDF file in read mode
with Dataset(filename, 'r') as ds:
# print(f"File: {filename}")
for t in range(0, len(ds.variables['time']), 1):
# print(f'time: {t}')
cur_grid = ds.variables[var_id][t, :, :]
except FileNotFoundError:
print(f"Error: File '{filename}' not found.")
except Exception as e:
print(f"An error occurred in file {filename}: {e}")

def main():
parser = argparse.ArgumentParser(description="Check a variable in a NetCDF file.")
parser.add_argument("filename", help="The name of the NetCDF file.")
parser.add_argument("-v", "--var", required=True, help="The ID of the variable.")
args = parser.parse_args()

check_netcdf(args.filename, args.var)

if __name__ == "__main__":
main()
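
A minimal usage sketch for the new check; the file path and variable id are placeholders, and the import assumes the tools folder is importable as a package:

# From the command line (placeholder path and variable id):
#   python src/lisfloodutilities/gridding/tools/check_netcdf.py /path/to/pr6_grids.nc -v pr6
# Or from Python:
from lisfloodutilities.gridding.tools.check_netcdf import check_netcdf
check_netcdf('/path/to/pr6_grids.nc', 'pr6')  # reads every timestep and reports files that cannot be opened or read
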
@@ -2,62 +2,70 @@
import numpy as np
from pathlib import Path

OUTPUT_PATH = '/mnt/nahaUsers/gomesgo/ERA5_vs_EFAS/var_statistics2'
OUTPUT_PATH = '/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/var_statistics'

# For pr6, e0, es, et in mm/day
mm_per_day_to_m = 1000.0 / 4.0
# For pr6, e0, es, et in mm/6h
mm_to_m = 1000.0

# data_filename_pattern, vars_array, years, start_step, end_step, flip_updown, conversion_to_meters
data = {
# 'EFAS': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/EMO-1arcmin-{v}_{yyyy}_monsum.nc', ['pr6', 'e0', 'et'], ['2019', '2020', '2022'], 0, 12),
# 'EFAS_FRANCESCA_ATOS_2023': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_{yyyy}_from_francesca_and_ATOS_monsum.nc', ['pr', 'e0', 'et'], ['2023'], 0, 7),
# 'EFAS_FRANCESCA_ATOS_2024': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_{yyyy}_from_francesca_and_ATOS_monsum.nc', ['pr', 'e0', 'et'], ['2024'], 0, 3),
# 'EFAS_KISTERS_REFRESH_2023': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202307_202403_mm_per_day_monsum.nc', ['pr6'], ['2023'], 0, 6),
# 'EFAS_KISTERS_REFRESH_2024': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202307_202403_mm_per_day_monsum.nc', ['pr6'], ['2024'], 6, 9),
'EFAS_KISTERS_REFRESH_202404': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202404_mm_per_day_monsum.nc', ['pr6'], ['2024'], 0, 1),
}

pixarea_filename = '/mnt/nahaUsers/gomesgo/ERA5_vs_EFAS/pixarea.nc'
# 'KISTERS': ('/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/compare/{v}/KISTERS-{v}_{yyyy}*_monsum.nc', ['pr6'], ['2000'], 0, 0, False, conversion2meters),
'KISTERS': ('/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/Kisters/decumulation/compare/{v}/KISTERS-{v}_{yyyy}*_monsum.nc', ['pr6'], ['2000', '2005', '2021'], 0, 0, False, mm_to_m),
'EMO1': ('/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/compare/{v}/EMO-1arcmin-{v}_{yyyy}*_monsum.nc', ['pr6'], ['2000', '2005', '2021'], 0, 11, False, mm_per_day_to_m),
# 'EFAS': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/EMO-1arcmin-{v}_{yyyy}_monsum.nc', ['pr6', 'e0', 'et'], ['2019', '2020', '2022'], 0, 11, False, mm_per_day_to_m),
# 'EFAS_FRANCESCA_ATOS_2023': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_{yyyy}_from_francesca_and_ATOS_monsum.nc', ['pr', 'e0', 'et'], ['2023'], 0, 6, False, mm_per_day_to_m),
# 'EFAS_FRANCESCA_ATOS_2024': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_{yyyy}_from_francesca_and_ATOS_monsum.nc', ['pr', 'e0', 'et'], ['2024'], 0, 2, False, mm_per_day_to_m),
# 'EFAS_KISTERS_REFRESH_2023': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202307_202403_mm_per_day_monsum.nc', ['pr6'], ['2023'], 0, 5, False, mm_per_day_to_m),
# 'EFAS_KISTERS_REFRESH_2024': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202307_202403_mm_per_day_monsum.nc', ['pr6'], ['2024'], 6, 8, False, mm_per_day_to_m),
# 'EFAS_KISTERS_REFRESH_202404': ('/mnt/nahaUsers/grimast/meteo_template/meteo_from_francesca/{v}_EFASv5_Kisters_refresh_202404_mm_per_day_monsum.nc', ['pr6'], ['2024'], 0, 0, False, mm_per_day_to_m),
}

pixarea_filename = '/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/configuration/1arcmin/pixarea.nc'
flip_pixarea_updown = False
# pixarea_filename = '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/PixAREA_01degrees.tif.nc'
# flip_pixarea_updown = True

pixarea_var = 'Band1'

# masks_base_path = '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks'
masks_base_path = '/mnt/nahaUsers/gomesgo/CALIBRATION_6.0/masks'

mask_var = 'Band1'



# mask_filename, mask_var, flip_mask_updown
mask_filenames = [
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Elbe_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Vistula_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Europe_commonareas_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/Po_mask_1arcmin_LARGE.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Oder_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_EmiliaRomagna_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Piemonte_1arcmin.tif.nc',
'/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Lombardia_1arcmin.tif.nc',
]
(f'{masks_base_path}/mask_Elbe_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_Vistula_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_Europe_commonareas_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/Po_mask_1arcmin_LARGE.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_Oder_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_EmiliaRomagna_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_Piemonte_1arcmin.tif.nc', 'Band1', True),
(f'{masks_base_path}/mask_Lombardia_1arcmin.tif.nc', 'Band1', True)
]


# mask_filenames = [
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Elbe_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Vistula_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Europe_commonareas_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Oder_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/test_maskmap_PoValley_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_EmiliaRomagna_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Piemonte_01degrees.tif.nc',
# '/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Lombardia_01degrees.tif.nc',
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Elbe_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Vistula_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Europe_commonareas_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Oder_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/test_maskmap_PoValley_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_EmiliaRomagna_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Piemonte_01degrees.tif.nc', 'Band1', True),
# ('/mnt/nahaUsers/grimast/meteo_template/Goncalo_monthly_statistics/masks/mask_Lombardia_01degrees.tif.nc', 'Band1', True)
# ]


for mask_filename in mask_filenames:
f=Path(mask_filename)
output_basename=f.stem
for mask_filename, mask_var, flip_mask_updown in mask_filenames:
f = Path(mask_filename)
output_basename = f.stem

# Open the mask NetCDF file
mask_nc = nc.Dataset(mask_filename, 'r')
# Read the mask data
mask_data = mask_nc.variables[mask_var][:]
mask_data = np.flipud(mask_data)
if flip_mask_updown:
mask_data = np.flipud(mask_data)

# Open the Pixel Area NetCDF file
pixarea_nc = nc.Dataset(pixarea_filename, 'r')
@@ -68,82 +76,74 @@
masked_pixarea = np.ma.masked_where(mask_data !=1, pixarea_data)

for dataset_name in data:
data_filename_pattern, vars_array, years, start_step, end_step = data[dataset_name]

print(f'Processing: {dataset_name} for {mask_filename}')
data_filename_pattern, vars_array, years, start_step, end_step, flip_updown, conversion_to_meters = data[dataset_name]

for v in vars_array:
output_file_path = Path(OUTPUT_PATH, f'{v}_{output_basename}.csv')
output_file_path = Path(OUTPUT_PATH, f'{v}_{output_basename}.tsv')
with open(output_file_path, 'a') as out_file:
for yyyy in years:
data_filename = data_filename_pattern.format(yyyy=yyyy, v=v)

# Open the data NetCDF file
data_nc = nc.Dataset(data_filename, 'r')

# Assuming the variable containing the monthly data is named 'monthly_data'
# and the mask variable is named 'mask'
monthly_data_var = v

# data_lat = data_nc.variables['lat'][:]
# data_lon = data_nc.variables['lon'][:]
# mask_lat = mask_nc.variables['lat'][:]
# mask_lon = mask_nc.variables['lon'][:]
# print('data_lat', data_lat)
# print('data_lon', data_lon)
# print('mask_lat', mask_lat)
# print('mask_lon', mask_lon)
# pixarea_lat = pixarea_nc.variables['lat'][:]
# pixarea_lon = pixarea_nc.variables['lon'][:]
# print('pixarea_lat', pixarea_lat)
# print('pixarea_lon', pixarea_lon)


# Initialize an empty list to store the yearly totals
yearly_totals = []

# Loop through each timeslice (assuming there are 12 for each year)
for i in range(start_step, end_step):
# Read the data for the current timeslice
monthly_data = data_nc.variables[monthly_data_var][i, :, :]

# Apply the mask to the data subset
# masked_monthly_data = np.where(mask_data, data_subset, 0)
masked_monthly_data = np.ma.masked_where(mask_data !=1, monthly_data)

monthly_sum_m = masked_monthly_data / 1000 / 4

monthly_sum_m3 = monthly_sum_m * masked_pixarea

monthly_totals_m3 = np.sum(monthly_sum_m3)
# monthly_totals_m3 = np.mean(masked_monthly_data)

# print(f"Total values of {v} yyyy-mm {yyyy}-{i}: {monthly_totals_m3}")
# out_file.write(f"{dataset_name};{yyyy};{i+1};{monthly_totals_m3}\n")

out_file.write(f"EFAS;{yyyy};{i+1+3};{monthly_totals_m3}\n")
# if yyyy == '2023':
# out_file.write(f"EFAS;{yyyy};{i+1+6};{monthly_totals_m3}\n")
# elif yyyy == '2024':
# out_file.write(f"EFAS;{yyyy};{i+1-6};{monthly_totals_m3}\n")

# If it's the first month, initialize the yearly sum with the masked data
if i == 0:
yearly_sum = monthly_totals_m3
else:
# Otherwise, add the masked data to the running yearly sum
yearly_sum += monthly_totals_m3

# Add the yearly sum to the list of yearly totals
yearly_totals.append(yearly_sum)

# Convert the list to a numpy array if necessary
yearly_totals = np.array(yearly_totals)

total_values = np.sum(yearly_totals)

# Close the NetCDF files
data_nc.close()
print(f'Processing: {v} {yyyy}')
current_filename_pattern = data_filename_pattern.format(yyyy=yyyy, v=v)
current_filename_pattern_path = Path(current_filename_pattern)
current_folder = current_filename_pattern_path.parent
current_filename_wildcard = current_filename_pattern_path.name

cur_idx = start_step
for data_filename in sorted(current_folder.rglob(current_filename_wildcard)):
print(f'Processing: {data_filename}')
# Open the data NetCDF file
data_nc = nc.Dataset(data_filename, 'r')

# Assuming the variable containing the monthly data is named 'monthly_data'
# and the mask variable is named 'mask'
monthly_data_var = v

# Initialize an empty list to store the yearly totals
yearly_totals = []

# Loop through each timeslice (assuming there are 12 for each year)
for i in range(start_step, end_step+1):
cur_idx += 1
# Read the data for the current timeslice
monthly_data = data_nc.variables[monthly_data_var][i, :, :]

if flip_updown:
monthly_data = np.flipud(monthly_data)

# Apply the mask to the data subset
# masked_monthly_data = np.where(mask_data, data_subset, 0)
masked_monthly_data = np.ma.masked_where(mask_data!=1, monthly_data)

monthly_sum_m = masked_monthly_data / conversion_to_meters

monthly_sum_m3 = monthly_sum_m * masked_pixarea

monthly_totals_m3 = np.sum(monthly_sum_m3)
# monthly_totals_m3 = np.mean(masked_monthly_data)

out_file.write(f'{dataset_name}\t{yyyy}\t{cur_idx}\t{monthly_totals_m3}\n')

# If it's the first month, initialize the yearly sum with the masked data
if i == start_step:
yearly_sum = monthly_totals_m3
else:
# Otherwise, add the masked data to the running yearly sum
yearly_sum += monthly_totals_m3

# Add the yearly sum to the list of yearly totals
yearly_totals.append(yearly_sum)

# Convert the list to a numpy array if necessary
yearly_totals = np.array(yearly_totals)

total_values = np.sum(yearly_totals)

# Close the NetCDF files
data_nc.close()
data_nc = None

# print(f"Total values of {v} year {yyyy}: {total_values}")
print(f"Total values of {v} year {yyyy}: {total_values}")

mask_nc.close()
pixarea_nc.close()
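
For reference, a self-contained sketch of the monthly volume computation performed in the loop above, using small synthetic arrays; the grid values, pixel area and mask are illustrative only:

import numpy as np

# Synthetic 3x3 example: precipitation depth in mm, pixel areas in m^2, binary basin mask.
monthly_data = np.array([[10.0, 12.0, 8.0],
                         [ 9.0, 11.0, 7.0],
                         [ 0.0,  5.0, 6.0]])
pixarea_data = np.full((3, 3), 3.4e6)   # illustrative pixel area in m^2
mask_data = np.array([[1, 1, 0],
                      [1, 1, 0],
                      [0, 0, 0]])

masked_monthly_data = np.ma.masked_where(mask_data != 1, monthly_data)
masked_pixarea = np.ma.masked_where(mask_data != 1, pixarea_data)

mm_to_m = 1000.0                                      # conversion factor, as for mm/6h data above
monthly_sum_m = masked_monthly_data / mm_to_m         # depth in metres
monthly_sum_m3 = monthly_sum_m * masked_pixarea       # volume per pixel in m^3
monthly_totals_m3 = np.sum(monthly_sum_m3)            # basin total in m^3, the value written per month
print(monthly_totals_m3)                              # 142800.0 for this synthetic example
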