Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EAMxx variables #880

Merged
merged 25 commits into from
Jan 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
b605b8b
CDAT Migration Phase 2: Refactor core utilities and `lat_lon` set (#…
tomvothecoder Oct 10, 2023
edad9ac
update 2d 3d vars
chengzhuzhang Oct 24, 2024
14f3c9b
add more derived vars
chengzhuzhang Oct 25, 2024
cae3621
fix attrs errors
chengzhuzhang Oct 29, 2024
a346098
more informative log
chengzhuzhang Oct 31, 2024
3ab50ae
Remove inadvertent rebase diffs
tomvothecoder Nov 4, 2024
c3dbdd6
Update IOError
tomvothecoder Nov 4, 2024
bfc51a1
Add bounds after calculating climo for time series
tomvothecoder Nov 4, 2024
f49f606
Fix unit test
tomvothecoder Nov 5, 2024
849ea26
fixing radiation fluxes and units
chengzhuzhang Nov 5, 2024
f323ecd
Add support for more land ocean var keys
tomvothecoder Nov 5, 2024
b34caaf
Refactor land sea mask methods
tomvothecoder Nov 6, 2024
5115c6a
fixing for regions i.e. land_60N60S
chengzhuzhang Nov 8, 2024
35fbf20
Update order of time series subsetting to improve performance
tomvothecoder Nov 8, 2024
4ddf616
Revert accidental rebase changes
tomvothecoder Dec 5, 2024
64629a0
Add run script for bottleneck
tomvothecoder Dec 16, 2024
0009dea
Add FIXME comments for performance bottleneck
tomvothecoder Dec 16, 2024
9d00cbf
Add debug_ref_u script
tomvothecoder Dec 17, 2024
3ba6ec5
Add debug scripts
tomvothecoder Dec 19, 2024
e52c7da
add run scripts examples
chengzhuzhang Dec 13, 2024
02b96e0
address review; clean up
chengzhuzhang Dec 20, 2024
1a9152d
Update README.md
chengzhuzhang Jan 8, 2025
3bbc697
Update e3sm_diags/driver/lat_lon_driver.py
tomvothecoder Jan 10, 2025
c70efd1
Apply suggestions from code review
tomvothecoder Jan 15, 2025
8609b10
Fix pre-commit issues
tomvothecoder Jan 15, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
container:
container:
image: ghcr.io/e3sm-project/containers-e3sm-diags-test-data:e3sm-diags-test-data-0.0.2
steps:
- id: skip_check
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
"""
This script is used to debug the bottleneck issue in the reference u variable.
"""

# %%
import timeit

import xarray as xr

# Perlmutter
# ----------
# filepaths = [
# "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc"
# ]

# LCRC
# -----
filepaths = [
"/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc"
]
time_slice = slice("1996-01-15", "1997-01-15", None)

# %%
# Test case 1 - OPEN_MFDATASET() + "ua" dataset (76 GB) + subsetting + `.load()`
# Result: .load() hangs when using `open_mfdataset`
# ------------------------------------------------------------------------------
ds_ua_omfd = xr.open_mfdataset(
filepaths,
decode_times=True,
use_cftime=True,
coords="minimal",
compat="override",
)
ds_ua_omfd_sub = ds_ua_omfd.sel(time=time_slice)

# %%
start_time = timeit.default_timer()
ds_ua_omfd_sub.load()
elapsed = timeit.default_timer() - start_time
print(f"Time taken to load ds_xc_sub: {elapsed} seconds")

# %%
# Test case 2 - OPEN_DATASET() + "ua" dataset (76 GB) + subsetting + `.load()`
# Result: load() works fine when using `open_dataset`
# ------------------------------------------------------------------------------
ds_ua_od = xc.open_dataset(
filepaths[0],
add_bounds=["X", "Y", "T"],
decode_times=True,
use_cftime=True,
# coords="minimal",
# compat="override",
)
ds_ua_od_sub = ds_ua_od.sel(time=time_slice)

# %%
start_time = timeit.default_timer()
ds_ua_od_sub.load()
elapsed = timeit.default_timer() - start_time
print(f"Time taken to load ds_xc_sub: {elapsed} seconds")

# %%
# Test case 3 - OPEN_MFDATASET() + "pr" dataset (2 GB) + subsetting + `.load()`
# Result: ds.load() works fine with pr variable, but not with ua variable
# Notes: pr is 3D variable (time, lat, lon), ua is a 4D variable (time, lat, lon, plev).
# ------------------------------------------------------------------------------
filepaths_pr = [
"/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series/ERA5/pr_197901_201912.nc"
]
ds_pr = xc.open_mfdataset(
filepaths_pr,
add_bounds=["X", "Y", "T"],
decode_times=True,
use_cftime=True,
coords="minimal",
compat="override",
)

# %%
# pr dataset is ~2 GB without subsetting. There is no need to subset.
start_time = timeit.default_timer()
ds_pr.load()
elapsed = timeit.default_timer() - start_time
print(f"Time taken to load ds_xc_sub_0: {elapsed} seconds")
# %%
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Diagnostics configuration: lat_lon set for the ERA5 850 hPa zonal wind (U).
# Passed to e3sm_diags via `--diags` by the accompanying run script.
[#]
sets = ["lat_lon"]
case_id = "ERA5"
variables = ["U"]
ref_name = "ERA5"
reference_name = "ERA5 Reanalysis"
# Annual mean, each calendar month, and the four seasonal means.
seasons = ["ANN", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "DJF", "MAM", "JJA", "SON"]
plevs = [850.0]
test_colormap = "PiYG_r"
reference_colormap = "PiYG_r"
contour_levels = [-20, -15, -10, -8, -5, -3, -1, 1, 3, 5, 8, 10, 15, 20]
diff_levels = [-8, -6, -5, -4, -3, -2, -1, 1, 2, 3, 4, 5, 6, 8]
regrid_method = "bilinear"
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import sys

from e3sm_diags.parameter.core_parameter import CoreParameter
from e3sm_diags.run import runner

# Configure the core parameters for the EAMxx decadal diagnostics run.
core_param = CoreParameter()

core_param.reference_data_path = (
    "/global/cfs/cdirs/e3sm/diagnostics/observations/Atm/time-series"
)
core_param.test_data_path = "/global/cfs/cdirs/e3sm/chengzhu/eamxx/post/data/rgr"
core_param.test_name = "eamxx_decadal"
core_param.seasons = ["ANN"]
# core_param.save_netcdf = True

# The reference data is a time series; slice it to the single year present in
# the filenames.
core_param.ref_timeseries_input = True
core_param.ref_start_yr = "1996"
core_param.ref_end_yr = "1996"

results_prefix = "/global/cfs/cdirs/e3sm/www/cdat-migration-fy24/892-bottleneck"
core_param.results_dir = os.path.join(results_prefix, "eamxx_decadal_1996_1107_edv3")

# Hand the extra .cfg diagnostics to the runner through its CLI argument
# machinery (the runner parses sys.argv for `--diags`).
diags_cfg = "auxiliary_tools/cdat_regression_testing/892-bottleneck/run_script.cfg"
sys.argv.extend(["--diags", diags_cfg])

runner.sets_to_run = [
    "lat_lon",
    "zonal_mean_xy",
    "zonal_mean_2d",
    "zonal_mean_2d_stratosphere",
    "polar",
    "cosp_histogram",
    "meridional_mean_2d",
    "annual_cycle_zonal_mean",
]

runner.run_diags([core_param])
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# %%
import timeit

import xarray as xr

# ERA5 zonal wind ("ua") time series on LCRC.
paths = [
    "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/ERA5/ua_197901_201912.nc"
]

# Open lazily, then subset to a single year before loading.
dataset = xr.open_mfdataset(paths)
one_year = slice("1996-01-15", "1997-01-15", None)
subset = dataset.sel(time=one_year)

# %%
# Time how long reading the subset "ua" variable into memory takes.
timer_start = timeit.default_timer()
subset.ua.load()
elapsed = timeit.default_timer() - timer_start
print(f"Time taken to load ds_xc_sub: {elapsed} seconds")

# %%
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# %%
"""Benchmark loading a synthetic 4D (time, plev, lat, lon) dataset.

Builds a dask-backed random dataset, writes it to a netCDF file, reopens the
file with `xr.open_mfdataset`, and times `.load()` to reproduce the dask
read bottleneck in isolation. Run cell-by-cell (`# %%` markers).
"""
import timeit

import numpy as np
import pandas as pd
import xarray as xr

import dask.array as da

# %%
# Define the dimensions -- presumably mirrors the ERA5 "ua" grid
# (37 pressure levels, 721 x 1440 lat/lon); confirm if that matters.
time = 12
plev = 37
lat = 721
lon = 1440

# Create the data array lazily with dask (a single chunk spanning the array).
data = da.random.random(size=(time, plev, lat, lon), chunks=(12, 37, 721, 1440)).astype(
    np.float32
)

# Create the coordinates.
times = pd.date_range("2000-01-01", periods=time)
plevs = np.linspace(100000, 10, plev)
lats = np.linspace(-90, 90, lat)
lons = np.linspace(0, 360, lon, endpoint=False)

# Create the dataset and write out to a file.
ds = xr.Dataset(
    {"data": (["time", "plev", "lat", "lon"], data)},
    coords={"time": times, "plev": plevs, "lat": lats, "lon": lons},
)
# %%
ds.to_netcdf("dask_bottleneck.nc")

# %%
# Open the dataset lazily from disk.
ds_open = xr.open_mfdataset("dask_bottleneck.nc")

# %%
# Load the file-backed dataset into memory.
# FIX: the original timed `ds.load()` -- the dask-random source dataset -- so
# the benchmark never exercised the netCDF read path opened above. Time
# `ds_open` instead.
start_time = timeit.default_timer()
ds_open.load()
end_time = timeit.default_timer()

print(f"Time taken to load the dataset: {end_time - start_time} seconds")


# %%
Loading
Loading