diff --git a/.github/workflows/build_workflow.yml b/.github/workflows/build_workflow.yml
index 5ed40ded..db7680ff 100644
--- a/.github/workflows/build_workflow.yml
+++ b/.github/workflows/build_workflow.yml
@@ -25,14 +25,14 @@ jobs:
       - id: skip_check
         uses: fkirc/skip-duplicate-actions@master
         with:
-          cancel_others: true
+          cancel_others: false
           paths_ignore: '["**/README.md", "**/docs/**"]'

   pre-commit-hooks:
     needs: check-jobs-to-skip
     if: ${{ needs.check-jobs-to-skip.outputs.should_skip != 'true' }}
     runs-on: ubuntu-latest
-    timeout-minutes: 2
+    timeout-minutes: 5
     steps:
       - name: Checkout Code Repository
         uses: actions/checkout@v3
@@ -73,10 +73,8 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: zppy_dev
-          miniforge-variant: Mambaforge
+          miniforge-variant: Miniforge3
           miniforge-version: latest
-          use-mamba: true
-          mamba-version: "*"
           environment-file: conda/dev.yml
           channel-priority: strict
           auto-update-conda: true
@@ -123,10 +121,8 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: zppy_dev
-          miniforge-variant: Mambaforge
+          miniforge-variant: Miniforge3
           miniforge-version: latest
-          use-mamba: true
-          mamba-version: "*"
           environment-file: conda/dev.yml
           channel-priority: strict
           auto-update-conda: true
diff --git a/.github/workflows/release_workflow.yml b/.github/workflows/release_workflow.yml
index 6b58db47..834021f9 100644
--- a/.github/workflows/release_workflow.yml
+++ b/.github/workflows/release_workflow.yml
@@ -35,10 +35,8 @@ jobs:
         uses: conda-incubator/setup-miniconda@v2
         with:
           activate-environment: zppy_dev
-          miniforge-variant: Mambaforge
+          miniforge-variant: Miniforge3
           miniforge-version: latest
-          use-mamba: true
-          mamba-version: "*"
           environment-file: conda/dev.yml
           channel-priority: strict
           auto-update-conda: true
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 177692ed..e5c62ff2 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@
 fail_fast: true
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v5.0.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -13,13 +13,13 @@ repos:
   # Can run individually with `pre-commit run black --all-files`
   - repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 24.10.0
     hooks:
      - id: black

   # Can run individually with `pre-commit run isort --all-files`
   - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort

@@ -27,15 +27,20 @@ repos:
   # Need to use flake8 GitHub mirror due to CentOS git issue with GitLab
   # https://github.com/pre-commit/pre-commit/issues/1206
   - repo: https://github.com/pycqa/flake8
-    rev: 5.0.4
+    rev: 7.1.1
     hooks:
       - id: flake8
         args: ["--config=setup.cfg"]
-        additional_dependencies: [flake8-isort]
+        additional_dependencies: [flake8-isort==6.1.1]

   # Can run individually with `pre-commit run mypy --all-files`
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.982
+    rev: v1.11.2
     hooks:
       - id: mypy
         args: ["--config=setup.cfg"]
+
+# https://pre-commit.ci/#configuration
+ci:
+  autofix_prs: false
+  autoupdate_schedule: monthly
diff --git a/conda/chrisalis.yml b/conda/chrisalis.yml
new file mode 100644
index 00000000..cdefec7f
--- /dev/null
+++ b/conda/chrisalis.yml
@@ -0,0 +1,34 @@
+name: zppy_dev
+channels:
+  - conda-forge
+  - defaults
+dependencies:
+  # Base
+  # =================
+  - python=3.9.13
+  - pip=22.2.2
+  - configobj=5.0.6
+  - jinja2=3.1.2
+  - mache>=1.5.0
+  - mpas_tools>=0.15.0
+  - pillow=9.2.0
+  # Developer Tools
+  # =================
+  # If versions are updated, also update 'rev' in `.pre-commit-config.yaml`
+  - black=22.8.0 # version from https://anaconda.org/conda-forge/black
+  - flake8=5.0.4 # version from https://anaconda.org/conda-forge/flake8
+  # This line also implicitly installs isort
+  - flake8-isort=4.2.0 # version from https://anaconda.org/conda-forge/flake8-isort
+  - mypy=0.982 # version from https://anaconda.org/conda-forge/mypy
+  - pre-commit=2.20.0 # version from https://anaconda.org/conda-forge/pre-commit
+  - tbump=6.9.0
+  # Documentation
+  # If versions are updated, also update in `.github/workflows/build_workflow.yml`
+  # =================
+  - sphinx=5.2.3
+  - sphinx-multiversion=0.2.4
+  - sphinx_rtd_theme=1.0.0
+  # Need to pin docutils because 0.17 has a bug with unordered lists
+  # https://github.com/readthedocs/sphinx_rtd_theme/issues/1115
+  - docutils=0.16
+prefix: /home/ac.szhang/.conda/envs/zppy_dev
diff --git a/conda/dev.yml b/conda/dev.yml
index 565fe235..f5c5df11 100644
--- a/conda/dev.yml
+++ b/conda/dev.yml
@@ -15,12 +15,12 @@ dependencies:
   # Developer Tools
   # =================
   # If versions are updated, also update 'rev' in `.pre-commit-config.yaml`
-  - black=22.8.0 # version from https://anaconda.org/conda-forge/black
-  - flake8=5.0.4 # version from https://anaconda.org/conda-forge/flake8
+  - black=24.10.0 # version from https://anaconda.org/conda-forge/black
+  - flake8=7.1.1 # version from https://anaconda.org/conda-forge/flake8
   # This line also implicitly installs isort
-  - flake8-isort=4.2.0 # version from https://anaconda.org/conda-forge/flake8-isort
-  - mypy=0.982 # version from https://anaconda.org/conda-forge/mypy
-  - pre-commit=2.20.0 # version from https://anaconda.org/conda-forge/pre-commit
+  - flake8-isort=6.1.1 # version from https://anaconda.org/conda-forge/flake8-isort
+  - mypy=1.11.2 # version from https://anaconda.org/conda-forge/mypy
+  - pre-commit=4.0.1 # version from https://anaconda.org/conda-forge/pre-commit
   - tbump=6.9.0
   # Documentation
   # If versions are updated, also update in `.github/workflows/build_workflow.yml`
diff --git a/conda/meta.yaml b/conda/meta.yaml
index fb6ceb08..6e4e7e07 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -18,10 +18,10 @@ build:

 requirements:
   host:
-    - python >=3.6
+    - python >=3.9
     - pip
   run:
-    - python >=3.6
+    - python >=3.9
     - configobj >=5.0.0,<6.0.0
     - jinja2 >=2.0.0
     - mache >=1.3.2
diff --git a/setup.cfg b/setup.cfg
index 356bbdca..22201a95 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -46,7 +46,7 @@ exclude =
     venv

 [mypy]
-python_version = 3.7
+python_version = 3.9
 check_untyped_defs = True
 ignore_missing_imports = True
 warn_unused_ignores = True
diff --git a/setup.py b/setup.py
index 3f5c80c0..c1f32060 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 def package_files(directory, prefixes, extensions):
     """based on https://stackoverflow.com/a/36693250/7728169"""
     paths = []
-    for (path, directories, filenames) in os.walk(directory):
+    for path, directories, filenames in os.walk(directory):
         for filename in filenames:
             parts = filename.split(".")
             prefix = parts[0]
@@ -27,7 +27,7 @@
     author="Ryan Forsyth, Chris Golaz",
     author_email="forsyth2@llnl.gov, golaz1@llnl.gov",
     description="Post-processing software for E3SM",
-    python_requires=">=3.6",
+    python_requires=">=3.9",
     packages=find_packages(include=["zppy", "zppy.*"]),
     package_data={"": data_files},
     entry_points={"console_scripts": ["zppy=zppy.__main__:main"]},
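[Editor's note: the version bumps in `.pre-commit-config.yaml` and `conda/dev.yml` above are meant to move in lockstep; each file's comments point at the other. A quick local consistency check, as a sketch: it assumes the `zppy_dev` environment is (re)created from `conda/dev.yml` first, with the env name and file path taken from this diff.

$ conda env create -f conda/dev.yml     # or: conda env update -f conda/dev.yml
$ conda activate zppy_dev
$ pre-commit run --all-files            # exercises black 24.10.0, isort, flake8 7.1.1, mypy 1.11.2

If the hook `rev` values and the conda pins drift apart, the pre-commit run and a plain in-environment run can disagree on formatting.]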
diff --git a/tests/integration/generated/test_min_case_add_dependencies_chrysalis.cfg b/tests/integration/generated/test_min_case_add_dependencies_chrysalis.cfg
index 6c0559c4..2656fa7a 100644
--- a/tests/integration/generated/test_min_case_add_dependencies_chrysalis.cfg
+++ b/tests/integration/generated/test_min_case_add_dependencies_chrysalis.cfg
@@ -81,7 +81,7 @@ years = "1985:1995:5"

 [e3sm_diags]
 active = True
-environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731"
+environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
 grid = '180x360_aave'
 multiprocessing = True
 num_workers = 8
diff --git a/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg b/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg
new file mode 100644
index 00000000..721faaf9
--- /dev/null
+++ b/tests/integration/generated/test_min_case_carryover_dependencies_chrysalis.cfg
@@ -0,0 +1,189 @@
+# Test carryover dependency handling
+
+# Things to check for in the output:
+# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked)
+# 2. Check that .settings files include `dependencies` listed out,
+# using `grep -A 5 "'dependencies'" *settings`.
+# (Change the -A number to include enough lines to see the entire dependencies list)
+# Check that these look correct.
+
+
+# These tasks can have carryover dependencies:
+# 1. mpas_analysis.py
+
+# These tasks have dependencies, but no carryover:
+# 1. e3sm_diags.py
+# 2. global_time_series.py
+# 3. ilamb.py
+# 4. tc_analysis.py
+
+# These tasks don't have dependencies:
+# 1. climo.py
+# 2. ts.py
+
+[default]
+case = "v3.LR.historical_0051"
+constraint = ""
+dry_run = "False"
+environment_commands = ""
+input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051
+input_subdir = archive/atm/hist
+mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
+output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051"
+partition = "debug"
+qos = "regular"
+www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_carryover_dependencies_www/unique_id"
+years = "1985:1989:2",
+
+[climo]
+active = True
+walltime = "00:30:00"
+
+  [[ atm_monthly_180x360_aave ]]
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  vars = ""
+
+  [[ atm_monthly_diurnal_8xdaily_180x360_aave ]]
+  frequency = "diurnal_8xdaily"
+  input_files = "eam.h3"
+  input_subdir = "archive/atm/hist"
+  vars = "PRECT"
+
+  [[ land_monthly_climo ]]
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
+  vars = ""
+
+[ts]
+active = True
+e3sm_to_cmip_environment_commands = ""
+walltime = "00:30:00"
+
+  [[ atm_monthly_180x360_aave ]]
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  ts_fmt = "cmip"
+
+  [[ rof_monthly ]]
+  extra_vars = 'areatotal2'
+  frequency = "monthly"
+  input_files = "mosart.h0"
+  input_subdir = "archive/rof/hist"
+  mapping_file = ""
+  vars = "RIVER_DISCHARGE_OVER_LAND_LIQ"
+
+  [[ atm_monthly_glb ]]
+  # Note global average won't work for 3D variables.
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  mapping_file = "glb"
+  years = "1985:1995:5",
+
+  [[ lnd_monthly_glb ]]
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "glb"
+  vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+  years = "1985:1995:5",
+
+  [[ land_monthly ]]
+  extra_vars = "landfrac"
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
+  ts_fmt = "cmip"
+  vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+
+# TODO: Add "tc_analysis" back in after empty dat is resolved.
+# [tc_analysis]
+# # The second run should run in parallel with the first run.
+# active = True
+# scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_carryover_dependencies_scratch/unique_id/v3.LR.historical_0051"
+# walltime = "00:30:00"
+
+[e3sm_diags]
+active = True
+environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
+grid = '180x360_aave'
+multiprocessing = True
+num_workers = 8
+partition = "compute"
+qos = "regular"
+ref_final_yr = 1986
+ref_start_yr = 1985
+ref_years = "1985-1986",
+short_name = "v3.LR.historical_0051"
+ts_num_years = 2
+walltime = "5:00:00"
+years = "1987:1989:2"
+
+  [[ atm_monthly_180x360_aave ]]
+  # TODO: Add "tc_analysis" back in after empty dat is resolved.
+  # This task will depend on the tc_analysis task
+  climo_diurnal_frequency = "diurnal_8xdaily"
+  climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave"
+  climo_subsection = "atm_monthly_180x360_aave"
+  dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
+  # TODO: Add "tc_analysis" back in after empty dat is resolved.
+  #sets = "lat_lon","tc_analysis"
+  sets = "lat_lon",
+
+  [[ atm_monthly_180x360_aave_mvm ]]
+  # This task will not depend on the tc_analysis task
+  # Test model-vs-model using the same files as the reference
+  climo_subsection = "atm_monthly_180x360_aave"
+  diff_title = "Difference"
+  partition = "compute"
+  qos = "regular"
+  ref_name = "v3.LR.historical_0051"
+  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_carryover_dependencies_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
+  run_type = "model_vs_model"
+  sets = "lat_lon",
+  short_ref_name = "same simulation"
+  swap_test_ref = False
+  tag = "model_vs_model"
+  ts_num_years_ref = 2
+  ts_subsection = "atm_monthly_180x360_aave"
+
+[mpas_analysis]
+# The second run should depend on the first run.
+active = True
+anomalyRefYear = 1985
+climo_years = "1985-1989", "1990-1995",
+enso_years = "1985-1989", "1990-1995",
+mesh = "IcoswISC30E3r5"
+parallelTaskCount = 6
+partition = "compute"
+qos = "regular"
+shortTermArchive = True
+ts_years = "1985-1989", "1985-1995",
+walltime = "00:30:00"
+
+[global_time_series]
+active = True
+climo_years = "1985-1989", "1990-1995",
+experiment_name = "v3.LR.historical_0051"
+figstr = "v3.LR.historical_0051"
+moc_file=mocTimeSeries_1985-1995.nc
+plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+ts_num_years = 5
+ts_years = "1985-1989", "1985-1995",
+walltime = "00:30:00"
+years = "1985-1995",
+
+[ilamb]
+active = True
+nodes = 8
+partition = "compute"
+short_name = "v3.LR.historical_0051"
+ts_num_years = 2
+walltime = "2:00:00"
+years = "1985:1989:4"
diff --git a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_chrysalis.cfg
index 416771ae..e8b4b4e0 100644
--- a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_chrysalis.cfg
+++ b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_chrysalis.cfg
@@ -24,7 +24,7 @@ walltime = "00:30:00"

 [e3sm_diags]
 active = True
-environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731"
+environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
 grid = '180x360_aave'
 multiprocessing = True
 num_workers = 8
@@ -35,4 +35,4 @@ walltime = "5:00:00"

   [[ atm_monthly_180x360_aave ]]
   climo_subsection = "atm_monthly_180x360_aave"
-  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere",
+  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget",
diff --git a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_mvm_2_chrysalis.cfg
index 036e17a7..06e067dc 100644
--- a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_mvm_2_chrysalis.cfg
+++ b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_climo_mvm_2_chrysalis.cfg
@@ -24,7 +24,7 @@ walltime = "00:30:00"

 [e3sm_diags]
 active = True
-environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731"
+environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
 grid = '180x360_aave'
 multiprocessing = True
 num_workers = 8
@@ -44,7 +44,7 @@ walltime = "5:00:00"
   # Use _1 as reference
   reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_depend_on_climo_mvm_1_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
   run_type = "model_vs_model"
-  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere",
+  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget",
"v3.LR.historical_0051" swap_test_ref = False tag = "model_vs_model" diff --git a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_chrysalis.cfg index 5a686029..9e47abd2 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_chrysalis.cfg @@ -25,7 +25,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_mvm_2_chrysalis.cfg index 72219931..cc085aa1 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_depend_on_ts_mvm_2_chrysalis.cfg @@ -25,7 +25,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_chrysalis.cfg index 481b7fc1..11e9983f 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_chrysalis.cfg @@ -24,7 +24,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_mvm_2_chrysalis.cfg index b0109858..251d82c3 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_diurnal_cycle_mvm_2_chrysalis.cfg @@ -24,7 +24,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_lat_lon_land_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_lat_lon_land_mvm_2_chrysalis.cfg index bb585138..723e6c43 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_lat_lon_land_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_lat_lon_land_mvm_2_chrysalis.cfg @@ -24,7 +24,7 @@ walltime = 
"00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 @@ -34,7 +34,7 @@ short_name = "v3.LR.historical_0051" walltime = "5:00:00" [[ lnd_monthly_mvm_lnd ]] - climo_subsection = "land_monthly_climo" + climo_land_subsection = "land_monthly_climo" diff_title = "Difference" ref_final_yr = 1988 ref_name = "v3.LR.historical_0051" diff --git a/tests/integration/generated/test_min_case_e3sm_diags_streamflow_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_streamflow_chrysalis.cfg index 797adc44..9a82f303 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_streamflow_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_streamflow_chrysalis.cfg @@ -27,7 +27,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_streamflow_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_streamflow_mvm_2_chrysalis.cfg index f7d43fc6..147d851f 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_streamflow_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_streamflow_mvm_2_chrysalis.cfg @@ -27,7 +27,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_chrysalis.cfg index ee24e496..df05210a 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_chrysalis.cfg @@ -1,12 +1,12 @@ [default] -case = "v3.LR.historical_0051" +case = "v2.LR.historical_0201" constraint = "" dry_run = "False" environment_commands = "" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051 +input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201 input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_output/unique_id/v3.LR.historical_0051" +output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_output/unique_id/v2.LR.historical_0201" partition = "debug" qos = "regular" www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_www/unique_id" @@ -14,22 +14,21 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_scratch/unique_id/v3.LR.historical_0051" +scratch = 
"/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_scratch/unique_id/v2.LR.historical_0201" walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 partition = "compute" qos = "regular" -short_name = "v3.LR.historical_0051" +short_name = "v2.LR.historical_0201" ts_num_years = 2 walltime = "5:00:00" [[ atm_monthly_180x360_aave_tc_analysis ]] - # Note: tc_analysis requires e3sm_diags jobs to run sequentially sets = "tc_analysis", # tc_obs is determined automatically diff --git a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_1_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_1_chrysalis.cfg index f9611ca1..aa29ad07 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_1_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_1_chrysalis.cfg @@ -1,12 +1,12 @@ [default] -case = "v3.LR.historical_0051" +case = "v2.LR.historical_0201" constraint = "" dry_run = "False" environment_commands = "" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051 +input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201 input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/unique_id/v3.LR.historical_0051" +output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/unique_id/v2.LR.historical_0201" partition = "debug" qos = "regular" www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_www/unique_id" @@ -14,5 +14,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_scratch/unique_id/v3.LR.historical_0051" +scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_scratch/unique_id/v2.LR.historical_0201" walltime = "00:30:00" diff --git a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_2_chrysalis.cfg index 3e42bc99..a226f2ef 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_mvm_2_chrysalis.cfg @@ -1,12 +1,13 @@ +# Run this after _1 completes. (We need tc_analysis for the reference years). 
 [default]
-case = "v3.LR.historical_0051"
+case = "v2.LR.historical_0201"
 constraint = ""
 dry_run = "False"
 environment_commands = ""
-input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051
+input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201
 input_subdir = archive/atm/hist
 mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
-output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_output/unique_id/v3.LR.historical_0051"
+output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_output/unique_id/v2.LR.historical_0201"
 partition = "debug"
 qos = "regular"
 www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_www/unique_id"
@@ -14,18 +15,18 @@ years = "1995:1997:2",

 [tc_analysis]
 active = True
-scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_scratch/unique_id/v3.LR.historical_0051"
+scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_2_scratch/unique_id/v2.LR.historical_0201"
 walltime = "00:30:00"

 [e3sm_diags]
 active = True
-environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731"
+environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015"
 grid = '180x360_aave'
 multiprocessing = True
 num_workers = 8
 partition = "compute"
 qos = "regular"
-short_name = "v3.LR.historical_0051"
+short_name = "v2.LR.historical_0201"
 walltime = "5:00:00"

   [[ atm_monthly_180x360_aave_mvm ]]
@@ -33,15 +34,15 @@ walltime = "5:00:00"
   climo_subsection = "atm_monthly_180x360_aave"
   diff_title = "Difference"
   ref_final_yr = 1986
-  ref_name = "v3.LR.historical_0051"
+  ref_name = "v2.LR.historical_0201"
   ref_start_yr = 1985
   ref_years = "1985-1986",
   # Use _1 as reference
-  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
+  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/unique_id/v2.LR.historical_0201/post/atm/180x360_aave/clim"
   # reference_data_path_tc determined automatically
   run_type = "model_vs_model"
   sets = "tc_analysis",
-  short_ref_name = "v3.LR.historical_0051"
+  short_ref_name = "v2.LR.historical_0201"
   swap_test_ref = False
   tag = "model_vs_model"
   years = "1995-1996",
diff --git a/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_parallel_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_parallel_chrysalis.cfg
new file mode 100644
index 00000000..c3a7e505
--- /dev/null
+++ b/tests/integration/generated/test_min_case_e3sm_diags_tc_analysis_parallel_chrysalis.cfg
@@ -0,0 +1,36 @@
+[default]
+case = "v2.LR.historical_0201"
+constraint = ""
+dry_run = "False"
+environment_commands = ""
+input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201
+input_subdir = archive/atm/hist
+mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
+output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_parallel_output/unique_id/v2.LR.historical_0201"
+partition = "debug"
+qos = "regular"
+www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_parallel_www/unique_id"
+years = "1985:1989:2",
+
+[tc_analysis]
+active = True
"/lcrc/globalscratch/ac.forsyth2/zppy_min_case_e3sm_diags_tc_analysis_parallel_scratch/unique_id/v2.LR.historical_0201" +walltime = "00:30:00" + +[e3sm_diags] +active = True +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" +grid = '180x360_aave' +multiprocessing = True +num_workers = 8 +partition = "compute" +qos = "regular" +short_name = "v2.LR.historical_0201" +ts_num_years = 2 +walltime = "5:00:00" + + [[ atm_monthly_180x360_aave_tc_analysis ]] + # Expecting two viewers: 1985-1986, 1987-1988 + # The generation of these two viewers should occur in parallel. + sets = "tc_analysis", + # tc_obs is determined automatically diff --git a/tests/integration/generated/test_min_case_e3sm_diags_tropical_subseasonal_mvm_2_chrysalis.cfg b/tests/integration/generated/test_min_case_e3sm_diags_tropical_subseasonal_mvm_2_chrysalis.cfg index 7a997083..233569cc 100644 --- a/tests/integration/generated/test_min_case_e3sm_diags_tropical_subseasonal_mvm_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_e3sm_diags_tropical_subseasonal_mvm_2_chrysalis.cfg @@ -25,7 +25,7 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 diff --git a/tests/integration/generated/test_min_case_tc_analysis_simultaneous_1_chrysalis.cfg b/tests/integration/generated/test_min_case_tc_analysis_simultaneous_1_chrysalis.cfg index 212baec1..41de5e34 100644 --- a/tests/integration/generated/test_min_case_tc_analysis_simultaneous_1_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_tc_analysis_simultaneous_1_chrysalis.cfg @@ -1,12 +1,12 @@ [default] -case = "v3.LR.historical_0051" +case = "v2.LR.historical_0201" constraint = "" dry_run = "False" environment_commands = "" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051 +input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201 input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_1_output/unique_id/v3.LR.historical_0051" +output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_1_output/unique_id/v2.LR.historical_0201" partition = "debug" qos = "regular" www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_1_www/unique_id" @@ -14,5 +14,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_1_scratch/unique_id/v3.LR.historical_0051" +scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_1_scratch/unique_id/v2.LR.historical_0201" walltime = "00:30:00" diff --git a/tests/integration/generated/test_min_case_tc_analysis_simultaneous_2_chrysalis.cfg b/tests/integration/generated/test_min_case_tc_analysis_simultaneous_2_chrysalis.cfg index 3a116479..71880091 100644 --- a/tests/integration/generated/test_min_case_tc_analysis_simultaneous_2_chrysalis.cfg +++ b/tests/integration/generated/test_min_case_tc_analysis_simultaneous_2_chrysalis.cfg @@ -1,14 +1,14 @@ # Run this immediately after running _1 [default] -case = "v3.LR.historical_0051" +case = "v2.LR.historical_0201" constraint = 
"" dry_run = "False" environment_commands = "" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051 +input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201 input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_2_output/unique_id/v3.LR.historical_0051" +output = "/lcrc/group/e3sm/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_2_output/unique_id/v2.LR.historical_0201" partition = "debug" qos = "regular" www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_2_www/unique_id" @@ -16,5 +16,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_2_scratch/unique_id/v3.LR.historical_0051" +scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_min_case_tc_analysis_simultaneous_2_scratch/unique_id/v2.LR.historical_0201" walltime = "00:30:00" diff --git a/tests/integration/generated/test_weekly_bundles_chrysalis.cfg b/tests/integration/generated/test_weekly_bundles_chrysalis.cfg index c4c1b5db..a16129e3 100644 --- a/tests/integration/generated/test_weekly_bundles_chrysalis.cfg +++ b/tests/integration/generated/test_weekly_bundles_chrysalis.cfg @@ -95,15 +95,16 @@ years = "1985:1989:2", mapping_file = "" vars = "RIVER_DISCHARGE_OVER_LAND_LIQ" -[tc_analysis] -active = True -bundle = "bundle3" # Let bundle1 finish first because "e3sm_diags: atm_monthly_180x360_aave_mvm" requires "ts: atm_monthly_180x360_aave" -scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_weekly_bundles_scratch/unique_id/v3.LR.historical_0051" -years = "1985:1989:2", +# TODO: Add "tc_analysis" back in after empty dat is resolved. +# [tc_analysis] +# active = True +# bundle = "bundle3" # Let bundle1 finish first because "e3sm_diags: atm_monthly_180x360_aave_mvm" requires "ts: atm_monthly_180x360_aave" +# scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_weekly_bundles_scratch/unique_id/v3.LR.historical_0051" +# years = "1985:1989:2", [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' ref_final_yr = 1989 ref_start_yr = 1985 @@ -128,7 +129,8 @@ years = "1985:1989:2", ref_years = "1985-1986", reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_bundles_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim" run_type = "model_vs_model" - sets = "polar","enso_diags","streamflow","tc_analysis", + # TODO: Add "tc_analysis" back in after empty dat is resolved. 
+ sets = "polar","enso_diags","streamflow", short_ref_name = "v3.LR.historical_0051" swap_test_ref = False tag = "model_vs_model" diff --git a/tests/integration/generated/test_weekly_comprehensive_v2_chrysalis.cfg b/tests/integration/generated/test_weekly_comprehensive_v2_chrysalis.cfg index 7b6987e1..b1bd857b 100644 --- a/tests/integration/generated/test_weekly_comprehensive_v2_chrysalis.cfg +++ b/tests/integration/generated/test_weekly_comprehensive_v2_chrysalis.cfg @@ -3,6 +3,7 @@ case = "v2.LR.historical_0201" constraint = "" dry_run = "False" environment_commands = "" +fail_on_dependency_skip = True input = /lcrc/group/e3sm/ac.forsyth2//E3SMv2/v2.LR.historical_0201 input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" @@ -10,7 +11,7 @@ output = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v2_output/uniqu partition = "debug" qos = "regular" www = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.forsyth2/zppy_weekly_comprehensive_v2_www/unique_id" -years = "1850:1854:2", +years = "1980:1984:2", [climo] active = True @@ -45,6 +46,12 @@ walltime = "00:30:00" input_subdir = "archive/atm/hist" ts_fmt = "cmip" + [[ atm_daily_180x360_aave ]] + frequency = "daily" + input_files = "eam.h1" + input_subdir = "archive/atm/hist" + vars = "PRECT" + [[ rof_monthly ]] extra_vars = 'areatotal2' frequency = "monthly" @@ -59,7 +66,7 @@ walltime = "00:30:00" input_files = "eam.h0" input_subdir = "archive/atm/hist" mapping_file = "glb" - years = "1850:1860:5", + years = "1980:1990:5", [[ lnd_monthly_glb ]] frequency = "monthly" @@ -67,7 +74,7 @@ walltime = "00:30:00" input_subdir = "archive/lnd/hist" mapping_file = "glb" vars = "LAISHA,LAISUN" - years = "1850:1860:5", + years = "1980:1990:5", [[ land_monthly ]] extra_vars = "landfrac" @@ -84,26 +91,34 @@ walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +climo_diurnal_frequency = "diurnal_8xdaily" +climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 partition = "compute" qos = "regular" -ref_final_yr = 1851 -ref_start_yr = 1850 -ref_years = "1850-1851", +ref_end_yr = 1981 +ref_final_yr = 1981 +ref_start_yr = 1980 +ref_years = "1980-1981", +# Include all sets +# min_case_e3sm_diags_depend_on_climo: "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget", +# min_case_e3sm_diags_depend_on_ts: "enso_diags","qbo", +# min_case_e3sm_diags_diurnal_cycle: "diurnal_cycle", +# min_case_e3sm_diags_streamflow: "streamflow", +# min_case_e3sm_diags_tc_analysis: "tc_analysis", +# min_case_e3sm_diags_tropical_subseasonal: "tropical_subseasonal", +sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","enso_diags","qbo","diurnal_cycle","streamflow","tc_analysis","tropical_subseasonal","aerosol_aeronet","aerosol_budget", short_name = "v2.LR.historical_0201" ts_num_years = 2 walltime = "5:00:00" -years = "1852:1854:2", +years = "1982:1984:2", [[ atm_monthly_180x360_aave ]] - climo_diurnal_frequency = "diurnal_8xdaily" - climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" 
   climo_subsection = "atm_monthly_180x360_aave"
   dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
-  sets = "lat_lon","enso_diags","diurnal_cycle","streamflow","tc_analysis","tc_analysis",

   [[ atm_monthly_180x360_aave_mvm ]]
   # Test model-vs-model using the same files as the reference
@@ -114,16 +129,17 @@ years = "1852:1854:2",
   ref_name = "v2.LR.historical_0201"
   reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v2_output/unique_id/v2.LR.historical_0201/post/atm/180x360_aave/clim"
   run_type = "model_vs_model"
-  sets = "lat_lon",
   short_ref_name = "same simulation"
   swap_test_ref = False
   tag = "model_vs_model"
+  ts_daily_subsection = "atm_monthly_180x360_aave"
   ts_num_years_ref = 2
   ts_subsection = "atm_monthly_180x360_aave"
+  years = "1980:1982:2",

   [[ lnd_monthly_mvm_lnd ]]
   # Test model-vs-model using the same files as the reference
-  climo_subsection = "land_monthly_climo"
+  climo_land_subsection = "land_monthly_climo"
   diff_title = "Difference"
   partition = "compute"
   qos = "regular"
@@ -138,28 +154,28 @@ years = "1852:1854:2",

 [mpas_analysis]
 active = True
-anomalyRefYear = 1850
-climo_years ="1850-1854", "1855-1860",
-enso_years = "1850-1854", "1855-1860",
+anomalyRefYear = 1980
+climo_years ="1980-1984", "1985-1990",
+enso_years = "1980-1984", "1985-1990",
 mesh = "EC30to60E2r2"
 parallelTaskCount = 6
 partition = "compute"
 qos = "regular"
 shortTermArchive = True
-ts_years = "1850-1854", "1850-1860",
+ts_years = "1980-1984", "1980-1990",
 walltime = "00:30:00"

 [global_time_series]
 active = True
-climo_years ="1850-1854", "1855-1860",
+climo_years ="1980-1984", "1985-1990",
 experiment_name = "v2.LR.historical_0201"
 figstr = "v2.LR.historical_0201"
-moc_file=mocTimeSeries_1850-1860.nc
+moc_file=mocTimeSeries_1980-1990.nc
 plots_lnd = "LAISHA,LAISUN"
 ts_num_years = 5
-ts_years = "1850-1854", "1850-1860",
+ts_years = "1980-1984", "1980-1990",
 walltime = "00:30:00"
-years = "1850-1860",
+years = "1980-1990",

 [ilamb]
 active = True
@@ -168,4 +184,4 @@ partition = "compute"
 short_name = "v2.LR.historical_0201"
 ts_num_years = 2
 walltime = "2:00:00"
-years = "1850:1854:2",
+years = "1980:1984:2",
diff --git a/tests/integration/generated/test_weekly_comprehensive_v3_chrysalis.cfg b/tests/integration/generated/test_weekly_comprehensive_v3_chrysalis.cfg
index 6d37de66..62aac29c 100644
--- a/tests/integration/generated/test_weekly_comprehensive_v3_chrysalis.cfg
+++ b/tests/integration/generated/test_weekly_comprehensive_v3_chrysalis.cfg
@@ -3,6 +3,9 @@ case = "v3.LR.historical_0051"
 constraint = ""
 dry_run = "False"
 environment_commands = ""
+fail_on_dependency_skip = True
+guess_path_parameters = False
+guess_section_parameters = False
 input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/v3.LR.historical_0051
 input_subdir = archive/atm/hist
 mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
@@ -46,6 +49,12 @@ walltime = "00:30:00"
   input_subdir = "archive/atm/hist"
   ts_fmt = "cmip"

+  [[ atm_daily_180x360_aave ]]
+  frequency = "daily"
+  input_files = "eam.h1"
+  input_subdir = "archive/atm/hist"
+  vars = "PRECT"
+
   [[ rof_monthly ]]
   extra_vars = 'areatotal2'
   frequency = "monthly"
@@ -79,64 +88,97 @@ walltime = "00:30:00"
   ts_fmt = "cmip"
   vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"

-[tc_analysis]
-active = True
"/lcrc/globalscratch/ac.forsyth2/zppy_weekly_comprehensive_v3_scratch/unique_id/v3.LR.historical_0051" -walltime = "00:30:00" +# TODO: Add "tc_analysis" back in after empty dat is resolved. +# [tc_analysis] +# active = True +# scratch = "/lcrc/globalscratch/ac.forsyth2/zppy_weekly_comprehensive_v3_scratch/unique_id/v3.LR.historical_0051" +# walltime = "00:30:00" [e3sm_diags] active = True -environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731" +climo_diurnal_frequency = "diurnal_8xdaily" +climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" +climo_subsection = "atm_monthly_180x360_aave" +environment_commands = "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015" grid = '180x360_aave' multiprocessing = True num_workers = 8 partition = "compute" qos = "regular" +ref_end_yr = 1986 ref_final_yr = 1986 ref_start_yr = 1985 ref_years = "1985-1986", +# Include all sets +# min_case_e3sm_diags_depend_on_climo: "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget", +# min_case_e3sm_diags_depend_on_ts: "enso_diags","qbo", +# min_case_e3sm_diags_diurnal_cycle: "diurnal_cycle", +# min_case_e3sm_diags_streamflow: "streamflow", +# min_case_e3sm_diags_tc_analysis: "tc_analysis", +# min_case_e3sm_diags_tropical_subseasonal: "tropical_subseasonal", +# TODO: Add "tc_analysis" back in after empty dat is resolved. +# TODO: Add "aerosol_budget" back in once that's working for v3. +sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","enso_diags","qbo","diurnal_cycle","streamflow","tropical_subseasonal","aerosol_aeronet", short_name = "v3.LR.historical_0051" +ts_daily_subsection = "atm_daily_180x360_aave" ts_num_years = 2 +ts_subsection = "atm_monthly_180x360_aave" walltime = "5:00:00" years = "1987:1989:2" +# Reference paths +# Used for mvo and mvm, if ts_num_years is set +obs_ts = "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/" +# mvo & mvm tc_analysis only +tc_obs = "/lcrc/group/e3sm/diagnostics/observations/Atm/tc-analysis/" [[ atm_monthly_180x360_aave ]] - climo_diurnal_frequency = "diurnal_8xdaily" - climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" - climo_subsection = "atm_monthly_180x360_aave" + # Reference paths + reference_data_path = "/lcrc/group/e3sm/diagnostics/observations/Atm/climatology/" + # mvo diurnal_cycle only + # NOTE: This is NOT the guess zppy would have made! 
   dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
-  sets = "lat_lon","enso_diags","diurnal_cycle","streamflow","tc_analysis","tropical_subseasonal",
+  # mvo streamflow only
+  streamflow_obs_ts = "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/"

   [[ atm_monthly_180x360_aave_mvm ]]
   # Test model-vs-model using the same files as the reference
-  climo_subsection = "atm_monthly_180x360_aave"
   diff_title = "Difference"
   partition = "compute"
   qos = "regular"
   ref_name = "v3.LR.historical_0051"
-  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
   run_type = "model_vs_model"
-  sets = "lat_lon",
   short_ref_name = "same simulation"
   swap_test_ref = False
   tag = "model_vs_model"
   ts_num_years_ref = 2
-  ts_subsection = "atm_monthly_180x360_aave"
+  # Reference paths
+  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim"
+  # mvm streamflow only
+  gauges_path = "/lcrc/group/e3sm/diagnostics/observations/Atm/time-series/GSIM/GSIM_catchment_characteristics_all_1km2.csv"
+  reference_data_path_ts_rof = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/rof/native/ts/monthly"
+  # mvm diurnal_cycle only
+  reference_data_path_climo_diurnal = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/clim_diurnal_8xdaily"
+  # mvm "enso_diags", "qbo", "area_mean_time_series"
+  reference_data_path_ts = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/ts/monthly"
+  # mvm tropical_subseasonal only
+  reference_data_path_ts_daily = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/atm/180x360_aave/ts/daily"

   [[ lnd_monthly_mvm_lnd ]]
   # Test model-vs-model using the same files as the reference
-  climo_subsection = "land_monthly_climo"
+  climo_land_subsection = "land_monthly_climo"
   diff_title = "Difference"
   partition = "compute"
   qos = "regular"
   ref_name = "v3.LR.historical_0051"
-  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/lnd/180x360_aave/clim"
   run_type = "model_vs_model"
   sets = "lat_lon_land",
   short_ref_name = "same simulation"
   swap_test_ref = False
   tag = "model_vs_model"
   ts_num_years_ref = 2
+  # Reference paths
+  reference_data_path = "/lcrc/group/e3sm/ac.forsyth2/zppy_weekly_comprehensive_v3_output/unique_id/v3.LR.historical_0051/post/lnd/180x360_aave/clim"
+

 [mpas_analysis]
 active = True
@@ -165,9 +207,12 @@ years = "1985-1995",

 [ilamb]
 active = True
+ilamb_obs = "/lcrc/group/e3sm/diagnostics/ilamb_data"
 nodes = 8
 partition = "compute"
 short_name = "v3.LR.historical_0051"
+ts_atm_subsection = "atm_monthly_180x360_aave"
+ts_land_subsection = "land_monthly"
 ts_num_years = 2
 walltime = "2:00:00"
 years = "1985:1989:4"
diff --git a/tests/integration/generated/update_weekly_expected_files_chrysalis.sh b/tests/integration/generated/update_weekly_expected_files_chrysalis.sh
old mode 100644
new mode 100755
diff --git a/tests/integration/template_min_case_carryover_dependencies.cfg b/tests/integration/template_min_case_carryover_dependencies.cfg
new file mode 100644
index 00000000..4d0fc037
--- /dev/null
+++ b/tests/integration/template_min_case_carryover_dependencies.cfg
@@ -0,0 +1,189 @@
+# Test carryover dependency handling
+
+# Things to check for in the output:
+# 1. Check that `grep -v "OK" *status` returns nothing (i.e., all jobs worked)
+# 2. Check that .settings files include `dependencies` listed out,
+# using `grep -A 5 "'dependencies'" *settings`.
+# (Change the -A number to include enough lines to see the entire dependencies list)
+# Check that these look correct.
+
+
+# These tasks can have carryover dependencies:
+# 1. mpas_analysis.py
+
+# These tasks have dependencies, but no carryover:
+# 1. e3sm_diags.py
+# 2. global_time_series.py
+# 3. ilamb.py
+# 4. tc_analysis.py
+
+# These tasks don't have dependencies:
+# 1. climo.py
+# 2. ts.py
+
+[default]
+case = "#expand case_name#"
+constraint = "#expand constraint#"
+dry_run = "#expand dry_run#"
+environment_commands = "#expand environment_commands#"
+input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name#
+input_subdir = archive/atm/hist
+mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc"
+output = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#"
+partition = "#expand partition_short#"
+qos = "#expand qos_short#"
+www = "#expand user_www#zppy_min_case_carryover_dependencies_www/#expand unique_id#"
+years = "1985:1989:2",
+
+[climo]
+active = True
+walltime = "00:30:00"
+
+  [[ atm_monthly_180x360_aave ]]
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  vars = ""
+
+  [[ atm_monthly_diurnal_8xdaily_180x360_aave ]]
+  frequency = "diurnal_8xdaily"
+  input_files = "eam.h3"
+  input_subdir = "archive/atm/hist"
+  vars = "PRECT"
+
+  [[ land_monthly_climo ]]
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
+  vars = ""
+
+[ts]
+active = True
+e3sm_to_cmip_environment_commands = "#expand e3sm_to_cmip_environment_commands#"
+walltime = "00:30:00"
+
+  [[ atm_monthly_180x360_aave ]]
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  ts_fmt = "cmip"
+
+  [[ rof_monthly ]]
+  extra_vars = 'areatotal2'
+  frequency = "monthly"
+  input_files = "mosart.h0"
+  input_subdir = "archive/rof/hist"
+  mapping_file = ""
+  vars = "RIVER_DISCHARGE_OVER_LAND_LIQ"
+
+  [[ atm_monthly_glb ]]
+  # Note global average won't work for 3D variables.
+  frequency = "monthly"
+  input_files = "eam.h0"
+  input_subdir = "archive/atm/hist"
+  mapping_file = "glb"
+  years = "1985:1995:5",
+
+  [[ lnd_monthly_glb ]]
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "glb"
+  vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+  years = "1985:1995:5",
+
+  [[ land_monthly ]]
+  extra_vars = "landfrac"
+  frequency = "monthly"
+  input_files = "elm.h0"
+  input_subdir = "archive/lnd/hist"
+  mapping_file = "map_r05_to_cmip6_180x360_aave.20231110.nc"
+  ts_fmt = "cmip"
+  vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+
+# TODO: Add "tc_analysis" back in after empty dat is resolved.
+# [tc_analysis]
+# # The second run should run in parallel with the first run.
+# active = True
+# scratch = "#expand scratch#zppy_min_case_carryover_dependencies_scratch/#expand unique_id#/#expand case_name#"
+# walltime = "00:30:00"
+
+[e3sm_diags]
+active = True
+environment_commands = "#expand diags_environment_commands#"
+grid = '180x360_aave'
+multiprocessing = True
+num_workers = 8
+partition = "#expand partition_long#"
+qos = "#expand qos_long#"
+ref_final_yr = 1986
+ref_start_yr = 1985
+ref_years = "1985-1986",
+short_name = "#expand case_name#"
+ts_num_years = 2
+walltime = "#expand diags_walltime#"
+years = "1987:1989:2"
+
+  [[ atm_monthly_180x360_aave ]]
+  # TODO: Add "tc_analysis" back in after empty dat is resolved.
+  # This task will depend on the tc_analysis task
+  climo_diurnal_frequency = "diurnal_8xdaily"
+  climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave"
+  climo_subsection = "atm_monthly_180x360_aave"
+  dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology'
+  # TODO: Add "tc_analysis" back in after empty dat is resolved.
+  #sets = "lat_lon","tc_analysis"
+  sets = "lat_lon",
+
+  [[ atm_monthly_180x360_aave_mvm ]]
+  # This task will not depend on the tc_analysis task
+  # Test model-vs-model using the same files as the reference
+  climo_subsection = "atm_monthly_180x360_aave"
+  diff_title = "Difference"
+  partition = "#expand partition_long#"
+  qos = "#expand qos_long#"
+  ref_name = "#expand case_name#"
+  reference_data_path = "#expand user_output#zppy_min_case_carryover_dependencies_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim"
+  run_type = "model_vs_model"
+  sets = "lat_lon",
+  short_ref_name = "same simulation"
+  swap_test_ref = False
+  tag = "model_vs_model"
+  ts_num_years_ref = 2
+  ts_subsection = "atm_monthly_180x360_aave"
+
+[mpas_analysis]
+# The second run should depend on the first run.
+active = True
+anomalyRefYear = 1985
+climo_years = "1985-1989", "1990-1995",
+enso_years = "1985-1989", "1990-1995",
+mesh = "IcoswISC30E3r5"
+parallelTaskCount = 6
+partition = "#expand partition_long#"
+qos = "#expand qos_long#"
+shortTermArchive = True
+ts_years = "1985-1989", "1985-1995",
+walltime = "#expand mpas_analysis_walltime#"
+
+[global_time_series]
+active = True
+climo_years = "1985-1989", "1990-1995",
+experiment_name = "#expand case_name#"
+figstr = "#expand case_name#"
+moc_file=mocTimeSeries_1985-1995.nc
+plots_lnd = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILWATER_10CM,TSA,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR"
+ts_num_years = 5
+ts_years = "1985-1989", "1985-1995",
+walltime = "00:30:00"
+years = "1985-1995",
+
+[ilamb]
+active = True
+nodes = 8
+partition = "#expand partition_long#"
+short_name = "#expand case_name#"
+ts_num_years = 2
+walltime = "2:00:00"
+years = "1985:1989:4"
diff --git a/tests/integration/template_min_case_e3sm_diags_depend_on_climo.cfg b/tests/integration/template_min_case_e3sm_diags_depend_on_climo.cfg
index cf48d9b9..a8eded71 100644
--- a/tests/integration/template_min_case_e3sm_diags_depend_on_climo.cfg
+++ b/tests/integration/template_min_case_e3sm_diags_depend_on_climo.cfg
@@ -35,4 +35,4 @@ walltime = "#expand diags_walltime#"

   [[ atm_monthly_180x360_aave ]]
   climo_subsection = "atm_monthly_180x360_aave"
-  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere",
+  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget",
diff --git a/tests/integration/template_min_case_e3sm_diags_depend_on_climo_mvm_2.cfg b/tests/integration/template_min_case_e3sm_diags_depend_on_climo_mvm_2.cfg
index fc01cd50..38b255db 100644
--- a/tests/integration/template_min_case_e3sm_diags_depend_on_climo_mvm_2.cfg
+++ b/tests/integration/template_min_case_e3sm_diags_depend_on_climo_mvm_2.cfg
@@ -44,7 +44,7 @@ walltime = "#expand diags_walltime#"
   # Use _1 as reference
   reference_data_path = "#expand user_output#zppy_min_case_e3sm_diags_depend_on_climo_mvm_1_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim"
   run_type = "model_vs_model"
-  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere",
+  sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget",
   short_ref_name = "#expand case_name#"
   swap_test_ref = False
   tag = "model_vs_model"
diff --git a/tests/integration/template_min_case_e3sm_diags_lat_lon_land_mvm_2.cfg b/tests/integration/template_min_case_e3sm_diags_lat_lon_land_mvm_2.cfg
index 691dd895..5b83882b 100644
--- a/tests/integration/template_min_case_e3sm_diags_lat_lon_land_mvm_2.cfg
+++ b/tests/integration/template_min_case_e3sm_diags_lat_lon_land_mvm_2.cfg
@@ -34,7 +34,7 @@ short_name = "#expand case_name#"
 walltime = "#expand diags_walltime#"

   [[ lnd_monthly_mvm_lnd ]]
-  climo_subsection = "land_monthly_climo"
+  climo_land_subsection = "land_monthly_climo"
   diff_title = "Difference"
   ref_final_yr = 1988
   ref_name = "#expand case_name#"
b/tests/integration/template_min_case_e3sm_diags_tc_analysis.cfg index 90c158e7..eefb73a8 100644 --- a/tests/integration/template_min_case_e3sm_diags_tc_analysis.cfg +++ b/tests/integration/template_min_case_e3sm_diags_tc_analysis.cfg @@ -1,12 +1,12 @@ [default] -case = "#expand case_name#" +case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_output/#expand unique_id#/#expand case_name#" +output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_output/#expand unique_id#/#expand case_name_v2#" partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_min_case_e3sm_diags_tc_analysis_www/#expand unique_id#" @@ -14,7 +14,7 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_scratch/#expand unique_id#/#expand case_name#" +scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_scratch/#expand unique_id#/#expand case_name_v2#" walltime = "00:30:00" [e3sm_diags] @@ -25,11 +25,10 @@ multiprocessing = True num_workers = 8 partition = "#expand partition_long#" qos = "#expand qos_long#" -short_name = "#expand case_name#" +short_name = "#expand case_name_v2#" ts_num_years = 2 walltime = "#expand diags_walltime#" [[ atm_monthly_180x360_aave_tc_analysis ]] - # Note: tc_analysis requires e3sm_diags jobs to run sequentially sets = "tc_analysis", # tc_obs is determined automatically diff --git a/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_1.cfg b/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_1.cfg index 51e0426e..1eb6be42 100644 --- a/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_1.cfg +++ b/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_1.cfg @@ -1,12 +1,12 @@ [default] -case = "#expand case_name#" +case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/#expand unique_id#/#expand case_name#" +output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/#expand unique_id#/#expand case_name_v2#" partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_www/#expand unique_id#" @@ -14,5 +14,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_scratch/#expand unique_id#/#expand case_name#" +scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_scratch/#expand unique_id#/#expand case_name_v2#" walltime = "00:30:00" diff --git a/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_2.cfg b/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_2.cfg index 3f833ba6..80680e4e 100644 --- a/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_2.cfg +++ 
b/tests/integration/template_min_case_e3sm_diags_tc_analysis_mvm_2.cfg @@ -1,12 +1,13 @@ +# Run this after _1 completes. (We need tc_analysis for the reference years). [default] -case = "#expand case_name#" +case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_output/#expand unique_id#/#expand case_name#" +output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_output/#expand unique_id#/#expand case_name_v2#" partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_www/#expand unique_id#" @@ -14,7 +15,7 @@ years = "1995:1997:2", [tc_analysis] active = True -scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_scratch/#expand unique_id#/#expand case_name#" +scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_mvm_2_scratch/#expand unique_id#/#expand case_name_v2#" walltime = "00:30:00" [e3sm_diags] @@ -25,7 +26,7 @@ multiprocessing = True num_workers = 8 partition = "#expand partition_long#" qos = "#expand qos_long#" -short_name = "#expand case_name#" +short_name = "#expand case_name_v2#" walltime = "#expand diags_walltime#" [[ atm_monthly_180x360_aave_mvm ]] @@ -33,15 +34,15 @@ walltime = "#expand diags_walltime#" climo_subsection = "atm_monthly_180x360_aave" diff_title = "Difference" ref_final_yr = 1986 - ref_name = "#expand case_name#" + ref_name = "#expand case_name_v2#" ref_start_yr = 1985 ref_years = "1985-1986", # Use _1 as reference - reference_data_path = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim" + reference_data_path = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_mvm_1_output/#expand unique_id#/#expand case_name_v2#/post/atm/180x360_aave/clim" # reference_data_path_tc determined automatically run_type = "model_vs_model" sets = "tc_analysis", - short_ref_name = "#expand case_name#" + short_ref_name = "#expand case_name_v2#" swap_test_ref = False tag = "model_vs_model" years = "1995-1996", diff --git a/tests/integration/template_min_case_e3sm_diags_tc_analysis_parallel.cfg b/tests/integration/template_min_case_e3sm_diags_tc_analysis_parallel.cfg new file mode 100644 index 00000000..499ee0ac --- /dev/null +++ b/tests/integration/template_min_case_e3sm_diags_tc_analysis_parallel.cfg @@ -0,0 +1,36 @@ +[default] +case = "#expand case_name_v2#" +constraint = "#expand constraint#" +dry_run = "#expand dry_run#" +environment_commands = "#expand environment_commands#" +input = #expand user_input#/E3SMv2/#expand case_name_v2# +input_subdir = archive/atm/hist +mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" +output = "#expand user_output#zppy_min_case_e3sm_diags_tc_analysis_parallel_output/#expand unique_id#/#expand case_name_v2#" +partition = "#expand partition_short#" +qos = "#expand qos_short#" +www = "#expand user_www#zppy_min_case_e3sm_diags_tc_analysis_parallel_www/#expand unique_id#" +years = "1985:1989:2", + +[tc_analysis] +active = True +scratch = "#expand scratch#zppy_min_case_e3sm_diags_tc_analysis_parallel_scratch/#expand unique_id#/#expand case_name_v2#" +walltime 
= "00:30:00" + +[e3sm_diags] +active = True +environment_commands = "#expand diags_environment_commands#" +grid = '180x360_aave' +multiprocessing = True +num_workers = 8 +partition = "#expand partition_long#" +qos = "#expand qos_long#" +short_name = "#expand case_name_v2#" +ts_num_years = 2 +walltime = "#expand diags_walltime#" + + [[ atm_monthly_180x360_aave_tc_analysis ]] + # Expecting two viewers: 1985-1986, 1987-1988 + # The generation of these two viewers should occur in parallel. + sets = "tc_analysis", + # tc_obs is determined automatically diff --git a/tests/integration/template_min_case_tc_analysis_simultaneous_1.cfg b/tests/integration/template_min_case_tc_analysis_simultaneous_1.cfg index 7309eca5..3932f753 100644 --- a/tests/integration/template_min_case_tc_analysis_simultaneous_1.cfg +++ b/tests/integration/template_min_case_tc_analysis_simultaneous_1.cfg @@ -1,12 +1,12 @@ [default] -case = "#expand case_name#" +case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "#expand user_output#zppy_min_case_tc_analysis_simultaneous_1_output/#expand unique_id#/#expand case_name#" +output = "#expand user_output#zppy_min_case_tc_analysis_simultaneous_1_output/#expand unique_id#/#expand case_name_v2#" partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_min_case_tc_analysis_simultaneous_1_www/#expand unique_id#" @@ -14,5 +14,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "#expand scratch#zppy_min_case_tc_analysis_simultaneous_1_scratch/#expand unique_id#/#expand case_name#" +scratch = "#expand scratch#zppy_min_case_tc_analysis_simultaneous_1_scratch/#expand unique_id#/#expand case_name_v2#" walltime = "00:30:00" diff --git a/tests/integration/template_min_case_tc_analysis_simultaneous_2.cfg b/tests/integration/template_min_case_tc_analysis_simultaneous_2.cfg index d9318b58..c27a1bbf 100644 --- a/tests/integration/template_min_case_tc_analysis_simultaneous_2.cfg +++ b/tests/integration/template_min_case_tc_analysis_simultaneous_2.cfg @@ -1,14 +1,14 @@ # Run this immediately after running _1 [default] -case = "#expand case_name#" +case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" -input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# +input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" -output = "#expand user_output#zppy_min_case_tc_analysis_simultaneous_2_output/#expand unique_id#/#expand case_name#" +output = "#expand user_output#zppy_min_case_tc_analysis_simultaneous_2_output/#expand unique_id#/#expand case_name_v2#" partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_min_case_tc_analysis_simultaneous_2_www/#expand unique_id#" @@ -16,5 +16,5 @@ years = "1985:1987:2", [tc_analysis] active = True -scratch = "#expand scratch#zppy_min_case_tc_analysis_simultaneous_2_scratch/#expand unique_id#/#expand case_name#" +scratch = "#expand scratch#zppy_min_case_tc_analysis_simultaneous_2_scratch/#expand unique_id#/#expand case_name_v2#" walltime = "00:30:00" diff --git 
a/tests/integration/template_weekly_bundles.cfg b/tests/integration/template_weekly_bundles.cfg index 3c8a0d64..2ade2706 100644 --- a/tests/integration/template_weekly_bundles.cfg +++ b/tests/integration/template_weekly_bundles.cfg @@ -95,11 +95,12 @@ years = "1985:1989:2", mapping_file = "" vars = "RIVER_DISCHARGE_OVER_LAND_LIQ" -[tc_analysis] -active = True -bundle = "bundle3" # Let bundle1 finish first because "e3sm_diags: atm_monthly_180x360_aave_mvm" requires "ts: atm_monthly_180x360_aave" -scratch = "#expand scratch#zppy_weekly_bundles_scratch/#expand unique_id#/#expand case_name#" -years = "1985:1989:2", +# TODO: Add "tc_analysis" back in after the empty .dat file issue is resolved. +# [tc_analysis] +# active = True +# bundle = "bundle3" # Let bundle1 finish first because "e3sm_diags: atm_monthly_180x360_aave_mvm" requires "ts: atm_monthly_180x360_aave" +# scratch = "#expand scratch#zppy_weekly_bundles_scratch/#expand unique_id#/#expand case_name#" +# years = "1985:1989:2", [e3sm_diags] active = True @@ -128,7 +129,8 @@ years = "1985:1989:2", ref_years = "1985-1986", reference_data_path = "#expand user_output#zppy_weekly_bundles_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim" run_type = "model_vs_model" - sets = "polar","enso_diags","streamflow","tc_analysis", + # TODO: Add "tc_analysis" back in after the empty .dat file issue is resolved. + sets = "polar","enso_diags","streamflow", short_ref_name = "#expand case_name#" swap_test_ref = False tag = "model_vs_model" diff --git a/tests/integration/template_weekly_comprehensive_v2.cfg b/tests/integration/template_weekly_comprehensive_v2.cfg index 93cde966..213d7792 100644 --- a/tests/integration/template_weekly_comprehensive_v2.cfg +++ b/tests/integration/template_weekly_comprehensive_v2.cfg @@ -3,6 +3,7 @@ case = "#expand case_name_v2#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" +fail_on_dependency_skip = True input = #expand user_input#/E3SMv2/#expand case_name_v2# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" @@ -10,7 +11,7 @@ output = "#expand user_output#zppy_weekly_comprehensive_v2_output/#expand unique partition = "#expand partition_short#" qos = "#expand qos_short#" www = "#expand user_www#zppy_weekly_comprehensive_v2_www/#expand unique_id#" -years = "1850:1854:2", +years = "1980:1984:2", [climo] active = True @@ -45,6 +46,12 @@ walltime = "00:30:00" input_subdir = "archive/atm/hist" ts_fmt = "cmip" + [[ atm_daily_180x360_aave ]] + frequency = "daily" + input_files = "eam.h1" + input_subdir = "archive/atm/hist" + vars = "PRECT" + [[ rof_monthly ]] extra_vars = 'areatotal2' frequency = "monthly" @@ -59,7 +66,7 @@ walltime = "00:30:00" input_files = "eam.h0" input_subdir = "archive/atm/hist" mapping_file = "glb" - years = "1850:1860:5", + years = "1980:1990:5", [[ lnd_monthly_glb ]] frequency = "monthly" @@ -67,7 +74,7 @@ walltime = "00:30:00" input_files = "eam.h0" input_subdir = "archive/lnd/hist" mapping_file = "glb" vars = "LAISHA,LAISUN" - years = "1850:1860:5", + years = "1980:1990:5", [[ land_monthly ]] extra_vars = "landfrac" @@ -84,26 +91,34 @@ walltime = "00:30:00" [e3sm_diags] active = True +climo_diurnal_frequency = "diurnal_8xdaily" +climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" environment_commands = "#expand diags_environment_commands#" grid = '180x360_aave' multiprocessing = True num_workers = 8 partition = "#expand partition_long#" qos = "#expand qos_long#" -ref_final_yr = 1851 -ref_start_yr = 
1850 -ref_years = "1850-1851", +ref_end_yr = 1981 +ref_final_yr = 1981 +ref_start_yr = 1980 +ref_years = "1980-1981", +# Include all sets +# min_case_e3sm_diags_depend_on_climo: "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget", +# min_case_e3sm_diags_depend_on_ts: "enso_diags","qbo", +# min_case_e3sm_diags_diurnal_cycle: "diurnal_cycle", +# min_case_e3sm_diags_streamflow: "streamflow", +# min_case_e3sm_diags_tc_analysis: "tc_analysis", +# min_case_e3sm_diags_tropical_subseasonal: "tropical_subseasonal", +sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","enso_diags","qbo","diurnal_cycle","streamflow","tc_analysis","tropical_subseasonal","aerosol_aeronet","aerosol_budget", short_name = "#expand case_name_v2#" ts_num_years = 2 walltime = "#expand diags_walltime#" -years = "1852:1854:2", +years = "1982:1984:2", [[ atm_monthly_180x360_aave ]] - climo_diurnal_frequency = "diurnal_8xdaily" - climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" climo_subsection = "atm_monthly_180x360_aave" dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology' - sets = "lat_lon","enso_diags","diurnal_cycle","streamflow","tc_analysis","tc_analysis", [[ atm_monthly_180x360_aave_mvm ]] # Test model-vs-model using the same files as the reference @@ -114,16 +129,17 @@ years = "1852:1854:2", ref_name = "#expand case_name_v2#" reference_data_path = "#expand user_output#zppy_weekly_comprehensive_v2_output/#expand unique_id#/#expand case_name_v2#/post/atm/180x360_aave/clim" run_type = "model_vs_model" - sets = "lat_lon", short_ref_name = "same simulation" swap_test_ref = False tag = "model_vs_model" + ts_daily_subsection = "atm_monthly_180x360_aave" ts_num_years_ref = 2 ts_subsection = "atm_monthly_180x360_aave" + years = "1980:1982:2", [[ lnd_monthly_mvm_lnd ]] # Test model-vs-model using the same files as the reference - climo_subsection = "land_monthly_climo" + climo_land_subsection = "land_monthly_climo" diff_title = "Difference" partition = "#expand partition_long#" qos = "#expand qos_long#" @@ -138,28 +154,28 @@ years = "1852:1854:2", [mpas_analysis] active = True -anomalyRefYear = 1850 -climo_years ="1850-1854", "1855-1860", -enso_years = "1850-1854", "1855-1860", +anomalyRefYear = 1980 +climo_years ="1980-1984", "1985-1990", +enso_years = "1980-1984", "1985-1990", mesh = "EC30to60E2r2" parallelTaskCount = 6 partition = "#expand partition_long#" qos = "#expand qos_long#" shortTermArchive = True -ts_years = "1850-1854", "1850-1860", +ts_years = "1980-1984", "1980-1990", walltime = "#expand mpas_analysis_walltime#" [global_time_series] active = True -climo_years ="1850-1854", "1855-1860", +climo_years ="1980-1984", "1985-1990", experiment_name = "#expand case_name_v2#" figstr = "#expand case_name_v2#" -moc_file=mocTimeSeries_1850-1860.nc +moc_file=mocTimeSeries_1980-1990.nc plots_lnd = "LAISHA,LAISUN" ts_num_years = 5 -ts_years = "1850-1854", "1850-1860", +ts_years = "1980-1984", "1980-1990", walltime = "00:30:00" -years = "1850-1860", +years = "1980-1990", [ilamb] active = True @@ -168,4 +184,4 @@ partition = "#expand partition_long#" short_name = "#expand case_name_v2#" ts_num_years = 2 walltime = "2:00:00" -years = "1850:1854:2", +years = "1980:1984:2", diff --git a/tests/integration/template_weekly_comprehensive_v3.cfg 
b/tests/integration/template_weekly_comprehensive_v3.cfg index ef2fbb23..c7e312de 100644 --- a/tests/integration/template_weekly_comprehensive_v3.cfg +++ b/tests/integration/template_weekly_comprehensive_v3.cfg @@ -3,6 +3,9 @@ case = "#expand case_name#" constraint = "#expand constraint#" dry_run = "#expand dry_run#" environment_commands = "#expand environment_commands#" +fail_on_dependency_skip = True +guess_path_parameters = False +guess_section_parameters = False input = /lcrc/group/e3sm2/ac.wlin/E3SMv3/#expand case_name# input_subdir = archive/atm/hist mapping_file = "map_ne30pg2_to_cmip6_180x360_aave.20200201.nc" @@ -46,6 +49,12 @@ walltime = "00:30:00" input_subdir = "archive/atm/hist" ts_fmt = "cmip" + [[ atm_daily_180x360_aave ]] + frequency = "daily" + input_files = "eam.h1" + input_subdir = "archive/atm/hist" + vars = "PRECT" + [[ rof_monthly ]] extra_vars = 'areatotal2' frequency = "monthly" @@ -79,64 +88,97 @@ walltime = "00:30:00" ts_fmt = "cmip" vars = "FSH,RH2M,LAISHA,LAISUN,QINTR,QOVER,QRUNOFF,QSOIL,QVEGE,QVEGT,SOILICE,SOILLIQ,SOILWATER_10CM,TSA,TSOI,H2OSNO,TOTLITC,CWDC,SOIL1C,SOIL2C,SOIL3C,SOIL4C,WOOD_HARVESTC,TOTVEGC,NBP,GPP,AR,HR" -[tc_analysis] -active = True -scratch = "#expand scratch#zppy_weekly_comprehensive_v3_scratch/#expand unique_id#/#expand case_name#" -walltime = "00:30:00" +# TODO: Add "tc_analysis" back in after the empty .dat file issue is resolved. +# [tc_analysis] +# active = True +# scratch = "#expand scratch#zppy_weekly_comprehensive_v3_scratch/#expand unique_id#/#expand case_name#" +# walltime = "00:30:00" [e3sm_diags] active = True +climo_diurnal_frequency = "diurnal_8xdaily" +climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" +climo_subsection = "atm_monthly_180x360_aave" environment_commands = "#expand diags_environment_commands#" grid = '180x360_aave' multiprocessing = True num_workers = 8 partition = "#expand partition_long#" qos = "#expand qos_long#" +ref_end_yr = 1986 ref_final_yr = 1986 ref_start_yr = 1985 ref_years = "1985-1986", +# Include all sets +# min_case_e3sm_diags_depend_on_climo: "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget", +# min_case_e3sm_diags_depend_on_ts: "enso_diags","qbo", +# min_case_e3sm_diags_diurnal_cycle: "diurnal_cycle", +# min_case_e3sm_diags_streamflow: "streamflow", +# min_case_e3sm_diags_tc_analysis: "tc_analysis", +# min_case_e3sm_diags_tropical_subseasonal: "tropical_subseasonal", +# TODO: Add "tc_analysis" back in after the empty .dat file issue is resolved. +# TODO: Add "aerosol_budget" back in once that's working for v3.
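+# (The sets line below is the union of the min_case lists above, minus "tc_analysis" and "aerosol_budget" per the two TODOs.)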
+sets = "lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","enso_diags","qbo","diurnal_cycle","streamflow","tropical_subseasonal","aerosol_aeronet", short_name = "#expand case_name#" +ts_daily_subsection = "atm_daily_180x360_aave" ts_num_years = 2 +ts_subsection = "atm_monthly_180x360_aave" walltime = "#expand diags_walltime#" years = "1987:1989:2" +# Reference paths +# Used for mvo and mvm, if ts_num_years is set +obs_ts = "#expand diagnostics_base_path#/observations/Atm/time-series/" +# mvo & mvm tc_analysis only +tc_obs = "#expand diagnostics_base_path#/observations/Atm/tc-analysis/" [[ atm_monthly_180x360_aave ]] - climo_diurnal_frequency = "diurnal_8xdaily" - climo_diurnal_subsection = "atm_monthly_diurnal_8xdaily_180x360_aave" - climo_subsection = "atm_monthly_180x360_aave" + # Reference paths + reference_data_path = "#expand diagnostics_base_path#/observations/Atm/climatology/" + # mvo diurnal_cycle only + # NOTE: This is NOT the guess zppy would have made! dc_obs_climo = '/lcrc/group/e3sm/public_html/e3sm_diags_test_data/unit_test_complete_run/obs/climatology' - sets = "lat_lon","enso_diags","diurnal_cycle","streamflow","tc_analysis","tropical_subseasonal", + # mvo streamflow only + streamflow_obs_ts = "#expand diagnostics_base_path#/observations/Atm/time-series/" [[ atm_monthly_180x360_aave_mvm ]] # Test model-vs-model using the same files as the reference - climo_subsection = "atm_monthly_180x360_aave" diff_title = "Difference" partition = "#expand partition_long#" qos = "#expand qos_long#" ref_name = "#expand case_name#" - reference_data_path = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim" run_type = "model_vs_model" - sets = "lat_lon", short_ref_name = "same simulation" swap_test_ref = False tag = "model_vs_model" ts_num_years_ref = 2 - ts_subsection = "atm_monthly_180x360_aave" + # Reference paths + reference_data_path = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim" + # mvm streamflow only + gauges_path = "#expand diagnostics_base_path#/observations/Atm/time-series/GSIM/GSIM_catchment_characteristics_all_1km2.csv" + reference_data_path_ts_rof = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/rof/native/ts/monthly" + # mvm diurnal_cycle only + reference_data_path_climo_diurnal = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/clim_diurnal_8xdaily" + # mvm "enso_diags", "qbo", "area_mean_time_series" + reference_data_path_ts = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/ts/monthly" + # mvm tropical_subseasonal only + reference_data_path_ts_daily = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/atm/180x360_aave/ts/daily" [[ lnd_monthly_mvm_lnd ]] # Test model-vs-model using the same files as the reference - climo_subsection = "land_monthly_climo" + climo_land_subsection = "land_monthly_climo" diff_title = "Difference" partition = "#expand partition_long#" qos = "#expand qos_long#" ref_name = "#expand case_name#" - reference_data_path = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/lnd/180x360_aave/clim" run_type = "model_vs_model" sets = 
"lat_lon_land", short_ref_name = "same simulation" swap_test_ref = False tag = "model_vs_model" ts_num_years_ref = 2 + # Reference paths + reference_data_path = "#expand user_output#zppy_weekly_comprehensive_v3_output/#expand unique_id#/#expand case_name#/post/lnd/180x360_aave/clim" + [mpas_analysis] active = True @@ -165,9 +207,12 @@ years = "1985-1995", [ilamb] active = True +ilamb_obs = "#expand diagnostics_base_path#/ilamb_data" nodes = 8 partition = "#expand partition_long#" short_name = "#expand case_name#" +ts_atm_subsection = "atm_monthly_180x360_aave" +ts_land_subsection = "land_monthly" ts_num_years = 2 walltime = "2:00:00" years = "1985:1989:4" diff --git a/tests/integration/test_weekly.py b/tests/integration/test_weekly.py index e3929bae..d919cafc 100644 --- a/tests/integration/test_weekly.py +++ b/tests/integration/test_weekly.py @@ -68,8 +68,8 @@ def test_bundles_bash_file_list(self): "e3sm_diags_atm_monthly_180x360_aave_mvm_model_vs_model_1987-1988_vs_1985-1986.bash", "global_time_series_1985-1995.bash", "ilamb_1985-1986.bash", - "tc_analysis_1985-1986.bash", - "tc_analysis_1987-1988.bash", + # "tc_analysis_1985-1986.bash", + # "tc_analysis_1987-1988.bash", "ts_atm_monthly_180x360_aave_1985-1986-0002.bash", "ts_atm_monthly_180x360_aave_1987-1988-0002.bash", "ts_atm_monthly_glb_1985-1989-0005.bash", diff --git a/tests/integration/utils.py b/tests/integration/utils.py index 2c6d6bdf..77f2f890 100644 --- a/tests/integration/utils.py +++ b/tests/integration/utils.py @@ -62,21 +62,30 @@ def compare_images( mismatched_images.append(image_name) - simple_image_name = image_name.split("/")[-1].split(".")[0] + diff_dir_actual_png = os.path.join( + diff_dir, "{}_actual.png".format(image_name) + ) + # image_name could contain a number of subdirectories. + os.makedirs(os.path.dirname(diff_dir_actual_png), exist_ok=True) shutil.copy( path_to_actual_png, - os.path.join(diff_dir, "{}_actual.png".format(simple_image_name)), + diff_dir_actual_png, + ) + diff_dir_expected_png = os.path.join( + diff_dir, "{}_expected.png".format(image_name) ) + # image_name could contain a number of subdirectories. 
+ os.makedirs(os.path.dirname(diff_dir_expected_png), exist_ok=True) shutil.copy( path_to_expected_png, - os.path.join(diff_dir, "{}_expected.png".format(simple_image_name)), + diff_dir_expected_png, ) # https://stackoverflow.com/questions/41405632/draw-a-rectangle-and-a-text-in-it-using-pil draw = ImageDraw.Draw(diff) (left, upper, right, lower) = diff.getbbox() draw.rectangle(((left, upper), (right, lower)), outline="red") diff.save( - os.path.join(diff_dir, "{}_diff.png".format(simple_image_name)), + os.path.join(diff_dir, "{}_diff.png".format(image_name)), "PNG", ) @@ -139,7 +148,7 @@ def get_chyrsalis_expansions(config): "constraint": "", # To run this test, replace conda environment with your e3sm_diags dev environment # To use default environment_commands, set to "" - "diags_environment_commands": "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20240731", + "diags_environment_commands": "source /home/ac.forsyth2/miniconda3/etc/profile.d/conda.sh; conda activate e3sm_diags_20241015", "diags_walltime": "5:00:00", "e3sm_to_cmip_environment_commands": "", "environment_commands_test": "", @@ -228,6 +237,7 @@ def get_expansions(): expansions = get_perlmutter_expansions(config) else: raise ValueError(f"Unsupported machine={machine}") + expansions["diagnostics_base_path"] = config.get("diagnostics", "base_path") expansions["machine"] = machine expansions["unique_id"] = UNIQUE_ID return expansions @@ -271,6 +281,7 @@ def generate_cfgs(unified_testing=False, dry_run=False): cfg_names = [ "min_case_add_dependencies", + "min_case_carryover_dependencies", "min_case_tc_analysis_simultaneous_1", "min_case_tc_analysis_simultaneous_2", "min_case_e3sm_diags_depend_on_climo_mvm_1", @@ -290,6 +301,7 @@ def generate_cfgs(unified_testing=False, dry_run=False): "min_case_e3sm_diags_streamflow", "min_case_e3sm_diags_tc_analysis_mvm_1", "min_case_e3sm_diags_tc_analysis_mvm_2", + "min_case_e3sm_diags_tc_analysis_parallel", "min_case_e3sm_diags_tc_analysis", "min_case_e3sm_diags_tropical_subseasonal_mvm_1", "min_case_e3sm_diags_tropical_subseasonal_mvm_2", @@ -345,4 +357,4 @@ def generate_cfgs(unified_testing=False, dry_run=False): if __name__ == "__main__": - generate_cfgs(unified_testing=False) + generate_cfgs(unified_testing=False, dry_run=False) diff --git a/tests/test_sections.py b/tests/test_sections.py index 5bde86b6..434cfdfb 100644 --- a/tests/test_sections.py +++ b/tests/test_sections.py @@ -5,7 +5,7 @@ from configobj import ConfigObj, Section from validate import Validator -from zppy.utils import getTasks +from zppy.utils import get_tasks def compare(tester, actual, expected): @@ -21,7 +21,7 @@ def compare(tester, actual, expected): tester.assertEqual(only_in_expected, set()) incorrect_values = [] for key in actual_keys: - if type(actual[key]) == Section: + if isinstance(actual[key], Section): print("Calling `compare` again on {}".format(key)) compare(tester, actual[key], expected[key]) elif actual[key] != expected[key]: @@ -73,8 +73,11 @@ def test_sections(self): "dry_run": False, "e3sm_to_cmip_environment_commands": "", "environment_commands": "", + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_files": "eam.h0", "input_subdir": "INPUT_SUBDIR", @@ -112,7 +115,7 @@ def test_sections(self): "years": ["0001:0020:5"], } compare(self, actual_section, expected_section) - actual_tasks = getTasks(config, section_name) + actual_tasks = 
get_tasks(config, section_name) self.assertEqual(len(actual_tasks), 1) actual_task = actual_tasks[0] expected_task = { @@ -130,8 +133,11 @@ def test_sections(self): "e3sm_to_cmip_environment_commands": "", "environment_commands": "", "extra_vars": "", + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", @@ -170,7 +176,7 @@ def test_sections(self): "years": ["0001:0050:50"], } compare(self, actual_section, expected_section) - actual_tasks = getTasks(config, section_name) + actual_tasks = get_tasks(config, section_name) compare(self, len(actual_tasks), 1) actual_task = actual_tasks[0] expected_task = { @@ -185,8 +191,11 @@ def test_sections(self): "e3sm_to_cmip_environment_commands": "", "environment_commands": "", "exclude": False, + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", @@ -213,14 +222,14 @@ def test_sections(self): section_name = "tc_analysis" actual_section = config[section_name] self.assertTrue(actual_section["active"] == "False") - actual_tasks = getTasks(config, section_name) + actual_tasks = get_tasks(config, section_name) self.assertEqual(len(actual_tasks), 0) # e3sm_diags: test an excluded task section_name = "e3sm_diags" actual_section = config[section_name] self.assertTrue("active" not in actual_section.keys()) - actual_tasks = getTasks(config, section_name) + actual_tasks = get_tasks(config, section_name) self.assertEqual(len(actual_tasks), 0) def test_subsections(self): @@ -239,8 +248,11 @@ def test_subsections(self): "dry_run": False, "e3sm_to_cmip_environment_commands": "", "environment_commands": "", + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_files": "eam.h0", "input_subdir": "INPUT_SUBDIR", @@ -298,7 +310,7 @@ def test_subsections(self): "vars": "FSNTOA,FLUT,FSNT,FLNT,FSNS,FLNS,SHFLX,QFLX,PRECC,PRECL,PRECSC,PRECSL,TS,TREFHT", } compare(self, actual_section, expected_section) - actual_tasks = getTasks(config, section_name) + actual_tasks = get_tasks(config, section_name) self.assertEqual(len(actual_tasks), 2) actual_task = actual_tasks[0] expected_task = { @@ -316,8 +328,11 @@ def test_subsections(self): "e3sm_to_cmip_environment_commands": "", "environment_commands": "", "extra_vars": "", + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", @@ -357,8 +372,11 @@ def test_subsections(self): "e3sm_to_cmip_environment_commands": "", "environment_commands": "", "extra_vars": "", + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", @@ -413,7 +431,7 @@ def test_subsections(self): "years": ["0001:0050:50"], } compare(self, actual_section, expected_section) - actual_tasks = getTasks(config, section_name) + actual_tasks = get_tasks(config, section_name) self.assertEqual(len(actual_tasks), 2) actual_task = actual_tasks[0] expected_task = { @@ -428,8 +446,11 @@ def test_subsections(self): "e3sm_to_cmip_environment_commands": "", 
"environment_commands": "", "exclude": False, + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", @@ -464,8 +485,11 @@ def test_subsections(self): "e3sm_to_cmip_environment_commands": "", "environment_commands": "", "exclude": False, + "fail_on_dependency_skip": False, "frequency": "monthly", "grid": "", + "guess_section_parameters": True, + "guess_path_parameters": True, "input": "INPUT", "input_component": "", "input_files": "eam.h0", diff --git a/tests/test_zppy_e3sm_diags.py b/tests/test_zppy_e3sm_diags.py new file mode 100644 index 00000000..cb81b6ff --- /dev/null +++ b/tests/test_zppy_e3sm_diags.py @@ -0,0 +1,599 @@ +import unittest +from typing import Any, Dict, List + +from zppy.e3sm_diags import ( + add_climo_dependencies, + add_ts_dependencies, + check_and_define_parameters, + check_mvm_only_parameters_for_bash, + check_parameters_for_bash, +) +from zppy.utils import ParameterNotProvidedError + + +class TestZppyE3SMDiags(unittest.TestCase): + def test_check_parameters_for_bash(self): + # diurnal_cycle + c = {"sets": ["diurnal_cycle"], "climo_diurnal_frequency": "diurnal_8xdaily"} + check_parameters_for_bash(c) + c = {"sets": ["diurnal_cycle"], "climo_diurnal_frequency": ""} + self.assertRaises(ParameterNotProvidedError, check_parameters_for_bash, c) + + # enso_diags + c = {"sets": ["enso_diags"], "ref_start_yr": "1990"} + check_parameters_for_bash(c) + c = {"sets": ["enso_diags"], "ref_start_yr": ""} + self.assertRaises(ParameterNotProvidedError, check_parameters_for_bash, c) + + # qbo + c = {"sets": ["qbo"], "ref_final_yr": "2000", "ref_start_yr": "1990"} + check_parameters_for_bash(c) + c = {"sets": ["qbo"], "ref_final_yr": "", "ref_start_yr": "1990"} + self.assertRaises(ParameterNotProvidedError, check_parameters_for_bash, c) + c = {"sets": ["qbo"], "ref_final_yr": "2000", "ref_start_yr": ""} + self.assertRaises(ParameterNotProvidedError, check_parameters_for_bash, c) + + # tropical_subseasonal + c = {"sets": ["tropical_subseasonal"], "ref_end_yr": "2000"} + check_parameters_for_bash(c) + c = {"sets": ["tropical_subseasonal"], "ref_end_yr": ""} + self.assertRaises(ParameterNotProvidedError, check_parameters_for_bash, c) + + def test_check_mvm_only_parameters_for_bash(self): + z0 = {"diff_title": "a", "ref_name": "b", "short_ref_name": "c"} + z1 = {"diff_title": "", "ref_name": "b", "short_ref_name": "c"} + z2 = {"diff_title": "a", "ref_name": "", "short_ref_name": "c"} + z3 = {"diff_title": "a", "ref_name": "b", "short_ref_name": ""} + c: Dict[str, Any] = {"sets": []} + c.update(z0) + check_mvm_only_parameters_for_bash(c) + c.update(z1) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(z2) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(z3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + d0 = { + "ref_final_yr": "2000", + "ref_start_yr": "1990", + "ts_num_years_ref": "2", + "ts_subsection": "sub", + } + d1 = { + "ref_final_yr": "", + "ref_start_yr": "1990", + "ts_num_years_ref": "2", + "ts_subsection": "sub", + } + d2 = { + "ref_final_yr": "2000", + "ref_start_yr": "", + "ts_num_years_ref": "2", + "ts_subsection": "sub", + } + d3 = { + "ref_final_yr": "2000", + "ref_start_yr": "1990", + "ts_num_years_ref": "", + "ts_subsection": "sub", + } + d4 = { + "ref_final_yr": 
"2000", + "ref_start_yr": "1990", + "ts_num_years_ref": "2", + "ts_subsection": "", + } + + # Load required parameters into all of the dicts above. + d0.update(z0) + d1.update(z0) + d2.update(z0) + d3.update(z0) + d4.update(z0) + + # area_mean_time_series + c = {"sets": ["area_mean_time_series"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + check_mvm_only_parameters_for_bash(c) # ref_final_yr not needed + c.update(d2) + check_mvm_only_parameters_for_bash(c) # ref_start_yr not needed + c.update(d3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d4) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + # enso_diags + c = {"sets": ["enso_diags"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d2) + check_mvm_only_parameters_for_bash(c) # ref_start_yr not needed + c.update(d3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d4) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + # qbo + c = {"sets": ["qbo"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + check_mvm_only_parameters_for_bash(c) # ref_final_yr not needed + c.update(d2) + check_mvm_only_parameters_for_bash(c) # ref_start_yr not needed + c.update(d3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d4) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + # streamflow + c = {"sets": ["streamflow"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d2) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d4) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + # tc_analysis + c = {"sets": ["tc_analysis"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d2) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d3) + check_mvm_only_parameters_for_bash(c) # ts_num_years_ref not needed + c.update(d4) + check_mvm_only_parameters_for_bash(c) # ts_subsection not needed + + # tropical_subseasonal + c = {"sets": ["tropical_subseasonal"]} + c.update(d0) + check_mvm_only_parameters_for_bash(c) + c.update(d1) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d2) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d3) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + c.update(d4) + self.assertRaises( + ParameterNotProvidedError, check_mvm_only_parameters_for_bash, c + ) + + def test_check_and_define_parameters(self): + # test_zppy_utils.py tests the guessing functionality turned off. + # So, we'll only test it turned on here. 
+ guesses = {"guess_path_parameters": True, "guess_section_parameters": True} + prefix_requirements = { + "subsection": "sub", + "tag": "tag", + "year1": 1990, + "year2": 2000, + "ref_year1": 1980, + "ref_year2": 1990, + } + base: Dict[str, Any] = {"diagnostics_base_path": "diags/post"} + base.update(guesses) + base.update(prefix_requirements) + + mvm_base = dict() + mvm_base.update(base) + required_for_mvm = { + "diff_title": "diff_title", + "ref_name": "ref_name", + "short_ref_name": "short_ref_name", + } + mvm_base.update(required_for_mvm) + + # No sets, mvo + c: Dict[str, Any] = { + "sets": [], + "run_type": "model_vs_obs", + "reference_data_path": "a", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["reference_data_path"], "a") + self.assertEqual(c["prefix"], "e3sm_diags_sub_tag_1990-2000") + + # No sets, mvm + c = {"sets": [], "run_type": "model_vs_model", "reference_data_path": ""} + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual( + c["reference_data_path"], "diags/post/observations/Atm/climatology/" + ) + self.assertEqual(c["prefix"], "e3sm_diags_sub_tag_1990-2000_vs_1980-1990") + + # No sets, bad run_type + c = {"sets": [], "run_type": "invalid", "reference_data_path": ""} + c.update(base) + self.assertRaises(ValueError, check_and_define_parameters, c) + + # ts_num_years => obs_ts, mvo + c = { + "sets": [], + "run_type": "model_vs_obs", + "reference_data_path": "", + "ts_num_years": 3, + "obs_ts": "a", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["obs_ts"], "a") + + c = { + "sets": [], + "run_type": "model_vs_obs", + "reference_data_path": "", + "ts_num_years": 3, + "obs_ts": "", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["obs_ts"], "diags/post/observations/Atm/time-series/") + + # ts_num_years => obs_ts, mvm + c = { + "sets": [], + "run_type": "model_vs_model", + "reference_data_path": "", + "ts_num_years": 3, + "obs_ts": "a", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["obs_ts"], "a") + + c = { + "sets": [], + "run_type": "model_vs_model", + "reference_data_path": "", + "ts_num_years": 3, + "obs_ts": "", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["obs_ts"], "diags/post/observations/Atm/time-series/") + + # area_mean_time_series/enso_diags/qbo, mvm + for diags_set in ["area_mean_time_series", "enso_diags", "qbo"]: + c = { + "sets": [diags_set], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts": "a", + "grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["reference_data_path_ts"], "a") + + c = { + "sets": [diags_set], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts": "", + "grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual( + c["reference_data_path_ts"], "diags/post/atm/grid/ts/monthly" + ) + + # diurnal_cycle, mvo + c = { + "sets": ["diurnal_cycle"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "dc_obs_climo": "a", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["dc_obs_climo"], "a") + + c = { + "sets": ["diurnal_cycle"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "dc_obs_climo": "", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["dc_obs_climo"], "diags/post/observations/Atm/climatology/") + + # diurnal_cycle, mvm + c = { + "sets": ["diurnal_cycle"], + 
"run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_climo_diurnal": "a", + "grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["reference_data_path_climo_diurnal"], "a") + + c = { + "sets": ["diurnal_cycle"], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_climo_diurnal": "", + "grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual( + c["reference_data_path_climo_diurnal"], + "diags/post/atm/grid/clim_diurnal_8xdaily", + ) + + # streamflow, mvo + c = { + "sets": ["streamflow"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "streamflow_obs_ts": "a", + "ts_num_years": 3, + "obs_ts": "", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["streamflow_obs_ts"], "a") + + c = { + "sets": ["streamflow"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "streamflow_obs_ts": "", + "ts_num_years": 3, + "obs_ts": "", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual( + c["streamflow_obs_ts"], "diags/post/observations/Atm/time-series/" + ) + + # streamflow, mvm + c = { + "sets": ["streamflow"], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts_rof": "a", + "gauges_path": "b", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["reference_data_path_ts_rof"], "a") + self.assertEqual(c["gauges_path"], "b") + + c = { + "sets": ["streamflow"], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts_rof": "", + "gauges_path": "", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual( + c["reference_data_path_ts_rof"], "diags/post/rof/native/ts/monthly" + ) + self.assertEqual( + c["gauges_path"], + "diags/post/observations/Atm/time-series/GSIM/GSIM_catchment_characteristics_all_1km2.csv", + ) + + # tc_analysis, mvo + c = { + "sets": ["tc_analysis"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "tc_obs": "a", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["tc_obs"], "a") + + c = { + "sets": ["tc_analysis"], + "run_type": "model_vs_obs", + "reference_data_path": "", + "tc_obs": "", + } + c.update(base) + check_and_define_parameters(c) + self.assertEqual(c["tc_obs"], "diags/post/observations/Atm/tc-analysis/") + + # tc_analysis, mvm + c = { + "sets": ["tc_analysis"], + "run_type": "model_vs_model", + "reference_data_path": "", + "tc_obs": "a", + "reference_data_path_tc": "b", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["tc_obs"], "a") + self.assertEqual(c["reference_data_path_tc"], "b") + + c = { + "sets": ["tc_analysis"], + "run_type": "model_vs_model", + "reference_data_path": "", + "tc_obs": "", + "reference_data_path_tc": "", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["tc_obs"], "diags/post/observations/Atm/tc-analysis/") + self.assertEqual( + c["reference_data_path_tc"], "diags/post/atm/tc-analysis_1980_1990" + ) + + # tropical_subseasonal, mvm + c = { + "sets": ["tropical_subseasonal"], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts_daily": "a", + "grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual(c["reference_data_path_ts_daily"], "a") + + c = { + "sets": ["tropical_subseasonal"], + "run_type": "model_vs_model", + "reference_data_path": "", + "reference_data_path_ts_daily": "", + 
"grid": "grid", + } + c.update(mvm_base) + check_and_define_parameters(c) + self.assertEqual( + c["reference_data_path_ts_daily"], "diags/post/atm/grid/ts/daily" + ) + + def test_add_climo_dependencies(self): + base: Dict[str, Any] = {"year1": 1980, "year2": 1990} + sets = [ + "lat_lon", + "zonal_mean_xy", + "zonal_mean_2d", + "polar", + "cosp_histogram", + "meridional_mean_2d", + "annual_cycle_zonal_mean", + "zonal_mean_2d_stratosphere", + ] + for diags_set in sets: + c: Dict[str, Any] = {"sets": [diags_set], "climo_subsection": "csub"} + c.update(base) + dependencies: List[str] = [] + add_climo_dependencies(c, dependencies, "script_dir") + self.assertEqual(dependencies, ["script_dir/climo_csub_1980-1990.status"]) + + c = {"sets": ["diurnal_cycle"], "climo_diurnal_subsection": "cdsub"} + c.update(base) + dependencies = [] + add_climo_dependencies(c, dependencies, "script_dir") + self.assertEqual(dependencies, ["script_dir/climo_cdsub_1980-1990.status"]) + c = {"sets": ["diurnal_cycle"]} + c.update(base) + dependencies = [] + self.assertRaises( + ParameterNotProvidedError, + add_climo_dependencies, + c, + dependencies, + "script_dir", + ) + + c = {"sets": ["lat_lon_land"], "climo_land_subsection": "lndsub"} + c.update(base) + dependencies = [] + add_climo_dependencies(c, dependencies, "script_dir") + self.assertEqual(dependencies, ["script_dir/climo_lndsub_1980-1990.status"]) + c = {"sets": ["lat_lon_land"]} + c.update(base) + dependencies = [] + self.assertRaises( + ParameterNotProvidedError, + add_climo_dependencies, + c, + dependencies, + "script_dir", + ) + + c = {"sets": ["tc_analysis"]} + c.update(base) + dependencies = [] + add_climo_dependencies(c, dependencies, "script_dir") + self.assertEqual(dependencies, ["script_dir/tc_analysis_1980-1990.status"]) + + def test_add_ts_dependencies(self): + base: Dict[str, Any] = { + "ts_num_years": 5, + "ts_subsection": "sub", + "ts_daily_subsection": "dsub", + } + sets = ["area_mean_time_series", "enso_diags", "qbo"] + for diags_set in sets: + c: Dict[str, Any] = {"sets": [diags_set]} + c.update(base) + dependencies: List[str] = [] + add_ts_dependencies(c, dependencies, "script_dir", 1980) + self.assertEqual(dependencies, ["script_dir/ts_sub_1980-1984-0005.status"]) + + c = {"sets": ["streamflow"]} + c.update(base) + dependencies = [] + add_ts_dependencies(c, dependencies, "script_dir", 1980) + self.assertEqual( + dependencies, ["script_dir/ts_rof_monthly_1980-1984-0005.status"] + ) + + c = {"sets": ["tropical_subseasonal"]} + c.update(base) + dependencies = [] + add_ts_dependencies(c, dependencies, "script_dir", 1980) + self.assertEqual(dependencies, ["script_dir/ts_dsub_1980-1984-0005.status"]) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_zppy_global_time_series.py b/tests/test_zppy_global_time_series.py new file mode 100644 index 00000000..572ea893 --- /dev/null +++ b/tests/test_zppy_global_time_series.py @@ -0,0 +1,203 @@ +import unittest +from typing import Any, Dict, List + +from zppy.global_time_series import determine_and_add_dependencies, determine_components + + +class TestZppyGlobalTimeSeries(unittest.TestCase): + def test_determine_components(self): + # Test non-legacy + c: Dict[str, Any] = { + "plot_names": "", + "plots_original": "", + "plots_atm": ["a"], + "plots_ice": "", + "plots_lnd": "", + "plots_ocn": "", + } + determine_components(c) + self.assertEqual(c["use_atm"], True) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], False) + 
self.assertEqual(c["plots_atm"], ["a"]) + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], "None") + + c = { + "plot_names": "", + "plots_original": "", + "plots_atm": "", + "plots_ice": ["a"], + "plots_lnd": "", + "plots_ocn": "", + } + determine_components(c) + self.assertEqual(c["use_atm"], False) + self.assertEqual(c["use_ice"], True) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], False) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], ["a"]) + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], "None") + + c = { + "plot_names": "", + "plots_original": "", + "plots_atm": "", + "plots_ice": "", + "plots_lnd": ["a"], + "plots_ocn": "", + } + determine_components(c) + self.assertEqual(c["use_atm"], False) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], True) + self.assertEqual(c["use_ocn"], False) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], ["a"]) + self.assertEqual(c["plots_ocn"], "None") + + c = { + "plot_names": "", + "plots_original": "", + "plots_atm": "", + "plots_ice": "", + "plots_lnd": "", + "plots_ocn": ["a"], + } + determine_components(c) + self.assertEqual(c["use_atm"], False) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], True) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], ["a"]) + + # Test legacy + base = {"plots_atm": "", "plots_ice": "", "plots_lnd": "", "plots_ocn": ""} + + c = { + "plot_names": ["a"], + "plots_original": "gets_overwritten", + "atmosphere_only": False, + } + c.update(base) + determine_components(c) + self.assertEqual(c["plots_original"], ["a"]) + self.assertEqual(c["use_atm"], True) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], False) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], "None") + + for ocn_set in ["change_ohc", "max_moc", "change_sea_level"]: + c = { + "plot_names": "", + "plots_original": [ocn_set], + "atmosphere_only": False, + } + c.update(base) + determine_components(c) + self.assertEqual(c["plots_original"], [ocn_set]) + self.assertEqual(c["use_atm"], True) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], True) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], "None") + + c = {"plot_names": "", "plots_original": ["a"], "atmosphere_only": True} + c.update(base) + determine_components(c) + self.assertEqual(c["plots_original"], ["a"]) + self.assertEqual(c["use_atm"], True) + self.assertEqual(c["use_ice"], False) + self.assertEqual(c["use_lnd"], False) + self.assertEqual(c["use_ocn"], False) + self.assertEqual(c["plots_atm"], "None") + self.assertEqual(c["plots_ice"], "None") + self.assertEqual(c["plots_lnd"], "None") + self.assertEqual(c["plots_ocn"], "None") + + def test_determine_and_add_dependencies(self): + c: Dict[str, Any] = { + "use_atm": True, + "use_lnd": False, + "use_ocn": False, + "year1": 1980, + "year2": 1990, + "ts_num_years": 5, + } + 
dependencies: List[str] = [] + determine_and_add_dependencies(c, dependencies, "script_dir") + expected = [ + "script_dir/ts_atm_monthly_glb_1980-1984-0005.status", + "script_dir/ts_atm_monthly_glb_1985-1989-0005.status", + ] + self.assertEqual(dependencies, expected) + + c = { + "use_atm": False, + "use_lnd": True, + "use_ocn": False, + "year1": 1980, + "year2": 1990, + "ts_num_years": 5, + } + dependencies = [] + determine_and_add_dependencies(c, dependencies, "script_dir") + expected = [ + "script_dir/ts_lnd_monthly_glb_1980-1984-0005.status", + "script_dir/ts_lnd_monthly_glb_1985-1989-0005.status", + ] + self.assertEqual(dependencies, expected) + + c = { + "use_atm": False, + "use_lnd": False, + "use_ocn": True, + "ts_years": "1980:1990:10", + "climo_years": "1980:1990:10", + } + dependencies = [] + determine_and_add_dependencies(c, dependencies, "script_dir") + expected = ["script_dir/mpas_analysis_ts_1980-1989_climo_1980-1989.status"] + self.assertEqual(dependencies, expected) + + c = { + "use_atm": False, + "use_lnd": False, + "use_ocn": True, + "ts_years": "", + "climo_years": "1980:1990:10", + } + dependencies = [] + self.assertRaises( + Exception, determine_and_add_dependencies, c, dependencies, "script_dir" + ) + c = { + "use_atm": False, + "use_lnd": False, + "use_ocn": True, + "ts_years": "1980:1990:10", + "climo_years": "", + } + dependencies = [] + self.assertRaises( + Exception, determine_and_add_dependencies, c, dependencies, "script_dir" + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_zppy_ilamb.py b/tests/test_zppy_ilamb.py new file mode 100644 index 00000000..bc0f8f46 --- /dev/null +++ b/tests/test_zppy_ilamb.py @@ -0,0 +1,47 @@ +import unittest +from typing import List + +from zppy.ilamb import determine_and_add_dependencies + + +class TestZppyILAMB(unittest.TestCase): + def test_determine_and_add_dependencies(self): + c = { + "land_only": True, + "ts_land_subsection": "land_monthly", + "year1": 1980, + "year2": 1990, + "ts_num_years": 5, + } + dependencies: List[str] = [] + determine_and_add_dependencies(c, dependencies, "script_dir") + expected = [ + "script_dir/ts_land_monthly_1980-1984-0005.status", + "script_dir/ts_land_monthly_1985-1989-0005.status", + ] + self.assertEqual(dependencies, expected) + + # Have zppy guess the subsection names + c = { + "land_only": False, + "ts_land_subsection": "", + "ts_atm_subsection": "", + "year1": 1980, + "year2": 1990, + "ts_num_years": 5, + "guess_path_parameters": True, + "guess_section_parameters": True, + } + dependencies = [] + determine_and_add_dependencies(c, dependencies, "script_dir") + expected = [ + "script_dir/ts_land_monthly_1980-1984-0005.status", + "script_dir/ts_land_monthly_1985-1989-0005.status", + "script_dir/ts_atm_monthly_180x360_aave_1980-1984-0005.status", + "script_dir/ts_atm_monthly_180x360_aave_1985-1989-0005.status", + ] + self.assertEqual(dependencies, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_zppy_utils.py b/tests/test_zppy_utils.py new file mode 100644 index 00000000..7a87420c --- /dev/null +++ b/tests/test_zppy_utils.py @@ -0,0 +1,504 @@ +import unittest +from typing import List + +from zppy.utils import ( + ParameterGuessType, + ParameterNotProvidedError, + add_dependencies, + check_parameter_defined, + check_required_parameters, + define_or_guess, + define_or_guess2, + get_active_status, + get_file_names, + get_guess_type_parameter, + get_url_message, + get_years, + set_component_and_prc_typ, + set_grid, + set_mapping_file, 
+) + + +class TestZppyUtils(unittest.TestCase): + def test_get_active_status(self): + # Test bool input + task = {"active": True} + self.assertTrue(get_active_status(task)) + task = {"active": False} + self.assertFalse(get_active_status(task)) + + # Test str input + task = {"active": "True"} # type: ignore + self.assertTrue(get_active_status(task)) + task = {"active": "False"} # type: ignore + self.assertFalse(get_active_status(task)) + + # Test bad value + task = {"active": "bad input"} # type: ignore + self.assertRaises(ValueError, get_active_status, task) + + # Test bad type + task = {"active": 5} # type: ignore + self.assertRaises(TypeError, get_active_status, task) + + def test_get_guess_type_parameter(self): + actual = get_guess_type_parameter(ParameterGuessType.SECTION_GUESS) + self.assertEqual(actual, "guess_section_parameters") + + actual = get_guess_type_parameter(ParameterGuessType.PATH_GUESS) + self.assertEqual(actual, "guess_path_parameters") + + def test_get_url_message(self): + c = { + "web_portal_base_path": "a", + "web_portal_base_url": "b", + "www": "a/c", + "case": "d", + } + actual = get_url_message(c, "task_name") + self.assertEqual(actual, "URL: b/c/d/task_name") + + c = { + "web_portal_base_path": "a", + "web_portal_base_url": "b", + "www": "c", + "case": "d", + } + actual = get_url_message(c, "task_name") + self.assertEqual(actual, "Could not determine URL from www=c") + + # def test_initialize_template + + # def test_get_tasks + + def test_set_mapping_file(self): + # Test no-change cases + c = {"mapping_file": ""} + set_mapping_file(c) + self.assertEqual(c["mapping_file"], "") + + c = {"mapping_file": "glb"} + set_mapping_file(c) + self.assertEqual(c["mapping_file"], "glb") + + c = {"mapping_file": "dir/file"} + set_mapping_file(c) + self.assertEqual(c["mapping_file"], "dir/file") + + # Now, the function should do something + c = {"mapping_file": "file", "diagnostics_base_path": "base"} + set_mapping_file(c) + self.assertEqual(c["mapping_file"], "base/maps/file") + + def test_set_grid(self): + c = {"grid": "grid"} + set_grid(c) + self.assertEqual(c["grid"], "grid") + + c = {"grid": "", "mapping_file": ""} + set_grid(c) + self.assertEqual(c["grid"], "native") + + c = {"grid": "", "mapping_file": "glb"} + set_grid(c) + self.assertEqual(c["grid"], "glb") + + # TODO: test a realistic mapping file + + def test_set_component_and_prc_typ(self): + # Test without input_files + c = {"input_component": "cam"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "cam") + + c = {"input_component": "eam"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "eam") + + c = {"input_component": "eamxx"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "eamxx") + + c = {"input_component": "cpl"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "cpl") + self.assertEqual(c["prc_typ"], "sgs") + + c = {"input_component": "clm2"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "lnd") + self.assertEqual(c["prc_typ"], "clm") + + c = {"input_component": "elm"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "lnd") + self.assertEqual(c["prc_typ"], "elm") + + c = {"input_component": "mosart"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "rof") + self.assertEqual(c["prc_typ"], "sgs") + + # Test with input_files + c = {"input_component": "", "input_files": 
"cam.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "cam") + + c = {"input_component": "", "input_files": "eam.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "eam") + + c = {"input_component": "", "input_files": "eamxx.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "atm") + self.assertEqual(c["prc_typ"], "eamxx") + + c = {"input_component": "", "input_files": "cpl.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "cpl") + self.assertEqual(c["prc_typ"], "sgs") + + c = {"input_component": "", "input_files": "clm2.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "lnd") + self.assertEqual(c["prc_typ"], "clm") + + c = {"input_component": "", "input_files": "elm.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "lnd") + self.assertEqual(c["prc_typ"], "elm") + + c = {"input_component": "", "input_files": "mosart.extension"} + set_component_and_prc_typ(c) + self.assertEqual(c["component"], "rof") + self.assertEqual(c["prc_typ"], "sgs") + + # Test error case + c = {"input_component": "", "input_files": ""} + self.assertRaises(ValueError, set_component_and_prc_typ, c) + + def test_check_required_parameters(self): + # Parameter is required + # a, b need parameter p, and we want sets a, b, c + c = {"sets": ["a", "b", "c"], "p": "exists"} + check_required_parameters(c, set(["a", "b"]), "p") + + # Parameter isn't required based on the sets we want + # z needs parameter p, but we only want sets a, b, c + c = {"sets": ["a", "b", "c"], "p": ""} + check_required_parameters(c, set(["z"]), "p") + + # Parameter is required + # a, b need parameter p, and we want sets a, b, c + c = {"sets": ["a", "b", "c"], "p": ""} + self.assertRaises( + ParameterNotProvidedError, + check_required_parameters, + c, + set(["a", "b"]), + "p", + ) + + def test_get_years(self): + self.assertEqual(get_years("1980:1990:05"), [(1980, 1984), (1985, 1989)]) + self.assertEqual(get_years("1980-1990"), [(1980, 1990)]) + + self.assertEqual(get_years(["1980:1990:05"]), [(1980, 1984), (1985, 1989)]) + self.assertEqual(get_years(["1980-1990"]), [(1980, 1990)]) + + self.assertEqual( + get_years(["1980:1990:05", "2000:2010:05"]), + [(1980, 1984), (1985, 1989), (2000, 2004), (2005, 2009)], + ) + self.assertEqual( + get_years(["1980-1990", "2000-2005"]), [(1980, 1990), (2000, 2005)] + ) + + self.assertRaises(ValueError, get_years, "1980") + self.assertRaises(ValueError, get_years, "1980:1990") + self.assertRaises(ValueError, get_years, "1980:1990:05:03") + self.assertRaises(ValueError, get_years, "1980-1990-05") + + self.assertRaises( + ValueError, get_years, ["1983-1993", "1980"] + ) # one year set works + self.assertRaises(ValueError, get_years, ["1980:1990"]) + self.assertRaises(ValueError, get_years, ["1980:1990:05:03"]) + self.assertRaises(ValueError, get_years, ["1980-1990-05"]) + + # This one is in fact a value error, but not one we raised directly + self.assertRaises(ValueError, get_years, "1980-1990:05:03") + + def test_define_or_guess(self): + # First choice is defined + c = { + "first_choice": "a", + "second_choice": "b", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(actual, "a") + actual = define_or_guess( + c, "first_choice", 
"second_choice", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(actual, "a") + + c = { + "first_choice": "a", + "second_choice": "b", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(actual, "a") + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(actual, "a") + + c = { + "first_choice": "a", + "second_choice": "b", + "guess_path_parameters": False, + "guess_section_parameters": True, + } + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(actual, "a") + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(actual, "a") + + # First choice is undefined + c = { + "first_choice": "", + "second_choice": "b", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(actual, "b") + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(actual, "b") + + c = { + "first_choice": "", + "second_choice": "b", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(actual, "b") + self.assertRaises( + ParameterNotProvidedError, + define_or_guess, + c, + "first_choice", + "second_choice", + ParameterGuessType.SECTION_GUESS, + ) + + c = { + "first_choice": "", + "second_choice": "b", + "guess_path_parameters": False, + "guess_section_parameters": True, + } + self.assertRaises( + ParameterNotProvidedError, + define_or_guess, + c, + "first_choice", + "second_choice", + ParameterGuessType.PATH_GUESS, + ) + actual = define_or_guess( + c, "first_choice", "second_choice", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(actual, "b") + + def test_define_or_guess2(self): + # The required parameter has a value + c = { + "required_parameter": "a", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + c = { + "required_parameter": "a", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + + c = { + "required_parameter": "a", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + c = { + "required_parameter": "a", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + + c = { + "required_parameter": "a", + "guess_path_parameters": False, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + c = { + "required_parameter": "a", + 
"guess_path_parameters": False, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(c["required_parameter"], "a") + + # The required parameter is undefined + c = { + "required_parameter": "", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(c["required_parameter"], "backup_option") + c = { + "required_parameter": "", + "guess_path_parameters": True, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(c["required_parameter"], "backup_option") + + c = { + "required_parameter": "", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.PATH_GUESS + ) + self.assertEqual(c["required_parameter"], "backup_option") + c = { + "required_parameter": "", + "guess_path_parameters": True, + "guess_section_parameters": False, + } + self.assertRaises( + ParameterNotProvidedError, + define_or_guess2, + c, + "required_parameter", + "backup_option", + ParameterGuessType.SECTION_GUESS, + ) + + c = { + "required_parameter": "", + "guess_path_parameters": False, + "guess_section_parameters": True, + } + self.assertRaises( + ParameterNotProvidedError, + define_or_guess2, + c, + "required_parameter", + "backup_option", + ParameterGuessType.PATH_GUESS, + ) + c = { + "required_parameter": "", + "guess_path_parameters": False, + "guess_section_parameters": True, + } + define_or_guess2( + c, "required_parameter", "backup_option", ParameterGuessType.SECTION_GUESS + ) + self.assertEqual(c["required_parameter"], "backup_option") + + def test_check_parameter_defined(self): + c = {"a": 1, "b": 2, "c": ""} + check_parameter_defined(c, "a") + self.assertRaises(ParameterNotProvidedError, check_parameter_defined, c, "c") + self.assertRaises(ParameterNotProvidedError, check_parameter_defined, c, "d") + + def test_get_file_names(self): + bash, settings, status = get_file_names("script_dir", "prefix") + self.assertEqual(bash, "script_dir/prefix.bash") + self.assertEqual(settings, "script_dir/prefix.settings") + self.assertEqual(status, "script_dir/prefix.status") + + # def test_check_status + + # def test_make_executable + + def test_add_dependencies(self): + dependencies: List[str] = [] + add_dependencies(dependencies, "script_dir", "prefix", "sub", 1980, 1990, 10) + self.assertEqual(dependencies, ["script_dir/prefix_sub_1980-1989-0010.status"]) + + dependencies = [] + add_dependencies(dependencies, "script_dir", "prefix", "sub", 1980, 1990, 2) + expected = [ + "script_dir/prefix_sub_1980-1981-0002.status", + "script_dir/prefix_sub_1982-1983-0002.status", + "script_dir/prefix_sub_1984-1985-0002.status", + "script_dir/prefix_sub_1986-1987-0002.status", + "script_dir/prefix_sub_1988-1989-0002.status", + ] + self.assertEqual(dependencies, expected) + + # def test_write_settings_file + + # def test_submit_script + + # def test_print_url + + +if __name__ == "__main__": + unittest.main() diff --git a/zppy/__main__.py b/zppy/__main__.py index adf8d067..78b51e17 100644 --- a/zppy/__main__.py +++ b/zppy/__main__.py @@ -3,7 +3,7 @@ import importlib import io import os -from typing import List +from typing import Any, List, Tuple from configobj import ConfigObj from mache import 
MachineInfo @@ -15,14 +15,48 @@ from zppy.global_time_series import global_time_series from zppy.ilamb import ilamb from zppy.mpas_analysis import mpas_analysis +from zppy.pcmdi_diags import pcmdi_diags from zppy.tc_analysis import tc_analysis from zppy.ts import ts -from zppy.utils import checkStatus, submitScript +from zppy.utils import check_status, submit_script -# FIXME: C901 'main' is too complex (19) -def main(): # noqa: C901 +def main(): + args = _get_args() + print( + "For help, please see https://e3sm-project.github.io/zppy. Ask questions at https://github.com/E3SM-Project/zppy/discussions/categories/q-a." + ) + # Subdirectory where templates are located + template_dir: str = os.path.join(os.path.dirname(__file__), "templates") + # Read configuration file and validate it + default_config: str = os.path.join(template_dir, "default.ini") + user_config: ConfigObj = ConfigObj(args.config, configspec=default_config) + user_config, plugins = _handle_plugins(user_config, default_config, args) + config: ConfigObj = _handle_campaigns(user_config, default_config, template_dir) + # Validate + _validate_config(config) + # Add templateDir to config + config["default"]["templateDir"] = template_dir + # Output script directory + output = config["default"]["output"] + username = os.environ.get("USER") + output = output.replace("$USER", username) + script_dir = os.path.join(output, "post/scripts") + job_ids_file = os.path.join(script_dir, "jobids.txt") + try: + os.makedirs(script_dir) + except OSError as exc: + if exc.errno != errno.EEXIST: + raise OSError("Cannot create script directory") + pass + machine_info = _get_machine_info(config) + config = _determine_parameters(machine_info, config) + if args.last_year: + config["default"]["last_year"] = args.last_year + _launch_scripts(config, script_dir, job_ids_file, plugins) + +def _get_args(): # Command line parser parser = argparse.ArgumentParser( description="Launch E3SM post-processing tasks", usage="zppy -c " @@ -34,18 +68,12 @@ def main(): # noqa: C901 "-l", "--last-year", type=int, help="last year to process", required=False ) args = parser.parse_args() + return args - print( - "For help, please see https://e3sm-project.github.io/zppy. Ask questions at https://github.com/E3SM-Project/zppy/discussions/categories/q-a." - ) - - # Subdirectory where templates are located - templateDir = os.path.join(os.path.dirname(__file__), "templates") - - # Read configuration file and validate it - default_config = os.path.join(templateDir, "default.ini") - user_config = ConfigObj(args.config, configspec=default_config) +def _handle_plugins( + user_config: ConfigObj, default_config: str, args +) -> Tuple[ConfigObj, List[Any]]: # Load all external plugins. Build a list. 
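+ # A hedged sketch of the user config this expects (plugin name hypothetical):
+ # each entry in the [default] `plugins` list must be an importable module
+ # exposing a callable of the same name, e.g.
+ # [default]
+ # plugins = "zppy_plugin_example",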
plugins = [] if "plugins" in user_config["default"].keys(): @@ -55,7 +83,7 @@ def main(): # noqa: C901 plugin_module = importlib.import_module(plugin_name) except BaseException: raise ValueError( - "Could not load external zppy plugin module {}".format(plugin_name) + f"Could not load external zppy plugin module {plugin_name}" ) # Path plugin_path = plugin_module.__path__[0] @@ -63,7 +91,6 @@ def main(): # noqa: C901 plugins.append( {"name": plugin_name, "module": plugin_module, "path": plugin_path} ) - # Read configuration files again, this time including all plugins with open(default_config) as f: default = f.read() @@ -75,44 +102,44 @@ def main(): # noqa: C901 with open(plugin_default_file) as f: default += "\n" + f.read() user_config = ConfigObj(args.config, configspec=io.StringIO(default)) + return user_config, plugins + +def _handle_campaigns( + user_config: ConfigObj, default_config: str, template_dir: str +) -> ConfigObj: # Handle 'campaign' option if "campaign" in user_config["default"]: campaign = user_config["default"]["campaign"] else: campaign = "none" if campaign != "none": - campaign_file = os.path.join(templateDir, "{}.cfg".format(campaign)) + campaign_file = os.path.join(template_dir, f"{campaign}.cfg") if not os.path.exists(campaign_file): - raise ValueError( - "{} does not appear to be a known campaign".format(campaign) - ) + raise ValueError(f"{campaign} does not appear to be a known campaign") campaign_config = ConfigObj(campaign_file, configspec=default_config) # merge such that user_config takes priority over campaign_config campaign_config.merge(user_config) config = campaign_config else: config = user_config + return config - # Validate - _validate_config(config) - # Add templateDir to config - config["default"]["templateDir"] = templateDir +def _validate_config(config): + validator = Validator() + + result = config.validate(validator) + if result is not True: + print("Validation results={}".format(result)) + raise ValueError( + "Configuration file validation failed. Parameters listed as false in the validation results have invalid values." + ) + else: + print("Configuration file validation passed.") - # Output script directory - output = config["default"]["output"] - username = os.environ.get("USER") - output = output.replace("$USER", username) - scriptDir = os.path.join(output, "post/scripts") - job_ids_file = os.path.join(scriptDir, "jobids.txt") - try: - os.makedirs(scriptDir) - except OSError as exc: - if exc.errno != errno.EEXIST: - raise OSError("Cannot create script directory") - pass +def _get_machine_info(config: ConfigObj) -> MachineInfo: if ("machine" not in config["default"]) or (config["default"]["machine"] == ""): if "E3SMU_MACHINE" in os.environ: # Use the machine identified by E3SM-Unified @@ -125,7 +152,10 @@ def main(): # noqa: C901 # If `machine` is set, then MachineInfo can bypass the # `discover_machine()` function. 
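+ # For example, setting machine = "chrysalis" under [default] (name
+ # illustrative) hands MachineInfo an explicit machine rather than relying
+ # on host auto-detection via discover_machine().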
machine = config["default"]["machine"] - machine_info = MachineInfo(machine=machine) + return MachineInfo(machine=machine) + + +def _determine_parameters(machine_info: MachineInfo, config: ConfigObj) -> ConfigObj: default_machine = machine_info.machine ( default_account, @@ -177,37 +207,40 @@ def main(): # noqa: C901 config["default"][ "environment_commands" ] = f"source {unified_base}/load_latest_e3sm_unified_{machine}.sh" + return config - if args.last_year: - config["default"]["last_year"] = args.last_year +def _launch_scripts(config: ConfigObj, script_dir, job_ids_file, plugins) -> None: existing_bundles: List[Bundle] = [] # predefined bundles - existing_bundles = predefined_bundles(config, scriptDir, existing_bundles) + existing_bundles = predefined_bundles(config, script_dir, existing_bundles) # climo tasks - existing_bundles = climo(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = climo(config, script_dir, existing_bundles, job_ids_file) # time series tasks - existing_bundles = ts(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = ts(config, script_dir, existing_bundles, job_ids_file) # tc_analysis tasks - existing_bundles = tc_analysis(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = tc_analysis(config, script_dir, existing_bundles, job_ids_file) # e3sm_diags tasks - existing_bundles = e3sm_diags(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = e3sm_diags(config, script_dir, existing_bundles, job_ids_file) # mpas_analysis tasks - existing_bundles = mpas_analysis(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = mpas_analysis(config, script_dir, existing_bundles, job_ids_file) # global time series tasks existing_bundles = global_time_series( - config, scriptDir, existing_bundles, job_ids_file + config, script_dir, existing_bundles, job_ids_file ) # ilamb tasks - existing_bundles = ilamb(config, scriptDir, existing_bundles, job_ids_file) + existing_bundles = ilamb(config, script_dir, existing_bundles, job_ids_file) + + # pcmdi_diags tasks + existing_bundles = pcmdi_diags(config, script_dir, existing_bundles, job_ids_file) # zppy external plugins for plugin in plugins: @@ -215,34 +248,22 @@ def main(): # noqa: C901 plugin_func = getattr(plugin["module"], plugin["name"]) # Call plugin existing_bundles = plugin_func( - plugin["path"], config, scriptDir, existing_bundles, job_ids_file + plugin["path"], config, script_dir, existing_bundles, job_ids_file ) # Submit bundle jobs for b in existing_bundles: - skip = checkStatus(b.bundle_status) + skip = check_status(b.bundle_status) if skip: continue b.display_dependencies() b.render(config) if not b.dry_run: - submitScript( + submit_script( b.bundle_file, b.bundle_status, b.export, job_ids_file, - dependFiles=b.dependencies_external, + dependFiles=list(b.dependencies_external), + fail_on_dependency_skip=config["default"]["fail_on_dependency_skip"], ) - - -def _validate_config(config): - validator = Validator() - - result = config.validate(validator) - if result is not True: - print("Validation results={}".format(result)) - raise ValueError( - "Configuration file validation failed. Parameters listed as false in the validation results have invalid values."
- ) - else: - print("Configuration file validation passed.") diff --git a/zppy/bundle.py b/zppy/bundle.py index 5fdde06c..c9c4fa7a 100644 --- a/zppy/bundle.py +++ b/zppy/bundle.py @@ -1,10 +1,10 @@ import os import os.path -from typing import List, Set +from typing import Any, Dict, List, Set -import jinja2 +from configobj import ConfigObj -from zppy.utils import getTasks, makeExecutable +from zppy.utils import get_tasks, initialize_template, make_executable # ----------------------------------------------------------------------------- @@ -36,15 +36,8 @@ def __init__(self, c): self.export: str = "NONE" - def render(self, config): - - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("bundle.bash") - + def render(self, config) -> None: + template, _ = initialize_template(config, "bundle.bash") # Populate dictionary c = {} c["machine"] = config["default"]["machine"] @@ -64,16 +57,12 @@ def render(self, config): # Create script with open(self.bundle_file, "w") as f: f.write(template.render(**c)) - makeExecutable(self.bundle_file) - - return - - def add_task(self, scriptFile, dependFiles): + make_executable(self.bundle_file) + def add_task(self, script_file, depend_files) -> None: # Add tasks and dependencies - self.tasks.append(scriptFile) - self.dependencies.update(dependFiles) - + self.tasks.append(script_file) + self.dependencies.update(depend_files) # Sort through dependencies to determine in or out of bundle # Remove extensions before performing inclusion test. tasks = [os.path.splitext(t)[0] for t in self.tasks] @@ -85,7 +74,7 @@ def add_task(self, scriptFile, dependFiles): self.dependencies_external.add(dependency) # Useful for debugging - def display_dependencies(self): + def display_dependencies(self) -> None: print(f"Displaying dependencies for {self.bundle_name}") print("dependencies_internal:") if self.dependencies_internal: @@ -106,7 +95,13 @@ def display_dependencies(self): # ----------------------------------------------------------------------------- -def handle_bundles(c, scriptFile, export, dependFiles=[], existing_bundles=[]): +def handle_bundles( + c: Dict[str, Any], + script_file, + export, + dependFiles=[], + existing_bundles: List[Bundle] = [], +) -> List[Bundle]: bundle_name = c["bundle"] if bundle_name == "": return existing_bundles @@ -120,28 +115,26 @@ def handle_bundles(c, scriptFile, export, dependFiles=[], existing_bundles=[]): # So, the bundle does not already exist bundle = Bundle(c) existing_bundles.append(bundle) - bundle.add_task(scriptFile, dependFiles) + bundle.add_task(script_file, dependFiles) if export == "ALL": # If one task requires export="ALL", then the bundle script will need it as well bundle.export = export - return existing_bundles # ----------------------------------------------------------------------------- -def predefined_bundles(config, scriptDir, existing_bundles): - +def predefined_bundles( + config: ConfigObj, script_dir: str, existing_bundles: List[Bundle] +) -> List[Bundle]: # --- List of tasks --- - tasks = getTasks(config, "bundle") + tasks = get_tasks(config, "bundle") if len(tasks) == 0: return existing_bundles - # --- Create new bundles as needed --- for c in tasks: if c["subsection"] is not None: c["bundle"] = c["subsection"] - c["scriptDir"] = scriptDir + c["scriptDir"] = script_dir bundle = Bundle(c) existing_bundles.append(bundle) - return 
existing_bundles diff --git a/zppy/climo.py b/zppy/climo.py index 71d8a7f1..716d4ace 100644 --- a/zppy/climo.py +++ b/zppy/climo.py @@ -1,108 +1,79 @@ -import os -import pprint -import re +from typing import Any, Dict, List, Tuple -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles from zppy.utils import ( - checkStatus, - getComponent, - getTasks, - getYears, - makeExecutable, - setMappingFile, - submitScript, + ParameterGuessType, + check_status, + define_or_guess, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, + set_component_and_prc_typ, + set_grid, + set_mapping_file, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -def climo(config, scriptDir, existing_bundles, job_ids_file): +def climo(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # --- Initialize jinja2 template engine --- - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("climo.bash") + template, _ = initialize_template(config, "climo.bash") # --- List of climo tasks --- - tasks = getTasks(config, "climo") + tasks: List[Dict[str, Any]] = get_tasks(config, "climo") if len(tasks) == 0: return existing_bundles # --- Generate and submit climo scripts --- for c in tasks: - - setMappingFile(c) - - # Grid name (if not explicitly defined) - # 'native' if no remapping - # or extracted from mapping filename - if c["grid"] == "": - if c["mapping_file"] == "": - c["grid"] = "native" - else: - tmp = os.path.basename(c["mapping_file"]) - # FIXME: W605 invalid escape sequence '\.' - tmp = re.sub("\.[^.]*\.nc$", "", tmp) # noqa: W605 - tmp = tmp.split("_") - if tmp[0] == "map": - c["grid"] = "%s_%s" % (tmp[-2], tmp[-1]) - else: - raise ValueError( - "Cannot extract target grid name from mapping file %s" - % (c["mapping_file"]) - ) - - # Output component (for directory structure) and procedure type for ncclimo - c["component"], c["prc_typ"] = getComponent( - c["input_component"], c["input_files"] - ) - + set_mapping_file(c) + set_grid(c) + set_component_and_prc_typ(c) + year_sets: List[Tuple[int, int]] = get_years(c["years"]) # Loop over year sets - year_sets = getYears(c["years"]) for s in year_sets: - c["yr_start"] = s[0] c["yr_end"] = s[1] if ("last_year" in c.keys()) and (c["yr_end"] > c["last_year"]): continue # Skip this year set - c["scriptDir"] = scriptDir - if c["subsection"]: - sub = c["subsection"] - else: - sub = c["grid"] - prefix = "climo_%s_%04d-%04d" % (sub, c["yr_start"], c["yr_end"]) + c["scriptDir"] = script_dir + sub: str = define_or_guess( + c, "subsection", "grid", ParameterGuessType.SECTION_GUESS + ) + prefix: str = f"climo_{sub}_{c['yr_start']:04d}-{c['yr_end']:04d}" print(prefix) c["prefix"] = prefix - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - skip = checkStatus(statusFile) + bash_file, settings_file, status_file = get_file_names(script_dir, prefix) + skip: bool = check_status(status_file) if skip: continue - # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) - + 
make_executable(bash_file) + write_settings_file(settings_file, c, s) export = "ALL" existing_bundles = handle_bundles( - c, scriptFile, export, existing_bundles=existing_bundles + c, bash_file, export, existing_bundles=existing_bundles ) if not c["dry_run"]: if c["bundle"] == "": # Submit job - submitScript(scriptFile, statusFile, export, job_ids_file) + submit_script( + bash_file, + status_file, + export, + job_ids_file, + fail_on_dependency_skip=c["fail_on_dependency_skip"], + ) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") diff --git a/zppy/e3sm_diags.py b/zppy/e3sm_diags.py index d874d8e3..bbe93b57 100644 --- a/zppy/e3sm_diags.py +++ b/zppy/e3sm_diags.py @@ -1,51 +1,50 @@ import os -import pprint -from typing import List +from typing import Any, Dict, List, Set, Tuple -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles from zppy.utils import ( + ParameterGuessType, add_dependencies, - checkStatus, - getTasks, - getYears, - makeExecutable, + check_parameter_defined, + check_required_parameters, + check_status, + define_or_guess, + define_or_guess2, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, print_url, - submitScript, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -# FIXME: C901 'e3sm_diags' is too complex (20) -def e3sm_diags(config, scriptDir, existing_bundles, job_ids_file): # noqa: C901 +def e3sm_diags(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("e3sm_diags.bash") + template, _ = initialize_template(config, "e3sm_diags.bash") # --- List of e3sm_diags tasks --- - tasks = getTasks(config, "e3sm_diags") + tasks: List[Dict[str, Any]] = get_tasks(config, "e3sm_diags") if len(tasks) == 0: return existing_bundles # --- Generate and submit e3sm_diags scripts --- - dependencies: List[str] = [] - for c in tasks: - - c["scriptDir"] = scriptDir - + dependencies: List[str] = [] + check_parameters_for_bash(c) + c["scriptDir"] = script_dir if "ts_num_years" in c.keys(): c["ts_num_years"] = int(c["ts_num_years"]) - # Loop over year sets - year_sets = getYears(c["years"]) + year_sets: List[Tuple[int, int]] = get_years(c["years"]) + ref_year_sets: List[Tuple[int, int]] if ("ref_years" in c.keys()) and (c["ref_years"] != [""]): - ref_year_sets = getYears(c["ref_years"]) + ref_year_sets = get_years(c["ref_years"]) else: ref_year_sets = year_sets for s, rs in zip(year_sets, ref_year_sets): @@ -55,197 +54,29 @@ def e3sm_diags(config, scriptDir, existing_bundles, job_ids_file): # noqa: C901 continue # Skip this year set c["ref_year1"] = rs[0] c["ref_year2"] = rs[1] - if c["subsection"]: - c["sub"] = c["subsection"] - else: - c["sub"] = c["grid"] - # Make a guess for observation paths, if need be - if c["reference_data_path"] == "": - c[ - "reference_data_path" - ] = f"{c['diagnostics_base_path']}/observations/Atm/climatology/" - if ("tc_analysis" in c["sets"]) and (c["tc_obs"] == ""): - c[ - "tc_obs" - ] = f"{c['diagnostics_base_path']}/observations/Atm/tc-analysis/" - if ("ts_num_years" in c.keys()) and (c["obs_ts"] == ""): - c[ - "obs_ts" - ] = 
f"{c['diagnostics_base_path']}/observations/Atm/time-series/" - if c["run_type"] == "model_vs_obs": - prefix = "e3sm_diags_%s_%s_%04d-%04d" % ( - c["sub"], - c["tag"], - c["year1"], - c["year2"], - ) - elif c["run_type"] == "model_vs_model": - prefix = "e3sm_diags_%s_%s_%04d-%04d_vs_%04d-%04d" % ( - c["sub"], - c["tag"], - c["year1"], - c["year2"], - c["ref_year1"], - c["ref_year2"], - ) - reference_data_path = ( - c["reference_data_path"].split("/post")[0] + "/post" - ) - if ("diurnal_cycle" in c["sets"]) and ( - c["reference_data_path_climo_diurnal"] == "" - ): - c[ - "reference_data_path_climo_diurnal" - ] = f"{reference_data_path}/atm/{c['grid']}/clim_diurnal_8xdaily" - if ("tc_analysis" in c["sets"]) and (c["reference_data_path_tc"] == ""): - c[ - "reference_data_path_tc" - ] = f"{reference_data_path}/atm/tc-analysis_{c['ref_year1']}_{c['ref_year2']}" - if ("ts_num_years" in c.keys()) and (c["reference_data_path_ts"] == ""): - c[ - "reference_data_path_ts" - ] = f"{reference_data_path}/atm/{c['grid']}/ts/monthly" - if ("streamflow" in c["sets"]) and ( - c["reference_data_path_ts_rof"] == "" - ): - c[ - "reference_data_path_ts_rof" - ] = f"{reference_data_path}/rof/native/ts/monthly" - if c["gauges_path"] == "": - gauges_path_prefix = c["diagnostics_base_path"] - gauges_path_suffix = "observations/Atm/time-series/GSIM/GSIM_catchment_characteristics_all_1km2.csv" - c["gauges_path"] = os.path.join( - gauges_path_prefix, gauges_path_suffix - ) - if ("tropical_subseasonal" in c["sets"]) and ( - c["reference_data_path_ts_daily"] == "" - ): - c[ - "reference_data_path_ts_daily" - ] = f"{reference_data_path}/atm/{c['grid']}/ts/daily" - else: - raise ValueError("Invalid run_type={}".format(c["run_type"])) - if "diurnal_cycle" in c["sets"]: - if c["dc_obs_climo"] == "": - c["dc_obs_climo"] = c["reference_data_path"] - if ("streamflow" in c["sets"]) and (c["streamflow_obs_ts"] == ""): - c["streamflow_obs_ts"] = c["obs_ts"] - print(prefix) - c["prefix"] = prefix - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - skip = checkStatus(statusFile) + check_and_define_parameters(c) + bash_file, settings_file, status_file = get_file_names( + script_dir, c["prefix"] + ) + skip: bool = check_status(status_file) if skip: continue - # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - + make_executable(bash_file) # List of dependencies - depend_on_climo = set( - [ - "lat_lon", - "zonal_mean_xy", - "zonal_mean_2d", - "polar", - "cosp_histogram", - "meridional_mean_2d", - "annual_cycle_zonal_mean", - "zonal_mean_2d_stratosphere", - ] - ) - in_sets = set(c["sets"]) - # Check if any requested sets depend on climo: - if depend_on_climo & in_sets: - if "climo_subsection" in c.keys() and c["climo_subsection"] != "": - climo_sub = c["climo_subsection"] - else: - climo_sub = c["sub"] - dependencies.append( - os.path.join( - scriptDir, - "climo_%s_%04d-%04d.status" - % (climo_sub, c["year1"], c["year2"]), - ), - ) - if "diurnal_cycle" in c["sets"]: - dependencies.append( - os.path.join( - scriptDir, - "climo_%s_%04d-%04d.status" - % (c["climo_diurnal_subsection"], c["year1"], c["year2"]), - ) - ) - if "tc_analysis" in c["sets"]: - dependencies.append( - os.path.join( - scriptDir, - "tc_analysis_%04d-%04d.status" % (c["year1"], c["year2"]), - ) - ) + add_climo_dependencies(c, 
dependencies, script_dir) # Iterate from year1 to year2 incrementing by the number of years per time series file. if "ts_num_years" in c.keys(): for yr in range(c["year1"], c["year2"], c["ts_num_years"]): - start_yr = yr - end_yr = yr + c["ts_num_years"] - 1 - if "ts_subsection" in c.keys() and c["ts_subsection"] != "": - ts_sub = c["ts_subsection"] - else: - ts_sub = c["sub"] - - if ( - "ts_daily_subsection" in c.keys() - and c["ts_daily_subsection"] != "" - ): - ts_daily_sub = c["ts_daily_subsection"] - else: - ts_daily_sub = c["sub"] - if ( - ("enso_diags" in c["sets"]) - or ("qbo" in c["sets"]) - or ("area_mean_time_series" in c["sets"]) - ): - add_dependencies( - dependencies, - scriptDir, - "ts", - ts_sub, - start_yr, - end_yr, - c["ts_num_years"], - ) - if "streamflow" in c["sets"]: - add_dependencies( - dependencies, - scriptDir, - "ts", - "rof_monthly", - start_yr, - end_yr, - c["ts_num_years"], - ) - if "tropical_subseasonal" in c["sets"]: - add_dependencies( - dependencies, - scriptDir, - "ts", - ts_daily_sub, - start_yr, - end_yr, - c["ts_num_years"], - ) - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) - + add_ts_dependencies(c, dependencies, script_dir, yr) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) export = "ALL" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export, dependFiles=dependencies, existing_bundles=existing_bundles, @@ -253,25 +84,233 @@ def e3sm_diags(config, scriptDir, existing_bundles, job_ids_file): # noqa: C901 if not c["dry_run"]: if c["bundle"] == "": # Submit job - submitScript( - scriptFile, - statusFile, + submit_script( + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) - - # Due to a `socket.gaierror: [Errno -2] Name or service not known` error when running e3sm_diags with tc_analysis - # on multiple year_sets, if tc_analysis is in sets, then e3sm_diags should be run sequentially. - if "tc_analysis" in c["sets"]: - # Note that this line should still be executed even if jobid == -1 - # The later tc_analysis-using e3sm_diags tasks still depend on this task (and thus will also fail). - # Add to the dependency list - dependencies.append(statusFile) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") print_url(c, "e3sm_diags") return existing_bundles + + +def check_parameters_for_bash(c: Dict[str, Any]) -> None: + # Check parameters that aren't used until e3sm_diags.bash is run + check_required_parameters(c, set(["tropical_subseasonal"]), "ref_end_yr") + check_required_parameters(c, set(["qbo"]), "ref_final_yr") + check_required_parameters(c, set(["enso_diags", "qbo"]), "ref_start_yr") + check_required_parameters(c, set(["diurnal_cycle"]), "climo_diurnal_frequency") + + +def check_mvm_only_parameters_for_bash(c: Dict[str, Any]) -> None: + # Check mvm-specific parameters that aren't used until e3sm_diags.bash is run. 
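+ # A minimal sketch of the failure mode (values hypothetical): given
+ # c = {"sets": ["enso_diags"], "ref_final_yr": ""}, check_required_parameters
+ # raises ParameterNotProvidedError because a requested set depends on a
+ # parameter left empty (cf. test_check_required_parameters in tests/test_zppy_utils.py).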
+ check_parameter_defined(c, "diff_title") + check_parameter_defined(c, "ref_name") + check_parameter_defined(c, "short_ref_name") + + check_required_parameters( + c, + set(["enso_diags", "tropical_subseasonal", "streamflow", "tc_analysis"]), + "ref_final_yr", + ) + check_required_parameters( + c, set(["tropical_subseasonal", "streamflow", "tc_analysis"]), "ref_start_yr" + ) + ts_sets = set( + [ + "enso_diags", + "qbo", + "area_mean_time_series", + "tropical_subseasonal", + "streamflow", + ] + ) + check_required_parameters(c, ts_sets, "ts_num_years_ref") + check_required_parameters(c, ts_sets, "ts_subsection") + + +def check_and_define_parameters(c: Dict[str, Any]) -> None: + c["sub"] = define_or_guess( + c, "subsection", "grid", ParameterGuessType.SECTION_GUESS + ) + define_or_guess2( + c, + "reference_data_path", + f"{c['diagnostics_base_path']}/observations/Atm/climatology/", + ParameterGuessType.PATH_GUESS, + ) + if "tc_analysis" in c["sets"]: + define_or_guess2( + c, + "tc_obs", + f"{c['diagnostics_base_path']}/observations/Atm/tc-analysis/", + ParameterGuessType.PATH_GUESS, + ) + # TODO: do this based on sets, rather than by relying on the user setting ts_num_years + if "ts_num_years" in c.keys(): + define_or_guess2( + c, + "obs_ts", + f"{c['diagnostics_base_path']}/observations/Atm/time-series/", + ParameterGuessType.PATH_GUESS, + ) + prefix: str + if c["run_type"] == "model_vs_obs": + prefix = f"e3sm_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}" + if "diurnal_cycle" in c["sets"]: + define_or_guess2( + c, + "dc_obs_climo", + c["reference_data_path"], + ParameterGuessType.PATH_GUESS, + ) + if "streamflow" in c["sets"]: + define_or_guess2( + c, "streamflow_obs_ts", c["obs_ts"], ParameterGuessType.PATH_GUESS + ) + elif c["run_type"] == "model_vs_model": + check_mvm_only_parameters_for_bash(c) + prefix = f"e3sm_diags_{c['sub']}_{c['tag']}_{c['year1']:04d}-{c['year2']:04d}_vs_{c['ref_year1']:04d}-{c['ref_year2']:04d}" + reference_data_path = c["reference_data_path"].split("/post")[0] + "/post" + if "diurnal_cycle" in c["sets"]: + define_or_guess2( + c, + "reference_data_path_climo_diurnal", + f"{reference_data_path}/atm/{c['grid']}/clim_diurnal_8xdaily", + ParameterGuessType.PATH_GUESS, + ) + if ("tc_analysis" in c["sets"]) and (c["reference_data_path_tc"] == ""): + # We have to guess parameters here, + # because multiple year sets are defined in a single subtask. 
+ c["reference_data_path_tc"] = ( + f"{reference_data_path}/atm/tc-analysis_{c['ref_year1']}_{c['ref_year2']}" + ) + if set(["enso_diags", "qbo", "area_mean_time_series"]) & set(c["sets"]): + define_or_guess2( + c, + "reference_data_path_ts", + f"{reference_data_path}/atm/{c['grid']}/ts/monthly", + ParameterGuessType.PATH_GUESS, + ) + if "tropical_subseasonal" in c["sets"]: + define_or_guess2( + c, + "reference_data_path_ts_daily", + f"{reference_data_path}/atm/{c['grid']}/ts/daily", + ParameterGuessType.PATH_GUESS, + ) + if "streamflow" in c["sets"]: + define_or_guess2( + c, + "reference_data_path_ts_rof", + f"{reference_data_path}/rof/native/ts/monthly", + ParameterGuessType.PATH_GUESS, + ) + define_or_guess2( + c, + "gauges_path", + os.path.join( + c["diagnostics_base_path"], + "observations/Atm/time-series/GSIM/GSIM_catchment_characteristics_all_1km2.csv", + ), + ParameterGuessType.PATH_GUESS, + ) + else: + raise ValueError(f"Invalid run_type={c['run_type']}") + print(prefix) + c["prefix"] = prefix + + +def add_climo_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str +) -> None: + depend_on_climo: Set[str] = set( + [ + "lat_lon", + "zonal_mean_xy", + "zonal_mean_2d", + "polar", + "cosp_histogram", + "meridional_mean_2d", + "annual_cycle_zonal_mean", + "zonal_mean_2d_stratosphere", + "aerosol_aeronet", + "aerosol_budget", + ] + ) + # Check if any requested sets depend on climo: + status_suffix: str = f"_{c['year1']:04d}-{c['year2']:04d}.status" + if depend_on_climo & set(c["sets"]): + climo_sub = define_or_guess( + c, "climo_subsection", "sub", ParameterGuessType.SECTION_GUESS + ) + dependencies.append( + os.path.join(script_dir, f"climo_{climo_sub}{status_suffix}"), + ) + if "diurnal_cycle" in c["sets"]: + check_parameter_defined(c, "climo_diurnal_subsection") + dependencies.append( + os.path.join( + script_dir, f"climo_{c['climo_diurnal_subsection']}{status_suffix}" + ) + ) + if "lat_lon_land" in c["sets"]: + check_parameter_defined(c, "climo_land_subsection") + dependencies.append( + os.path.join( + script_dir, f"climo_{c['climo_land_subsection']}{status_suffix}" + ) + ) + if "tc_analysis" in c["sets"]: + dependencies.append(os.path.join(script_dir, f"tc_analysis{status_suffix}")) + + +def add_ts_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str, yr: int +): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + ts_sub = define_or_guess( + c, "ts_subsection", "sub", ParameterGuessType.SECTION_GUESS + ) + ts_daily_sub = define_or_guess( + c, "ts_daily_subsection", "sub", ParameterGuessType.SECTION_GUESS + ) + depend_on_ts: Set[str] = set(["enso_diags", "qbo", "area_mean_time_series"]) + if depend_on_ts & set(c["sets"]): + add_dependencies( + dependencies, + script_dir, + "ts", + ts_sub, + start_yr, + end_yr, + c["ts_num_years"], + ) + if "streamflow" in c["sets"]: + add_dependencies( + dependencies, + script_dir, + "ts", + "rof_monthly", + start_yr, + end_yr, + c["ts_num_years"], + ) + if "tropical_subseasonal" in c["sets"]: + add_dependencies( + dependencies, + script_dir, + "ts", + ts_daily_sub, + start_yr, + end_yr, + c["ts_num_years"], + ) diff --git a/zppy/global_time_series.py b/zppy/global_time_series.py index 0ca682fd..674c8187 100644 --- a/zppy/global_time_series.py +++ b/zppy/global_time_series.py @@ -1,197 +1,75 @@ import os -import pprint -from typing import List - -import jinja2 +from typing import Any, Dict, List from zppy.bundle import handle_bundles from zppy.utils import ( add_dependencies, - checkStatus, - getTasks, 
- getYears, - makeExecutable, + check_status, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, print_url, - submitScript, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -# FIXME: C901 'run' is too complex (19) -def global_time_series(config, scriptDir, existing_bundles, job_ids_file): # noqa: C901 +def global_time_series(config, script_dir, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("global_time_series.bash") + template, template_env = initialize_template(config, "global_time_series.bash") # --- List of global_time_series tasks --- - tasks = getTasks(config, "global_time_series") + tasks: List[Dict[str, Any]] = get_tasks(config, "global_time_series") if len(tasks) == 0: return existing_bundles # --- Generate and submit global_time_series scripts --- for c in tasks: - c["ts_num_years"] = int(c["ts_num_years"]) - # Loop over year sets - year_sets = getYears(c["years"]) + year_sets = get_years(c["years"]) for s in year_sets: c["year1"] = s[0] c["year2"] = s[1] if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): continue # Skip this year set - c["scriptDir"] = scriptDir - prefix = "global_time_series_%04d-%04d" % (c["year1"], c["year2"]) + c["scriptDir"] = script_dir + prefix: str = f"global_time_series_{c['year1']:04d}-{c['year2']:04d}" print(prefix) c["prefix"] = prefix - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - skip = checkStatus(statusFile) + bash_file, settings_file, status_file = get_file_names(script_dir, prefix) + skip: bool = check_status(status_file) if skip: continue - - # Handle legacy parameter - if c["plot_names"]: - print("warning: plot_names for global_time_series is deprecated.") - print( - "Setting plot_names will override the new parameter, plots_original." - ) - c["plots_original"] = c["plot_names"] - - # Determine which components are needed - c["use_atm"] = False - c["use_ice"] = False - c["use_lnd"] = False - c["use_ocn"] = False - if c["plots_original"]: - c["use_atm"] = True - if c["atmosphere_only"]: - print( - "warning: atmosphere_only for global_time_series is deprecated." - ) - print( - "preferred method: remove the 3 ocean plots (change_ohc,max_moc,change_sea_level) from plots_original." 
- ) - has_original_ocn_plots = ( - ("change_ohc" in c["plots_original"]) - or ("max_moc" in c["plots_original"]) - or ("change_sea_level" in c["plots_original"]) - ) - if (not c["atmosphere_only"]) and has_original_ocn_plots: - c["use_ocn"] = True - else: - # For better string processing in global_time_series.bash - c["plots_original"] = "None" - if c["plots_atm"]: - c["use_atm"] = True - else: - # For better string processing in global_time_series.bash - c["plots_atm"] = "None" - if c["plots_ice"]: - c["use_ice"] = True - else: - # For better string processing in global_time_series.bash - c["plots_ice"] = "None" - if c["plots_lnd"]: - c["use_lnd"] = True - else: - # For better string processing in global_time_series.bash - c["plots_lnd"] = "None" - if c["plots_ocn"]: - c["use_ocn"] = True - else: - # For better string processing in global_time_series.bash - c["plots_ocn"] = "None" - + determine_components(c) # Load useful scripts - c["global_time_series_dir"] = os.path.join( - scriptDir, "{}_dir".format(prefix) - ) + c["global_time_series_dir"] = os.path.join(script_dir, f"{prefix}_dir") if not os.path.exists(c["global_time_series_dir"]): os.mkdir(c["global_time_series_dir"]) scripts = ["coupled_global.py", "readTS.py", "ocean_month.py"] for script in scripts: - script_template = templateEnv.get_template(script) + script_template = template_env.get_template(script) script_file = os.path.join(c["global_time_series_dir"], script) with open(script_file, "w") as f: f.write(script_template.render(**c)) - makeExecutable(script_file) - + make_executable(script_file) # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - + make_executable(bash_file) # List of dependencies dependencies: List[str] = [] - # Add Time Series dependencies - if c["use_atm"]: - # Iterate from year1 to year2 incrementing by the number of years per time series file. - for yr in range(c["year1"], c["year2"], c["ts_num_years"]): - start_yr = yr - end_yr = yr + c["ts_num_years"] - 1 - add_dependencies( - dependencies, - scriptDir, - "ts", - "atm_monthly_glb", - start_yr, - end_yr, - c["ts_num_years"], - ) - if c["use_lnd"]: - for yr in range(c["year1"], c["year2"], c["ts_num_years"]): - start_yr = yr - end_yr = yr + c["ts_num_years"] - 1 - add_dependencies( - dependencies, - scriptDir, - "ts", - "lnd_monthly_glb", - start_yr, - end_yr, - c["ts_num_years"], - ) - if c["use_ocn"]: - # Add MPAS Analysis dependencies - ts_year_sets = getYears(c["ts_years"]) - climo_year_sets = getYears(c["climo_years"]) - if (not ts_year_sets) or (not climo_year_sets): - raise Exception( - "ts_years and climo_years must both be set for ocn plots." 
- ) - for ts_year_set, climo_year_set in zip(ts_year_sets, climo_year_sets): - c["ts_year1"] = ts_year_set[0] - c["ts_year2"] = ts_year_set[1] - c["climo_year1"] = climo_year_set[0] - c["climo_year2"] = climo_year_set[1] - dependencies.append( - os.path.join( - scriptDir, - "mpas_analysis_ts_%04d-%04d_climo_%04d-%04d.status" - % ( - c["ts_year1"], - c["ts_year2"], - c["climo_year1"], - c["climo_year2"], - ), - ) - ) - - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) - + # Add Global Time Series dependencies + determine_and_add_dependencies(c, dependencies, script_dir) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) export = "NONE" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export, dependFiles=dependencies, existing_bundles=existing_bundles, @@ -199,17 +77,117 @@ def global_time_series(config, scriptDir, existing_bundles, job_ids_file): # no if not c["dry_run"]: if c["bundle"] == "": # Submit job - submitScript( - scriptFile, - statusFile, + submit_script( + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") print_url(c, "global_time_series") return existing_bundles + + +def determine_components(c: Dict[str, Any]) -> None: + # Handle legacy parameter + if c["plot_names"]: + print("warning: plot_names for global_time_series is deprecated.") + print("Setting plot_names will override the new parameter, plots_original.") + c["plots_original"] = c["plot_names"] + # Determine which components are needed + c["use_atm"] = False + c["use_ice"] = False + c["use_lnd"] = False + c["use_ocn"] = False + if c["plots_original"]: + c["use_atm"] = True + if c["atmosphere_only"]: + print("warning: atmosphere_only for global_time_series is deprecated.") + print( + "preferred method: remove the 3 ocean plots (change_ohc,max_moc,change_sea_level) from plots_original." + ) + has_original_ocn_plots = ( + ("change_ohc" in c["plots_original"]) + or ("max_moc" in c["plots_original"]) + or ("change_sea_level" in c["plots_original"]) + ) + if (not c["atmosphere_only"]) and has_original_ocn_plots: + c["use_ocn"] = True + else: + # For better string processing in global_time_series.bash + c["plots_original"] = "None" + if c["plots_atm"]: + c["use_atm"] = True + else: + # For better string processing in global_time_series.bash + c["plots_atm"] = "None" + if c["plots_ice"]: + c["use_ice"] = True + else: + # For better string processing in global_time_series.bash + c["plots_ice"] = "None" + if c["plots_lnd"]: + c["use_lnd"] = True + else: + # For better string processing in global_time_series.bash + c["plots_lnd"] = "None" + if c["plots_ocn"]: + c["use_ocn"] = True + else: + # For better string processing in global_time_series.bash + c["plots_ocn"] = "None" + + +def determine_and_add_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str +) -> None: + if c["use_atm"]: + # Iterate from year1 to year2 incrementing by the number of years per time series file. 
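+ # e.g. year1=1980, year2=1990, ts_num_years=5 yields yr in {1980, 1985},
+ # producing one dependency per time-series chunk, such as
+ # ts_atm_monthly_glb_1980-1984-0005.status and
+ # ts_atm_monthly_glb_1985-1989-0005.status (cf. the unit tests above).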
+ for yr in range(c["year1"], c["year2"], c["ts_num_years"]): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + add_dependencies( + dependencies, + script_dir, + "ts", + "atm_monthly_glb", + start_yr, + end_yr, + c["ts_num_years"], + ) + if c["use_lnd"]: + for yr in range(c["year1"], c["year2"], c["ts_num_years"]): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + add_dependencies( + dependencies, + script_dir, + "ts", + "lnd_monthly_glb", + start_yr, + end_yr, + c["ts_num_years"], + ) + if c["use_ocn"]: + # Add MPAS Analysis dependencies + ts_year_sets = get_years(c["ts_years"]) + climo_year_sets = get_years(c["climo_years"]) + if (not ts_year_sets) or (not climo_year_sets): + raise Exception("ts_years and climo_years must both be set for ocn plots.") + for ts_year_set, climo_year_set in zip(ts_year_sets, climo_year_sets): + c["ts_year1"] = ts_year_set[0] + c["ts_year2"] = ts_year_set[1] + c["climo_year1"] = climo_year_set[0] + c["climo_year2"] = climo_year_set[1] + dependencies.append( + os.path.join( + script_dir, + f"mpas_analysis_ts_{c['ts_year1']:04d}-{c['ts_year2']:04d}_climo_{c['climo_year1']:04d}-{c['climo_year2']:04d}.status", + ) + ) diff --git a/zppy/ilamb.py b/zppy/ilamb.py index f1928f8b..f3a44a26 100644 --- a/zppy/ilamb.py +++ b/zppy/ilamb.py @@ -1,128 +1,127 @@ import os -import pprint -from typing import List +from typing import Any, Dict, List -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles from zppy.utils import ( + ParameterGuessType, add_dependencies, - checkStatus, - getTasks, - getYears, - makeExecutable, + check_status, + define_or_guess2, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, print_url, - submitScript, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -def ilamb(config, scriptDir, existing_bundles, job_ids_file): +def ilamb(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("ilamb.bash") + template, _ = initialize_template(config, "ilamb.bash") # --- List of ilamb tasks --- - tasks = getTasks(config, "ilamb") + tasks: List[Dict[str, Any]] = get_tasks(config, "ilamb") if len(tasks) == 0: return existing_bundles # --- Generate and submit ilamb scripts --- - dependencies: List[str] = [] - for c in tasks: + dependencies: List[str] = [] + if "ts_num_years" in c.keys(): c["ts_num_years"] = int(c["ts_num_years"]) - # Loop over year sets - year_sets = getYears(c["years"]) + year_sets = get_years(c["years"]) for s in year_sets: c["year1"] = s[0] c["year2"] = s[1] - c["scriptDir"] = scriptDir - if c["subsection"]: - c["sub"] = c["subsection"] - else: - c["sub"] = c["grid"] - - if c["ilamb_obs"] == "": - ilamb_obs_prefix = c["diagnostics_base_path"] - ilamb_obs_suffix = "ilamb_data" - c["ilamb_obs"] = os.path.join(ilamb_obs_prefix, ilamb_obs_suffix) - - # List of dependencies - add_dependencies( - dependencies, - scriptDir, - "ts", - c["ts_land_subsection"], - c["year1"], - c["year2"], - c["ts_num_years"], - ) - if not c["land_only"]: - add_dependencies( - dependencies, - scriptDir, - "ts", - c["ts_atm_subsection"], - c["year1"], - c["year2"], - c["ts_num_years"], - ) - - prefix = "ilamb_%04d-%04d" % ( - c["year1"], - c["year2"], + c["scriptDir"] = script_dir + 
define_or_guess2( + c, + "ilamb_obs", + os.path.join(c["diagnostics_base_path"], "ilamb_data"), + ParameterGuessType.PATH_GUESS, ) + # List of dependencies + determine_and_add_dependencies(c, dependencies, script_dir) + prefix: str = f"ilamb_{c['year1']:04d}-{c['year2']:04d}" c["prefix"] = prefix print(prefix) - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - skip = checkStatus(statusFile) + bash_file, settings_file, status_file = get_file_names(script_dir, prefix) + skip: bool = check_status(status_file) if skip: continue - # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) + make_executable(bash_file) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) # Note --export=All is needed to make sure the executable is copied and executed on the nodes. export = "ALL" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export, dependFiles=dependencies, existing_bundles=existing_bundles, ) if not c["dry_run"]: - if c["bundle"] == "": # Submit job - submitScript( - scriptFile, - statusFile, + submit_script( + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") print_url(c, "ilamb") return existing_bundles + + +def determine_and_add_dependencies( + c: Dict[str, Any], dependencies: List[str], script_dir: str +) -> None: + define_or_guess2( + c, "ts_land_subsection", "land_monthly", ParameterGuessType.SECTION_GUESS + ) + add_dependencies( + dependencies, + script_dir, + "ts", + c["ts_land_subsection"], + c["year1"], + c["year2"], + c["ts_num_years"], + ) + if not c["land_only"]: + define_or_guess2( + c, + "ts_atm_subsection", + "atm_monthly_180x360_aave", + ParameterGuessType.SECTION_GUESS, + ) + add_dependencies( + dependencies, + script_dir, + "ts", + c["ts_atm_subsection"], + c["year1"], + c["year2"], + c["ts_num_years"], + ) diff --git a/zppy/mpas_analysis.py b/zppy/mpas_analysis.py index 40a1c664..8355a13c 100644 --- a/zppy/mpas_analysis.py +++ b/zppy/mpas_analysis.py @@ -1,64 +1,57 @@ -import os -import pprint +from typing import Any, Dict, List, Tuple -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles from zppy.utils import ( - checkStatus, - getTasks, - getYears, - makeExecutable, + check_status, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, print_url, - submitScript, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -def mpas_analysis(config, scriptDir, existing_bundles, job_ids_file): +def mpas_analysis(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("mpas_analysis.bash") + template, _ = initialize_template(config, "mpas_analysis.bash") # --- List of mpas_analysis tasks --- -
tasks = getTasks(config, "mpas_analysis") + tasks: List[Dict[str, Any]] = get_tasks(config, "mpas_analysis") if len(tasks) == 0: return existing_bundles # --- Generate and submit mpas_analysis scripts --- - # MPAS-Analysis uses a shared output directory, so only a single # job should run at once. To gracefully handle this, we make each - # MAPS-Analysis task dependant on all previous ones. This may not + # MPAS-Analysis task dependent on all previous ones. This may not # be 100% fool-proof, but should be a reasonable start - dependencies = [] - for c in tasks: + # Dependencies carried over from previous tasks. + carried_over_dependencies: List[str] = [] - if config["mpas_analysis"]["shortTermArchive"]: - c["subdir_ocean"] = "/archive/ocn/hist" - c["subdir_ice"] = "/archive/ice/hist" - else: - c["subdir_ocean"] = "/run" - c["subdir_ice"] = "/run" + for c in tasks: + dependencies: List[str] = carried_over_dependencies + set_subdirs(config, c) # Loop over year sets - ts_year_sets = getYears(c["ts_years"]) + ts_year_sets: List[Tuple[int, int]] = get_years(c["ts_years"]) + climo_year_sets: List[Tuple[int, int]] + enso_year_sets: List[Tuple[int, int]] if c["climo_years"] != [""]: - climo_year_sets = getYears(c["climo_years"]) + climo_year_sets = get_years(c["climo_years"]) else: climo_year_sets = ts_year_sets if c["enso_years"] != [""]: - enso_year_sets = getYears(c["enso_years"]) + enso_year_sets = get_years(c["enso_years"]) else: enso_year_sets = ts_year_sets - for s, rs, es in zip(ts_year_sets, climo_year_sets, enso_year_sets): - c["ts_year1"] = s[0] c["ts_year2"] = s[1] if ("last_year" in c.keys()) and (c["ts_year2"] > c["last_year"]): @@ -71,49 +64,34 @@ def mpas_analysis(config, scriptDir, existing_bundles, job_ids_file): c["enso_year2"] = es[1] if ("last_year" in c.keys()) and (c["enso_year2"] > c["last_year"]): continue # Skip this year set - c["scriptDir"] = scriptDir + c["scriptDir"] = script_dir + prefix_suffix: str = ( + f"_ts_{c['ts_year1']:04d}-{c['ts_year2']:04d}_climo_{c['climo_year1']:04d}-{c['climo_year2']:04d}" + ) + prefix: str if c["subsection"]: - prefix = "mpas_analysis_%s_ts_%04d-%04d_climo_%04d-%04d" % ( - c["subsection"], - c["ts_year1"], - c["ts_year2"], - c["climo_year1"], - c["climo_year2"], - ) + prefix = f"mpas_analysis_{c['subsection']}{prefix_suffix}" else: - prefix = "mpas_analysis_ts_%04d-%04d_climo_%04d-%04d" % ( - c["ts_year1"], - c["ts_year2"], - c["climo_year1"], - c["climo_year2"], - ) + prefix = f"mpas_analysis{prefix_suffix}" print(prefix) c["prefix"] = prefix - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - + bash_file, settings_file, status_file = get_file_names(script_dir, prefix) # Check if we can skip because it completed successfully before - skip = checkStatus(statusFile) + skip: bool = check_status(status_file) if skip: # Add to the dependency list - dependencies.append(statusFile) + carried_over_dependencies.append(status_file) continue - # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) - + make_executable(bash_file) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) export = "ALL" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export,
dependFiles=dependencies, existing_bundles=existing_bundles, @@ -122,22 +100,32 @@ def mpas_analysis(config, scriptDir, existing_bundles, job_ids_file): if c["bundle"] == "": # Submit job - submitScript( - scriptFile, - statusFile, + submit_script( + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) # Note that this line should still be executed even if jobid == -1 # The later MPAS-Analysis tasks still depend on this task (and thus will also fail). # Add to the dependency list - dependencies.append(statusFile) + carried_over_dependencies.append(status_file) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") print_url(c, "mpas_analysis") return existing_bundles + + +def set_subdirs(config: ConfigObj, c: Dict[str, Any]) -> None: + if config["mpas_analysis"]["shortTermArchive"]: + c["subdir_ocean"] = "/archive/ocn/hist" + c["subdir_ice"] = "/archive/ice/hist" + else: + c["subdir_ocean"] = "/run" + c["subdir_ice"] = "/run" diff --git a/zppy/pcmdi_diags.py b/zppy/pcmdi_diags.py new file mode 100644 index 00000000..3c31e9b3 --- /dev/null +++ b/zppy/pcmdi_diags.py @@ -0,0 +1,158 @@ +import os +import pprint +from typing import List + +import jinja2 + +from zppy.bundle import handle_bundles +from zppy.utils import ( + add_dependencies, + checkStatus, + getTasks, + getYears, + makeExecutable, + print_url, + submitScript, +) + + +# ----------------------------------------------------------------------------- +def pcmdi_diags(config, scriptDir, existing_bundles, job_ids_file): + + # Initialize jinja2 template engine + templateLoader = jinja2.FileSystemLoader( + searchpath=config["default"]["templateDir"] + ) + templateEnv = jinja2.Environment(loader=templateLoader) + template = templateEnv.get_template("pcmdi_diags.bash") + + # --- List of pcmdi_diags tasks --- + tasks = getTasks(config, "pcmdi_diags") + if len(tasks) == 0: + return existing_bundles + + # --- Generate and submit pcmdi_diags scripts --- + dependencies: List[str] = [] + + for c in tasks: + + c["scriptDir"] = scriptDir + + if "ts_num_years" in c.keys(): + c["ts_num_years"] = int(c["ts_num_years"]) + + # procedure type for e3sm_to_cmip + c["cmor_tables_prefix"] = c["diagnostics_base_path"] + + # Loop over year sets + year_sets = getYears(c["ts_years"]) + if ("ref_years" in c.keys()) and (c["ref_years"] != [""]): + ref_year_sets = getYears(c["ref_years"]) + else: + ref_year_sets = year_sets + for s, rs in zip(year_sets, ref_year_sets): + c["year1"] = s[0] + c["year2"] = s[1] + if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): + continue # Skip this year set + c["ref_year1"] = rs[0] + c["ref_year2"] = rs[1] + if c["subsection"]: + c["sub"] = c["subsection"] + else: + c["sub"] = c["grid"] + # Make a guess for observation paths, if need be + if ("ts_num_years" in c.keys()) and (c["obs_ts"] == ""): + c[ + "obs_ts" + ] = f"{c['diagnostics_base_path']}/observations/Atm/time-series/" + if c["run_type"] == "model_vs_obs": + prefix = "pcmdi_diags_%s_%s_%04d-%04d" % ( + c["sub"], + c["tag"], + c["year1"], + c["year2"], + ) + elif c["run_type"] == "model_vs_model": + prefix = "pcmdi_diags_%s_%s_%04d-%04d_vs_%04d-%04d" % ( + c["sub"], + c["tag"], + c["year1"], + c["year2"], + c["ref_year1"], + c["ref_year2"], + ) + reference_data_path = ( + c["reference_data_path"].split("/post")[0] + "/post" + ) + if ("ts_num_years" in c.keys()) and 
(c["reference_data_path_ts"] == ""): + c[ + "reference_data_path_ts" + ] = f"{reference_data_path}/atm/{c['grid']}/cmip_ts/monthly" + else: + raise ValueError("Invalid run_type={}".format(c["run_type"])) + print(prefix) + c["prefix"] = prefix + scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) + statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) + settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) + skip = checkStatus(statusFile) + if skip: + continue + + # Create script + with open(scriptFile, "w") as f: + f.write(template.render(**c)) + makeExecutable(scriptFile) + + # Iterate from year1 to year2 incrementing by the number of years per time series file. + if "ts_num_years" in c.keys(): + for yr in range(c["year1"], c["year2"], c["ts_num_years"]): + start_yr = yr + end_yr = yr + c["ts_num_years"] - 1 + if ( + ("mean_climate" in c["sets"]) + or ("variability_mode_atm" in c["sets"]) + or ("variability_mode_cpl" in c["sets"]) + or ("enso" in c["sets"]) + ): + add_dependencies( + dependencies, + scriptDir, + "ts", + "atm_monthly_180x360_aave", + start_yr, + end_yr, + c["ts_num_years"], + ) + with open(settingsFile, "w") as sf: + p = pprint.PrettyPrinter(indent=2, stream=sf) + p.pprint(c) + p.pprint(s) + + export = "ALL" + existing_bundles = handle_bundles( + c, + scriptFile, + export, + dependFiles=dependencies, + existing_bundles=existing_bundles, + ) + if not c["dry_run"]: + if c["bundle"] == "": + # Submit job + submitScript( + scriptFile, + statusFile, + export, + job_ids_file, + dependFiles=dependencies, + ) + + else: + print("...adding to bundle '%s'" % (c["bundle"])) + + print(f" environment_commands={c['environment_commands']}") + print_url(c, "pcmdi_diags") + + return existing_bundles diff --git a/zppy/tc_analysis.py b/zppy/tc_analysis.py index 52d7124c..652989d1 100644 --- a/zppy/tc_analysis.py +++ b/zppy/tc_analysis.py @@ -1,75 +1,64 @@ -import os -import pprint -from typing import List +from typing import Any, Dict, List, Tuple -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles -from zppy.utils import checkStatus, getTasks, getYears, makeExecutable, submitScript +from zppy.utils import ( + check_status, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, + submit_script, + write_settings_file, +) # ----------------------------------------------------------------------------- -def tc_analysis(config, scriptDir, existing_bundles, job_ids_file): +def tc_analysis(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # Initialize jinja2 template engine - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("tc_analysis.bash") + template, _ = initialize_template(config, "tc_analysis.bash") # --- List of tasks --- - tasks = getTasks(config, "tc_analysis") + tasks: List[Dict[str, Any]] = get_tasks(config, "tc_analysis") if len(tasks) == 0: return existing_bundles # --- Generate and submit scripts --- - - # There is a `GenerateConnectivityFile: error while loading shared libraries: libnetcdf.so.11: cannot open shared object file: No such file or directory` error - # when multiple year_sets are run simultaneously. Therefore, we will wait for the completion of one year_set before moving on to the next. 
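+    # Dependencies are passed to submit_script as dependFiles (.status files);
+    # per the new fail_on_dependency_skip option in default.ini, zppy either fails
+    # as soon as a job cannot launch because of a missing dependency (True) or
+    # launches the other jobs where possible (False).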
- dependencies: List[str] = [] - for c in tasks: + dependencies: List[str] = [] + # Loop over year sets - year_sets = getYears(c["years"]) + year_sets: List[Tuple[int, int]] = get_years(c["years"]) for s in year_sets: c["year1"] = s[0] c["year2"] = s[1] if ("last_year" in c.keys()) and (c["year2"] > c["last_year"]): continue # Skip this year set - c["scriptDir"] = scriptDir + c["scriptDir"] = script_dir if c["input_files"]: c["atm_name"] = c["input_files"].split(".")[0] else: raise ValueError("No value was given for `input_files`.") - prefix = "tc_analysis_%04d-%04d" % ( - c["year1"], - c["year2"], - ) + prefix = f"tc_analysis_{c['year1']:04d}-{c['year2']:04d}" print(prefix) c["prefix"] = prefix - scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix)) - statusFile = os.path.join(scriptDir, "%s.status" % (prefix)) - settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix)) - skip = checkStatus(statusFile) + bash_file, settings_file, status_file = get_file_names(script_dir, prefix) + skip: bool = check_status(status_file) if skip: continue - # Create script - with open(scriptFile, "w") as f: + with open(bash_file, "w") as f: f.write(template.render(**c)) - makeExecutable(scriptFile) - - with open(settingsFile, "w") as sf: - p = pprint.PrettyPrinter(indent=2, stream=sf) - p.pprint(c) - p.pprint(s) - + make_executable(bash_file) + c["dependencies"] = dependencies + write_settings_file(settings_file, c, s) export = "NONE" existing_bundles = handle_bundles( c, - scriptFile, + bash_file, export, dependFiles=dependencies, existing_bundles=existing_bundles, @@ -77,20 +66,16 @@ def tc_analysis(config, scriptDir, existing_bundles, job_ids_file): if not c["dry_run"]: if c["bundle"] == "": # Submit job - submitScript( - scriptFile, - statusFile, + submit_script( + bash_file, + status_file, export, job_ids_file, dependFiles=dependencies, + fail_on_dependency_skip=c["fail_on_dependency_skip"], ) - - # Note that this line should still be executed even if jobid == -1 - # The later tc_analysis tasks still depend on this task (and thus will also fail). 
- # Add to the dependency list - dependencies.append(statusFile) else: - print("...adding to bundle '%s'" % (c["bundle"])) + print(f"...adding to bundle {c['bundle']}") print(f" environment_commands={c['environment_commands']}") diff --git a/zppy/templates/coupled_global.py b/zppy/templates/coupled_global.py index 762e7724..9fc41401 100644 --- a/zppy/templates/coupled_global.py +++ b/zppy/templates/coupled_global.py @@ -156,6 +156,7 @@ def get_ylim(standard_range, extreme_values): # ----------------------------------------------------------------------------- # Plotting functions + # 1 def plot_net_toa_flux_restom(ax, xlim, exps, rgn): print("Plot 1: plot_net_toa_flux_restom") @@ -748,24 +749,36 @@ def run(parameters, rgn): # noqa: C901 use_ocn = plots_ocn or (not atmosphere_only and has_original_ocn_plots) exps: List[Dict[str, Any]] = [ { - "atmos": f"{case_dir}/post/atm/glb/ts/monthly/{ts_num_years}yr/" - if use_atmos - else None, - "ice": f"{case_dir}/post/ice/glb/ts/monthly/{ts_num_years}yr/" - if plots_ice - else None, - "land": f"{case_dir}/post/lnd/glb/ts/monthly/{ts_num_years}yr/" - if plots_lnd - else None, - "ocean": f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" - if use_ocn - else None, - "moc": f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" - if use_ocn - else None, - "vol": f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" - if use_ocn - else None, + "atmos": ( + f"{case_dir}/post/atm/glb/ts/monthly/{ts_num_years}yr/" + if use_atmos + else None + ), + "ice": ( + f"{case_dir}/post/ice/glb/ts/monthly/{ts_num_years}yr/" + if plots_ice + else None + ), + "land": ( + f"{case_dir}/post/lnd/glb/ts/monthly/{ts_num_years}yr/" + if plots_lnd + else None + ), + "ocean": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), + "moc": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), + "vol": ( + f"{case_dir}/post/ocn/glb/ts/monthly/{ts_num_years}yr/" + if use_ocn + else None + ), "name": experiment_name, "yoffset": 0.0, "yr": ([year1, year2],), diff --git a/zppy/templates/default.ini b/zppy/templates/default.ini old mode 100644 new mode 100755 index 1e456999..daf2138d --- a/zppy/templates/default.ini +++ b/zppy/templates/default.ini @@ -25,10 +25,18 @@ e3sm_to_cmip_environment_commands = string(default="") # Set up the environment -- this is where you can tell zppy to use a custom conda environment. # To use a custom conda environment, you can set `environment_commands="source ; conda activate "`. environment_commands = string(default="") +# If set to True, zppy will fail as soon as a job is unable to launch because of a missing dependency. +# If set to False, zppy will launch other jobs, if possible. +fail_on_dependency_skip = boolean(default=False) # The frequency of the data. Options include "monthly", "diurnal_8xdaily" frequency = string(default="monthly") # The grid to use grid = string(default="") +# These two parameters enable zppy to guess path or section parameters. +# This allows users to set fewer parameters, but with the risk of zppy choosing incorrect values for them. +# Set to False for more transparency in path or section definitions.
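+# For example (from zppy/ilamb.py in this PR): with guessing enabled, an unset
+# [ilamb] ts_land_subsection is guessed to be "land_monthly" and an unset
+# ts_atm_subsection to be "atm_monthly_180x360_aave".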
+guess_path_parameters = boolean(default=True) +guess_section_parameters = boolean(default=True) # The directory to be post-processed # NOTE: no default, must be provided by user input = string @@ -99,9 +107,11 @@ input_component = string(default="") [ts] area_nm = string(default="area") cmip_metadata = string(default="e3sm_to_cmip/default_metadata.json") +cmip_plevdata = string(default="e3sm_to_cmip/vrt_remap_plev19.nc") # Days per file dpf = integer(default=30) extra_vars = string(default="") +interp_vars = string(default="U,V,T,Q,RELHUM,OMEGA,Z3") # Time-steps per day tpd = integer(default=1) ts_fmt = string(default="ts_only") @@ -111,11 +121,13 @@ input_component = string(default="") [[__many__]] area_nm = string(default=None) cmip_metadata = string(default=None) + cmip_plevdata = string(default=None) dpf = integer(default=None) extra_vars = string(default=None) tpd = integer(default=None) ts_fmt = string(default=None) input_component = string(default=None) + interp_vars = string(default=None) [tc_analysis] # NOTE: always overrides value in [default] input_files = string(default="eam.h2") # The scratch directory scratch = string(default="") +[pcmdi_diags] +backend = string(default="mpl") +cfg = string(default="") +# File of cmip variable lists (cmip6 convention) +cmip_variables = string(default="pcmdi_diags/cmip_variables.json") +# File of specified regions for mean climate calculation +regions_specs = string(default="pcmdi_diags/regions_specs.json") +# File of derived variables +derived_variable = string(default="pcmdi_diags/derived_variable.json") +# File of observation data name for mean climate calculation +reference_alias = string(default="pcmdi_diags/reference_alias.json") +# File of function to generate land/sea mask +process_sftlf = string(default="pcmdi_diags/generate_sftlf.py") +# File of function to generate mean climate metrics figure +clim_plot_parser = string(default="pcmdi_diags/mean_climate_plot_parser.py") +# File of module to plot mean climate metrics figure +clim_plot_driver = string(default="pcmdi_diags/mean_climate_plot_driver.py") +# Path to observation time-series data +# Required for "mean_climate","variability_mode","enso" +obs_ts = string(default="") +# observational data sets (see reference_alias.json) +# observation data tag in reference_alias +obs_sets = string(default="default") +# options specific for constructing pcmdi preferred file name conventions +# required for "model_vs_obs" comparison +cmip_name = string(default="e3sm.historical.v3-LR.0051") +# required for "model_vs_model" comparison +cmip_name_ref = string(default="e3sm.historical.v3-LR.0051") +# options shared by pcmdi +pmp_debug = string(default=False) +# flag to process the land/sea mask within pcmdi +generate_sftlf = string(default=True) +# variables to be used by the pcmdi diagnostics +# needs to be set up for each subsection +vars = string(default="") +# sets of diagnostics from pcmdi package +sets = string_list(default=list("mean_climate","variability_mode_atm","variability_mode_cpl","enso")) +# options to identify subset of pcmdi drivers ("mean_climate","variability_mode","enso") +subset = string(default="") +#options for cmip model metrics data from pcmdi +#group of pcmdi generated cmip metrics data (mip.exp.version) +pcmdi_data_set = string(default="cmip6.historical.v20220928") +# path to pcmdi generated cmip metrics data +pcmdi_data_path = string(default="") +########################################################################################## +# The options below follow the setup in e3sm_diags but are used for the PCMDI workflow
########################################################################################## +# See url +multiprocessing = boolean(default=True) +# See url +num_workers = integer(default=24) +# See url +figure_format = string(default="png") +# comparison type (same as e3sm_diags) +run_type = string(default="model_vs_obs") +# Used to label the results directory +# Options are "model_vs_obs" and "model_vs_model" +tag = string(default="model_vs_obs") +########################################################################################### +# Required for run_type="model_vs_model" runs; different from e3sm_diags, +# model_vs_model in pcmdi refers to the comparison of two model simulations +# with observations and cmip models. +########################################################################################### +#path for reference model data (time series) +reference_data_path_ts = string(default="") +# pcmdi_diags.py will set to match `years` if not specified +ref_years = string_list(default=list("")) +# End year (i.e., the last year to use) for the reference data +ref_end_yr = string(default="") +# Final year (i.e., the last available year) for the reference data +ref_final_yr = string(default="") +# Start year for the reference data +ref_start_yr = string(default="") +# reference model name +ref_name = string(default="") +# The years increment for reference data +ts_num_years_ref = integer(default=5) +# Set to true to swap test and ref when run_type="model_vs_model" +swap_test_ref = boolean(default=False) +########################################################################################## +# options for pcmdi mode variability diagnostics +# vars = "psl" for atm_modes +# vars = "ts" for cpl_modes +######################################################################################### +#name of atmospheric variability modes +atm_modes = string_list(default=list("NAM","NAO","PNA","NPO","SAM","PSA1","PSA2")) +#name of coupled variability modes +cpl_modes = string_list(default=list("PDO","NPGO","AMO")) +#keywords for unit conversion in pcmdi (model) +ModUnitsAdjust = string(default="") +#keywords for unit conversion in pcmdi (observation) +ObsUnitsAdjust = string(default="") +#frequency of the model data +frequency = string(default="mo") +#options specific for mode variability metrics in pcmdi +seasons = string(default="monthly") +landmask = string(default=False) +RmDomainMean = string(default=True) +EofScaling = string(default=False) +ConvEOF = string(default=True) +CBF = string(default=True) +cmec = string(default=True) +update_json = string(default=False) +plot_obs = string(default=True) +plot = string(default=True) +nc_out_obs = string(default=True) +nc_out = string(default=True) +########################################################################################## +# options for pcmdi enso diagnostics +# vars = "psl,pr,prsn,ts,tas,tauu,tauv,hflx,hfss,rlds,rsds,rlus,rlut,rsdt" +########################################################################################## +groups = string_list(default=list("ENSO_perf","ENSO_proc","ENSO_tel")) +########################################################################################## +# optional for mean climate diagnostics +# vars = "pr,prw,psl,rlds,rldscs,rltcre,rstcre,rlut,rlutcs,rsds,rsdscs,rsdt,rsus,rsuscs, +# rlus,rsut,rtmt,sfcWind,tas,tauu,tauv,ts,ta-200,ta-850,ua-200,ua-850,va-200, +# va-850,zg-500"
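+# For example, a reduced (hypothetical) selection for a quick run:
+# vars = "pr,tas,psl,rlut"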
+########################################################################################## +# model data grid after remapping +grid = string(default="180x360_aave") +#flag to turn on regional mean climate metrics +regional = string(default="y") +#default regions for mean climate metrics data +regions = string(default="global,ocean,land,NHEX,SHEX,TROPICS,NHEX_ocean,SHEX_ocean,NHEX_land,SHEX_land,ocean_50S50N") +# save derived climatology data +save_test_clims = string(default=True) +# method to determine the way to process mean climate data +# default uses nco instead of pcmdi built-in function +climatology_process_method = string(default="nco") +# Regridding by pcmdi (default is to regrid data to 2.5x2.5 grid for diagnostic metrics) +# Required for mean climate +# OPTIONS: '2.5x2.5' or an actual cdms2 grid object +target_grid = string(default="2.5x2.5") +# OPTIONS: String for description of the selected grid +target_grid_string = string(default="2p5x2p5") +# OPTIONS: 'regrid2','esmf' +regrid_tool = string(default="esmf") +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method = string(default="regrid2") +# OPTIONS: 'linear','conservative', only if tool is esmf +regrid_method_ocn = string(default="conservative") +# setup for parallel coordinate plots (hide markers for a single model) +parcord_show_markers = string(default=False) +# setup for portrait plots (add vertical line to separate test and reference models) +portrait_vertical_line = string(default=True) + + [[__many__]] + backend = string(default=None) + cfg = string(default=None) + vars = string(default=None) + grid = string(default=None) + cmip_metadata = string(default=None) + cmip_variables = string(default=None) + pcmdi_data_set = string(default=None) + pcmdi_data_path = string(default=None) + derived_variable = string(default=None) + reference_alias = string(default=None) + regions_specs = string(default=None) + process_sftlf = string(default=None) + multiprocessing = boolean(default=None) + num_workers = integer(default=None) + obs_ts = string(default=None) + figure_format = string(default=None) + ref_end_yr = string(default=None) + ref_final_yr = string(default=None) + ref_name = string(default=None) + ref_start_yr = string(default=None) + ref_years = string_list(default=None) + reference_data_path_ts = string(default=None) + run_type = string(default=None) + sets = string_list(default=None) + swap_test_ref = boolean(default=None) + tag = string(default=None) + ts_num_years_ref = integer(default=None) + climatology_process_method = string(default=None) + target_grid = string(default=None) + target_grid_string = string(default=None) + regrid_tool = string(default=None) + regrid_method = string(default=None) + regrid_method_ocn = string(default=None) + obs_sets = string(default=None) + regions = string(default=None) + regional = string(default=None) + save_test_clims = string(default=None) + seasons = string(default=None) + RmDomainMean = string(default=None) + EofScaling = string(default=None) + ConvEOF = string(default=None) + CBF = string(default=None) + cmec = string(default=None) + update_json = string(default=None) + subset = string(default=None) + landmask = string(default=None) + frequency = string(default=None) + generate_sftlf = string(default=None) + atm_modes = string_list(default=None) + cpl_modes = string_list(default=None) + groups = string_list(default=None) + ModUnitsAdjust = string(default=None) + ObsUnitsAdjust = string(default=None) + cmip_name = string(default=None) + cmip_name_ref =
string(default=None) + pmp_debug = string(default=None) + nc_out_obs = string(default=None) + nc_out = string(default=None) + plot_obs = string(default=None) + plot = string(default=None) + parcord_show_markers = string(default=None) + portrait_vertical_line = string(default=None) + [e3sm_diags] # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html backend = string(default="mpl") cfg = string(default="") # Name of the frequency from `[climo]` to use for "diurnal_cycle" runs +# Required for "diurnal_cycle" runs climo_diurnal_frequency = string(default="") # Name of the subsection of `[climo]` to use for "diurnal_cycle" runs climo_diurnal_subsection = string(default="") @@ -154,16 +377,18 @@ output_format = string_list(default=list("png")) # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html output_format_subplot = string_list(default=list()) # End year (i.e., the last year to use) for the reference data +# Required for "tropical_subseasonal" runs ref_end_yr = string(default="") # Final year (i.e., the last available year) for the reference data # Required for "qbo" runs -# Required for run_type="model_vs_model" "enso_diags"/"streamflow"/"tc_analysis" runs +# Required for run_type="model_vs_model" "enso_diags"/"streamflow"/"tc_analysis"/"tropical_subseasonal" runs ref_final_yr = string(default="") # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html # Required for run_type="model_vs_model" ref_name = string(default="") # Start year for the reference data -# Required for "enso_diags"/"qbo"/"streamflow"/"tc_analysis" runs +# Required for "enso_diags"/"qbo" runs +# Required for run_type="model_vs_model" "tropical_subseasonal"/"streamflow"/"tc_analysis" runs ref_start_yr = string(default="") # The ref years to run; "1:100:20" would mean process years 1-100 in 20-year increments # Recommended for run_type="model_vs_model" @@ -181,32 +406,18 @@ reference_data_path_tc = string(default="") # `reference_data_path` but for "enso_diags"/"qbo"/"area_mean_time_series" runs # Required for run_type="model_vs_model" "enso_diags"/"qbo"/"area_mean_time_series" runs reference_data_path_ts = string(default="") -# Required for "tropical_subseasonal" runs +# Required for run_type="model_vs_model" "tropical_subseasonal" runs reference_data_path_ts_daily = string(default="") # `reference_data_path` but for "streamflow" runs # Required for run_type="model_vs_model" "streamflow" runs reference_data_path_ts_rof = string(default="") # See https://e3sm-project.github.io/e3sm_diags/_build/html/master/available-parameters.html -# Some parameters are required if run_type="model_vs_model": `diff_title`, `ref_name`, `reference_data_path`, `short_ref_name` -# Required for "model_vs_model" "area_mean_time_series" runs: `reference_data_path_ts`, `ts_num_years_ref`, `ts_subsection` -# Required for "model_vs_model" "diurnal_cycle" runs: `reference_path_climo_diurnal` -# Required for "model_vs_model" "enso_diags" runs: `ref_final_yr`, `ref_start_yr`, `reference_data_path_ts`, `ts_num_years_ref`, `ts_subsection` -# Required for "model_vs_model" "qbo" runs: `reference_data_path_ts`, `ts_num_years_ref`, `ts_subsection` -# Required for "model_vs_model" "streamflow" runs: `gauges_path`, `ref_final_yr`, `ref_start_yr`, `reference_data_path_ts_rof`, `ts_num_years_ref`, `ts_subsection` -# Required for "model_vs_model" "tc_analysis" runs: `ref_final_yr`, `ref_start_yr`, `reference_diurnal_path_tc` -# Required for 
"model_vs_model" "tropical_subseasonal" runs: `ref_final_yr`, `ref_start_yr`, `reference_data_path_ts_daily`, `ts_num_years_ref`, `ts_subsection` +# Some additional parameters are required if run_type="model_vs_model" +# Search for `Required for run_type="model_vs_model"` in this file. run_type = string(default="model_vs_obs") # The sets to run -# All available sets (16) = "aerosol_aeronet","aerosol_budget","annual_cycle_zonal_mean","area_mean_time_series","cosp_histogram","diurnal_cycle","enso_diags","lat_lon","meridional_mean_2d","polar","qbo","streamflow","tc_analysis","zonal_mean_2d","zonal_mean_2d_stratosphere","zonal_mean_xy" -# -# A subset of these are provided as a default below. These 10 sets can be run as long as standard climo files are generated. -# -# The 6 additional sets can be included if the appropriate input is available: "area_mean_time_series","diurnal_cycle","enso_diags","qbo","streamflow","tc_analysis" -# 3 of these require time-series data and for `obs_ts` and `ref_start_yr` to be set: "area_mean_time_series","enso_diags","qbo" -# "diurnal_cycle" requires `climo_diurnal_subsection`, `climo_diurnal_frequency`, and `dc_obs_climo` to be set. -# "qbo" requires `ref_final_yr` to be set. -# "streamflow" requires `streamflow_obs_ts` to be set. -# "tc_analysis" requires `tc_obs` to be set. +# All available sets (17) = "aerosol_aeronet","aerosol_budget","annual_cycle_zonal_mean","area_mean_time_series","cosp_histogram","diurnal_cycle","enso_diags","lat_lon","meridional_mean_2d","polar","qbo","streamflow","tc_analysis", "tropical_subseasonal", "zonal_mean_2d","zonal_mean_2d_stratosphere","zonal_mean_xy" +# To find the parameters required for a set, search for the set's name in this file. # The order of the `sets` list is the order the sets will show up in E3SM Diags. # `sets` below are ordered by 1) core or speciality and then 2) older to newer. 
sets = string_list(default=list("lat_lon","zonal_mean_xy","zonal_mean_2d","polar","cosp_histogram","meridional_mean_2d","annual_cycle_zonal_mean","zonal_mean_2d_stratosphere","aerosol_aeronet","aerosol_budget")) @@ -225,10 +436,10 @@ tag = string(default="model_vs_obs") # Path to observation data for "tc_analysis" runs tc_obs = string(default="") # The years increment for reference data -# Required for run_type="model_vs_model" "enso_diags"/"qbo"/"area_mean_time_series/streamflow" runs +# Required for run_type="model_vs_model" "enso_diags"/"qbo"/"area_mean_time_series"/"tropical_subseasonal"/"streamflow" runs ts_num_years_ref = integer(default=5) -# Name of the `[ts]` subtask for "enso_diags"/"qbo"/"area_mean_time_series"/"streamflow" runs -# Required for run_type="model_vs_model" "enso_diags"/"qbo"/"area_mean_time_series"/"streamflow" runs +# Name of the `[ts]` subtask to depend on +# Required for run_type="model_vs_model" "enso_diags"/"qbo"/"area_mean_time_series"/"tropical_subseasonal"/"streamflow" runs ts_subsection = string(default="") # Required for "tropical_subseasonal" run ts_daily_subsection = string(default="") @@ -335,8 +546,8 @@ cfg = string(default="ilamb/cmip.cfg") ilamb_obs = string(default="") # for land_only run land_only = boolean(default=False) -ts_atm_subsection = string(default="atm_monthly_180x360_aave") -ts_land_subsection = string(default="land_monthly") +ts_atm_subsection = string(default="") +ts_land_subsection = string(default="") # Name of the grid used by the relevant `[ts]` `atm` task ts_atm_grid = string(default="180x360_aave") # Name of the grid used by the relevant `[ts]` `land` task diff --git a/zppy/templates/e3sm_diags.bash b/zppy/templates/e3sm_diags.bash index b32eed54..b94dbd43 100644 --- a/zppy/templates/e3sm_diags.bash +++ b/zppy/templates/e3sm_diags.bash @@ -207,10 +207,12 @@ create_links_ts ${ts_dir_source} ${ts_dir_ref} ${ref_Y1} ${ref_Y2} 6 {%- endif %} {%- endif %} +{%- if "tropical_subseasonal" in sets %} ts_daily_dir={{ output }}/post/atm/{{ grid }}/ts/daily/{{ '%dyr' % (ts_num_years) }} {% if run_type == "model_vs_model" %} ts_daily_dir_ref={{ reference_data_path_ts_daily }}/{{ ts_num_years_ref }}yr {%- endif %} +{%- endif %} {%- if "streamflow" in sets %} {% if run_type == "model_vs_obs" %} @@ -227,9 +229,6 @@ create_links_ts_rof ${ts_rof_dir_source} ${ts_rof_dir_ref} ${ref_Y1} ${ref_Y2} 8 {%- endif %} {%- endif %} -{% if run_type == "model_vs_obs" %} -ref_name={{ ref_name }} -{%- endif %} {% if (run_type == "model_vs_model") and keep_mvm_case_name_in_fig %} ref_name={{ ref_name }} {%- endif %} @@ -390,7 +389,7 @@ trop_param.short_ref_name = '{{ short_ref_name }}' ref_start_yr = {{ ref_start_yr }} ref_end_yr = {{ ref_end_yr }} trop_param.ref_start_yr = f'{ref_start_yr:04}' -trop_param.ref_end_yr = f'{ref_final_yr:04}' +trop_param.ref_end_yr = f'{ref_end_yr:04}' # Optionally, swap test and reference model if {{ swap_test_ref }}: trop_param.test_data_path, trop_param.reference_data_path = trop_param.reference_data_path, trop_param.test_data_path @@ -479,7 +478,6 @@ params.append(dc_param) {%- if "streamflow" in sets %} streamflow_param = StreamflowParameter() -streamflow_param.reference_data_path = '{{ streamflow_obs_ts }}' streamflow_param.test_data_path = '${ts_rof_dir_primary}' streamflow_param.test_name = short_name streamflow_param.test_start_yr = start_yr diff --git a/zppy/templates/e3sm_to_cmip/default_metadata.json b/zppy/templates/e3sm_to_cmip/default_metadata.json old mode 100644 new mode 100755 diff --git 
a/zppy/templates/e3sm_to_cmip/vrt_remap_plev19.nc b/zppy/templates/e3sm_to_cmip/vrt_remap_plev19.nc new file mode 100755 index 00000000..2b71cc29 Binary files /dev/null and b/zppy/templates/e3sm_to_cmip/vrt_remap_plev19.nc differ diff --git a/zppy/templates/global_time_series.bash b/zppy/templates/global_time_series.bash old mode 100644 new mode 100755 diff --git a/zppy/templates/pcmdi_diags.bash b/zppy/templates/pcmdi_diags.bash new file mode 100755 index 00000000..07e9a250 --- /dev/null +++ b/zppy/templates/pcmdi_diags.bash @@ -0,0 +1,1544 @@ +#!/bin/bash +{% include 'slurm_header.sh' %} + +{{ environment_commands }} + +# Turn on debug output if needed +debug={{ debug }} +if [[ "${debug,,}" == "true" ]]; then + set -x +fi + +# Need this setup as otherwise we cannot generate diagnostics +export UCX_SHM_DEVICES=all # or not set UCX_NET_DEVICES at all + +# Make sure UVCDAT doesn't prompt us about anonymous logging +export UVCDAT_ANONYMOUS_LOG=False + +# Script dir +cd {{ scriptDir }} + +# Get jobid +id=${SLURM_JOBID} + +# Update status file +STARTTIME=$(date +%s) +echo "RUNNING ${id}" > {{ prefix }}.status + +# Basic definitions +case="{{ case }}" +www="{{ www }}" +y1={{ year1 }} +y2={{ year2 }} +Y1="{{ '%04d' % (year1) }}" +Y2="{{ '%04d' % (year2) }}" +{% if run_type == "model_vs_model" %} +ref_Y1="{{ '%04d' % (ref_year1) }}" +ref_Y2="{{ '%04d' % (ref_year2) }}" +{%- endif %} +run_type="{{ run_type }}" +tag="{{ tag }}" + +results_dir=${tag}_${Y1}-${Y2} + +ref_name={{ ref_name }} + +#info for pcmdi specific data structure +case_id=v$(date '+%Y%m%d') + +# Create temporary workdir +workdir=`mktemp -d tmp.${id}.XXXX` +cd ${workdir} + +# Create results directory +if [ ! -d ${results_dir} ];then + mkdir -p ${results_dir} +fi +#directory to save land/sea mask generated by pcmdi +fixed_dir="${results_dir}/fixed" +if [ ! -d ${fixed_dir} ];then + mkdir -p ${fixed_dir} +fi + +# Prepare data files for pcmdi diagnostics, which is achieved in two steps: +# (a) convert e3sm output to cmip type, which uses the "e3sm_to_cmip" function +# available at zppy (modifications are made to process more variables and +# 3D fields at fixed pressure levels).
+# (b) locate observations in e3sm diagnostics and convert them to the pcmdi preferred +# data format +#file to specify reference data used to derive the diagnostic metrics +cat > reference_alias.json << EOF +{% include reference_alias %} +EOF +#regions specified to derive global/regional mean metrics +cat > regions_specs.json << EOF +{% include regions_specs %} +EOF +#file of derived variables +cat > derived_variable.json << EOF +{% include derived_variable %} +EOF +#file to generate land/sea mask data if not available +cat > generate_sftlf.py << EOF +{% include process_sftlf %} +EOF + +{%- if ("mean_climate" in sets) %} +#file to generate figures for mean climate metrics (temporary) +cat > mean_climate_plot_parser.py << EOF +{% include clim_plot_parser %} +EOF +#file to generate figures for mean climate metrics (temporary) +cat > mean_climate_plot_driver.py << EOF +{% include clim_plot_driver %} +EOF +{%- endif %} + +# script for pcmdi pre-processing +cat > collect_data.py << EOF +import os +import subprocess +import time +import psutil +import json +import sys +import glob +import collections +import cdms2 +import gc +import numpy as np +from re import split +from itertools import chain +from shutil import copyfile +from subprocess import Popen, PIPE, call + +def childCount(): + current_process = psutil.Process() + children = current_process.children() + return(len(children)) + +def combine_time_series(variables, start_yr, end_yr, num_years, + cmip_name, dir_source, out_dic_file, outpath, + multiprocessing, num_workers): + #special case treatment (variables not in cmip cmor list) + altmod_dic = {"sst" : "ts", + "taux" : "tauu", + "tauy" : "tauv", + "rstcre" : "SWCF", + "rltcre" : "LWCF"} + # list of model data dictionary + var_list = []; lstcm0 = []; lstcm1 = [] + mod_out = collections.OrderedDict() + for key in variables: + if "_" in key or "-" in key: + var = split("_|-", key)[0] + else: + var = key + varin = var + if var in ["areacella", "sftlf", "orog"]: + fpaths = sorted(glob.glob(os.path.join(dir_source,var+"_*.nc"))) + for fpath in fpaths: + if os.path.exists(fpath): + output = os.path.join(outpath,"{}_fx_{}.nc".format(var,product)) + copyfile(fpath,output) + del(fpaths) + else: + fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) + ######################################################################################### + #code below attempts to address special scenarios + if len(fpaths) < 1 and var in altmod_dic.keys(): + varin = altmod_dic.get(var,var) + if varin == "SWCF" or varin == "LWCF": + dir_source1 = "/".join(dir_source.split("/")[0:-2])+"/ts/monthly/{{ts_num_years}}yr" + fpaths = sorted(glob.glob(os.path.join(dir_source1,varin+"_*.nc"))) + else: + fpaths = sorted(glob.glob(os.path.join(dir_source,varin+"_*.nc"))) + ######################################################################################### + if len(fpaths) > 0: + tableId = fpaths[0].split("/")[-1].split("_")[1] + if tableId not in [ "Amon", "Lmon", "Omon", "SImon" ]: + tableId = "Amon" + yms = '{:04d}01'.format(start_yr) + yme = '{:04d}12'.format(end_yr) + fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( + cmip_name.split(".")[0], + cmip_name.split(".")[1], + cmip_name.split(".")[2].replace(".","-"), + cmip_name.split(".")[3], + tableId,var,yms,yme) + output = os.path.join(outpath,fname) + if (var not in var_list) or (not os.path.exists(output)): + var_list.append(var) + cmd_list = [] + cmd_list.append("ncrcat -v {} -d time,{}-01-01,{}-12-31".format(varin,yms[0:4],yme[0:4])) + for fpath
in fpaths: + cmd_list.append(fpath) + cmd_list.append(output) + cdm0 = (" ".join(cmd_list)) + lstcm0.append(cdm0) + del(cmd_list,cdm0) + if varin != var: + cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) + lstcm1.append(cmd_extra) + del(cmd_extra) + ############################################################ + #record the test model data information + mod_out[var] = { "mip" : cmip_name.split(".")[0], + "exp" : cmip_name.split(".")[1], + "model" : cmip_name.split(".")[2].replace(".","-"), + "realization": cmip_name.split(".")[3], + "tableId" : tableId, + "file_path" : output, + "template" : fname, + "start_yymm" : yms, + "end_yymm" : yme, + "varin" : varin } + del(tableId,yms,yme,fname,output) + del(fpaths) + del(var,varin) + gc.collect() + # Save test model data information required for next step + json.dump(mod_out, + open(out_dic_file, "w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + del(mod_out,variables,altmod_dic) + + #finally process the data in parallel + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + lstall = list(chain(lstcm0,lstcm1)) + lensub = [len(lstcm0),len(lstcm1)] + lensub = np.cumsum(lensub) - 1 + print("Number of jobs starting is ", str(len(lstall))) + procs = [] + for i,p in enumerate(lstall): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstall)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstall[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + del(lstall,lensub,lstcm0,lstcm1) + + #set a delay to ensure all processes are fully done + time.sleep(1) + print("done submitting") + + if len(var_list) > 0: + print("# of variables available for diagnostics: ", len(var_list)) + else: + exit("ERROR: cannot find model variables to process....") + + return var_list + +def locate_ts_observation (variables, obs_sets, start_yr, end_yr, + input_path, out_dic_file, outpath, + multiprocessing, num_workers): + # fixed observational name convention to be consistent with cmip + mip = "obs"; realization = "00"; tableId = "Amon" + # special case treatment (these obs vars are inconsistent with cmor vars) + altobs_dic = { "pr" : "PRECT", + "sst" : "ts", + "sfcWind" : "si10", + "taux" : "tauu", + "tauy" : "tauv", + "rltcre" : "toa_cre_lw_mon", + "rstcre" : "toa_cre_sw_mon", + "rtmt" : "toa_net_all_mon"} + + # find and process observational data available in e3sm_diags + var_list = []; lstcm0 = []; lstcm1 = [] + obs_dic = json.load(open(os.path.join('.','reference_alias.json'))) + obs_out = collections.OrderedDict() + for i,key in enumerate(variables): + if "_" in key or "-" in key: + var = split("_|-", key)[0] + else: + var = key + if len(obs_sets) != len(variables): + option = obs_sets[0] + else: + option = obs_sets[i] + if "default" in obs_sets or "alternate" in obs_sets: + obstag = obs_dic[var][option] + else: + inv_map = {v: k for k, v in obs_dic[var].items()} + if len(obs_sets) != len(variables): + obstag = obs_sets[0] + else: + obstag = obs_sets[i] + option = inv_map[obstag] + del(inv_map) + varin = var + if "ceres_ebaf" in obstag: + fpaths =
sorted(glob.glob(os.path.join(input_path, + obstag.replace('ceres_ebaf','ceres_ebaf*'), + varin+"_*.nc"))) + if len(fpaths) < 1 and var in altobs_dic.keys(): + varin = altobs_dic.get(var,var) + fpaths = sorted(glob.glob(os.path.join(input_path, + obstag.replace('ceres_ebaf','ceres_ebaf*'), + varin+"_*.nc"))) + else: + fpaths = sorted(glob.glob(os.path.join(input_path,obstag,var+"_*.nc"))) + if len(fpaths) < 1 and var in altobs_dic.keys(): + varin = altobs_dic.get(var,var) + fpaths = sorted(glob.glob(os.path.join(input_path,obstag,varin+"_*.nc"))) + + if len(fpaths) > 0 and os.path.exists(fpaths[0]): + template = fpaths[0].split("/")[-1] + obsname = fpaths[0].split("/")[-2] + fyms = template.split("_")[-2][0:6] + fyme = template.split("_")[-1][0:6] + yms = '{:04d}{:02d}'.format(start_yr,1) + yme = '{:04d}{:02d}'.format(end_yr,12) + if int(yms) < int(fyms): + yms = fyms + if int(yme) > int(fyme): + yme = fyme + + #rename file following cmip-like convention + fname = "{}.{}.{}.{}.{}.{}.{}-{}.nc".format( + mip,option,obsname.replace(".","-"),realization,tableId,var,yms,yme) + output = os.path.join(outpath,fname) + if (var not in var_list) or (not os.path.exists(output)): + var_list.append(var) + cmd = "ncrcat -v {} -d time,{}-01-01,{}-12-31 {} {}".format( + varin,yms[0:4],yme[0:4],fpaths[0],output) + lstcm0.append(cmd); del(cmd) + if var != varin: + cmd_extra = "ncrename -v {},{} {}".format(varin,var,output) + lstcm1.append(cmd_extra) + del(cmd_extra) + + #record the observation information + obs_out[var] = { "mip" : mip, + "exp" : option, + "realization" : realization, + "tableId" : tableId, + "model" : obsname, + "file_path" : output, + "template" : fname, + "start_yymm" : yms, + "end_yymm" : yme, + "varin" : varin} + del(template,obsname,fyms,fyme,yms,yme,fname,output) + else: + print("warning: reference data not found for", var) + del(var,varin,option,obstag) + gc.collect() + + # Save observational information required for next step + json.dump(obs_out, + open(out_dic_file,"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) + del(obs_dic,obs_out,obs_sets,altobs_dic) + + #finally process the data in parallel + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + lstall = list(chain(lstcm0,lstcm1)) + lensub = [len(lstcm0),len(lstcm1)] + lensub = np.cumsum(lensub) - 1 + print("Number of jobs starting is ", str(len(lstall))) + procs = [] + for i,p in enumerate(lstall): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstall)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstall[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + del(lstall,lensub,lstcm0,lstcm1) + + #set a delay to allow the writing process to finish + time.sleep(1) + print("done submitting") + + if len(var_list) > 0: + print("# of variables in observations: ", len(var_list)) + else: + exit("ERROR: cannot find model variables to process....") + + return var_list + +def main(): + #basic information + start_yr = int('${Y1}') + end_yr = int('${Y2}') + num_years = end_yr - start_yr + 1 + + multiprocessing = {{multiprocessing}} + num_workers =
{{num_workers}} + + # Model + # Test data directory +{% if run_type == "model_vs_obs" %} + test_data_dir = 'ts' +{% elif run_type == "model_vs_model" %} + test_data_dir = 'ts_test' +{%- endif %} + test_name = '${case}' + test_start_yr = start_yr + test_end_yr = end_yr + test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' + + #info for pcmdi data structure + test_cmip_name = '{{cmip_name}}' + + #Ref +{% if run_type == "model_vs_obs" %} + # Obs + reference_dir_source = '{{ obs_ts }}' + ref_data_dir = 'ts_ref' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = ref_start_yr + num_years - 1 + if (ref_end_yr <= {{ ref_final_yr }}): + ref_end_yr = ref_end_yr + else: + ref_end_yr = {{ ref_final_yr }} +{% elif run_type == "model_vs_model" %} + # Reference + reference_dir_source = '{{ reference_data_path_ts }}' + ref_data_dir = 'ts_ref' + ref_name = '${ref_name}' + short_ref_name = '{{ short_ref_name }}' + ref_start_yr = {{ ref_start_yr }} + ref_end_yr = {{ ref_final_yr }} + #info for pcmdi data structure + ref_cmip_name = '{{ cmip_name_ref }}' + + # Optionally, swap test and reference model + if {{ swap_test_ref }}: + test_data_dir, ref_data_dir = ref_data_dir, test_data_dir + test_name, ref_name = ref_name, test_name + short_test_name, short_ref_name = short_ref_name, short_test_name + ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +{%- endif %} + + ################################################################ + # process test model data for comparison + ################################################################ + # variable list in configuration file # + variables = list("{{ vars }}".split(",")) + print("process test model data for comparison") + test_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)) + cmor_vars = combine_time_series(variables,test_start_yr,test_end_yr, + int({{ts_num_years}}),test_cmip_name, + test_dir_source,test_dic_file,test_data_dir, + multiprocessing,num_workers) + ################################################################ + # process reference data for comparison + ################################################################ + print("process reference obs/model data for comparison") +{% if run_type == "model_vs_obs" %} + obs_sets = list('{{ obs_sets }}'.split(",")) + refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) + refr_vars = locate_ts_observation(cmor_vars,obs_sets, + ref_start_yr,ref_end_yr, + reference_dir_source, + refr_dic_file,ref_data_dir, + multiprocessing,num_workers) + + print("# of variables in test model: ", len(cmor_vars)) + print("# of variables in reference model: ", len(refr_vars)) + del(refr_vars,cmor_vars) +{% elif run_type == "model_vs_model" %} + refr_dic_file = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)) + refr_vars = combine_time_series(cmor_vars,ref_start_yr,ref_end_yr, + int({{ts_num_years_ref}}),ref_cmip_name, + reference_dir_source,refr_dic_file,ref_data_dir, + multiprocessing,num_workers) + + print("# of variables in test model: ", len(cmor_vars)) + print("# of variables in reference model: ", len(refr_vars)) + del(refr_vars,cmor_vars) +{%- endif %} + +if __name__ == "__main__": + main() + +EOF + +################################ +# Pcmdi pre-processing to link +# required data to work directory +command="python -u collect_data.py" +time ${command} +if [ $?
!= 0 ]; then + cd {{ scriptDir }} + echo 'ERROR (9)' > {{ prefix }}.status + exit 9 +fi + +################################################################ +# generate input parameter for pcmdi metrics driver +{%- if ("mean_climate" in sets) or ("variability_mode" in sets) or ("enso" in sets) %} +cat > parameterfile.py << EOF +import os +import sys +import json + +#basic information +start_yr = int('${Y1}') +end_yr = int('${Y2}') +num_years = end_yr - start_yr + 1 + +# Model +# Test data path +{% if run_type == "model_vs_obs" %} +test_data_dir = 'ts' +{% elif run_type == "model_vs_model" %} +test_data_dir = 'ts_test' +{%- endif %} +test_name = '${case}' +test_start_yr = start_yr +test_end_yr = end_yr +test_dir_source='{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly' +test_cmip_name = '{{ cmip_name }}' + +# Ref +{% if run_type == "model_vs_obs" %} +# Obs +reference_dir_source = '{{ obs_ts }}' +ref_data_dir = 'ts_ref' +ref_start_yr = {{ ref_start_yr }} +ref_end_yr = ref_start_yr + num_years - 1 +if (ref_end_yr <= {{ ref_final_yr }}): + ref_end_yr = ref_end_yr +else: + ref_end_yr = {{ ref_final_yr }} +{% elif run_type == "model_vs_model" %} +# Reference +reference_dir_source = '{{ reference_data_path_ts }}' +ref_data_dir = 'ts_ref' +ref_name = '${ref_name}' +short_ref_name = '{{ short_ref_name }}' +ref_start_yr = {{ ref_start_yr }} +ref_end_yr = {{ ref_final_yr }} +ref_cmip_name = '{{ cmip_name_ref }}' + +# Optionally, swap test and reference model +if {{ swap_test_ref }}: + test_data_dir, ref_data_dir = ref_data_dir, test_data_dir + test_name, ref_name = ref_name, test_name + short_test_name, short_ref_name = short_ref_name, short_test_name + ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name +{%- endif %} + +# shared options +case_id = "${case_id}" + +# Record NetCDF output +nc_out_obs = {{ nc_out_obs }} +nc_out = {{ nc_out }} +if nc_out: + ext = ".nc" +else: + ext = ".xml" + +user_notes = 'Provenance and results' +parallel = False +debug = {{ pmp_debug }} + +# Generate plots +plot = {{ plot }} +plot_obs = {{ plot_obs }} # optional + +# Additional settings +run_type = '{{ run_type }}' +figure_format = '{{ figure_format }}' + +{%- if "mean_climate" in subset %} +############################################################# +#parameter setup specific for mean climate metrics +############################################################# +mip = test_cmip_name.split(".")[0] +exp = test_cmip_name.split(".")[1] +product = test_cmip_name.split(".")[2] +realm = test_cmip_name.split(".")[3] +realization = realm + +{% if run_type == "model_vs_obs" %} +test_data_set = [ test_cmip_name.split(".")[2] ] +{% elif run_type == "model_vs_model" %} +test_data_set = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] +{%- endif %} + +modver = "${case_id}" + +# Generate CMEC compliant json +cmec = {{ cmec }} + +# SIMULATION PARAMETER +period = "{:04d}{:02d}-{:04d}{:02d}".format(test_start_yr,1,test_end_yr,12) + +# INTERPOLATION OPTIONS +target_grid = '{{ target_grid }}' # OPTIONS: '2.5x2.5' or an actual cdms2 grid object +targetGrid = target_grid +target_grid_string = '{{ target_grid_string }}' +regrid_tool = '{{ regrid_tool }}' # OPTIONS: 'regrid2','esmf' +regrid_method = '{{ regrid_method }}' # OPTIONS: 'linear','conservative', only if tool is esmf +regrid_tool_ocn = '{{ regrid_tool_ocn }}' # OPTIONS: "regrid2","esmf" +regrid_method_ocn = ( '{{ regrid_method_ocn }}' ) # OPTIONS: 'linear','conservative', only if tool is esmf + +# SAVE INTERPOLATED MODEL CLIMATOLOGIES ? 
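+# (If save_test_clims is True, the regridded test climatologies are written out;
+# their destination, test_clims_interpolated_output, is set below.)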
+save_test_clims = {{ save_test_clims }}
+
+# CUSTOMIZE REGIONS VALUES NAMES
+regions_values = {"land":100.,"ocean":0.}
+
+# predefined regions (loaded from regions_specs.json)
+regions_specs = json.load(open(os.path.join(".",'regions_specs.json')))
+for kk in regions_specs.keys():
+    if "domain" in regions_specs[kk].keys():
+        if "latitude" in regions_specs[kk]['domain'].keys():
+            regions_specs[kk]['domain']['latitude'] = tuple(regions_specs[kk]['domain']['latitude'])
+        if "longitude" in regions_specs[kk]['domain'].keys():
+            regions_specs[kk]['domain']['longitude'] = tuple(regions_specs[kk]['domain']['longitude'])
+
+# regions specified for each variable
+regions = json.load(open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json')))
+
+#######################################
+# DATA LOCATION: MODELS, OBS AND METRICS OUTPUT
+# ---------------------------------------------
+# Templates for model climatology files
+test_data_path = os.path.join(
+    "${results_dir}",
+    "climo",
+    "${case_id}")
+test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(test_data_dir))))
+template = test_dic['ts'][product]['template']
+filename_template = template.replace('ts',"%(variable)").replace(product,"%(model)")
+del(test_dic)
+
+#######################################
+# ROOT PATH FOR OBSERVATIONS
+reference_data_set = list('{{ obs_sets }}'.split(","))
+reference_data_path = os.path.join("${results_dir}","climo","${case_id}")
+observation_file = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(ref_data_dir))
+custom_observations = os.path.abspath(observation_file)
+if not os.path.exists(custom_observations):
+    sys.exit("ERROR: observation climatology file is missing....")
+
+#######################################
+# DIRECTORY AND FILENAME FOR OUTPUTTING METRICS RESULTS
+metrics_in_single_file = 'n'  # 'y' or 'n'
+metrics_output_path = os.path.join(
+    "${results_dir}",
+    "metrics_results",
+    "mean_climate",
+    mip,
+    exp,
+    "%(case_id)"
+)  # All SAME FILE
+############################################################
+# DIRECTORY WHERE TO PUT INTERPOLATED MODELS' CLIMATOLOGIES
+diagnostics_output_path = os.path.join(
+    "${results_dir}",
+    "diagnostic_results",
+    "mean_climate",
+    mip,
+    exp,
+    "%(case_id)"
+)
+
+###########################################
+# Templates for MODEL land/sea mask (sftlf)
+# deprecated in newer versions of pcmdi
+#############################################
+generate_sftlf = {{ generate_sftlf }}
+# template for the model land/sea mask files (name follows the PMP convention;
+# the original dropped this assignment)
+sftlf_filename_template = os.path.join("${fixed_dir}","sftlf_%(model).nc")
+test_clims_interpolated_output = diagnostics_output_path
+
+{%- endif %}
+
+{%- if "variability_mode" in subset %}
+############################################################
+# parameter setup specific for variability mode metrics
+############################################################
+mip = test_cmip_name.split(".")[0]
+exp = test_cmip_name.split(".")[1]
+product = test_cmip_name.split(".")[2]
+
+{% if run_type == "model_vs_obs" %}
+modnames = [ test_cmip_name.split(".")[2] ]
+{% elif run_type == "model_vs_model" %}
+modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ]
+{%- endif %}
+
+realm = test_cmip_name.split(".")[3]
+realization = realm
+
+msyear = test_start_yr
+meyear = test_end_yr
+osyear = ref_start_yr
+oeyear = ref_end_yr
+
+seasons = list('{{ seasons }}'.split(","))
+frequency = '{{ frequency }}'
+
+# from configuration file
+varOBS = '{{vars}}'
+varModel = '{{vars}}'
+ObsUnitsAdjust = {{ ObsUnitsAdjust }}
+ModUnitsAdjust = {{ ModUnitsAdjust }}
+
+# If True, maskout land 
region thus consider only over ocean +landmask = {{ landmask }} + +#open dictional file to locate model and reference files +test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) +modpath = test_dic[varModel]['file_path'] +model = test_dic[varModel]['model'] +if model != product: + print("warning: model {} in dataset differ from user setup {}".format(model,product)) + print("warning: use model in datasets to continue....") + modnames = [model] +del (test_dic) + +#setup template for fixed files (e.g. land/sea mask) +modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") + +#open dictional file to locate reference data +ref_dic = json.load(open(os.path.join("${results_dir}", + '{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) +reference_data_name = ref_dic[varOBS]['model'] +reference_data_path = ref_dic[varOBS]['file_path'] + +#update time for observation if different +ref_syear = str(ref_dic[varOBS]['start_yymm'])[0:4] +ref_eyear = str(ref_dic[varOBS]['end_yymm'])[0:4] +if int(ref_syear) > osyear: + osyear = int(ref_syear) +if int(ref_eyear) < oeyear: + oeyear = int(ref_eyear) +del(ref_dic,ref_syear,ref_eyear) + +####################################### + +# If True, remove Domain Mean of each time step +RmDomainMean = {{ RmDomainMean }} + +# If True, consider EOF with unit variance +EofScaling = {{ EofScaling }} + +# Conduct CBF analysis +CBF = {{ CBF }} + +# Conduct conventional EOF analysis +ConvEOF = {{ ConvEOF }} + +# Generate CMEC compliant json +cmec = {{ cmec }} + +# Update diagnostic file if exist +update_json = {{ update_json }} + +####################################### +results_dir = os.path.join( + "${results_dir}", + "%(output_type)", + "variability_modes", + "%(mip)", + "%(exp)", + "${case_id}", + "%(variability_mode)", + "%(reference_data_name)", +) +{%- endif %} + +{%- if "enso" in subset %} +############################################################ +#parameter setup specific for enso metrics +############################################################ +mip = test_cmip_name.split(".")[0] +exp = test_cmip_name.split(".")[1] + +{% if run_type == "model_vs_obs" %} +modnames = [ test_cmip_name.split(".")[2] ] +{% elif run_type == "model_vs_model" %} +modnames = [ test_cmip_name.split(".")[2], ref_cmip_name.split(".")[2] ] +{%- endif %} + +realm = test_cmip_name.split(".")[3] +realization = realm + +msyear = test_start_yr +meyear = test_end_yr + +osyear = ref_start_yr +oeyear = ref_end_yr + +####################################### +# Model (test) +# setup template for fixed files (e.g. land/sea mask) +modpath_lf = os.path.join("${fixed_dir}","sftlf_%(model).nc") +# construct model template +test_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir)))) +vv0 = list(test_dic.keys())[0] +tableId = test_dic[vv0]['tableId'] +modpath = os.path.join( + test_data_dir, + "%(mip).%(exp).%(model).%(realization)."+tableId+".%(variable)." 
+ + '{:04d}{:02d}-{:04d}{:02d}'.format(msyear,1,meyear,12) + + ".nc") +del(test_dic,vv0) + +# OBSERVATIONS +reference_data_path = {} +reference_data_lf_path = {} +#orgnize obs catalog +ref_dic = json.load(open(os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir)))) +for var in ref_dic: + refname = ref_dic[var]['model'] + if refname not in reference_data_path.keys(): + reference_data_path[refname] = {} + reference_data_path[refname][var] = {'template': ref_dic[var]['template']} + #land/sea mask + reference_data_lf_path[refname] = os.path.join("${fixed_dir}",'sftlf.{}.nc'.format(refname)) + #update time information(minimum overlap) + ref_syear = str(ref_dic[var]['start_yymm'])[0:4] + ref_eyear = str(ref_dic[var]['end_yymm'])[0:4] + if int(ref_syear) > osyear: + osyear = int(ref_syear) + if int(ref_eyear) < oeyear: + oeyear = int(ref_eyear) + del(refname) +del(ref_dic) + +#document the observation catalogue +obs_cmor = True +obs_cmor_path = ref_data_dir +obs_catalogue = 'obs_info_catalogue.json' +json.dump(reference_data_path, + open(obs_catalogue,"w"), + sort_keys=True, + indent=4, + separators=(",", ": ")) +del(reference_data_path) + +# METRICS COLLECTION (ENSO_perf, ENSO_tel, ENSO_proc) +# will set in main driver +# metricsCollection = ENSO_perf # ENSO_perf, ENSO_tel, ENSO_proc + +# OUTPUT +results_dir = os.path.join( + "${results_dir}", + "%(output_type)", + "enso_metric", + "%(mip)", + "%(exp)", + "${case_id}", + "%(metricsCollection)", +) + +json_name = "%(mip)_%(exp)_%(metricsCollection)_${case_id}_%(model)_%(realization)" + +netcdf_name = json_name + +{%- endif %} + +EOF +{%- endif %} + +################################################################ + +# Run PCMDI Diags +echo +echo ===== RUN PCMDI DIAGS ===== +echo + +# Prepare configuration file +cat > pcmdi.py << EOF +import os +import glob +import json +import re +import sys +import cdms2 +import psutil +import numpy as np +import collections +import subprocess +import time +import pcmdi_metrics +from pcmdi_metrics.utils import StringConstructor +from argparse import RawTextHelpFormatter +from shutil import copyfile +from re import split +from itertools import chain +from subprocess import Popen, PIPE, call + +{%- if "mean_climate" in subset %} +from mean_climate_plot_parser import ( + create_mean_climate_plot_parser, +) +from mean_climate_plot_driver import ( + mean_climate_metrics_plot, +) +{%- endif %} + +def childCount(): + current_process = psutil.Process() + children = current_process.children() + return(len(children)) + +def generate_land_sea_mask(data_file,outpath): + data_dic = json.load(open(data_file)) + for var in data_dic: + model = data_dic[var]['model'] + mpath = data_dic[var]['file_path'] + mpath_lf = os.path.join(outpath,"sftlf.{}.nc".format(model)) + # generate land/sea mask if not exist + if not os.path.exists(mpath_lf): + print("generate land/sea mask file....") + return_code = call(['python','generate_sftlf.py',var,model,mpath,mpath_lf],text=False) + else: + return_code = 0 + del(model,mpath,mpath_lf) + del(data_dic) + + return return_code + +{%- if "mean_climate" in subset %} +def calculate_climatology(method,start_yr,end_yr,data_dic,out_dic, + outpath,multiprocessing,num_workers): + + #first check the monthly data dictionary + if not os.path.exists(data_dic): + exit("ERROR: monthly data dictionary file not found...") + else: + data_dic = json.load(open(data_dic)) + + if not os.path.exists(outpath): + os.makedirs(outpath,mode=0o777) + + ##################################### + 
#calculate annual cycle climatology + ##################################### + clim_dic = collections.OrderedDict() + lstcmd = []; lstcm0 = []; lstcm1 = []; lstcm2 = [] + for var in data_dic.keys(): + cyms = '{:04d}-{:02d}'.format(start_yr,1) + cyme = '{:04d}-{:02d}'.format(end_yr,12) + if int(data_dic[var]['start_yymm']) > (start_yr*100+1): + cyms = '{}-{}'.format(str(data_dic[var]['start_yymm'])[0:4], + str(data_dic[var]['start_yymm'])[4:6]) + if int(data_dic[var]['end_yymm']) < (end_yr*100+12): + cyme = '{}-{}'.format(str(data_dic[var]['end_yymm'])[0:4], + str(data_dic[var]['end_yymm'])[4:6]) + infile = data_dic[var]['file_path'] + if os.path.exists(infile): + if method == "pcmdi": + #reform the output file template + outfile = ".".join(data_dic[var]['template'].split(".")[:-2]) + ".nc" + cmd = (" ".join(["pcmdi_compute_climatologies.py", + "--start", cyms, + "--end", cyme, + "--var", var, + "--infile", infile, + "--outpath", outpath+"/", + "--outfilename", outfile ])) + lstcmd.append(cmd); del(cmd,outfile) + else: + # use nco to process mean climatology + # middle month days from January to February + dofm = [15,46,74,105,135,166,196,227,258,288,319,349] + #create a temporary directory to save temporary files + if not os.path.exists("tmpnco"): + os.mkdir("tmpnco",mode=0o777) + #derive annual cycle climate mean + for imon,mday in enumerate(dofm): + tmpfile = os.path.join('tmpnco',"{}_tmp_{:02d}-clim.nc".format(var,imon+1)) + cmd = (" ".join(['ncra -O -h -F -d', + 'time,{},,12'.format(imon+1), + infile,tmpfile])) + lstcmd.append(cmd) + cm0 = (" ".join(['ncatted -O -h -a', + 'units,time,o,c,"days since 0001-01-01 00:00:0.0"', + tmpfile,tmpfile])) + lstcm0.append(cm0) + cm1 = (" ".join(['ncap2 -O -h -s', + "'time=time*0+{};defdim({},{});time_bnds=make_bounds(time,{},{})'".format( + mday,'"bnds"',2,'\$bnds','"time_bnds"'), + tmpfile,tmpfile])) + lstcm1.append(cm1); del(cmd,cm0,cm1,tmpfile) + #derive seasonal and annual mean + for season in ["AC", "DJF", "JJA", "MAM", "SON", "ANN"]: + period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-","")) + outpre = ".".join(data_dic[var]['template'].split(".")[:-2]) + outfile = os.path.join(outpath,".".join([outpre,"{}.{}.{}.nc".format(period,season,"${case_id}")])) + if season == "AC": + cm2 = (" ".join(["ncrcat -O -v {} -d time,0,".format(var), + os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), + outfile])) + elif season == "DJF": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_12-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_01-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_02-clim.nc".format(var)), + outfile])) + elif season == "JJA": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_06-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_07-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_08-clim.nc".format(var)), + outfile])) + elif season == "MAM": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_03-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_04-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_05-clim.nc".format(var)), + outfile])) + elif season == "SON": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_09-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_10-clim.nc".format(var)), + os.path.join('tmpnco',"{}_*_11-clim.nc".format(var)), + outfile])) + elif season == "ANN": + cm2 = (" ".join(["ncra -O -h", + os.path.join('tmpnco',"{}_*_*-clim.nc".format(var)), + outfile])) + lstcm2.append(cm2); del(cm2,period,outfile,outpre) + #document climatology 
info in dictionary file#
+        period = "{}-{}".format(cyms.replace("-",""),cyme.replace("-",""))
+        template = ".".join(data_dic[var]['template'].split(".")[:-2]) + \
+                   ".{}.AC.{}.nc".format(period,"${case_id}")
+        clim_dic[var] = {data_dic[var]['exp'] : data_dic[var]['model'],
+                         data_dic[var]['model'] : {'template' : template,
+                                                   'period' : period,
+                                                   'data_path' : outpath}}
+    # save climatology dictionary
+    json.dump(clim_dic,
+              open(out_dic,"w"),
+              sort_keys=True,
+              indent=4,
+              separators=(",", ": "))
+
+    # finally process the data in parallel
+    if method == "pcmdi":
+        print("Number of jobs starting is ", str(len(lstcmd)))
+        procs = []
+        for i,p in enumerate(lstcmd):
+            print('running %s' % (str(p)))
+            proc = Popen(p, stdout=PIPE, shell=True)
+            if multiprocessing == True:
+                procs.append(proc)
+                while (childCount() > num_workers):
+                    time.sleep(0.25)
+                [pp.communicate() for pp in procs]  # this will get the exit codes
+                procs = []
+            else:
+                if (i == len(lstcmd)-1):
+                    try:
+                        outs, errs = proc.communicate()
+                        if proc.returncode == 0:
+                            print("stdout = {}; stderr = {}".format(str(outs),str(errs)))
+                        else:
+                            exit("ERROR: subprocess {} failed".format(str(lstcmd[i])))
+                    except Exception:
+                        break
+                else:
+                    proc.communicate()
+                    if proc.returncode != 0:
+                        exit("Failed to run {}".format(str(p)))
+    elif method == "nco":
+        lstall = list(chain(lstcmd,lstcm0,lstcm1,lstcm2))
+        lensub = [len(lstcmd),len(lstcm0),len(lstcm1),len(lstcm2)]
+        lensub = np.cumsum(lensub) - 1
+        print("Number of jobs starting is ", str(len(lstall)))
+        procs = []
+        for i,p in enumerate(lstall):
+            print('running %s' % (str(p)))
+            proc = Popen(p, stdout=PIPE, shell=True)
+            if multiprocessing == True:
+                procs.append(proc)
+                while (childCount() > num_workers):
+                    time.sleep(0.25)
+                [pp.communicate() for pp in procs]  # this will get the exit codes
+                procs = []
+            else:
+                if (i == len(lstall)-1):
+                    try:
+                        outs, errs = proc.communicate()
+                        if proc.returncode == 0:
+                            print("stdout = {}; stderr = {}".format(str(outs),str(errs)))
+                        else:
+                            exit("ERROR: subprocess {} failed".format(str(lstall[i])))
+                    except Exception:
+                        break
+                else:
+                    proc.communicate()
+                    if proc.returncode != 0:
+                        exit("Failed to run {}".format(str(p)))
+        # clean up the temporary monthly climatology files
+        for tmpfil in glob.glob(os.path.join('tmpnco',"*_tmp_*-clim.nc")):
+            if os.path.exists(tmpfil):
+                os.remove(tmpfil)
+
+    # short delay to ensure the processing has fully finished
+    time.sleep(1)
+    print("done submitting")
+    del(lstcmd,lstcm0,lstcm1,lstcm2,clim_dic,data_dic)
+
+    return
+
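+# A minimal, self-contained sketch of the arithmetic that
+# calculate_derived_variable() below performs: each entry of
+# derived_variable.json maps a derived field to coefficients applied to its
+# input fields (e.g. rstcre = 1*rsutcs + (-1)*rsut). The dict literal here
+# mirrors that file; the values are hypothetical and this helper is
+# illustrative only, not called by the driver.
+def _derived_variable_example():
+    rules = {"rstcre": {"rsutcs": 1, "rsut": -1}}
+    inputs = {"rsutcs": 240.0, "rsut": 200.0}  # hypothetical global means (W m-2)
+    # linear combination, the same operation applied file-by-file below
+    rstcre = sum(coeff * inputs[name] for name, coeff in rules["rstcre"].items())
+    return rstcre  # -> 40.0
+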
+def calculate_derived_variable(var,data_dic,data_path):
+    #####################################################
+    # this function calculates a derived quantity from the
+    # data documented in the data_dic passed by the user;
+    # derived_variable.json documents the rules used to
+    # calculate the required diagnostic variables
+    #####################################################
+    derive_dic = json.load(open("derived_variable.json"))
+    vsublist = []; operator = []
+    # collect the variables and operation rules for the derivation
+    for vv in derive_dic[var]:
+        vsublist.append(vv)
+        operator.append(derive_dic[var][vv])
+
+    # now search the data files and judge if the derivation is possible
+    l_derive = True
+    for i,vv in enumerate(vsublist):
+        infile = data_dic[vv]['data_path']
+        if i == 0:
+            outfile = infile.replace(vv,var)
+        if (not os.path.exists(infile)) or (os.path.exists(outfile)):
+            l_derive = False
+
+    # finally do the derivation
+    outdic = {}  # stays empty when the derivation is not possible
+    if l_derive:
+        for i,vv in enumerate(derive_dic[var].keys()):
+            infile = data_dic[vv]['data_path']
+            f = cdms2.open(infile)
+            if i == 0:
+                d = f(vv) * operator[i]
+            else:
+                d = d + f(vv) * operator[i]
+            f.close()
+            del(infile)
+        f = cdms2.open(outfile,'w')
+        f.write(d)
+        f.close()
+        del(d,f)  # keep outfile: it is returned below
+        outdic = {'template' : outfile.split("/")[-1],
+                  'data_path' : outfile}
+    del(derive_dic,vsublist,operator)
+
+    return outdic, outfile
+
+{%- endif %}
+
+def main():
+    start_yr = int('${Y1}')
+    end_yr = int('${Y2}')
+    num_years = end_yr - start_yr + 1
+
+    num_workers = {{ num_workers }}
+    multiprocessing = {{multiprocessing}}
+
+    # Model
+    # Test data directory
+{% if run_type == "model_vs_obs" %}
+    test_data_dir = 'ts'
+{% elif run_type == "model_vs_model" %}
+    test_data_dir = 'ts_test'
+{%- endif %}
+    test_name = '${case}'
+    test_start_yr = start_yr
+    test_end_yr = end_yr
+    test_dir_source = '{{ output }}/post/atm/{{ grid }}/cmip_ts/monthly'
+    test_cmip_name = '{{ cmip_name }}'
+
+    # Ref
+{% if run_type == "model_vs_obs" %}
+    # Obs
+    reference_dir_source = '{{ obs_ts }}'
+    ref_data_dir = 'ts_ref'
+    ref_start_yr = {{ ref_start_yr }}
+    # match the test window length, but never run past the last year of obs
+    ref_end_yr = min(ref_start_yr + num_years - 1, {{ ref_final_yr }})
+{% elif run_type == "model_vs_model" %}
+    # Reference
+    reference_dir_source = '{{ reference_data_path_ts }}'
+    ref_data_dir = 'ts_ref'
+    ref_name = '${ref_name}'
+    short_ref_name = '{{ short_ref_name }}'
+    ref_start_yr = {{ ref_start_yr }}
+    ref_end_yr = {{ ref_final_yr }}
+    ref_cmip_name = '{{ cmip_name_ref }}'
+
+    # Optionally, swap test and reference model
+    if {{ swap_test_ref }}:
+        # short_test_name is not set anywhere upstream; default it to the case name
+        short_test_name = test_name
+        test_data_dir, ref_data_dir = ref_data_dir, test_data_dir
+        test_name, ref_name = ref_name, test_name
+        short_test_name, short_ref_name = short_ref_name, short_test_name
+        ref_cmip_name, test_cmip_name = test_cmip_name, ref_cmip_name
+{%- endif %}
+
+    ################################################################################
+    # land/sea mask is needed in PCMDI diagnostics; check and generate it here as
+    # these data are not always available for model or observations
+    ################################################################################
+    # Model
+    test_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(test_data_dir))
+    return_code = generate_land_sea_mask(test_dic,"${fixed_dir}")
+    if return_code != 0:
+        exit("Failed to generate land/sea mask...")
+    del(test_dic)
+    # Reference
+    ref_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(ref_data_dir))
+    return_code = generate_land_sea_mask(ref_dic,"${fixed_dir}")
+    if return_code != 0:
+        exit("Failed to generate land/sea mask...")
+    del(ref_dic)
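+    # Note on conventions: the sftlf files generated above follow CMIP
+    # practice, land area fraction in percent (0-100, values near 100 over
+    # land); generate_sftlf.py, shipped alongside this template, writes them
+    # with units "%", which is what the regions_values thresholds defined in
+    # parameterfile.py assume.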
+
+    # Run PCMDI for diagnostics
+{%- if "mean_climate" in subset %}
+    #####################################################################
+    # calculate test and reference model climatology
+    #####################################################################
+    print("calculate mean climate diagnostics")
+    outpath = os.path.join("${results_dir}","climo","${case_id}")
+    method = '{{climatology_process_method}}'
+    for key in ["test","ref"]:
+        if key == "test":
+            data_dir = test_data_dir
+            start_yr = test_start_yr
+            end_yr = test_end_yr
+        elif key == "ref":
+            data_dir = ref_data_dir
+            start_yr = ref_start_yr
+            end_yr = ref_end_yr
+        data_dic = os.path.join("${results_dir}",'{}_{{sub}}_mon_catalogue.json'.format(data_dir))
+        clim_dic = os.path.join("${results_dir}",'{}_{{sub}}_clim_catalogue.json'.format(data_dir))
+        if method in [ "pcmdi", "PCMDI", "default" ]:
+            # method 1: built-in PCMDI routine (may hit memory issues for high-res data)
+            calculate_climatology("pcmdi",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers)
+        elif method in [ "nco", "NCO", "alternate" ]:
+            # method 2: use the nco package (default, faster)
+            calculate_climatology("nco",start_yr,end_yr,data_dic,clim_dic,outpath,multiprocessing,num_workers)
+        if not os.path.exists(clim_dic):
+            exit("ERROR: failed to process data climatology....")
+        del(data_dir,start_yr,end_yr,data_dic,clim_dic)
+
+    #####################################################################
+    # call mean_climate_driver.py to process diagnostics
+    #####################################################################
+    # defined regions
+    regional = '{{ regional }}'
+    if regional == "y":
+        default_regions = list('{{ regions }}'.split(","))
+    else:
+        default_regions = ["global", "NHEX", "SHEX", "TROPICS"]
+    # create command list for mean climate driver
+    lstcmd = []
+    reg_var_dic = {}
+    for vv in list("{{vars}}".split(",")):
+        vkys = vv.split("-")[0]
+        reg_var_dic[vkys] = default_regions
+        vars = vv
+        cmd = (" ".join(["mean_climate_driver.py",
+                         "-p", "parameterfile.py",
+                         "--vars", '{}'.format(vars)]))
+        lstcmd.append(cmd); del(cmd,vars,vkys)
+
+    # create regions for the regional mean of each variable
+    json.dump(reg_var_dic,
+              open(os.path.join("${results_dir}",'var_region_{{sub}}_catalogue.json'),"w"),
+              sort_keys=True,
+              indent=4,
+              separators=(",", ": "))
+
+    # finally process the data in parallel
+    print("Number of jobs starting is ", str(len(lstcmd)))
+    procs = []
+    if len(lstcmd) > 0:
+        for i,p in enumerate(lstcmd):
+            print('running %s' % (str(p)))
+            proc = Popen(p, stdout=PIPE, shell=True)
+            if multiprocessing == True:
+                procs.append(proc)
+                while (childCount() > num_workers):
+                    time.sleep(0.25)
+                [pp.communicate() for pp in procs]
+                procs = []
+            else:
+                if (i == len(lstcmd)-1):
+                    try:
+                        outs, errs = proc.communicate()
+                        if proc.returncode == 0:
+                            print("stdout = {}; stderr = {}".format(str(outs),str(errs)))
+                        else:
+                            exit("ERROR: subprocess {} failed".format(str(lstcmd[i])))
+                    except Exception:
+                        break
+                else:
+                    proc.communicate()
+                    if proc.returncode != 0:
+                        exit("Failed to run {}".format(str(p)))
+
+    # short delay so output files are fully written before plotting
+    time.sleep(1)
+    print("done submitting")
+    del(reg_var_dic,regional,lstcmd)
+
+    # generate diagnostics figures
+    print("--- prepare for mean climate metrics plot ---")
+    parser = create_mean_climate_plot_parser()
+    parameter = parser.get_parameter(argparse_vals_only=False)
+    parameter.regions = default_regions
+    parameter.run_type = "${run_type}"
+    parameter.period = "{}-{}".format(test_start_yr,test_end_yr)
+    parameter.pcmdi_data_set = "{{pcmdi_data_set}}"
+    parameter.pcmdi_data_path = os.path.join('{{pcmdi_data_path}}',"mean_climate")
+    parameter.test_data_set = "{}.{}".format(test_cmip_name,"${case_id}")
+    parameter.test_data_path = os.path.join("${results_dir}","metrics_results","mean_climate")
+{% if run_type == "model_vs_obs" %}
+    parameter.refr_data_set = ""
+    parameter.refr_period = ""
+    parameter.refr_data_path = ""
+{% elif run_type == "model_vs_model" %}
+    parameter.refr_data_set = "{}.{}".format(ref_cmip_name,"${case_id}")
+    parameter.refr_period = "{}-{}".format(ref_start_yr,ref_end_yr)
+    parameter.refr_data_path = os.path.join("${results_dir}","metrics_results","mean_climate")
+{%- endif %}
+    parameter.output_path = os.path.join("${results_dir}","graphics","mean_climate")
+    parameter.ftype = '{{ figure_format }}'
+    parameter.debug = {{ pmp_debug }}
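+    # The two plot toggles that follow are optional arguments defined in
+    # mean_climate_plot_parser.py; when left unset there, the plotting code
+    # falls back to showing per-model markers and omitting the vertical
+    # separator line.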
+ parameter.parcord_show_markers = {{parcord_show_markers}} #False + parameter.add_vertical_line = {{portrait_vertical_line}} #True + + #generate diagnostics figures + print("--- generate mean climate metrics plot ---") + mean_climate_metrics_plot(parameter) + del(parameter) + +{%- endif %} + +{%- if "variability_mode" in subset %} + print("calculate mode variability metrics") +{%- if subset == "variability_mode_atm" %} + modes = list({{ atm_modes }}) +{% elif subset == "variability_mode_cpl" %} + modes = list({{ cpl_modes }}) +{%- endif %} + ##################################################################### + # call variability_modes_driver.py to process diagnostics + ##################################################################### + lstcmd = [] + for variability_mode in modes: + if variability_mode in ["NPO", "NPGO", "PSA1"]: + eofn_obs = "2" + eofn_mod = "2" + elif variability_mode in ["PSA2"]: + eofn_obs = "3" + eofn_mod = "3" + else: + eofn_obs = "1" + eofn_mod = "1" + cmd = (" ".join(['variability_modes_driver.py', + '-p', "parameterfile.py", + '--variability_mode', variability_mode, + '--eofn_mod', eofn_mod, + '--eofn_obs', eofn_obs ])) + lstcmd.append(cmd); del(cmd) + #finally process the data in parallel + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + if multiprocessing == True: + procs.append(proc) + while (childCount() > num_workers): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + else: + return_code = proc.communicate() + if return_code != 0: + exit("Failed to run {}".format(str(p))) + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + del(lstcmd) +{%- endif %} + +{%- if "enso" in subset %} + ##################################################################### + # call enso_driver.py to process diagnostics + ##################################################################### + print("calculate enso metrics") + groups = list({{ groups }}) + lstcmd = [] + for metricsCollection in groups: + cmd = (" ".join(['enso_driver.py', + '-p', "parameterfile.py", + '--metricsCollection',metricsCollection])) + lstcmd.append(cmd); del(cmd) + #finally process the data in parallel + print("Number of jobs starting is ", str(len(lstcmd))) + procs = [] + for i,p in enumerate(lstcmd): + print('running %s' % (str(p))) + proc = Popen(p, stdout=PIPE, shell=True) + procs.append(proc) + while (childCount() > {{num_workers}}): + time.sleep(0.25) + [pp.communicate() for pp in procs] # this will get the exit code + procs = [] + else: + if (i == len(lstcmd)-1): + try: + outs, errs = proc.communicate() + if proc.returncode == 0: + print("stdout = {}; stderr = {}".format(str(outs),str(errs))) + else: + exit("ERROR: subprocess {} failed".format(str(lstcmd[i]))) + except: + break + #set a delay to avoid delay in writing process + time.sleep(1) + print("done submitting") + del(lstcmd,procs) +{%- endif %} + +if __name__ == "__main__": + main() + +EOF + +################################ +# Run diagnostics +#command="srun -n 1 python -u pcmdi.py" +command="python -u pcmdi.py" +# Run diagnostics +time ${command} +if [ $? 
!= 0 ]; then
+    cd {{ scriptDir }}
+    echo 'ERROR (10)' > {{ prefix }}.status
+    exit 10
+fi
+
+# Copy output to web server
+echo
+echo ===== COPY FILES TO WEB SERVER =====
+echo
+
+# Create top-level directory
+web_dir=${www}/${case}/pcmdi_diags #/{{ sub }}
+mkdir -p ${web_dir}
+if [ $? != 0 ]; then
+    cd {{ scriptDir }}
+    echo 'ERROR (10)' > {{ prefix }}.status
+    exit 10
+fi
+
+{% if machine in ['pm-cpu', 'pm-gpu'] %}
+# For NERSC, make sure it is world readable
+f=`realpath ${web_dir}`
+while [[ $f != "/" ]]
+do
+    owner=`stat --format '%U' $f`
+    if [ "${owner}" = "${USER}" ]; then
+        chgrp e3sm $f
+        chmod go+rx $f
+    fi
+    f=$(dirname $f)
+done
+{% endif %}
+
+# Copy files
+#rsync -a --delete ${results_dir} ${web_dir}/
+rsync -a ${results_dir} ${web_dir}/
+if [ $? != 0 ]; then
+    cd {{ scriptDir }}
+    echo 'ERROR (11)' > {{ prefix }}.status
+    exit 11
+fi
+
+{% if machine in ['pm-cpu', 'pm-gpu'] %}
+# For NERSC, change permissions of new files
+pushd ${web_dir}/
+chgrp -R e3sm ${results_dir}
+chmod -R go+rX,go-w ${results_dir}
+popd
+{% endif %}
+
+{% if machine in ['anvil', 'chrysalis'] %}
+# For LCRC, change permissions of new files
+pushd ${web_dir}/
+chmod -R go+rX,go-w ${results_dir}
+popd
+{% endif %}
+
+# Delete temporary workdir
+cd ..
+if [[ "${debug,,}" != "true" ]]; then
+    rm -rf ${workdir}
+fi
+
+# Update status file and exit
+{% raw %}
+ENDTIME=$(date +%s)
+ELAPSEDTIME=$(($ENDTIME - $STARTTIME))
+{% endraw %}
+echo ==============================================
+echo "Elapsed time: $ELAPSEDTIME seconds"
+echo ==============================================
+rm -f {{ prefix }}.status
+echo 'OK' > {{ prefix }}.status
+exit 0
diff --git a/zppy/templates/pcmdi_diags/cmip_variables.json b/zppy/templates/pcmdi_diags/cmip_variables.json
new file mode 100755
index 00000000..e5c3336e
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/cmip_variables.json
@@ -0,0 +1,121 @@
+{
+    "SImon":[
+        "siu",
+        "siv",
+        "sitemptop",
+        "sisnmass",
+        "simass",
+        "sisnthick",
+        "sithick",
+        "sitimefrac",
+        "siconc"
+    ],
+    "Omon": [
+        "areacello",
+        "fsitherm",
+        "hfds",
+        "masso",
+        "mlotst",
+        "sfdsi",
+        "sob",
+        "soga",
+        "sos",
+        "tauuo",
+        "tauvo",
+        "thetaoga",
+        "tob",
+        "tos",
+        "tosga",
+        "volo",
+        "wfo",
+        "zos",
+        "hfsifrazil",
+        "masscello",
+        "so",
+        "thetao",
+        "thkcello",
+        "uo",
+        "vo",
+        "volcello",
+        "wo",
+        "zhalfo"
+    ],
+    "lnd": [
+        "mrsos",
+        "mrso",
+        "mrfso",
+        "mrros",
+        "mrro",
+        "prveg",
+        "evspsblveg",
+        "evspsblsoi",
+        "tran",
+        "tsl",
+        "lai"
+    ],
+    "atm": [
+        "hur",
+        "hus",
+        "ta",
+        "ua",
+        "va",
+        "wap",
+        "zg",
+        "o3",
+        "pfull",
+        "phalf",
+        "tas",
+        "ts",
+        "psl",
+        "ps",
+        "sfcWind",
+        "huss",
+        "pr",
+        "prc",
+        "prsn",
+        "evspsbl",
+        "tauu",
+        "tauv",
+        "hfls",
+        "clt",
+        "rlds",
+        "rlus",
+        "rsds",
+        "rsdscs",
+        "rsus",
+        "rsuscs",
+        "hfss",
+        "cl",
+        "clw",
+        "cli",
+        "clivi",
+        "clwvi",
+        "prw",
+        "rldscs",
+        "rlut",
+        "rlutcs",
+        "rsdt",
+        "rsut",
+        "rsutcs",
+        "rtmt",
+        "abs550aer",
+        "od550aer",
+        "tasmax",
+        "tasmin",
+        "clisccp",
+        "cltisccp",
+        "albisccp",
+        "pctisccp",
+        "clcalipso",
+        "cltcalipso",
+        "cllcalipso",
+        "clmcalipso",
+        "clhcalipso"
+    ],
+    "fx": [
+        "areacella",
+        "sftlf",
+        "orog"
+    ]
+}
diff --git a/zppy/templates/pcmdi_diags/derived_variable.json b/zppy/templates/pcmdi_diags/derived_variable.json
new file mode 100755
index 00000000..6ca047ed
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/derived_variable.json
@@ -0,0 +1,26 @@
+{
+    "rltcre":{
+        "rlutcs" : 1,
+        "rlut" : -1
+    },
+    "rstcre":{
+        "rsutcs" : 1,
+        "rsut" : -1
+    },
+    "netsw":{
+        "rsds" : 1,
+        "rsus" : -1
+    },
+    "netlw":{
+        "rlus" : 1,
+        "rlds" : -1
+    },
+    "netflux":{
+        "rsds" : 1,
+        "rsus" : -1,
+        "rlds" : 1,
+        "rlus" : -1,
+        "hfls" : -1,
+        "hfss" : -1
+    }
+}
diff --git a/zppy/templates/pcmdi_diags/generate_sftlf.py b/zppy/templates/pcmdi_diags/generate_sftlf.py
new file mode 100755
index 00000000..032625b1
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/generate_sftlf.py
@@ -0,0 +1,61 @@
+#!/bin/env python
+##################################################################
+# This script attempts to generate a land/sea mask for a given input
+##################################################################
+import datetime
+import os
+import sys
+
+import cdms2 as cdm
+import cdutil
+import numpy as np
+
+if len(sys.argv) > 4:
+    modvar = sys.argv[1]
+    modname = sys.argv[2]
+    modpath = sys.argv[3]
+    modpath_lf = sys.argv[4]
+else:
+    print("ERROR: must specify modvar, modname, modpath and modpath_lf")
+    sys.exit(1)
+
+# Set netcdf file criteria - turned on from default 0s
+cdm.setCompressionWarnings(0)  # Suppress warnings
+cdm.setNetcdfShuffleFlag(0)
+cdm.setNetcdfDeflateFlag(1)
+cdm.setNetcdfDeflateLevelFlag(9)
+cdm.setAutoBounds(1)
+
+f_h = cdm.open(modpath)
+var = f_h(modvar)[0, ...]
+if var.ndim == 2:
+    landMask = cdutil.generateLandSeaMask(var)
+    # Deal with land values
+    landMask[np.greater(landMask, 1e-15)] = 100
+    # Rename
+    landMask = cdm.createVariable(
+        landMask, id="sftlf", axes=var.getAxisList(), typecode="float32"
+    )
+    landMask.associated_files = modpath
+    landMask.long_name = "Land Area Fraction"
+    landMask.standard_name = "land_area_fraction"
+    landMask.units = "%"
+    landMask.setMissing(1.0e20)
+    landMask.id = "sftlf"  # Rename
+
+    # Write variables to file
+    print("output sftlf:", modpath_lf)
+    if os.path.isfile(modpath_lf):
+        os.remove(modpath_lf)
+    fOut = cdm.open(modpath_lf, "w")
+    # Use function to write standard global atts
+    fOut.Conventions = "CF-1.0"
+    fOut.history = "File processed: " + datetime.datetime.now().strftime("%Y%m%d")
+    fOut.pcmdi_metrics_version = "0.1-alpha"
+    fOut.pcmdi_metrics_comment = "PCMDI metrics package"
+    fOut.write(landMask.astype("float32"))
+    fOut.close()
+    f_h.close()
+    del (f_h, landMask, fOut, var)
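+
+# Usage sketch (hypothetical file names): generate a mask for model
+# "E3SM-2-0" from one of its monthly files, writing sftlf.E3SM-2-0.nc:
+#
+#   python generate_sftlf.py tas E3SM-2-0 tas_E3SM-2-0_190001-190112.nc sftlf.E3SM-2-0.nc
+#
+# The four positional arguments are: variable, model name, input file, and
+# output land/sea-mask file, matching the sys.argv parsing above and the
+# subprocess call in generate_land_sea_mask().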
"rsds" : 1, + "rsus" : -1 + }, + "netlw":{ + "rlus" : 1, + "rlds" : -1 + }, + "netflux":{ + "rsds" : 1, + "rsus" : -1, + "rlds" : 1, + "rlus" : -1, + "hfls" : -1, + "hfss" : -1 + } +} diff --git a/zppy/templates/pcmdi_diags/generate_sftlf.py b/zppy/templates/pcmdi_diags/generate_sftlf.py new file mode 100755 index 00000000..032625b1 --- /dev/null +++ b/zppy/templates/pcmdi_diags/generate_sftlf.py @@ -0,0 +1,61 @@ +#!/bin/env python +################################################################## +# This script attemts to generate land/sea mask for a given input +################################################################## +import datetime +import os +import sys + +import cdms2 as cdm +import cdutil +import numpy as np + +if len(sys.argv) > 4: + modvar = sys.argv[1] + modname = sys.argv[2] + modpath = sys.argv[3] + modpath_lf = sys.argv[4] +else: + print("ERROR: must specify {modname},{modpath},{outpath} info") + exit() + +# Set netcdf file criterion - turned on from default 0s +cdm.setCompressionWarnings(0) # Suppress warnings +cdm.setNetcdfShuffleFlag(0) +cdm.setNetcdfDeflateFlag(1) +cdm.setNetcdfDeflateLevelFlag(9) +cdm.setAutoBounds(1) + +cdm.setNetcdfDeflateLevelFlag(9) +cdm.setAutoBounds(1) +f_h = cdm.open(modpath) +var = f_h(modvar)[0, ...] +if var.ndim == 2: + landMask = cdutil.generateLandSeaMask(var) + # Deal with land values + landMask[np.greater(landMask, 1e-15)] = 100 + # Rename + landMask = cdm.createVariable( + landMask, id="sftlf", axes=var.getAxisList(), typecode="float32" + ) + landMask.associated_files = modpath + landMask.long_name = "Land Area Fraction" + landMask.standard_name = "land_area_fraction" + landMask.units = "%" + landMask.setMissing(1.0e20) + landMask.id = "sftlf" # Rename + + # Write variables to file + print("output sftlf:", modpath_lf) + if os.path.isfile(modpath_lf): + os.remove(modpath_lf) + fOut = cdm.open(modpath_lf, "w") + # Use function to write standard global atts + fOut.Conventions = "CF-1.0" + fOut.history = "File processed: " + datetime.datetime.now().strftime("%Y%m%d") + fOut.pcmdi_metrics_version = "0.1-alpha" + fOut.pcmdi_metrics_comment = "PCMDI metrics package" + fOut.write(landMask.astype("float32")) + fOut.close() + f_h.close() + del (f_h, landMask, fOut, var) diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py new file mode 100755 index 00000000..913621a9 --- /dev/null +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_driver.py @@ -0,0 +1,670 @@ +#!/bin/env python +############################################################################## +# This model is used to generate mean climate diagnostic figures +# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov) +############################################################################# +import os +import shutil + +import numpy as np +import pandas as pd +from mean_climate_plot_parser import ( + fill_plot_var_and_units, + find_metrics_data, + metrics_inquire, + shift_row_to_bottom, +) +from pcmdi_metrics.graphics import ( + Metrics, + normalize_by_median, + parallel_coordinate_plot, + portrait_plot, +) + + +def load_test_model_data(test_file, refr_file, mip, run_type): + # load the data and reorganize if needed + pd.set_option("future.no_silent_downcasting", True) + test_lib = Metrics(test_file) + + # model_vs_model, merge the reference model data into test model + if run_type == "model_vs_model": + refr_lib = Metrics(refr_file) + test_lib = test_lib.merge(refr_lib) + del refr_lib + + # collect and reorgnize 
test model data for plotting: + test_models = [] + for stat in test_lib.df_dict: + for season in test_lib.df_dict[stat]: + for region in test_lib.df_dict[stat][season]: + df = pd.DataFrame(test_lib.df_dict[stat][season][region]) + for i, model in enumerate(df["model"].tolist()): + model_run = df["model_run"].tolist()[i] + new_name = "{}-{}".format(mip.upper(), model_run.upper()) + idxs = df[df.iloc[:, 2] == model_run].index + df.loc[idxs, "model"] = list( + map( + lambda x: x.replace(model, new_name), + df.loc[idxs, "model"], + ) + ) + if new_name not in test_models: + test_models.append(new_name) + test_lib.df_dict[stat][season][region] = df + del df + return test_models, test_lib + + +def load_cmip_metrics_data(cmip_file): + # collect cmip multi-model ensemble data for comparison + pd.set_option("future.no_silent_downcasting", True) + cmip_lib = Metrics(cmip_file) + cmip_models = [] + highlight_models = [] + for stat in cmip_lib.df_dict: + for season in cmip_lib.df_dict[stat]: + for region in cmip_lib.df_dict[stat][season]: + # now find all E3SM models in cmip6 + df = pd.DataFrame(cmip_lib.df_dict[stat][season][region]) + for model in df["model"].tolist(): + if model not in cmip_models: + cmip_models.append(model) + if ("e3sm" in model.lower()) and (model not in highlight_models): + highlight_models.append(model) + # move highlight_models to the end + for model in highlight_models: + idxs = df[df.iloc[:, 0] == model].index + cmip_models.remove(model) + cmip_models.append(model) + for idx in idxs: + df = shift_row_to_bottom(df, idx) + cmip_lib.df_dict[stat][season][region] = df + del df + return cmip_models, highlight_models, cmip_lib + + +def save_figure_data( + stat, region, season, var_names, var_units, data_dict, template, outdir +): + # construct output file name + fname = ( + template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + outfile = os.path.join(outdir, fname) + outdic = pd.DataFrame(data_dict) + outdic = outdic.drop(columns=["model_run"]) + for var in list(outdic.columns.values[3:]): + if var not in var_names: + print("{} is excluded from the {}".format(var, fname)) + outdic = outdic.drop(columns=[var]) + else: + # replace the variable with the name + units + outdic.columns.values[ + outdic.columns.values.tolist().index(var) + ] = var_units[var_names.index(var)] + + # save data to .csv file + outdic.to_csv(outfile) + del (fname, outfile, outdic) + return + + +def construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models +): + model_list = cmip_models + test_models + # assign colors for labels of models + lable_colors = [] + for model in model_list: + if model in highlight_models: + lable_colors.append("#5170d7") + elif model in test_models: + lable_colors.append("#FC5A50") + else: + lable_colors.append("#000000") + + if len(model_list) > len(var_names): + xlabels = model_list + ylabels = var_names + landscape = True + else: + xlabels = var_names + ylabels = model_list + landscape = False + del model_list + return xlabels, ylabels, lable_colors, landscape + + +def construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, +): + # work array + data_all = dict() + # loop 4 seasons and collect data + for season in seasons: + # save raw metric results as a .csv file for each season + save_figure_data( + stat, + region, + season, + var_names, + var_units, + data_dict[stat][season][region], + file_template, + outdir, + ) + if 
stat == "cor_xy": + data_nor = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del data_nor + elif stat == "bias_xy": + # calculate the relative bias + data_sea = data_dict[stat][season][region][var_names].to_numpy() + data_rfm = data_dict["mean-obs_xy"][season][region][var_names].to_numpy() + data_msk = np.where(np.abs(data_rfm) == 0.0, np.nan, data_rfm) + data_nor = data_sea * 100.0 / data_msk + if landscape: + data_all[season] = data_nor.T + else: + data_all[season] = data_nor + del (data_sea, data_rfm, data_msk, data_nor) + else: + data_sea = data_dict[stat][season][region][var_names].to_numpy() + if landscape: + data_sea = data_sea.T + data_all[season] = normalize_by_median(data_sea, axis=1) + else: + data_all[season] = normalize_by_median(data_sea, axis=0) + del data_sea + + # data for final plot + data_all_nor = np.stack( + [data_all["djf"], data_all["mam"], data_all["jja"], data_all["son"]] + ) + del data_all + return data_all_nor + + +def port4sea_plot( + stat, + region, + seasons, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + add_vertical_line, + data_version=None, + watermark=False, +): + + # process figure + fontsize = 20 + var_names = sorted(var_names) + var_units = sorted(var_units) + + # construct the axis labels and colors + ( + xaxis_labels, + yaxis_labels, + lable_colors, + landscape, + ) = construct_port4sea_axis_lables( + var_names, cmip_models, test_models, highlight_models + ) + + # construct data for plotting + data_all_nor = construct_port4sea_data( + stat, + seasons, + region, + data_dict, + var_names, + var_units, + file_template, + outdir, + landscape, + ) + + if stat == "cor_xy": + cbar_label = "Pattern Corr." 
+ var_range = (-1.0, 1.0) + cmap_bounds = [0.1, 0.2, 0.4, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0] + elif stat == "bias_xy": + cbar_label = "{}, relative (%)".format(stat.upper()) + var_range = (-30.0, 30.0) + cmap_bounds = [-30.0, -20.0, -10.0, -5.0, -1, 0.0, 1.0, 5.0, 10.0, 20.0, 30.0] + else: + cbar_label = "{}, normalized by median".format(stat.upper()) + var_range = (-0.5, 0.5) + cmap_bounds = [-0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1, 0.2, 0.3, 0.4, 0.5] + + if landscape: + figsize = (40, 18) + legend_box_xy = (1.08, 1.18) + legend_box_size = 4 + legend_lw = 1.5 + shrink = 0.8 + legend_fontsize = fontsize * 0.8 + else: + figsize = (18, 25) + legend_box_xy = (1.25, 1) + legend_box_size = 3 + legend_lw = 1.5 + shrink = 1.0 + legend_fontsize = fontsize * 0.8 + + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + # Using Matplotlib-based PMP Visualization Function to Generate Portrait Plot + fig, ax, cbar = portrait_plot( + data_all_nor, + xaxis_labels=xaxis_labels, + yaxis_labels=yaxis_labels, + cbar_label=cbar_label, + cbar_label_fontsize=fontsize * 1.2, + box_as_square=True, + vrange=var_range, + figsize=figsize, + cmap="RdYlBu_r", + cmap_bounds=cmap_bounds, + cbar_kw={"extend": "both", "shrink": shrink}, + missing_color="white", + legend_on=True, + legend_labels=["DJF", "MAM", "JJA", "SON"], + legend_box_xy=legend_box_xy, + legend_box_size=legend_box_size, + legend_lw=legend_lw, + legend_fontsize=legend_fontsize, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + if add_vertical_line: + ax.axvline( + x=len(xaxis_labels) - len(highlight_models) - len(test_models), + color="k", + linewidth=3, + ) + + if landscape: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + for xtick, color in zip(ax.get_xticklabels(), lable_colors): + xtick.set_color(color) + ax.yaxis.label.set_color(lable_colors[0]) + else: + ax.set_xticklabels(xaxis_labels, rotation=45, va="bottom", ha="left") + ax.set_yticklabels(yaxis_labels, rotation=0, va="center", ha="right") + ax.xaxis.label.set_color(lable_colors[0]) + for ytick, color in zip(ax.get_yticklabels(), lable_colors): + ytick.set_color(color) + + ax.tick_params(axis="x", labelsize=fontsize) + ax.tick_params(axis="y", labelsize=fontsize) + + cbar.ax.tick_params(labelsize=fontsize) + + # Add title + ax.set_title( + "Model Performance of Seasonal Climatology ({}, {})".format( + stat.upper(), region.upper() + ), + fontsize=fontsize * 1.5, + pad=30, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", "4season") + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + del ( + data_all_nor, + xaxis_labels, + yaxis_labels, + lable_colors, + ) + + return + + +def paracord_plot( + stat, + region, + season, + data_dict, + var_names, + var_units, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + outdir, + 
identify_all_models, + data_version=None, + watermark=False, +): + + # construct plotting data + var_names = sorted(var_names) + var_units = sorted(var_units) + + # write out the results as a table + save_figure_data( + stat, region, season, var_names, var_units, data_dict, file_template, outdir + ) + + # add ensemble mean + model_data = data_dict[var_names].to_numpy() + + # construct the string for plot + model_list = data_dict[ + "model" + ].to_list() # cmip_models + test_models + ["CMIP6 MME"] + model_list_group2 = highlight_models + test_models + models_to_highlight = test_models + [ + data_dict["model"].to_list()[-1] + ] # ["CMIP6 MME"] + figsize = (40, 12) + fontsize = 20 + legend_ncol = int(7 * figsize[0] / 40.0) + legend_posistion = (0.50, -0.14) + # color map for markers + colormap = "tab20_r" + # color map for highlight lines + xcolors = [ + "#000000", + "#e41a1c", + "#ff7f00", + "#4daf4a", + "#f781bf", + "#a65628", + "#984ea3", + "#999999", + "#377eb8", + "#dede00", + ] + lncolors = xcolors[1 : len(test_models) + 1] + [xcolors[0]] + # Add Watermark/Logo + if watermark: + logo_rect = [0.85, 0.15, 0.07, 0.07] + logo_off = False + else: + logo_rect = [0, 0, 0, 0] + logo_off = True + + xlabel = "Metric" + if "rms" in stat: + ylabel = "RMS Error (" + stat.upper() + ")" + elif "std" in stat: + ylabel = "Standard Deviation (" + stat.upper() + ")" + else: + ylabel = "value (" + stat.upper() + ")" + + if not np.isnan(model_data).all(): + print(model_data.min(), model_data.max()) + title = "Model Performance of {} Climatology ({}, {})".format( + season.upper(), stat.upper(), region.upper() + ) + fig, ax = parallel_coordinate_plot( + model_data, + var_units, + model_list, + model_names2=model_list_group2, + group1_name="CMIP6", + group2_name="E3SM", + models_to_highlight=models_to_highlight, + models_to_highlight_colors=lncolors, + models_to_highlight_labels=models_to_highlight, + identify_all_models=identify_all_models, # hide indiviaul model markers for CMIP6 models + vertical_center="median", + vertical_center_line=True, + title=title, + figsize=figsize, + axes_labelsize=fontsize * 1.1, + title_fontsize=fontsize * 1.1, + yaxes_label=ylabel, + xaxes_label=xlabel, + colormap=colormap, + show_boxplot=False, + show_violin=True, + violin_colors=("lightgrey", "pink"), + legend_ncol=legend_ncol, + legend_bbox_to_anchor=legend_posistion, + legend_fontsize=fontsize * 0.85, + xtick_labelsize=fontsize * 0.95, + ytick_labelsize=fontsize * 0.95, + logo_rect=logo_rect, + logo_off=logo_off, + ) + + # Add Watermark + if watermark: + ax.text( + 0.5, + 0.5, + "E3SM-PCMDI", + transform=ax.transAxes, + fontsize=100, + color="black", + alpha=0.5, + ha="center", + va="center", + rotation=25, + ) + # Add data info + fig.text( + 1.25, + 0.9, + "Data version\n" + data_version, + transform=ax.transAxes, + fontsize=12, + color="black", + alpha=0.6, + ha="left", + va="top", + ) + + # Save figure as an image file + figname = ( + figure_template.replace("%(metric)", stat) + .replace("%(region)", region) + .replace("%(season)", season) + ) + figfile = os.path.join(outdir, figname) + fig.savefig(figfile, facecolor="w", bbox_inches="tight") + + del (model_data, model_list, model_list_group2, models_to_highlight) + + return + + +def mean_climate_metrics_plot(parameter): + # info for test simulation + test_mip = parameter.test_data_set.split(".")[0] + test_exp = parameter.test_data_set.split(".")[1] + test_product = parameter.test_data_set.split(".")[2] + test_case_id = parameter.test_data_set.split(".")[-1] + # output 
directory + outdir = os.path.join(parameter.output_path, test_mip, test_exp, test_case_id) + + # construct file template to save the figure data in .csv file + file_template = "%(metric)_%(region)_{}_{}_{}_{}_mean_climate_%(season)_{}.csv" + file_template = file_template.format( + parameter.run_type.upper(), + test_mip.upper(), + test_exp.upper(), + test_product.upper(), + parameter.period, + ) + # construct figure template + figure_template = file_template.replace("csv", parameter.ftype) + + # find the metrics data + test_file, refr_file, cmip_file = find_metrics_data(parameter) + + # load cmip metrics data + cmip_models, highlight_models, cmip_lib = load_cmip_metrics_data(cmip_file) + + # load test model metrics data + test_models, test_lib = load_test_model_data( + test_file, refr_file, test_mip, parameter.run_type + ) + # collect overlap sets of variables for plotting: + test_lib, cmip_lib, var_list, var_unit_list = fill_plot_var_and_units( + test_lib, cmip_lib + ) + # search overlap of regions in test and reference + regions = [] + for reg in parameter.regions: + if (reg in test_lib.regions) and (reg in cmip_lib.regions): + regions.append(reg) + + # merge the cmip and model data + merged_lib = cmip_lib.merge(test_lib) + + ################################### + # generate parallel coordinate plot + ################################### + parall_fig_dir = os.path.join(outdir, "paracord_annual") + if os.path.exists(parall_fig_dir): + shutil.rmtree(parall_fig_dir) + os.makedirs(parall_fig_dir) + print("Parallel Coordinate Plots (4 seasons), loop each region and metric....") + # add ensemble mean + for metric in [ + "rms_xyt", + "std-obs_xyt", + "std_xyt", + "rms_y", + "rms_devzm", + "std_xy_devzm", + "std-obs_xy_devzm", + ]: + for region in regions: + for season in ["ann"]: + data_dict = merged_lib.df_dict[metric][season][region] + data_dict.loc["CMIP MMM"] = cmip_lib.df_dict[metric][season][ + region + ].mean(numeric_only=True, skipna=True) + data_dict.at["CMIP MMM", "model"] = "CMIP MMM" + if parameter.parcord_show_markers is not None: + identify_all_models = parameter.parcord_show_markers + else: + identify_all_models = True + paracord_plot( + metric, + region, + season, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + parall_fig_dir, + identify_all_models, + data_version=None, + watermark=False, + ) + del data_dict + + ################################### + # generate portrait plot + ################################### + ptrait_fig_dir = os.path.join(outdir, "portrait_4seasons") + if os.path.exists(ptrait_fig_dir): + shutil.rmtree(ptrait_fig_dir) + os.makedirs(ptrait_fig_dir) + print("Portrait Plots (4 seasons),loop each region and metric....") + ######################################################################### + seasons = ["djf", "mam", "jja", "son"] + data_dict = merged_lib.df_dict + for metric in ["rms_xy", "cor_xy", "bias_xy"]: + for region in regions: + print("working on {} in {} region".format(metrics_inquire(metric), region)) + if parameter.add_vertical_line is not None: + add_vertical_line = parameter.add_vertical_line + else: + add_vertical_line = False + port4sea_plot( + metric, + region, + seasons, + data_dict, + var_list, + var_unit_list, + cmip_models, + test_models, + highlight_models, + file_template, + figure_template, + ptrait_fig_dir, + add_vertical_line, + data_version=None, + watermark=False, + ) + + # release the data space + del (merged_lib, cmip_lib, test_lib, var_unit_list, 
var_list, regions) + + return diff --git a/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py new file mode 100755 index 00000000..7c66d895 --- /dev/null +++ b/zppy/templates/pcmdi_diags/mean_climate_plot_parser.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python +import ast +import glob +import os + +import numpy as np +import pandas as pd +from pcmdi_metrics.mean_climate.lib import pmp_parser + + +def create_mean_climate_plot_parser(): + parser = pmp_parser.PMPMetricsParser() + parser.add_argument( + "--test_model", + dest="test_model", + help="Defines target model for the metrics plots", + required=False, + ) + + parser.add_argument( + "--test_data_set", + type=str, + nargs="+", + dest="test_data_set", + help="List of observations or models to test " + + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--test_data_path", + dest="test_data_path", + help="Path for the test climitologies", + required=False, + ) + + parser.add_argument( + "--period", dest="period", help="A simulation parameter", required=False + ) + + parser.add_argument( + "--run_type", dest="run_type", help="A post-process parameter", required=False + ) + + parser.add_argument( + "--regions", + type=ast.literal_eval, + dest="regions", + help="Regions on which to run the metrics", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_set", + type=str, + nargs="+", + dest="pcmdi_data_set", + help="PCMDI CMIP dataset that is used as a " + + "CMIP multi-model ensembles against the test_data_set", + required=False, + ) + + parser.add_argument( + "--pcmdi_data_path", + dest="pcmdi_data_path", + help="Path for the PCMDI CMIP mean climate metrics data", + required=False, + ) + + parser.add_argument( + "--refr_model", + dest="refr_model", + help="A simulation parameter", + required=False, + ) + + parser.add_argument( + "--refr_data_set", + type=str, + nargs="+", + dest="refr_data_set", + help="List of reference models to test " + "against the reference_data_set", + required=False, + ) + + parser.add_argument( + "--refr_data_path", + dest="refr_data_path", + help="Path for the reference model climitologies", + required=False, + ) + + parser.add_argument( + "--output_path", + dest="output_path", + help="Path for the metrics plots", + required=False, + ) + + parser.add_argument( + "--parcord_show_markers", + dest="parcord_show_markers", + help="show markers for individual model in parallel coordinate plots", + required=False, + ) + parser.add_argument( + "--add_vertical_line", + dest="add_vertical_line", + help="draw a vertical line to separate test and reference models for portrait plots", + required=False, + ) + return parser + + +def metrics_inquire(name): + # list of metrics name and long-name + metrics = { + "std-obs_xy": "Spatial Standard Deviation (Reference)", + "std_xy": "Spatial Standard Deviation (Model)", + "std-obs_xyt": "Spatial-temporal Standard Deviation (Reference)", + "std_xyt": "Spatial-temporal Standard Deviation (Model)", + "std-obs_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Reference)", + "mean_xy": "Area Weighted Spatial Mean (Model)", + "mean-obs_xy": "Area Weighted Spatial Mean (Reference)", + "std_xy_devzm": "Standard Deviation of Deviation from Zonal Mean (Model)", + "rms_xyt": "Spatio-Temporal Root Mean Square Error", + "rms_xy": "Spatial Root Mean Square Error", + "rmsc_xy": "Centered Spatial Root Mean Square Error", + "cor_xy": "Spatial Pattern Correlation Coefficient", + "bias_xy": "Mean 
Bias (Model - Reference)", + "mae_xy": "Mean Absolute Difference (Model - Reference)", + "rms_y": "Root Mean Square Error of Zonal Mean", + "rms_devzm": "Root Mean Square Error of Deviation From Zonal Mean", + } + if name in metrics.keys(): + long_name = metrics[name] + + return long_name + + +def find_latest(pmprdir, mip, exp): + versions = sorted( + [ + r.split("/")[-1] + for r in glob.glob(os.path.join(pmprdir, mip, exp, "v????????")) + ] + ) + latest_version = versions[-1] + return latest_version + + +def shift_row_to_bottom(df, index_to_shift): + idx = [i for i in df.index if i != index_to_shift] + return df.loc[idx + [index_to_shift]] + + +def find_cmip_metric_data(pmprdir, data_set, var): + # cmip data for comparison + mip = data_set.split(".")[0] + exp = data_set.split(".")[1] + case_id = data_set.split(".")[2] + if case_id == "": + case_id = find_latest(pmprdir, mip, exp) + fpath = glob.glob(os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format(var))) + if len(fpath) < 1 and var == "rtmt": + fpath = glob.glob( + os.path.join(pmprdir, mip, exp, case_id, "{}.*.json".format("rt")) + ) + if len(fpath) > 0 and os.path.exists(fpath[0]): + cmip_list = fpath[0] + return_code = 0 + else: + print("Warning: cmip metrics data not found for {}....".format(var)) + print("Warning: remove {} from the metric list....".format(var)) + cmip_list = None + return_code = -99 + return cmip_list, return_code + + +def select_models(df, selected_models): + # Selected models only + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = True + for keyword in selected_models: + if keyword in model_name: + drop_model = False + break + if drop_model: + df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + + return df + + +def exclude_models(df, excluded_models): + # eclude models + model_names = df["model"].tolist() + for model_name in model_names: + drop_model = False + for keyword in excluded_models: + if keyword in model_name: + drop_model = True + break + if drop_model: + df.drop(df.loc[df["model"] == model_name].index, inplace=True) + df.reset_index(drop=True, inplace=True) + return df + + +def fill_plot_var_and_units(model_lib, cmip_lib): + # we define fixed sets of variables used for final plotting. 
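+    # The unit strings below use matplotlib mathtext (e.g. "[W m$^{-2}$]") so
+    # exponents render properly on the plot axes; keys like "ta-200" or
+    # "zg-500" follow the "<variable>-<pressure level in hPa>" convention used
+    # elsewhere in this template (cf. vv.split("-")[0] when the mean-climate
+    # command list is built).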
+    units_all = {
+        "prw": "[kg m$^{-2}$]",
+        "pr": "[mm d$^{-1}$]",
+        "prsn": "[mm d$^{-1}$]",
+        "prc": "[mm d$^{-1}$]",
+        "hfls": "[W m$^{-2}$]",
+        "hfss": "[W m$^{-2}$]",
+        "clivi": "[kg m$^{-2}$]",
+        "clwvi": "[kg m$^{-2}$]",
+        "psl": "[Pa]",
+        "evspsbl": "[kg m$^{-2}$ s$^{-1}$]",
+        "rlds": "[W m$^{-2}$]",
+        "rldscs": "[W m$^{-2}$]",
+        "rtmt": "[W m$^{-2}$]",
+        "rsdt": "[W m$^{-2}$]",
+        "rlus": "[W m$^{-2}$]",
+        "rluscs": "[W m$^{-2}$]",
+        "rlut": "[W m$^{-2}$]",
+        "rlutcs": "[W m$^{-2}$]",
+        "rsds": "[W m$^{-2}$]",
+        "rsdscs": "[W m$^{-2}$]",
+        "rstcre": "[W m$^{-2}$]",
+        "rltcre": "[W m$^{-2}$]",
+        "rsus": "[W m$^{-2}$]",
+        "rsuscs": "[W m$^{-2}$]",
+        "rsut": "[W m$^{-2}$]",
+        "rsutcs": "[W m$^{-2}$]",
+        "ts": "[K]",
+        "tas": "[K]",
+        "tauu": "[Pa]",
+        "tauv": "[Pa]",
+        "sfcWind": "[m s$^{-1}$]",
+        "zg-500": "[m]",
+        "ta-200": "[K]",
+        "ta-850": "[K]",
+        "ua-200": "[m s$^{-1}$]",
+        "ua-850": "[m s$^{-1}$]",
+        "va-200": "[m s$^{-1}$]",
+        "va-850": "[m s$^{-1}$]",
+        "uas": "[m s$^{-1}$]",
+        "vas": "[m s$^{-1}$]",
+        "tasmin": "[K]",
+        "tasmax": "[K]",
+        "clt": "[%]",
+    }
+
+    # loop over the variable list and find each variable in the cmip and target models
+    variable_units = []
+    variable_names = []
+    for var in units_all.keys():
+        # reorganize cmip data
+        if var == "rtmt":
+            if ("rt" in cmip_lib.var_list) and ("rtmt" in model_lib.var_list):
+                # special case (rt is used in pcmdi datasets, but rtmt is for cmip)
+                cmip_lib.var_list = list(
+                    map(lambda x: x.replace("rt", "rtmt"), cmip_lib.var_list)
+                )
+                for stat in cmip_lib.df_dict:
+                    for season in cmip_lib.df_dict[stat]:
+                        for region in cmip_lib.df_dict[stat][season]:
+                            cmip_lib.df_dict[stat][season][region][
+                                "rtmt"
+                            ] = cmip_lib.df_dict[stat][season][region].pop("rt")
+
+        if var in model_lib.var_list and var in cmip_lib.var_list:
+            varunt = var + "\n" + str(units_all[var])
+            indv1 = cmip_lib.var_list.index(var)
+            indv2 = model_lib.var_list.index(var)
+            cmip_lib.var_unit_list[indv1] = varunt
+            model_lib.var_unit_list[indv2] = varunt
+            variable_units.append(varunt)
+            variable_names.append(var)
+            del (indv1, indv2, varunt)
+        else:
+            print("Warning: {} is not found in metrics data".format(var))
+            print(
+                "Warning: {} is possibly not included as default in fill_plot_var_and_units()".format(
+                    var
+                )
+            )
+
+    # sanity check for cmip data
+    for stat in cmip_lib.df_dict:
+        for season in cmip_lib.df_dict[stat]:
+            for region in cmip_lib.df_dict[stat][season]:
+                df = pd.DataFrame(cmip_lib.df_dict[stat][season][region])
+                for i, model in enumerate(df["model"].tolist()):
+                    if model in ["E3SM-1-0", "E3SM-1-1-ECA"]:
+                        idxs = df[df.iloc[:, 0] == model].index
+                        df.loc[idxs, "ta-850"] = np.nan
+                        del idxs
+                    if model in ["CIESM"]:
+                        idxs = df[df.iloc[:, 0] == model].index
+                        df.loc[idxs, "pr"] = np.nan
+                        del idxs
+                cmip_lib.df_dict[stat][season][region] = df
+                del df
+
+    return model_lib, cmip_lib, variable_names, variable_units
+
+
+def find_metrics_data(parameter):
+    pmp_set = parameter.pcmdi_data_set
+    pmp_path = parameter.pcmdi_data_path
+    test_set = parameter.test_data_set
+    test_path = parameter.test_data_path
+    refr_set = parameter.refr_data_set
+    refr_path = parameter.refr_data_path
+    run_type = parameter.run_type
+    debug = parameter.debug
+
+    test_mip = test_set.split(".")[0]
+    test_exp = test_set.split(".")[1]
+    test_case_id = test_set.split(".")[-1]
+    test_dir = os.path.join(test_path, test_mip, test_exp, test_case_id)
+    if run_type == "model_vs_model":
+        refr_mip = refr_set.split(".")[0]
+        refr_exp = refr_set.split(".")[1]
+        refr_case_id = refr_set.split(".")[-1]
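+        # For model_vs_model runs, the reference metrics are read from the
+        # same <path>/<mip>/<exp>/<case_id> layout as the test metrics; the
+        # directory is assembled next.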
+        refr_dir = os.path.join(refr_path, refr_mip, refr_exp, refr_case_id)
+
+    variables = [
+        s.split("/")[-1].split("_")[0]
+        for s in glob.glob(os.path.join(test_dir, "*{}.json".format(test_case_id)))
+        if os.path.exists(s)
+    ]
+    variables = list(set(variables))
+
+    # find the list of metrics data files
+    test_list = []
+    refr_list = []
+    cmip_list = []
+
+    for vv in variables:
+        ftest = glob.glob(
+            os.path.join(test_dir, "{}_*_{}.json".format(vv, test_case_id))
+        )
+        fcmip, rcode = find_cmip_metric_data(pmp_path, pmp_set, vv)
+        if rcode == 0:
+            if len(ftest) > 0 and len(fcmip) > 0:
+                for fx in ftest:
+                    test_list.append(fx)
+                cmip_list.append(fcmip)
+                if debug:
+                    print(ftest[0].split("/")[-1], fcmip.split("/")[-1])
+                if run_type == "model_vs_model":
+                    frefr = glob.glob(
+                        os.path.join(refr_dir, "{}_*_{}.json".format(vv, refr_case_id))
+                    )
+                    if len(frefr) > 0:
+                        for fr in frefr:
+                            refr_list.append(fr)
+                        if debug:
+                            print(
+                                ftest[0].split("/")[-1],
+                                frefr[0].split("/")[-1],
+                                fcmip.split("/")[-1],
+                            )
+                    del frefr
+        del (ftest, fcmip)
+    return test_list, refr_list, cmip_list
diff --git a/zppy/templates/pcmdi_diags/observation_to_cmip.py b/zppy/templates/pcmdi_diags/observation_to_cmip.py
new file mode 100755
index 00000000..7b6eccee
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/observation_to_cmip.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python
+import glob
+import json
+import os
+import shutil
+import subprocess
+
+# command = shlex.split("bash -c 'source init_env && env'")
+# proc = subprocess.Popen(command, stdout = subprocess.PIPE)
+
+srcdir = "/lcrc/group/e3sm/ac.szhang/acme_scratch/e3sm_project/test_zppy_pmp/zppy"
+cmip_var = json.load(
+    open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "cmip_var.json"))
+)
+ref_dic = json.load(
+    open(os.path.join(srcdir, "zppy/templates/pcmdi_diags", "reference_data.json"))
+)
+
+output_path = (
+    "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/time-series/NOAA_20C"
+)
+
+
+default_metadata = os.path.join(
+    srcdir, "zppy/templates/pcmdi_diags/default_metadata.json"
+)
+tables_path = "/lcrc/group/e3sm/diagnostics/cmip6-cmor-tables/Tables"
+
+input_path = os.path.join(output_path, "input_data")
+if not os.path.exists(input_path):
+    os.makedirs(input_path)
+
+raw_data_path = "/lcrc/group/acme/ac.szhang/acme_scratch/data/CVDP_RGD/NOAA_20C"
+fpaths = sorted(glob.glob(os.path.join(raw_data_path, "{}*.nc".format("NOAA_20C"))))
+for fpath in fpaths:
+    fname = fpath.split("/")[-1]
+    fname = fname.replace("-", ".")
+    fout = "_".join(fname.split(".")[2:])
+    fout = os.path.join(input_path, fout.replace("_nc", ".nc"))
+    print("input: ", fpath)
+    print("output: ", fout)
+    # replace any stale symlink, then link the raw file into the input directory
+    if os.path.islink(fout):
+        os.remove(fout)
+    os.symlink(fpath, fout)
+    del (fname, fout)
+del (fpaths, raw_data_path)
+
+for key in cmip_var.keys():
+    cmip_var_list = ", ".join(cmip_var[key])
+    print(cmip_var_list)
+    subprocess.call(
+        [
+            "e3sm_to_cmip",
+            "--output-path",
+            output_path,
+            "--var-list",
+            cmip_var_list,
+            "--input-path",
+            input_path,
+            "--user-metadata",
+            default_metadata,
+            "--tables-path",
+            tables_path,
+        ]
+    )
+
+# move data to the target location
+opaths = sorted(glob.glob(os.path.join(output_path, "CMIP6/CMIP/*/*/*/*/*/*/*/*/*.nc")))
+for opath in opaths:
+    outfile = opath.split("/")[-1]
+    outname = outfile.replace("-", "_").split("_")
+    fout = "_".join([outname[0], outname[-2], outname[-1]])
+    fout = os.path.join(output_path, fout.replace("_nc", ".nc"))
+    if os.path.exists(opath):
+        os.rename(opath, fout)
+    del (outfile, outname, fout)
+
+# clean up the directory
+if os.path.exists(os.path.join(output_path, "CMIP6")):
+    shutil.rmtree(os.path.join(output_path, "CMIP6"))
+
+if os.path.exists(input_path):
+    shutil.rmtree(input_path)
diff --git a/zppy/templates/pcmdi_diags/plot_mean_climate.py b/zppy/templates/pcmdi_diags/plot_mean_climate.py
new file mode 100755
index 00000000..e4abd119
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/plot_mean_climate.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+##############################################################################
+# This module is used to generate mean climate diagnostic figures
+# Author: Shixuan Zhang (shixuan.zhang@pnnl.gov)
+#############################################################################
+import os
+
+from mean_climate_plot_driver import mean_climate_metrics_plot
+from mean_climate_plot_parser import create_mean_climate_plot_parser
+
+
+def main(
+    run_type,
+    test_data_set,
+    test_data_dir,
+    test_period,
+    refr_data_set,
+    refr_data_dir,
+    refr_period,
+    cmip_data_set,
+    pcmdi_data_dir,
+    results_dir,
+):
+    parser = create_mean_climate_plot_parser()
+    parameter = parser.get_parameter(argparse_vals_only=False)
+
+    parameter.pcmdi_data_set = cmip_data_set
+    parameter.pcmdi_data_path = pcmdi_data_dir
+
+    parameter.period = test_period
+    parameter.test_product = test_data_set.split(".")[2]
+    parameter.test_data_set = test_data_set
+    parameter.test_data_path = os.path.join(test_data_dir, "mean_climate")
+    parameter.run_type = run_type
+
+    if parameter.run_type == "model_vs_model":
+        parameter.refr_data_set = refr_data_set
+        parameter.refr_period = refr_period
+        parameter.refr_data_path = os.path.join(refr_data_dir, "mean_climate")
+
+    parameter.output_path = os.path.join(results_dir, "graphics", "mean_climate")
+    parameter.ftype = "png"
+    parameter.debug = False
+    parameter.regions = ["global", "NHEX", "SHEX", "TROPICS"]
+    parameter.parcord_show_markers = False
+    parameter.add_vertical_line = True
+
+    mean_climate_metrics_plot(parameter)
+
+
+if __name__ == "__main__":
+    cmip_data_set = "cmip6.amip.v20241029"
+    pcmdi_data_dir = (
+        "/lcrc/soft/climate/e3sm_diags_data/obs_for_e3sm_diags/pcmdi_data/mean_climate"
+    )
+    results_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014"
+    run_type = "model_vs_obs"
+
+    test_data_set = "e3sm.amip.v3-LR.all.v20241030"
+    test_data_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014"
+    test_period = "1985-2014"
+
+    if run_type == "model_vs_obs":
+        refr_data_set = ""
+        refr_data_dir = ""
+        refr_period = ""
+    else:
+        print("need to provide reference data information ...")
+        refr_data_set = "e3sm.historical.v3-LR.all.v20241030"
+        refr_data_dir = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi/merged_data/model_vs_obs_1985-2014"
+        refr_period = "1985-2014"
+
+    main(
+        run_type,
+        test_data_set,
+        test_data_dir,
+        test_period,
+        refr_data_set,
+        refr_data_dir,
+        refr_period,
+        cmip_data_set,
+        pcmdi_data_dir,
+        results_dir,
+    )
diff --git a/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py b/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py
new file mode 100755
index 00000000..8e265a14
--- /dev/null
+++ b/zppy/templates/pcmdi_diags/post_merge_clim_jsons.py
@@ -0,0 +1,164 @@
+#!/usr/bin/env python
+import copy
+import glob
+import json
+import os
+
+from pcmdi_metrics.utils import StringConstructor
+from pcmdi_metrics.variability_mode.lib import dict_merge
+
+
+def main():
+    mip = "e3sm"
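+    # With the settings below, the directory templates later in main() expand
+    # to paths like (illustrative, using the commented-out product/realization
+    # lists): <data_path>/v3.LR.amip_0101/pcmdi_diags/model_vs_obs_1985-2014/
+    # metrics_results/mean_climate/e3sm/amip/v20241030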
+ exp = "amip" + case_id = "v20241030" + period = "1985-2014" + metric_collection = "mean_climate" + run_type = "model_vs_obs" + data_path = "/lcrc/group/e3sm/public_html/diagnostic_output/ac.szhang/e3sm-pcmdi" + obs_selection = "default" + + # target here is to merge all product models at all realizations to one-big file + # product = ['v3.LR'] + # realm = ["0101", "0151", "0201"] + + # template for diagnostic directory tree + # construct the directory for specific mpi, exp and case + pmprdir_template = StringConstructor( + "%(product).%(exp)_%(realization)/pcmdi_diags/%(run_type)_%(period)" + ) + pmprdir = os.path.join( + data_path, + pmprdir_template( + mip=mip, + exp=exp, + case_id=case_id, + product="*", + realization="*", + run_type=run_type, + period=period, + ), + ) + print("pmprdir:", pmprdir) + + # template for metrics directory tree + json_file_dir_template = StringConstructor( + "metrics_results/%(metric_collection)/%(mip)/%(exp)/%(case_id)" + ) + json_file_dir = os.path.join( + pmprdir, + json_file_dir_template( + metric_collection=metric_collection, + mip=mip, + exp=exp, + case_id=case_id, + ), + ) + print("json_file_dir:", json_file_dir) + + # template for output directory tree + out_file_dir_template = StringConstructor( + "%(run_type)_%(period)/%(metric_collection)/%(mip)/%(exp)/%(case_id)" + ) + out_file_dir = os.path.join( + data_path, + "merged_data", + out_file_dir_template( + metric_collection=metric_collection, + mip=mip, + exp=exp, + case_id=case_id, + run_type=run_type, + period=period, + ), + ) + print("out_file_dir:", out_file_dir) + variables = [ + s.split("/")[-1] + for s in glob.glob( + os.path.join( + json_file_dir, + "*", + ) + ) + if os.path.isdir(s) + ] + variables = list(set(variables)) + print("variables:", variables) + + for var in variables: + # json merge + # try: + if 1: + merge_json( + mip, exp, case_id, var, obs_selection, json_file_dir, out_file_dir + ) + """ + except Exception as err: + print("ERROR: ", mip, exp, var, err) + pass + """ + + +def merge_json(mip, exp, case_id, var, obs, json_file_dir, out_file_dir): + print("json_file_dir:", json_file_dir) + json_file_template = StringConstructor( + "%(var)_%(model)_%(realization)_*_%(obs)_%(case_id).json" + ) + # Search for individual JSONs + json_files = sorted( + glob.glob( + os.path.join( + json_file_dir, + var, + json_file_template( + var=var, + model="*", + realization="*", + obs=obs, + case_id=case_id, + ), + ) + ) + ) + + print("json_files:", json_files) + + # Remove diveDown JSONs and previously generated merged JSONs if included + json_files_revised = copy.copy(json_files) + for j, json_file in enumerate(json_files): + filename_component = json_file.split("/")[-1].split(".")[0].split("_") + if "allModels" in filename_component: + json_files_revised.remove(json_file) + elif "allRuns" in filename_component: + json_files_revised.remove(json_file) + + # Load individual JSON and merge to one big dictionary + for j, json_file in enumerate(json_files_revised): + print(j, json_file) + f = open(json_file) + dict_tmp = json.loads(f.read()) + if j == 0: + dict_final = dict_tmp.copy() + else: + dict_merge(dict_final, dict_tmp) + f.close() + + # Dump final dictionary to JSON + if not os.path.exists(out_file_dir): + os.makedirs(out_file_dir) + + final_json_filename = StringConstructor("%(var)_%(mip)_%(exp)_%(case_id).json")( + var=var, mip=mip, exp=exp, case_id=case_id + ) + final_json_file = os.path.join(out_file_dir, final_json_filename) + if os.path.exists(final_json_file): + # previously generated 
merged JSONs if included + os.remove(final_json_file) + + with open(final_json_file, "w") as fp: + json.dump(dict_final, fp, sort_keys=True, indent=4) + + +if __name__ == "__main__": + main() diff --git a/zppy/templates/pcmdi_diags/reference_alias.json b/zppy/templates/pcmdi_diags/reference_alias.json new file mode 100755 index 00000000..a23f6349 --- /dev/null +++ b/zppy/templates/pcmdi_diags/reference_alias.json @@ -0,0 +1,340 @@ +{ + "rlds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rldscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsds" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdscs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + + "rsus" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsuscs": { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rstcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rltcre" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rlutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsdt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsut" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rsutcs" : { + "default" : "ceres_ebaf_v4.1", + "alternate" 
: "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "rtmt" : { + "default" : "ceres_ebaf_v4.1", + "alternate" : "ceres_ebaf_v4.0", + "alternate1" : "ceres_ebaf_v2.8", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "pr" : { + "default" : "GPCP_v2.3", + "alternate" : "GPCP_v2.2", + "alternate1" : "GPCP_1DD", + "alternate2" : "ERA5", + "alternate3" : "MERRA2", + "alternate4" : "ERA-Interim", + "alternate5" : "NOAA-20C" + }, + "prc" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prsn" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "prw" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "MERRA2", + "alternate2" : "ERA-Interim", + "alternate3" : "NOAA-20C" + }, + "psl" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ps" : { + "default" : "ERA5", + "alternate " : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "huss" : { + "default" : "MERRA2", + "alternate" : "NOAA-20C", + "alternate1" : "ERA5", + "alternate2" : "ERA-Interim" + }, + "ta" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ua" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "va" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hur" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "wap" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "zg" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "o3" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "hus" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "uas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "vas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "tauu" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "taux" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauv" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tauy" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "COREv2-Flux" + }, + "tas" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C" + }, + "ts" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sst" : { + "default" : "ERA5", + "alternate" : "NOAA-20C", + "alternate1" : "HadISST2" + }, + "sfcWind" : { + "default" : "NOAA-20C", + "alternate" : "ERA5", + 
"alternate1" : "MERRA2", + "alternate2" : "ERA-Interim" + }, + "hfls" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "hfss" : { + "default" : "ERA5", + "alternate" : "MERRA2", + "alternate1" : "ERA-Interim", + "alternate2" : "NOAA-20C", + "alternate3" : "OAFlux" + }, + "evspsbl" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clt" : { + "default" : "ERA5", + "alternate3" : "NOAA-20C" + }, + "clwvi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "clivi" : { + "default" : "ERA5", + "alternate" : "NOAA-20C" + }, + "tasmin" : { + "default" : "MERRA2" + }, + "tasmax" : { + "default" : "MERRA2" + }, + "sic" : { + "default" : "HadSST2" + }, + "tos" : { + "default" : "HadSST2" + }, + "zos" : { + "default" : "AVISO", + "alternate" : "HadISST" + }, + "sos" : { + "default" : "Aquarius", + "alternate" : "HadISST" + } +} diff --git a/zppy/templates/pcmdi_diags/regions_specs.json b/zppy/templates/pcmdi_diags/regions_specs.json new file mode 100755 index 00000000..811eb1e9 --- /dev/null +++ b/zppy/templates/pcmdi_diags/regions_specs.json @@ -0,0 +1,263 @@ +{ + "global": { + "domain": { "latitude":[-90.0, 90.0]} + }, + "NH": { + "domain": { "latitude":[0.0, 90.0]} + }, + "SH": { + "domain": { "latitude":[-90.0, 0]} + }, + "NHEX": { + "domain": { "latitude":[30.0, 90.0]} + }, + "SHEX": { + "domain": { "latitude":[-90.0, -30.0]} + }, + "TROPICS": { + "domain": { "latitude":[-30.0, 30.0]} + }, + "90S50S": { + "domain": { "latitude":[-90.0, -50.0]} + }, + "50S20S": { + "domain": { "latitude":[-50.0, -20.0]} + }, + "20S20N": { + "domain": { "latitude":[-20.0, 20.0]} + }, + "20N50N": { + "domain": { "latitude":[20.0, 50.0]} + }, + "50N90N": { + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_NH": { + "value": 0.0, + "domain": { "latitude":[0.0, 90.0]} + }, + "ocean_SH": { + "value": 0.0, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NH": { + "value": 100, + "domain": { "latitude":[0.0, 90.0]} + }, + "land_SH": { + "value": 100, + "domain": { "latitude":[-90.0, 0.0]} + }, + "land_NHEX": { + "value": 100, + "domain": { "latitude":[30.0, 90.0]} + }, + "land_SHEX": { + "value": 100, + "domain": { "latitude":[-90.0, -30.0]} + }, + "land_TROPICS": { + "value": 100, + "domain": { "latitude":[-30.0, 30.0]} + }, + "land": { + "value": 100 + }, + "ocean_NHEX": { + "value": 0, + "domain": { "latitude":[30.0, 90.0]} + }, + "ocean_SHEX": { + "value": 0, + "domain": { "latitude":[-90.0, -30.0]} + }, + "ocean_TROPICS": { + "value": 0, + "domain": { "latitude":[30.0, 30.0]} + }, + "ocean": { + "value": 0 + }, + "ocean_50S50N": { + "value": 0.0, + "domain": { "latitude":[-50.0, 50.0]} + }, + "ocean_50S20S": { + "value": 0.0, + "domain": { "latitude":[-50.0, -20.0]} + }, + "ocean_20S20N": { + "value": 0.0, + "domain": { "latitude":[-20.0, 20.0]} + }, + "ocean_20N50N": { + "value": 0.0, + "domain": { "latitude":[20.0, 50.0]} + }, + "ocean_50N90N": { + "value": 0.0, + "domain": { "latitude":[50.0, 90.0]} + }, + "ocean_90S50S": { + "value": 0.0, + "domain": { "latitude":[-90.0, -50.0]} + }, + "NAM": { + "domain": { "latitude":[20.0, 90], + "longitude":[-180, 180]} + }, + "NAO": { + "domain": { "latitude":[20.0, 80], + "longitude":[-90, 40]} + }, + "SAM": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA1": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PSA2": { + "domain": { "latitude":[-20.0, -90], + "longitude":[0, 360]} + }, + "PNA": { + 
"domain": { "latitude":[20.0, 85], + "longitude":[120, 240]} + }, + "PDO": { + "domain": { "latitude":[20.0, 70], + "longitude":[110, 260]} + }, + "AMO": { + "domain": { "latitude":[0.0, 70], + "longitude":[-80, 0]} + }, + "AllMW": { + "domain": { "latitude":[-40.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "AllM": { + "domain": { "latitude":[-45.0, 45.0], + "longitude":[0.0, 360.0]} + }, + "NAMM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[210.0, 310.0]} + }, + "SAMM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[240.0, 330.0]} + }, + "NAFM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[310.0, 60.0]} + }, + "SAFM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[0.0, 90.0]} + }, + "ASM": { + "domain": { "latitude":[0.0, 45.0], + "longitude":[60.0, 180.0]} + }, + "AUSM": { + "domain": { "latitude":[-45.0, 0.0], + "longitude":[90.0, 160.0]} + }, + "AIR": { + "domain": { "latitude":[7.0, 25.0], + "longitude":[65.0, 85.0]} + }, + "AUS": { + "domain": { "latitude":[-20.0, -10.0], + "longitude":[120.0, 150.0]} + }, + "Sahel": { + "domain": { "latitude":[13.0, 18.0], + "longitude":[-10.0, 10.0]} + }, + "GoG": { + "domain": { "latitude":[0.0, 5.0], + "longitude":[-10.0, 10.0]} + }, + "NAmo": { + "domain": { "latitude":[20.0, 37.0], + "longitude":[-112.0, -103.0]} + }, + "SAmo": { + "domain": { "latitude":[-20.0, 2.5], + "longitude":[-65.0, -40.0]} + }, + "Nino34": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino3": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[210.0, 270.0]} + }, + "Nino4": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[160.0, 210.0]} + }, + "ONI": { + "value": 0.0, + "domain": { "latitude":[-5.0, 5.0], + "longitude":[190.0, 240.0]} + }, + "Nino12": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[270.0, 280.0]} + }, + "AMMS": { + "value": 0.0, + "domain": { "latitude":[-15.0, -5.0], + "longitude":[-20.0, 10.0]} + }, + "AMMN": { + "value": 0.0, + "domain": { "latitude":[5.0, 15.0], + "longitude":[-50.0, -20.0]} + }, + "ATL3": { + "value": 0.0, + "domain": { "latitude":[-3.0, 3.0], + "longitude":[-20.0, 0.0]} + }, + "TSA": { + "value": 0.0, + "domain": { "latitude":[-20.0, 0.0], + "longitude":[-30.0, 10.0]} + }, + "TNA": { + "value": 0.0, + "domain": { "latitude":[5.5, 23.5], + "longitude":[302.5, 345.0]} + }, + "TIO": { + "value": 0.0, + "domain": { "latitude":[-15.0, 15.0], + "longitude":[40.0, 115.0]} + }, + "IODE": { + "value": 0.0, + "domain": { "latitude":[-10.0, 10.0], + "longitude":[50.0, 70.0]} + }, + "IODW": { + "value": 0.0, + "domain": { "latitude":[-10.0, 0.0], + "longitude":[90.0, 110.0]} + }, + "SOCN": { + "value": 0.0, + "domain": { "latitude":[-70.0, -50.0], + "longitude":[0.0, 360.0]} + } +} diff --git a/zppy/templates/ts.bash b/zppy/templates/ts.bash old mode 100644 new mode 100755 index 7141e60a..936cb507 --- a/zppy/templates/ts.bash +++ b/zppy/templates/ts.bash @@ -147,19 +147,38 @@ EOF cp -s $dest/*_{{ '%04d' % (yr_start) }}??_{{ '%04d' % (yr_end) }}??.nc $input_dir dest_cmip={{ output }}/post/{{ component }}/{{ grid }}/cmip_ts/{{ frequency }} mkdir -p ${dest_cmip} + {{ e3sm_to_cmip_environment_commands }} + + {% if input_files.split(".")[0] == 'cam' or input_files.split(".")[0] == 'eam' -%} + #add code to do vertical interpolation variables at model levels before e3sm_to_cmip + IFS=',' read -ra mlvars <<< "{{ interp_vars }}" + for var in "${mlvars[@]}" + do + for file in ${input_dir}/${var}_{{ 
'%04d' % (yr_start) }}??_{{ '%04d' % (yr_end) }}??.nc + do + if [ -f ${file} ]; then + ncks --rgr xtr_mth=mss_val --vrt_fl='{{cmip_plevdata}}' ${file} ${file}.plev + #overwrite the model level data + mv ${file}.plev ${file} + fi + done + done + {% endif -%} + + #call e3sm_to_cmip srun -N 1 e3sm_to_cmip \ --output-path \ ${dest_cmip}/${tmp_dir} \ {% if input_files.split(".")[0] == 'clm2' or input_files.split(".")[0] == 'elm' -%} --var-list \ - 'mrsos, mrso, mrfso, mrros, mrro, prveg, evspsblveg, evspsblsoi, tran, tsl, lai, cLitter, cProduct, cSoilFast, cSoilMedium, cSoilSlow, fFire, fHarvest, cVeg, nbp, gpp, ra, rh' \ + 'snd, mrsos, mrso, mrfso, mrros, mrro, prveg, evspsblveg, evspsblsoi, tran, tsl, lai, cLitter, cProduct, cSoilFast, cSoilMedium, cSoilSlow, fFire, fHarvest, cVeg, nbp, gpp, ra, rh' \ --realm \ lnd \ {% endif -%} {% if input_files.split(".")[0] == 'cam' or input_files.split(".")[0] == 'eam' -%} --var-list \ - 'pr, tas, rsds, rlds, rsus' \ + 'ua, va, ta, wa, zg, hur, pr, prc, prsn, ts, tas, prw, psl, sfcWind, tasmax, tasmin, tauu, tauv, rtmt, rsdt, rsds, rsdscs, rlds, rldscs, rsus, rsuscs, rsut, rsutcs, rlus, rlut, rlutcs, clivi, clwvi, clt, evspsbl, hfls, hfss, huss' \ --realm \ atm \ {% endif -%} diff --git a/zppy/ts.py b/zppy/ts.py index d07a6fa8..eab2a3d1 100644 --- a/zppy/ts.py +++ b/zppy/ts.py @@ -1,118 +1,81 @@ -import os -import pprint -import re +from typing import Any, Dict, List, Tuple -import jinja2 +from configobj import ConfigObj from zppy.bundle import handle_bundles from zppy.utils import ( - checkStatus, - getComponent, - getTasks, - getYears, - makeExecutable, - setMappingFile, - submitScript, + ParameterGuessType, + check_status, + define_or_guess, + get_file_names, + get_tasks, + get_years, + initialize_template, + make_executable, + set_component_and_prc_typ, + set_grid, + set_mapping_file, + submit_script, + write_settings_file, ) # ----------------------------------------------------------------------------- -def ts(config, scriptDir, existing_bundles, job_ids_file): +def ts(config: ConfigObj, script_dir: str, existing_bundles, job_ids_file): - # --- Initialize jinja2 template engine --- - templateLoader = jinja2.FileSystemLoader( - searchpath=config["default"]["templateDir"] - ) - templateEnv = jinja2.Environment(loader=templateLoader) - template = templateEnv.get_template("ts.bash") + template, _ = initialize_template(config, "ts.bash") # --- List of tasks --- - tasks = getTasks(config, "ts") + tasks: List[Dict[str, Any]] = get_tasks(config, "ts") if len(tasks) == 0: return existing_bundles # --- Generate and submit ts scripts --- for c in tasks: - - setMappingFile(c) - - # Grid name (if not explicitly defined) - # 'native' if no remapping - # or extracted from mapping filename - if c["grid"] == "": - if c["mapping_file"] == "": - c["grid"] = "native" - elif c["mapping_file"] == "glb": - c["grid"] = "glb" - else: - tmp = os.path.basename(c["mapping_file"]) - # FIXME: W605 invalid escape sequence '\.' 
-            tmp = re.sub("\.[^.]*\.nc$", "", tmp)  # noqa: W605
-            tmp = tmp.split("_")
-            if tmp[0] == "map":
-                c["grid"] = "%s_%s" % (tmp[-2], tmp[-1])
-            else:
-                raise ValueError(
-                    "Cannot extract target grid name from mapping file %s"
-                    % (c["mapping_file"])
-                )
-
-        # Output component (for directory structure) and procedure type for ncclimo
-        c["component"], c["prc_typ"] = getComponent(
-            c["input_component"], c["input_files"]
-        )
-
+        set_mapping_file(c)
+        set_grid(c)
+        set_component_and_prc_typ(c)
         c["cmor_tables_prefix"] = c["diagnostics_base_path"]
-
+        year_sets: List[Tuple[int, int]] = get_years(c["years"])
         # Loop over year sets
-        year_sets = getYears(c["years"])
         for s in year_sets:
-
             c["yr_start"] = s[0]
             c["yr_end"] = s[1]
             if ("last_year" in c.keys()) and (c["yr_end"] > c["last_year"]):
                 continue  # Skip this year set
             c["ypf"] = s[1] - s[0] + 1
-            c["scriptDir"] = scriptDir
-            if c["subsection"]:
-                sub = c["subsection"]
-            else:
-                sub = c["grid"]
-            prefix = "ts_%s_%04d-%04d-%04d" % (
-                sub,
-                c["yr_start"],
-                c["yr_end"],
-                c["ypf"],
+            c["scriptDir"] = script_dir
+            sub: str = define_or_guess(
+                c, "subsection", "grid", ParameterGuessType.SECTION_GUESS
             )
+            prefix = f"ts_{sub}_{c['yr_start']:04d}-{c['yr_end']:04d}-{c['ypf']:04d}"
             print(prefix)
             c["prefix"] = prefix
-            scriptFile = os.path.join(scriptDir, "%s.bash" % (prefix))
-            statusFile = os.path.join(scriptDir, "%s.status" % (prefix))
-            settingsFile = os.path.join(scriptDir, "%s.settings" % (prefix))
-            skip = checkStatus(statusFile)
+            bash_file, settings_file, status_file = get_file_names(script_dir, prefix)
+            skip: bool = check_status(status_file)
             if skip:
                 continue
-            # Create script
-            with open(scriptFile, "w") as f:
+            with open(bash_file, "w") as f:
                 f.write(template.render(**c))
-            makeExecutable(scriptFile)
-
-            with open(settingsFile, "w") as sf:
-                p = pprint.PrettyPrinter(indent=2, stream=sf)
-                p.pprint(c)
-                p.pprint(s)
-
+            make_executable(bash_file)
+            write_settings_file(settings_file, c, s)
             export = "ALL"
             existing_bundles = handle_bundles(
-                c, scriptFile, export, existing_bundles=existing_bundles
+                c, bash_file, export, existing_bundles=existing_bundles
             )
             if not c["dry_run"]:
                 if c["bundle"] == "":
                     # Submit job
-                    submitScript(scriptFile, statusFile, export, job_ids_file)
+                    submit_script(
+                        bash_file,
+                        status_file,
+                        export,
+                        job_ids_file,
+                        fail_on_dependency_skip=c["fail_on_dependency_skip"],
+                    )
                 else:
-                    print("...adding to bundle '%s'" % (c["bundle"]))
+                    print(f"...adding to bundle {c['bundle']}")
                     print(f"  environment_commands={c['environment_commands']}")
                     print(
diff --git a/zppy/utils.py b/zppy/utils.py
index 96c3a180..fe62014f 100644
--- a/zppy/utils.py
+++ b/zppy/utils.py
@@ -1,19 +1,98 @@
 import os
 import os.path
+import pprint
+import re
 import shlex
 import stat
 import time
+from enum import Enum
 from subprocess import PIPE, Popen
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Set, Tuple
+
+import jinja2
+from configobj import ConfigObj
+
+
+# Classes #####################################################################
+class ParameterGuessType(Enum):
+    PATH_GUESS = 1
+    SECTION_GUESS = 2
+
+
+class ParameterNotProvidedError(RuntimeError):
+    pass
+
+
+class DependencySkipError(RuntimeError):
+    pass
+
+
+# Utilities for this file #####################################################
+
+
+def get_active_status(task: Dict[str, Any]) -> bool:
+    active: Any = task["active"]
+    if isinstance(active, bool):
+        return active
+    elif isinstance(active, str):
+        active_lower_case: str = active.lower()
+        if active_lower_case 
== "true": + return True + elif active_lower_case == "false": + return False + raise ValueError(f"Invalid value {active} for 'active'") + raise TypeError(f"Invalid type {type(active)} for 'active'") -# ----------------------------------------------------------------------------- -# Process specified section and its sub-sections to build list of tasks -# -# If the section includes sub-sections, one task will be created for each -# sub-section and no task will be created for the main section. +def get_guess_type_parameter(guess_type: ParameterGuessType) -> str: + guess_type_parameter: str + if guess_type == ParameterGuessType.PATH_GUESS: + guess_type_parameter = "guess_path_parameters" + elif guess_type == ParameterGuessType.SECTION_GUESS: + guess_type_parameter = "guess_section_parameters" + else: + raise ValueError(f"Invalid guess_type: {guess_type}") + return guess_type_parameter -def getTasks(config, section_name): + +def get_url_message(c: Dict[str, Any], task: str) -> str: + base_path = c["web_portal_base_path"] + base_url = c["web_portal_base_url"] + www = c["www"] + case = c["case"] + url_msg: str + if www.startswith(base_path): + # TODO: python 3.9 introduces `removeprefix` + # This will begin with a "/" + www_suffix = www[len(base_path) :] + url_msg = f"URL: {base_url}{www_suffix}/{case}/{task}" + else: + url_msg = f"Could not determine URL from www={www}" + return url_msg + + +# Beginning steps ############################################################# + + +# TODO: determine return type +def initialize_template(config: ConfigObj, template_name: str) -> Tuple[Any, Any]: + # --- Initialize jinja2 template engine --- + template_loader = jinja2.FileSystemLoader( + searchpath=config["default"]["templateDir"] + ) + template_env = jinja2.Environment(loader=template_loader) + template = template_env.get_template(template_name) + return template, template_env + + +# TODO: type aliases require python 3.12 or higher +# type TaskDict = Dict[str, Any] + + +# Process specified section and its sub-sections to build the list of tasks. +# If the section includes sub-sections, one task will be created for each +# sub-section and no task will be created for the main section. +def get_tasks(config: ConfigObj, section_name: str) -> List[Dict[str, Any]]: # mypy: resolves error: Need type annotation for "tasks" (hint: "tasks: List[] = ...") tasks: List[Dict[str, Any]] = [] @@ -21,17 +100,18 @@ def getTasks(config, section_name): # Sanity check # flake8: resolves E713 test for membership should be 'not in' if section_name not in config: - print('WARNING: Skipping section not found = "%s"' % (section_name)) + print(f'WARNING: Skipping section not found = "{section_name}"') return tasks # List of sub-sections - sub_section_names = config[section_name].sections + sub_section_names: List[str] = config[section_name].sections # Merge default with current section. Need to work with copies to avoid contamination - section_cfg = config["default"].copy() + section_cfg: Dict[str, Any] = config["default"].copy() section_cfg.update(config[section_name].copy()) # Construct list of tasks + task: Dict[str, Any] if len(sub_section_names) == 0: # No sub-section, single task @@ -46,21 +126,19 @@ def getTasks(config, section_name): tasks.append(task) else: - # One task for each sub-section for sub_section_name in sub_section_names: - # Merge current section with default task = config["default"].copy() task.update(config[section_name].copy()) # Merge sub-section with section. 
Start with a dictionary copy of sub-section
-            tmp = config[section_name][sub_section_name].copy()
+            tmp: Dict[str, Any] = config[section_name][sub_section_name].copy()
             # Remove all empty fields (None). These will be inherited from section
-            sub = {k: v for k, v in tmp.items() if v is not None}
+            sub: Dict[str, Any] = {k: v for k, v in tmp.items() if v is not None}
             # Merge content of sub-section into section
             task.update(sub)
             # At this point, task will still include dictionary entries for
-            # all sub-sections. Remove them to clean-up
+            # all sub-sections. Remove them to clean up.
             for s in sub_section_names:
                 task.pop(s)
             # Finally, add name of subsection to dictionary
@@ -73,130 +151,246 @@
     username = os.environ.get("USER")
     for c in tasks:
         for key in c:
-            if (type(c[key]) == str) and ("$USER" in c[key]):
+            if (isinstance(c[key], str)) and ("$USER" in c[key]):
                 c[key] = c[key].replace("$USER", username)
     return tasks
-# -----------------------------------------------------------------------------
-def get_active_status(task):
-    active = task["active"]
-    if type(active) == bool:
-        return active
-    elif type(active) == str:
-        active_lower_case = active.lower()
-        if active_lower_case == "true":
-            return True
-        elif active_lower_case == "false":
-            return False
-        raise ValueError("Invalid value {} for 'active'".format(active))
-    raise TypeError("Invalid type {} for 'active'".format(type(active)))
+# `for c in tasks` steps ######################################################
+
+
+def set_mapping_file(c: Dict[str, Any]) -> None:
+    if c["mapping_file"] and (c["mapping_file"] != "glb"):
+        directory: str = os.path.dirname(c["mapping_file"])
+        if not directory:
+            # We use the mapping file from Mache's [diagnostics > base_path].
+            # However, new mapping files should be added to Mache's [sync > public_diags].
+            # These files will then be synced over.
+            c["mapping_file"] = os.path.join(
+                c["diagnostics_base_path"], "maps", c["mapping_file"]
+            )
+
+
+def set_grid(c: Dict[str, Any]) -> None:
+    # Grid name (if not explicitly defined)
+    # 'native' if no remapping
+    # or extracted from mapping filename
+    if c["grid"] == "":
+        if c["mapping_file"] == "":
+            c["grid"] = "native"
+        elif c["mapping_file"] == "glb":
+            c["grid"] = "glb"
+        else:
+            tmp = os.path.basename(c["mapping_file"])
+            # Use a raw string so the escaped dots are valid (resolves the old W605 warning)
+            tmp = re.sub(r"\.[^.]*\.nc$", "", tmp)
+            tmp = tmp.split("_")
+            if tmp[0] == "map":
+                c["grid"] = f"{tmp[-2]}_{tmp[-1]}"
+            else:
+                raise ValueError(
+                    f"Cannot extract target grid name from mapping file {c['mapping_file']}"
+                )
+    # If grid is defined, just use that
+
+
+# Output component (for directory structure) and procedure type for ncclimo
+def set_component_and_prc_typ(c: Dict[str, Any]) -> None:
+    if c["input_component"] != "":
+        tmp = c["input_component"]
+    else:
+        tmp = c["input_files"].split(".")[0]
+    component: str
+    # Default ncclimo procedure type is "sgs"
+    prc_typ: str = "sgs"
+    if tmp in ("cam", "eam", "eamxx"):
+        component = "atm"
+        prc_typ = tmp
+    elif tmp in ("cpl",):
+        component = "cpl"
+    elif tmp in ("clm2",):
+        component = "lnd"
+        prc_typ = "clm"
+    elif tmp in ("elm",):
+        component = "lnd"
+        prc_typ = tmp
+    elif tmp in ("mosart",):
+        component = "rof"
+    else:
+        raise ValueError(
+            f"Cannot extract output component name from {c['input_component']} or {c['input_files']}."
+ ) + c["component"] = component + c["prc_typ"] = prc_typ + + +def check_required_parameters( + c: Dict[str, Any], sets_with_requirement: Set[str], relevant_parameter: str +) -> None: + requested_sets = set(c["sets"]) + if ( + (sets_with_requirement & requested_sets) + and (relevant_parameter in c.keys()) + and (c[relevant_parameter] == "") + ): + raise ParameterNotProvidedError(relevant_parameter) -# ----------------------------------------------------------------------------- # Return all year sets from a configuration given by a list of strings # "year_begin:year_end:year_freq" # "year_begin-year_end" - - -def getYears(years_list): - if type(years_list) == str: +def get_years(years_input) -> List[Tuple[int, int]]: + years_list: List[str] + if isinstance(years_input, str): # This will be the case if years_list is missing a trailing comma - years_list = [years_list] - year_sets = [] + years_list = [years_input] + else: + years_list = years_input + year_sets: List[Tuple[int, int]] = [] for years in years_list: - if years.count(":") == 2: - - year_begin, year_end, year_freq = years.split(":") - year_begin = int(year_begin) - year_end = int(year_end) - year_freq = int(year_freq) - - year1 = year_begin - year2 = year1 + year_freq - 1 + year_begin: int + year_end: int + year_freq: int + year_begin, year_end, year_freq = tuple( + map(lambda y: int(y), years.split(":")) + ) + year1: int = year_begin + year2: int = year1 + year_freq - 1 while year2 <= year_end: year_sets.append((year1, year2)) year1 = year2 + 1 year2 = year1 + year_freq - 1 - elif years.count("-") == 1: - year1, year2 = years.split("-") - year1 = int(year1) - year2 = int(year2) + year1, year2 = tuple(map(lambda y: int(y), years.split("-"))) year_sets.append((year1, year2)) - elif years != "": - error_str = "Error interpreting years %s" % (years) + error_str = f"Error interpreting years {years}" print(error_str) raise ValueError(error_str) - return year_sets -# ----------------------------------------------------------------------------- -# Return output component name and procedure type based on either -# input_component or input_files +# `for s in year_sets` steps ################################################## + + +# This returns a value +def define_or_guess( + c: Dict[str, Any], + first_choice_parameter: str, + second_choice_parameter: str, + guess_type: ParameterGuessType, +) -> Any: + # Determine which type of guess to use. + guess_type_parameter: str = get_guess_type_parameter(guess_type) + # Define a value, if possible. + value: Any + if (first_choice_parameter in c.keys()) and c[first_choice_parameter]: + value = c[first_choice_parameter] + elif c[guess_type_parameter]: + # first_choice_parameter isn't defined, + # so let's make a guess for the value. + value = c[second_choice_parameter] + else: + raise ParameterNotProvidedError(first_choice_parameter) + return value + + +# This updates the dict c +def define_or_guess2( + c: Dict[str, Any], + parameter: str, + backup_option: str, + guess_type: ParameterGuessType, +) -> None: + # Determine which type of guess to use. + guess_type_parameter: str = get_guess_type_parameter(guess_type) + # Define a value, if possible. 
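+    # For example: if c[parameter] exists but is "", and the relevant guess
+    # flag c[guess_type_parameter] is truthy, the blank value is filled in
+    # with backup_option; with guessing disabled, ParameterNotProvidedError
+    # is raised. Absent or already non-empty parameters are left alone.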
+ if (parameter in c.keys()) and (c[parameter] == ""): + if c[guess_type_parameter]: + c[parameter] = backup_option + else: + raise ParameterNotProvidedError(parameter) + +def check_parameter_defined(c: Dict[str, Any], relevant_parameter: str) -> None: + if (relevant_parameter not in c.keys()) or (c[relevant_parameter] == ""): + raise ParameterNotProvidedError(relevant_parameter) -def getComponent(input_component, input_files): - if input_component != "": - tmp = input_component - else: - tmp = input_files.split(".")[0] +def get_file_names(script_dir: str, prefix: str): + return tuple( + [ + os.path.join(script_dir, f"{prefix}.{suffix}") + for suffix in ["bash", "settings", "status"] + ] + ) - # Default ncclim procedure type is "sgs" - prc_typ = "sgs" - # Output component (for directory structure) and ncclimo procedure type - if tmp in ("cam", "eam", "eamxx"): - component = "atm" - prc_typ = tmp - elif tmp in ("cpl",): - component = "cpl" - elif tmp in ("clm2",): - component = "lnd" - prc_typ = "clm" - elif tmp in ("elm",): - component = "lnd" - prc_typ = tmp - elif tmp in ("mosart",): - component = "rof" - else: - raise ValueError( - f"Cannot extract output component name from {input_component} or {input_files}." - ) +def check_status(status_file: str) -> bool: + skip: bool = False + if os.path.isfile(status_file): + with open(status_file, "r") as f: + tmp: List[str] = f.read().split() + if tmp[0] in ("OK", "WAITING", "RUNNING"): + skip = True + print(f"...skipping because status file says '{tmp[0]}'") - return component, prc_typ + return skip -# ----------------------------------------------------------------------------- +def make_executable(script_file: str) -> None: + st = os.stat(script_file) + os.chmod(script_file, st.st_mode | stat.S_IEXEC) -def setMappingFile(c): - if c["mapping_file"] and (c["mapping_file"] != "glb"): - directory = os.path.dirname(c["mapping_file"]) - if not directory: - # We use the mapping file from Mache's [diagnostics > base_path]. - # However, new mapping files should be added to Mache's [sync > public_diags]. - # These files will then be synced over. 
- c["mapping_file"] = os.path.join( - c["diagnostics_base_path"], "maps", c["mapping_file"] +def add_dependencies( + dependencies: List[str], + scriptDir: str, + prefix: str, + sub: str, + start_yr: int, + end_yr: int, + num_years: int, +) -> None: + y1: int = start_yr + y2: int = start_yr + num_years - 1 + while y2 <= end_yr: + dependencies.append( + os.path.join( + scriptDir, f"{prefix}_{sub}_{y1:04d}-{y2:04d}-{num_years:04d}.status" ) + ) + y1 += num_years + y2 += num_years -# ----------------------------------------------------------------------------- -def submitScript(scriptFile, statusFile, export, job_ids_file, dependFiles=[]): - +def write_settings_file( + settings_file: str, task_dict: Dict[str, Any], year_tuple: Tuple[int, int] +): + with open(settings_file, "w") as sf: + p = pprint.PrettyPrinter(indent=2, stream=sf) + p.pprint(task_dict) + p.pprint(year_tuple) + + +def submit_script( + script_file: str, + status_file: str, + export, + job_ids_file, + dependFiles: List[str] = [], + fail_on_dependency_skip: bool = False, +): # id of submitted job, or -1 if not submitted jobid = None # Handle dependencies - dependIds = [] + dependIds: List[int] = [] for dependFile in dependFiles: if os.path.isfile(dependFile): + tmp: List[str] with open(dependFile, "r") as f: tmp = f.read().split() if tmp[0] in ("OK"): @@ -204,31 +398,37 @@ def submitScript(scriptFile, statusFile, export, job_ids_file, dependFiles=[]): elif tmp[0] in ("WAITING", "RUNNING"): dependIds.append(int(tmp[1])) else: - print("...skipping because dependency says '%s'" % (tmp[0])) + skip_message = f"...skipping because dependency says '{tmp[0]}'" + if fail_on_dependency_skip: + raise DependencySkipError(skip_message) + else: + print(skip_message) + jobid = -1 + break + else: + skip_message = f"...skipping because of dependency status file missing\n {dependFile}" + if fail_on_dependency_skip: + raise DependencySkipError(skip_message) + else: + print(skip_message) jobid = -1 break - else: - print( - "...skipping because of dependency status file missing\n %s" - % (dependFile) - ) - jobid = -1 - break # If no exception occurred during dependency check, proceed with submission if jobid != -1: # Submit command + command: str if len(dependIds) == 0: - command = f"sbatch --export={export} {scriptFile}" + command = f"sbatch --export={export} {script_file}" else: - jobs = "" + jobs: str = "" for i in dependIds: jobs += ":{:d}".format(i) # Note that `--dependency` does handle bundles even though it lists individual tasks, not bundles. # Since each task of a bundle lists "RUNNING ", the bundle's job ID will be included. 
command = ( - f"sbatch --export={export} --dependency=afterok{jobs} {scriptFile}" + f"sbatch --export={export} --dependency=afterok{jobs} {script_file}" ) # Actual submission @@ -238,7 +438,7 @@ def submitScript(scriptFile, statusFile, export, job_ids_file, dependFiles=[]): out = stdout.decode().strip() print(f"...{out}") if status != 0 or not out.startswith("Submitted batch job"): - error_str = f"Problem submitting script {scriptFile}" + error_str = f"Problem submitting script {script_file}" print(error_str) print(command) print(stderr) @@ -255,68 +455,11 @@ def submitScript(scriptFile, statusFile, export, job_ids_file, dependFiles=[]): # Create status file if job has been submitted if jobid != -1: - with open(statusFile, "w") as f: - f.write("WAITING %d\n" % (jobid)) + with open(status_file, "w") as f: + f.write(f"WAITING {jobid:d}\n") return jobid -# ----------------------------------------------------------------------------- -def checkStatus(statusFile): - - skip = False - if os.path.isfile(statusFile): - with open(statusFile, "r") as f: - tmp = f.read().split() - if tmp[0] in ("OK", "WAITING", "RUNNING"): - skip = True - print(f"...skipping because status file says '{tmp[0]}'") - - return skip - - -# ----------------------------------------------------------------------------- -def makeExecutable(scriptFile): - - st = os.stat(scriptFile) - os.chmod(scriptFile, st.st_mode | stat.S_IEXEC) - - return - - -# ----------------------------------------------------------------------------- -def print_url(c, task): - base_path = c["web_portal_base_path"] - base_url = c["web_portal_base_url"] - www = c["www"] - case = c["case"] - if www.startswith(base_path): - # TODO: python 3.9 introduces `removeprefix` - # This will begin with a "/" - www_suffix = www[len(base_path) :] - print(f"URL: {base_url}{www_suffix}/{case}/{task}") - else: - print(f"Could not determine URL from www={www}") - - -# ----------------------------------------------------------------------------- -def add_dependencies( - dependencies: List[str], - scriptDir: str, - prefix: str, - sub: str, - start_yr: int, - end_yr: int, - num_years: int, -): - y1: int = start_yr - y2: int = start_yr + num_years - 1 - while y2 <= end_yr: - dependencies.append( - os.path.join( - scriptDir, - "%s_%s_%04d-%04d-%04d.status" % (prefix, sub, y1, y2, num_years), - ) - ) - y1 += num_years - y2 += num_years +def print_url(c: Dict[str, Any], task: str) -> None: + print(get_url_message(c, task))
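
To make the year-set grammar used by `get_years` above concrete: "begin:end:freq" expands into consecutive freq-year windows and "begin-end" into a single window. A minimal standalone sketch mirroring that logic (the function name and sample values are illustrative only, not part of the patch):

    from typing import List, Tuple

    def parse_year_sets(years_input) -> List[Tuple[int, int]]:
        # Accept a bare string as a one-element list, as get_years does.
        years_list = [years_input] if isinstance(years_input, str) else years_input
        year_sets: List[Tuple[int, int]] = []
        for years in years_list:
            if years.count(":") == 2:
                # "begin:end:freq" -> consecutive freq-year windows
                year_begin, year_end, year_freq = map(int, years.split(":"))
                y1 = year_begin
                y2 = y1 + year_freq - 1
                while y2 <= year_end:
                    year_sets.append((y1, y2))
                    y1 = y2 + 1
                    y2 = y1 + year_freq - 1
            elif years.count("-") == 1:
                # "begin-end" -> a single window
                y1, y2 = map(int, years.split("-"))
                year_sets.append((y1, y2))
            elif years != "":
                raise ValueError(f"Error interpreting years {years}")
        return year_sets

    print(parse_year_sets("1985:2014:10"))  # [(1985, 1994), (1995, 2004), (2005, 2014)]
    print(parse_year_sets(["1985-2014"]))   # [(1985, 2014)]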