From d39fdbe8e111ac167c8ffdeae5cb1ea866b0e4a3 Mon Sep 17 00:00:00 2001 From: "Paul J. Durack" Date: Tue, 2 Jul 2024 13:20:00 -0700 Subject: [PATCH] validate entries --- input4MIPs_source_id.json | 6 + src/registerSource_id.ipynb | 256 +++++++++++++++++++++++++++--------- 2 files changed, 201 insertions(+), 61 deletions(-) diff --git a/input4MIPs_source_id.json b/input4MIPs_source_id.json index 43081295..33845763 100644 --- a/input4MIPs_source_id.json +++ b/input4MIPs_source_id.json @@ -1150,6 +1150,7 @@ "source_id":"CR-CMIP-0-2-0", "source_version":"0.2.0", "target_mip":"CMIP", + "title":"Climate Resource CMIP 0.2.0 dataset prepared for input4MIPs", "|dataProviderExtra":{ "source_variables":"" }, @@ -1308,6 +1309,7 @@ "source_id":"DRES-CMIP-BB4CMIP7-1-0", "source_version":"1.0", "target_mip":"CMIP", + "title":"Deltares CMIP BB4CMIP7 1.0 global fire emissions", "|dataProviderExtra":{ "comment":"Datasource for global fire emissions from 1750 through 2022 based on GFED4s, FireMIP, visibility-observations and GCD data", "data_usage_tips":"Please check with the first year 1900 and last year 2022 annual global total emissions. Be careful to not double count deforestation carbon emissions. They are included in the estimates but several models also have deforestation carbon emissions based for example on historical deforestation rates", @@ -4663,6 +4665,7 @@ "source_id":"PCMDI-AMIP-ERSST5-1-0", "source_version":"1.0", "target_mip":"Prototype", + "title":"PCMDI-AMIP ERSST5 1.0 dataset prepared for input4MIPs", "|dataProviderExtra":{ "comment":"Prototype dataset for the evaluation of SST forcing uncertainty over the satellite era - not for production use", "data_usage_tips":"This dataset has been generated for evaluation purposes only - not for production use in CMIP7 simulations", @@ -4707,6 +4710,7 @@ "source_id":"PCMDI-AMIP-Had1p1-1-0", "source_version":"1.0", "target_mip":"Prototype", + "title":"PCMDI-AMIP Had-1.1 1.0 dataset prepared for input4MIPs", "|dataProviderExtra":{ "comment":"Prototype dataset for the evaluation of SST forcing uncertainty over the satellite era - not for production use", "data_usage_tips":"This dataset has been generated for evaluation purposes only - not for production use in CMIP7 simulations", @@ -4751,6 +4755,7 @@ "source_id":"PCMDI-AMIP-OI2p1-1-0", "source_version":"1.0", "target_mip":"Prototype", + "title":"PCMDI-AMIP OI-2.1 1.0 dataset prepared for input4MIPs", "|dataProviderExtra":{ "comment":"Prototype dataset for the evaluation of SST forcing uncertainty over the satellite era - not for production use", "data_usage_tips":"This dataset has been generated for evaluation purposes only - not for production use in CMIP7 simulations", @@ -4846,6 +4851,7 @@ "source_id":"SOLARIS-HEPPA-CMIP-4-1", "source_version":"4.1", "target_mip":"CMIP", + "title":"SOLARIS HEPPA CMIP 4.1 dataset prepared for input4MIPs", "|dataProviderExtra":{ "contributor_names":"Bernd Funke, Timo Asikainen, Stefan Bender, Odele Coddington, Thierry Dudok de Wit, Illaria Ermolli, Margit Haberreiter, Doug Kinnison, Judith Lean, Sergey Koldoboskiy, Daniel R. Marsh, Hilde Nesse, Annika Seppaelae, Miriam Sinnhuber, Ilya Usoskin, Max van de Kamp, Pekka T. Verronen", "metadata_url":"see http://solarisheppa.geomar.de/solarisheppa/sites/default/files/data/cmip7/CMIP7_metadata_description_4.1.pdf", diff --git a/src/registerSource_id.ipynb b/src/registerSource_id.ipynb index ba958237..f2be7b84 100644 --- a/src/registerSource_id.ipynb +++ b/src/registerSource_id.ipynb @@ -16,7 +16,9 @@ "PJD 1 Jul 2024 - Started using new source_id template\n", "PJD 2 Jul 2024 - Updating with new entries\n", "PJD 2 Jul 2024 - Add missing \"license\" in stdDict\n", - "PJD 2 Jul 2024 - Add missing \"region\" in stdDict" + "PJD 2 Jul 2024 - Add missing \"region\" in stdDict\n", + "PJD 2 Jul 2024 - Add missing \"title\" in stdDict\n", + "PJD 2 Jul 2024 - Validated all stdKeys and esgfKeys exist for all entries" ] }, { @@ -86,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 134, + "execution_count": 1, "id": "e8e04310-dd0f-4900-9903-b4dcc6f8f9f8", "metadata": {}, "outputs": [ @@ -94,8 +96,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 10 μs, sys: 1e+03 ns, total: 11 μs\n", - "Wall time: 14.1 μs\n" + "CPU times: user 38.3 ms, sys: 15 ms, total: 53.3 ms\n", + "Wall time: 59.4 ms\n" ] } ], @@ -103,6 +105,7 @@ "%%time\n", "import json\n", "import os\n", + "import pdb\n", "import requests\n", "from IPython.display import clear_output" ] @@ -119,7 +122,11 @@ "cell_type": "code", "execution_count": 135, "id": "776826bd-f567-44e5-ac7c-aee5d38933ee", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "outputs": [], "source": [ "def make_srcId(keyId, stdDict, dataProviderDict, dataProviderFileDict):\n", @@ -133,7 +140,7 @@ " \"datetime_stop\", \"frequency\", \"further_info_url\",\n", " \"grid_label\", \"institution_id\", \"license\", \"mip_era\",\n", " \"nominal_resolution\", \"realm\", \"region\", \"source\",\n", - " \"source_id\", \"source_version\", \"target_mip\"]\n", + " \"source_id\", \"source_version\", \"target_mip\", \"title\"]\n", " # fill standard entries\n", " for cnt, key in enumerate(stdKeys):\n", " srcId[keyId][key] = stdDict[key]\n", @@ -173,7 +180,11 @@ "cell_type": "code", "execution_count": 136, "id": "e8c81dc8-252f-46bc-9011-d55aa456daf2", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "outputs": [ { "name": "stdout", @@ -185,8 +196,7 @@ ], "source": [ "stdDict = {}\n", - "# required keys\n", - "# SOLARIS-HEPPA-CMIP-4-1\n", + "# required keys - SOLARIS-HEPPA-CMIP-4-1\n", "stdDict[\"_status\"] = \"Registered\"\n", "stdDict[\"contact\"] = \"bernd@iaa.es\"\n", "stdDict[\"dataset_category\"] = \"solar\"\n", @@ -314,7 +324,11 @@ "cell_type": "code", "execution_count": 153, "id": "b58189a2-5a81-488f-a206-d1318d168a1f", - "metadata": {}, + "metadata": { + "jupyter": { + "source_hidden": true + } + }, "outputs": [ { "name": "stdout", @@ -338,58 +352,170 @@ "# srcIds = json.load(f)\n", "\n", "# add new source_id\n", - "#srcIds[\"source_id\"][stdDict[\"source_id\"]] = {}\n", - "#srcIds[\"source_id\"][stdDict[\"source_id\"]] = newId[stdDict[\"source_id\"]]\n", + "srcIds[\"source_id\"][stdDict[\"source_id\"]] = {}\n", + "srcIds[\"source_id\"][stdDict[\"source_id\"]] = newId[stdDict[\"source_id\"]]\n", "\n", - "# add PCMDI-AMIP-* cleanup to 2024\n", - "for srcId in [\"PCMDI-AMIP-1-1-0\", \"PCMDI-AMIP-1-1-1\", \"PCMDI-AMIP-1-1-2\",\n", - " \"PCMDI-AMIP-1-1-3\", \"PCMDI-AMIP-1-1-4\", \"PCMDI-AMIP-1-1-5\",\n", - " \"PCMDI-AMIP-1-1-6\", \"PCMDI-AMIP-1-1-7\", \"PCMDI-AMIP-1-1-8\",\n", - " \"PCMDI-AMIP-1-1-9\"]:\n", - " srcIds[\"source_id\"][srcId][\"contact\"] = \"PCMDI (pcmdi-cmip@llnl.gov)\"\n", - " srcIds[\"source_id\"][srcId][\"further_info_url\"] = \"https://pcmdi.llnl.gov/mips/amip\"\n", - " srcIds[\"source_id\"][srcId][\"realm\"] = [\"ocean\", \"seaIce\"]\n", - " srcIds[\"source_id\"][srcId][\"region\"] = [\"global_ocean\"]\n", - " ver = int(srcId.split(\"-\")[4])\n", - " srcIds[\"source_id\"][srcId][\"title\"] = \"\".join([\"PCMDI-AMIP 1.1.\", str(ver),\" dataset prepared for input4MIPs\"])\n", - " # source_variables\n", - " if ver < 2:\n", - " srcIds[\"source_id\"][srcId][\"|dataProviderExtra\"][\"source_variables\"] = [\"areacello\", \"siconc\", \"siconcbcs\",\n", - " \"tos\", \"tosbcs\"]\n", - " elif ver >= 2:\n", - " srcIds[\"source_id\"][srcId][\"|dataProviderExtra\"][\"source_variables\"] = [\"areacello\", \"sftof\", \"siconc\",\n", - " \"siconcbcs\", \"tos\", \"tosbcs\"]\n", - " # datetime\n", - " if ver == 0:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2015-12-16\"\n", - " elif ver == 1:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2016-06-16\" \n", - " elif ver == 2:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2016-12-16\"\n", - " elif ver == 3:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2017-06-16\"\n", - " elif ver == 4:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2017-12-16\"\n", - " elif ver == 5:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2018-06-16\"\n", - " elif ver == 6:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2018-12-16\"\n", - " elif ver == 7:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2021-06-16\"\n", - " elif ver == 8:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2021-12-16\"\n", - " elif ver == 9:\n", - " srcIds[\"source_id\"][srcId][\"datetime_start\"] = \"1870-01-16\"\n", - " srcIds[\"source_id\"][srcId][\"datetime_stop\"] = \"2022-12-16\"\n", + "# Write all out\n", + "oF = \"../input4MIPs_source_id.json\"\n", + "if os.path.exists(oF):\n", + " os.remove(oF)\n", + "fH = open(oF, \"w\")\n", + "json.dump(srcIds, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\"),)\n", + "fH.close()" + ] + }, + { + "cell_type": "markdown", + "id": "a2136e8a-5b0f-4fdf-9d88-76640063e89d", + "metadata": {}, + "source": [ + "## validate all source_id's have entries" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "36564f5e-21ca-4224-98eb-97863cf993fa", + "metadata": {}, + "outputs": [], + "source": [ + "srcIdGithub = \"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json\"\n", + "js = requests.get(srcIdGithub)\n", + "srcIds = json.loads(js.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "3bddbc1d-a3b9-4ad9-87c3-5284f8d896a0", + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['ACCESS1-3-rcp85-1-0', 'CCMI-hist-nat-1-0', 'CCMI-hist-nat-1-1', 'CCMI-hist-sol-1-0', 'CCMI-hist-sol-1-1', 'CCMI-hist-stratO3-1-0', 'CCMI-hist-volc-1-0', 'CCMI-hist-volc-1-1', 'CCMI-ssp245-nat-1-0', 'CCMI-ssp245-sol-1-0', 'CCMI-ssp245-stratO3-1-0', 'CCMI-ssp245-volc-1-0', 'CCSM4-rcp26-1-0', 'CCSM4-rcp85-1-0', 'CEDS-2016-06-18', 'CEDS-2016-06-18-sectorDimV2', 'CEDS-2016-06-18-supplemental-data', 'CEDS-2016-07-26', 'CEDS-2016-07-26-sectorDim', 'CEDS-2016-07-26-sectorDim-supplemental-data', 'CEDS-2017-05-18', 'CEDS-2017-05-18-supplemental-data', 'CEDS-2017-08-30', 'CEDS-2017-08-30-supplemental-data', 'CEDS-2017-10-05', 'CESM2-ssp585-1-0', 'CNRM-CM6-1-ssp126-1-0', 'CNRM-CM6-1-ssp585-1-0', 'CNRM-ESM2-1-ssp585-1-0', 'CR-CMIP-0-2-0', 'CSIRO-MK3-6-0-rcp85-1-0', 'DCPP-C-amv-1-1', 'DCPP-C-ipv-1-1', 'DRES-CMIP-BB4CMIP7-1-0', 'HadGEM2-ES-rcp85-1-0', 'IACETH-SAGE3lambda-2-1-0', 'IACETH-SAGE3lambda-3-0-0', 'IAMC-AIM-ssp370-1-0', 'IAMC-AIM-ssp370-1-1', 'IAMC-AIM-ssp370-1-1-supplemental-data', 'IAMC-AIM-ssp370-lowNTCF-1-1', 'IAMC-AIM-ssp370-lowNTCF-1-1-supplemental-data', 'IAMC-GCAM4-ssp434-1-0', 'IAMC-GCAM4-ssp434-1-1', 'IAMC-GCAM4-ssp434-1-1-supplemental-data', 'IAMC-GCAM4-ssp460-1-0', 'IAMC-GCAM4-ssp460-1-1', 'IAMC-GCAM4-ssp460-1-1-supplemental-data', 'IAMC-IMAGE-ssp119-1-0', 'IAMC-IMAGE-ssp119-1-1', 'IAMC-IMAGE-ssp119-1-1-supplemental-data', 'IAMC-IMAGE-ssp126-1-0', 'IAMC-IMAGE-ssp126-1-1', 'IAMC-IMAGE-ssp126-1-1-supplemental-data', 'IAMC-MESSAGE-GLOBIOM-ssp245-1-0', 'IAMC-MESSAGE-GLOBIOM-ssp245-1-1', 'IAMC-MESSAGE-GLOBIOM-ssp245-1-1-supplemental-data', 'IAMC-REMIND-MAGPIE-ssp534-over-1-0', 'IAMC-REMIND-MAGPIE-ssp534-over-1-1', 'IAMC-REMIND-MAGPIE-ssp534-over-1-1-supplemental-data', 'IAMC-REMIND-MAGPIE-ssp585-1-0', 'IAMC-REMIND-MAGPIE-ssp585-1-1', 'IAMC-REMIND-MAGPIE-ssp585-1-1-supplemental-data', 'IPSL-CM5A-MR-rcp26-1-0', 'IPSL-CM5A-MR-rcp85-1-0', 'ImperialCollege-1-0', 'ImperialCollege-1-1', 'ImperialCollege-2-0', 'ImperialCollege-AIM-ssp370-1-0', 'ImperialCollege-GLOBIOM-ssp245-1-0', 'ImperialCollege-IMAGE-ssp119-1-0', 'ImperialCollege-IMAGE-ssp126-1-0', 'ImperialCollege-REMIND-MAGPIE-ssp534os-1-0', 'ImperialCollege-REMIND-MAGPIE-ssp585-1-0', 'ImperialCollege-ssp245-covid-4-8-1', 'MIROC-ESM-CHEM-rcp26-1-0', 'MIROC-ESM-CHEM-rcp85-1-0', 'MIROC5-rcp26-1-0', 'MIROC5-rcp85-1-0', 'MOHC-HadISST-2-2-0-0-0', 'MOHC-highresSST-future-1-0-0', 'MOHC-highresSST-future-1-0-1', 'MPI-B-1pctNdep-1-0', 'MPI-M-MACv2-SP-1-0', 'MRI-JRA55-do-1-3', 'MRI-JRA55-do-1-3-2', 'MRI-JRA55-do-1-4-0', 'MRI-JRA55-do-1-5-0', 'MRI-JRA55-do-1-6-0', 'NCAR-CCMI-1-0', 'NCAR-CCMI-2-0', 'NCAR-CCMI-ssp119-1-0', 'NCAR-CCMI-ssp126-1-0', 'NCAR-CCMI-ssp126-2-0', 'NCAR-CCMI-ssp245-1-0', 'NCAR-CCMI-ssp245-2-0', 'NCAR-CCMI-ssp370-1-0', 'NCAR-CCMI-ssp370-2-0', 'NCAR-CCMI-ssp434-1-0', 'NCAR-CCMI-ssp460-1-0', 'NCAR-CCMI-ssp534os-1-0', 'NCAR-CCMI-ssp585-1-0', 'NCAR-CCMI-ssp585-2-0', 'NCAS-2-1-0', 'NorESM1-M-rcp26-1-0', 'NorESM1-M-rcp85-1-0', 'PCMDI-AMIP-1-1-0', 'PCMDI-AMIP-1-1-1', 'PCMDI-AMIP-1-1-2', 'PCMDI-AMIP-1-1-3', 'PCMDI-AMIP-1-1-4', 'PCMDI-AMIP-1-1-5', 'PCMDI-AMIP-1-1-6', 'PCMDI-AMIP-1-1-7', 'PCMDI-AMIP-1-1-8', 'PCMDI-AMIP-1-1-9', 'PCMDI-AMIP-ERSST5-1-0', 'PCMDI-AMIP-Had1p1-1-0', 'PCMDI-AMIP-OI2p1-1-0', 'SOLARIS-HEPPA-3-2', 'SOLARIS-HEPPA-CMIP-4-1', 'UCI-fu-prArctic-prAntarctic-1-0', 'UCI-fut2CAntarctic-1-0', 'UCI-fut2CArctic-1-0', 'UCI-fut2CArctic-2mAntarctic-1-0', 'UCI-fut2CBKSeas-1-0', 'UCI-fut2COkhotsk-1-0', 'UCI-pi-prArctic-prAntarctic-1-0', 'UCI-pi-prArctic-prAntarctic-1-1', 'UCI-piAntarctic-1-0', 'UCI-piArctic-1-0', 'UCI-preindustrial-1-0', 'UCI-present-1-0', 'UCI-present-197901-201412-Arctic-Antarctic-1-0', 'UCI-present-197901-201412-clim-Arctic-Antarctic-1-0', 'UCI-present-2mAntarctic-1-0', 'UColorado-RFMIP-0-4', 'UColorado-RFMIP-1-0', 'UColorado-RFMIP-1-1', 'UColorado-RFMIP-1-2', 'UKESM1-0-LL-ssp585-1-0', 'UReading-CCMI-1-0', 'UReading-CCMI-ssp119-1-0', 'UReading-CCMI-ssp119-1-1', 'UReading-CCMI-ssp126-1-0', 'UReading-CCMI-ssp245-1-0', 'UReading-CCMI-ssp370-1-0', 'UReading-CCMI-ssp434-1-0', 'UReading-CCMI-ssp434-1-1', 'UReading-CCMI-ssp460-1-0', 'UReading-CCMI-ssp460-1-1', 'UReading-CCMI-ssp534os-1-0', 'UReading-CCMI-ssp534os-1-1', 'UReading-CCMI-ssp585-1-0', 'UoM-AIM-ssp370-1-2-0', 'UoM-AIM-ssp370-1-2-1', 'UoM-AIM-ssp370-lowNTCF-1-2-0', 'UoM-AIM-ssp370-lowNTCF-1-2-1', 'UoM-CMIP-1-2-0', 'UoM-GCAM4-ssp434-1-2-0', 'UoM-GCAM4-ssp434-1-2-1', 'UoM-GCAM4-ssp460-1-2-0', 'UoM-GCAM4-ssp460-1-2-1', 'UoM-IMAGE-ssp119-1-2-0', 'UoM-IMAGE-ssp119-1-2-1', 'UoM-IMAGE-ssp126-1-2-0', 'UoM-IMAGE-ssp126-1-2-1', 'UoM-MESSAGE-GLOBIOM-ssp245-1-2-0', 'UoM-MESSAGE-GLOBIOM-ssp245-1-2-1', 'UoM-REMIND-MAGPIE-ssp534-over-1-2-0', 'UoM-REMIND-MAGPIE-ssp534-over-1-2-1', 'UoM-REMIND-MAGPIE-ssp585-1-2-0', 'UoM-REMIND-MAGPIE-ssp585-1-2-1', 'UoM-ssp126-1-1-0', 'UofMD-landState-2-1-h', 'UofMD-landState-AIM-ssp370-2-1-f', 'UofMD-landState-GCAM-ssp434-2-1-f', 'UofMD-landState-GCAM-ssp460-2-1-f', 'UofMD-landState-IMAGE-ssp119-2-1-f', 'UofMD-landState-IMAGE-ssp126-2-1-e', 'UofMD-landState-IMAGE-ssp126-2-1-f', 'UofMD-landState-MAGPIE-ssp534-2-1-e', 'UofMD-landState-MAGPIE-ssp534-2-1-f', 'UofMD-landState-MAGPIE-ssp585-2-1-e', 'UofMD-landState-MAGPIE-ssp585-2-1-f', 'UofMD-landState-MESSAGE-ssp245-2-1-f', 'UofMD-landState-high-2-1-h', 'UofMD-landState-low-2-1-h', 'VUA-CMIP-BB4CMIP6-1-0', 'VUA-CMIP-BB4CMIP6-1-1', 'VUA-CMIP-BB4CMIP6-1-2'])" + ] + }, + "execution_count": 167, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "srcIds[\"source_id\"].keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "id": "3cb6e8cc-2adb-4bdd-bb3e-cf55b8a398e5", + "metadata": { + "jupyter": { + "source_hidden": true + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 175, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stdKeys = [\"_status\",\"contact\", \"dataset_category\", \"datetime_start\",\n", + " \"datetime_stop\", \"frequency\", \"further_info_url\",\n", + " \"grid_label\", \"institution_id\", \"license\", \"mip_era\",\n", + " \"nominal_resolution\", \"realm\", \"region\", \"source\",\n", + " \"source_id\", \"source_version\", \"target_mip\", \"title\"]\n", + "stdKeyList = list(srcIds[\"source_id\"][\"IAMC-IMAGE-ssp126-1-0\"].keys()) # PCMDI-AMIP-1-1-9\n", + "stdKeyList.remove(\"|dataProviderExtra\")\n", + "stdKeyList.remove(\"|dataProviderFile\")\n", + "stdKeyList.remove(\"|esgfIndex\")\n", + "stdKeyList\n", + "s = set(stdKeys)\n", + "diff = [x for x in stdKeyList if x not in s]\n", + "diff" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "03865dab-389c-418a-abb4-9c764b0a179d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 72.1 ms, sys: 27.3 ms, total: 99.4 ms\n", + "Wall time: 114 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "# read input4mips_cvs source_id\n", + "# remote\n", + "srcIdGithub = \"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_source_id.json\"\n", + "js = requests.get(srcIdGithub)\n", + "srcIds = json.loads(js.text)\n", + "\n", + "# title missing: PCMDI new, CR, DRES\n", + "srcIds[\"source_id\"][\"CR-CMIP-0-2-0\"][\"title\"] = \"Climate Resource CMIP 0.2.0 dataset prepared for input4MIPs\"\n", + "srcIds[\"source_id\"][\"DRES-CMIP-BB4CMIP7-1-0\"][\"title\"] = \"Deltares CMIP BB4CMIP7 1.0 global fire emissions\"\n", + "srcIds[\"source_id\"][\"PCMDI-AMIP-ERSST5-1-0\"][\"title\"] = \"PCMDI-AMIP ERSST5 1.0 dataset prepared for input4MIPs\"\n", + "srcIds[\"source_id\"][\"PCMDI-AMIP-Had1p1-1-0\"][\"title\"] = \"PCMDI-AMIP Had-1.1 1.0 dataset prepared for input4MIPs\"\n", + "srcIds[\"source_id\"][\"PCMDI-AMIP-OI2p1-1-0\"][\"title\"] = \"PCMDI-AMIP OI-2.1 1.0 dataset prepared for input4MIPs\"\n", + "srcIds[\"source_id\"][\"SOLARIS-HEPPA-CMIP-4-1\"][\"title\"] = \"SOLARIS HEPPA CMIP 4.1 dataset prepared for input4MIPs\"\n", + "\n", + "# entries to check\n", + "stdKeys = [\"_status\",\"contact\", \"dataset_category\", \"datetime_start\",\n", + " \"datetime_stop\", \"frequency\", \"further_info_url\",\n", + " \"grid_label\", \"institution_id\", \"license\", \"mip_era\",\n", + " \"nominal_resolution\", \"realm\", \"region\", \"source\",\n", + " \"source_id\", \"source_version\", \"target_mip\", \"title\"]\n", + "esgfKeys = [\"_timestamp\", \"data_node\", \"latest\", \"replica\",\n", + " \"version\", \"xlink\"]\n", + "\n", + "for cnt, srcId in enumerate(srcIds[\"source_id\"].keys()):\n", + " print(\"srcId:\", cnt, srcId)\n", + " srcId = srcIds[\"source_id\"][srcId]\n", + " print(srcId)\n", + " # check stdKeys\n", + " stdKeyList = list(srcId.keys())\n", + " stdKeyList.sort()\n", + " print(\"stdKeyList:\", stdKeyList)\n", + " s = set(stdKeys)\n", + " diff = [x for x in s if x not in stdKeyList]\n", + " if diff:\n", + " print()\n", + " print(\"diff:\", diff)\n", + " sys.exit()\n", + " #pdb.set_trace()\n", + " # check esgfKeys\n", + " esgfKeyList = list(srcId[\"|esgfIndex\"].keys())\n", + " esgfKeyList.sort()\n", + " print(\"esgfKeyList:\", esgfKeyList)\n", + " s = set(esgfKeys)\n", + " diff = [x for x in s if x not in esgfKeyList]\n", + " if diff:\n", + " print()\n", + " print(\"diff:\", diff)\n", + " sys.exit()\n", + " #pdb.set_trace() \n", + " print(\"catch\")\n", + " #pdb.set_trace()\n", + " clear_output(wait=False)\n", "\n", "# Write all out\n", "oF = \"../input4MIPs_source_id.json\"\n", @@ -399,6 +525,14 @@ "json.dump(srcIds, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\"),)\n", "fH.close()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "59167a41-f2aa-4494-8d62-ec8333f1d47f", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {