From 97244fa940e69d04f4fe015b647f94b46650f181 Mon Sep 17 00:00:00 2001 From: "Paul J. Durack" Date: Wed, 26 Jun 2024 08:21:07 -0700 Subject: [PATCH] updating source_id info, working --- src/getInput4MIPsESGF.ipynb | 291 +++++++++++++----------------------- 1 file changed, 106 insertions(+), 185 deletions(-) diff --git a/src/getInput4MIPsESGF.ipynb b/src/getInput4MIPsESGF.ipynb index 6e52c442..3ae15330 100644 --- a/src/getInput4MIPsESGF.ipynb +++ b/src/getInput4MIPsESGF.ipynb @@ -31,7 +31,7 @@ "To-do:\n", "- Add \"prototype\" as target_mip, new AMIP data\n", "- Register MRI-JRA55-do-1-6-0 https://github.com/PCMDI/input4MIPs_CVs/issues/18\n", - "- Register SOLARIS-HEPPA-4-0 https://github.com/PCMDI/input4MIPs_CVs/issues/17#issuecomment-2146223824\n", + "- Register SOLARIS-HEPPA-4-1 https://github.com/PCMDI/input4MIPs_CVs/issues/17#issuecomment-2146223824\n", "- Register CR-CMIP-0-2-0 https://github.com/PCMDI/input4MIPs_CVs/issues/13\n", "- Register DRES-CMIP-BB4CMIP7-1-0 https://github.com/PCMDI/input4MIPs_CVs/issues/10#issuecomment-2148147489\n", "- Register volcanic? https://github.com/PCMDI/input4MIPs_CVs/issues/9" @@ -50,8 +50,7 @@ "id": "1af44764-8e4a-46f4-a542-b18d2212cf45", "metadata": {}, "source": [ - "# env\n", - "Must be connected by home or office wired IP - *.95.87 or *.241.141\n", + "# env - must be connected by home or office wired IP - *.95.87 or *.241.141\n", "\n", "# Notes\n", "Datasets with no source_id 231026\n", @@ -66,27 +65,6 @@ "https://search.google.com/search-console" ] }, - { - "cell_type": "raw", - "id": "3622a21b-f640-4f42-b9a0-6cbf086a81fe", - "metadata": {}, - "source": [ - "\"CR-CMIP-1-0\":{\n", - " \"dataProvider\": {\n", - " \"this\": \"this\",\n", - " \"contact\":\"ISMIP6 Steering Team (ismip6@gmail.com)\",\n", - " \"that\": \"that\",\n", - " },\n", - " \"dataProviderExtra\": {\n", - " },\n", - " \"esgfIndex\": {\n", - " \"_timestamp\":\"2021-04-22T19:05:37.327Z\",\n", - " \"data_node\":\"esgf-data2.llnl.gov\",\n", - " }\n", - "}\n", - "# options in https://github.com/PCMDI/input4MIPs_CVs/pull/20/files" - ] - }, { "cell_type": "markdown", "id": "41322843-6c92-423b-a468-f1e0a7fe022b", @@ -305,7 +283,7 @@ "#print(srcIdFDictList)\n", "\n", "# determine missing\n", - "print(\"Search results: Dataset includes, excluded from File searches:\")\n", + "print(\"Search results: Dataset includes, excluded from File searches (likely latest:false):\")\n", "set(srcIdDDictList).difference(srcIdFDictList)" ] }, @@ -479,7 +457,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 24, "id": "7f4317ea-a580-4727-b244-a45cb7dd9fb1", "metadata": {}, "outputs": [ @@ -487,8 +465,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 6.72 ms, sys: 3.2 ms, total: 9.92 ms\n", - "Wall time: 83.6 ms\n" + "CPU times: user 7.24 ms, sys: 4.16 ms, total: 11.4 ms\n", + "Wall time: 320 ms\n" ] } ], @@ -497,16 +475,85 @@ "# read input4mips-cmor-tables holdings\n", "srcIdGithub = \"https://raw.githubusercontent.com/PCMDI/input4MIPs-cmor-tables/master/input4MIPs_source_id.json\"\n", "js = requests.get(srcIdGithub)\n", - "js_srcId = json.loads(js.text)\n", + "srcId = json.loads(js.text)\n", "\n", "# get required_global_attributes and prepopulate - controlled values\n", - "\n", "# iterate and add entries in input4mip-cmor-tables/input4MIPs_source_id.json - controlled and uncontrolled values\n", "\n", - "# create iteration list\n", - "fields = [\"calendar\", \"comment\", \"further_info_url\", \"grid\", \"grid_label\", \"license\", \"nominal_resolution\", \"product\",\n", - " \"references\", \"release_year\", \"region\", \"source\", \"source_description\", \"source_type\", \"source_variables\",\n", - " \"title\", \"website\"]" + "reqGlobAtt = [\"activity_id\", \"contact\", \"dataset_category\", \"frequency\",\n", + " \"further_info_url\", \"grid_label\", \"institution_id\", \"license\", \"mip_era\",\n", + " \"nominal_resolution\", \"realm\", \"region\", \"source\", \"source_id\", \"source_version\", \n", + " \"target_mip\", \"title\", \"variable_id\"]\n", + "# file \"Conventions\", \"creation_date\", \"tracking_id\", \n", + "# irrelevant \"institution\", \"table_id\",\n", + "# esgf fields\n", + "esgfAtt = [\"_timestamp\", \"data_node\", \"latest\", \"replica\", \"version\", \"xlink\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "6a07c289-50c8-4907-8027-6fabc288a5d1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['ACCESS1-3-rcp85-1-0', 'CCSM4-rcp26-1-0', 'CCSM4-rcp85-1-0', 'CESM2-ssp585-1-0', 'CNRM-CM6-1-ssp126-1-0', 'CNRM-CM6-1-ssp585-1-0', 'CNRM-ESM2-1-ssp585-1-0', 'CSIRO-MK3-6-0-rcp85-1-0', 'HadGEM2-ES-rcp85-1-0', 'IPSL-CM5A-MR-rcp26-1-0', 'IPSL-CM5A-MR-rcp85-1-0', 'MIROC-ESM-CHEM-rcp26-1-0', 'MIROC-ESM-CHEM-rcp85-1-0', 'MIROC5-rcp26-1-0', 'MIROC5-rcp85-1-0', 'MRI-JRA55-do-1-3', 'MRI-JRA55-do-1-3-2', 'MRI-JRA55-do-1-4-0', 'MRI-JRA55-do-1-5-0', 'MRI-JRA55-do-1-6-0', 'NorESM1-M-rcp26-1-0', 'NorESM1-M-rcp85-1-0', 'PCMDI-AMIP-1-1-3', 'PCMDI-AMIP-1-1-4', 'PCMDI-AMIP-1-1-5', 'PCMDI-AMIP-1-1-6', 'PCMDI-AMIP-1-1-7', 'PCMDI-AMIP-1-1-8', 'PCMDI-AMIP-1-1-9', 'UKESM1-0-LL-ssp585-1-0'])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "js_srcId[\"source_id\"].keys()" + ] + }, + { + "cell_type": "raw", + "id": "2a43ee03-26b4-44e0-baa4-d69fd85adf7d", + "metadata": {}, + "source": [ + "\"CR-CMIP-2-0\":{\n", + " \"Conventions\":\"\",\n", + " \"activity_id\":\"input4MIPs\",\n", + " \"contact\":\"ISMIP6 Steering Team (ismip6@gmail.com)\",\n", + " \"dataset_category\":\"surfaceFluxes\", # multiple\n", + " \"datetime_start\":\"1950-07-01T00:00:00Z\", # multiple, FILE\n", + " \"datetime_stop\":null, # multiple, FILE\n", + " \"frequency\":\"yrC\", # multiple, FILE\n", + " \"further_info_url\":\"\",\n", + " \"grid_label\":\"\",\n", + " \"institution_id\":\"NASA-GSFC\",\n", + " \"license\":\"\",\n", + " \"mip_era\":\"CMIP6\",\n", + " \"nominal_resolution\":\"\",\n", + " \"realm\":\"landIce\", # multiple\n", + " \"region\":\"\",\n", + " \"source\":\"\",\n", + " \"source_id\":\"ACCESS1-3-rcp85-1-0\",\n", + " \"source_version\":\"1.0\", # multiple, accounted for by unique source_id\n", + " \"target_mip\":\"ISMIP6\",\n", + " \"title\":\"\",\n", + " \"variable_id\":\"\",\n", + "\n", + " \"creation_date\":\"\", # FILE\n", + " \"tracking_id\":\"\", # FILE\n", + "\n", + " \"dataProviderExtra\": {\n", + " },\n", + " \"esgfIndex\": {\n", + " \"_timestamp\":\"2021-04-22T19:05:37.327Z\",\n", + " \"data_node\":\"esgf-data2.llnl.gov\",\n", + " \"latest\":true,\n", + " \"replica\":false,\n", + " \"version\":\"20210422\",\n", + " \"xlink\":\"http://cera-www.dkrz.de/WDCC/meta/CMIP6/input4MIPs.CMIP6.ISMIP6.NASA-GSFC.ACCESS1-3-rcp85-1-0.ocean.yrC.thetao.grg.v20210422.json|Citation|citation\"\n", + " }\n", + "}\n", + "# options in https://github.com/PCMDI/input4MIPs_CVs/pull/20/files" ] }, { @@ -569,167 +616,41 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "58d314e7-cfc4-4512-b13a-ca24d0e93de8", + "execution_count": 21, + "id": "b15a025a-dbf9-4666-8550-7e85f8187c7a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "processing: DRS\n", - "../input4MIPs_DRS.json\n", - "processing: activity_id\n", - "../input4MIPs_activity_id.json\n", - "processing: dataset_category\n", - "../input4MIPs_dataset_category.json\n", - "processing: license\n", - "../input4MIPs_license.json\n", - "processing: mip_era\n", - "../input4MIPs_mip_era.json\n", - "processing: product\n", - "../input4MIPs_product.json\n", - "processing: required_global_attributes\n", - "../input4MIPs_required_global_attributes.json\n", - "processing: target_mip\n", - "../input4MIPs_target_mip.json\n", - "processing: tracking_id\n", - "../input4MIPs_tracking_id.json\n" + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_DRS.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_activity_id.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_dataset_category.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_institution_id.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_license.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_mip_era.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_product.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_required_global_attributes.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_target_mip.json\n", + "https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_tracking_id.json\n", + "CPU times: user 77.2 ms, sys: 21.2 ms, total: 98.4 ms\n", + "Wall time: 2.07 s\n" ] } ], "source": [ - "keyDict = {\n", - " \"DRS\":{\n", - " \"directory_path_example\":\"input4MIPs/CMIP6Plus/CMIP/PCMDI/PCMDI-AMIP-1-1-9/ocean/mon/tos/gn/v20230512/\",\n", - " \"directory_path_template\":\"/////////\",\n", - " \"filename_example\":\"tos_input4MIPs_SSTsAndSeaIce_CMIP_PCMDI-AMIP-1-1-9_gn_187001-202212.nc\",\n", - " \"filename_template\":\"_____[_].nc\"\n", - " }\n", - " \"activity_id\": {\n", - " \"input4MIPs\": {\n", - " \"URL\": \"https://pcmdi.llnl.gov/mips/input4MIPs/\",\n", - " \"long_name\": \"input forcing datasets for Model Intercomparison Projects\"\n", - " }\n", - " },\n", - " \"dataset_category\": [\n", - " \"GHGConcentrations\",\n", - " \"SSTsAndSeaIce\",\n", - " \"aerosolProperties\",\n", - " \"atmosphericState\",\n", - " \"emissions\",\n", - " \"landState\",\n", - " \"ozone\",\n", - " \"radiation\",\n", - " \"solar\",\n", - " \"surfaceAir\",\n", - " \"surfaceFluxes\"\n", - " ],\n", - " \"license\" : {\n", - " \"license\":\" data produced by is licensed under a License (). Consult https://pcmdi.llnl.gov/CMIP6Plus/TermsOfUse for terms of use governing CMIP6Plus and input4MIPs output, including citation requirements and proper acknowledgment. Further information about this data, can be found at . The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n", - " \"license_options\":{\n", - " \"CC BY 4.0\":{\n", - " \"license_id\":\"Creative Commons Attribution 4.0 International\",\n", - " \"license_url\":\"https://creativecommons.org/licenses/by/4.0/\"\n", - " },\n", - " \"CC0 1.0\":{\n", - " \"license_id\":\"Creative Commons CC0 1.0 Universal Public Domain Dedication\",\n", - " \"license_url\":\"https://creativecommons.org/publicdomain/zero/1.0/\"\n", - " }\n", - " }\n", - " },\n", - " \"mip_era\": [\n", - " \"CMIP5\",\n", - " \"CMIP6\",\n", - " \"CMIP6Plus\"\n", - " ],\n", - " \"product\": [\n", - " \"derived\",\n", - " \"observations\",\n", - " \"reanalysis\"\n", - " ],\n", - " \"required_global_attributes\": [\n", - " \"Conventions\",\n", - " \"activity_id\",\n", - " \"contact\",\n", - " \"creation_date\",\n", - " \"dataset_category\",\n", - " \"frequency\",\n", - " \"further_info_url\",\n", - " \"grid_label\",\n", - " \"institution\",\n", - " \"institution_id\",\n", - " \"license\",\n", - " \"mip_era\",\n", - " \"nominal_resolution\",\n", - " \"realm\",\n", - " \"region\",\n", - " \"source\",\n", - " \"source_id\",\n", - " \"source_version\",\n", - " \"table_id\",\n", - " \"target_mip\",\n", - " \"title\",\n", - " \"tracking_id\",\n", - " \"variable_id\"\n", - " ],\n", - " \"target_mip\": {\n", - " \"CMIP6\": {\n", - " \"AerChemMIP\":\"Aerosols and Chemistry Model Intercomparison Project\",\n", - " \"C4MIP\":\"Coupled Climate Carbon Cycle Model Intercomparison Project\",\n", - " \"CDRMIP\":\"Carbon Dioxide Removal Model Intercomparison Project\",\n", - " \"CFMIP\":\"Cloud Feedback Model Intercomparison Project\",\n", - " \"CMIP\":\"CMIP DECK: 1pctCO2, abrupt4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments\",\n", - " \"CORDEX\":\"Coordinated Regional Climate Downscaling Experiment\",\n", - " \"DAMIP\":\"Detection and Attribution Model Intercomparison Project\",\n", - " \"DCPP\":\"Decadal Climate Prediction Project\",\n", - " \"DynVarMIP\":\"Dynamics and Variability Model Intercomparison Project\",\n", - " \"FAFMIP\":\"Flux-Anomaly-Forced Model Intercomparison Project\",\n", - " \"GMMIP\":\"Global Monsoons Model Intercomparison Project\",\n", - " \"GeoMIP\":\"Geoengineering Model Intercomparison Project\",\n", - " \"HighResMIP\":\"High-Resolution Model Intercomparison Project\",\n", - " \"ISMIP6\":\"Ice Sheet Model Intercomparison Project for CMIP6\",\n", - " \"LS3MIP\":\"Land Surface, Snow and Soil Moisture\",\n", - " \"LUMIP\":\"Land-Use Model Intercomparison Project\",\n", - " \"OMIP\":\"Ocean Model Intercomparison Project\",\n", - " \"PAMIP\":\"Polar Amplification Model Intercomparison Project\",\n", - " \"PMIP\":\"Palaeoclimate Modelling Intercomparison Project\",\n", - " \"RFMIP\":\"Radiative Forcing Model Intercomparison Project\",\n", - " \"SIMIP\":\"Sea Ice Model Intercomparison Project\",\n", - " \"ScenarioMIP\":\"Scenario Model Intercomparison Project\",\n", - " \"VIACSAB\":\"Vulnerability, Impacts, Adaptation and Climate Services Advisory Board\",\n", - " \"VolMIP\":\"Volcanic Forcings Model Intercomparison Project\"\n", - " },\n", - " \"CMIP6Plus\": {\n", - " \"CMIP\": {\n", - " \"URL\": \"https://gmd.copernicus.org/articles/9/1937/2016/gmd-9-1937-2016.pdf\",\n", - " \"long_name\": \"CMIP DECK: 1pctCO2, abrupt4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments\"\n", - " },\n", - " \"LESFMIP\": {\n", - " \"URL\": \"https://www.frontiersin.org/articles/10.3389/fclim.2022.955414/full\",\n", - " \"long_name\": \"The Large Ensemble Single Forcing Model Intercomparison Project\"\n", - " }\n", - " }\n", - " },\n", - " \"tracking_id\": [\n", - " \"hdl:21.14100/.*\"\n", - " ],\n", - "}\n", - "# Write all out\n", - "for key in keyDict.keys():\n", - " print(\"processing:\", key)\n", - " oF = \"\".join([\"../input4MIPs_\", key, \".json\"])\n", - " print(oF)\n", - " #pdb.set_trace()\n", - " # add dict identifier\n", - " tmp = {}\n", - " tmp[key] = keyDict[key]\n", - " # write to file\n", - " if os.path.exists(oF):\n", - " os.remove(oF)\n", - " fH = open(oF, \"w\")\n", - " json.dump(tmp, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\"),)\n", - " fH.close()" + "%%time\n", + "# read input4mips_CVs\n", + "rawPath = \"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/\"\n", + "CVList = [\"DRS\", \"activity_id\", \"dataset_category\", \"license\", \"mip_era\",\n", + " \"product\", \"required_global_attributes\", \"target_mip\", \"tracking_id\"]\n", + "# loop through entries\n", + "for count, cv in enumerate(CVList):\n", + " path = \"\".join([rawPath, \"input4MIPs_\", cv, \".json\"])\n", + " print(path)\n", + " js = requests.get(path)\n", + " vars()[cv] = json.loads(js.text)" ] }, {