Skip to content

Commit

Permalink
updating source_id info, working
Browse files Browse the repository at this point in the history
  • Loading branch information
durack1 committed Jun 26, 2024
1 parent 38659cd commit 97244fa
Showing 1 changed file with 106 additions and 185 deletions.
291 changes: 106 additions & 185 deletions src/getInput4MIPsESGF.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"To-do:\n",
"- Add \"prototype\" as target_mip, new AMIP data\n",
"- Register MRI-JRA55-do-1-6-0 https://github.com/PCMDI/input4MIPs_CVs/issues/18\n",
"- Register SOLARIS-HEPPA-4-0 https://github.com/PCMDI/input4MIPs_CVs/issues/17#issuecomment-2146223824\n",
"- Register SOLARIS-HEPPA-4-1 https://github.com/PCMDI/input4MIPs_CVs/issues/17#issuecomment-2146223824\n",
"- Register CR-CMIP-0-2-0 https://github.com/PCMDI/input4MIPs_CVs/issues/13\n",
"- Register DRES-CMIP-BB4CMIP7-1-0 https://github.com/PCMDI/input4MIPs_CVs/issues/10#issuecomment-2148147489\n",
"- Register volcanic? https://github.com/PCMDI/input4MIPs_CVs/issues/9"
Expand All @@ -50,8 +50,7 @@
"id": "1af44764-8e4a-46f4-a542-b18d2212cf45",
"metadata": {},
"source": [
"# env\n",
"Must be connected by home or office wired IP - *.95.87 or *.241.141\n",
"# env - must be connected by home or office wired IP - *.95.87 or *.241.141\n",
"\n",
"# Notes\n",
"Datasets with no source_id 231026\n",
Expand All @@ -66,27 +65,6 @@
"https://search.google.com/search-console"
]
},
{
"cell_type": "raw",
"id": "3622a21b-f640-4f42-b9a0-6cbf086a81fe",
"metadata": {},
"source": [
"\"CR-CMIP-1-0\":{\n",
" \"dataProvider\": {\n",
" \"this\": \"this\",\n",
" \"contact\":\"ISMIP6 Steering Team (ismip6@gmail.com)\",\n",
" \"that\": \"that\",\n",
" },\n",
" \"dataProviderExtra\": {\n",
" },\n",
" \"esgfIndex\": {\n",
" \"_timestamp\":\"2021-04-22T19:05:37.327Z\",\n",
" \"data_node\":\"esgf-data2.llnl.gov\",\n",
" }\n",
"}\n",
"# options in https://github.com/PCMDI/input4MIPs_CVs/pull/20/files"
]
},
{
"cell_type": "markdown",
"id": "41322843-6c92-423b-a468-f1e0a7fe022b",
Expand Down Expand Up @@ -305,7 +283,7 @@
"#print(srcIdFDictList)\n",
"\n",
"# determine missing\n",
"print(\"Search results: Dataset includes, excluded from File searches:\")\n",
"print(\"Search results: Dataset includes, excluded from File searches (likely latest:false):\")\n",
"set(srcIdDDictList).difference(srcIdFDictList)"
]
},
Expand Down Expand Up @@ -479,16 +457,16 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 24,
"id": "7f4317ea-a580-4727-b244-a45cb7dd9fb1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 6.72 ms, sys: 3.2 ms, total: 9.92 ms\n",
"Wall time: 83.6 ms\n"
"CPU times: user 7.24 ms, sys: 4.16 ms, total: 11.4 ms\n",
"Wall time: 320 ms\n"
]
}
],
Expand All @@ -497,16 +475,85 @@
"# read input4mips-cmor-tables holdings\n",
"srcIdGithub = \"https://raw.githubusercontent.com/PCMDI/input4MIPs-cmor-tables/master/input4MIPs_source_id.json\"\n",
"js = requests.get(srcIdGithub)\n",
"js_srcId = json.loads(js.text)\n",
"srcId = json.loads(js.text)\n",
"\n",
"# get required_global_attributes and prepopulate - controlled values\n",
"\n",
"# iterate and add entries in input4mip-cmor-tables/input4MIPs_source_id.json - controlled and uncontrolled values\n",
"\n",
"# create iteration list\n",
"fields = [\"calendar\", \"comment\", \"further_info_url\", \"grid\", \"grid_label\", \"license\", \"nominal_resolution\", \"product\",\n",
" \"references\", \"release_year\", \"region\", \"source\", \"source_description\", \"source_type\", \"source_variables\",\n",
" \"title\", \"website\"]"
"reqGlobAtt = [\"activity_id\", \"contact\", \"dataset_category\", \"frequency\",\n",
" \"further_info_url\", \"grid_label\", \"institution_id\", \"license\", \"mip_era\",\n",
" \"nominal_resolution\", \"realm\", \"region\", \"source\", \"source_id\", \"source_version\", \n",
" \"target_mip\", \"title\", \"variable_id\"]\n",
"# file \"Conventions\", \"creation_date\", \"tracking_id\", \n",
"# irrelevant \"institution\", \"table_id\",\n",
"# esgf fields\n",
"esgfAtt = [\"_timestamp\", \"data_node\", \"latest\", \"replica\", \"version\", \"xlink\"]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "6a07c289-50c8-4907-8027-6fabc288a5d1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['ACCESS1-3-rcp85-1-0', 'CCSM4-rcp26-1-0', 'CCSM4-rcp85-1-0', 'CESM2-ssp585-1-0', 'CNRM-CM6-1-ssp126-1-0', 'CNRM-CM6-1-ssp585-1-0', 'CNRM-ESM2-1-ssp585-1-0', 'CSIRO-MK3-6-0-rcp85-1-0', 'HadGEM2-ES-rcp85-1-0', 'IPSL-CM5A-MR-rcp26-1-0', 'IPSL-CM5A-MR-rcp85-1-0', 'MIROC-ESM-CHEM-rcp26-1-0', 'MIROC-ESM-CHEM-rcp85-1-0', 'MIROC5-rcp26-1-0', 'MIROC5-rcp85-1-0', 'MRI-JRA55-do-1-3', 'MRI-JRA55-do-1-3-2', 'MRI-JRA55-do-1-4-0', 'MRI-JRA55-do-1-5-0', 'MRI-JRA55-do-1-6-0', 'NorESM1-M-rcp26-1-0', 'NorESM1-M-rcp85-1-0', 'PCMDI-AMIP-1-1-3', 'PCMDI-AMIP-1-1-4', 'PCMDI-AMIP-1-1-5', 'PCMDI-AMIP-1-1-6', 'PCMDI-AMIP-1-1-7', 'PCMDI-AMIP-1-1-8', 'PCMDI-AMIP-1-1-9', 'UKESM1-0-LL-ssp585-1-0'])"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"js_srcId[\"source_id\"].keys()"
]
},
{
"cell_type": "raw",
"id": "2a43ee03-26b4-44e0-baa4-d69fd85adf7d",
"metadata": {},
"source": [
"\"CR-CMIP-2-0\":{\n",
" \"Conventions\":\"\",\n",
" \"activity_id\":\"input4MIPs\",\n",
" \"contact\":\"ISMIP6 Steering Team (ismip6@gmail.com)\",\n",
" \"dataset_category\":\"surfaceFluxes\", # multiple\n",
" \"datetime_start\":\"1950-07-01T00:00:00Z\", # multiple, FILE\n",
" \"datetime_stop\":null, # multiple, FILE\n",
" \"frequency\":\"yrC\", # multiple, FILE\n",
" \"further_info_url\":\"\",\n",
" \"grid_label\":\"\",\n",
" \"institution_id\":\"NASA-GSFC\",\n",
" \"license\":\"\",\n",
" \"mip_era\":\"CMIP6\",\n",
" \"nominal_resolution\":\"\",\n",
" \"realm\":\"landIce\", # multiple\n",
" \"region\":\"\",\n",
" \"source\":\"\",\n",
" \"source_id\":\"ACCESS1-3-rcp85-1-0\",\n",
" \"source_version\":\"1.0\", # multiple, accounted for by unique source_id\n",
" \"target_mip\":\"ISMIP6\",\n",
" \"title\":\"\",\n",
" \"variable_id\":\"\",\n",
"\n",
" \"creation_date\":\"\", # FILE\n",
" \"tracking_id\":\"\", # FILE\n",
"\n",
" \"dataProviderExtra\": {\n",
" },\n",
" \"esgfIndex\": {\n",
" \"_timestamp\":\"2021-04-22T19:05:37.327Z\",\n",
" \"data_node\":\"esgf-data2.llnl.gov\",\n",
" \"latest\":true,\n",
" \"replica\":false,\n",
" \"version\":\"20210422\",\n",
" \"xlink\":\"http://cera-www.dkrz.de/WDCC/meta/CMIP6/input4MIPs.CMIP6.ISMIP6.NASA-GSFC.ACCESS1-3-rcp85-1-0.ocean.yrC.thetao.grg.v20210422.json|Citation|citation\"\n",
" }\n",
"}\n",
"# options in https://github.com/PCMDI/input4MIPs_CVs/pull/20/files"
]
},
{
Expand Down Expand Up @@ -569,167 +616,41 @@
},
{
"cell_type": "code",
"execution_count": 10,
"id": "58d314e7-cfc4-4512-b13a-ca24d0e93de8",
"execution_count": 21,
"id": "b15a025a-dbf9-4666-8550-7e85f8187c7a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"processing: DRS\n",
"../input4MIPs_DRS.json\n",
"processing: activity_id\n",
"../input4MIPs_activity_id.json\n",
"processing: dataset_category\n",
"../input4MIPs_dataset_category.json\n",
"processing: license\n",
"../input4MIPs_license.json\n",
"processing: mip_era\n",
"../input4MIPs_mip_era.json\n",
"processing: product\n",
"../input4MIPs_product.json\n",
"processing: required_global_attributes\n",
"../input4MIPs_required_global_attributes.json\n",
"processing: target_mip\n",
"../input4MIPs_target_mip.json\n",
"processing: tracking_id\n",
"../input4MIPs_tracking_id.json\n"
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_DRS.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_activity_id.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_dataset_category.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_institution_id.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_license.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_mip_era.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_product.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_required_global_attributes.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_target_mip.json\n",
"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/input4MIPs_tracking_id.json\n",
"CPU times: user 77.2 ms, sys: 21.2 ms, total: 98.4 ms\n",
"Wall time: 2.07 s\n"
]
}
],
"source": [
"keyDict = {\n",
" \"DRS\":{\n",
" \"directory_path_example\":\"input4MIPs/CMIP6Plus/CMIP/PCMDI/PCMDI-AMIP-1-1-9/ocean/mon/tos/gn/v20230512/\",\n",
" \"directory_path_template\":\"<activity_id>/<mip_era>/<target_mip>/<institution_id>/<source_id>/<realm>/<frequency>/<variable_id>/<grid_label>/<version>\",\n",
" \"filename_example\":\"tos_input4MIPs_SSTsAndSeaIce_CMIP_PCMDI-AMIP-1-1-9_gn_187001-202212.nc\",\n",
" \"filename_template\":\"<variable_id>_<activity_id>_<dataset_category>_<target_mip>_<source_id>_<grid_label>[_<time_range>].nc\"\n",
" }\n",
" \"activity_id\": {\n",
" \"input4MIPs\": {\n",
" \"URL\": \"https://pcmdi.llnl.gov/mips/input4MIPs/\",\n",
" \"long_name\": \"input forcing datasets for Model Intercomparison Projects\"\n",
" }\n",
" },\n",
" \"dataset_category\": [\n",
" \"GHGConcentrations\",\n",
" \"SSTsAndSeaIce\",\n",
" \"aerosolProperties\",\n",
" \"atmosphericState\",\n",
" \"emissions\",\n",
" \"landState\",\n",
" \"ozone\",\n",
" \"radiation\",\n",
" \"solar\",\n",
" \"surfaceAir\",\n",
" \"surfaceFluxes\"\n",
" ],\n",
" \"license\" : {\n",
" \"license\":\"<your_data_identifier> data produced by <Your Institution; see MIP_institution_id.json> is licensed under a <Creative Commons; select and insert a license_id; see below> License (<insert the matching license_url; see below>). Consult https://pcmdi.llnl.gov/CMIP6Plus/TermsOfUse for terms of use governing CMIP6Plus and input4MIPs output, including citation requirements and proper acknowledgment. Further information about this data, can be found at <URL_maintained_data_provider>. The data producers and data providers make no warranty, either express or implied, including, but not limited to, warranties of merchantability and fitness for a particular purpose. All liabilities arising from the supply of the information (including any liability arising in negligence) are excluded to the fullest extent permitted by law.\",\n",
" \"license_options\":{\n",
" \"CC BY 4.0\":{\n",
" \"license_id\":\"Creative Commons Attribution 4.0 International\",\n",
" \"license_url\":\"https://creativecommons.org/licenses/by/4.0/\"\n",
" },\n",
" \"CC0 1.0\":{\n",
" \"license_id\":\"Creative Commons CC0 1.0 Universal Public Domain Dedication\",\n",
" \"license_url\":\"https://creativecommons.org/publicdomain/zero/1.0/\"\n",
" }\n",
" }\n",
" },\n",
" \"mip_era\": [\n",
" \"CMIP5\",\n",
" \"CMIP6\",\n",
" \"CMIP6Plus\"\n",
" ],\n",
" \"product\": [\n",
" \"derived\",\n",
" \"observations\",\n",
" \"reanalysis\"\n",
" ],\n",
" \"required_global_attributes\": [\n",
" \"Conventions\",\n",
" \"activity_id\",\n",
" \"contact\",\n",
" \"creation_date\",\n",
" \"dataset_category\",\n",
" \"frequency\",\n",
" \"further_info_url\",\n",
" \"grid_label\",\n",
" \"institution\",\n",
" \"institution_id\",\n",
" \"license\",\n",
" \"mip_era\",\n",
" \"nominal_resolution\",\n",
" \"realm\",\n",
" \"region\",\n",
" \"source\",\n",
" \"source_id\",\n",
" \"source_version\",\n",
" \"table_id\",\n",
" \"target_mip\",\n",
" \"title\",\n",
" \"tracking_id\",\n",
" \"variable_id\"\n",
" ],\n",
" \"target_mip\": {\n",
" \"CMIP6\": {\n",
" \"AerChemMIP\":\"Aerosols and Chemistry Model Intercomparison Project\",\n",
" \"C4MIP\":\"Coupled Climate Carbon Cycle Model Intercomparison Project\",\n",
" \"CDRMIP\":\"Carbon Dioxide Removal Model Intercomparison Project\",\n",
" \"CFMIP\":\"Cloud Feedback Model Intercomparison Project\",\n",
" \"CMIP\":\"CMIP DECK: 1pctCO2, abrupt4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments\",\n",
" \"CORDEX\":\"Coordinated Regional Climate Downscaling Experiment\",\n",
" \"DAMIP\":\"Detection and Attribution Model Intercomparison Project\",\n",
" \"DCPP\":\"Decadal Climate Prediction Project\",\n",
" \"DynVarMIP\":\"Dynamics and Variability Model Intercomparison Project\",\n",
" \"FAFMIP\":\"Flux-Anomaly-Forced Model Intercomparison Project\",\n",
" \"GMMIP\":\"Global Monsoons Model Intercomparison Project\",\n",
" \"GeoMIP\":\"Geoengineering Model Intercomparison Project\",\n",
" \"HighResMIP\":\"High-Resolution Model Intercomparison Project\",\n",
" \"ISMIP6\":\"Ice Sheet Model Intercomparison Project for CMIP6\",\n",
" \"LS3MIP\":\"Land Surface, Snow and Soil Moisture\",\n",
" \"LUMIP\":\"Land-Use Model Intercomparison Project\",\n",
" \"OMIP\":\"Ocean Model Intercomparison Project\",\n",
" \"PAMIP\":\"Polar Amplification Model Intercomparison Project\",\n",
" \"PMIP\":\"Palaeoclimate Modelling Intercomparison Project\",\n",
" \"RFMIP\":\"Radiative Forcing Model Intercomparison Project\",\n",
" \"SIMIP\":\"Sea Ice Model Intercomparison Project\",\n",
" \"ScenarioMIP\":\"Scenario Model Intercomparison Project\",\n",
" \"VIACSAB\":\"Vulnerability, Impacts, Adaptation and Climate Services Advisory Board\",\n",
" \"VolMIP\":\"Volcanic Forcings Model Intercomparison Project\"\n",
" },\n",
" \"CMIP6Plus\": {\n",
" \"CMIP\": {\n",
" \"URL\": \"https://gmd.copernicus.org/articles/9/1937/2016/gmd-9-1937-2016.pdf\",\n",
" \"long_name\": \"CMIP DECK: 1pctCO2, abrupt4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments\"\n",
" },\n",
" \"LESFMIP\": {\n",
" \"URL\": \"https://www.frontiersin.org/articles/10.3389/fclim.2022.955414/full\",\n",
" \"long_name\": \"The Large Ensemble Single Forcing Model Intercomparison Project\"\n",
" }\n",
" }\n",
" },\n",
" \"tracking_id\": [\n",
" \"hdl:21.14100/.*\"\n",
" ],\n",
"}\n",
"# Write all out\n",
"for key in keyDict.keys():\n",
" print(\"processing:\", key)\n",
" oF = \"\".join([\"../input4MIPs_\", key, \".json\"])\n",
" print(oF)\n",
" #pdb.set_trace()\n",
" # add dict identifier\n",
" tmp = {}\n",
" tmp[key] = keyDict[key]\n",
" # write to file\n",
" if os.path.exists(oF):\n",
" os.remove(oF)\n",
" fH = open(oF, \"w\")\n",
" json.dump(tmp, fH, ensure_ascii=True, sort_keys=True, indent=4, separators=(\",\", \":\"),)\n",
" fH.close()"
"%%time\n",
"# read input4mips_CVs\n",
"rawPath = \"https://raw.githubusercontent.com/PCMDI/input4MIPs_CVs/main/\"\n",
"CVList = [\"DRS\", \"activity_id\", \"dataset_category\", \"license\", \"mip_era\",\n",
" \"product\", \"required_global_attributes\", \"target_mip\", \"tracking_id\"]\n",
"# loop through entries\n",
"for count, cv in enumerate(CVList):\n",
" path = \"\".join([rawPath, \"input4MIPs_\", cv, \".json\"])\n",
" print(path)\n",
" js = requests.get(path)\n",
" vars()[cv] = json.loads(js.text)"
]
},
{
Expand Down

0 comments on commit 97244fa

Please sign in to comment.