diff --git a/docs/datastores/quickstart.ipynb b/docs/datastores/quickstart.ipynb index 7b2e048c..03710350 100644 --- a/docs/datastores/quickstart.ipynb +++ b/docs/datastores/quickstart.ipynb @@ -32,12 +32,353 @@ "warnings.filterwarnings(\"ignore\") # Suppress warnings for these docs" ] }, + { + "cell_type": "markdown", + "id": "e62614a5", + "metadata": {}, + "source": [ + "# Building an Intake-ESM datastore - the quick way\n", + "\n", + "As of `access_nri_intake` version 1.1.0, it is possible to build an ESM-datastore from the command line, using the `build-esm-datastore` utility.\n", + "\n", + "Usage is as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c9ed0d1", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "user@local_machine $ ssh gadi \n", + "user@gadi $ mkdir catalog_dir && cd catalog_dir # Change catalog_dir to your desired directory\n", + "user@gadi $ module load conda/analysis3\n", + "user@gadi $ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/ --cat-dir ." 
+ ] + }, + { + "cell_type": "markdown", + "id": "ba0fa016", + "metadata": {}, + "source": [ + "This will create a new Intake-ESM catalog in the `catalog_dir` directory, using the `Mom6Builder` builder, and the experiment directory `/g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/`.\n", + "\n", + "The first time you run `build-esm-datastore`, you can expect to see some output like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "349c9147", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "$ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/ --cat-dir .\n", + "Generating esm-datastore for /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2\n", + "Building esm-datastore...\n", + "/home/189/ct1163/catalog_dir/venv/lib/python3.11/site-packages/access_nri_intake/source/utils.py:140: UserWarning: Time coordinate does not include bounds information. Guessing start and end times.\n", + " warnings.warn(\n", + "...\n", + "Sucessfully built esm-datastore!\n", + "Saving esm-datastore to /home/189/ct1163/catalog_dir\n", + "/home/189/ct1163/catalog_dir/venv/lib/python3.11/site-packages/intake_esm/cat.py:186: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. 
See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/\n", + " data = self.dict().copy()\n", + "Successfully wrote ESM catalog json file to: file:///home/189/ct1163/catalog_dir/experiment_datastore.json\n", + "Hashing catalog to prevent unnecessary rebuilds.\n", + "This may take some time...\n", + "Catalog sucessfully hashed!\n", + "Datastore sucessfully written to /home/189/ct1163/catalog_dir/experiment_datastore.json!\n", + "Please note that this has not added the datastore to the access-nri-intake catalog.\n", + "To add to catalog, please run 'scaffold-catalog-entry' for help on how to do so.\n", + "To open the datastore, run `intake.open_esm_datastore('/home/189/ct1163/catalog_dir/experiment_datastore.json', columns_with_iterables=['variable'])` in a Python session.\n", + "$\n" + ] + }, + { + "cell_type": "markdown", + "id": "97db5843", + "metadata": {}, + "source": [ + "If you rerun `build-esm-datastore`, you can expect to see something like this if the tool detects a valid & current datastore in the specified directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6117aaa", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "$ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/ --cat-dir .\n", + "Datastore found in current directory, verifying datastore integrity...\n", + "Parsing experiment dir...\n", + "Datastore integrity verified!\n", + "Datastore found in /home/189/ct1163/catalog_dir/experiment_datastore.json!\n", + "Please note that this has not added the datastore to the access-nri-intake catalog.\n", + "To add to catalog, please run 'scaffold-catalog-entry' for help on how to do so.\n", + "To open the datastore, run `intake.open_esm_datastore('/home/189/ct1163/catalog_dir/experiment_datastore.json', columns_with_iterables=['variable'])` in a Python session.\n", + "$" + ] + }, + { + 
"cell_type": "markdown", + "id": "eeee704d", + "metadata": {}, + "source": [ + "...or this if the tool detects that the datastore is out of date, and needs to be regenerated:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da34d1f1", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "$ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/ --cat-dir .\n", + "Datastore found in current directory, verifying datastore integrity...\n", + "Parsing experiment dir...\n", + "Experiment directory and datastore do not match (missing files from datastore). Datastore regeneration required...\n", + "Building esm-datastore...\n", + "...\n", + "Sucessfully built esm-datastore!\n", + "Saving esm-datastore to /home/189/ct1163/catalog_dir\n", + "/home/189/ct1163/catalog_dir/venv/lib/python3.11/site-packages/intake_esm/cat.py:186: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.10/migration/\n", + " data = self.dict().copy()\n", + "Successfully wrote ESM catalog json file to: file:///home/189/ct1163/catalog_dir/experiment_datastore.json\n", + "Hashing catalog to prevent unnecessary rebuilds.\n", + "This may take some time...\n", + "Catalog sucessfully hashed!\n", + "Datastore sucessfully written to /home/189/ct1163/catalog_dir/experiment_datastore.json!\n", + "Please note that this has not added the datastore to the access-nri-intake catalog.\n", + "To add to catalog, please run 'scaffold-catalog-entry' for help on how to do so.\n", + "To open the datastore, run `intake.open_esm_datastore('/home/189/ct1163/catalog_dir/experiment_datastore.json', columns_with_iterables=['variable'])` in a Python session." 
+ ] + }, + { + "cell_type": "markdown", + "id": "a591b4e0", + "metadata": {}, + "source": [ + "To see the full list of options, run `build-esm-datastore --help`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bdb56801", + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "$ build-esm-datastore --help\n", + "usage: build-esm-datastore [-h] [--builder BUILDER] [--builder-kwargs [BUILDER_KWARGS ...]] [--expt-dir EXPT_DIR]\n", + " [--cat-dir CAT_DIR] [--datastore-name DATASTORE_NAME] [--description DESCRIPTION]\n", + "\n", + "Build an esm-datastore by inspecting a directory containing model outputs. If no datastore exists, a new one will be\n", + "created. If a datastore exists, its integrity will be verified, and the datastore regenerated if necessary.\n", + "\n", + "options:\n", + " -h, --help show this help message and exit\n", + " --builder BUILDER Builder to use to create the esm-datastore. Builders are defined the source.builders module.\n", + " Currently available options are: AccessOm2Builder, AccessOm3Builder, Mom6Builder,\n", + " AccessEsm15Builder, AccessCm2Builder. To build a datastore for a new model, please contact the\n", + " ACCESS-NRI team.\n", + " --builder-kwargs [BUILDER_KWARGS ...]\n", + " Additional keyword arguments to pass to the builder. Should be in the form of key=value.\n", + " --expt-dir EXPT_DIR Directory containing the model outputs to be added to the esm-datastore. Defaults to the\n", + " current working directory. Although builders support adding multiple directories, this tool\n", + " only supports one directory at a time - at present.\n", + " --cat-dir CAT_DIR Directory in which to place the catalog.json file. Defaults to the value of --expt-dir if not\n", + " set.\n", + " --datastore-name DATASTORE_NAME\n", + " Name of the datastore to use. 
If not provided, this will default to 'experiment_datastore'.\n", + " --description DESCRIPTION\n", + " Description of the datastore. If not provided, a default description will be used:\n", + " 'esm_datastore for the model output in {--expt-dir}'" + ] + }, + { + "cell_type": "markdown", + "id": "db1869d1", + "metadata": {}, + "source": [ + "If you want to place multiple datastores in the same directory, you will need to specify different datastore names, using the `--datastore-name` option. For example:\n", + "\n", + "```bash\n", + "$ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/ --cat-dir . --datastore-name mom6_panant_01\n", + "...\n", + "$ build-esm-datastore --builder Mom6Builder --expt-dir /g/data/ik11/outputs/mom6-panan/panant-02-zstar-ACCESSyr2/ --cat-dir . --datastore-name mom6_panant_02\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "id": "c8290d6d", + "metadata": {}, + "source": [ + "In addition, you can access the `build-esm-datastore` functionality from within a Python script, using the `use_datastore` function:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5de825b9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[34m\u001b[22mDatastore found in \u001b[36m\u001b[1m/home/189/ct1163/catalog_dir\u001b[34m\u001b[22m, verifying datastore integrity...\u001b[0m\n", + "\u001b[34m\u001b[22mParsing experiment dir...\u001b[0m\n", + "\u001b[32m\u001b[22mDatastore integrity verified!\u001b[0m\n", + "\u001b[32m\u001b[22mDatastore found in \u001b[36m\u001b[1m/home/189/ct1163/catalog_dir/experiment_datastore.json\u001b[32m\u001b[22m!\n", + "\u001b[34m\u001b[22mPlease note that this has not added the datastore to the access-nri-intake catalog.\n", + "To add to catalog, please run '\u001b[37m\u001b[1mscaffold_catalog_entry\u001b[34m\u001b[22m' for help on how to do so.\n" + ] + }, + { + "data": { + "text/html": [ 
+ "

experiment_datastore catalog with 13 dataset(s) from 12325 asset(s):

\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
unique
filename12325
file_id13
path12325
filename_timestamp82
frequency3
start_date3977
end_date3978
variable122
variable_long_name17
variable_standard_name17
variable_cell_methods17
variable_units17
realm2
derived_variable0
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from access_nri_intake.experiment import use_datastore\n", + "from access_nri_intake.source.builders import Mom6Builder\n", + "\n", + "ds = use_datastore(\n", + " experiment_dir=\"/g/data/ik11/outputs/mom6-panan/panant-01-zstar-ACCESSyr2/\",\n", + " catalog_dir=\"/home/189/ct1163/catalog_dir/\",\n", + " builder=Mom6Builder,\n", + " datastore_name=\"experiment_datastore\",\n", + " description=\"PanAnt experiment with ACCESS-OM2-01 forcing\",\n", + " )\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "31c9ac00", + "metadata": {}, + "source": [ + "For even more fine-grained control, follow the guide below:" + ] + }, { "cell_type": "markdown", "id": "c1526d2b-06b8-46e3-9005-638c04844c6e", "metadata": {}, "source": [ - "## Building an Intake-ESM datastore" + "## Building an Intake-ESM datastore - using builders directly" ] }, { @@ -45,7 +386,7 @@ "id": "9f8f5cd3-93bf-4612-afc9-54ac6b2ce516", "metadata": {}, "source": [ - "In this tutorial, we'll build an Intake-ESM datastore for an ACCESS-OM2 model run that is currently not included in the ACCESS-NRI catalog. The base output directory for this model run is:\n", + "In the rest of this tutorial, we'll build an Intake-ESM datastore for an ACCESS-OM2 model run that is currently not included in the ACCESS-NRI catalog. 
The base output directory for this model run is:\n", "\n", "`/g/data/ik11/outputs/access-om2/1deg_iamip2_CMCC-ESM2ssp126`\n", "\n", @@ -463,7 +804,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -477,7 +818,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.14" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/docs/project_list.rst b/docs/project_list.rst index 214015e3..342f427e 100644 --- a/docs/project_list.rst +++ b/docs/project_list.rst @@ -1,13 +1,13 @@ * :code:`rr3` -* :code:`rt52` * :code:`hq89` -* :code:`zz63` +* :code:`p73` * :code:`al33` -* :code:`oi10` -* :code:`ik11` -* :code:`ig45` +* :code:`rt52` +* :code:`cj50` * :code:`fs38` -* :code:`p73` +* :code:`zz63` +* :code:`ig45` * :code:`xp65` * :code:`py18` -* :code:`cj50` +* :code:`ik11` +* :code:`oi10` diff --git a/docs/storage_flags.rst b/docs/storage_flags.rst index 8e5e17c9..75003af8 100644 --- a/docs/storage_flags.rst +++ b/docs/storage_flags.rst @@ -1,3 +1,3 @@ .. code-block:: - gdata/rr3+gdata/rt52+gdata/hq89+gdata/zz63+gdata/al33+gdata/oi10+gdata/ik11+gdata/ig45+gdata/fs38+gdata/p73+gdata/xp65+gdata/py18+gdata/cj50 \ No newline at end of file + gdata/rr3+gdata/hq89+gdata/p73+gdata/al33+gdata/rt52+gdata/cj50+gdata/fs38+gdata/zz63+gdata/ig45+gdata/xp65+gdata/py18+gdata/ik11+gdata/oi10 \ No newline at end of file diff --git a/docs/usage/quickstart.ipynb b/docs/usage/quickstart.ipynb index 6aa68e47..4b69afe8 100644 --- a/docs/usage/quickstart.ipynb +++ b/docs/usage/quickstart.ipynb @@ -1891,7 +1891,9 @@ "id": "775e07a4-9f74-45dc-900b-f655904e6150", "metadata": {}, "source": [ - "We can also use regex strings in our searches. For example, we could search for variables containing the substrings `\"burnt\"` and `\"Fire\"`. 
We can see that there are five variables, all with monthly frequency, across a large number of models that satisfy this criteria." + "We can also use regex strings in our searches. For example, we could search for variables containing the substrings `\"burnt\"` and `\"Fire\"`. We can see that there are five variables, all with monthly frequency, across a large number of models that satisfy these criteria.\n", + "\n", + "> **Note**: The catalog uses Python regex syntax. This may differ slightly from typical Unix regex syntax: for example, `.*` is used to match any character zero or more times, rather than `*`. For more info, see: [https://docs.python.org/3/library/re.html](https://docs.python.org/3/library/re.html), or [https://intake-esm.readthedocs.io/en/latest/how-to/filter-catalog-by-substring-and-regex-criteria.html](https://intake-esm.readthedocs.io/en/latest/how-to/filter-catalog-by-substring-and-regex-criteria.html)." ] }, { @@ -5301,11 +5303,21 @@ "source": [ "client.close()" ] + }, + { + "cell_type": "markdown", + "id": "dae993b2", + "metadata": {}, + "source": [ + "For further information on using the ACCESS-NRI Intake Catalog not covered in this tutorial, see the\n", + "following resources:\n", + "- https://github.com/COSIMA/cosima-recipes/blob/65df5974fd8d3f63b675ce4f666aa1decfe959c3/Tutorials/ACCESS-NRI_Intake_Catalog.ipynb" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "access-nri-intake-test", "language": "python", "name": "python3" }, @@ -5319,7 +5331,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.0" + "version": "3.11.10" } }, "nbformat": 4, diff --git a/src/access_nri_intake/cli.py b/src/access_nri_intake/cli.py index 45e9e558..04176ac7 100644 --- a/src/access_nri_intake/cli.py +++ b/src/access_nri_intake/cli.py @@ -585,7 +585,7 @@ def use_esm_datastore(argv: Sequence[str] | None = None) -> int: description=( "Build an esm-datastore by 
inspecting a directory containing model outputs." " If no datastore exists, a new one will be created. If a datastore exists," - " it's integrity will be verified, and the datastore regenerated if necessary." + " its integrity will be verified, and the datastore regenerated if necessary." ) ) parser.add_argument( diff --git a/src/access_nri_intake/experiment/main.py b/src/access_nri_intake/experiment/main.py index f41c639a..1926bb3f 100644 --- a/src/access_nri_intake/experiment/main.py +++ b/src/access_nri_intake/experiment/main.py @@ -22,9 +22,9 @@ def use_datastore( - experiment_dir: Path, + experiment_dir: Path | str, builder: Builder | None = None, - catalog_dir: Path | None = None, + catalog_dir: Path | str | None = None, builder_kwargs: dict | None = None, open_ds: bool = True, datastore_name: str = "experiment_datastore", @@ -41,12 +41,13 @@ def use_datastore( ---------- builder : Builder The builder object that will be used to build the datastore. - experiment_dir : Path - The directory containing the experiment. - catalog_dir : Path, optional + experiment_dir : Path | str + The directory containing the experiment. If a string is passed, it will be + converted to a Path object. + catalog_dir : Path | str, optional The directory containing/to write the catalog to, if it differs from the experiment directory. If None, the catalog will be written to the experiment - directory. + directory. If a string is passed, it will be converted to a Path object. open_ds : bool Whether to open the datastore after building it. Typically set to false when called from a console script. 
@@ -75,6 +76,8 @@ def use_datastore( catalog_dir = catalog_dir or experiment_dir builder_kwargs = builder_kwargs or {} + catalog_dir, experiment_dir = Path(catalog_dir), Path(experiment_dir) + catalog_dir_fmap = { ".": "current directory", "./": "current directory", diff --git a/tests/test_experiment.py b/tests/test_experiment.py index db06282b..56f14587 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -261,6 +261,7 @@ def test_verify_ds_current_fail_differing_hashes(mock_builder, test_data, tmpdir @pytest.mark.parametrize( "open_ds, return_type", [(True, esm_datastore), (False, type(None))] ) +@pytest.mark.parametrize("use_path", [True, False]) def test_use_datastore( test_data: Path, basedir, @@ -270,6 +271,7 @@ def test_use_datastore( tmp_path, open_ds, return_type, + use_path, capsys, ): """ @@ -288,9 +290,10 @@ def test_use_datastore( assert isinstance(builder.assets, list) assert len(builder.assets) == num_assets + exptdir = Path(basedir[0]) if use_path else basedir[0] # This creates a bunch of datastoers that we don't actually want here. ret = use_datastore( - experiment_dir=Path(basedir[0]), + experiment_dir=exptdir, builder=builder_type, open_ds=open_ds, builder_kwargs=kwargs,