From 4e2c14da7cf7b93c6c598e1955fbbef721944b01 Mon Sep 17 00:00:00 2001 From: Sandor Kertesz Date: Thu, 16 Jan 2025 12:56:00 +0000 Subject: [PATCH] Refactor settings --- docs/examples/cache.ipynb | 70 +++-- .../examples/{settings.ipynb => config.ipynb} | 91 +++--- ...s_env_vars.ipynb => config_env_vars.ipynb} | 32 +-- docs/examples/index.rst | 4 +- docs/guide/caching.rst | 66 ++--- docs/guide/config.rst | 162 +++++++++++ docs/guide/include/config-get.py | 9 + docs/guide/include/config-reset.py | 7 + .../{settings-set.py => config-set.py} | 8 +- docs/guide/include/config-temporary.py | 11 + docs/guide/include/settings-get.py | 9 - docs/guide/include/settings-reset.py | 7 - docs/guide/include/settings-temporary.py | 11 - docs/guide/index.rst | 2 +- docs/guide/misc/grib_memory.rst | 16 +- docs/guide/misc/grib_metadata.rst | 10 +- docs/guide/settings.rst | 162 ----------- docs/guide/sources.rst | 2 +- docs/release_notes/index.rst | 1 + docs/release_notes/version_0.13_updates.rst | 41 +++ pytest.ini | 3 +- src/earthkit/data/__init__.py | 5 +- src/earthkit/data/core/caching.py | 52 ++-- .../data/core/{settings.py => config.py} | 222 +++++++++------ src/earthkit/data/readers/__init__.py | 8 +- .../data/readers/grib/index/__init__.py | 4 +- src/earthkit/data/readers/grib/memory.py | 4 +- src/earthkit/data/sources/__init__.py | 6 +- src/earthkit/data/sources/cds.py | 2 +- src/earthkit/data/sources/ecmwf_api.py | 2 +- src/earthkit/data/sources/mars.py | 4 +- src/earthkit/data/sources/multi.py | 2 +- src/earthkit/data/sources/url.py | 16 +- src/earthkit/data/sources/wekeo.py | 2 +- src/earthkit/data/sources/wekeocds.py | 2 +- ..._env_rst.py => generate_config_env_rst.py} | 10 +- ...settings_rst.py => generate_config_rst.py} | 4 +- src/earthkit/data/sphinxext/module_output.py | 2 +- tests/conftest.py | 16 +- tests/core/test_config.py | 262 ++++++++++++++++++ tests/core/test_settings.py | 95 ++++--- tests/list_of_dicts/lod_fixtures.py | 10 +- 42 files changed, 921 insertions(+), 
533 deletions(-) rename docs/examples/{settings.ipynb => config.ipynb} (64%) rename docs/examples/{settings_env_vars.ipynb => config_env_vars.ipynb} (86%) create mode 100644 docs/guide/config.rst create mode 100644 docs/guide/include/config-get.py create mode 100644 docs/guide/include/config-reset.py rename docs/guide/include/{settings-set.py => config-set.py} (55%) create mode 100644 docs/guide/include/config-temporary.py delete mode 100644 docs/guide/include/settings-get.py delete mode 100644 docs/guide/include/settings-reset.py delete mode 100644 docs/guide/include/settings-temporary.py delete mode 100644 docs/guide/settings.rst create mode 100644 docs/release_notes/version_0.13_updates.rst rename src/earthkit/data/core/{settings.py => config.py} (74%) rename src/earthkit/data/sphinxext/{generate_settings_env_rst.py => generate_config_env_rst.py} (78%) rename src/earthkit/data/sphinxext/{generate_settings_rst.py => generate_config_rst.py} (93%) create mode 100644 tests/core/test_config.py diff --git a/docs/examples/cache.ipynb b/docs/examples/cache.ipynb index 45f082d4..e768788e 100644 --- a/docs/examples/cache.ipynb +++ b/docs/examples/cache.ipynb @@ -21,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "from earthkit.data import settings, cache" + "from earthkit.data import config, cache" ] }, { @@ -39,7 +39,7 @@ "source": [ "earthkit-data uses a dedicated **directory** to store the results of remote data access and some GRIB/BUFR indexing information. By default this directory is **unmanaged** (its size is not checked/limited) and **no caching** is provided for the files in it, i.e. repeated calls to :func:`from_source` for remote services and URLSs will download the data again!\n", "\n", - "When :ref:`caching ` is **enabled** this directory will also serve as a :ref:`cache `. It means if we run :func:`from_source` again with the same arguments it will load the data from the cache instead of downloading it again. 
Additionally, caching offers **monitoring and disk space management**. When the cache is full, cached data is deleted according to the settings (i.e. oldest data is deleted first). " + "When :ref:`caching ` is **enabled** this directory will also serve as a :ref:`cache `. It means if we run :func:`from_source` again with the same arguments it will load the data from the cache instead of downloading it again. Additionally, caching offers **monitoring and disk space management**. When the cache is full, cached data is deleted according to the configuration (i.e. oldest data is deleted first). " ] }, { @@ -53,18 +53,24 @@ "tags": [] }, "source": [ - "In the examples below we will change the settings multiple times. First we ensure all the changes are temporary and no settings are saved into the configuration file. We also reset the settings to the defaults." + "In the examples below we will change the configuration multiple times. First we ensure all the changes are temporary and no options are saved into the configuration file. We also reset the configuration to the defaults." ] }, { "cell_type": "code", "execution_count": 2, "id": "71214b97-2b64-442d-bd23-98f831b064d0", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "outputs": [], "source": [ - "settings.auto_save_settings = False\n", - "settings.reset()" + "config.autosave = False\n", + "config.reset()" ] }, { @@ -87,7 +93,7 @@ "tags": [] }, "source": [ - "The primary key to control the cache in the settings is :ref:`cache-policy `. The default value is :ref:`\"off\" `, which means that no caching is available. \n", + "The primary key to control the cache in the configuration is :ref:`cache-policy `. The default value is :ref:`\"off\" `, which means that no caching is available. \n", "\n", "In this case all files are downloaded into an **unmanaged** temporary directory created by *tempfile.TemporaryDirectory*. 
Since caching is disabled all calls to :func:`from_source` for remote services and URLSs will download the data again! This temporary directory will be unique for each earthkit-data session. When the directory object goes out of scope (at the latest on exit) the directory will be **cleaned up**. " ] @@ -103,7 +109,7 @@ "tags": [] }, "source": [ - "The settings tells us the current cache policy:" + "The config tells us the current cache policy:" ] }, { @@ -124,7 +130,7 @@ } ], "source": [ - "settings.get(\"cache-policy\")" + "config.get(\"cache-policy\")" ] }, { @@ -157,7 +163,7 @@ { "data": { "text/plain": [ - "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmpuc3s5y8r'" + "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmptdj20reb'" ] }, "execution_count": 4, @@ -181,7 +187,7 @@ "tags": [] }, "source": [ - "We can specify the parent directory for the temporary directory by using the :ref:`temporary-directory-root ` settings. By default it is set to None (no parent directory specified)." + "We can specify the parent directory for the temporary directory by using the :ref:`temporary-directory-root ` config option. By default it is set to None (no parent directory specified)." 
] }, { @@ -199,7 +205,7 @@ { "data": { "text/plain": [ - "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmpnjz5cnc_'" + "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmpfybjqiu6'" ] }, "execution_count": 5, @@ -210,14 +216,20 @@ "source": [ "s = {\"cache-policy\": \"off\", \n", " \"temporary-directory-root\": \"~/my_demo_tmp\"}\n", - "settings.set(s)\n", + "config.set(s)\n", "cache.directory()" ] }, { "cell_type": "markdown", "id": "8d0ede62-77d4-4914-86c8-12bc6846d16a", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ "#### Temporary cache directory" ] @@ -259,8 +271,8 @@ } ], "source": [ - "settings.set(\"cache-policy\", \"temporary\")\n", - "print(settings.get(\"cache-policy\"))" + "config.set(\"cache-policy\", \"temporary\")\n", + "print(config.get(\"cache-policy\"))" ] }, { @@ -293,7 +305,7 @@ { "data": { "text/plain": [ - "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmp43s9c97m'" + "'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/tmp_i65c09a'" ] }, "execution_count": 7, @@ -318,7 +330,7 @@ "tags": [] }, "source": [ - "We can specify the parent directory for the the temporary cache by using the :ref:`temporary-cache-directory-root ` settings. By default it is set to None (no parent directory specified)." + "We can specify the parent directory for the the temporary cache by using the :ref:`temporary-cache-directory-root ` config option. By default it is set to None (no parent directory specified)." 
] }, { @@ -336,7 +348,7 @@ { "data": { "text/plain": [ - "'~/my_demo_cache/tmp2jeqp8ig'" + "'/Users/cgr/my_demo_cache/tmp0yxt25rk'" ] }, "execution_count": 8, @@ -347,7 +359,7 @@ "source": [ "s = {\"cache-policy\": \"temporary\", \n", " \"temporary-cache-directory-root\": \"~/my_demo_cache\"}\n", - "settings.set(s)\n", + "config.set(s)\n", "cache.directory()" ] }, @@ -372,7 +384,7 @@ "tags": [] }, "source": [ - "When the :ref:`cache-policy ` is :ref:`\"user\" ` the **cache will be active** and created in a **managed directory** defined by the :ref:`user-cache-directory ` settings. \n", + "When the :ref:`cache-policy ` is :ref:`\"user\" ` the **cache will be active** and created in a **managed directory** defined by the :ref:`user-cache-directory ` config option. \n", "\n", "The user cache directory is **not cleaned up on exit**. So next time you start earthkit-data it will be there again unless it is deleted manually or it is set in way that on each startup a different path is assigned to it. Also, when you run multiple sessions of earthkit-data under the same user they will share the same cache. 
" ] @@ -389,7 +401,7 @@ "tags": [] }, "source": [ - "The settings tells us all the details about the cache policy and location:" + "The configuration tells us all the details about the cache policy and location:" ] }, { @@ -414,9 +426,9 @@ } ], "source": [ - "settings.set(\"cache-policy\", \"user\")\n", - "print(settings.get(\"cache-policy\"))\n", - "print(settings.get(\"user-cache-directory\"))" + "config.set(\"cache-policy\", \"user\")\n", + "print(config.get(\"cache-policy\"))\n", + "print(config.get(\"user-cache-directory\"))" ] }, { @@ -484,7 +496,7 @@ { "data": { "text/plain": [ - "'~/earthkit-data-demo-cache'" + "'/Users/cgr/earthkit-data-demo-cache'" ] }, "execution_count": 11, @@ -493,7 +505,7 @@ } ], "source": [ - "settings.set(\"user-cache-directory\", \"~/earthkit-data-demo-cache\")\n", + "config.set(\"user-cache-directory\", \"~/earthkit-data-demo-cache\")\n", "cache.directory()" ] }, @@ -514,9 +526,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev", + "display_name": "dev_ecc", "language": "python", - "name": "dev" + "name": "dev_ecc" }, "language_info": { "codemirror_mode": { diff --git a/docs/examples/settings.ipynb b/docs/examples/config.ipynb similarity index 64% rename from docs/examples/settings.ipynb rename to docs/examples/config.ipynb index 0d8f9935..16c488ce 100644 --- a/docs/examples/settings.ipynb +++ b/docs/examples/config.ipynb @@ -11,7 +11,7 @@ "tags": [] }, "source": [ - "## Settings" + "## Configuration" ] }, { @@ -21,7 +21,7 @@ "metadata": {}, "outputs": [], "source": [ - "from earthkit.data import settings" + "from earthkit.data import config" ] }, { @@ -35,7 +35,7 @@ "tags": [] }, "source": [ - "#### Settings basics" + "#### Config basics" ] }, { @@ -51,7 +51,7 @@ "tags": [] }, "source": [ - "The :ref:`settings` object is loaded from the **\"~/.earthkit-data/settings.yaml\"** file. Changes are immediately saved back into this file unless we explicitly disable it or use :ref:`temporary settings `." 
+ "The :ref:`config` object is loaded from the ``~/.config/earthkit/data/config.yaml`` file. Changes are immediately saved back into this file unless we explicitly disable it with ``config.autosave`` or use a :ref:`temporary configuration `." ] }, { @@ -65,7 +65,7 @@ "tags": [] }, "source": [ - "For the rest of this notebook we disable the settings autosave so the changes will not be written into our configuration file." + "For the rest of this notebook we disable the configuration autosave so the changes will not be written into our configuration file." ] }, { @@ -81,7 +81,7 @@ }, "outputs": [], "source": [ - "settings.auto_save_settings = False" + "config.autosave = False" ] }, { @@ -90,7 +90,7 @@ "id": "27d1d708-8090-43af-8be8-5d81eec4791b", "metadata": {}, "source": [ - "We can display the current settings and the default values with:" + "We can display the current configuration and the default values with:" ] }, { @@ -111,22 +111,27 @@ " vertical-align: top;\n", " text-align: left !important;\n", "}\n", - "
NameValueDefault
cache-policy'user''user'
check-out-of-date-urlsTrueTrue
download-out-of-date-urlsFalseFalse
maximum-cache-disk-usage'95%''95%'
maximum-cache-sizeNoneNone
number-of-download-threads55
reader-type-check-bytes6464
temporary-cache-directory-rootNoneNone
url-download-timeout'30s''30s'
use-message-position-index-cacheFalseFalse
use-standalone-mars-client-when-availableTrueTrue
user-cache-directory'/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr''/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr'
version'0.2.1.dev28+g10f7c90.d20230714'''
" + "
NameValueDefault
cache-policy'off''off'
check-out-of-date-urlsTrueTrue
download-out-of-date-urlsFalseFalse
grib-field-policy'persistent''persistent'
grib-handle-cache-size11
grib-handle-policy'cache''cache'
maximum-cache-disk-usage'95%''95%'
maximum-cache-sizeNoneNone
number-of-download-threads55
reader-type-check-bytes6464
temporary-cache-directory-rootNoneNone
temporary-directory-rootNoneNone
url-download-timeout'30s''30s'
use-grib-metadata-cacheTrueTrue
use-message-position-index-cacheFalseFalse
use-standalone-mars-client-when-availableTrueTrue
user-cache-directory'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr''/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr'
version'0.11.5.dev2+g384bbb0.d20241209'''
" ], "text/plain": [ - "cache-policy: (user, user)\n", + "cache-policy: (off, off)\n", "check-out-of-date-urls: (True, True)\n", "download-out-of-date-urls: (False, False)\n", + "grib-field-policy: (persistent, persistent)\n", + "grib-handle-cache-size: (1, 1)\n", + "grib-handle-policy: (cache, cache)\n", "maximum-cache-disk-usage: (95%, 95%)\n", "maximum-cache-size: (None, None)\n", "number-of-download-threads: (5, 5)\n", "reader-type-check-bytes: (64, 64)\n", "temporary-cache-directory-root: (None, None)\n", + "temporary-directory-root: (None, None)\n", "url-download-timeout: (30s, 30s)\n", + "use-grib-metadata-cache: (True, True)\n", "use-message-position-index-cache: (False, False)\n", "use-standalone-mars-client-when-available: (True, True)\n", - "user-cache-directory: (/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr, /var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr)\n", - "version: (0.2.1.dev28+g10f7c90.d20230714, )" + "user-cache-directory: (/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr, /var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr)\n", + "version: (0.11.5.dev2+g384bbb0.d20241209, )" ] }, "execution_count": 3, @@ -135,7 +140,7 @@ } ], "source": [ - "settings" + "config" ] }, { @@ -150,7 +155,7 @@ "tags": [] }, "source": [ - "We can use :ref:`get() ` to access the settings values." + "We can use :ref:`get() ` to access the config values." ] }, { @@ -177,7 +182,7 @@ } ], "source": [ - "settings.get(\"number-of-download-threads\")" + "config.get(\"number-of-download-threads\")" ] }, { @@ -192,7 +197,7 @@ "tags": [] }, "source": [ - "We can use :ref:`set() ` to change the values." + "We can use :ref:`set() ` to change the values." 
] }, { @@ -219,8 +224,8 @@ } ], "source": [ - "settings.set(\"number-of-download-threads\", 6)\n", - "settings.get(\"number-of-download-threads\")" + "config.set(\"number-of-download-threads\", 6)\n", + "config.get(\"number-of-download-threads\")" ] }, { @@ -247,9 +252,9 @@ } ], "source": [ - "settings.set({\"number-of-download-threads\": 7, \"url-download-timeout\": \"1m\"})\n", - "print(settings.get(\"number-of-download-threads\"))\n", - "print(settings.get(\"url-download-timeout\"))" + "config.set({\"number-of-download-threads\": 7, \"url-download-timeout\": \"1m\"})\n", + "print(config.get(\"number-of-download-threads\"))\n", + "print(config.get(\"url-download-timeout\"))" ] }, { @@ -282,9 +287,9 @@ } ], "source": [ - "settings.set(number_of_download_threads=8, url_download_timeout=\"2m\")\n", - "print(settings.get(\"number-of-download-threads\"))\n", - "print(settings.get(\"url-download-timeout\"))" + "config.set(number_of_download_threads=8, url_download_timeout=\"2m\")\n", + "print(config.get(\"number-of-download-threads\"))\n", + "print(config.get(\"url-download-timeout\"))" ] }, { @@ -292,7 +297,7 @@ "id": "b7c8edff-9355-4144-aee9-e0a2a55f8eba", "metadata": {}, "source": [ - "#### Temporary settings" + "#### Temporary configuration" ] }, { @@ -308,7 +313,7 @@ "tags": [] }, "source": [ - "We can create a :ref:`temporary settings ` (as a context manager) as a copy of the original settings. We will still refer to it as \"settings\", but it is completely independent from the original object and changes are not saved into the yaml file (even when *settings.auto_save_settings* is True)." + "We can create a :ref:`temporary configuration ` (as a context manager) as a copy of the original configuration. We will still refer to it as \"config\", but it is completely independent from the original object and changes are not saved into the yaml file (even when *config.autosave* is True)." 
] }, { @@ -333,10 +338,10 @@ } ], "source": [ - "with settings.temporary():\n", - " print(settings.get(\"number-of-download-threads\"))\n", - " settings.set(\"number-of-download-threads\", 12)\n", - " print(settings.get(\"number-of-download-threads\"))" + "with config.temporary():\n", + " print(config.get(\"number-of-download-threads\"))\n", + " config.set(\"number-of-download-threads\", 12)\n", + " print(config.get(\"number-of-download-threads\"))" ] }, { @@ -344,7 +349,7 @@ "id": "b1f883bd-c63b-45dd-b333-de6ed682a62f", "metadata": {}, "source": [ - "When we leave the context the settings are reverted to the original one:" + "When we leave the context the config is reverted to the original one:" ] }, { @@ -365,7 +370,7 @@ } ], "source": [ - "settings.get(\"number-of-download-threads\")" + "config.get(\"number-of-download-threads\")" ] }, { @@ -373,7 +378,7 @@ "id": "2670d106-7e09-42ba-bc66-335d84c38905", "metadata": {}, "source": [ - "A temporary settings can also be created with arguments:" + "A temporary configuration can also be created with arguments:" ] }, { @@ -392,10 +397,10 @@ } ], "source": [ - "with settings.temporary(\"number-of-download-threads\", 11):\n", - " print(settings.get(\"number-of-download-threads\"))\n", + "with config.temporary(\"number-of-download-threads\", 11):\n", + " print(config.get(\"number-of-download-threads\"))\n", "\n", - "print(settings.get(\"number-of-download-threads\"))" + "print(config.get(\"number-of-download-threads\"))" ] }, { @@ -419,7 +424,7 @@ "tags": [] }, "source": [ - "The :ref:`reset() ` method resets the settings to the defaults. We demonstrate it on a temporary settings:" + "The :ref:`reset() ` method resets the config to the defaults. 
We demonstrate it on a temporary configuration:" ] }, { @@ -444,11 +449,11 @@ } ], "source": [ - "with settings.temporary():\n", - " settings.set(\"number-of-download-threads\", 12)\n", - " print(settings.get(\"number-of-download-threads\"))\n", - " settings.reset()\n", - " print(settings.get(\"number-of-download-threads\"))\n", + "with config.temporary():\n", + " config.set(\"number-of-download-threads\", 12)\n", + " print(config.get(\"number-of-download-threads\"))\n", + " config.reset()\n", + " print(config.get(\"number-of-download-threads\"))\n", " " ] }, @@ -463,9 +468,9 @@ ], "metadata": { "kernelspec": { - "display_name": "dev", + "display_name": "dev_ecc", "language": "python", - "name": "dev" + "name": "dev_ecc" }, "language_info": { "codemirror_mode": { diff --git a/docs/examples/settings_env_vars.ipynb b/docs/examples/config_env_vars.ipynb similarity index 86% rename from docs/examples/settings_env_vars.ipynb rename to docs/examples/config_env_vars.ipynb index a78ac012..ebace542 100644 --- a/docs/examples/settings_env_vars.ipynb +++ b/docs/examples/config_env_vars.ipynb @@ -11,7 +11,7 @@ "tags": [] }, "source": [ - "## Settings environment variables" + "## Configuration environment variables" ] }, { @@ -27,7 +27,7 @@ }, "outputs": [], "source": [ - "from earthkit.data import settings" + "from earthkit.data import config" ] }, { @@ -42,7 +42,7 @@ "tags": [] }, "source": [ - "For the rest of this notebook we disable the :ref:`settings` autosave so the changes will not be written into our configuration file." + "For the rest of this notebook we disable the :ref:`config` autosave so the changes will not be written into our configuration file." ] }, { @@ -58,7 +58,7 @@ }, "outputs": [], "source": [ - "settings.auto_save_settings = False" + "config.autosave = False" ] }, { @@ -73,7 +73,7 @@ "tags": [] }, "source": [ - "Each :ref:`settings` parameter has a corresponding environment variable (see the full list :ref:`here `). 
When an environment variable is set, it takes precedence over the settings parameter as the following example demonstrates it." + "Each :ref:`config` parameter has a corresponding environment variable (see the full list :ref:`here `). When an environment variable is set, it takes precedence over the config parameter, as the following example demonstrates." ] }, { @@ -87,7 +87,7 @@ "tags": [] }, "source": [ - "Assuming no environmental variable is set the value is read form the settings file." + "Assuming no environmental variable is set the value is read from the config file." ] }, { @@ -114,7 +114,7 @@ } ], "source": [ - "settings.get(\"number-of-download-threads\")" + "config.get(\"number-of-download-threads\")" ] }, { @@ -129,7 +129,7 @@ "tags": [] }, "source": [ - "When the environment variable is set :func:`get ` returns its value." + "When the environment variable is set :func:`get ` returns its value." ] }, { @@ -180,7 +180,7 @@ } ], "source": [ - "settings.get(\"number-of-download-threads\")" + "config.get(\"number-of-download-threads\")" ] }, { @@ -195,7 +195,7 @@ "tags": [] }, "source": [ - "Setting the value generates a warning. The new value is saved into the settings file, but :func:`get ` still returns the value of the environment variable." + "Setting the value generates a warning. The new value is saved into the config file, but :func:`get ` still returns the value of the environment variable." 
] }, { @@ -214,7 +214,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/cgr/git/earthkit-data/src/earthkit/data/core/settings.py:404: UserWarning: Setting 'number-of-download-threads' is also set by environment variable 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and its value is returned when calling get().\n", + "/Users/cgr/git/earthkit-data/src/earthkit/data/core/config.py:406: UserWarning: Config option 'number-of-download-threads' is also set by environment variable 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and its value is returned when calling get().\n", " warnings.warn(msg)\n" ] }, @@ -230,8 +230,8 @@ } ], "source": [ - "settings.set(\"number-of-download-threads\", 10)\n", - "settings.get(\"number-of-download-threads\")" + "config.set(\"number-of-download-threads\", 10)\n", + "config.get(\"number-of-download-threads\")" ] }, { @@ -274,7 +274,7 @@ } ], "source": [ - "settings.env()" + "config.env()" ] }, { @@ -288,7 +288,7 @@ "tags": [] }, "source": [ - "When we dump the settings the values set via environment variables are clearly indicated." + "When we dump the configuration the values set via environment variables are clearly indicated." ] }, { @@ -344,7 +344,7 @@ } ], "source": [ - "settings" + "config" ] }, { diff --git a/docs/examples/index.rst b/docs/examples/index.rst index bb31dc11..5c4b29b5 100644 --- a/docs/examples/index.rst +++ b/docs/examples/index.rst @@ -171,8 +171,8 @@ Miscellaneous :maxdepth: 1 :glob: - settings.ipynb - settings_env_vars.ipynb + config.ipynb + config_env_vars.ipynb cache.ipynb projection.ipynb metadata.ipynb diff --git a/docs/guide/caching.rst b/docs/guide/caching.rst index 6cffdaf8..cc340064 100644 --- a/docs/guide/caching.rst +++ b/docs/guide/caching.rst @@ -9,9 +9,9 @@ Purpose earthkit-data uses a dedicated **directory** to store the results of remote data access and some GRIB/BUFR indexing information. 
By default this directory is **unmanaged** (its size is not checked/limited) and **no caching** is provided for the files in it, i.e. repeated calls to :func:`from_source` for remote services and URLs will download the data again! -When **caching is enabled** this directory will also serve as a **cache**. It means if we run :func:`from_source` again with the same arguments it will load the data from the cache instead of downloading it again. Additionally, caching offers **monitoring and disk space management**. When the cache is full, cached data is deleted according to the settings (i.e. oldest data is deleted first). The cache is implemented by using a sqlite database running in a separate thread. +When **caching is enabled** this directory will also serve as a **cache**. It means if we run :func:`from_source` again with the same arguments it will load the data from the cache instead of downloading it again. Additionally, caching offers **monitoring and disk space management**. When the cache is full, cached data is deleted according to the configuration (i.e. oldest data is deleted first). The cache is implemented by using a sqlite database running in a separate thread. -Please note that the earthkit-data cache configuration is managed through the :doc:`settings`. +Please note that the earthkit-data cache configuration is managed through the :doc:`config`. .. warning:: @@ -31,7 +31,7 @@ Please note that the earthkit-data cache configuration is managed through the :d Cache policies ------------------------------ -The primary key to control the cache in the settings is ``cache-policy``, which can take the following values: +The primary config option to control the cache is ``cache-policy``, which can take the following values: - :ref:`off ` (default) - :ref:`temporary ` @@ -55,16 +55,16 @@ Off cache policy When the ``cache-policy`` is "off" no caching is available. This is the **default** value. 
In this case all files are downloaded into an **unmanaged** temporary directory created by ``tempfile.TemporaryDirectory``. Since caching is disabled, all repeated calls to :func:`from_source` for remote services and URLSs will download the data again! This temporary directory will be unique for each earthkit-data session. When the directory object goes out of scope (at the latest on exit) the directory will be **cleaned up**. -Due to the temporary nature of this directory path it cannot be queried via the :doc:`settings`, but we need to call the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. +Due to the temporary nature of this directory path it cannot be queried via the :doc:`config`, but we need to call the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. .. code-block:: python - >>> from earthkit.data import cache, settings - >>> settings.set("cache-policy", "off") + >>> from earthkit.data import cache, config + >>> config.set("cache-policy", "off") >>> cache.directory() '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/tmp_5bf5kq8' -We can specify the parent directory for the the temporary directory by using the ``temporary-directory-root`` settings. By default it is set to None (no parent directory specified). +We can specify the parent directory for the temporary directory by using the ``temporary-directory-root`` config option. By default it is set to None (no parent directory specified). .. code-block:: python @@ -73,7 +73,7 @@ We can specify the parent directory for the the temporary directory by using the ... "cache-policy": "off", ... "temporary-directory-root": "~/my_demo_tmp", ... } - >>> settings.set(s) + >>> config.set(s) >>> cache.directory() '~/my_demo_tmp/tmp0iiuvsz5' @@ -84,16 +84,16 @@ Temporary cache policy When the ``cache-policy`` is "temporary" the **cache will be active and located in a managed** temporary directory created by ``tempfile.TemporaryDirectory``. 
This directory will be unique for each earthkit-data session. When the directory object goes out of scope (at the latest on exit) the cache is **cleaned up**. -Due to the temporary nature of this directory path it cannot be queried via the :doc:`settings`, but we need to call the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. +Due to the temporary nature of this directory path it cannot be queried via the :doc:`config`, but we need to call the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. .. code-block:: python - >>> from earthkit.data import cache, settings - >>> settings.set("cache-policy", "temporary") + >>> from earthkit.data import cache, config + >>> config.set("cache-policy", "temporary") >>> cache.directory() '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/tmp_5bf5kq8' -We can specify the parent directory for the the temporary cache by using the ``temporary-cache-directory-root`` settings. By default it is set to None (no parent directory specified). +We can specify the parent directory for the temporary cache by using the ``temporary-cache-directory-root`` config option. By default it is set to None (no parent directory specified). .. code-block:: python @@ -102,7 +102,7 @@ We can specify the parent directory for the the temporary cache by using the ``t ... "cache-policy": "temporary", ... "temporary-cache-directory-root": "~/my_demo_cache", ... } - >>> settings.set(s) + >>> config.set(s) >>> cache.directory() '~/my_demo_cache/tmp0iiuvsz5' @@ -111,7 +111,7 @@ We can specify the parent directory for the the temporary cache by using the ``t User cache policy +++++++++++++++++++ -When the ``cache-policy`` is "user" the **cache will be active** and created in a **managed directory** defined by the ``user-cache-directory`` settings. +When the ``cache-policy`` is "user" the **cache will be active** and created in a **managed directory** defined by the ``user-cache-directory`` config option. 
The user cache directory is **not cleaned up on exit**. So next time you start earthkit-data it will be there again unless it is deleted manually or it is set in way that on each startup a different path is assigned to it. Also, when you run multiple sessions of earthkit-data under the same user they will share the same cache. @@ -122,37 +122,37 @@ The default value of the user cache directory depends on your system: - ``/tmp/.../earthkit-data-$USER`` for MacOS -We can query the directory path via the :doc:`settings` and also by calling the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. +We can query the directory path via the :doc:`config` and also by calling the :meth:`~data.core.caching.Cache.directory` :ref:`cache method `. .. code-block:: python - >>> from earthkit.data import cache, settings - >>> settings.set("cache-policy", "user") - >>> settings.get("user-cache-directory") + >>> from earthkit.data import cache, config + >>> config.set("cache-policy", "user") + >>> config.get("user-cache-directory") /tmp/earthkit-data-myusername >>> cache.directory() /tmp/earthkit-data-myusername -The following code shows how to change the ``user-cache-directory`` settings: +The following code shows how to change the ``user-cache-directory`` config option: .. 
code:: python - >>> from earthkit.data import settings - >>> settings.get("user-cache-directory") # Find the current cache directory + >>> from earthkit.data import config + >>> config.get("user-cache-directory") # Find the current cache directory /tmp/earthkit-data-myusername >>> # Change the value of the setting - >>> settings.set("user-cache-directory", "/big-disk/earthkit-data-cache") + >>> config.set("user-cache-directory", "/big-disk/earthkit-data-cache") # Python kernel restarted - >>> from earthkit.data import settings - >>> settings.get("user-cache-directory") # Cache directory has been modified + >>> from earthkit.data import config + >>> config.get("user-cache-directory") # Cache directory has been modified /big-disk/earthkit-data-cache -More generally, the earthkit-data settings can be read, modified, reset +More generally, the earthkit-data config options can be read, modified, reset to their default values from Python, -see the :doc:`Settings documentation `. +see the :doc:`Configs documentation `. .. _cache_object: .. _cache_methods: @@ -196,7 +196,7 @@ there are a set of methods available on this object to manage and interact with .. warning:: :meth:`~data.core.caching.Cache.check_size` automatically runs when a new - entry is added to the cache or any of the :ref:`cache_settings` changes. + entry is added to the cache or any of the :ref:`cache_config` changes. Examples: @@ -223,7 +223,7 @@ Cache limits .. warning:: - These settings does not work when ``cache-policy`` is :ref:`off ` . + These config options do not work when ``cache-policy`` is :ref:`off ` . Maximum-cache-size @@ -249,16 +249,16 @@ Maximum-cache-disk-usage as it has a chance. .. .. note:: -.. When tweaking the cache settings, it is recommended to set the +.. When tweaking the cache config, it is recommended to set the .. ``maximum-cache-size`` to a value below the user disk quota (if applicable) .. and ``maximum-cache-disk-usage`` to ``None``. -.. _cache_settings: +.. 
_cache_config: -Cache settings parameters +Cache config parameters ------------------------------- -.. module-output:: generate_settings_rst .*-cache-.* cache-.* .*-cache +.. module-output:: generate_config_rst .*-cache-.* cache-.* .*-cache -Other earthkit-data settings can be found :ref:`here `. +Other earthkit-data config options can be found :ref:`here `. diff --git a/docs/guide/config.rst b/docs/guide/config.rst new file mode 100644 index 00000000..08a70910 --- /dev/null +++ b/docs/guide/config.rst @@ -0,0 +1,162 @@ +.. _config: + +Configuration +============= + +*earthkit-data* is maintaining a global configuration. + +The configuration is automatically loaded from and saved into a yaml file located at ``~/.config/earthkit/data/config.yaml``. An alternative path can be specified via the ``EARTHKIT_DATA_CONFIG_FILE`` environmental variable (it is only read at startup). + +The configuration can be accessed and modified from Python. The configuration options can also be defined as :ref:`environment variables `, which take precedence over the config file. + +See the following notebooks for examples: + + - :ref:`/examples/config.ipynb` + - :ref:`/examples/config_env_vars.ipynb` + + +.. _config_get: + +Accessing configuration options +-------------------------------- + +The earthkit-data configuration can be accessed using the python API: + +.. literalinclude:: include/config-get.py + +.. warning:: + + When an :ref:`environment variable ` is set, it takes precedence over the config parameter, and its value is returned from :func:`get() `. + +.. _config_set: + +Changing configuration +------------------------- + +.. note:: + + It is recommended to restart your Jupyter kernels after changing + or resetting config options. + +The earthkit-data configuration can be modified using the python API: + +.. literalinclude:: include/config-set.py + +.. 
warning:: + + When an :ref:`environment variable ` is set, the new value provided for :func:`set() ` is saved into the config file but :func:`get() ` will still return the value of the environment variable. A warning is also generated. + + +.. _config_temporary: + +Temporary configuration +------------------------ + +We can create a temporary configuration (as a context manager) as a copy of the original configuration. We will still refer to it as “config”, but it is completely independent from the original object and changes are not saved into the yaml file (even when ``config.autosave`` is True). + +.. literalinclude:: include/config-temporary.py + +Output:: + + 8 + 12 + 11 + +.. warning:: + + When an :ref:`environment variable ` is set, the same rules apply as for :func:`set() `. + + +.. _config_reset: + +Resetting configuration +------------------------ + +.. note:: + + It is recommended to restart your Jupyter kernels after changing + or resetting the configuration. + +The earthkit-data configuration can be reset using the python API: + +.. literalinclude:: include/config-reset.py + +.. warning:: + + When an :ref:`environment variable ` is set, the same rules apply as for :func:`set() `. + + +.. _config_env: + +Environment variables +---------------------- + +Each configuration parameter has a corresponding environment variable (see the full list :ref:`here `). When an environment variable is set, it takes precedence over the config parameter as the following examples show. + +First, let us assume that the value of ``number-of-download-threads`` is 5 in the config file and no environment variable is set. + +.. code-block:: python + + >>> from earthkit.data import config + >>> config.get("number-of-download-threads") + 5 + +Then, set the environment variable ``EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS``. + +.. code-block:: bash + + export EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26 + + +.. 
code-block:: python + + >>> from earthkit.data import config + >>> config.get("number-of-download-threads") + 26 + >>> config.env() + {'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS', '26')} + >>> config.set("number-of-download-threads", 10) + UserWarning: Config option 'number-of-download-threads' is also set by environment variable + 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and + its value is returned when calling get(). Still, the value set here will be + saved to the config file. + >>> config.get("number-of-download-threads") + 26 + +Finally, unset the environment variable and check the config value again, which is now the value from the config file. + +.. code-block:: bash + + unset EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS + + +.. code-block:: python + + >>> from earthkit.data import config + >>> config.get("number-of-download-threads") + 10 + + +See also the following notebook: + + - :ref:`/examples/config_env_vars.ipynb` + + +.. _config_table: + +List of configuration parameters +----------------------------------- + +This is the list of all the config parameters: + +.. module-output:: generate_config_rst + + +.. _config_env_table: + +List of environment variables +--------------------------------- + +This is the list of the config environment variables: + +.. 
module-output:: generate_config_env_rst diff --git a/docs/guide/include/config-get.py b/docs/guide/include/config-get.py new file mode 100644 index 00000000..409da672 --- /dev/null +++ b/docs/guide/include/config-get.py @@ -0,0 +1,9 @@ +import earthkit.data + +# Access one of the config options +cache_path = earthkit.data.config.get("user-cache-directory") +print(cache_path) + +# If this is the last line of a Notebook cell, this +# will display a table with all the current configuration +earthkit.data.config diff --git a/docs/guide/include/config-reset.py b/docs/guide/include/config-reset.py new file mode 100644 index 00000000..8c98f0f8 --- /dev/null +++ b/docs/guide/include/config-reset.py @@ -0,0 +1,7 @@ +import earthkit.data + +# Reset a named config option to its default value +earthkit.data.config.reset("user-cache-directory") + +# Reset all the config options to their default values +earthkit.data.config.reset() diff --git a/docs/guide/include/settings-set.py b/docs/guide/include/config-set.py similarity index 55% rename from docs/guide/include/settings-set.py rename to docs/guide/include/config-set.py index e980983c..1992a373 100644 --- a/docs/guide/include/settings-set.py +++ b/docs/guide/include/config-set.py @@ -1,16 +1,16 @@ import earthkit.data # Change the location of the user defined cache: -earthkit.data.settings.set("user-cache-directory", "/big-disk/earthkit-data-cache") +earthkit.data.config.set("user-cache-directory", "/big-disk/earthkit-data-cache") # Change number of download threads -earthkit.data.settings.set("number-of-download-threads", 7) +earthkit.data.config.set("number-of-download-threads", 7) # Multiple values can be set together. The argument list # can be a dictionary: -earthkit.data.settings.set({"number-of-download-threads": 7, "url-download-timeout": "1m"}) +earthkit.data.config.set({"number-of-download-threads": 7, "url-download-timeout": "1m"}) # Alternatively, we can use keyword arguments. 
However, because # the “-” character is not allowed in variable names in Python we have # to replace “-” with “_” in all the keyword arguments: -earthkit.data.settings.set(number_of_download_threads=8, url_download_timeout="2m") +earthkit.data.config.set(number_of_download_threads=8, url_download_timeout="2m") diff --git a/docs/guide/include/config-temporary.py b/docs/guide/include/config-temporary.py new file mode 100644 index 00000000..a7b94d82 --- /dev/null +++ b/docs/guide/include/config-temporary.py @@ -0,0 +1,11 @@ +import earthkit.data + +print(earthkit.data.config.get("number-of-download-threads")) + +with earthkit.data.config.temporary(): + earthkit.data.config.set("number-of-download-threads", 12) + print(earthkit.data.config.get("number-of-download-threads")) + +# Temporary config can also be created with arguments: +with earthkit.data.config.temporary("number-of-download-threads", 11): + print(earthkit.data.config.get("number-of-download-threads")) diff --git a/docs/guide/include/settings-get.py b/docs/guide/include/settings-get.py deleted file mode 100644 index bcd59629..00000000 --- a/docs/guide/include/settings-get.py +++ /dev/null @@ -1,9 +0,0 @@ -import earthkit.data - -# Access one of the settings -cache_path = earthkit.data.settings.get("user-cache-directory") -print(cache_path) - -# If this is the last line of a Notebook cell, this -# will display a table with all the current settings -earthkit.data.settings diff --git a/docs/guide/include/settings-reset.py b/docs/guide/include/settings-reset.py deleted file mode 100644 index 61e9a9fb..00000000 --- a/docs/guide/include/settings-reset.py +++ /dev/null @@ -1,7 +0,0 @@ -import earthkit.data - -# Reset a named setting to its default value -earthkit.data.settings.reset("user-cache-directory") - -# Reset all settings to their default values -earthkit.data.settings.reset() diff --git a/docs/guide/include/settings-temporary.py b/docs/guide/include/settings-temporary.py deleted file mode 100644 index 
b738691e..00000000 --- a/docs/guide/include/settings-temporary.py +++ /dev/null @@ -1,11 +0,0 @@ -import earthkit.data - -print(earthkit.data.settings.get("number-of-download-threads")) - -with earthkit.data.settings.temporary(): - earthkit.data.settings.set("number-of-download-threads", 12) - print(earthkit.data.settings.get("number-of-download-threads")) - -# Temporary settings can also be created with arguments: -with earthkit.data.settings.temporary("number-of-download-threads", 11): - print(earthkit.data.settings.get("number-of-download-threads")) diff --git a/docs/guide/index.rst b/docs/guide/index.rst index d1ed48b0..39d1d608 100644 --- a/docs/guide/index.rst +++ b/docs/guide/index.rst @@ -9,7 +9,7 @@ User guide data data_format/index.rst streams - settings + config caching xarray/index.rst misc/index.rst diff --git a/docs/guide/misc/grib_memory.rst b/docs/guide/misc/grib_memory.rst index 417da010..89750c8a 100644 --- a/docs/guide/misc/grib_memory.rst +++ b/docs/guide/misc/grib_memory.rst @@ -5,7 +5,7 @@ GRIB field memory management :ref:`grib` is a message-based binary format, where each message is regarded as a field. For reading GRIB, earthkit-data relies on :xref:`eccodes`, which, when loading a message into memory, represents it as a ``GRIB handle``. In the low level API, the GRIB handle is the object that holds the data and metadata of a GRIB field, therefore it can use up a significant amount of memory. -Determining when a GRIB handle needs to be created and when it can be released is important for memory management. Earthkit-data provides several settings to control this behaviour depending on how we actually read the data. +Determining when a GRIB handle needs to be created and when it can be released is important for memory management. Earthkit-data provides several config options to control this behaviour depending on how we actually read the data. 
Reading GRIB data as a stream iterator ======================================== @@ -40,7 +40,7 @@ With this, the entire ``ds`` fieldlist, including all the fields and the related Reading data from disk and managing its memory ============================================== -When reading :ref:`grib` data from disk as a :ref:`file source `, it is represented as a fieldlist and loaded lazily. After the (fast) initial scan for field offsets and lengths, no actual fields are created and no data is read into memory. When we start using the fieldlist, e.g. by iterating over the fields, accessing data or metadata etc., the fields will be created **on demand** and the related GRIB handles will be loaded from disk **when needed**. Whether this data or part of it stays in memory depends on the following :ref:`settings `: +When reading :ref:`grib` data from disk as a :ref:`file source `, it is represented as a fieldlist and loaded lazily. After the (fast) initial scan for field offsets and lengths, no actual fields are created and no data is read into memory. When we start using the fieldlist, e.g. by iterating over the fields, accessing data or metadata etc., the fields will be created **on demand** and the related GRIB handles will be loaded from disk **when needed**. Whether this data or part of it stays in memory depends on the following :ref:`config `: - :ref:`grib-field-policy ` - :ref:`grib-handle-policy ` @@ -56,9 +56,9 @@ Controls whether fields are kept in memory. The default is ``"persistent"``. The - ``"persistent"``: fields are kept in memory until the fieldlist is deleted - ``"temporary"``: fields are deleted when they go out of scope and recreated on demand -The actual memory used by a field depends on whether it owns the GRIB handle of the related GRIB message. This is controlled by the :ref:`grib-handle-policy ` settings. +The actual memory used by a field depends on whether it owns the GRIB handle of the related GRIB message. 
This is controlled by the :ref:`grib-handle-policy ` config option. -A field can also cache its metadata access for performance, thus increasing memory usage. This is controlled by the :ref:`use-grib-metadata-cache ` settings. +A field can also cache its metadata access for performance, thus increasing memory usage. This is controlled by the :ref:`use-grib-metadata-cache ` config option. .. _grib-handle-policy: @@ -76,12 +76,12 @@ Controls whether GRIB handles are kept in memory. The default is ``"cache"``. Th grib-handle-cache-size ++++++++++++++++++++++++++++ -When :ref:`grib-handle-policy ` is ``"cache"``, the setting ``grib-handle-cache-size`` (default is ``1``) specifies the maximum number of GRIB handles kept in an in-memory cache per fieldlist. This is an LRU cache, so when it is full, the least recently used GRIB handle is removed and a new GRIB message is loaded from disk and added to the cache. +When :ref:`grib-handle-policy ` is ``"cache"``, the config option ``grib-handle-cache-size`` (default is ``1``) specifies the maximum number of GRIB handles kept in an in-memory cache per fieldlist. This is an LRU cache, so when it is full, the least recently used GRIB handle is removed and a new GRIB message is loaded from disk and added to the cache. -Overriding the settings +Overriding the configuration ++++++++++++++++++++++++++++ -In addition to changing the :ref:`settings` themselves, it is possible to override the parameters above when loading a given fieldlist by passing them as keyword arguments to :func:`from_source`. The parameter names are the same but the dashes are replaced by underscores. When a parameter is not specified in :func:`from_source` or is set to None, its value is taken from the actual :ref:`settings`. E.g.: +In addition to changing the :ref:`config`, it is possible to override the parameters discussed above when loading a given fieldlist by passing them as keyword arguments to :func:`from_source`. 
The parameter names are the same but the dashes are replaced by underscores. When a parameter is not specified in :func:`from_source` or is set to None, its value is taken from the actual :ref:`config`. E.g.: .. code-block:: python @@ -116,5 +116,5 @@ fields up-front and keep them in memory by reading it as a :ref:`stream source < .. note:: - The default settings are chosen to keep the memory usage low and the performance high. However, depending on the use case, the settings can be adjusted to optimize the memory + The default config options are chosen to keep the memory usage low and the performance high. However, depending on the use case, the configuration can be adjusted to optimize the memory usage and performance. diff --git a/docs/guide/misc/grib_metadata.rst b/docs/guide/misc/grib_metadata.rst index fd86ae1a..34e6bb1f 100644 --- a/docs/guide/misc/grib_metadata.rst +++ b/docs/guide/misc/grib_metadata.rst @@ -3,23 +3,23 @@ GRIB field metadata caching ////////////////////////////// -The ``use-grib-metadata-cache`` :ref:`setting ` controls whether :ref:`grib` fields will cache their metadata access. The default value is ``True``. +The ``use-grib-metadata-cache`` :ref:`config option ` controls whether :ref:`grib` fields will cache their metadata access. The default value is ``True``. This is an in-memory cache attached to the field and implemented for the low-level metadata accessor for individual keys. Getting the values from the cache can be significantly faster than reading them from the GRIB handle, even when the handle is kept in memory. -This setting is applied to all the different GRIB field types, even for fields stored entirely in memory (see :ref:`grib-memory`). +This config option is applied to all the different GRIB field types, even for fields stored entirely in memory (see :ref:`grib-memory`). 
-Overriding the settings +Overriding the configuration ++++++++++++++++++++++++++++ -In addition to changing the :ref:`settings`, it is possible to override ``use-grib-metadata-cache`` when loading a given fieldlist by passing the ``use_grib_metadata_cache`` keyword argument (note the underscores) to :func:`from_source`. When this kwarg is not specified in :func:`from_source` or is set to None, its value is taken from the actual :ref:`settings`. E.g.: +In addition to changing the :ref:`config`, it is possible to override ``use-grib-metadata-cache`` when loading a given fieldlist by passing the ``use_grib_metadata_cache`` keyword argument (note the underscores) to :func:`from_source`. When this kwarg is not specified in :func:`from_source` or is set to None, its value is taken from the actual :ref:`config`. E.g.: .. code-block:: python import earthkit.data - # will override the settings + # will override the config ds = earthkit.data.from_source( "file", "test6.grib", diff --git a/docs/guide/settings.rst b/docs/guide/settings.rst deleted file mode 100644 index cc16042a..00000000 --- a/docs/guide/settings.rst +++ /dev/null @@ -1,162 +0,0 @@ -.. _settings: - -Settings -======== - -*earthkit-data* is maintaining a set of global settings which control -its behaviour. - -The settings are automatically loaded from and saved into ``~/.earthkit-data/settings.yaml`` and they can -be accessed and modified from Python. The settings can also be defined as :ref:`environment variables `, which take precedence over the settings file. - -See the following notebooks for examples: - - - :ref:`/examples/settings.ipynb` - - :ref:`/examples/settings_env_vars.ipynb` - - -.. _settings_get: - -Accessing settings ------------------- - -earthkit-data settings can be accessed using the python API: - -.. literalinclude:: include/settings-get.py - -.. 
warning:: - - When an :ref:`environment variable ` is set, it takes precedence over the settings parameter, and its value is returned from :func:`get() `. - -.. _settings_set: - -Changing settings ------------------- - -.. note:: - - It is recommended to restart your Jupyter kernels after changing - or resetting settings. - -earthkit-data settings can be modified using the python API: - -.. literalinclude:: include/settings-set.py - -.. warning:: - - When an :ref:`environment variable ` is set, the new value provided for :func:`set() ` is saved into the settings file but :func:`get() ` wil still return the value of the environment variable. A warning is also generated. - - -.. _settings_temporary: - -Temporary settings ------------------- - -We can create a temporary settings (as a context manager) as a copy of the original settings. We will still refer to it as “settings”, but it is completely independent from the original object and changes are not saved into the yaml file (even when ``settings.auto_save_settings`` is True). - -.. literalinclude:: include/settings-temporary.py - -Output:: - - 8 - 12 - 11 - -.. warning:: - - When an :ref:`environment variable ` is set, the same rules applies as for :func:`set() `. - - -.. _settings_reset: - -Resetting settings ------------------- - -.. note:: - - It is recommended to restart your Jupyter kernels after changing - or resetting settings. - -earthkit-data settings can be reset using the python API: - -.. literalinclude:: include/settings-reset.py - -.. warning:: - - When an :ref:`environment variable ` is set, the same rules applies as for :func:`set() `. - - -.. _settings_env: - -Environment variables ----------------------- - -Each settings parameter has a corresponding environment variable (see the full list :ref:`here `). When an environment variable is set, it takes precedence over the settings parameter as the following examples show. 
- -First, let us assume that the value of ``number-of-download-threads`` is 5 in the settings file and no environment variable is set. - -.. code-block:: python - - >>> from earthkit.data import settings - >>> settings.get("number-of-download-threads") - 5 - -Then, set the environment variable ``EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS``. - -.. code-block:: bash - - export EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26 - - -.. code-block:: python - - >>> from earthkit.data import settings - >>> settings.get("number-of-download-threads") - 26 - >>> settings.env() - {'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS', '26')} - >>> settings.set("number-of-download-threads", 10) - UserWarning: Setting 'number-of-download-threads' is also set by environment variable - 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and - its value is returned when calling get(). Still, the value set here will be - saved to the settings file. - >>> settings.get("number-of-download-threads") - 26 - -Finally, unset the environment variable and check the settings value again, which is now the value from the settings file. - -.. code-block:: bash - - unset EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS - - -.. code-block:: python - - >>> from earthkit.data import settings - >>> settings.get("number-of-download-threads") - 10 - - -See also the following notebook: - - - :ref:`/examples/settings_env_vars.ipynb` - - -.. _settings_table: - -List of settings parameters ----------------------------- - -This is the list of all the settings parameters: - -.. module-output:: generate_settings_rst - - -.. _settings_env_table: - -List of environment variables ---------------------------------- - -This is the list of the settings environment variables: - -.. 
module-output:: generate_settings_env_rst diff --git a/docs/guide/sources.rst b/docs/guide/sources.rst index 60bf51e6..1cc7c899 100644 --- a/docs/guide/sources.rst +++ b/docs/guide/sources.rst @@ -826,7 +826,7 @@ mars To figure out which data you need, or discover relevant data available in MARS, see the publicly accessible `MARS catalog`_ (or this `access restricted catalog `_). - If the ``use-standalone-mars-client-when-available`` :ref:`settings ` is True and the MARS client is installed (e.g. at ECMWF) the MARS access is direct. In this case the MARS client command can be specified via the ``MARS_CLIENT_EXECUTABLE`` environment variable. When it is not set the ``"/usr/local/bin/mars"`` path will be used. + If the ``use-standalone-mars-client-when-available`` :ref:`config option` is True and the MARS client is installed (e.g. at ECMWF) the MARS access is direct. In this case the MARS client command can be specified via the ``MARS_CLIENT_EXECUTABLE`` environment variable. When it is not set the ``"/usr/local/bin/mars"`` path will be used. If the standalone MARS client is not available or not enabled the `web API`_ will be used. In order to use the `web API`_ you will need to register and retrieve an access token. For a more extensive documentation about MARS, please refer to the `MARS user documentation`_. diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index a736cd97..36507920 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -4,6 +4,7 @@ Release notes .. 
toctree:: :maxdepth: 1 + version_0.13_updates version_0.12_updates version_0.11_updates version_0.10_updates diff --git a/docs/release_notes/version_0.13_updates.rst b/docs/release_notes/version_0.13_updates.rst new file mode 100644 index 00000000..c0fc2a91 --- /dev/null +++ b/docs/release_notes/version_0.13_updates.rst @@ -0,0 +1,41 @@ +Version 0.13 Updates +///////////////////////// + + +Version 0.13.0 +=============== + +Configuration +++++++++++++++++++ + +- The "settings" object has been renamed to :ref:`config `. The API did not change with the exception of ``settings.auto_save_settings``, which now is ``config.autosave``. The "settings" object is still available for backward compatibility but will be removed in a future release. Users are encouraged to migrate the code to use ``config`` instead. +- The configuration file changed to ``~/.config/earthkit/data/config.yaml``. When it is not available, the old configuration file at ``~/.earthkit_data/settings.yaml`` is loaded and saved into the new path. This is done until "settings" is removed. +- As a new feature, the configuration file can be specified via the ``EARTHKIT_DATA_CONFIG_FILE`` environmental variable. The environmental variable takes precedence over the default configuration file (it is only read at startup). + +.. list-table:: Migrating from settings to config + :header-rows: 1 + + * - Settings (old code) + - Config (new code) + * - + .. code-block:: python + + # the old import + from earthkit.data import settings + + # the API is the same + v = settings.get("number-of-download-threads") + + # the only change is related to autosave + settings.auto_save_settings = False + - + .. 
code-block:: python + + # the new import + from earthkit.data import config + + # the API is the same + v = config.get("number-of-download-threads") + + # the only change is related to autosave + config.autosave = False diff --git a/pytest.ini b/pytest.ini index 54860fb6..c4c76001 100644 --- a/pytest.ini +++ b/pytest.ini @@ -4,7 +4,8 @@ markers = long_test: a test that is long to run. Typically more that 5 sec. download: a test downloading some data (not from the ECMWF download server) ftp: test that used FTP. FTP is an old protocol and is not supported by most recent firewalls. - notebook: testing notebooks can be slow. But needs to be performed to ensure that the documention is tested. + custom_config: a test that needs to be run with a custom configuration file specified via the EARTHKIT_DATA_CONFIG_FILE environment variable. Must be run with --forked. + notebook: testing notebooks can be slow. But needs to be performed to ensure that the documentation is tested. no_cache_init: a test where the cache is not initialised. Must be run with --forked. 
no_eccodes: a test which should pass when ecCodes is not installed plugin: a test that requires an earthkit-data plugin to be installed diff --git a/src/earthkit/data/__init__.py b/src/earthkit/data/__init__.py index 8e1c642d..816f2d98 100644 --- a/src/earthkit/data/__init__.py +++ b/src/earthkit/data/__init__.py @@ -20,8 +20,8 @@ from .arguments.transformers import ALL from .core.caching import CACHE as cache +from .core.config import CONFIG as config from .core.fieldlist import FieldList -from .core.settings import SETTINGS as settings from .indexing.fieldlist import SimpleFieldList from .readers.grib.output import new_grib_output from .sources import Source @@ -31,6 +31,8 @@ from .utils.examples import download_example_file from .utils.examples import remote_example_file +settings = config + __all__ = [ "ALL", "ArrayField", @@ -43,6 +45,7 @@ "transform", "new_grib_output", "remote_example_file", + "config", "settings", "SimpleFieldList", "Source", diff --git a/src/earthkit/data/core/caching.py b/src/earthkit/data/core/caching.py index 146a1883..c84ad73b 100644 --- a/src/earthkit/data/core/caching.py +++ b/src/earthkit/data/core/caching.py @@ -29,7 +29,7 @@ from copy import deepcopy from random import randrange -from earthkit.data.core.settings import SETTINGS +from earthkit.data.core.config import CONFIG from earthkit.data.core.temporary import temp_directory from earthkit.data.utils import humanize from earthkit.data.utils.html import css @@ -186,8 +186,8 @@ def enqueue(self, func, *args, **kwargs): def _ensure_in_cache(self, path): assert self._policy.file_in_cache_directory(path), f"File not in cache {path}" - def _settings_changed(self, policy): - LOG.debug("Settings changed") + def _config_changed(self, policy): + LOG.debug("Config changed") self._policy = policy self._connection = None # The user may have changed the cache directory self._check_cache_size() @@ -575,15 +575,15 @@ class CachePolicy(metaclass=ABCMeta): _name = None def __init__(self): - 
self._settings = {k: SETTINGS.get(k) for k in self.CACHE_KEYS} + self._config = {k: CONFIG.get(k) for k in self.CACHE_KEYS} @property def name(self): return self._name @staticmethod - def from_settings(): - name = SETTINGS.get("cache-policy") + def from_config(): + name = CONFIG.get("cache-policy") p = _cache_policies.get(name, None) if p is not None: return p() @@ -591,15 +591,15 @@ def from_settings(): raise NotImplementedError(f"Unknown cache policy={name}") def outdated(self): - return any(self._settings.get(k) != SETTINGS.get(k) for k in self.OUTDATED_CHECK_KEYS) + return any(self._config.get(k) != CONFIG.get(k) for k in self.OUTDATED_CHECK_KEYS) def update(self): changed = False for k in self.CACHE_KEYS: - if self._settings.get(k) != SETTINGS.get(k): + if self._config.get(k) != CONFIG.get(k): changed = True if k not in self.OUTDATED_CHECK_KEYS: - self._settings[k] = SETTINGS.get(k) + self._config[k] = CONFIG.get(k) return changed @abstractmethod @@ -672,7 +672,7 @@ def managed(self): def directory(self): if self._dir is None: if self._dir is None: - root_dir = self._expand_path(self._settings.get("temporary-directory-root")) + root_dir = self._expand_path(self._config.get("temporary-directory-root")) self._dir = temp_directory(dir=root_dir) return self._dir.path @@ -698,7 +698,7 @@ class UserCachePolicy(CachePolicy): def __init__(self): super().__init__() - self._path = self._expand_path(self._settings.get("user-cache-directory")) + self._path = self._expand_path(self._config.get("user-cache-directory")) if not os.path.exists(self._path): os.makedirs(self._path, exist_ok=True) @@ -709,16 +709,16 @@ def directory(self): return self._path def use_message_position_index_cache(self): - return self._settings.get("use-message-position-index-cache") + return self._config.get("use-message-position-index-cache") def is_cache_size_managed(self): return self.maximum_cache_size() is not None or self.maximum_cache_disk_usage() is not None def maximum_cache_size(self): - 
return self._settings.get("maximum-cache-size") + return self._config.get("maximum-cache-size") def maximum_cache_disk_usage(self): - return self._settings.get("maximum-cache-disk-usage") + return self._config.get("maximum-cache-disk-usage") def __repr__(self): r = ( @@ -737,7 +737,7 @@ class TmpCachePolicy(UserCachePolicy): def __init__(self): super().__init__() - root_dir = self._expand_path(self._settings.get("temporary-cache-directory-root")) + root_dir = self._expand_path(self._config.get("temporary-cache-directory-root")) self._dir = temp_directory(dir=root_dir) def directory(self): @@ -778,17 +778,17 @@ def policy(self): def _make_policy(self): if self._policy is None: - self._policy = CachePolicy.from_settings() + self._policy = CachePolicy.from_config() LOG.debug(f"Cache: created cache policy={self._policy}") if self._policy.managed(): with self._manager_lock: if self._manager is None: self._manager = CacheManager() self._manager.start() - self._call_manager_settings_changed() + self._call_manager_config_changed() - def _settings_changed(self): - LOG.debug("Cache: settings_changed, cache-policy=" + SETTINGS.get("cache-policy")) + def _config_changed(self): + LOG.debug("Cache: config_changed, cache-policy=" + CONFIG.get("cache-policy")) if self.policy.outdated(): with self._policy_lock: # Check again, another thread/process may have modified the policy @@ -797,7 +797,7 @@ def _settings_changed(self): self._make_policy() elif self.policy.update() and self.policy.managed(): with self._manager_lock: - self._call_manager_settings_changed() + self._call_manager_config_changed() def _call_manager(self, forget, name, *args, **kwargs): if self.policy.managed() and self._manager is not None: @@ -811,8 +811,8 @@ def _call_manager(self, forget, name, *args, **kwargs): s = self._manager.enqueue(func, *args, **kwargs) return s.result() - def _call_manager_settings_changed(self): - s = self._manager.enqueue(self._manager._settings_changed, deepcopy(self._policy)) + def 
_call_manager_config_changed(self): + s = self._manager.enqueue(self._manager._config_changed, deepcopy(self._policy)) return s.result() def _dump_database(self, *args, **kwargs): @@ -851,17 +851,17 @@ def check_size(self, *args, **kwargs): """Check the cache size and trim it down when needed. Automatically runs when a new entry is added to the cache or the - :ref:`cache_settings` change. Does not work when the + :ref:`cache_config` change. Does not work when the ``cache-policy`` is "off". The algorithm includes three steps: - first, the cache size is determined - next, if the size is larger than the limit defined by - the ``maximum-cache-size`` settings the oldest cache entries are + the ``maximum-cache-size`` config the oldest cache entries are removed until the desired size reached - finally, if the size is larger than the limit defined by the - ``maximum-cache-disk-usage`` settings the oldest cache entries are + ``maximum-cache-disk-usage`` config the oldest cache entries are removed until the desired size reached """ @@ -1098,4 +1098,4 @@ def create(target, args): # housekeeping() -SETTINGS.on_change(CACHE._settings_changed) +CONFIG.on_change(CACHE._config_changed) diff --git a/src/earthkit/data/core/settings.py b/src/earthkit/data/core/config.py similarity index 74% rename from src/earthkit/data/core/settings.py rename to src/earthkit/data/core/config.py index a4ee81be..553dff71 100644 --- a/src/earthkit/data/core/settings.py +++ b/src/earthkit/data/core/config.py @@ -18,6 +18,7 @@ from contextlib import contextmanager from typing import Callable +import deprecation import yaml from earthkit.data import __version__ as VERSION @@ -31,9 +32,10 @@ LOG = logging.getLogger(__name__) -DOT_EARTHKIT_DATA = os.path.expanduser("~/.earthkit_data") -EARTHKIT_SETTINGS_DIR = DOT_EARTHKIT_DATA - +OLD_CONFIG_FILE = os.path.expanduser("~/.earthkit_data/settings.yaml") +CONFIG_DIR = os.path.expanduser("~/.config/earthkit/data") +CONFIG_FILE_NAME = "config.yaml" 
+CONFIG_FILE_ENV_NAME = "CONFIG_FILE" ENV_PREFIX = "EARTHKIT_DATA_" @@ -42,6 +44,7 @@ class Validator(metaclass=ABCMeta): def check(self, value): pass + # @abstractmethod def explain(self): pass @@ -91,7 +94,7 @@ def explain(self): _validators = {Interval: IntervalValidator, bool: ValueValidator, list: ListValidator} -class Setting: +class ConfigOption: def __init__( self, default, @@ -136,13 +139,13 @@ def docs_description(self): def validate(self, name, value): if self.validator is not None and not self.validator.check(value): - raise ValueError(f"Settings {name} cannot be set to {value}. {self.validator.explain()}") + raise ValueError(f"Config option '{name}' cannot be set to {value}. {self.validator.explain()}") -_ = Setting +_ = ConfigOption -SETTINGS_AND_HELP = { +CONFIG_AND_HELP = { "user-cache-directory": _( os.path.join(tempfile.gettempdir(), "earthkit-data-%s" % (getpass.getuser(),)), """Cache directory used when ``cache-policy`` is ``user``. @@ -248,20 +251,20 @@ def validate(self, name, value): NONE = object() DEFAULTS = {} -for k, v in SETTINGS_AND_HELP.items(): +for k, v in CONFIG_AND_HELP.items(): DEFAULTS[k] = v.default @contextmanager -def new_settings(s): - """Context manager to create new settings""" - SETTINGS._stack.append(s) - SETTINGS._notify() +def new_config(s): + """Context manager to create new config""" + CONFIG._stack.append(s) + CONFIG._notify() try: yield None finally: - SETTINGS._stack.pop() - SETTINGS._notify() + CONFIG._stack.pop() + CONFIG._notify() def forward(func): @@ -274,8 +277,8 @@ def wrapped(self, *args, **kwargs): return wrapped -def save_settings(path, settings): - LOG.debug("Saving settings") +def save_config(path, config): + LOG.debug("Saving config") from filelock import FileLock lock = path + ".lock" @@ -284,8 +287,8 @@ def save_settings(path, settings): print("# This file is automatically generated", file=f) print(file=f) - for k, v in sorted(settings.items()): - h = SETTINGS_AND_HELP.get(k) + for k, v in 
sorted(config.items()): + h = CONFIG_AND_HELP.get(k) if h: print(file=f) print("#", "-" * 76, file=f) @@ -307,15 +310,15 @@ def env_var_name(name): return ENV_PREFIX + name.upper().replace("-", "_") -class Settings: - _auto_save_settings = True +class Config: + _auto_save_config = True _notify_enabled = True - def __init__(self, settings_yaml: str, defaults: dict, callbacks=[]): + def __init__(self, config_yaml: str, defaults: dict, callbacks=[]): self._defaults = defaults - self._settings = dict(**defaults) + self._config = dict(**defaults) self._callbacks = [c for c in callbacks] - self._settings_yaml = settings_yaml + self._config_yaml = config_yaml self._pytest = None self._stack = [] @@ -334,14 +337,14 @@ def get(self, name: str, default=NONE): ------- [type]: [description] """ - if name not in SETTINGS_AND_HELP: - raise KeyError("No setting name '%s'" % (name,)) + if name not in CONFIG_AND_HELP: + raise KeyError("No config name '%s'" % (name,)) - settings_item = SETTINGS_AND_HELP[name] + config_item = CONFIG_AND_HELP[name] getter, none_ok = ( - settings_item.getter, - settings_item.none_ok, + config_item.getter, + config_item.none_ok, ) if getter is None: getter = lambda name, value, none_ok: value # noqa: E731 @@ -351,9 +354,9 @@ def get(self, name: str, default=NONE): st, value, _ = self._env(name) if not st: if default is NONE: - value = self._settings[name] + value = self._config[name] else: - value = self._settings.get(name, default) + value = self._config.get(name, default) return getter(name, value, none_ok) @@ -391,21 +394,21 @@ def _set(self, name: str, *args, **kwargs): value: [type] [description] """ - if name not in SETTINGS_AND_HELP: - raise KeyError("No setting name '%s'" % (name,)) + if name not in CONFIG_AND_HELP: + raise KeyError("No config option '%s'" % (name,)) if self._has_env(name): msg = ( - f"Setting '{name}' is also set by environment variable '{env_var_name(name)}'." 
+ f"Config option '{name}' is also set by environment variable '{env_var_name(name)}'." "The environment variable takes precedence and its value is returned when calling get()." ) - if self._auto_save_settings: - msg += " Still, the value set here will be saved to the settings file." + if self._auto_save_config: + msg += " Still, the value set here will be saved to the config file." warnings.warn(msg) - settings_item = SETTINGS_AND_HELP[name] + config_item = CONFIG_AND_HELP[name] - klass = settings_item.kind + klass = config_item.kind if klass in (bool, int, float, str): # TODO: Proper exceptions @@ -432,8 +435,8 @@ def _set(self, name: str, *args, **kwargs): value = args[0] getter, none_ok = ( - settings_item.getter, - settings_item.none_ok, + config_item.getter, + config_item.none_ok, ) if getter is not None: assert len(args) == 1 @@ -443,50 +446,50 @@ def _set(self, name: str, *args, **kwargs): getattr(self, getter)(name, value, none_ok) else: if not isinstance(value, klass): - raise TypeError("Setting '%s' must be of type '%s'" % (name, klass)) + raise TypeError("Config option '%s' must be of type '%s'" % (name, klass)) - settings_item.validate(name, value) - self._settings[name] = value + config_item.validate(name, value) + self._config[name] = value LOG.debug(f"_set {name}={value} stack_size={len(self._stack)}") @forward def reset(self, name: str = None): - """Reset setting(s) to default values. + """Reset config to default values. Parameters ---------- name: str, optional - The name of the setting to reset to default. If the setting - does not have a default, it is removed. If `None` is passed, all settings are + The name of the config option to reset to default. If it + does not have a default, it is removed. If `None` is passed, all config options are reset to their default values. Defaults to None. 
""" if name is None: - self._settings = dict(**DEFAULTS) + self._config = dict(**DEFAULTS) else: if name not in DEFAULTS: - raise KeyError("No setting name '%s'" % (name,)) + raise KeyError("No config option name '%s'" % (name,)) - self._settings.pop(name, None) + self._config.pop(name, None) if name in DEFAULTS: if self._has_env(name): msg = ( - f"Setting '{name}' is also set by environment variable '{env_var_name(name)}'." + f"Config option '{name}' is also set by environment variable '{env_var_name(name)}'." "The environment variable takes precedence and its value is returned when calling get()." ) - if self._auto_save_settings: - msg += " Still, the value reset here will be saved to the settings file." + if self._auto_save_config: + msg += " Still, the value reset here will be saved to the config file." warnings.warn(msg) - self._settings[name] = DEFAULTS[name] + self._config[name] = DEFAULTS[name] self._changed() @forward def __repr__(self): r = [] env = self.env() - for k, v in sorted(self._settings.items()): - setting = SETTINGS_AND_HELP.get(k, None) - default = setting.default if setting else "" + for k, v in sorted(self._config.items()): + config = CONFIG_AND_HELP.get(k, None) + default = config.default if config else "" if k in env: r.append(f"{k}: ({env[k][0]}={env[k][1]}, {v}, {default})") else: @@ -502,9 +505,9 @@ def _repr_html_(self): columns = ["Name", "Value", "Default"] html.append("%s" % ("".join(columns))) - for k, v in sorted(self._settings.items()): - setting = SETTINGS_AND_HELP.get(k, None) - default = setting.default if setting else "" + for k, v in sorted(self._config.items()): + config = CONFIG_AND_HELP.get(k, None) + default = config.default if config else "" if k in env: html.append( "%s%s=%r
(%r)%r" @@ -518,11 +521,11 @@ def _repr_html_(self): @forward def dump(self): - for k, v in sorted(self._settings.items()): - yield ((k, v, SETTINGS_AND_HELP.get(k))) + for k, v in sorted(self._config.items()): + yield ((k, v, CONFIG_AND_HELP.get(k))) def _changed(self): - if self._auto_save_settings: + if self._auto_save_config: self._save() self._notify() @@ -535,24 +538,24 @@ def on_change(self, callback: Callable[[], None]): self._callbacks.append(callback) def _save(self): - if self._settings_yaml is None: + if self._config_yaml is None: return try: - save_settings(self._settings_yaml, self._settings) + save_config(self._config_yaml, self._config) except Exception: LOG.error( - "Cannot save earthkit-data settings (%s)", - self._settings_yaml, + "Cannot save earthkit-data config (%s)", + self._config_yaml, exc_info=True, ) def save_as(self, path): try: - save_settings(path, self._settings) + save_config(path, self._config) except Exception: LOG.error( - f"Cannot save earthkit-data settings ({path})", + f"Cannot save earthkit-data config ({path})", exc_info=True, ) @@ -578,27 +581,37 @@ def _as_int(self, name, value, none_ok): return int(value) @forward - def temporary(self, *args, **kwargs): - tmp = Settings(None, self._settings) + def temporary(self, *args, config_yaml=None, **kwargs): + tmp = Config(config_yaml, self._config) # until the tmp object is at the top of the stack we do not want # notify the observers if len(args) > 0 or len(kwargs) > 0: # tmp does not have any callbacks so it will not broadcast the changes tmp.set(*args, **kwargs) tmp._callbacks = self._callbacks - return new_settings(tmp) + return new_config(tmp) + + @property + def autosave(self): + return Config._auto_save_config @property + @deprecation.deprecated(deprecated_in="0.13.0", removed_in=None, details="Use config.autosave instead") def auto_save_settings(self): - return Settings._auto_save_settings + return self.autosave + + @autosave.setter + def autosave(self, v): + 
Config._auto_save_config = v @auto_save_settings.setter + @deprecation.deprecated(deprecated_in="0.13.0", removed_in=None, details="Use config.autosave instead") def auto_save_settings(self, v): - Settings._auto_save_settings = v + self.autosave = v def env(self): r = {} - for k in sorted(self._settings.keys()): + for k in sorted(self._config.keys()): st, value, name = self._env(k) if st: r[k] = (name, value) @@ -616,43 +629,74 @@ def _env(name): return (False, None, None) +def _config_file(): + name = env_var_name(CONFIG_FILE_ENV_NAME) + if name in os.environ: + return os.environ[name] + + return os.path.expanduser(os.path.join(CONFIG_DIR, CONFIG_FILE_NAME)) + + +# TODO: remove this function when the old config (aka settings) is not used anymore +def migrate_old_config(): + old_config_yaml = os.path.expanduser(OLD_CONFIG_FILE) + if os.path.exists(old_config_yaml): + config = dict(**DEFAULTS) + try: + with open(old_config_yaml) as f: + s = yaml.load(f, Loader=yaml.SafeLoader) + if not isinstance(s, dict): + return False + + config.update(s) + save_config(config_yaml, config) + return True + except Exception: + pass + return False + + save = False -settings_yaml = os.path.expanduser(os.path.join(DOT_EARTHKIT_DATA, "settings.yaml")) +config_yaml = _config_file() try: - if not os.path.exists(DOT_EARTHKIT_DATA): - os.mkdir(DOT_EARTHKIT_DATA, 0o700) - if not os.path.exists(settings_yaml): - save_settings(settings_yaml, DEFAULTS) + config_dir = os.path.dirname(config_yaml) + if not os.path.exists(config_dir): + ori_mask = os.umask(0o077) + os.makedirs(config_dir, 0o700) + os.umask(ori_mask) + if not os.path.exists(config_yaml): + if not migrate_old_config(): + save_config(config_yaml, DEFAULTS) except Exception: LOG.error( - "Cannot create earthkit-data settings directory, using defaults (%s)", - settings_yaml, + "Cannot create earthkit-data config directory, using defaults (%s)", + config_yaml, exc_info=True, ) -settings = dict(**DEFAULTS) +config = dict(**DEFAULTS) 
try: - with open(settings_yaml) as f: + with open(config_yaml) as f: s = yaml.load(f, Loader=yaml.SafeLoader) if not isinstance(s, dict): s = {} - settings.update(s) + config.update(s) - # if s != settings: + # if s != config: # save = True - if settings.get("version") < VERSION: + if config.get("version") < VERSION: save = True except Exception: LOG.error( - "Cannot load earthkit-data settings (%s), reverting to defaults", - settings_yaml, + "Cannot load earthkit-data config (%s), reverting to defaults", + config_yaml, exc_info=True, ) -SETTINGS = Settings(settings_yaml, settings) +CONFIG = Config(config_yaml, config) if save: - SETTINGS._save() + CONFIG._save() diff --git a/src/earthkit/data/readers/__init__.py b/src/earthkit/data/readers/__init__.py index 5c939fd5..844c11af 100644 --- a/src/earthkit/data/readers/__init__.py +++ b/src/earthkit/data/readers/__init__.py @@ -13,7 +13,7 @@ from importlib import import_module from earthkit.data.core import Base -from earthkit.data.core.settings import SETTINGS +from earthkit.data.core.config import CONFIG from earthkit.data.decorators import detect_out_filename from earthkit.data.decorators import locked @@ -201,7 +201,7 @@ def reader(source, path, **kwargs): return r raise Exception(f"File is empty: '{path}'") - n_bytes = SETTINGS.get("reader-type-check-bytes") + n_bytes = CONFIG.get("reader-type-check-bytes") with open(path, "rb") as f: magic = f.read(n_bytes) @@ -219,7 +219,7 @@ def reader(source, path, **kwargs): def memory_reader(source, buffer, **kwargs): """Create a reader for data held in a memory buffer""" assert isinstance(buffer, (bytes, bytearray)), source - n_bytes = SETTINGS.get("reader-type-check-bytes") + n_bytes = CONFIG.get("reader-type-check-bytes") magic = buffer[: min(n_bytes, len(buffer) - 1)] return _find_reader("memory_reader", source, buffer, magic=magic, **kwargs) @@ -230,7 +230,7 @@ def stream_reader(source, stream, memory, **kwargs): magic = None if hasattr(stream, "peek") and 
callable(stream.peek): try: - n_bytes = SETTINGS.get("reader-type-check-bytes") + n_bytes = CONFIG.get("reader-type-check-bytes") magic = stream.peek(n_bytes) if len(magic) > n_bytes: magic = magic[:n_bytes] diff --git a/src/earthkit/data/readers/grib/index/__init__.py b/src/earthkit/data/readers/grib/index/__init__.py index 11a881c4..36b509dc 100644 --- a/src/earthkit/data/readers/grib/index/__init__.py +++ b/src/earthkit/data/readers/grib/index/__init__.py @@ -355,10 +355,10 @@ def __init__( ): super().__init__(*args, **kwargs) - from earthkit.data.core.settings import SETTINGS + from earthkit.data.core.config import CONFIG def _get_opt(v, name): - return v if v is not None else SETTINGS.get(name) + return v if v is not None else CONFIG.get(name) self._field_manager = GribFieldManager(_get_opt(grib_field_policy, "grib-field-policy"), self) self._handle_manager = GribHandleManager( diff --git a/src/earthkit/data/readers/grib/memory.py b/src/earthkit/data/readers/grib/memory.py index bd77c32a..47c21c22 100644 --- a/src/earthkit/data/readers/grib/memory.py +++ b/src/earthkit/data/readers/grib/memory.py @@ -21,9 +21,9 @@ def get_use_grib_metadata_cache(): - from earthkit.data.core.settings import SETTINGS + from earthkit.data.core.config import CONFIG - return SETTINGS.get("use-grib-metadata-cache") + return CONFIG.get("use-grib-metadata-cache") class GribMemoryReader(Reader): diff --git a/src/earthkit/data/sources/__init__.py b/src/earthkit/data/sources/__init__.py index 2ad6ecd8..4a3fe3a4 100644 --- a/src/earthkit/data/sources/__init__.py +++ b/src/earthkit/data/sources/__init__.py @@ -14,9 +14,9 @@ from earthkit.data.core import Base from earthkit.data.core.caching import cache_file +from earthkit.data.core.config import CONFIG from earthkit.data.core.plugins import find_plugin from earthkit.data.core.plugins import register as register_plugin -from earthkit.data.core.settings import SETTINGS class Source(Base): @@ -36,8 +36,8 @@ class Source(Base): def 
__init__(self, **kwargs): self._kwargs = kwargs - def settings(self, name): - return SETTINGS.get(name) + def config(self, name): + return CONFIG.get(name) def mutate(self): # Give a chance to `multi` to change source diff --git a/src/earthkit/data/sources/cds.py b/src/earthkit/data/sources/cds.py index bd3f5d60..35030670 100644 --- a/src/earthkit/data/sources/cds.py +++ b/src/earthkit/data/sources/cds.py @@ -120,7 +120,7 @@ def __init__(self, dataset, *args, prompt=True, **kwargs): self.client() # Trigger password prompt before threading - nthreads = min(self.settings("number-of-download-threads"), len(self.requests)) + nthreads = min(self.config("number-of-download-threads"), len(self.requests)) if nthreads < 2: self.path = [self._retrieve(dataset, r) for r in self.requests] diff --git a/src/earthkit/data/sources/ecmwf_api.py b/src/earthkit/data/sources/ecmwf_api.py index c9426422..d0f781a4 100644 --- a/src/earthkit/data/sources/ecmwf_api.py +++ b/src/earthkit/data/sources/ecmwf_api.py @@ -66,7 +66,7 @@ def __init__(self, *args, prompt=True, log="default", **kwargs): self.service() # Trigger password prompt before threading - nthreads = min(self.settings("number-of-download-threads"), len(requests)) + nthreads = min(self.config("number-of-download-threads"), len(requests)) if nthreads < 2: self.path = [self._retrieve(r) for r in requests] diff --git a/src/earthkit/data/sources/mars.py b/src/earthkit/data/sources/mars.py index bea7463d..abbacca9 100644 --- a/src/earthkit/data/sources/mars.py +++ b/src/earthkit/data/sources/mars.py @@ -11,7 +11,7 @@ import os import subprocess -from earthkit.data.core.settings import SETTINGS +from earthkit.data.core.config import CONFIG from earthkit.data.core.temporary import temp_file from .ecmwf_api import ECMWFApi @@ -64,7 +64,7 @@ def command(): @staticmethod def enabled(): - return SETTINGS.get("use-standalone-mars-client-when-available") and os.path.exists( + return CONFIG.get("use-standalone-mars-client-when-available") 
and os.path.exists( StandaloneMarsClient.command() ) diff --git a/src/earthkit/data/sources/multi.py b/src/earthkit/data/sources/multi.py index 6eb23a31..2e5da6d7 100644 --- a/src/earthkit/data/sources/multi.py +++ b/src/earthkit/data/sources/multi.py @@ -116,7 +116,7 @@ def _from_sources(self, sources): if not has_callables: return sources - nthreads = min(self.settings("number-of-download-threads"), len(callables)) + nthreads = min(self.config("number-of-download-threads"), len(callables)) if nthreads < 2: return [s() for s in sources] diff --git a/src/earthkit/data/sources/url.py b/src/earthkit/data/sources/url.py index 478ce78f..baffded7 100644 --- a/src/earthkit/data/sources/url.py +++ b/src/earthkit/data/sources/url.py @@ -13,7 +13,7 @@ from multiurl import Downloader from earthkit.data.core.caching import cache_file -from earthkit.data.core.settings import SETTINGS +from earthkit.data.core.config import CONFIG from earthkit.data.core.statistics import record_statistics from earthkit.data.utils.parts import PathAndParts from earthkit.data.utils.progbar import progress_bar @@ -52,7 +52,7 @@ def download_and_cache( downloader = Downloader( url, chunk_size=chunk_size, - timeout=SETTINGS.get("url-download-timeout"), + timeout=CONFIG.get("url-download-timeout"), verify=verify, parts=parts, range_method=range_method, @@ -76,11 +76,11 @@ def download_and_cache( return def out_of_date(url, path, cache_data): - if SETTINGS.get("check-out-of-date-urls") is False: + if CONFIG.get("check-out-of-date-urls") is False: return False if downloader.out_of_date(path, cache_data): - if SETTINGS.get("download-out-of-date-urls") or update_if_out_of_date: + if CONFIG.get("download-out-of-date-urls") or update_if_out_of_date: LOG.warning( "Invalidating cache version and re-downloading %s", url, @@ -261,7 +261,7 @@ def _download(self): self.downloader = Downloader( self.url_spec.zipped(), - timeout=SETTINGS.get("url-download-timeout"), + timeout=CONFIG.get("url-download-timeout"), 
statistics_gatherer=record_statistics, progress_bar=progress_bar, resume_transfers=True, @@ -300,11 +300,11 @@ def download(target, _): self.content_type = h.get("content-type") def out_of_date(self, url, path, cache_data): - if SETTINGS.get("check-out-of-date-urls") is False: + if CONFIG.get("check-out-of-date-urls") is False: return False if self.downloader.out_of_date(path, cache_data): - if SETTINGS.get("download-out-of-date-urls") or self.update_if_out_of_date: + if CONFIG.get("download-out-of-date-urls") or self.update_if_out_of_date: LOG.warning( "Invalidating cache version and re-downloading %s", self.url, @@ -348,7 +348,7 @@ def to_stream(self): downloader = Downloader( self.url_spec.zipped(), - timeout=SETTINGS.get("url-download-timeout"), + timeout=CONFIG.get("url-download-timeout"), # verify=self.verify, # range_method=self.range_method, # http_headers=self.prepare_headers(self.url), diff --git a/src/earthkit/data/sources/wekeo.py b/src/earthkit/data/sources/wekeo.py index 03b47932..0b048df4 100644 --- a/src/earthkit/data/sources/wekeo.py +++ b/src/earthkit/data/sources/wekeo.py @@ -114,7 +114,7 @@ def __init__(self, dataset, *args, prompt=True, **kwargs): self.client(self.prompt) # Trigger password prompt before threading - nthreads = min(self.settings("number-of-download-threads"), len(requests)) + nthreads = min(self.config("number-of-download-threads"), len(requests)) if nthreads < 2: self.path = [self._retrieve(dataset, r) for r in requests] diff --git a/src/earthkit/data/sources/wekeocds.py b/src/earthkit/data/sources/wekeocds.py index eb16b4a8..9dc422ae 100644 --- a/src/earthkit/data/sources/wekeocds.py +++ b/src/earthkit/data/sources/wekeocds.py @@ -76,7 +76,7 @@ def __init__(self, dataset, *args, prompt=True, **kwargs): self.client(self.prompt) # Trigger password prompt before threading - nthreads = min(self.settings("number-of-download-threads"), len(requests)) + nthreads = min(self.config("number-of-download-threads"), len(requests)) if 
nthreads < 2: self.path = [self._retrieve(dataset, r) for r in requests] diff --git a/src/earthkit/data/sphinxext/generate_settings_env_rst.py b/src/earthkit/data/sphinxext/generate_config_env_rst.py similarity index 78% rename from src/earthkit/data/sphinxext/generate_settings_env_rst.py rename to src/earthkit/data/sphinxext/generate_config_env_rst.py index fdd93def..3f52d74d 100644 --- a/src/earthkit/data/sphinxext/generate_settings_env_rst.py +++ b/src/earthkit/data/sphinxext/generate_config_env_rst.py @@ -11,8 +11,8 @@ import re -from earthkit.data.core.settings import SETTINGS_AND_HELP -from earthkit.data.core.settings import env_var_name +from earthkit.data.core.config import CONFIG_AND_HELP +from earthkit.data.core.config import env_var_name def execute(*args): @@ -21,10 +21,10 @@ def execute(*args): print(" :header-rows: 1") print(" :widths: 40 60") print() - print(" * - Name") - print(" - Environment Variable") + print(" * - Config option name") + print(" - Environment variable") print() - for k, v in sorted(SETTINGS_AND_HELP.items()): + for k, v in sorted(CONFIG_AND_HELP.items()): if len(args) and not any(re.match(arg, k) for arg in args): continue diff --git a/src/earthkit/data/sphinxext/generate_settings_rst.py b/src/earthkit/data/sphinxext/generate_config_rst.py similarity index 93% rename from src/earthkit/data/sphinxext/generate_settings_rst.py rename to src/earthkit/data/sphinxext/generate_config_rst.py index fbb4e1e9..34302332 100644 --- a/src/earthkit/data/sphinxext/generate_settings_rst.py +++ b/src/earthkit/data/sphinxext/generate_config_rst.py @@ -13,7 +13,7 @@ import os import re -from earthkit.data.core.settings import SETTINGS_AND_HELP +from earthkit.data.core.config import CONFIG_AND_HELP HOME = os.path.expanduser("~/") USER = getpass.getuser() @@ -50,7 +50,7 @@ def execute(*args): print(" - Default") print(" - Description") print() - for k, v in sorted(SETTINGS_AND_HELP.items()): + for k, v in sorted(CONFIG_AND_HELP.items()): if len(args) 
and not any(re.match(arg, k) for arg in args): continue diff --git a/src/earthkit/data/sphinxext/module_output.py b/src/earthkit/data/sphinxext/module_output.py index 301be074..559f1c1e 100644 --- a/src/earthkit/data/sphinxext/module_output.py +++ b/src/earthkit/data/sphinxext/module_output.py @@ -36,7 +36,7 @@ def run(self): module = import_module("..%s" % (name.replace("-", "_"),), package=__name__) - # from earthkit.data.sphinxext import generate_settings_rst as module + # from earthkit.data.sphinxext import generate_config_rst as module sys.stdout = StringIO() module.execute(*args) diff --git a/tests/conftest.py b/tests/conftest.py index c1dcf69a..f1f61d66 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -65,20 +65,20 @@ def pytest_runtest_setup(item): need_cache = "cache" in marks_in_items # settings - from earthkit.data import settings + from earthkit.data import config # ensure settings are not saved automatically - settings.auto_save_settings = False + config.autosave = False # ensure all the tests use the default settings if marked_no_cache_init: # do not broadcast setting changes, otherwise # the cache would be initialised - settings._notify_enabled = False - settings.reset() - settings._notify_enabled = True + config._notify_enabled = False + config.reset() + config._notify_enabled = True elif need_cache: - settings.reset() - settings.set("cache-policy", "user") + config.reset() + config.set("cache-policy", "user") else: - settings.reset() + config.reset() diff --git a/tests/core/test_config.py b/tests/core/test_config.py new file mode 100644 index 00000000..8512967f --- /dev/null +++ b/tests/core/test_config.py @@ -0,0 +1,262 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. 
+# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import os + +import pytest + +from earthkit.data import config +from earthkit.data.core.temporary import temp_directory +from earthkit.data.core.temporary import temp_file + + +def read_config_yaml(path=os.path.expanduser("~/.config/earthkit/data/config.yaml")): + try: + with open(path) as f: + import yaml + + s = yaml.load(f, Loader=yaml.SafeLoader) + if not isinstance(s, dict): + s = {} + return s + except Exception: + return {} + + +@pytest.mark.parametrize( + "param,default_value,new_value", + [ + ("number-of-download-threads", 5, 2), + ], +) +def test_configs_params_set_reset(param, default_value, new_value): + ori_value = config.get(param) + + with config.temporary(): + config.reset() + + assert config.get(param) == default_value + config.set(param, new_value) + assert config.get(param) == new_value + config.reset() + assert config.get(param) == default_value + + assert config.get(param) == ori_value + + +def test_config_invalid(): + # invalid param + with pytest.raises(KeyError): + config.get("_invalid_") + + with pytest.raises(KeyError): + config.set("_invalid_", 1) + + # invalid value + with pytest.raises(ValueError): + config.set("number-of-download-threads", "A") + + +@pytest.mark.parametrize( + "param,set_value,stored_value,raise_error", + [ + ("number-of-download-threads", "A", "A", ValueError), + ("url-download-timeout", 30, 30, None), + ("url-download-timeout", "30", 30, None), + ("url-download-timeout", "2m", 120, None), + ("url-download-timeout", "10h", 36000, None), + ("url-download-timeout", "7d", 7 * 24 * 3600, None), + ("url-download-timeout", "1x", None, ValueError), + ("url-download-timeout", "1M", 60, ValueError), + ("reader-type-check-bytes", 8, 8, None), + ("reader-type-check-bytes", 1, 1, ValueError), + ("reader-type-check-bytes", 
4097, 4097, ValueError), + ], +) +def test_config_set_numbers(param, set_value, stored_value, raise_error): + with config.temporary(): + if raise_error is None: + config.set(param, set_value) + assert config.get(param) == stored_value + else: + with pytest.raises(raise_error): + config.set(param, set_value) + + +def test_config_set_cache_numbers(): + with temp_directory() as tmpdir: + with config.temporary({"cache-policy": "user", "user-cache-directory": tmpdir}): + data = [ + ("maximum-cache-size", "1", 1, None), + ("maximum-cache-size", "1k", 1024, None), + ("maximum-cache-size", "1kb", 1024, None), + ("maximum-cache-size", "1k", 1024, None), + ("maximum-cache-size", "1kb", 1024, None), + ("maximum-cache-size", "1K", 1024, None), + ("maximum-cache-size", "1M", 1024 * 1024, None), + ("maximum-cache-size", "1G", 1024 * 1024 * 1024, None), + ("maximum-cache-size", "1T", 1024 * 1024 * 1024 * 1024, None), + ("maximum-cache-size", "1P", 1024 * 1024 * 1024 * 1024 * 1024, None), + ("maximum-cache-size", None, None, None), + ("maximum-cache-size", "-1", None, ValueError), + ("maximum-cache-disk-usage", "2%", 2, None), + ("maximum-cache-disk-usage", None, None, None), + ("maximum-cache-disk-usage", "-2%", None, ValueError), + ("maximum-cache-disk-usage", "102%", 102, None), + ("maximum-cache-disk-usage", "0%", 0, None), + ] + + for param, set_value, stored_value, raise_error in data: + if raise_error is None: + config.set(param, set_value) + if stored_value is not None: + assert config.get(param) == stored_value + else: + assert config.get(param) is None + else: + with pytest.raises(raise_error): + config.set(param, set_value) + + +def test_config_set_multi(): + with config.temporary(): + config.set("number-of-download-threads", 7) + assert config.get("number-of-download-threads") == 7 + + config.set({"number-of-download-threads": 2, "url-download-timeout": 21}) + assert config.get("number-of-download-threads") == 2 + assert config.get("url-download-timeout") == 21 + + 
config.set(number_of_download_threads=3, url_download_timeout=11) + assert config.get("number-of-download-threads") == 3 + assert config.get("url-download-timeout") == 11 + + with pytest.raises(KeyError): + config.set({"number-of-download-threads": 2, "-invalid-": 21}) + + with pytest.raises(KeyError): + config.set(number_of_download_threads=3, __invalid__=11) + + +def test_config_temporary_single(): + with config.temporary("number-of-download-threads", 7): + assert config.get("number-of-download-threads") == 7 + + with config.temporary({"number-of-download-threads": 7}): + assert config.get("number-of-download-threads") == 7 + + with config.temporary(number_of_download_threads=7): + assert config.get("number-of-download-threads") == 7 + + +def test_config_temporary_multi(): + with config.temporary({"number-of-download-threads": 2, "url-download-timeout": 21}): + assert config.get("number-of-download-threads") == 2 + assert config.get("url-download-timeout") == 21 + + with config.temporary(number_of_download_threads=3, url_download_timeout=11): + assert config.get("number-of-download-threads") == 3 + assert config.get("url-download-timeout") == 11 + + +def test_config_temporary_nested(): + with config.temporary("number-of-download-threads", 7): + assert config.get("number-of-download-threads") == 7 + with config.temporary("number-of-download-threads", 10): + assert config.get("number-of-download-threads") == 10 + assert config.get("number-of-download-threads") == 7 + + +def test_config_temporary_autosave_1(): + with temp_file() as config_file: + with config.temporary(config_yaml=config_file): + # now config should contain the default values + # we ensure that the configs are saved into the file + config._save() + + key = "number-of-download-threads" + + v_ori = config.autosave + config.autosave = False + + # when a key has a default value, it is not saved into the config file + s = read_config_yaml(config_file) + assert key not in s + + v = config.get(key) + 
config.set(key, v + 10) + assert config.get(key) == v + 10 + + # the config file should be the same + s = read_config_yaml(config_file) + assert key not in s + + config.autosave = v_ori + + +def test_config_temporary_autosave_2(): + with temp_file() as config_file: + with config.temporary(config_yaml=config_file): + # now config should contain the default values + # we ensure that the config is saved into the file + config._save() + + key = "number-of-download-threads" + + v_ori = config.autosave + config.autosave = True + + # when a key has a default value, it is not saved into the config file + s = read_config_yaml(config_file) + assert key not in s + + v = config.get(key) + v_new = v + 10 + config.set(key, v_new) + assert config.get(key) == v_new + + # the file changed + s = read_config_yaml(config_file) + assert s[key] == v_new + + config.autosave = False + config.set(key, v) + assert config.get(key) == v + s = read_config_yaml(config_file) + assert s[key] == v_new + + config.autosave = v_ori + + +@pytest.mark.parametrize( + "value,error", [("10000", None), (10000, None), ("1b", ValueError), ("A", ValueError)] +) +def test_config_env(monkeypatch, value, error): + env_key = "EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS" + monkeypatch.setenv(env_key, value) + + v_ori = config.autosave + config.autosave = True + + if error is None: + v = config.get("number-of-download-threads") + assert v == 10000 + else: + with pytest.raises(error): + config.get("number-of-download-threads") + + config.autosave = v_ori + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py index 5212fadd..fd2072b1 100644 --- a/tests/core/test_settings.py +++ b/tests/core/test_settings.py @@ -9,14 +9,18 @@ # nor does it submit to any jurisdiction. 
# +import os import pytest from earthkit.data import settings from earthkit.data.core.temporary import temp_directory +from earthkit.data.core.temporary import temp_file +# TODO: remove all these tests when settings are removed -def read_settings_yaml(path="~/.earthkit-data/settings.yaml"): + +def read_settings_yaml(path=os.path.expanduser("~/.config/earthkit/data/config.yaml")): try: with open(path) as f: import yaml @@ -173,50 +177,65 @@ def test_settings_temporary_nested(): assert settings.get("number-of-download-threads") == 7 -@pytest.mark.parametrize("autosave", [True, False]) -def test_settings_temporary_autosave(autosave): - v_ori = settings.auto_save_settings - with settings.temporary(): - settings.auto_save_settings = autosave - v = settings.get("number-of-download-threads") - settings.set("number-of-download-threads", v + 10) - s = read_settings_yaml() - if s: - assert s["number-of-download-threads"] == v - assert settings.auto_save_settings == autosave - settings.auto_save_settings = v_ori +def test_settings_temporary_autosave_1(): + with temp_file() as config_file: + with settings.temporary(config_yaml=config_file): + # now settings should contain the default values + # we ensure that the settings are saved into the file + settings._save() + key = "number-of-download-threads" -def test_settings_auto_save_1(): - v_ori = settings.auto_save_settings - settings.auto_save_settings = False - v = settings.get("number-of-download-threads") - settings.set("number-of-download-threads", v + 10) - assert settings.get("number-of-download-threads") == v + 10 - s = read_settings_yaml() - if s: - assert s["number-of-download-threads"] == v - settings.auto_save_settings = v_ori + v_ori = settings.auto_save_settings + settings.auto_save_settings = False + # when a key has a default value, it is not saved into the settings file + s = read_settings_yaml(config_file) + assert key not in s -def test_settings_auto_save_2(): - v_ori = settings.auto_save_settings - 
settings.auto_save_settings = True + v = settings.get(key) + settings.set(key, v + 10) + assert settings.get(key) == v + 10 - v = settings.get("number-of-download-threads") - settings.set("number-of-download-threads", v + 10) - assert settings.get("number-of-download-threads") == v + 10 - s = read_settings_yaml() - if s: - assert s["number-of-download-threads"] == v + 10 + # the settings file should be the same + s = read_settings_yaml(config_file) + assert key not in s - settings.set("number-of-download-threads", v) - assert settings.get("number-of-download-threads") == v - s = read_settings_yaml() - if s: - assert s["number-of-download-threads"] == v + 10 + settings.auto_save_settings = v_ori - settings.auto_save_settings = v_ori + +def test_settings_temporary_autosave_2(): + with temp_file() as config_file: + with settings.temporary(config_yaml=config_file): + # now settings should contain the default values + # we ensure that the settings are saved into the file + settings._save() + + key = "number-of-download-threads" + + v_ori = settings.auto_save_settings + settings.auto_save_settings = True + + # when a key has a default value, it is not saved into the settings file + s = read_settings_yaml(config_file) + assert key not in s + + v = settings.get(key) + v_new = v + 10 + settings.set(key, v_new) + assert settings.get(key) == v_new + + # the file changed + s = read_settings_yaml(config_file) + assert s[key] == v_new + + settings.auto_save_settings = False + settings.set(key, v) + assert settings.get(key) == v + s = read_settings_yaml(config_file) + assert s[key] == v_new + + settings.auto_save_settings = v_ori @pytest.mark.parametrize( diff --git a/tests/list_of_dicts/lod_fixtures.py b/tests/list_of_dicts/lod_fixtures.py index 68b11276..4d66957a 100644 --- a/tests/list_of_dicts/lod_fixtures.py +++ b/tests/list_of_dicts/lod_fixtures.py @@ -12,10 +12,6 @@ import numpy as np import pytest -from earthkit.data import from_source -from 
earthkit.data.indexing.fieldlist import FieldArray -from earthkit.data.sources.array_list import ArrayField - def _build_list(prototype): return [ @@ -171,10 +167,14 @@ def lod_ll_forecast_4(): def build_lod_fieldlist(lod, mode): + from earthkit.data import from_source + from earthkit.data.indexing.fieldlist import SimpleFieldList + from earthkit.data.sources.array_list import ArrayField + if mode == "list-of-dicts": return from_source("list-of-dicts", lod) elif mode == "loop": - ds = FieldArray() + ds = SimpleFieldList() for f in lod: ds.append(ArrayField(f["values"], f)) return ds