diff --git a/docs/examples/config.ipynb b/docs/examples/config.ipynb index 16c488ce..9403af1a 100644 --- a/docs/examples/config.ipynb +++ b/docs/examples/config.ipynb @@ -173,7 +173,7 @@ { "data": { "text/plain": [ - "5" + "30" ] }, "execution_count": 4, @@ -182,7 +182,7 @@ } ], "source": [ - "config.get(\"number-of-download-threads\")" + "config.get(\"url-download-timeout\")" ] }, { @@ -215,7 +215,7 @@ { "data": { "text/plain": [ - "6" + "5" ] }, "execution_count": 5, @@ -224,8 +224,8 @@ } ], "source": [ - "config.set(\"number-of-download-threads\", 6)\n", - "config.get(\"number-of-download-threads\")" + "config.set(\"url-download-timeout\", 5)\n", + "config.get(\"url-download-timeout\")" ] }, { @@ -246,15 +246,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "7\n", - "60\n" + "10\n", + "True\n" ] } ], "source": [ - "config.set({\"number-of-download-threads\": 7, \"url-download-timeout\": \"1m\"})\n", - "print(config.get(\"number-of-download-threads\"))\n", - "print(config.get(\"url-download-timeout\"))" + "config.set({\"url-download-timeout\": 10, \"check-out-of-date-urls\": True})\n", + "print(config.get(\"url-download-timeout\"))\n", + "print(config.get(\"check-out-of-date-urls\"))" ] }, { @@ -281,15 +281,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "8\n", - "120\n" + "10\n", + "True\n" ] } ], "source": [ - "config.set(number_of_download_threads=8, url_download_timeout=\"2m\")\n", - "print(config.get(\"number-of-download-threads\"))\n", - "print(config.get(\"url-download-timeout\"))" + "config.set(url_download_timeout=10, check_out_of_date_urls=True)\n", + "print(config.get(\"url-download-timeout\"))\n", + "print(config.get(\"check-out-of-date-urls\"))" ] }, { @@ -332,16 +332,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "8\n", + "10\n", "12\n" ] } ], "source": [ "with config.temporary():\n", - " print(config.get(\"number-of-download-threads\"))\n", - " config.set(\"number-of-download-threads\", 12)\n", - " 
print(config.get(\"number-of-download-threads\"))" + " print(config.get(\"url-download-timeout\"))\n", + " config.set(\"url-download-timeout\", 12)\n", + " print(config.get(\"url-download-timeout\"))" ] }, { @@ -361,7 +361,7 @@ { "data": { "text/plain": [ - "8" + "10" ] }, "execution_count": 9, @@ -370,7 +370,7 @@ } ], "source": [ - "config.get(\"number-of-download-threads\")" + "config.get(\"url-download-timeout\")" ] }, { @@ -391,16 +391,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "11\n", - "8\n" + "12\n", + "10\n" ] } ], "source": [ - "with config.temporary(\"number-of-download-threads\", 11):\n", - " print(config.get(\"number-of-download-threads\"))\n", + "with config.temporary(\"url-download-timeout\", 12):\n", + " print(config.get(\"url-download-timeout\"))\n", "\n", - "print(config.get(\"number-of-download-threads\"))" + "print(config.get(\"url-download-timeout\"))" ] }, { @@ -408,7 +408,7 @@ "id": "313fbf8f-a540-449e-b340-5c46014d931c", "metadata": {}, "source": [ - "#### Reset to defaults" + "#### Resetting to defaults" ] }, { @@ -444,17 +444,15 @@ "output_type": "stream", "text": [ "12\n", - "5\n" + "10\n" ] } ], "source": [ - "with config.temporary():\n", - " config.set(\"number-of-download-threads\", 12)\n", - " print(config.get(\"number-of-download-threads\"))\n", - " config.reset()\n", - " print(config.get(\"number-of-download-threads\"))\n", - " " + "with config.temporary(\"url-download-timeout\", 12):\n", + " print(config.get(\"url-download-timeout\"))\n", + "\n", + "print(config.get(\"url-download-timeout\"))" ] }, { diff --git a/docs/examples/config_env_vars.ipynb b/docs/examples/config_env_vars.ipynb index ebace542..32f4e839 100644 --- a/docs/examples/config_env_vars.ipynb +++ b/docs/examples/config_env_vars.ipynb @@ -105,7 +105,7 @@ { "data": { "text/plain": [ - "5" + "30" ] }, "execution_count": 3, @@ -114,7 +114,7 @@ } ], "source": [ - "config.get(\"number-of-download-threads\")" + "config.get(\"url-download-timeout\")" ] }, { 
@@ -148,12 +148,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "env: EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26\n" + "env: EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26\n" ] } ], "source": [ - "%env EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26" + "%env EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26" ] }, { @@ -180,7 +180,7 @@ } ], "source": [ - "config.get(\"number-of-download-threads\")" + "config.get(\"url-download-timeout\")" ] }, { @@ -214,7 +214,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/cgr/git/earthkit-data/src/earthkit/data/core/config.py:406: UserWarning: Config option 'number-of-download-threads' is also set by environment variable 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and its value is returned when calling get().\n", + "/Users/cgr/git/earthkit-data/src/earthkit/data/core/config.py:407: UserWarning: Config option 'url-download-timeout' is also set by environment variable 'EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT'.The environment variable takes precedence and its value is returned when calling get().\n", " warnings.warn(msg)\n" ] }, @@ -230,8 +230,8 @@ } ], "source": [ - "config.set(\"number-of-download-threads\", 10)\n", - "config.get(\"number-of-download-threads\")" + "config.set(\"url-download-timeout\", 10)\n", + "config.get(\"url-download-timeout\")" ] }, { @@ -264,8 +264,7 @@ { "data": { "text/plain": [ - "{'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS',\n", - " '26')}" + "{'url-download-timeout': ('EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT', '26')}" ] }, "execution_count": 7, @@ -315,7 +314,7 @@ " vertical-align: top;\n", " text-align: left !important;\n", "}\n", - "
NameValueDefault
cache-policy'off''off'
check-out-of-date-urlsTrueTrue
download-out-of-date-urlsFalseFalse
grib-field-policy'persistent''persistent'
grib-handle-cache-size11
grib-handle-policy'cache''cache'
maximum-cache-disk-usage'95%''95%'
maximum-cache-sizeNoneNone
number-of-download-threadsEARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS='26'
(10)
5
reader-type-check-bytes6464
temporary-cache-directory-rootNoneNone
temporary-directory-rootNoneNone
url-download-timeout'30s''30s'
use-grib-metadata-cacheTrueTrue
use-message-position-index-cacheFalseFalse
use-standalone-mars-client-when-availableTrueTrue
user-cache-directory'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr''/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr'
version'0.11.5.dev2+g384bbb0.d20241209'''
" + "
NameValueDefault
cache-policy'off''off'
check-out-of-date-urlsTrueTrue
download-out-of-date-urlsFalseFalse
grib-field-policy'persistent''persistent'
grib-handle-cache-size11
grib-handle-policy'cache''cache'
maximum-cache-disk-usage'95%''95%'
maximum-cache-sizeNoneNone
number-of-download-threads55
reader-type-check-bytes6464
temporary-cache-directory-rootNoneNone
temporary-directory-rootNoneNone
url-download-timeoutEARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT='26'
(10)
'30s'
use-grib-metadata-cacheTrueTrue
use-message-position-index-cacheFalseFalse
use-standalone-mars-client-when-availableTrueTrue
user-cache-directory'/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr''/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr'
version'0.11.5.dev2+g384bbb0.d20241209'''
" ], "text/plain": [ "cache-policy: (off, off)\n", @@ -326,11 +325,11 @@ "grib-handle-policy: (cache, cache)\n", "maximum-cache-disk-usage: (95%, 95%)\n", "maximum-cache-size: (None, None)\n", - "number-of-download-threads: (EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26, 10, 5)\n", + "number-of-download-threads: (5, 5)\n", "reader-type-check-bytes: (64, 64)\n", "temporary-cache-directory-root: (None, None)\n", "temporary-directory-root: (None, None)\n", - "url-download-timeout: (30s, 30s)\n", + "url-download-timeout: (EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26, 10, 30s)\n", "use-grib-metadata-cache: (True, True)\n", "use-message-position-index-cache: (False, False)\n", "use-standalone-mars-client-when-available: (True, True)\n", diff --git a/docs/guide/caching.rst b/docs/guide/caching.rst index cc340064..5993c04f 100644 --- a/docs/guide/caching.rst +++ b/docs/guide/caching.rst @@ -15,7 +15,7 @@ Please note that the earthkit-data cache configuration is managed through the :d .. warning:: - By default the caching is disabled, i.e. the :ref:`cache-policy ` is "off". + By default the caching is disabled, i.e. the :ref:`cache-policy ` is :ref:`off `. .. warning:: @@ -206,7 +206,7 @@ Examples: >>> cache.policy.name 'user' >>> cache.directory() - '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr' + '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-myusername' >>> cache.size() 846785699 >>> cache.summary_dump_database() diff --git a/docs/guide/config.rst b/docs/guide/config.rst index 08a70910..d5eee5af 100644 --- a/docs/guide/config.rst +++ b/docs/guide/config.rst @@ -58,8 +58,8 @@ We can create a temporary configuration (as a context manager) as a copy of the Output:: - 8 - 12 + 30 + 5 11 .. warning:: @@ -93,47 +93,46 @@ Environment variables Each configuration parameter has a corresponding environment variable (see the full list :ref:`here `). 
When an environment variable is set, it takes precedence over the config parameter as the following examples show. -First, let us assume that the value of ``number-of-download-threads`` is 5 in the config file and no environment variable is set. +First, let us assume that the value of ``url-download-timeout`` is 30 in the config file and no environment variable is set. .. code-block:: python >>> from earthkit.data import config - >>> config.get("number-of-download-threads") - 5 + >>> config.get("url-download-timeout") + 30 -Then, set the environment variable ``EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS``. +Then, set the environment variable ``EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT``. .. code-block:: bash - export EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26 - + export EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=5 .. code-block:: python >>> from earthkit.data import config - >>> config.get("number-of-download-threads") - 26 + >>> config.get("url-download-timeout") + 5 >>> config.env() - {'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS', '26')} - >>> config.set("number-of-download-threads", 10) - UserWarning: Config option 'number-of-download-threads' is also set by environment variable - 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and + {'url-download-timeout': ('EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT', '5')} + >>> config.set("url-download-timeout", 10) + UserWarning: Config option 'url-download-timeout' is also set by environment variable + 'EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT'.The environment variable takes precedence and its value is returned when calling get(). Still, the value set here will be saved to the config file. - >>> config.get("number-of-download-threads") - 26 + >>> config.get("url-download-timeout") + 5 Finally, unset the environment variable and check the config value again, which is now the value from the config file. .. 
code-block:: bash - unset EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS + unset EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT .. code-block:: python >>> from earthkit.data import config - >>> config.get("number-of-download-threads") + >>> config.get("url-download-timeout") 10 diff --git a/docs/guide/include/config-set.py b/docs/guide/include/config-set.py index 1992a373..9eb97178 100644 --- a/docs/guide/include/config-set.py +++ b/docs/guide/include/config-set.py @@ -3,14 +3,14 @@ # Change the location of the user defined cache: earthkit.data.config.set("user-cache-directory", "/big-disk/earthkit-data-cache") -# Change number of download threads -earthkit.data.config.set("number-of-download-threads", 7) +# Change download timeout +earthkit.data.config.set("url-download-timeout", "1m") # Multiple values can be set together. The argument list # can be a dictionary: -earthkit.data.config.set({"number-of-download-threads": 7, "url-download-timeout": "1m"}) +earthkit.data.config.set({"url-download-timeout": "1m", "check-out-of-date-urls": True}) # Alternatively, we can use keyword arguments. 
However, because # the “-” character is not allowed in variable names in Python we have # to replace “-” with “_” in all the keyword arguments: -earthkit.data.config.set(number_of_download_threads=8, url_download_timeout="2m") +earthkit.data.config.set(url_download_timeout="1m", check_out_of_date_urls=True) diff --git a/docs/guide/include/config-temporary.py b/docs/guide/include/config-temporary.py index a7b94d82..9d9d3854 100644 --- a/docs/guide/include/config-temporary.py +++ b/docs/guide/include/config-temporary.py @@ -1,11 +1,11 @@ import earthkit.data -print(earthkit.data.config.get("number-of-download-threads")) +print(earthkit.data.config.get("url-download-timeout")) with earthkit.data.config.temporary(): - earthkit.data.config.set("number-of-download-threads", 12) - print(earthkit.data.config.get("number-of-download-threads")) + earthkit.data.config.set("url-download-timeout", 5) + print(earthkit.data.config.get("url-download-timeout")) # Temporary config can also be created with arguments: -with earthkit.data.config.temporary("number-of-download-threads", 11): - print(earthkit.data.config.get("number-of-download-threads")) +with earthkit.data.config.temporary("url-download-timeout", 11): + print(earthkit.data.config.get("url-download-timeout")) diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py index f4489af5..cc8151e4 100644 --- a/tests/core/test_cache.py +++ b/tests/core/test_cache.py @@ -14,8 +14,8 @@ import pytest from earthkit.data import cache +from earthkit.data import config from earthkit.data import from_source -from earthkit.data import settings from earthkit.data.core.caching import cache_file from earthkit.data.core.temporary import temp_directory from earthkit.data.testing import earthkit_examples_file @@ -58,19 +58,19 @@ def touch(target, args): @pytest.mark.cache def test_cache_1(): - with settings.temporary(): - settings.set("maximum-cache-disk-usage", "99%") + with config.temporary(): + config.set("maximum-cache-disk-usage", 
"99%") cache.purge(matcher=lambda e: ["owner"] == "test_cache") - check_cache_files(settings.get("user-cache-directory")) + check_cache_files(config.get("user-cache-directory")) # 1GB ram disk on MacOS (blocks of 512 bytes) # diskutil erasevolume HFS+ "RAMDisk" `hdiutil attach -nomount ram://2097152` @pytest.mark.skipif(not os.path.exists("/Volumes/RAMDisk"), reason="No RAM disk") def test_cache_4(): - with settings.temporary(): - settings.set("cache-directory", "/Volumes/RAMDisk/earthkit_data") - settings.set("maximum-cache-disk-usage", "90%") + with config.temporary(): + config.set("cache-directory", "/Volumes/RAMDisk/earthkit_data") + config.set("maximum-cache-disk-usage", "90%") for n in range(10): from_source("dummy-source", "zeros", size=100 * 1024 * 1024, n=n) @@ -78,10 +78,10 @@ def test_cache_4(): def test_cache_policy(): with temp_directory() as user_dir: # cache = user dir - with settings.temporary(): - settings.set({"cache-policy": "user", "user-cache-directory": user_dir}) - assert settings.get("cache-policy") == "user" - assert settings.get("user-cache-directory") == user_dir + with config.temporary(): + config.set({"cache-policy": "user", "user-cache-directory": user_dir}) + assert config.get("cache-policy") == "user" + assert config.get("user-cache-directory") == user_dir assert cache.policy.managed() is True cache_dir = cache.policy.directory() assert cache_dir == user_dir @@ -89,17 +89,17 @@ def test_cache_policy(): check_cache_files(cache_dir) # cache = temporary with auto generated path - with settings.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}): - assert settings.get("cache-policy") == "temporary" - assert settings.get("temporary-cache-directory-root") is None + with config.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}): + assert config.get("cache-policy") == "temporary" + assert config.get("temporary-cache-directory-root") is None assert cache.policy.managed() is True 
cache_dir = cache.policy.directory() assert os.path.exists(cache_dir) check_cache_files(cache_dir) # cache = user dir (again) - assert settings.get("cache-policy") == "user" - assert settings.get("user-cache-directory") == user_dir + assert config.get("cache-policy") == "user" + assert config.get("user-cache-directory") == user_dir assert cache.policy.managed() is True cache_dir = cache.policy.directory() assert cache_dir == user_dir @@ -108,14 +108,14 @@ def test_cache_policy(): # cache = temporary with user defined root path with temp_directory() as root_dir: - with settings.temporary( + with config.temporary( { "cache-policy": "temporary", "temporary-cache-directory-root": root_dir, } ): - assert settings.get("cache-policy") == "temporary" - assert settings.get("temporary-cache-directory-root") == root_dir + assert config.get("cache-policy") == "temporary" + assert config.get("temporary-cache-directory-root") == root_dir assert cache.policy.managed() is True cache_dir = cache.policy.directory() assert os.path.exists(cache_dir) @@ -123,9 +123,9 @@ def test_cache_policy(): check_cache_files(cache_dir) # cache = off - with settings.temporary("cache-policy", "off"): - assert settings.get("cache-policy") == "off" - assert settings.get("user-cache-directory") == user_dir + with config.temporary("cache-policy", "off"): + assert config.get("cache-policy") == "off" + assert config.get("user-cache-directory") == user_dir assert cache.policy.managed() is False cache_dir = cache.policy.directory() @@ -133,8 +133,8 @@ def test_cache_policy(): check_cache_files(cache_dir, managed=False) # cache = user dir (again) - assert settings.get("cache-policy") == "user" - assert settings.get("user-cache-directory") == user_dir + assert config.get("cache-policy") == "user" + assert config.get("user-cache-directory") == user_dir assert cache.policy.managed() is True cache_dir = cache.policy.directory() assert cache_dir == user_dir @@ -143,7 +143,7 @@ def test_cache_policy(): def 
test_url_source_no_cache(): - with settings.temporary("cache-policy", "off"): + with config.temporary("cache-policy", "off"): ds = from_source( "url", "https://get.ecmwf.int/repository/test-data/earthkit-data/examples/test.grib", @@ -152,7 +152,7 @@ def test_url_source_no_cache(): def test_grib_no_cache(): - with settings.temporary("cache-policy", "off"): + with config.temporary("cache-policy", "off"): ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) assert len(ds) == 18 @@ -163,7 +163,7 @@ def test_grib_no_cache(): @pytest.mark.parametrize("index_cache", [True, False]) def test_grib_offset_index_cache(index_cache): s = {"cache-policy": "temporary", "use-message-position-index-cache": index_cache} - with settings.temporary(s): + with config.temporary(s): ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) assert len(ds) == 18 @@ -263,12 +263,12 @@ def test_cache_zip_file_changed_modtime(): @pytest.mark.parametrize("policy", ["user", "temporary"]) def test_cache_management(policy): with temp_directory() as tmp_dir_path: - with settings.temporary(): + with config.temporary(): if policy == "user": - settings.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path}) + config.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path}) assert cache.directory() == tmp_dir_path elif policy == "temporary": - settings.set( + config.set( { "cache-policy": "temporary", "temporary-cache-directory-root": tmp_dir_path, @@ -302,7 +302,7 @@ def test_cache_management(policy): latest_path = x["path"] # limit cache size so that only one file should remain - settings.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None}) + config.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None}) num, size = cache.summary_dump_database() assert num == 1 diff --git a/tests/core/test_cache_with_settings.py b/tests/core/test_cache_with_settings.py new file mode 100644 index 00000000..78eecdf3 --- /dev/null +++ 
b/tests/core/test_cache_with_settings.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 + +# (C) Copyright 2020 ECMWF. +# +# This software is licensed under the terms of the Apache Licence Version 2.0 +# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0. +# In applying this licence, ECMWF does not waive the privileges and immunities +# granted to it by virtue of its status as an intergovernmental organisation +# nor does it submit to any jurisdiction. +# + +import os + +import pytest + +from earthkit.data import cache +from earthkit.data import from_source +from earthkit.data import settings +from earthkit.data.core.caching import cache_file +from earthkit.data.core.temporary import temp_directory +from earthkit.data.testing import earthkit_examples_file + + +def check_cache_files(dir_path, managed=True): + def touch(target, args): + assert args["foo"] in (1, 2) + with open(target, "w"): + pass + + path1 = cache_file( + "test_cache", + touch, + {"foo": 1}, + extension=".test", + ) + + path2 = cache_file( + "test_cache", + touch, + {"foo": 2}, + extension=".test", + ) + + assert os.path.exists(path1) + assert os.path.exists(path2) + assert os.path.dirname(path1) == dir_path + assert os.path.dirname(path1) == dir_path + assert path1 != path2 + + if managed: + cnt = 0 + for f in cache.entries(): + if f["owner"] == "test_cache": + cnt += 1 + + assert cnt == 2 + + +@pytest.mark.cache +def test_cache_1_setting(): + with settings.temporary(): + settings.set("maximum-cache-disk-usage", "99%") + cache.purge(matcher=lambda e: ["owner"] == "test_cache") + check_cache_files(settings.get("user-cache-directory")) + + +# 1GB ram disk on MacOS (blocks of 512 bytes) +# diskutil erasevolume HFS+ "RAMDisk" `hdiutil attach -nomount ram://2097152` +@pytest.mark.skipif(not os.path.exists("/Volumes/RAMDisk"), reason="No RAM disk") +def test_cache_4_setting(): + with settings.temporary(): + settings.set("cache-directory", "/Volumes/RAMDisk/earthkit_data") + 
settings.set("maximum-cache-disk-usage", "90%") + for n in range(10): + from_source("dummy-source", "zeros", size=100 * 1024 * 1024, n=n) + + +def test_cache_policy_setting(): + with temp_directory() as user_dir: + # cache = user dir + with settings.temporary(): + settings.set({"cache-policy": "user", "user-cache-directory": user_dir}) + assert settings.get("cache-policy") == "user" + assert settings.get("user-cache-directory") == user_dir + assert cache.policy.managed() is True + cache_dir = cache.policy.directory() + assert cache_dir == user_dir + assert os.path.exists(cache_dir) + check_cache_files(cache_dir) + + # cache = temporary with auto generated path + with settings.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}): + assert settings.get("cache-policy") == "temporary" + assert settings.get("temporary-cache-directory-root") is None + assert cache.policy.managed() is True + cache_dir = cache.policy.directory() + assert os.path.exists(cache_dir) + check_cache_files(cache_dir) + + # cache = user dir (again) + assert settings.get("cache-policy") == "user" + assert settings.get("user-cache-directory") == user_dir + assert cache.policy.managed() is True + cache_dir = cache.policy.directory() + assert cache_dir == user_dir + assert os.path.exists(cache_dir) + check_cache_files(cache_dir) + + # cache = temporary with user defined root path + with temp_directory() as root_dir: + with settings.temporary( + { + "cache-policy": "temporary", + "temporary-cache-directory-root": root_dir, + } + ): + assert settings.get("cache-policy") == "temporary" + assert settings.get("temporary-cache-directory-root") == root_dir + assert cache.policy.managed() is True + cache_dir = cache.policy.directory() + assert os.path.exists(cache_dir) + os.path.dirname(cache_dir) == root_dir + check_cache_files(cache_dir) + + # cache = off + with settings.temporary("cache-policy", "off"): + assert settings.get("cache-policy") == "off" + assert 
settings.get("user-cache-directory") == user_dir + assert cache.policy.managed() is False + + cache_dir = cache.policy.directory() + assert os.path.exists(cache_dir) + check_cache_files(cache_dir, managed=False) + + # cache = user dir (again) + assert settings.get("cache-policy") == "user" + assert settings.get("user-cache-directory") == user_dir + assert cache.policy.managed() is True + cache_dir = cache.policy.directory() + assert cache_dir == user_dir + assert os.path.exists(cache_dir) + check_cache_files(cache_dir) + + +def test_url_source_no_cache_setting(): + with settings.temporary("cache-policy", "off"): + ds = from_source( + "url", + "https://get.ecmwf.int/repository/test-data/earthkit-data/examples/test.grib", + ) + assert len(ds) == 2 + + +def test_grib_no_cache_setting(): + with settings.temporary("cache-policy", "off"): + ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) + assert len(ds) == 18 + + f = ds[3] + assert f.metadata("param") == "t" + + +@pytest.mark.parametrize("index_cache", [True, False]) +def test_grib_offset_index_cache_setting(index_cache): + s = {"cache-policy": "temporary", "use-message-position-index-cache": index_cache} + with settings.temporary(s): + ds = from_source("file", earthkit_examples_file("tuv_pl.grib")) + assert len(ds) == 18 + + f = ds[3] + assert f.metadata("param") == "t", f"index-cache={index_cache}" + + +# See github #155. This test can hang so we must set a timeout. +@pytest.mark.no_cache_init +@pytest.mark.timeout(20) +def test_cache_with_log_debug_setting(caplog): + import logging + + # the cache must not be initialised at this point + assert cache._policy is None + assert cache._manager is None + + caplog.set_level(logging.DEBUG) + LOG = logging.getLogger(__name__) + + class A: + def __repr__(self): + d = cache.directory() + return d + + a = A() + LOG.debug(f"dir {a}") + # NOTE: if we use "%s" formatting e.g. "LOG.debug("dir %s", a)" + # the problem still occurs! 
+ + +@pytest.mark.cache +def test_cache_zip_file_overwritten_1_setting(): + with temp_directory() as tmp_dir: + import shutil + import zipfile + + # copy input data to work dir + grb1_path = os.path.join(tmp_dir, "test.grib") + shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path) + + grb2_path = os.path.join(tmp_dir, "test6.grib") + shutil.copyfile(earthkit_examples_file("test6.grib"), grb2_path) + + # first pass + zip_path = os.path.join(tmp_dir, "test.zip") + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb1_path) + + ds = from_source("file", zip_path) + assert len(ds) == 2 + ds_path = ds.path + + # second pass - same zip file, the grib should be read + # from the cache + ds1 = from_source("file", zip_path) + assert len(ds1) == 2 + assert ds1.path == ds_path + + # third pass - same zipfile path with different contents + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb2_path) + + ds2 = from_source("file", zip_path) + assert len(ds2) == 6 + assert ds2.path != ds_path + + +@pytest.mark.cache +def test_cache_zip_file_changed_modtime_setting(): + with temp_directory() as tmp_dir: + import shutil + import zipfile + + # copy input data to work dir + grb1_path = os.path.join(tmp_dir, "test.grib") + shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path) + + # first pass + zip_path = os.path.join(tmp_dir, "test.zip") + with zipfile.ZipFile(zip_path, "w") as zip_object: + zip_object.write(grb1_path) + + ds = from_source("file", zip_path) + assert len(ds) == 2 + ds_path = ds.path + + # second pass - changed modtime + st = os.stat(zip_path) + m_time = (st.st_atime_ns + 10, st.st_mtime_ns + 10) + os.utime(zip_path, ns=m_time) + ds2 = from_source("file", zip_path) + assert len(ds2) == 2 + assert ds2.path != ds_path + + +@pytest.mark.parametrize("policy", ["user", "temporary"]) +def test_cache_management_setting(policy): + with temp_directory() as tmp_dir_path: + with settings.temporary(): + if policy == 
"user": + settings.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path}) + assert cache.directory() == tmp_dir_path + elif policy == "temporary": + settings.set( + { + "cache-policy": "temporary", + "temporary-cache-directory-root": tmp_dir_path, + } + ) + assert os.path.dirname(cache.directory()) == tmp_dir_path + else: + assert False + + data_size = 10 * 1024 + + # create 3 files existing only in the cache + r = [] + for n in range(3): + r.append(from_source("dummy-source", "zeros", size=data_size, n=n)) + + for ds in r: + assert os.path.exists(ds.path) + assert os.path.dirname(ds.path) == cache.directory() + + # check cache contents + num, size = cache.summary_dump_database() + assert num == 3 + assert size == 3 * data_size + assert len(cache.entries()) == 3 + + for i, x in enumerate(cache.entries()): + assert x["size"] == data_size + assert x["owner"] == "dummy-source" + assert x["args"] == {"size": data_size, "n": i} + latest_path = x["path"] + + # limit cache size so that only one file should remain + settings.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None}) + + num, size = cache.summary_dump_database() + assert num == 1 + assert size == data_size + assert len(cache.entries()) == 1 + for x in cache.entries(): + assert x["size"] == data_size + assert x["owner"] == "dummy-source" + assert x["args"] == {"size": data_size, "n": 2} + x["path"] == latest_path + break + + # purge the cache + r = None + cache.purge() + num, size = cache.summary_dump_database() + assert num == 0 + assert size == 0 + assert len(cache.entries()) == 0 + + +@pytest.mark.cache +def test_cache_force_setting(): + import time + + def _force_true(args, path, owner_data): + time.sleep(0.001) + return True + + def _force_false(args, path, owner_data): + time.sleep(0.001) + return False + + data_size = 10 * 1024 + ds = from_source("dummy-source", "zeros", size=data_size, n=0) + st = os.stat(ds.path) + m_time_ref = st.st_mtime_ns + + ds1 = 
from_source("dummy-source", "zeros", size=data_size, n=0) + assert ds1.path == ds.path + st = os.stat(ds1.path) + m_time = st.st_mtime_ns + assert m_time == m_time_ref + + ds2 = from_source("dummy-source", "zeros", force=_force_false, size=data_size, n=0) + assert ds2.path == ds.path + st = os.stat(ds2.path) + m_time = st.st_mtime_ns + assert m_time == m_time_ref + + ds3 = from_source("dummy-source", "zeros", force=_force_true, size=data_size, n=0) + assert ds3.path == ds.path + st = os.stat(ds3.path) + m_time = st.st_mtime_ns + assert m_time != m_time_ref + m_time_ref = m_time + + ds4 = from_source("dummy-source", "zeros", size=data_size, n=0) + assert ds4.path == ds.path + st = os.stat(ds4.path) + m_time = st.st_mtime_ns + assert m_time == m_time_ref + + +if __name__ == "__main__": + from earthkit.data.testing import main + + main(__file__) diff --git a/tests/core/test_config.py b/tests/core/test_config.py index 20a1f37d..f5f91750 100644 --- a/tests/core/test_config.py +++ b/tests/core/test_config.py @@ -34,7 +34,7 @@ def read_config_yaml(path=os.path.expanduser("~/.config/earthkit/data/config.yam @pytest.mark.parametrize( "param,default_value,new_value", [ - ("number-of-download-threads", 5, 2), + ("url-download-timeout", 30, 5), ], ) def test_configs_params_set_reset(param, default_value, new_value): @@ -62,7 +62,7 @@ def test_config_invalid(): # invalid value with pytest.raises(ValueError): - config.set("number-of-download-threads", "A") + config.set("url-download-timeout", "A") @pytest.mark.parametrize( @@ -128,51 +128,51 @@ def test_config_set_cache_numbers(): def test_config_set_multi(): with config.temporary(): - config.set("number-of-download-threads", 7) - assert config.get("number-of-download-threads") == 7 + config.set("url-download-timeout", 7) + assert config.get("url-download-timeout") == 7 - config.set({"number-of-download-threads": 2, "url-download-timeout": 21}) - assert config.get("number-of-download-threads") == 2 - assert 
config.get("url-download-timeout") == 21 + config.set({"url-download-timeout": 2, "check-out-of-date-urls": False}) + assert config.get("url-download-timeout") == 2 + assert not config.get("check-out-of-date-urls") - config.set(number_of_download_threads=3, url_download_timeout=11) - assert config.get("number-of-download-threads") == 3 + config.set(url_download_timeout=11, check_out_of_date_urls=False) assert config.get("url-download-timeout") == 11 + assert not config.get("check-out-of-date-urls") with pytest.raises(KeyError): - config.set({"number-of-download-threads": 2, "-invalid-": 21}) + config.set({"url-download-timeout": 2, "-invalid-": 21}) with pytest.raises(KeyError): - config.set(number_of_download_threads=3, __invalid__=11) + config.set(url_download_timeout=3, __invalid__=11) def test_config_temporary_single(): - with config.temporary("number-of-download-threads", 7): - assert config.get("number-of-download-threads") == 7 + with config.temporary("url-download-timeout", 7): + assert config.get("url-download-timeout") == 7 - with config.temporary({"number-of-download-threads": 7}): - assert config.get("number-of-download-threads") == 7 + with config.temporary({"url-download-timeout": 7}): + assert config.get("url-download-timeout") == 7 - with config.temporary(number_of_download_threads=7): - assert config.get("number-of-download-threads") == 7 + with config.temporary(url_download_timeout=7): + assert config.get("url-download-timeout") == 7 def test_config_temporary_multi(): - with config.temporary({"number-of-download-threads": 2, "url-download-timeout": 21}): - assert config.get("number-of-download-threads") == 2 - assert config.get("url-download-timeout") == 21 + with config.temporary({"url-download-timeout": 2, "check-out-of-date-urls": False}): + assert config.get("url-download-timeout") == 2 + assert not config.get("check-out-of-date-urls") - with config.temporary(number_of_download_threads=3, url_download_timeout=11): - assert 
config.get("number-of-download-threads") == 3 - assert config.get("url-download-timeout") == 11 + with config.temporary(url_download_timeout=3, check_out_of_date_urls=False): + assert config.get("url-download-timeout") == 3 + assert not config.get("check-out-of-date-urls") def test_config_temporary_nested(): - with config.temporary("number-of-download-threads", 7): - assert config.get("number-of-download-threads") == 7 - with config.temporary("number-of-download-threads", 10): - assert config.get("number-of-download-threads") == 10 - assert config.get("number-of-download-threads") == 7 + with config.temporary("url-download-timeout", 7): + assert config.get("url-download-timeout") == 7 + with config.temporary("url-download-timeout", 10): + assert config.get("url-download-timeout") == 10 + assert config.get("url-download-timeout") == 7 def test_config_temporary_autosave_1(): @@ -182,7 +182,7 @@ def test_config_temporary_autosave_1(): # we ensure that the configs are saved into the file config.save_as(config_file) - key = "number-of-download-threads" + key = "url-download-timeout" v_ori = config.autosave config.autosave = False @@ -209,7 +209,7 @@ def test_config_temporary_autosave_2(): # we ensure that the config is saved into the file config.save_as(config_file) - key = "number-of-download-threads" + key = "url-download-timeout" v_ori = config.autosave config.autosave = True @@ -240,18 +240,18 @@ def test_config_temporary_autosave_2(): "value,error", [("10000", None), (10000, None), ("1b", ValueError), ("A", ValueError)] ) def test_config_env(monkeypatch, value, error): - env_key = "EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS" + env_key = "EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT" monkeypatch.setenv(env_key, value) # v_ori = config.autosave # config.autosave = True if error is None: - v = config.get("number-of-download-threads") + v = config.get("url-download-timeout") assert v == 10000 else: with pytest.raises(error): - config.get("number-of-download-threads") + 
config.get("url-download-timeout") # config.autosave = v_ori diff --git a/tests/documentation/test_examples.py b/tests/documentation/test_examples.py index e5d21121..55f37c70 100644 --- a/tests/documentation/test_examples.py +++ b/tests/documentation/test_examples.py @@ -20,9 +20,9 @@ "xml2rst.py", "actions.py", "generate-examples-maps.py", - "settings-set.py", - "settings-reset.py", - "settings-temporary.py", + "config-set.py", + "config-reset.py", + "config-temporary.py", "xref.py", ] diff --git a/tests/grib/test_grib_cache.py b/tests/grib/test_grib_cache.py index c86533a4..2d8b4599 100644 --- a/tests/grib/test_grib_cache.py +++ b/tests/grib/test_grib_cache.py @@ -15,8 +15,8 @@ import pytest +from earthkit.data import config from earthkit.data import from_source -from earthkit.data import settings from earthkit.data.testing import earthkit_examples_file from earthkit.data.utils.diag import field_cache_diag @@ -65,7 +65,7 @@ def _check_diag(diag, ref): @pytest.mark.parametrize("serialise", [True, False]) def test_grib_cache_basic_file_patched(handle_cache_size, serialise, patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", @@ -149,7 +149,7 @@ def test_grib_cache_basic_file_non_patched(): """This test is the same as test_grib_cache_basic but without the patch_metadata_cache fixture. 
So metadata cache hits and misses are not counted.""" - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", @@ -221,7 +221,7 @@ def test_grib_cache_basic_file_non_patched(): @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) def test_grib_cache_basic_metadata_patched(serialise, fl_type, patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", @@ -287,7 +287,7 @@ def test_grib_cache_basic_metadata_patched(serialise, fl_type, patch_metadata_ca def test_grib_cache_options_1(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "temporary", @@ -367,7 +367,7 @@ def test_grib_cache_options_1(patch_metadata_cache): def test_grib_cache_options_2(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "persistent", @@ -449,7 +449,7 @@ def test_grib_cache_options_2(patch_metadata_cache): def test_grib_cache_options_3(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", @@ -529,7 +529,7 @@ def test_grib_cache_options_3(patch_metadata_cache): def test_grib_cache_options_4(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "temporary", "grib-handle-policy": "temporary", @@ -626,7 +626,7 @@ def test_grib_cache_options_4(patch_metadata_cache): def test_grib_cache_options_5(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "temporary", "grib-handle-policy": "persistent", @@ -725,7 +725,7 @@ def test_grib_cache_options_5(patch_metadata_cache): def test_grib_cache_options_6(patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "temporary", 
"grib-handle-policy": "cache", @@ -858,7 +858,7 @@ def test_grib_cache_file_use_kwargs_2(): @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) def test_grib_cache_metadata_use_kwargs_1(fl_type, patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", @@ -900,7 +900,7 @@ def test_grib_cache_metadata_use_kwargs_1(fl_type, patch_metadata_cache): @pytest.mark.parametrize("fl_type", ["file", "array", "memory"]) def test_grib_cache_metadata_use_kwargs_2(fl_type, patch_metadata_cache): - with settings.temporary( + with config.temporary( { "grib-field-policy": "persistent", "grib-handle-policy": "cache", diff --git a/tests/readers/test_reader_padding_bytes.py b/tests/readers/test_reader_padding_bytes.py index 7234d6fe..43aeb3d3 100644 --- a/tests/readers/test_reader_padding_bytes.py +++ b/tests/readers/test_reader_padding_bytes.py @@ -11,8 +11,8 @@ import pytest +from earthkit.data import config from earthkit.data import from_source -from earthkit.data import settings from earthkit.data.core.temporary import temp_file from earthkit.data.testing import earthkit_examples_file @@ -56,7 +56,7 @@ def test_reader_padding_bytes(file_path, expected_type, expected_len): assert "file.File" in str(type(ds)) assert "unknown" in str(type(ds._reader)).lower() - with settings.temporary("reader-type-check-bytes", 100): + with config.temporary("reader-type-check-bytes", 100): ds = from_source("file", tmp.path) if hasattr(ds, "_reader"): assert expected_type in str(type(ds._reader)).lower() diff --git a/tests/sources/test_url.py b/tests/sources/test_url.py index 9e1cc8d4..08b8d3b0 100644 --- a/tests/sources/test_url.py +++ b/tests/sources/test_url.py @@ -14,8 +14,8 @@ import pytest +from earthkit.data import config from earthkit.data import from_source -from earthkit.data import settings from earthkit.data.core.temporary import temp_directory from earthkit.data.core.temporary import temp_file 
from earthkit.data.testing import earthkit_examples_file @@ -49,11 +49,11 @@ def load(): ) with temp_directory() as tmpdir: - with settings.temporary(): - settings.set("user-cache-directory", tmpdir) + with config.temporary(): + config.set("user-cache-directory", tmpdir) load() - settings.set("check-out-of-date-urls", False) + config.set("check-out-of-date-urls", False) with network_off(): load()