diff --git a/docs/examples/config.ipynb b/docs/examples/config.ipynb
index 16c488ce..9403af1a 100644
--- a/docs/examples/config.ipynb
+++ b/docs/examples/config.ipynb
@@ -173,7 +173,7 @@
{
"data": {
"text/plain": [
- "5"
+ "30"
]
},
"execution_count": 4,
@@ -182,7 +182,7 @@
}
],
"source": [
- "config.get(\"number-of-download-threads\")"
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -215,7 +215,7 @@
{
"data": {
"text/plain": [
- "6"
+ "5"
]
},
"execution_count": 5,
@@ -224,8 +224,8 @@
}
],
"source": [
- "config.set(\"number-of-download-threads\", 6)\n",
- "config.get(\"number-of-download-threads\")"
+ "config.set(\"url-download-timeout\", 5)\n",
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -246,15 +246,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "7\n",
- "60\n"
+ "10\n",
+ "True\n"
]
}
],
"source": [
- "config.set({\"number-of-download-threads\": 7, \"url-download-timeout\": \"1m\"})\n",
- "print(config.get(\"number-of-download-threads\"))\n",
- "print(config.get(\"url-download-timeout\"))"
+ "config.set({\"url-download-timeout\": 10, \"check-out-of-date-urls\": True})\n",
+ "print(config.get(\"url-download-timeout\"))\n",
+ "print(config.get(\"check-out-of-date-urls\"))"
]
},
{
@@ -281,15 +281,15 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "8\n",
- "120\n"
+ "10\n",
+ "True\n"
]
}
],
"source": [
- "config.set(number_of_download_threads=8, url_download_timeout=\"2m\")\n",
- "print(config.get(\"number-of-download-threads\"))\n",
- "print(config.get(\"url-download-timeout\"))"
+ "config.set(url_download_timeout=10, check_out_of_date_urls=True)\n",
+ "print(config.get(\"url-download-timeout\"))\n",
+ "print(config.get(\"check-out-of-date-urls\"))"
]
},
{
@@ -332,16 +332,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "8\n",
+ "10\n",
"12\n"
]
}
],
"source": [
"with config.temporary():\n",
- " print(config.get(\"number-of-download-threads\"))\n",
- " config.set(\"number-of-download-threads\", 12)\n",
- " print(config.get(\"number-of-download-threads\"))"
+ " print(config.get(\"url-download-timeout\"))\n",
+ " config.set(\"url-download-timeout\", 12)\n",
+ " print(config.get(\"url-download-timeout\"))"
]
},
{
@@ -361,7 +361,7 @@
{
"data": {
"text/plain": [
- "8"
+ "10"
]
},
"execution_count": 9,
@@ -370,7 +370,7 @@
}
],
"source": [
- "config.get(\"number-of-download-threads\")"
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -391,16 +391,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "11\n",
- "8\n"
+ "12\n",
+ "10\n"
]
}
],
"source": [
- "with config.temporary(\"number-of-download-threads\", 11):\n",
- " print(config.get(\"number-of-download-threads\"))\n",
+ "with config.temporary(\"url-download-timeout\", 12):\n",
+ " print(config.get(\"url-download-timeout\"))\n",
"\n",
- "print(config.get(\"number-of-download-threads\"))"
+ "print(config.get(\"url-download-timeout\"))"
]
},
{
@@ -408,7 +408,7 @@
"id": "313fbf8f-a540-449e-b340-5c46014d931c",
"metadata": {},
"source": [
- "#### Reset to defaults"
+ "#### Resetting to defaults"
]
},
{
@@ -444,17 +444,15 @@
"output_type": "stream",
"text": [
"12\n",
- "5\n"
+ "10\n"
]
}
],
"source": [
- "with config.temporary():\n",
- " config.set(\"number-of-download-threads\", 12)\n",
- " print(config.get(\"number-of-download-threads\"))\n",
- " config.reset()\n",
- " print(config.get(\"number-of-download-threads\"))\n",
- " "
+ "with config.temporary(\"url-download-timeout\", 12):\n",
+ " print(config.get(\"url-download-timeout\"))\n",
+ "\n",
+ "print(config.get(\"url-download-timeout\"))"
]
},
{
diff --git a/docs/examples/config_env_vars.ipynb b/docs/examples/config_env_vars.ipynb
index ebace542..32f4e839 100644
--- a/docs/examples/config_env_vars.ipynb
+++ b/docs/examples/config_env_vars.ipynb
@@ -105,7 +105,7 @@
{
"data": {
"text/plain": [
- "5"
+ "30"
]
},
"execution_count": 3,
@@ -114,7 +114,7 @@
}
],
"source": [
- "config.get(\"number-of-download-threads\")"
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -148,12 +148,12 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "env: EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26\n"
+ "env: EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26\n"
]
}
],
"source": [
- "%env EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26"
+ "%env EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26"
]
},
{
@@ -180,7 +180,7 @@
}
],
"source": [
- "config.get(\"number-of-download-threads\")"
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -214,7 +214,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/cgr/git/earthkit-data/src/earthkit/data/core/config.py:406: UserWarning: Config option 'number-of-download-threads' is also set by environment variable 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and its value is returned when calling get().\n",
+ "/Users/cgr/git/earthkit-data/src/earthkit/data/core/config.py:407: UserWarning: Config option 'url-download-timeout' is also set by environment variable 'EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT'.The environment variable takes precedence and its value is returned when calling get().\n",
" warnings.warn(msg)\n"
]
},
@@ -230,8 +230,8 @@
}
],
"source": [
- "config.set(\"number-of-download-threads\", 10)\n",
- "config.get(\"number-of-download-threads\")"
+ "config.set(\"url-download-timeout\", 10)\n",
+ "config.get(\"url-download-timeout\")"
]
},
{
@@ -264,8 +264,7 @@
{
"data": {
"text/plain": [
- "{'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS',\n",
- " '26')}"
+ "{'url-download-timeout': ('EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT', '26')}"
]
},
"execution_count": 7,
@@ -315,7 +314,7 @@
" vertical-align: top;\n",
" text-align: left !important;\n",
"}\n",
- "
Name | Value | Default |
---|
cache-policy | 'off' | 'off' |
check-out-of-date-urls | True | True |
download-out-of-date-urls | False | False |
grib-field-policy | 'persistent' | 'persistent' |
grib-handle-cache-size | 1 | 1 |
grib-handle-policy | 'cache' | 'cache' |
maximum-cache-disk-usage | '95%' | '95%' |
maximum-cache-size | None | None |
number-of-download-threads | EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS='26' (10) | 5 |
reader-type-check-bytes | 64 | 64 |
temporary-cache-directory-root | None | None |
temporary-directory-root | None | None |
url-download-timeout | '30s' | '30s' |
use-grib-metadata-cache | True | True |
use-message-position-index-cache | False | False |
use-standalone-mars-client-when-available | True | True |
user-cache-directory | '/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr' | '/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr' |
version | '0.11.5.dev2+g384bbb0.d20241209' | '' |
"
+ "Name | Value | Default |
---|
cache-policy | 'off' | 'off' |
check-out-of-date-urls | True | True |
download-out-of-date-urls | False | False |
grib-field-policy | 'persistent' | 'persistent' |
grib-handle-cache-size | 1 | 1 |
grib-handle-policy | 'cache' | 'cache' |
maximum-cache-disk-usage | '95%' | '95%' |
maximum-cache-size | None | None |
number-of-download-threads | 5 | 5 |
reader-type-check-bytes | 64 | 64 |
temporary-cache-directory-root | None | None |
temporary-directory-root | None | None |
url-download-timeout | EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT='26' (10) | '30s' |
use-grib-metadata-cache | True | True |
use-message-position-index-cache | False | False |
use-standalone-mars-client-when-available | True | True |
user-cache-directory | '/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr' | '/var/folders/93/w0p869rx17q98wxk83gn9ys40000gn/T/earthkit-data-cgr' |
version | '0.11.5.dev2+g384bbb0.d20241209' | '' |
"
],
"text/plain": [
"cache-policy: (off, off)\n",
@@ -326,11 +325,11 @@
"grib-handle-policy: (cache, cache)\n",
"maximum-cache-disk-usage: (95%, 95%)\n",
"maximum-cache-size: (None, None)\n",
- "number-of-download-threads: (EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26, 10, 5)\n",
+ "number-of-download-threads: (5, 5)\n",
"reader-type-check-bytes: (64, 64)\n",
"temporary-cache-directory-root: (None, None)\n",
"temporary-directory-root: (None, None)\n",
- "url-download-timeout: (30s, 30s)\n",
+ "url-download-timeout: (EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=26, 10, 30s)\n",
"use-grib-metadata-cache: (True, True)\n",
"use-message-position-index-cache: (False, False)\n",
"use-standalone-mars-client-when-available: (True, True)\n",
diff --git a/docs/guide/caching.rst b/docs/guide/caching.rst
index cc340064..5993c04f 100644
--- a/docs/guide/caching.rst
+++ b/docs/guide/caching.rst
@@ -15,7 +15,7 @@ Please note that the earthkit-data cache configuration is managed through the :d
.. warning::
- By default the caching is disabled, i.e. the :ref:`cache-policy ` is "off".
+ By default the caching is disabled, i.e. the :ref:`cache-policy ` is :ref:`off `.
.. warning::
@@ -206,7 +206,7 @@ Examples:
>>> cache.policy.name
'user'
>>> cache.directory()
- '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-cgr'
+ '/var/folders/ng/g0zkhc2s42xbslpsywwp_26m0000gn/T/earthkit-data-myusername'
>>> cache.size()
846785699
>>> cache.summary_dump_database()
diff --git a/docs/guide/config.rst b/docs/guide/config.rst
index 08a70910..d5eee5af 100644
--- a/docs/guide/config.rst
+++ b/docs/guide/config.rst
@@ -58,8 +58,8 @@ We can create a temporary configuration (as a context manager) as a copy of the
Output::
- 8
- 12
+ 30
+ 5
11
.. warning::
@@ -93,47 +93,46 @@ Environment variables
Each configuration parameter has a corresponding environment variable (see the full list :ref:`here `). When an environment variable is set, it takes precedence over the config parameter as the following examples show.
-First, let us assume that the value of ``number-of-download-threads`` is 5 in the config file and no environment variable is set.
+First, let us assume that the value of ``url-download-timeout`` is 30 in the config file and no environment variable is set.
.. code-block:: python
>>> from earthkit.data import config
- >>> config.get("number-of-download-threads")
- 5
+ >>> config.get("url-download-timeout")
+ 30
-Then, set the environment variable ``EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS``.
+Then, set the environment variable ``EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT``.
.. code-block:: bash
- export EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS=26
-
+    export EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT=5
.. code-block:: python
>>> from earthkit.data import config
- >>> config.get("number-of-download-threads")
- 26
+ >>> config.get("url-download-timeout")
+ 5
>>> config.env()
- {'number-of-download-threads': ('EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS', '26')}
- >>> config.set("number-of-download-threads", 10)
- UserWarning: Config option 'number-of-download-threads' is also set by environment variable
- 'EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS'.The environment variable takes precedence and
+ {'url-download-timeout': ('EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT', '5')}
+ >>> config.set("url-download-timeout", 10)
+ UserWarning: Config option 'url-download-timeout' is also set by environment variable
+ 'EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT'.The environment variable takes precedence and
its value is returned when calling get(). Still, the value set here will be
saved to the config file.
- >>> config.get("number-of-download-threads")
- 26
+ >>> config.get("url-download-timeout")
+ 5
Finally, unset the environment variable and check the config value again, which is now the value from the config file.
.. code-block:: bash
- unset EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS
+ unset EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT
.. code-block:: python
>>> from earthkit.data import config
- >>> config.get("number-of-download-threads")
+ >>> config.get("url-download-timeout")
10
diff --git a/docs/guide/include/config-set.py b/docs/guide/include/config-set.py
index 1992a373..9eb97178 100644
--- a/docs/guide/include/config-set.py
+++ b/docs/guide/include/config-set.py
@@ -3,14 +3,14 @@
# Change the location of the user defined cache:
earthkit.data.config.set("user-cache-directory", "/big-disk/earthkit-data-cache")
-# Change number of download threads
-earthkit.data.config.set("number-of-download-threads", 7)
+# Change download timeout
+earthkit.data.config.set("url-download-timeout", "1m")
# Multiple values can be set together. The argument list
# can be a dictionary:
-earthkit.data.config.set({"number-of-download-threads": 7, "url-download-timeout": "1m"})
+earthkit.data.config.set({"url-download-timeout": "1m", "check-out-of-date-urls": True})
# Alternatively, we can use keyword arguments. However, because
# the “-” character is not allowed in variable names in Python we have
# to replace “-” with “_” in all the keyword arguments:
-earthkit.data.config.set(number_of_download_threads=8, url_download_timeout="2m")
+earthkit.data.config.set(url_download_timeout="1m", check_out_of_date_urls=True)
diff --git a/docs/guide/include/config-temporary.py b/docs/guide/include/config-temporary.py
index a7b94d82..9d9d3854 100644
--- a/docs/guide/include/config-temporary.py
+++ b/docs/guide/include/config-temporary.py
@@ -1,11 +1,11 @@
import earthkit.data
-print(earthkit.data.config.get("number-of-download-threads"))
+print(earthkit.data.config.get("url-download-timeout"))
with earthkit.data.config.temporary():
- earthkit.data.config.set("number-of-download-threads", 12)
- print(earthkit.data.config.get("number-of-download-threads"))
+ earthkit.data.config.set("url-download-timeout", 5)
+ print(earthkit.data.config.get("url-download-timeout"))
# Temporary config can also be created with arguments:
-with earthkit.data.config.temporary("number-of-download-threads", 11):
- print(earthkit.data.config.get("number-of-download-threads"))
+with earthkit.data.config.temporary("url-download-timeout", 11):
+ print(earthkit.data.config.get("url-download-timeout"))
diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py
index f4489af5..cc8151e4 100644
--- a/tests/core/test_cache.py
+++ b/tests/core/test_cache.py
@@ -14,8 +14,8 @@
import pytest
from earthkit.data import cache
+from earthkit.data import config
from earthkit.data import from_source
-from earthkit.data import settings
from earthkit.data.core.caching import cache_file
from earthkit.data.core.temporary import temp_directory
from earthkit.data.testing import earthkit_examples_file
@@ -58,19 +58,19 @@ def touch(target, args):
@pytest.mark.cache
def test_cache_1():
- with settings.temporary():
- settings.set("maximum-cache-disk-usage", "99%")
+ with config.temporary():
+ config.set("maximum-cache-disk-usage", "99%")
cache.purge(matcher=lambda e: ["owner"] == "test_cache")
- check_cache_files(settings.get("user-cache-directory"))
+ check_cache_files(config.get("user-cache-directory"))
# 1GB ram disk on MacOS (blocks of 512 bytes)
# diskutil erasevolume HFS+ "RAMDisk" `hdiutil attach -nomount ram://2097152`
@pytest.mark.skipif(not os.path.exists("/Volumes/RAMDisk"), reason="No RAM disk")
def test_cache_4():
- with settings.temporary():
- settings.set("cache-directory", "/Volumes/RAMDisk/earthkit_data")
- settings.set("maximum-cache-disk-usage", "90%")
+ with config.temporary():
+ config.set("cache-directory", "/Volumes/RAMDisk/earthkit_data")
+ config.set("maximum-cache-disk-usage", "90%")
for n in range(10):
from_source("dummy-source", "zeros", size=100 * 1024 * 1024, n=n)
@@ -78,10 +78,10 @@ def test_cache_4():
def test_cache_policy():
with temp_directory() as user_dir:
# cache = user dir
- with settings.temporary():
- settings.set({"cache-policy": "user", "user-cache-directory": user_dir})
- assert settings.get("cache-policy") == "user"
- assert settings.get("user-cache-directory") == user_dir
+ with config.temporary():
+ config.set({"cache-policy": "user", "user-cache-directory": user_dir})
+ assert config.get("cache-policy") == "user"
+ assert config.get("user-cache-directory") == user_dir
assert cache.policy.managed() is True
cache_dir = cache.policy.directory()
assert cache_dir == user_dir
@@ -89,17 +89,17 @@ def test_cache_policy():
check_cache_files(cache_dir)
# cache = temporary with auto generated path
- with settings.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}):
- assert settings.get("cache-policy") == "temporary"
- assert settings.get("temporary-cache-directory-root") is None
+ with config.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}):
+ assert config.get("cache-policy") == "temporary"
+ assert config.get("temporary-cache-directory-root") is None
assert cache.policy.managed() is True
cache_dir = cache.policy.directory()
assert os.path.exists(cache_dir)
check_cache_files(cache_dir)
# cache = user dir (again)
- assert settings.get("cache-policy") == "user"
- assert settings.get("user-cache-directory") == user_dir
+ assert config.get("cache-policy") == "user"
+ assert config.get("user-cache-directory") == user_dir
assert cache.policy.managed() is True
cache_dir = cache.policy.directory()
assert cache_dir == user_dir
@@ -108,14 +108,14 @@ def test_cache_policy():
# cache = temporary with user defined root path
with temp_directory() as root_dir:
- with settings.temporary(
+ with config.temporary(
{
"cache-policy": "temporary",
"temporary-cache-directory-root": root_dir,
}
):
- assert settings.get("cache-policy") == "temporary"
- assert settings.get("temporary-cache-directory-root") == root_dir
+ assert config.get("cache-policy") == "temporary"
+ assert config.get("temporary-cache-directory-root") == root_dir
assert cache.policy.managed() is True
cache_dir = cache.policy.directory()
assert os.path.exists(cache_dir)
@@ -123,9 +123,9 @@ def test_cache_policy():
check_cache_files(cache_dir)
# cache = off
- with settings.temporary("cache-policy", "off"):
- assert settings.get("cache-policy") == "off"
- assert settings.get("user-cache-directory") == user_dir
+ with config.temporary("cache-policy", "off"):
+ assert config.get("cache-policy") == "off"
+ assert config.get("user-cache-directory") == user_dir
assert cache.policy.managed() is False
cache_dir = cache.policy.directory()
@@ -133,8 +133,8 @@ def test_cache_policy():
check_cache_files(cache_dir, managed=False)
# cache = user dir (again)
- assert settings.get("cache-policy") == "user"
- assert settings.get("user-cache-directory") == user_dir
+ assert config.get("cache-policy") == "user"
+ assert config.get("user-cache-directory") == user_dir
assert cache.policy.managed() is True
cache_dir = cache.policy.directory()
assert cache_dir == user_dir
@@ -143,7 +143,7 @@ def test_cache_policy():
def test_url_source_no_cache():
- with settings.temporary("cache-policy", "off"):
+ with config.temporary("cache-policy", "off"):
ds = from_source(
"url",
"https://get.ecmwf.int/repository/test-data/earthkit-data/examples/test.grib",
@@ -152,7 +152,7 @@ def test_url_source_no_cache():
def test_grib_no_cache():
- with settings.temporary("cache-policy", "off"):
+ with config.temporary("cache-policy", "off"):
ds = from_source("file", earthkit_examples_file("tuv_pl.grib"))
assert len(ds) == 18
@@ -163,7 +163,7 @@ def test_grib_no_cache():
@pytest.mark.parametrize("index_cache", [True, False])
def test_grib_offset_index_cache(index_cache):
s = {"cache-policy": "temporary", "use-message-position-index-cache": index_cache}
- with settings.temporary(s):
+ with config.temporary(s):
ds = from_source("file", earthkit_examples_file("tuv_pl.grib"))
assert len(ds) == 18
@@ -263,12 +263,12 @@ def test_cache_zip_file_changed_modtime():
@pytest.mark.parametrize("policy", ["user", "temporary"])
def test_cache_management(policy):
with temp_directory() as tmp_dir_path:
- with settings.temporary():
+ with config.temporary():
if policy == "user":
- settings.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path})
+ config.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path})
assert cache.directory() == tmp_dir_path
elif policy == "temporary":
- settings.set(
+ config.set(
{
"cache-policy": "temporary",
"temporary-cache-directory-root": tmp_dir_path,
@@ -302,7 +302,7 @@ def test_cache_management(policy):
latest_path = x["path"]
# limit cache size so that only one file should remain
- settings.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None})
+ config.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None})
num, size = cache.summary_dump_database()
assert num == 1
diff --git a/tests/core/test_cache_with_settings.py b/tests/core/test_cache_with_settings.py
new file mode 100644
index 00000000..78eecdf3
--- /dev/null
+++ b/tests/core/test_cache_with_settings.py
@@ -0,0 +1,373 @@
+#!/usr/bin/env python3
+
+# (C) Copyright 2020 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+#
+
+import os
+
+import pytest
+
+from earthkit.data import cache
+from earthkit.data import from_source
+from earthkit.data import settings
+from earthkit.data.core.caching import cache_file
+from earthkit.data.core.temporary import temp_directory
+from earthkit.data.testing import earthkit_examples_file
+
+
+def check_cache_files(dir_path, managed=True):
+    def touch(target, args):
+        assert args["foo"] in (1, 2)
+        with open(target, "w"):
+            pass
+
+    path1 = cache_file(
+        "test_cache",
+        touch,
+        {"foo": 1},
+        extension=".test",
+    )
+
+    path2 = cache_file(
+        "test_cache",
+        touch,
+        {"foo": 2},
+        extension=".test",
+    )
+    # both files must exist, be distinct and sit directly in dir_path
+    assert os.path.exists(path1)
+    assert os.path.exists(path2)
+    assert os.path.dirname(path1) == dir_path
+    assert os.path.dirname(path2) == dir_path
+    assert path1 != path2
+    # when managed, cache.entries() must list two entries owned by "test_cache"
+    if managed:
+        cnt = 0
+        for f in cache.entries():
+            if f["owner"] == "test_cache":
+                cnt += 1
+
+        assert cnt == 2
+
+
+@pytest.mark.cache
+def test_cache_1_setting():
+    with settings.temporary():
+        settings.set("maximum-cache-disk-usage", "99%")
+        cache.purge(matcher=lambda e: e["owner"] == "test_cache")  # match on the entry's owner field
+        check_cache_files(settings.get("user-cache-directory"))
+
+
+# 1GB ram disk on MacOS (blocks of 512 bytes)
+# diskutil erasevolume HFS+ "RAMDisk" `hdiutil attach -nomount ram://2097152`
+@pytest.mark.skipif(not os.path.exists("/Volumes/RAMDisk"), reason="No RAM disk")
+def test_cache_4_setting():
+ with settings.temporary():
+ settings.set("cache-directory", "/Volumes/RAMDisk/earthkit_data")
+ settings.set("maximum-cache-disk-usage", "90%")
+ for n in range(10):
+ from_source("dummy-source", "zeros", size=100 * 1024 * 1024, n=n)
+
+
+def test_cache_policy_setting():
+    with temp_directory() as user_dir:
+        # cache = user dir
+        with settings.temporary():
+            settings.set({"cache-policy": "user", "user-cache-directory": user_dir})
+            assert settings.get("cache-policy") == "user"
+            assert settings.get("user-cache-directory") == user_dir
+            assert cache.policy.managed() is True
+            cache_dir = cache.policy.directory()
+            assert cache_dir == user_dir
+            assert os.path.exists(cache_dir)
+            check_cache_files(cache_dir)
+
+            # cache = temporary with auto generated path
+            with settings.temporary({"cache-policy": "temporary", "temporary-cache-directory-root": None}):
+                assert settings.get("cache-policy") == "temporary"
+                assert settings.get("temporary-cache-directory-root") is None
+                assert cache.policy.managed() is True
+                cache_dir = cache.policy.directory()
+                assert os.path.exists(cache_dir)
+                check_cache_files(cache_dir)
+
+            # cache = user dir (again)
+            assert settings.get("cache-policy") == "user"
+            assert settings.get("user-cache-directory") == user_dir
+            assert cache.policy.managed() is True
+            cache_dir = cache.policy.directory()
+            assert cache_dir == user_dir
+            assert os.path.exists(cache_dir)
+            check_cache_files(cache_dir)
+
+            # cache = temporary with user defined root path
+            with temp_directory() as root_dir:
+                with settings.temporary(
+                    {
+                        "cache-policy": "temporary",
+                        "temporary-cache-directory-root": root_dir,
+                    }
+                ):
+                    assert settings.get("cache-policy") == "temporary"
+                    assert settings.get("temporary-cache-directory-root") == root_dir
+                    assert cache.policy.managed() is True
+                    cache_dir = cache.policy.directory()
+                    assert os.path.exists(cache_dir)
+                    assert os.path.dirname(cache_dir) == root_dir  # cache dir sits directly under the root
+                    check_cache_files(cache_dir)
+
+            # cache = off
+            with settings.temporary("cache-policy", "off"):
+                assert settings.get("cache-policy") == "off"
+                assert settings.get("user-cache-directory") == user_dir
+                assert cache.policy.managed() is False
+
+                cache_dir = cache.policy.directory()
+                assert os.path.exists(cache_dir)
+                check_cache_files(cache_dir, managed=False)
+
+            # cache = user dir (again)
+            assert settings.get("cache-policy") == "user"
+            assert settings.get("user-cache-directory") == user_dir
+            assert cache.policy.managed() is True
+            cache_dir = cache.policy.directory()
+            assert cache_dir == user_dir
+            assert os.path.exists(cache_dir)
+            check_cache_files(cache_dir)
+
+
+def test_url_source_no_cache_setting():
+ with settings.temporary("cache-policy", "off"):
+ ds = from_source(
+ "url",
+ "https://get.ecmwf.int/repository/test-data/earthkit-data/examples/test.grib",
+ )
+ assert len(ds) == 2
+
+
+def test_grib_no_cache_setting():
+ with settings.temporary("cache-policy", "off"):
+ ds = from_source("file", earthkit_examples_file("tuv_pl.grib"))
+ assert len(ds) == 18
+
+ f = ds[3]
+ assert f.metadata("param") == "t"
+
+
+@pytest.mark.parametrize("index_cache", [True, False])
+def test_grib_offset_index_cache_setting(index_cache):
+ s = {"cache-policy": "temporary", "use-message-position-index-cache": index_cache}
+ with settings.temporary(s):
+ ds = from_source("file", earthkit_examples_file("tuv_pl.grib"))
+ assert len(ds) == 18
+
+ f = ds[3]
+ assert f.metadata("param") == "t", f"index-cache={index_cache}"
+
+
+# See github #155. This test can hang so we must set a timeout.
+@pytest.mark.no_cache_init
+@pytest.mark.timeout(20)
+def test_cache_with_log_debug_setting(caplog):
+ import logging
+
+ # the cache must not be initialised at this point
+ assert cache._policy is None
+ assert cache._manager is None
+
+ caplog.set_level(logging.DEBUG)
+ LOG = logging.getLogger(__name__)
+
+ class A:
+ def __repr__(self):
+ d = cache.directory()
+ return d
+
+ a = A()
+ LOG.debug(f"dir {a}")
+ # NOTE: if we use "%s" formatting e.g. "LOG.debug("dir %s", a)"
+ # the problem still occurs!
+
+
+@pytest.mark.cache
+def test_cache_zip_file_overwritten_1_setting():
+ with temp_directory() as tmp_dir:
+ import shutil
+ import zipfile
+
+ # copy input data to work dir
+ grb1_path = os.path.join(tmp_dir, "test.grib")
+ shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path)
+
+ grb2_path = os.path.join(tmp_dir, "test6.grib")
+ shutil.copyfile(earthkit_examples_file("test6.grib"), grb2_path)
+
+ # first pass
+ zip_path = os.path.join(tmp_dir, "test.zip")
+ with zipfile.ZipFile(zip_path, "w") as zip_object:
+ zip_object.write(grb1_path)
+
+ ds = from_source("file", zip_path)
+ assert len(ds) == 2
+ ds_path = ds.path
+
+ # second pass - same zip file, the grib should be read
+ # from the cache
+ ds1 = from_source("file", zip_path)
+ assert len(ds1) == 2
+ assert ds1.path == ds_path
+
+ # third pass - same zipfile path with different contents
+ with zipfile.ZipFile(zip_path, "w") as zip_object:
+ zip_object.write(grb2_path)
+
+ ds2 = from_source("file", zip_path)
+ assert len(ds2) == 6
+ assert ds2.path != ds_path
+
+
+@pytest.mark.cache
+def test_cache_zip_file_changed_modtime_setting():
+ with temp_directory() as tmp_dir:
+ import shutil
+ import zipfile
+
+ # copy input data to work dir
+ grb1_path = os.path.join(tmp_dir, "test.grib")
+ shutil.copyfile(earthkit_examples_file("test.grib"), grb1_path)
+
+ # first pass
+ zip_path = os.path.join(tmp_dir, "test.zip")
+ with zipfile.ZipFile(zip_path, "w") as zip_object:
+ zip_object.write(grb1_path)
+
+ ds = from_source("file", zip_path)
+ assert len(ds) == 2
+ ds_path = ds.path
+
+ # second pass - changed modtime
+ st = os.stat(zip_path)
+ m_time = (st.st_atime_ns + 10, st.st_mtime_ns + 10)
+ os.utime(zip_path, ns=m_time)
+ ds2 = from_source("file", zip_path)
+ assert len(ds2) == 2
+ assert ds2.path != ds_path
+
+
+@pytest.mark.parametrize("policy", ["user", "temporary"])
+def test_cache_management_setting(policy):
+    with temp_directory() as tmp_dir_path:
+        with settings.temporary():
+            if policy == "user":
+                settings.set({"cache-policy": "user", "user-cache-directory": tmp_dir_path})
+                assert cache.directory() == tmp_dir_path
+            elif policy == "temporary":
+                settings.set(
+                    {
+                        "cache-policy": "temporary",
+                        "temporary-cache-directory-root": tmp_dir_path,
+                    }
+                )
+                assert os.path.dirname(cache.directory()) == tmp_dir_path
+            else:
+                assert False
+
+            data_size = 10 * 1024
+
+            # create 3 files existing only in the cache
+            r = []
+            for n in range(3):
+                r.append(from_source("dummy-source", "zeros", size=data_size, n=n))
+
+            for ds in r:
+                assert os.path.exists(ds.path)
+                assert os.path.dirname(ds.path) == cache.directory()
+
+            # check cache contents
+            num, size = cache.summary_dump_database()
+            assert num == 3
+            assert size == 3 * data_size
+            assert len(cache.entries()) == 3
+
+            for i, x in enumerate(cache.entries()):
+                assert x["size"] == data_size
+                assert x["owner"] == "dummy-source"
+                assert x["args"] == {"size": data_size, "n": i}
+                latest_path = x["path"]
+
+            # limit cache size so that only one file should remain
+            settings.set({"maximum-cache-size": "12K", "maximum-cache-disk-usage": None})
+
+            num, size = cache.summary_dump_database()
+            assert num == 1
+            assert size == data_size
+            assert len(cache.entries()) == 1
+            for x in cache.entries():
+                assert x["size"] == data_size
+                assert x["owner"] == "dummy-source"
+                assert x["args"] == {"size": data_size, "n": 2}
+                assert x["path"] == latest_path  # the survivor is the most recently created file
+                break
+
+            # purge the cache
+            r = None
+            cache.purge()
+            num, size = cache.summary_dump_database()
+            assert num == 0
+            assert size == 0
+            assert len(cache.entries()) == 0
+
+
+@pytest.mark.cache
+def test_cache_force_setting():
+ import time
+
+ def _force_true(args, path, owner_data):
+ time.sleep(0.001)
+ return True
+
+ def _force_false(args, path, owner_data):
+ time.sleep(0.001)
+ return False
+
+ data_size = 10 * 1024
+ ds = from_source("dummy-source", "zeros", size=data_size, n=0)
+ st = os.stat(ds.path)
+ m_time_ref = st.st_mtime_ns
+
+ ds1 = from_source("dummy-source", "zeros", size=data_size, n=0)
+ assert ds1.path == ds.path
+ st = os.stat(ds1.path)
+ m_time = st.st_mtime_ns
+ assert m_time == m_time_ref
+
+ ds2 = from_source("dummy-source", "zeros", force=_force_false, size=data_size, n=0)
+ assert ds2.path == ds.path
+ st = os.stat(ds2.path)
+ m_time = st.st_mtime_ns
+ assert m_time == m_time_ref
+
+ ds3 = from_source("dummy-source", "zeros", force=_force_true, size=data_size, n=0)
+ assert ds3.path == ds.path
+ st = os.stat(ds3.path)
+ m_time = st.st_mtime_ns
+ assert m_time != m_time_ref
+ m_time_ref = m_time
+
+ ds4 = from_source("dummy-source", "zeros", size=data_size, n=0)
+ assert ds4.path == ds.path
+ st = os.stat(ds4.path)
+ m_time = st.st_mtime_ns
+ assert m_time == m_time_ref
+
+
+if __name__ == "__main__":
+ from earthkit.data.testing import main
+
+ main(__file__)
diff --git a/tests/core/test_config.py b/tests/core/test_config.py
index 20a1f37d..f5f91750 100644
--- a/tests/core/test_config.py
+++ b/tests/core/test_config.py
@@ -34,7 +34,7 @@ def read_config_yaml(path=os.path.expanduser("~/.config/earthkit/data/config.yam
@pytest.mark.parametrize(
"param,default_value,new_value",
[
- ("number-of-download-threads", 5, 2),
+ ("url-download-timeout", 30, 5),
],
)
def test_configs_params_set_reset(param, default_value, new_value):
@@ -62,7 +62,7 @@ def test_config_invalid():
# invalid value
with pytest.raises(ValueError):
- config.set("number-of-download-threads", "A")
+ config.set("url-download-timeout", "A")
@pytest.mark.parametrize(
@@ -128,51 +128,51 @@ def test_config_set_cache_numbers():
def test_config_set_multi():
with config.temporary():
- config.set("number-of-download-threads", 7)
- assert config.get("number-of-download-threads") == 7
+ config.set("url-download-timeout", 7)
+ assert config.get("url-download-timeout") == 7
- config.set({"number-of-download-threads": 2, "url-download-timeout": 21})
- assert config.get("number-of-download-threads") == 2
- assert config.get("url-download-timeout") == 21
+ config.set({"url-download-timeout": 2, "check-out-of-date-urls": False})
+ assert config.get("url-download-timeout") == 2
+ assert not config.get("check-out-of-date-urls")
- config.set(number_of_download_threads=3, url_download_timeout=11)
- assert config.get("number-of-download-threads") == 3
+ config.set(url_download_timeout=11, check_out_of_date_urls=False)
assert config.get("url-download-timeout") == 11
+ assert not config.get("check-out-of-date-urls")
with pytest.raises(KeyError):
- config.set({"number-of-download-threads": 2, "-invalid-": 21})
+ config.set({"url-download-timeout": 2, "-invalid-": 21})
with pytest.raises(KeyError):
- config.set(number_of_download_threads=3, __invalid__=11)
+ config.set(url_download_timeout=3, __invalid__=11)
def test_config_temporary_single():
- with config.temporary("number-of-download-threads", 7):
- assert config.get("number-of-download-threads") == 7
+ with config.temporary("url-download-timeout", 7):
+ assert config.get("url-download-timeout") == 7
- with config.temporary({"number-of-download-threads": 7}):
- assert config.get("number-of-download-threads") == 7
+ with config.temporary({"url-download-timeout": 7}):
+ assert config.get("url-download-timeout") == 7
- with config.temporary(number_of_download_threads=7):
- assert config.get("number-of-download-threads") == 7
+ with config.temporary(url_download_timeout=7):
+ assert config.get("url-download-timeout") == 7
def test_config_temporary_multi():
- with config.temporary({"number-of-download-threads": 2, "url-download-timeout": 21}):
- assert config.get("number-of-download-threads") == 2
- assert config.get("url-download-timeout") == 21
+ with config.temporary({"url-download-timeout": 2, "check-out-of-date-urls": False}):
+ assert config.get("url-download-timeout") == 2
+ assert not config.get("check-out-of-date-urls")
- with config.temporary(number_of_download_threads=3, url_download_timeout=11):
- assert config.get("number-of-download-threads") == 3
- assert config.get("url-download-timeout") == 11
+ with config.temporary(url_download_timeout=3, check_out_of_date_urls=False):
+ assert config.get("url-download-timeout") == 3
+ assert not config.get("check-out-of-date-urls")
def test_config_temporary_nested():
- with config.temporary("number-of-download-threads", 7):
- assert config.get("number-of-download-threads") == 7
- with config.temporary("number-of-download-threads", 10):
- assert config.get("number-of-download-threads") == 10
- assert config.get("number-of-download-threads") == 7
+ with config.temporary("url-download-timeout", 7):
+ assert config.get("url-download-timeout") == 7
+ with config.temporary("url-download-timeout", 10):
+ assert config.get("url-download-timeout") == 10
+ assert config.get("url-download-timeout") == 7
def test_config_temporary_autosave_1():
@@ -182,7 +182,7 @@ def test_config_temporary_autosave_1():
# we ensure that the configs are saved into the file
config.save_as(config_file)
- key = "number-of-download-threads"
+ key = "url-download-timeout"
v_ori = config.autosave
config.autosave = False
@@ -209,7 +209,7 @@ def test_config_temporary_autosave_2():
# we ensure that the config is saved into the file
config.save_as(config_file)
- key = "number-of-download-threads"
+ key = "url-download-timeout"
v_ori = config.autosave
config.autosave = True
@@ -240,18 +240,18 @@ def test_config_temporary_autosave_2():
"value,error", [("10000", None), (10000, None), ("1b", ValueError), ("A", ValueError)]
)
def test_config_env(monkeypatch, value, error):
- env_key = "EARTHKIT_DATA_NUMBER_OF_DOWNLOAD_THREADS"
+ env_key = "EARTHKIT_DATA_URL_DOWNLOAD_TIMEOUT"
monkeypatch.setenv(env_key, value)
# v_ori = config.autosave
# config.autosave = True
if error is None:
- v = config.get("number-of-download-threads")
+ v = config.get("url-download-timeout")
assert v == 10000
else:
with pytest.raises(error):
- config.get("number-of-download-threads")
+ config.get("url-download-timeout")
# config.autosave = v_ori
diff --git a/tests/documentation/test_examples.py b/tests/documentation/test_examples.py
index e5d21121..55f37c70 100644
--- a/tests/documentation/test_examples.py
+++ b/tests/documentation/test_examples.py
@@ -20,9 +20,9 @@
"xml2rst.py",
"actions.py",
"generate-examples-maps.py",
- "settings-set.py",
- "settings-reset.py",
- "settings-temporary.py",
+ "config-set.py",
+ "config-reset.py",
+ "config-temporary.py",
"xref.py",
]
diff --git a/tests/grib/test_grib_cache.py b/tests/grib/test_grib_cache.py
index c86533a4..2d8b4599 100644
--- a/tests/grib/test_grib_cache.py
+++ b/tests/grib/test_grib_cache.py
@@ -15,8 +15,8 @@
import pytest
+from earthkit.data import config
from earthkit.data import from_source
-from earthkit.data import settings
from earthkit.data.testing import earthkit_examples_file
from earthkit.data.utils.diag import field_cache_diag
@@ -65,7 +65,7 @@ def _check_diag(diag, ref):
@pytest.mark.parametrize("serialise", [True, False])
def test_grib_cache_basic_file_patched(handle_cache_size, serialise, patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
@@ -149,7 +149,7 @@ def test_grib_cache_basic_file_non_patched():
"""This test is the same as test_grib_cache_basic but without the patch_metadata_cache fixture.
So metadata cache hits and misses are not counted."""
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
@@ -221,7 +221,7 @@ def test_grib_cache_basic_file_non_patched():
@pytest.mark.parametrize("fl_type", ["file", "array", "memory"])
def test_grib_cache_basic_metadata_patched(serialise, fl_type, patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
@@ -287,7 +287,7 @@ def test_grib_cache_basic_metadata_patched(serialise, fl_type, patch_metadata_ca
def test_grib_cache_options_1(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "temporary",
@@ -367,7 +367,7 @@ def test_grib_cache_options_1(patch_metadata_cache):
def test_grib_cache_options_2(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "persistent",
@@ -449,7 +449,7 @@ def test_grib_cache_options_2(patch_metadata_cache):
def test_grib_cache_options_3(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
@@ -529,7 +529,7 @@ def test_grib_cache_options_3(patch_metadata_cache):
def test_grib_cache_options_4(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "temporary",
"grib-handle-policy": "temporary",
@@ -626,7 +626,7 @@ def test_grib_cache_options_4(patch_metadata_cache):
def test_grib_cache_options_5(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "temporary",
"grib-handle-policy": "persistent",
@@ -725,7 +725,7 @@ def test_grib_cache_options_5(patch_metadata_cache):
def test_grib_cache_options_6(patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "temporary",
"grib-handle-policy": "cache",
@@ -858,7 +858,7 @@ def test_grib_cache_file_use_kwargs_2():
@pytest.mark.parametrize("fl_type", ["file", "array", "memory"])
def test_grib_cache_metadata_use_kwargs_1(fl_type, patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
@@ -900,7 +900,7 @@ def test_grib_cache_metadata_use_kwargs_1(fl_type, patch_metadata_cache):
@pytest.mark.parametrize("fl_type", ["file", "array", "memory"])
def test_grib_cache_metadata_use_kwargs_2(fl_type, patch_metadata_cache):
- with settings.temporary(
+ with config.temporary(
{
"grib-field-policy": "persistent",
"grib-handle-policy": "cache",
diff --git a/tests/readers/test_reader_padding_bytes.py b/tests/readers/test_reader_padding_bytes.py
index 7234d6fe..43aeb3d3 100644
--- a/tests/readers/test_reader_padding_bytes.py
+++ b/tests/readers/test_reader_padding_bytes.py
@@ -11,8 +11,8 @@
import pytest
+from earthkit.data import config
from earthkit.data import from_source
-from earthkit.data import settings
from earthkit.data.core.temporary import temp_file
from earthkit.data.testing import earthkit_examples_file
@@ -56,7 +56,7 @@ def test_reader_padding_bytes(file_path, expected_type, expected_len):
assert "file.File" in str(type(ds))
assert "unknown" in str(type(ds._reader)).lower()
- with settings.temporary("reader-type-check-bytes", 100):
+ with config.temporary("reader-type-check-bytes", 100):
ds = from_source("file", tmp.path)
if hasattr(ds, "_reader"):
assert expected_type in str(type(ds._reader)).lower()
diff --git a/tests/sources/test_url.py b/tests/sources/test_url.py
index 9e1cc8d4..08b8d3b0 100644
--- a/tests/sources/test_url.py
+++ b/tests/sources/test_url.py
@@ -14,8 +14,8 @@
import pytest
+from earthkit.data import config
from earthkit.data import from_source
-from earthkit.data import settings
from earthkit.data.core.temporary import temp_directory
from earthkit.data.core.temporary import temp_file
from earthkit.data.testing import earthkit_examples_file
@@ -49,11 +49,11 @@ def load():
)
with temp_directory() as tmpdir:
- with settings.temporary():
- settings.set("user-cache-directory", tmpdir)
+ with config.temporary():
+ config.set("user-cache-directory", tmpdir)
load()
- settings.set("check-out-of-date-urls", False)
+ config.set("check-out-of-date-urls", False)
with network_off():
load()