From 5c0d83d52ab76ba199d3baf79cf8f9d30325610d Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 27 Jul 2020 14:42:45 +0200 Subject: [PATCH 1/3] Split out separate utility classes for Minio This will make it practical for other packages to reuse the code. It is deliberately designed not to use the SkipTest exception, which is specific to nosetests, so that it can easily be integrated into other test frameworks (pytest). The version test has been removed and left to documentation: newer versions of minio have a different way of querying the version, and it didn't seem worth trying to support both flavours. --- katdal/test/s3_utils.py | 204 ++++++++++++++++++++++++++++++ katdal/test/test_chunkstore_s3.py | 58 +-------- 2 files changed, 211 insertions(+), 51 deletions(-) create mode 100644 katdal/test/s3_utils.py diff --git a/katdal/test/s3_utils.py b/katdal/test/s3_utils.py new file mode 100644 index 00000000..d2527833 --- /dev/null +++ b/katdal/test/s3_utils.py @@ -0,0 +1,204 @@ +################################################################################ +# Copyright (c) 2017-2020, National Research Foundation (Square Kilometre Array) +# +# Licensed under the BSD 3-Clause License (the "License"); you may not use +# this file except in compliance with the License. You may obtain a copy +# of the License at +# +# https://opensource.org/licenses/BSD-3-Clause +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +"""Test utilities for code that interacts with the S3 API. + +It provides a class for managing running an external S3 server (currently +`MinIO`_). + +Versions of minio prior to 2018-08-25T01:56:38Z contain a `race condition`_ +that can cause it to crash when queried at the wrong point during startup, so +should not be used. + +.. _minio: https://github.com/minio/minio +.. _race condition: https://github.com/minio/minio/issues/6324 +""" + +import contextlib +import os +import pathlib +import socket +import subprocess +import time +import urllib.parse + +import requests + + +class MissingProgram(RuntimeError): + """An required executable program was not found.""" + + +class ProgramFailed(RuntimeError): + """An external program did not run successfully.""" + + +class S3User: + """Credentials for an S3 user.""" + + def __init__(self, access_key: str, secret_key: str) -> None: + self.access_key = access_key + self.secret_key = secret_key + + +class S3Server: + """Run and manage an external program to run an S3 server. + + This can be used as a context manager, to shut down the server when + finished. + + Parameters + ---------- + path + Directory in which objects and config will be stored. + user + Credentials for the default admin user. + + Attributes + ---------- + host + Hostname for connecting to the server + port + Port for connecting to the server + url + Base URL for the server + auth_url + URL with the access_key and secret_key baked in + path + Path given to the constructor + user + User given to the constructor + + Raises + ------ + MissingProgram + if the ``minio`` binary was not found. + ProgramFailed + if minio started but failed before it became healthy + """ + + def __init__(self, path: pathlib.Path, user: S3User) -> None: + self.host = '127.0.0.1' # Unlike 'localhost', ensures IPv4 + self.path = path + self.user = user + self._process = None + + env = os.environ.copy() + env['MINIO_BROWSER'] = 'off' + env['MINIO_ACCESS_KEY'] = self.user.access_key + env['MINIO_SECRET_KEY'] = self.user.secret_key + with contextlib.ExitStack() as exit_stack: + sock = exit_stack.enter_context(socket.socket()) + # Allows minio to bind to the same socket. Setting both + # SO_REUSEPORT and SO_REUSEADDR might not be necessary, but + # could make this more portable. + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + sock.bind(('127.0.0.1', 0)) + self.port = sock.getsockname()[1] + try: + self._process = subprocess.Popen( + [ + 'minio', 'server', '--quiet', + '--address', f'{self.host}:{self.port}', + '-C', str(self.path / 'config'), + str(self.path / 'data'), + ], + stdout=subprocess.DEVNULL, + env=env + ) + except OSError as exc: + raise MissingProgram(f'Could not run minio: {exc}') from exc + exit_stack.callback(self._process.terminate) + + self.url = f'http://{self.host}:{self.port}' + self.auth_url = f'http://{user.access_key}:{user.secret_key}@{self.host}:{self.port}' + health_url = urllib.parse.urljoin(self.url, '/minio/health/live') + for i in range(100): + try: + with requests.get(health_url) as resp: + if resp.ok: + break + except requests.ConnectionError: + pass + if self._process.poll() is not None: + raise ProgramFailed('Minio died before it became healthy') + time.sleep(0.1) + else: + raise ProgramFailed('Timed out waiting for minio to be ready') + sock.close() + exit_stack.pop_all() + + def wipe(self) -> None: + """Remove all buckets and objects, but leave the server running. + + See :meth:`mc` for information about exceptions. + """ + self.mc('rb', '--force', '--dangerous', 'minio') + + def close(self) -> None: + """Shut down the server.""" + if self._process: + self._process.terminate() + self._process.wait() + self._process = None + + def __enter__(self) -> 'S3Server': + return self + + def __exit__(self, exc_type, exc_value, exc_tb) -> None: + self.close() + + def mc(self, *args) -> None: + """Run a (minio) mc subcommand against the running server. + + The running server has the alias ``minio``. + + .. note:: + + The credentials will be exposed in the environment. This is only + intended for unit testing, and hence not with sensitive + credentials. + + Raises + ------ + MissingProgram + if the ``mc`` command is not found on the path + ProgramFailed + if the command returned a non-zero exit status. The exception + message will include the stderr output. + """ + env = os.environ.copy() + env['MC_HOST_minio'] = self.auth_url + # --config-dir is set just to prevent any config set by the user + # from interfering with the test. + try: + subprocess.run( + [ + 'mc', '--quiet', '--no-color', f'--config-dir={self.path}', + *args + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.PIPE, + env=env, + encoding='utf-8', + errors='replace', + check=True + ) + except OSError as exc: + raise MissingProgram(f'mc could not be run: {exc}') from exc + except subprocess.CalledProcessError: + raise ProgramFailed(exc.stderr) from exc diff --git a/katdal/test/test_chunkstore_s3.py b/katdal/test/test_chunkstore_s3.py index 726a19d1..f683ca91 100644 --- a/katdal/test/test_chunkstore_s3.py +++ b/katdal/test/test_chunkstore_s3.py @@ -47,6 +47,7 @@ import io import warnings import re +import pathlib from urllib3.util.retry import Retry import numpy as np @@ -61,6 +62,7 @@ _DEFAULT_SERVER_GLITCHES) from katdal.chunkstore import StoreUnavailable, ChunkNotFound from katdal.test.test_chunkstore import ChunkStoreTestBase +from katdal.test.s3_utils import S3User, S3Server, MissingProgram, ProgramFailed BUCKET = 'katdal-unittest' @@ -195,54 +197,11 @@ class TestS3ChunkStore(ChunkStoreTestBase): @classmethod def start_minio(cls, host): """Start Fake S3 service on `host` and return its URL.""" - - # Check minio version try: - version_data = subprocess.check_output(['minio', 'version']) - except OSError as e: - raise SkipTest('Could not run minio (is it installed): {}'.format(e)) - except subprocess.CalledProcessError: - raise SkipTest('Failed to get minio version (is it too old)?') - - min_version = u'2018-08-25T01:56:38Z' - version = None - version_fields = version_data.decode('utf-8').splitlines() - for line in version_fields: - if line.startswith(u'Version: '): - version = line.split(u' ', 1)[1] - if version is None: - raise RuntimeError('Could not parse minio version') - elif version < min_version: - raise SkipTest(u'Minio version is {} but {} is required'.format(version, min_version)) - - with get_free_port(host) as port: - try: - env = os.environ.copy() - env['MINIO_BROWSER'] = 'off' - env['MINIO_ACCESS_KEY'] = cls.credentials[0] - env['MINIO_SECRET_KEY'] = cls.credentials[1] - cls.minio = subprocess.Popen(['minio', 'server', - '--quiet', - '--address', '{}:{}'.format(host, port), - '-C', os.path.join(cls.tempdir, 'config'), - os.path.join(cls.tempdir, 'data')], - stdout=subprocess.DEVNULL, - env=env) - except OSError: - raise SkipTest('Could not start minio server (is it installed?)') - - # Wait for minio to be ready to service requests - url = 'http://%s:%s' % (host, port) - health_url = urllib.parse.urljoin(url, '/minio/health/live') - for i in range(100): - try: - with requests.get(health_url) as resp: - if resp.status_code == 200: - return url - except requests.ConnectionError: - pass - time.sleep(0.1) - raise OSError('Timed out waiting for minio to be ready') + cls.minio = S3Server(pathlib.Path(cls.tempdir), S3User(*cls.credentials)) + except MissingProgram as exc: + raise SkipTest(str(exc)) + return cls.minio.url @classmethod def from_url(cls, url, authenticate=True, **kwargs): @@ -256,8 +215,6 @@ def setup_class(cls): """Start minio service running on temp dir, and ChunkStore on that.""" cls.credentials = ('access*key', 'secret*key') cls.tempdir = tempfile.mkdtemp() - os.mkdir(os.path.join(cls.tempdir, 'config')) - os.mkdir(os.path.join(cls.tempdir, 'data')) cls.minio = None try: cls.url = cls.start_minio('127.0.0.1') @@ -271,8 +228,7 @@ def setup_class(cls): @classmethod def teardown_class(cls): if cls.minio: - cls.minio.terminate() - cls.minio.wait() + cls.minio.close() shutil.rmtree(cls.tempdir) def array_name(self, path, suggestion=None): From 2e49c70fec23646011257461ba4082012e37220c Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Mon, 27 Jul 2020 15:01:23 +0200 Subject: [PATCH 2/3] Fix some issues picked up by flake8 --- katdal/test/s3_utils.py | 2 +- katdal/test/test_chunkstore_s3.py | 7 +------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/katdal/test/s3_utils.py b/katdal/test/s3_utils.py index d2527833..f3ee32be 100644 --- a/katdal/test/s3_utils.py +++ b/katdal/test/s3_utils.py @@ -200,5 +200,5 @@ def mc(self, *args) -> None: ) except OSError as exc: raise MissingProgram(f'mc could not be run: {exc}') from exc - except subprocess.CalledProcessError: + except subprocess.CalledProcessError as exc: raise ProgramFailed(exc.stderr) from exc diff --git a/katdal/test/test_chunkstore_s3.py b/katdal/test/test_chunkstore_s3.py index f683ca91..fb4a3141 100644 --- a/katdal/test/test_chunkstore_s3.py +++ b/katdal/test/test_chunkstore_s3.py @@ -33,12 +33,7 @@ import tempfile import shutil -# Using subprocess32 is important (on 2.7) because it closes non-stdio file -# descriptors in the child. Without that, OS X runs into problems with minio -# failing to bind the socket. -import subprocess32 as subprocess import threading -import os import time import socket import http.server @@ -62,7 +57,7 @@ _DEFAULT_SERVER_GLITCHES) from katdal.chunkstore import StoreUnavailable, ChunkNotFound from katdal.test.test_chunkstore import ChunkStoreTestBase -from katdal.test.s3_utils import S3User, S3Server, MissingProgram, ProgramFailed +from katdal.test.s3_utils import S3User, S3Server, MissingProgram BUCKET = 'katdal-unittest' From 2b9bf3649066322bea46d945a3d61a41f26d908f Mon Sep 17 00:00:00 2001 From: Bruce Merry Date: Tue, 28 Jul 2020 10:51:46 +0200 Subject: [PATCH 3/3] Don't try to hold socket open while starting MinIO It wasn't working on MacOS. The S3Server class is simplified to just take a host and port, rather than trying to allocate one itself. --- katdal/test/s3_utils.py | 50 ++++++++++++++----------------- katdal/test/test_chunkstore_s3.py | 8 ++++- 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/katdal/test/s3_utils.py b/katdal/test/s3_utils.py index f3ee32be..fee7dc9e 100644 --- a/katdal/test/s3_utils.py +++ b/katdal/test/s3_utils.py @@ -62,6 +62,10 @@ class S3Server: Parameters ---------- + host + Host to bind to + port + Port to bind to path Directory in which objects and config will be stored. user @@ -90,42 +94,35 @@ class S3Server: if minio started but failed before it became healthy """ - def __init__(self, path: pathlib.Path, user: S3User) -> None: - self.host = '127.0.0.1' # Unlike 'localhost', ensures IPv4 + def __init__(self, host: str, port: int, path: pathlib.Path, user: S3User) -> None: + self.host = host + self.port = port self.path = path self.user = user + self.url = f'http://{self.host}:{self.port}' + self.auth_url = f'http://{user.access_key}:{user.secret_key}@{self.host}:{self.port}' self._process = None env = os.environ.copy() env['MINIO_BROWSER'] = 'off' env['MINIO_ACCESS_KEY'] = self.user.access_key env['MINIO_SECRET_KEY'] = self.user.secret_key + try: + self._process = subprocess.Popen( + [ + 'minio', 'server', '--quiet', + '--address', f'{self.host}:{self.port}', + '-C', str(self.path / 'config'), + str(self.path / 'data'), + ], + stdout=subprocess.DEVNULL, + env=env + ) + except OSError as exc: + raise MissingProgram(f'Could not run minio: {exc}') from exc + with contextlib.ExitStack() as exit_stack: - sock = exit_stack.enter_context(socket.socket()) - # Allows minio to bind to the same socket. Setting both - # SO_REUSEPORT and SO_REUSEADDR might not be necessary, but - # could make this more portable. - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) - sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - sock.bind(('127.0.0.1', 0)) - self.port = sock.getsockname()[1] - try: - self._process = subprocess.Popen( - [ - 'minio', 'server', '--quiet', - '--address', f'{self.host}:{self.port}', - '-C', str(self.path / 'config'), - str(self.path / 'data'), - ], - stdout=subprocess.DEVNULL, - env=env - ) - except OSError as exc: - raise MissingProgram(f'Could not run minio: {exc}') from exc exit_stack.callback(self._process.terminate) - - self.url = f'http://{self.host}:{self.port}' - self.auth_url = f'http://{user.access_key}:{user.secret_key}@{self.host}:{self.port}' health_url = urllib.parse.urljoin(self.url, '/minio/health/live') for i in range(100): try: @@ -139,7 +136,6 @@ def __init__(self, path: pathlib.Path, user: S3User) -> None: time.sleep(0.1) else: raise ProgramFailed('Timed out waiting for minio to be ready') - sock.close() exit_stack.pop_all() def wipe(self) -> None: diff --git a/katdal/test/test_chunkstore_s3.py b/katdal/test/test_chunkstore_s3.py index fb4a3141..7d82a264 100644 --- a/katdal/test/test_chunkstore_s3.py +++ b/katdal/test/test_chunkstore_s3.py @@ -193,7 +193,13 @@ class TestS3ChunkStore(ChunkStoreTestBase): def start_minio(cls, host): """Start Fake S3 service on `host` and return its URL.""" try: - cls.minio = S3Server(pathlib.Path(cls.tempdir), S3User(*cls.credentials)) + host = '127.0.0.1' # Unlike 'localhost', guarantees IPv4 + with get_free_port(host) as port: + pass + # The port is now closed, which makes it available for minio to + # bind to. While MinIO on Linux is able to bind to the same port + # as the socket held open by get_free_port, Mac OS is not. + cls.minio = S3Server(host, port, pathlib.Path(cls.tempdir), S3User(*cls.credentials)) except MissingProgram as exc: raise SkipTest(str(exc)) return cls.minio.url