diff --git a/docker-compose.yml b/docker-compose.yml index cda864829..e523cd138 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: command: "--workers=4" environment: - IDUNN_MIMIR_ES= - - IDUNN_WIKI_API_REDIS_URL=idunn-redis:6379 + - IDUNN_REDIS_URL=redis://idunn-redis:6379 - IDUNN_LOG_JSON=1 idunn-redis: diff --git a/idunn/api/directions.py b/idunn/api/directions.py index 70bc96392..82062af71 100644 --- a/idunn/api/directions.py +++ b/idunn/api/directions.py @@ -1,9 +1,19 @@ -from apistar.http import QueryParams +from apistar.http import QueryParams, Request from apistar.exceptions import BadRequest + +from idunn import settings +from idunn.utils.rate_limiter import IdunnRateLimiter from ..directions.client import directions_client +rate_limiter = IdunnRateLimiter( + resource='idunn.api.directions', + max_requests=int(settings['DIRECTIONS_RL_MAX_REQUESTS']), + expire=int(settings['DIRECTIONS_RL_EXPIRE']) +) + +def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams, request: Request): + rate_limiter.check_limit_per_client(request) -def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams): from_position = (f_lon, f_lat) to_position = (t_lon, t_lat) params_dict = dict(params) @@ -13,6 +23,6 @@ def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams): if not mode: raise BadRequest('"type" query param is required') - return directions_client.get_directions(from_position, to_position, - mode=mode, lang=lang + return directions_client.get_directions( + from_position, to_position, mode=mode, lang=lang ) diff --git a/idunn/blocks/wikipedia.py b/idunn/blocks/wikipedia.py index 8826134bd..8d9589435 100644 --- a/idunn/blocks/wikipedia.py +++ b/idunn/blocks/wikipedia.py @@ -4,13 +4,13 @@ import pybreaker from apistar import validators from requests.exceptions import HTTPError, RequestException, Timeout -from redis import Redis, ConnectionError as RedisConnectionError, TimeoutError, RedisError -from redis_rate_limit import RateLimiter, TooManyRequests +from redis import Redis, RedisError from idunn import settings from idunn.utils import prometheus from idunn.utils.redis import get_redis_pool, RedisNotConfigured from idunn.utils.circuit_breaker import IdunnCircuitBreaker +from idunn.utils.rate_limiter import IdunnRateLimiter, TooManyRequestsException from .base import BaseBlock @@ -18,7 +18,7 @@ GET_TITLE_IN_LANGUAGE = "get_title_in_language" GET_SUMMARY = "get_summary" -DISABLED_STATE = object() # Used to flag rate-limiter or cache as disabled by settings +DISABLED_STATE = object() # Used to flag cache as disabled by settings logger = logging.getLogger(__name__) @@ -26,57 +26,6 @@ class WikiUndefinedException(Exception): pass -class WikipediaLimiter: - _limiter = None - - @classmethod - def get_limiter(cls): - if cls._limiter is None: - try: - redis_pool = get_redis_pool(settings, db=settings['WIKI_API_REDIS_DB']) - except RedisNotConfigured: - logger.warning("Redis URL not configured: rate limiter not started") - cls._limiter = DISABLED_STATE - else: - """ - If a redis is configured, - then we use the corresponding redis - service in the rate limiter. - """ - max_calls = int(settings['WIKI_API_RL_MAX_CALLS']) - redis_period = int(settings['WIKI_API_RL_PERIOD']) - cls._limiter = RateLimiter( - resource='WikipediaAPI', - max_requests=max_calls, - expire=redis_period, - redis_pool=redis_pool - ) - return cls._limiter - - @classmethod - def request(cls, f): - def wrapper(*args, **kwargs): - limiter = cls.get_limiter() - - if limiter is not DISABLED_STATE: - """ - We use the RateLimiter since - the redis service url has been provided - """ - try: - with limiter.limit(client="Idunn"): - return f(*args, **kwargs) - except RedisError: - logger.error("Got a RedisError in {}".format(f.__name__), exc_info=True) - return None - """ - No redis service has been set, so we - bypass the "redis-based" rate limiter - """ - return f(*args, **kwargs) - return wrapper - - class CacheNotAvailable(Exception): pass @@ -107,7 +56,7 @@ def init_cache(cls): cls._expire = int(settings['WIKI_CACHE_TIMEOUT']) # seconds redis_db = settings['WIKI_CACHE_REDIS_DB'] try: - redis_pool = get_redis_pool(settings, db=redis_db) + redis_pool = get_redis_pool(db=redis_db) except RedisNotConfigured: logger.warning("No Redis URL has been set for caching", exc_info=True) cls._connection = DISABLED_STATE @@ -145,9 +94,14 @@ def with_cache(*args, **kwargs): return f(*args, **kwargs) return with_cache + @classmethod + def disable(cls): + cls._connection = DISABLED_STATE + class WikipediaSession: _session = None + _rate_limiter = None timeout = 1. # seconds API_V1_BASE_PATTERN = "https://{lang}.wikipedia.org/api/rest_v1" @@ -160,7 +114,8 @@ class Helpers: def handle_requests_error(f): def wrapped_f(*args, **kwargs): try: - return WikipediaLimiter.request(f)(*args, **kwargs) + with WikipediaSession.get_rate_limiter().limit(client='idunn') as limit: + return f(*args, **kwargs) except pybreaker.CircuitBreakerError: prometheus.exception("CircuitBreakerError") logger.error("Got CircuitBreakerError in {}".format(f.__name__), exc_info=True) @@ -173,15 +128,12 @@ def wrapped_f(*args, **kwargs): except RequestException: prometheus.exception("RequestException") logger.error("Got Request exception in {}".format(f.__name__), exc_info=True) - except TooManyRequests: + except TooManyRequestsException: prometheus.exception("TooManyRequests") logger.warning("Got TooManyRequests{}".format(f.__name__), exc_info=True) - except RedisConnectionError: - prometheus.exception("RedisConnectionError") + except RedisError: + prometheus.exception("RedisError") logger.warning("Got redis ConnectionError{}".format(f.__name__), exc_info=True) - except TimeoutError: - prometheus.exception("RedisTimeoutError") - logger.warning("Got redis TimeoutError{}".format(f.__name__), exc_info=True) return wrapped_f @property @@ -192,6 +144,16 @@ def session(self): self._session.headers.update({"User-Agent": user_agent}) return self._session + @classmethod + def get_rate_limiter(cls): + if cls._rate_limiter is None: + cls._rate_limiter = IdunnRateLimiter( + resource='WikipediaAPI', + max_requests=int(settings['WIKI_API_RL_MAX_CALLS']), + expire=int(settings['WIKI_API_RL_PERIOD']) + ) + return cls._rate_limiter + @Helpers.handle_requests_error @circuit_breaker def get_summary(self, title, lang): diff --git a/idunn/utils/default_settings.yaml b/idunn/utils/default_settings.yaml index 8d7defbaa..fd4544175 100644 --- a/idunn/utils/default_settings.yaml +++ b/idunn/utils/default_settings.yaml @@ -13,10 +13,8 @@ ES_WIKI_LANG: "de,en,es,fr,it" # the (comma separated) list of languages existin WIKI_API_RL_MAX_CALLS: 100 # Max number of external calls allowed by the rate limiter WIKI_API_RL_PERIOD: 1 # Duration (in seconds) of the period where no more than the max number of external calls are expected -WIKI_API_REDIS_URL: # URL of the Redis service used by the rate limiter of the Wikipedia API calls -WIKI_API_REDIS_DB: 0 +WIKI_API_REDIS_URL: # DEPRECATED. Use REDIS_URL instead WIKI_CACHE_REDIS_DB: 1 -WIKI_REDIS_TIMEOUT: 1 # seconds WIKI_CACHE_TIMEOUT: 86400 # seconds LOG_LEVEL_BY_MODULE: '{"": "info", "elasticsearch": "warning"}' # json config to set, for each module a log level @@ -45,11 +43,21 @@ WIKI_DESC_MAX_SIZE: 325 # max size allowed to the description of the wiki block LIST_PLACES_MAX_SIZE: 50 +######################## +## Redis +REDIS_URL: +REDIS_TIMEOUT: "0.3" # seconds + + ######################## ## Circuit Breaker CIRCUIT_BREAKER_TIMEOUT: 120 # timeout period in seconds CIRCUIT_BREAKER_MAXFAIL: 20 # consecutive failures before breaking +######################## +## Rate Limiter +RATE_LIMITER_REDIS_DB: 0 + ######################## ## Images @@ -83,6 +91,8 @@ CORS_OPTIONS_REQUESTS_ENABLED: False ####################### ## Directions +DIRECTIONS_RL_MAX_REQUESTS: 30 # per client +DIRECTIONS_RL_EXPIRE: 60 # seconds DIRECTIONS_TIMEOUT: 8 # seconds QWANT_DIRECTIONS_API_BASE_URL: MAPBOX_DIRECTIONS_API_BASE_URL: "https://api.mapbox.com/directions/v5/mapbox" diff --git a/idunn/utils/rate_limiter.py b/idunn/utils/rate_limiter.py new file mode 100644 index 000000000..329185722 --- /dev/null +++ b/idunn/utils/rate_limiter.py @@ -0,0 +1,68 @@ +import logging +from apistar.exceptions import HTTPException +from contextlib import contextmanager +from redis import RedisError +from redis_rate_limit import RateLimiter, TooManyRequests +from idunn.utils.redis import get_redis_pool, RedisNotConfigured +from idunn import settings + +logger = logging.getLogger(__name__) + +TooManyRequestsException = TooManyRequests + +@contextmanager +def dummy_limit(): + yield + +class HTTPTooManyRequests(HTTPException): + default_status_code = 429 + default_detail = 'Too Many Requests' + +class IdunnRateLimiter: + def __init__(self, resource, max_requests, expire): + try: + redis_pool = get_redis_pool(db=settings['RATE_LIMITER_REDIS_DB']) + except RedisNotConfigured: + logger.warning("Redis URL not configured: rate limiter not started") + self._limiter = None + else: + """ + If a redis is configured, + then we use the corresponding redis + service in the rate limiter. + """ + self._limiter = RateLimiter( + resource=resource, + max_requests=max_requests, + expire=expire, + redis_pool=redis_pool + ) + + def limit(self, client, ignore_redis_error=False): + if self._limiter is None: + return dummy_limit() + + @contextmanager + def limit(): + try: + with self._limiter.limit(client): + yield + except RedisError as e: + if ignore_redis_error: + logger.warning( + 'Ignoring RedisError in rate limiter for %s', + self._limiter.resource, exc_info=True + ) + yield + else: + raise + + return limit() + + def check_limit_per_client(self, request): + client_id = request.headers.get('x-client-hash') or 'default' + try: + with self.limit(client=client_id, ignore_redis_error=True): + pass + except TooManyRequestsException: + raise HTTPTooManyRequests diff --git a/idunn/utils/redis.py b/idunn/utils/redis.py index 153490a77..b4ef238f1 100644 --- a/idunn/utils/redis.py +++ b/idunn/utils/redis.py @@ -1,23 +1,30 @@ +import logging from redis import ConnectionPool, RedisError +from idunn import settings + +logger = logging.getLogger(__name__) +REDIS_TIMEOUT = float(settings['REDIS_TIMEOUT']) -REDIS_URL_SETTING = 'WIKI_API_REDIS_URL' -REDIS_TIMEOUT_SETTING = 'WIKI_REDIS_TIMEOUT' class RedisNotConfigured(RedisError): pass -def get_redis_pool(settings, db): - redis_url = settings[REDIS_URL_SETTING] - redis_timeout = int(settings[REDIS_TIMEOUT_SETTING]) +def get_redis_pool(db): + redis_url = settings['REDIS_URL'] + if redis_url is None: + # Fallback to old setting name + redis_url = settings['WIKI_API_REDIS_URL'] + if redis_url: + logger.warning('"WIKI_API_REDIS_URL" setting is deprecated. Use REDIS_URL instead') if not redis_url: - raise RedisNotConfigured('Missing redis url: %s not set' % REDIS_URL_SETTING) + raise RedisNotConfigured('Redis URL is not set') if not redis_url.startswith('redis://'): redis_url = 'redis://' + redis_url return ConnectionPool.from_url( url=redis_url, - socket_timeout=redis_timeout, + socket_timeout=REDIS_TIMEOUT, db=db ) diff --git a/tests/test_cache.py b/tests/test_cache.py index 205a51362..b18477a82 100644 --- a/tests/test_cache.py +++ b/tests/test_cache.py @@ -16,10 +16,10 @@ def cache_test_normal(redis): We define here settings specific to the test of the Wikipedia/Wikidata cache """ - settings._settings['WIKI_API_REDIS_URL'] = redis + settings._settings['REDIS_URL'] = redis WikipediaCache._connection = None yield - settings._settings['WIKI_API_REDIS_URL'] = None + settings._settings['REDIS_URL'] = None WikipediaCache._connection = None diff --git a/tests/test_rate_limiter.py b/tests/test_rate_limiter.py index 59bd3e9ae..404b56e1b 100644 --- a/tests/test_rate_limiter.py +++ b/tests/test_rate_limiter.py @@ -1,11 +1,10 @@ import responses import pytest import re -from time import sleep from freezegun import freeze_time from app import app, settings from apistar.test import TestClient -from idunn.blocks.wikipedia import WikipediaLimiter +from idunn.blocks.wikipedia import WikipediaSession, WikipediaCache from .utils import override_settings from redis import RedisError @@ -13,30 +12,36 @@ from functools import wraps @pytest.fixture(scope="function") -def limiter_test_normal(redis): +def disable_wikipedia_cache(): + WikipediaCache.disable() + yield + WikipediaCache._connection = None + +@pytest.fixture(scope="function") +def limiter_test_normal(redis, disable_wikipedia_cache): """ We define here settings specific to tests. We define low max calls limits to avoid too large number of requests made """ - with override_settings({'WIKI_API_RL_PERIOD': 5, 'WIKI_API_RL_MAX_CALLS': 6, 'WIKI_API_REDIS_URL': redis}): + with override_settings({'WIKI_API_RL_PERIOD': 5, 'WIKI_API_RL_MAX_CALLS': 6, 'REDIS_URL': redis}): # To force settings overriding we need to set to None the limiter - WikipediaLimiter._limiter = None + WikipediaSession._rate_limiter = None yield - WikipediaLimiter._limiter = None + WikipediaSession._rate_limiter = None @pytest.fixture(scope="function") -def limiter_test_interruption(redis): +def limiter_test_interruption(redis, disable_wikipedia_cache): """ In the 'Redis interruption' test below we made more requests than the limits allowed by the fixture 'limiter_test_normal' So we need another specific fixture. """ - with override_settings({'WIKI_API_RL_PERIOD': 5, 'WIKI_API_RL_MAX_CALLS': 100, 'WIKI_API_REDIS_URL': redis}): - WikipediaLimiter._limiter = None + with override_settings({'WIKI_API_RL_PERIOD': 5, 'WIKI_API_RL_MAX_CALLS': 100, 'REDIS_URL': redis}): + WikipediaSession._rate_limiter = None yield - WikipediaLimiter._limiter = None + WikipediaSession._rate_limiter = None @pytest.fixture(scope='function') def mock_wikipedia(redis): @@ -154,8 +159,8 @@ def restart_wiki_redis(docker_services): del docker_services._services['wiki_redis'] port = docker_services.port_for("wiki_redis", 6379) url = f'{docker_services.docker_ip}:{port}' - settings._settings['WIKI_API_REDIS_URL'] = url - WikipediaLimiter._limiter = None + settings._settings['REDIS_URL'] = url + WikipediaSession._rate_limiter = None def test_rate_limiter_with_redisError(limiter_test_interruption, mock_wikipedia, monkeypatch): """