
Merge pull request #92 from QwantResearch/directions-rate-limiter
Refactor rate limiter and define limits on directions endpoint
amatissart authored Sep 25, 2019
2 parents 113d053 + 0b242b5 commit 95d2e10
Showing 8 changed files with 153 additions and 91 deletions.
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -6,7 +6,7 @@ services:
command: "--workers=4"
environment:
- IDUNN_MIMIR_ES=
- IDUNN_WIKI_API_REDIS_URL=idunn-redis:6379
- IDUNN_REDIS_URL=redis://idunn-redis:6379
- IDUNN_LOG_JSON=1

idunn-redis:
18 changes: 14 additions & 4 deletions idunn/api/directions.py
@@ -1,9 +1,19 @@
from apistar.http import QueryParams
from apistar.http import QueryParams, Request
from apistar.exceptions import BadRequest

from idunn import settings
from idunn.utils.rate_limiter import IdunnRateLimiter
from ..directions.client import directions_client

rate_limiter = IdunnRateLimiter(
resource='idunn.api.directions',
max_requests=int(settings['DIRECTIONS_RL_MAX_REQUESTS']),
expire=int(settings['DIRECTIONS_RL_EXPIRE'])
)

def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams, request: Request):
rate_limiter.check_limit_per_client(request)

def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams):
from_position = (f_lon, f_lat)
to_position = (t_lon, t_lat)
params_dict = dict(params)
@@ -13,6 +23,6 @@ def get_directions(f_lon, f_lat, t_lon, t_lat, params: QueryParams):
if not mode:
raise BadRequest('"type" query param is required')

return directions_client.get_directions(from_position, to_position,
mode=mode, lang=lang
return directions_client.get_directions(
from_position, to_position, mode=mode, lang=lang
)
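
To make the effect of this change concrete, here is a minimal sketch of the per-client behaviour, assuming the Idunn settings are loadable and a Redis instance is reachable through REDIS_URL; FakeRequest is only a stand-in for apistar's Request, exposing the headers mapping that check_limit_per_client reads, and the client hash value is arbitrary.

from idunn.api.directions import rate_limiter
from idunn.utils.rate_limiter import HTTPTooManyRequests

class FakeRequest:
    """Stand-in for apistar.http.Request: only the `headers` mapping is used here."""
    def __init__(self, client_hash=None):
        self.headers = {'x-client-hash': client_hash} if client_hash else {}

request = FakeRequest(client_hash='client-a')  # requests are bucketed per x-client-hash value
try:
    for i in range(31):
        # With the defaults added below (30 requests per 60 seconds), the 31st
        # call for the same client hash is rejected.
        rate_limiter.check_limit_per_client(request)
except HTTPTooManyRequests:
    print('request rejected with HTTP 429 Too Many Requests')

Clients that send no x-client-hash header all share the 'default' bucket.
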
86 changes: 24 additions & 62 deletions idunn/blocks/wikipedia.py
@@ -4,79 +4,28 @@
import pybreaker
from apistar import validators
from requests.exceptions import HTTPError, RequestException, Timeout
from redis import Redis, ConnectionError as RedisConnectionError, TimeoutError, RedisError
from redis_rate_limit import RateLimiter, TooManyRequests
from redis import Redis, RedisError

from idunn import settings
from idunn.utils import prometheus
from idunn.utils.redis import get_redis_pool, RedisNotConfigured
from idunn.utils.circuit_breaker import IdunnCircuitBreaker
from idunn.utils.rate_limiter import IdunnRateLimiter, TooManyRequestsException
from .base import BaseBlock


GET_WIKI_INFO = "get_wiki_info"
GET_TITLE_IN_LANGUAGE = "get_title_in_language"
GET_SUMMARY = "get_summary"

DISABLED_STATE = object() # Used to flag rate-limiter or cache as disabled by settings
DISABLED_STATE = object() # Used to flag cache as disabled by settings

logger = logging.getLogger(__name__)


class WikiUndefinedException(Exception):
pass

class WikipediaLimiter:
_limiter = None

@classmethod
def get_limiter(cls):
if cls._limiter is None:
try:
redis_pool = get_redis_pool(settings, db=settings['WIKI_API_REDIS_DB'])
except RedisNotConfigured:
logger.warning("Redis URL not configured: rate limiter not started")
cls._limiter = DISABLED_STATE
else:
"""
If a redis is configured,
then we use the corresponding redis
service in the rate limiter.
"""
max_calls = int(settings['WIKI_API_RL_MAX_CALLS'])
redis_period = int(settings['WIKI_API_RL_PERIOD'])
cls._limiter = RateLimiter(
resource='WikipediaAPI',
max_requests=max_calls,
expire=redis_period,
redis_pool=redis_pool
)
return cls._limiter

@classmethod
def request(cls, f):
def wrapper(*args, **kwargs):
limiter = cls.get_limiter()

if limiter is not DISABLED_STATE:
"""
We use the RateLimiter since
the redis service url has been provided
"""
try:
with limiter.limit(client="Idunn"):
return f(*args, **kwargs)
except RedisError:
logger.error("Got a RedisError in {}".format(f.__name__), exc_info=True)
return None
"""
No redis service has been set, so we
bypass the "redis-based" rate limiter
"""
return f(*args, **kwargs)
return wrapper


class CacheNotAvailable(Exception):
pass

@@ -107,7 +56,7 @@ def init_cache(cls):
cls._expire = int(settings['WIKI_CACHE_TIMEOUT']) # seconds
redis_db = settings['WIKI_CACHE_REDIS_DB']
try:
redis_pool = get_redis_pool(settings, db=redis_db)
redis_pool = get_redis_pool(db=redis_db)
except RedisNotConfigured:
logger.warning("No Redis URL has been set for caching", exc_info=True)
cls._connection = DISABLED_STATE
@@ -145,9 +94,14 @@ def with_cache(*args, **kwargs):
return f(*args, **kwargs)
return with_cache

@classmethod
def disable(cls):
cls._connection = DISABLED_STATE


class WikipediaSession:
_session = None
_rate_limiter = None
timeout = 1. # seconds

API_V1_BASE_PATTERN = "https://{lang}.wikipedia.org/api/rest_v1"
@@ -160,7 +114,8 @@ class Helpers:
def handle_requests_error(f):
def wrapped_f(*args, **kwargs):
try:
return WikipediaLimiter.request(f)(*args, **kwargs)
with WikipediaSession.get_rate_limiter().limit(client='idunn') as limit:
return f(*args, **kwargs)
except pybreaker.CircuitBreakerError:
prometheus.exception("CircuitBreakerError")
logger.error("Got CircuitBreakerError in {}".format(f.__name__), exc_info=True)
@@ -173,15 +128,12 @@ def wrapped_f(*args, **kwargs):
except RequestException:
prometheus.exception("RequestException")
logger.error("Got Request exception in {}".format(f.__name__), exc_info=True)
except TooManyRequests:
except TooManyRequestsException:
prometheus.exception("TooManyRequests")
logger.warning("Got TooManyRequests{}".format(f.__name__), exc_info=True)
except RedisConnectionError:
prometheus.exception("RedisConnectionError")
except RedisError:
prometheus.exception("RedisError")
logger.warning("Got redis ConnectionError{}".format(f.__name__), exc_info=True)
except TimeoutError:
prometheus.exception("RedisTimeoutError")
logger.warning("Got redis TimeoutError{}".format(f.__name__), exc_info=True)
return wrapped_f

@property
@@ -192,6 +144,16 @@ def session(self):
self._session.headers.update({"User-Agent": user_agent})
return self._session

@classmethod
def get_rate_limiter(cls):
if cls._rate_limiter is None:
cls._rate_limiter = IdunnRateLimiter(
resource='WikipediaAPI',
max_requests=int(settings['WIKI_API_RL_MAX_CALLS']),
expire=int(settings['WIKI_API_RL_PERIOD'])
)
return cls._rate_limiter

@Helpers.handle_requests_error
@circuit_breaker
def get_summary(self, title, lang):
16 changes: 13 additions & 3 deletions idunn/utils/default_settings.yaml
@@ -13,10 +13,8 @@ ES_WIKI_LANG: "de,en,es,fr,it" # the (comma separated) list of languages existin

WIKI_API_RL_MAX_CALLS: 100 # Max number of external calls allowed by the rate limiter
WIKI_API_RL_PERIOD: 1 # Duration (in seconds) of the period where no more than the max number of external calls are expected
WIKI_API_REDIS_URL: # URL of the Redis service used by the rate limiter of the Wikipedia API calls
WIKI_API_REDIS_DB: 0
WIKI_API_REDIS_URL: # DEPRECATED. Use REDIS_URL instead
WIKI_CACHE_REDIS_DB: 1
WIKI_REDIS_TIMEOUT: 1 # seconds
WIKI_CACHE_TIMEOUT: 86400 # seconds

LOG_LEVEL_BY_MODULE: '{"": "info", "elasticsearch": "warning"}' # json config to set, for each module a log level
@@ -45,11 +43,21 @@ WIKI_DESC_MAX_SIZE: 325 # max size allowed to the description of the wiki block

LIST_PLACES_MAX_SIZE: 50

########################
## Redis
REDIS_URL:
REDIS_TIMEOUT: "0.3" # seconds


########################
## Circuit Breaker
CIRCUIT_BREAKER_TIMEOUT: 120 # timeout period in seconds
CIRCUIT_BREAKER_MAXFAIL: 20 # consecutive failures before breaking

########################
## Rate Limiter
RATE_LIMITER_REDIS_DB: 0


########################
## Images
Expand Down Expand Up @@ -83,6 +91,8 @@ CORS_OPTIONS_REQUESTS_ENABLED: False

#######################
## Directions
DIRECTIONS_RL_MAX_REQUESTS: 30 # per client
DIRECTIONS_RL_EXPIRE: 60 # seconds
DIRECTIONS_TIMEOUT: 8 # seconds
QWANT_DIRECTIONS_API_BASE_URL:
MAPBOX_DIRECTIONS_API_BASE_URL: "https://api.mapbox.com/directions/v5/mapbox"
68 changes: 68 additions & 0 deletions idunn/utils/rate_limiter.py
@@ -0,0 +1,68 @@
import logging
from apistar.exceptions import HTTPException
from contextlib import contextmanager
from redis import RedisError
from redis_rate_limit import RateLimiter, TooManyRequests
from idunn.utils.redis import get_redis_pool, RedisNotConfigured
from idunn import settings

logger = logging.getLogger(__name__)

TooManyRequestsException = TooManyRequests

@contextmanager
def dummy_limit():
yield

class HTTPTooManyRequests(HTTPException):
default_status_code = 429
default_detail = 'Too Many Requests'

class IdunnRateLimiter:
def __init__(self, resource, max_requests, expire):
try:
redis_pool = get_redis_pool(db=settings['RATE_LIMITER_REDIS_DB'])
except RedisNotConfigured:
logger.warning("Redis URL not configured: rate limiter not started")
self._limiter = None
else:
"""
If a redis is configured,
then we use the corresponding redis
service in the rate limiter.
"""
self._limiter = RateLimiter(
resource=resource,
max_requests=max_requests,
expire=expire,
redis_pool=redis_pool
)

def limit(self, client, ignore_redis_error=False):
if self._limiter is None:
return dummy_limit()

@contextmanager
def limit():
try:
with self._limiter.limit(client):
yield
except RedisError as e:
if ignore_redis_error:
logger.warning(
'Ignoring RedisError in rate limiter for %s',
self._limiter.resource, exc_info=True
)
yield
else:
raise

return limit()

def check_limit_per_client(self, request):
client_id = request.headers.get('x-client-hash') or 'default'
try:
with self.limit(client=client_id, ignore_redis_error=True):
pass
except TooManyRequestsException:
raise HTTPTooManyRequests
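
For reference, a minimal sketch of how this new helper is meant to be used directly, mirroring the directions endpoint above; it assumes a Redis instance is reachable through the REDIS_URL setting (without it, IdunnRateLimiter silently degrades to a no-op), and the resource name and numbers simply restate the defaults introduced in this change.

from idunn.utils.rate_limiter import IdunnRateLimiter, TooManyRequestsException

limiter = IdunnRateLimiter(
    resource='idunn.api.directions',   # namespace of the counters stored in Redis
    max_requests=30,                   # DIRECTIONS_RL_MAX_REQUESTS default
    expire=60,                         # DIRECTIONS_RL_EXPIRE default, in seconds
)

for i in range(35):
    try:
        # Each client id gets its own counter; once 30 requests have been made
        # within 60 seconds, the context manager raises TooManyRequestsException.
        with limiter.limit(client='client-a'):
            print('request %d accepted' % i)
    except TooManyRequestsException:
        print('request %d rejected' % i)

Passing ignore_redis_error=True to limit(), as check_limit_per_client does, makes the limiter fail open when Redis is unreachable instead of propagating a RedisError.
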
21 changes: 14 additions & 7 deletions idunn/utils/redis.py
@@ -1,23 +1,30 @@
import logging
from redis import ConnectionPool, RedisError
from idunn import settings

logger = logging.getLogger(__name__)
REDIS_TIMEOUT = float(settings['REDIS_TIMEOUT'])

REDIS_URL_SETTING = 'WIKI_API_REDIS_URL'
REDIS_TIMEOUT_SETTING = 'WIKI_REDIS_TIMEOUT'

class RedisNotConfigured(RedisError):
pass

def get_redis_pool(settings, db):
redis_url = settings[REDIS_URL_SETTING]
redis_timeout = int(settings[REDIS_TIMEOUT_SETTING])
def get_redis_pool(db):
redis_url = settings['REDIS_URL']
if redis_url is None:
# Fallback to old setting name
redis_url = settings['WIKI_API_REDIS_URL']
if redis_url:
logger.warning('"WIKI_API_REDIS_URL" setting is deprecated. Use REDIS_URL instead')

if not redis_url:
raise RedisNotConfigured('Missing redis url: %s not set' % REDIS_URL_SETTING)
raise RedisNotConfigured('Redis URL is not set')

if not redis_url.startswith('redis://'):
redis_url = 'redis://' + redis_url

return ConnectionPool.from_url(
url=redis_url,
socket_timeout=redis_timeout,
socket_timeout=REDIS_TIMEOUT,
db=db
)
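
A short sketch of the updated helper in isolation, assuming REDIS_URL points at a reachable server; the URL may be given with or without the redis:// scheme, since the function now prepends it when missing, and the database index shown is the rate-limiter default.

from redis import Redis
from idunn.utils.redis import get_redis_pool, RedisNotConfigured

try:
    pool = get_redis_pool(db=0)           # e.g. RATE_LIMITER_REDIS_DB
except RedisNotConfigured:
    print('REDIS_URL is not set: callers keep caching and rate limiting disabled')
else:
    client = Redis(connection_pool=pool)  # socket_timeout is taken from REDIS_TIMEOUT
    print(client.ping())                  # basic connectivity check
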
4 changes: 2 additions & 2 deletions tests/test_cache.py
@@ -16,10 +16,10 @@ def cache_test_normal(redis):
We define here settings specific to the
test of the Wikipedia/Wikidata cache
"""
settings._settings['WIKI_API_REDIS_URL'] = redis
settings._settings['REDIS_URL'] = redis
WikipediaCache._connection = None
yield
settings._settings['WIKI_API_REDIS_URL'] = None
settings._settings['REDIS_URL'] = None
WikipediaCache._connection = None

