From b1878d732e789da2359e1e21cbf7fca7667c7560 Mon Sep 17 00:00:00 2001 From: stepan_oksanichenko Date: Wed, 14 Jul 2021 20:06:30 +0300 Subject: [PATCH] - Add validation of a mirror config - Skip a mirror if it has a broken timestamp file - Replace library `geoip` with `geoip2` - Update GeoIP DB --- mirrors_update.py | 108 ++++++++++++++++++++++++++++++++++++++-------- requiremnts.txt | 7 ++- 2 files changed, 93 insertions(+), 22 deletions(-) diff --git a/mirrors_update.py b/mirrors_update.py index e7835558..3bdbcdcc 100755 --- a/mirrors_update.py +++ b/mirrors_update.py @@ -2,7 +2,6 @@ import logging import os -from copy import copy from glob import glob import dateparser @@ -12,11 +11,73 @@ from collections import defaultdict from pathlib import Path from typing import Dict, AnyStr, List, Union, Tuple -from geoip import IPInfo, open_database +from geoip2.database import Reader +from geoip2.errors import AddressNotFoundError +from geoip2.models import City import requests import yaml +from jsonschema import ValidationError, validate from urllib3.exceptions import HTTPError + +MIRROR_CONFIG_SCHEMA = { + "$schema": "http://json-schema.org/draft-04/schema#", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "address": { + "type": "object", + "properties": { + "http": { + "type": "string" + }, + "https": { + "type": "string" + }, + "rsync": { + "type": "string" + }, + "ftp": { + "type": "string" + }, + }, + "anyOf": [ + { + "required": [ + "http", + ], + }, + { + "required": [ + "https", + ], + }, + ], + }, + "update_frequency": { + "type": "string" + }, + "sponsor": { + "type": "string" + }, + "sponsor_url": { + "type": "string" + }, + "email": { + "type": "string" + } + }, + "required": [ + "name", + "address", + "update_frequency", + "sponsor", + "sponsor_url", + ] +} + REQUIRED_MIRROR_PROTOCOLS = ( 'https', 'http', @@ -151,7 +212,16 @@ def set_repo_status( mirror_should_updated_at = dateparser.parse( f'now-{allowed_outdate} 
UTC' ).timestamp() - mirror_last_updated = float(request.content) + try: + mirror_last_updated = float(request.content) + except ValueError: + logger.info( + 'Mirror "%s" has broken timestamp file by url "%s"', + mirror_info['name'], + timestamp_url, + ) + mirror_info['status'] = 'expired' + return if mirror_last_updated > mirror_should_updated_at: mirror_info['status'] = 'ok' else: @@ -175,18 +245,17 @@ def get_mirrors_info( for config_path in Path(mirrors_dir).rglob('*.yml'): with open(str(config_path), 'r') as config_file: mirror_info = yaml.safe_load(config_file) - if 'name' not in mirror_info: - logger.error( - 'Mirror file "%s" doesn\'t have name of the mirror', - config_path, + try: + validate( + mirror_info, + MIRROR_CONFIG_SCHEMA, ) - continue - if 'address' not in mirror_info: + except ValidationError as err: logger.error( - 'Mirror file "%s" doesn\'t have addresses of the mirror', - mirror_info, + 'Mirror by path "%s" is not valid, because "%s"', + config_path, + err, ) - continue ALL_MIRROR_PROTOCOLS.extend( protocol for protocol in mirror_info['address'].keys() if protocol not in ALL_MIRROR_PROTOCOLS @@ -306,14 +375,17 @@ def set_mirror_country( logger.error('Can\'t get IP of mirror %s', mirror_name) mirror_info['country'] = 'Unknown' return - db = open_database(GEOPIP_DB) - match = db.lookup(ip) # type: IPInfo + db = Reader(GEOPIP_DB) logger.info('Set country for mirror "%s"', mirror_name) - if match is None: + try: + match = db.city(ip) # type: City + mirror_info['country'] = match.country.name + except AddressNotFoundError: + logger.warning( + 'GeoIP db does not have information about IP "%s"', + ip, + ) mirror_info['country'] = 'Unknown' - else: - country = match.get_info_dict()['country']['names']['en'] - mirror_info['country'] = country def generate_mirrors_table( diff --git a/requiremnts.txt b/requiremnts.txt index c0ba094b..7cbd4256 100644 --- a/requiremnts.txt +++ b/requiremnts.txt @@ -1,4 +1,3 @@ -dateparser -python-geoip-geolite2 
-python-geoip -python-geoip-python3 \ No newline at end of file +dateparser==1.0.0 +geoip2==4.2.0 +jsonschema==3.2.0 \ No newline at end of file