Skip to content

Commit

Permalink
- Validation of the mirror config is added
Browse files Browse the repository at this point in the history
- Skip a mirror if it has a broken timestamp file
- Library `geoip` is replaced by `geoip2`
- GeoIP DB is updated
  • Loading branch information
stepan_oksanichenko committed Jul 14, 2021
1 parent 6809f7c commit b1878d7
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 22 deletions.
108 changes: 90 additions & 18 deletions mirrors_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import logging
import os
from copy import copy
from glob import glob

import dateparser
Expand All @@ -12,11 +11,73 @@
from collections import defaultdict
from pathlib import Path
from typing import Dict, AnyStr, List, Union, Tuple
from geoip import IPInfo, open_database
from geoip2.database import Reader
from geoip2.errors import AddressNotFoundError
from geoip2.models import City
import requests
import yaml
from jsonschema import ValidationError, validate
from urllib3.exceptions import HTTPError


# JSON Schema (draft-04) describing a single mirror config document.
# A config is an object whose "address" maps protocol names to string
# values; every other declared property is a plain string.
MIRROR_CONFIG_SCHEMA = {
    "$schema": "http://json-schema.org/draft-04/schema#",
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "address": {
            "type": "object",
            # Each declared protocol gets its own independent
            # {"type": "string"} sub-schema.
            "properties": {
                protocol: {"type": "string"}
                for protocol in ("http", "https", "rsync", "ftp")
            },
            # At least one of "http" / "https" has to be present.
            "anyOf": [
                {"required": ["http"]},
                {"required": ["https"]},
            ],
        },
        "update_frequency": {"type": "string"},
        "sponsor": {"type": "string"},
        "sponsor_url": {"type": "string"},
        # "email" is declared here but is not in the "required" list below.
        "email": {"type": "string"},
    },
    "required": [
        "name",
        "address",
        "update_frequency",
        "sponsor",
        "sponsor_url",
    ],
}

REQUIRED_MIRROR_PROTOCOLS = (
'https',
'http',
Expand Down Expand Up @@ -151,7 +212,16 @@ def set_repo_status(
mirror_should_updated_at = dateparser.parse(
f'now-{allowed_outdate} UTC'
).timestamp()
mirror_last_updated = float(request.content)
try:
mirror_last_updated = float(request.content)
except ValueError:
logger.info(
'Mirror "%s" has broken timestamp file by url "%s"',
mirror_info['name'],
timestamp_url,
)
mirror_info['status'] = 'expired'
return
if mirror_last_updated > mirror_should_updated_at:
mirror_info['status'] = 'ok'
else:
Expand All @@ -175,18 +245,17 @@ def get_mirrors_info(
for config_path in Path(mirrors_dir).rglob('*.yml'):
with open(str(config_path), 'r') as config_file:
mirror_info = yaml.safe_load(config_file)
if 'name' not in mirror_info:
logger.error(
'Mirror file "%s" doesn\'t have name of the mirror',
config_path,
try:
validate(
mirror_info,
MIRROR_CONFIG_SCHEMA,
)
continue
if 'address' not in mirror_info:
except ValidationError as err:
logger.error(
'Mirror file "%s" doesn\'t have addresses of the mirror',
mirror_info,
'Mirror by path "%s" is not valid, because "%s"',
config_path,
err,
)
continue
ALL_MIRROR_PROTOCOLS.extend(
protocol for protocol in mirror_info['address'].keys() if
protocol not in ALL_MIRROR_PROTOCOLS
Expand Down Expand Up @@ -306,14 +375,17 @@ def set_mirror_country(
logger.error('Can\'t get IP of mirror %s', mirror_name)
mirror_info['country'] = 'Unknown'
return
db = open_database(GEOPIP_DB)
match = db.lookup(ip) # type: IPInfo
db = Reader(GEOPIP_DB)
logger.info('Set country for mirror "%s"', mirror_name)
if match is None:
try:
match = db.city(ip) # type: City
mirror_info['country'] = match.country.name
except AddressNotFoundError:
logger.warning(
'GeoIP db does not have information about IP "%s"',
ip,
)
mirror_info['country'] = 'Unknown'
else:
country = match.get_info_dict()['country']['names']['en']
mirror_info['country'] = country


def generate_mirrors_table(
Expand Down
7 changes: 3 additions & 4 deletions requiremnts.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
dateparser
python-geoip-geolite2
python-geoip
python-geoip-python3
dateparser==1.0.0
geoip2==4.2.0
jsonschema==3.2.0

0 comments on commit b1878d7

Please sign in to comment.