diff --git a/kartograf/collectors/routeviews.py b/kartograf/collectors/routeviews.py
index c3458a6..b341600 100644
--- a/kartograf/collectors/routeviews.py
+++ b/kartograf/collectors/routeviews.py
@@ -7,6 +7,7 @@
 import requests
 
 from kartograf.timed import timed
+from kartograf.util import calculate_sha256
 
 # Routeviews Prefix to AS mappings Dataset for IPv4 and IPv6
 # https://www.caida.org/catalog/datasets/routeviews-prefix2as/
@@ -56,11 +57,10 @@ def year_and_month(now):
 
 
 def download(url, file):
-    print(f'Downloading {url}')
-    gz_file = file + ".gz"
+    print(f'Downloading from {url}')
     response = requests.get(url, stream=True, timeout=300)
 
-    with open(gz_file, 'wb') as gz:
+    with open(file, 'wb') as gz:
         for chunk in response.iter_content(chunk_size=8192):
             gz.write(chunk)
 
@@ -88,11 +88,13 @@ def extract(file, context):
 @timed
 def fetch_routeviews_pfx2as(context):
     path = context.data_dir_collectors
-    v4_file = f'{path}routeviews_pfx2asn_ip4.txt'
-    v6_file = f'{path}routeviews_pfx2asn_ip6.txt'
+    v4_file_gz = f'{path}routeviews_pfx2asn_ip4.txt.gz'
+    v6_file_gz = f'{path}routeviews_pfx2asn_ip6.txt.gz'
 
-    download(latest_link(PFX2AS_V4), v4_file)
-    download(latest_link(PFX2AS_V6), v6_file)
+    download(latest_link(PFX2AS_V4), v4_file_gz)
+    print(f"Downloaded {v4_file_gz}, file hash: {calculate_sha256(v4_file_gz)}")
+    download(latest_link(PFX2AS_V6), v6_file_gz)
+    print(f"Downloaded {v6_file_gz}, file hash: {calculate_sha256(v6_file_gz)}")
 
 
 def extract_routeviews_pfx2as(context):
diff --git a/kartograf/irr/fetch.py b/kartograf/irr/fetch.py
index 9742b31..752c024 100644
--- a/kartograf/irr/fetch.py
+++ b/kartograf/irr/fetch.py
@@ -3,6 +3,7 @@
 import shutil
 
 from kartograf.timed import timed
+from kartograf.util import calculate_sha256
 
 IRR_FILE_ADDRESSES = [
     # AFRINIC
@@ -40,6 +41,8 @@ def fetch_irr(context):
 
         ftp.close()
 
+        print(f"Downloaded {file_name}, file hash: {calculate_sha256(local_file_path)}")
+
 
 def extract_irr(context):
     for ftp_file in IRR_FILE_ADDRESSES:
diff --git a/kartograf/rpki/fetch.py b/kartograf/rpki/fetch.py
index da61cd4..84127ce 100644
--- a/kartograf/rpki/fetch.py
+++ b/kartograf/rpki/fetch.py
@@ -6,6 +6,10 @@
 from tqdm import tqdm
 
 from kartograf.timed import timed
+from kartograf.util import (
+    calculate_sha256,
+    calculate_sha256_directory,
+)
 
 TAL_URLS = {
     "afrinic": "http://rpki.afrinic.net/tal/afrinic.tal",
@@ -28,7 +32,7 @@ def download_rir_tals(context):
             with open(tal_path, 'wb') as file:
                 file.write(response.content)
 
-            print(f"Downloaded TAL for {rir.upper()} to {tal_path}")
+            print(f"Downloaded TAL for {rir.upper()} to {tal_path}, file hash: {calculate_sha256(tal_path)}")
             tals.append(tal_path)
 
         except requests.RequestException as e:
@@ -51,12 +55,14 @@ def fetch_rpki_db(context):
     # Download TALs and presist them in the RPKI data folder
     download_rir_tals(context)
     tal_options = [item for path in data_tals(context) for item in ('-t', path)]
-    print("Downloading RPKI Data")
+    print("Downloading RPKI Data, this may take a while.")
     subprocess.run(["rpki-client",
                     "-d", context.data_dir_rpki_cache
                     ] + tal_options,
                    capture_output=True)
 
+    print(f"Downloaded RPKI Data, hash sum: {calculate_sha256_directory(context.data_dir_rpki_cache)}")
+
 
 @timed
 def validate_rpki_db(context):
@@ -93,4 +99,4 @@ def process_file(file):
         res_file.write(",".join(json_results))
         res_file.write("]")
 
-    print(f"{len(json_results)} RKPI ROAs validated and saved to {result_path}")
+    print(f"{len(json_results)} RKPI ROAs validated and saved to {result_path}, file hash: {calculate_sha256(result_path)}")
diff --git a/kartograf/util.py b/kartograf/util.py
index 6dc8e38..d1c3457 100644
--- a/kartograf/util.py
+++ b/kartograf/util.py
@@ -1,5 +1,6 @@
 import hashlib
 import ipaddress
+import os
 import re
 import subprocess
 import time
@@ -15,6 +16,28 @@ def calculate_sha256(file_path):
     return sha256_hash.hexdigest()
 
 
+def calculate_sha256_directory(directory_path):
+    """Return one SHA-256 hex digest over every file below directory_path.
+
+    Files are read in binary mode and fed into a single hash object.
+    Sub-directories and files are visited in sorted order, so the digest does
+    not depend on the order in which the file system lists entries. Only file
+    contents are hashed; file names and paths are not part of the digest.
+    """
+    sha256_hash = hashlib.sha256()
+
+    for root, dirs, files in os.walk(directory_path):
+        # Sorting in place fixes the order in which os.walk descends.
+        dirs.sort()
+        for file in sorted(files):
+            file_path = os.path.join(root, file)
+            with open(file_path, "rb") as f:
+                for byte_block in iter(lambda: f.read(4096), b""):
+                    sha256_hash.update(byte_block)
+
+    return sha256_hash.hexdigest()
+
+
 def print_section_header(name):
     print()
     print("-" * 3 + f" {name} " + "-" * 3)