Skip to content

Commit

Permalink
Print file hashes after fetching for all sources
Browse files Browse the repository at this point in the history
  • Loading branch information
fjahr committed Jan 27, 2024
1 parent 46045f0 commit d69be46
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 10 deletions.
16 changes: 9 additions & 7 deletions kartograf/collectors/routeviews.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import requests

from kartograf.timed import timed
from kartograf.util import calculate_sha256

# Routeviews Prefix to AS mappings Dataset for IPv4 and IPv6
# https://www.caida.org/catalog/datasets/routeviews-prefix2as/
Expand Down Expand Up @@ -56,11 +57,10 @@ def year_and_month(now):


def download(url, file):
print(f'Downloading {url}')
gz_file = file + ".gz"
print(f'Downloading from {url}')

response = requests.get(url, stream=True, timeout=300)
with open(gz_file, 'wb') as gz:
with open(file, 'wb') as gz:
for chunk in response.iter_content(chunk_size=8192):
gz.write(chunk)

Expand Down Expand Up @@ -88,11 +88,13 @@ def extract(file, context):
@timed
def fetch_routeviews_pfx2as(context):
path = context.data_dir_collectors
v4_file = f'{path}routeviews_pfx2asn_ip4.txt'
v6_file = f'{path}routeviews_pfx2asn_ip6.txt'
v4_file_gz = f'{path}routeviews_pfx2asn_ip4.txt.gz'
v6_file_gz = f'{path}routeviews_pfx2asn_ip6.txt.gz'

download(latest_link(PFX2AS_V4), v4_file)
download(latest_link(PFX2AS_V6), v6_file)
download(latest_link(PFX2AS_V4), v4_file_gz)
print(f"Downloaded {v4_file_gz}, file hash: {calculate_sha256(v4_file_gz)}")
download(latest_link(PFX2AS_V6), v6_file_gz)
print(f"Downloaded {v6_file_gz}, file hash: {calculate_sha256(v6_file_gz)}")


def extract_routeviews_pfx2as(context):
Expand Down
3 changes: 3 additions & 0 deletions kartograf/irr/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import shutil

from kartograf.timed import timed
from kartograf.util import calculate_sha256

IRR_FILE_ADDRESSES = [
# AFRINIC
Expand Down Expand Up @@ -40,6 +41,8 @@ def fetch_irr(context):

ftp.close()

print(f"Downloaded {file_name}, file hash: {calculate_sha256(local_file_path)}")


def extract_irr(context):
for ftp_file in IRR_FILE_ADDRESSES:
Expand Down
12 changes: 9 additions & 3 deletions kartograf/rpki/fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from tqdm import tqdm

from kartograf.timed import timed
from kartograf.util import (
calculate_sha256,
calculate_sha256_directory,
)

TAL_URLS = {
"afrinic": "http://rpki.afrinic.net/tal/afrinic.tal",
Expand All @@ -28,7 +32,7 @@ def download_rir_tals(context):
with open(tal_path, 'wb') as file:
file.write(response.content)

print(f"Downloaded TAL for {rir.upper()} to {tal_path}")
print(f"Downloaded TAL for {rir.upper()} to {tal_path}, file hash: {calculate_sha256(tal_path)}")
tals.append(tal_path)

except requests.RequestException as e:
Expand All @@ -51,12 +55,14 @@ def fetch_rpki_db(context):
# Download TALs and persist them in the RPKI data folder
download_rir_tals(context)
tal_options = [item for path in data_tals(context) for item in ('-t', path)]
print("Downloading RPKI Data")
print("Downloading RPKI Data, this may take a while.")
subprocess.run(["rpki-client",
"-d", context.data_dir_rpki_cache
] + tal_options,
capture_output=True)

print(f"Downloaded RPKI Data, hash sum: {calculate_sha256_directory(context.data_dir_rpki_cache)}")


@timed
def validate_rpki_db(context):
Expand Down Expand Up @@ -93,4 +99,4 @@ def process_file(file):
res_file.write(",".join(json_results))
res_file.write("]")

print(f"{len(json_results)} RKPI ROAs validated and saved to {result_path}")
print(f"{len(json_results)} RKPI ROAs validated and saved to {result_path}, file hash: {calculate_sha256(result_path)}")
14 changes: 14 additions & 0 deletions kartograf/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import hashlib
import ipaddress
import os
import re
import subprocess
import time
Expand All @@ -15,6 +16,19 @@ def calculate_sha256(file_path):
return sha256_hash.hexdigest()


def calculate_sha256_directory(directory_path):
    """Return a SHA-256 hex digest over the contents of every file under
    directory_path.

    Files within each directory are hashed in sorted-name order, and the
    subdirectory traversal order is pinned as well, so the digest is
    deterministic regardless of filesystem listing order.

    NOTE(review): only file *contents* feed the hash — file names and the
    directory layout do not affect the digest, so renaming or moving files
    (without changing contents) yields the same result.
    """
    sha256_hash = hashlib.sha256()

    for root, dirs, files in os.walk(directory_path):
        # os.walk yields entries in arbitrary, filesystem-dependent order.
        # Sorting dirs *in place* pins the traversal order of subdirectories
        # (os.walk honors in-place modification of dirs), which the original
        # code missed — without it the digest is not reproducible.
        dirs.sort()
        for file in sorted(files):
            file_path = os.path.join(root, file)
            with open(file_path, "rb") as f:
                # Stream in 4 KiB chunks to avoid loading large files whole.
                for byte_block in iter(lambda: f.read(4096), b""):
                    sha256_hash.update(byte_block)

    return sha256_hash.hexdigest()


def print_section_header(name):
print()
print("-" * 3 + f" {name} " + "-" * 3)
Expand Down

0 comments on commit d69be46

Please sign in to comment.