From eb6d6f2ca3ed74d5cbcc3e9d04d1f93e766cffb8 Mon Sep 17 00:00:00 2001 From: thenav56 Date: Tue, 18 Jun 2024 15:28:58 +0545 Subject: [PATCH] Update data import script to handle update logic --- api/management/commands/ingest_icrc.py | 20 ++-- .../commands/ingest_ns_directory.py | 9 +- api/management/commands/ingest_ns_document.py | 28 +++--- .../commands/ingest_ns_initiatives.py | 21 +++-- api/models.py | 2 + databank/management/commands/FDRS_INCOME.py | 17 ++-- .../management/commands/fdrs_annual_income.py | 48 +++++++--- databank/management/commands/ingest_acaps.py | 64 ++++++++----- .../management/commands/ingest_climate.py | 93 ++++++++++++------- databank/management/commands/ingest_hdr.py | 29 ++++-- databank/models.py | 2 + 11 files changed, 211 insertions(+), 122 deletions(-) diff --git a/api/management/commands/ingest_icrc.py b/api/management/commands/ingest_icrc.py index cdf029b2bd..0368b44194 100644 --- a/api/management/commands/ingest_icrc.py +++ b/api/management/commands/ingest_icrc.py @@ -60,19 +60,19 @@ def handle(self, *args, **kwargs): "Description": description, } ) + added = 0 for data in country_list: - country = Country.objects.filter(name__exact=data["Country"]) - if country.exists(): - dict_data = { - "country": country.first(), - "icrc_presence": data["ICRC presence"], - "url": data["URL"], - "key_operation": data["Key operation"], - "description": data["Description"], - } + country = Country.objects.filter(name__exact=data["Country"]).first() + if country: + country_icrc_presence, _ = CountryICRCPresence.objects.get_or_create(country=country) + + country_icrc_presence.icrc_presence = data["ICRC presence"] + country_icrc_presence.url = data["URL"] + country_icrc_presence.key_operation = data["Key operation"] + country_icrc_presence.description = data["Description"] + country_icrc_presence.save() added += 1 - CountryICRCPresence.objects.create(**dict_data) text_to_log = "%s ICRC added" % added logger.info(text_to_log) diff --git a/api/management/commands/ingest_ns_directory.py b/api/management/commands/ingest_ns_directory.py index a9157e2179..0fb3a1cd71 100644 --- a/api/management/commands/ingest_ns_directory.py +++ b/api/management/commands/ingest_ns_directory.py @@ -57,7 +57,14 @@ def handle(self, *args, **kwargs): "position": data["CON_title"], "country": country, } - CountryDirectory.objects.create(**data) + existing_qs = CountryDirectory.objects.filter( + country=country, + first_name__iexact=data["first_name"], + last_name__iexact=data["last_name"], + position__iexact=data["position"], + ) + if not existing_qs.exists(): + CountryDirectory.objects.create(**data) text_to_log = "%s Ns Directory added" % added logger.info(text_to_log) body = {"name": "ingest_ns_directory", "message": text_to_log, "num_result": added, "status": CronJobStatus.SUCCESSFUL} diff --git a/api/management/commands/ingest_ns_document.py b/api/management/commands/ingest_ns_document.py index eb2200ac44..92f5fb3c39 100644 --- a/api/management/commands/ingest_ns_document.py +++ b/api/management/commands/ingest_ns_document.py @@ -100,19 +100,21 @@ def save_documents_to_database(self, result): added = 0 for document in result: country = Country.objects.filter(fdrs=document["country_code"]).first() - if country: - added += 1 - data = { - "country": country, - "name": document["name"], - "url": document["url"], - "thumbnail": document["thumbnail"], - "document_type": document["document_type"], - "year": document["year"], - "end_year": document["end_year"], - "year_text": document["year_text"], - } - CountryKeyDocument.objects.create(**data) + if country is None: + continue + + country_key_document, _ = CountryKeyDocument.objects.get_or_create( + country=country, + url=document["url"], + ) + country_key_document.name = document["name"] + country_key_document.thumbnail = document["thumbnail"] + country_key_document.document_type = document["document_type"] + country_key_document.year = document["year"] + country_key_document.end_year = document["end_year"] + country_key_document.year_text = document["year_text"] + country_key_document.save() + added += 1 return added def sync_cron_success(self, text_to_log, added): diff --git a/api/management/commands/ingest_ns_initiatives.py b/api/management/commands/ingest_ns_initiatives.py index 09460380cf..40227e3e34 100644 --- a/api/management/commands/ingest_ns_initiatives.py +++ b/api/management/commands/ingest_ns_initiatives.py @@ -47,17 +47,18 @@ def handle(self, *args, **kwargs): # TODO: Filter not by society name country = Country.objects.filter(society_name__iexact=data[0]).first() if country: - dict_data = { - "country": country, - "title": data[3], - "fund_type": data[2], - "allocation": data[5], - "year": data[1], - "funding_period": data[6], - "categories": data[4], - } + nsd_initiatives, _ = NSDInitiatives.objects.get_or_create( + country=country, + year=data[1], + fund_type=data[2], + ) + nsd_initiatives.title = data[3] + nsd_initiatives.allocation = data[5] + nsd_initiatives.funding_period = data[6] + nsd_initiatives.categories = data[4] + nsd_initiatives.save() added += 1 - NSDInitiatives.objects.create(**dict_data) + text_to_log = "%s Ns initiatives added" % added logger.info(text_to_log) body = {"name": "ingest_ns_initiatives", "message": text_to_log, "num_result": added, "status": CronJobStatus.SUCCESSFUL} diff --git a/api/models.py b/api/models.py index 72157b6ff9..45278ac426 100644 --- a/api/models.py +++ b/api/models.py @@ -349,6 +349,7 @@ class CountryKeyDocument(models.Model): end_year = models.DateField(verbose_name=_("End Year"), null=True, blank=True) year_text = models.CharField(verbose_name=_("Year Text"), max_length=255, null=True, blank=True) + # TODO: Add unique_together country, url def __str__(self): return f"{self.country.name} - {self.name}" @@ -391,6 +392,7 @@ class NSDInitiatives(models.Model): funding_period = models.IntegerField(verbose_name=_("Funding Period in Month")) categories = ArrayField(models.CharField(max_length=255), verbose_name=_("Funding categories"), default=list, null=True) + # TODO: Add unique_together country, year, fund_type def __str__(self): return f"{self.country.name} - {self.title}" diff --git a/databank/management/commands/FDRS_INCOME.py b/databank/management/commands/FDRS_INCOME.py index 1fe5278d57..69d8a84271 100644 --- a/databank/management/commands/FDRS_INCOME.py +++ b/databank/management/commands/FDRS_INCOME.py @@ -47,13 +47,16 @@ def handle(self, *args, **kwargs): fdrs_entities = fdrs_entities.json() for d in fdrs_entities["data"]: indicator = next(iter(d.values())) + fdrs_indicator = map_indicators[fdrs_indicator_enum_data[indicator]] income_list = d["data"][0]["data"] if len(income_list): for income in income_list: - data = { - "date": str(income["year"]) + "-01-01", - "value": income["value"], - "indicator": map_indicators.get(fdrs_indicator_enum_data.get(indicator)), - "overview": overview, - } - FDRSIncome.objects.create(**data) + income_value = income["value"] + fdrs_income, _ = FDRSIncome.objects.get_or_create( + overview=overview, + indicator=fdrs_indicator, + date=str(income["year"]) + "-01-01", + ) + fdrs_income.value = income_value + # TODO: Use bulk + fdrs_income.save(update_fields=("value",)) diff --git a/databank/management/commands/fdrs_annual_income.py b/databank/management/commands/fdrs_annual_income.py index 7e7e9c41ac..fca1cf7caf 100644 --- a/databank/management/commands/fdrs_annual_income.py +++ b/databank/management/commands/fdrs_annual_income.py @@ -3,6 +3,7 @@ import requests from django.conf import settings from django.core.management.base import BaseCommand +from django.db import models from sentry_sdk.crons import monitor from databank.models import CountryOverview, FDRSAnnualIncome @@ -16,20 +17,41 @@ class Command(BaseCommand): help = "Import FDRS income data" def handle(self, *args, **kwargs): - for overview in CountryOverview.objects.all(): - country_fdrs_code = overview.country.fdrs - fdrs_api = f"https://data-api.ifrc.org/api/data?indicator=KPI_IncomeLC_CHF&KPI_Don_Code={country_fdrs_code}&apiKey={settings.FDRS_APIKEY}" # noqa: E501 - fdrs_entities = requests.get(fdrs_api) + overview_qs = CountryOverview.objects.annotate( + country_fdrd=models.F("country__fdrs"), + ) + fdrs_data_count = 0 + for overview in overview_qs.all(): + country_fdrs_code = overview.country_fdrd + fdrs_entities = requests.get( + "https://data-api.ifrc.org/api/data", + params={ + "apiKey": settings.FDRS_APIKEY, + "indicator": "KPI_IncomeLC_CHF", + "KPI_Don_Code": country_fdrs_code, + }, + ) if fdrs_entities.status_code != 200: - return + continue + fdrs_entities.raise_for_status() fdrs_entities = fdrs_entities.json() - fdrs_data_count = 0 - if len(fdrs_entities["data"]): - income_list = fdrs_entities["data"][0]["data"][0]["data"] - if len(income_list): - for income in income_list: - data = {"date": str(income["year"]) + "-01-01", "value": income["value"], "overview": overview} - fdrs_data_count += 1 - FDRSAnnualIncome.objects.get_or_create(**data) + + if len(fdrs_entities["data"]) == 0: + continue + + income_list = fdrs_entities["data"][0]["data"][0]["data"] + if len(income_list) == 0: + continue + + for income in income_list: + income_value = income["value"] + fdrs_annual_income, _ = FDRSAnnualIncome.objects.get_or_create( + overview=overview, + date=str(income["year"]) + "-01-01", + ) + fdrs_annual_income.value = income_value + fdrs_annual_income.save(update_fields=("value",)) + fdrs_data_count += 1 + logger.info(f"Successfully created {fdrs_data_count} country data") diff --git a/databank/management/commands/ingest_acaps.py b/databank/management/commands/ingest_acaps.py index a02ca1cdc8..d764f9737e 100644 --- a/databank/management/commands/ingest_acaps.py +++ b/databank/management/commands/ingest_acaps.py @@ -4,6 +4,7 @@ import requests from django.conf import settings from django.core.management.base import BaseCommand +from django.db import models, transaction from sentry_sdk.crons import monitor from api.logger import logger @@ -16,31 +17,44 @@ class Command(BaseCommand): help = "Add Acaps seasonal calender data" + @transaction.atomic + def load_country(self, overview): + # Remove all existing Seasonal Calendar data for this country + AcapsSeasonalCalender.objects.filter(overview=overview).all().delete() + + name = overview.country_name + if "," in name: + name = name.split(",")[0] + response = requests.get( + "https://api.acaps.org/api/v1/seasonal-events-calendar/seasonal-calendar/", + params={"country": name}, + headers={"Authorization": "Token %s" % settings.ACAPS_API_TOKEN}, + ) + logger.info(f"Importing for country {name}") + response_data = response.json() + if "results" in response_data and len(response_data["results"]): + df = pd.DataFrame.from_records(response_data["results"]) + for df_data in df.values.tolist(): + df_country = df_data[2] + if name.lower() == df_country[0].lower(): + dict_data = { + "overview": overview, + "month": df_data[6], + "event": df_data[7], + "event_type": df_data[8], + "label": df_data[9], + "source": df_data[11], + "source_date": df_data[12], + } + # Use bulk manager + AcapsSeasonalCalender.objects.create(**dict_data) + # NOTE: Acaps throttles our requests + time.sleep(5) + def handle(self, *args, **kwargs): logger.info("Importing Acaps Data") - country_name = CountryOverview.objects.filter(country__record_type=CountryType.COUNTRY).values_list( - "country__name", flat=True + country_overview_qs = CountryOverview.objects.filter(country__record_type=CountryType.COUNTRY).annotate( + country_name=models.F("country__name"), ) - for name in country_name: - if "," in name: - name = name.split(",")[0] - SEASONAL_EVENTS_API = f"https://api.acaps.org/api/v1/seasonal-events-calendar/seasonal-calendar/?country={name}" - response = requests.get(SEASONAL_EVENTS_API, headers={"Authorization": "Token %s" % settings.ACAPS_API_TOKEN}) - logger.info(f"Importing for country {name}") - response_data = response.json() - if "results" in response_data and len(response_data["results"]): - df = pd.DataFrame.from_records(response_data["results"]) - for df_data in df.values.tolist(): - df_country = df_data[2] - if name.lower() == df_country[0].lower(): - dict_data = { - "overview": CountryOverview.objects.filter(country__name__icontains=name).first(), - "month": df_data[6], - "event": df_data[7], - "event_type": df_data[8], - "label": df_data[9], - "source": df_data[11], - "source_date": df_data[12], - } - AcapsSeasonalCalender.objects.create(**dict_data) - time.sleep(5) + for overview in country_overview_qs: + self.load_country(overview) diff --git a/databank/management/commands/ingest_climate.py b/databank/management/commands/ingest_climate.py index 7b4580f674..3fd13cd010 100644 --- a/databank/management/commands/ingest_climate.py +++ b/databank/management/commands/ingest_climate.py @@ -2,6 +2,7 @@ import requests from django.core.management.base import BaseCommand +from django.db import models from sentry_sdk.crons import monitor from api.models import CountryType @@ -16,45 +17,67 @@ class Command(BaseCommand): help = "Add minimum, maximum and Average temperature of country temperature data from source api" def handle(self, *args, **options): - for co in CountryOverview.objects.filter(country__record_type=CountryType.COUNTRY, country__iso3__isnull=False).all(): - country_iso3 = co.country.iso3 - if country_iso3: - response = requests.get( - f"https://climateknowledgeportal.worldbank.org/api/v1/cru-x0.5_climatology_tasmin,tas,tasmax,pr_climatology_monthly_1991-2020_mean_historical_cru_ts4.07_mean/{country_iso3}?_format=json" # noqa: E501 - ) - response.raise_for_status() + overview_qs = CountryOverview.objects.filter( + country__record_type=CountryType.COUNTRY, + country__iso3__isnull=False, + ).annotate( + country_iso3=models.F("country__iso3"), + ) + + for overview in overview_qs.all(): + country_iso3 = overview.country_iso3 + if not country_iso3: + continue + + response = requests.get( + f"https://climateknowledgeportal.worldbank.org/api/v1/cru-x0.5_climatology_tasmin,tas,tasmax,pr_climatology_monthly_1991-2020_mean_historical_cru_ts4.07_mean/{country_iso3}?_format=json" # noqa: E501 + ) + try: + response.raise_for_status() response_data = response.json() data = response_data.get("data", {}) - if data: - precipation = data.get("pr", {}) - average_temp = data.get("tas", {}) - min_temp = data.get("tasmin", {}) - max_temp = data.get("tasmax", {}) - merged_data = { - country: { - date: ( - precipation[country][date], - average_temp[country][date], - min_temp[country][date], - max_temp[country][date], - ) - for date in precipation[country] - } - for country in precipation + if not data: + continue + + precipitation = data.get("pr", {}) + average_temp = data.get("tas", {}) + min_temp = data.get("tasmin", {}) + max_temp = data.get("tasmax", {}) + merged_data = { + country: { + date: ( + precipitation[country][date], + average_temp[country][date], + min_temp[country][date], + max_temp[country][date], + ) + for date in precipitation[country] } - for key, value in merged_data.items(): - for k, v in value.items(): - year_month = k.split("-") - data = { - "year": year_month[0], - "month": year_month[1], - "max_temp": v[3], - "min_temp": v[2], - "avg_temp": v[1], - "precipitation": v[0], - } - CountryKeyClimate.objects.create(overview=co, **data) + for country in precipitation + } + + for value in merged_data.values(): + for k, v in value.items(): + year_month = k.split("-") + country_key_climate, _ = CountryKeyClimate.objects.get_or_create( + overview=overview, + year=year_month[0], + month=year_month[1], + ) + country_key_climate.max_temp = v[3] + country_key_climate.min_temp = v[2] + country_key_climate.avg_temp = v[1] + country_key_climate.precipitation = v[0] + # TODO: Use bulk manager + country_key_climate.save( + update_fields=( + "max_temp", + "min_temp", + "avg_temp", + "precipitation", + ) + ) except Exception: logger.error("Error in ingesting climate data", exc_info=True) continue diff --git a/databank/management/commands/ingest_hdr.py b/databank/management/commands/ingest_hdr.py index 01b9fca82c..28ec9e6cf7 100644 --- a/databank/management/commands/ingest_hdr.py +++ b/databank/management/commands/ingest_hdr.py @@ -2,9 +2,10 @@ import requests from django.core.management.base import BaseCommand +from django.db import models from sentry_sdk.crons import monitor -from databank.models import CountryOverview as CO +from databank.models import CountryOverview from main.sentry import SentryMonitor logger = logging.getLogger(__name__) @@ -15,14 +16,26 @@ class Command(BaseCommand): help = "Add HDR GII data" def handle(self, *args, **kwargs): - for overview in CO.objects.all(): - HDR_API = f"https://api.hdrdata.org/CountryIndicators/filter?country={overview.country.iso3}&year=2021&indicator=gii" - hdr_entities = requests.get(HDR_API) + overview_qs = CountryOverview.objects.annotate( + country_iso3=models.F("country__iso3"), + ) + for overview in overview_qs.all(): + hdr_entities = requests.get( + "https://api.hdrdata.org/CountryIndicators/filter", + params={ + "country": overview.country_iso3, + "year": 2021, + "indicator": "gii", + }, + ) if hdr_entities.status_code != 200: continue hdr_entities.raise_for_status() hdr_entities = hdr_entities.json() - if len(hdr_entities): - hdr_gii = hdr_entities[0]["value"] - overview.hdr_gii = hdr_gii - overview.save(update_fields=["hdr_gii"]) + + if len(hdr_entities) == 0: + continue + + hdr_gii = hdr_entities[0]["value"] + overview.hdr_gii = hdr_gii + overview.save(update_fields=["hdr_gii"]) diff --git a/databank/models.py b/databank/models.py index 2bcd3e1cfd..19f9fdbf01 100644 --- a/databank/models.py +++ b/databank/models.py @@ -435,6 +435,7 @@ class FDRSIncome(models.Model): indicator = models.ForeignKey(FDRSIndicator, on_delete=models.CASCADE, verbose_name=_("FDRS Indicator")) value = models.FloatField(verbose_name=_("value"), null=True, blank=True) + # TODO: Add unique_together (overview, date, indicator) def __str__(self): return f"{self.overview.country.name} - {self.date} - {self.indicator.title} - {self.value}" @@ -446,6 +447,7 @@ class FDRSAnnualIncome(models.Model): date = models.DateField(verbose_name=_("date")) value = models.FloatField(verbose_name=_("value"), null=True, blank=True) + # TODO: Add unique_together (overview, date) def __str__(self): return f"{self.overview.country.name} - {self.date} - {self.value}"