diff --git a/README.md b/README.md
index 90bfac51..28a8bffd 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,45 @@
 # 🌐 Blue-GEO
 
-🌐 `blue-geo` is wip 🔥
+🔷 [ukraine-timemap](#ukraine-timemap-) 🇺🇦 🔷
+
+---
+
+## ukraine-timemap 🇺🇦
+
+[`ukraine-timemap`](./notebooks_and_scripts/.abcli/ukraine-timemap/) ingests the [Civilian Harm in Ukraine TimeMap](https://github.com/bellingcat/ukraine-timemap) dataset, available through [this UI](https://ukraine.bellingcat.com/) and [this API](https://bellingcat-embeds.ams3.cdn.digitaloceanspaces.com/production/ukr/timemap/api.json), and generates a `geojson`, a QGIS project, and more.
+
+```bash
+ > ukraine_timemap help
+ukraine_timemap browse \
+    [dataset|github]
+ . browse ukraine-timemap.
+ukraine_timemap ingest \
+    [dryrun,~upload] \
+    [-|] \
+    [--verbose 1]
+ . ingest the latest dataset from https://github.com/bellingcat/ukraine-timemap.
+```
+
+example use,
+
+```
+@select ukraine-timemap-$(@@timestamp)
+ukraine_timemap ingest - . --verbose 1
+@open . QGIS
+@publish tar .
+```
+
+![image](https://github.com/kamangir/assets/blob/main/nbs/ukraine-timemap/ingest_log.png?raw=true)
+
+latest ingested object: [ukraine-timemap.tar.gz](https://kamangir-public.s3.ca-central-1.amazonaws.com/ukraine_timemap.tar.gz), sandbox: [ukraine-timemap/sandbox.ipynb](./notebooks/ukraine-timemap/sandbox.ipynb).
+
+last build [🔗](https://kamangir-public.s3.ca-central-1.amazonaws.com/ukraine_timemap/ukraine_timemap.png)
+
+![image](https://kamangir-public.s3.ca-central-1.amazonaws.com/ukraine_timemap/ukraine_timemap.png)
+
+![image](https://github.com/kamangir/assets/blob/main/nbs/ukraine-timemap/QGIS.png?raw=true)
+
+more: https://arash-kamangir.medium.com/%EF%B8%8F-openai-experiments-93-bf0cee062693
 
 ---
 
diff --git a/blue_geo/.abcli/aka.sh b/blue_geo/.abcli/aka.sh
index e5ca515c..4af8b912 100644
--- a/blue_geo/.abcli/aka.sh
+++ b/blue_geo/.abcli/aka.sh
@@ -1 +1,5 @@
 #! 
/usr/bin/env bash
+# alias: `ukraine` forwards all of its arguments to `ukraine_timemap`.
+function ukraine() {
+    ukraine_timemap "$@"
+}
diff --git a/blue_geo/.abcli/tests/ukraine_timemap.sh b/blue_geo/.abcli/tests/ukraine_timemap.sh
new file mode 100644
index 00000000..5fd25630
--- /dev/null
+++ b/blue_geo/.abcli/tests/ukraine_timemap.sh
@@ -0,0 +1,25 @@
+#! /usr/bin/env bash
+# test: runs `ukraine_timemap ingest` on a new timestamped object, then calls abcli_publish on it twice.
+function test_blue_geo_ukraine_timemap() {
+    local options=$1
+    local do_dryrun=$(abcli_option_int "$options" dryrun 0)
+    local do_upload=$(abcli_option_int "$options" upload 0)
+
+    local object_name=ukraine-timemap-$(abcli_string_timestamp_short)
+    # $2 is appended to the ingest options; arguments from $3 on are forwarded after the object name.
+    abcli_eval dryrun=$do_dryrun \
+        ukraine_timemap ingest \
+        upload=$do_upload,$2 \
+        $object_name \
+        "${@:3}"
+    # first publish call: options `as=ukraine_timemap,tar`.
+    abcli_publish \
+        as=ukraine_timemap,tar \
+        $object_name
+    # second publish call: options `as=ukraine_timemap,suffix=.png`.
+    abcli_publish \
+        as=ukraine_timemap,suffix=.png \
+        $object_name
+
+    return 0
+}
diff --git a/blue_geo/.abcli/ukraine-timemap.sh b/blue_geo/.abcli/ukraine-timemap.sh
new file mode 100644
index 00000000..42a058a2
--- /dev/null
+++ b/blue_geo/.abcli/ukraine-timemap.sh
@@ -0,0 +1,27 @@
+#! /usr/bin/env bash
+# entry point: `ukraine_timemap <task> [args]` dispatches to `ukraine_timemap_<task> [args]`.
+function ukraine_timemap() {
+    local task=$(abcli_unpack_keyword $1 help)
+
+    if [ "$task" == "help" ]; then # the arguments are forwarded unchanged, so each sub-command sees `help` and shows its own usage.
+        ukraine_timemap_browse "$@"
+        ukraine_timemap_ingest "$@"
+        return
+    fi
+    # NOTE(review): `init` is handed to notebooks_and_scripts with the full argument list - confirm this is intended.
+    if [[ "$task" == "init" ]]; then
+        notebooks_and_scripts "$@"
+        return
+    fi
+    # any other task runs the shell function ukraine_timemap_<task>, when one is defined, without the task name.
+    local function_name=ukraine_timemap_$task
+    if [[ $(type -t $function_name) == "function" ]]; then
+        $function_name "${@:2}"
+        return
+    fi
+
+    abcli_log_error "-ukraine_timemap: $task: command not found."
+    return 1
+}
+# presumably sources the scripts under ./ukraine-timemap/ next to this file - confirm against abcli_source_path.
+abcli_source_path - caller,suffix=/ukraine-timemap
diff --git a/blue_geo/.abcli/ukraine-timemap/browse.sh b/blue_geo/.abcli/ukraine-timemap/browse.sh
new file mode 100644
index 00000000..a51bba60
--- /dev/null
+++ b/blue_geo/.abcli/ukraine-timemap/browse.sh
@@ -0,0 +1,24 @@
+#! 
/usr/bin/env bash
+# usage: ukraine_timemap browse [dataset|github]
+function ukraine_timemap_browse() {
+    local options=$1
+
+    if [ $(abcli_option_int "$options" help 0) == 1 ]; then
+        options="dataset|github"
+        abcli_show_usage "ukraine_timemap browse$ABCUL[$options]" \
+            "browse ukraine-timemap."
+        return
+    fi
+
+    local do_dataset=$(abcli_option_int "$options" dataset 0)
+    local do_github=$(abcli_option_int "$options" github 0)
+    # the UI is the default; github, then dataset, override it, so dataset wins when both are set.
+    local url="https://ukraine.bellingcat.com/"
+    [[ "$do_github" == 1 ]] &&
+        url="https://github.com/bellingcat/ukraine-timemap"
+    [[ "$do_dataset" == 1 ]] &&
+        url="https://bellingcat-embeds.ams3.cdn.digitaloceanspaces.com/production/ukr/timemap/api.json"
+
+    abcli_browse $url
+
+}
diff --git a/blue_geo/.abcli/ukraine-timemap/ingest.sh b/blue_geo/.abcli/ukraine-timemap/ingest.sh
new file mode 100644
index 00000000..24241f2c
--- /dev/null
+++ b/blue_geo/.abcli/ukraine-timemap/ingest.sh
@@ -0,0 +1,49 @@
+#! /usr/bin/env bash
+# usage: ukraine_timemap ingest <options> <object-name> <open-options> [arguments passed to the python module]
+function ukraine_timemap_ingest() {
+    local options=$1
+
+    if [ $(abcli_option_int "$options" help 0) == 1 ]; then
+        options="${EOP}~copy_template,dryrun,~upload$EOPE"
+        local open_options="open$EOP,~QGIS$EOPE"
+        abcli_show_usage "ukraine_timemap ingest$ABCUL$options$ABCUL-$EOP|$EOPE$ABCUL$open_options" \
+            "ingest the latest dataset from https://github.com/bellingcat/ukraine-timemap."
+        return
+    fi
+
+    local do_dryrun=$(abcli_option_int "$options" dryrun 0)
+    local do_copy_template=$(abcli_option_int "$options" copy_template 1)
+    local do_upload=$(abcli_option_int "$options" upload $(abcli_not $do_dryrun)) # presumably defaults to the opposite of dryrun - confirm abcli_not.
+
+    local object_name=$(abcli_clarify_object $2 ukraine-timemap-$(abcli_string_timestamp_short))
+    # clone $UKRAINE_TIMEMAP_TEMPLATE into the object, then remove any ukraine_timemap.* files that came with it.
+    if [[ "$do_copy_template" == 1 ]]; then
+        abcli_clone \
+            $UKRAINE_TIMEMAP_TEMPLATE \
+            $object_name \
+            ~meta
+        rm -v \
+            $abcli_object_root/$object_name/ukraine_timemap.*
+    fi
+
+    abcli_eval dryrun=$do_dryrun \
+        python3 -m blue_geo.ukraine_timemap \
+        ingest \
+        --object_name $object_name \
+        "${@:4}"
+
+    abcli_tag set \
+        $object_name \
+        ukraine_timemap_ingest
+
+    [[ "$do_upload" == 1 ]] &&
+        abcli_upload - $object_name
+    # $3 carries the open options; the object is opened only when its `open` option is 1.
+    local open_options=$3
+    local do_open=$(abcli_option_int "$open_options" open 0)
+    [[ "$do_open" == 1 ]] &&
+        abcli_open $object_name \
+            QGIS,$open_options
+
+    return 0
+}
diff --git a/blue_geo/__init__.py b/blue_geo/__init__.py
index 2404fbce..66a69078 100644
--- a/blue_geo/__init__.py
+++ b/blue_geo/__init__.py
@@ -4,6 +4,6 @@
 
 DESCRIPTION = f"{ICON} AI for precise geospatial data analysis and visualization."
 
-VERSION = "4.7.1"
+VERSION = "4.8.1"
 
 REPO_NAME = "blue-geo"
diff --git a/blue_geo/node/__init__.py b/blue_geo/node/__init__.py
deleted file mode 100644
index ebce3bc1..00000000
--- a/blue_geo/node/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from blue_geo import NAME
-
-NAME = f"{NAME}.node"
-
-
diff --git a/blue_geo/node/functions.py b/blue_geo/node/functions.py
deleted file mode 100644
index 7cd4befe..00000000
--- a/blue_geo/node/functions.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from blue_geo.logger import logger
-
-
-def func(arg: str) -> bool:
-    logger.info(f"arg:{arg}")
-    return True
-
-
diff --git a/blue_geo/tests/test_ukraine_timemap_ingest.py b/blue_geo/tests/test_ukraine_timemap_ingest.py
new file mode 100644
index 00000000..8790a8dc
--- /dev/null
+++ b/blue_geo/tests/test_ukraine_timemap_ingest.py
@@ -0,0 +1,11 @@
+from abcli.modules import objects
+from blue_geo.ukraine_timemap.functions import ingest
+
+
+def test_ukraine_timemap_ingest():
+    object_name = objects.unique_object()
+    # runs the real ingest with its default arguments, i.e. it downloads from the API.
+    success, gdf = ingest(object_name)
+    assert success
+
+    assert not gdf.empty
diff --git a/blue_geo/ukraine_timemap/__init__.py b/blue_geo/ukraine_timemap/__init__.py
new file mode 100644
index 00000000..e5f2fa70
--- /dev/null
+++ b/blue_geo/ukraine_timemap/__init__.py
@@ -0,0 +1,3 @@
+from blue_geo import NAME
+# sub-module name: the package NAME suffixed with ".ukraine_timemap"; used in log messages and by the CLI.
+NAME = f"{NAME}.ukraine_timemap"
diff --git a/blue_geo/node/__main__.py b/blue_geo/ukraine_timemap/__main__.py
similarity index 62%
rename from blue_geo/node/__main__.py
rename to blue_geo/ukraine_timemap/__main__.py
index 9ab7a538..94506637 100644
--- a/blue_geo/node/__main__.py
+++ b/blue_geo/ukraine_timemap/__main__.py
@@ -1,7 +1,7 @@
 import argparse
 from blue_geo import VERSION
-from blue_geo.node import NAME
-from blue_geo.node.functions import func
+from blue_geo.ukraine_timemap import NAME
+from blue_geo.ukraine_timemap.functions import ingest
 from blue_geo.logger import logger
 from blueness.argparse.generic import sys_exit
 
@@ -9,22 +9,18 @@
 parser.add_argument(
"task", type=str, - help="task", + help="ingest", ) parser.add_argument( - "--arg", - type=bool, - default=0, - help="0|1", + "--object_name", + type=str, ) args = parser.parse_args() success = False -if args.task == "task": - success = func(args.arg) +if args.task == "ingest": + success, _ = ingest(object_name=args.object_name) else: success = None sys_exit(logger, NAME, args.task, success) - - diff --git a/blue_geo/ukraine_timemap/functions.py b/blue_geo/ukraine_timemap/functions.py new file mode 100644 index 00000000..dea7bee4 --- /dev/null +++ b/blue_geo/ukraine_timemap/functions.py @@ -0,0 +1,155 @@ +from abcli import file, fullname, string +from datetime import datetime +from collections import Counter +from typing import Tuple +from abcli.modules import objects +import geopandas as gpd +from geojson import Point +from blue_geo import VERSION +from blue_geo.ukraine_timemap import NAME +from blue_geo.logger import logger +import matplotlib.pyplot as plt +from typing import Dict +from typing import Any + +API_URL = "https://bellingcat-embeds.ams3.cdn.digitaloceanspaces.com/production/ukr/timemap/api.json" + +DESCRIPTION = "Civilian Harm in Ukraine TimeMap" + + +def ingest( + object_name: str, + do_save: bool = True, + do_visualize: bool = True, + log: bool = True, +) -> Tuple[bool, gpd.GeoDataFrame]: + logger.info(f"{NAME}.ingest -> {object_name}") + filename = objects.path_of( + "ukraine_timemap.json", + object_name, + create=True, + ) + + gdf = gpd.GeoDataFrame() + metadata: Dict[str, Any] = { + "description": DESCRIPTION, + "created_by": f"{NAME}-{VERSION}.{fullname()}", + "creation_date": string.pretty_date(), + } + + success = file.download(API_URL, filename) + if not success: + return success, gdf + + success, list_of_events = file.load_json(filename) + if not success: + return success, gdf + logger.info("{:,} event(s) ingested from the api.".format(len(list_of_events))) + metadata["api_count"] = len(list_of_events) + + records = [] + failure_count = 0 + 
for event in list_of_events: + try: + point = Point( + ( + float(event["longitude"]), + float(event["latitude"]), + ) + ) + record = { + "geometry": point, + "sources": ", ".join(event["sources"]), + "id": event["id"], + "description": event["description"], + "date": event["date"], + "date_obj": datetime.strptime(event["date"], "%m/%d/%Y").date(), + "location": event["location"], + "graphic": event["graphic"], + "associations": ", ".join(event["associations"]), + "time": event["time"], + } + except Exception as e: + logger.error(f"ingest failed:\nevent: {event}\nerror: {e}") + failure_count += 1 + continue + + records.append(record) + gdf = gpd.GeoDataFrame(records) + + gdf.set_crs(epsg=4326, inplace=True) # WGS 84 + + gdf = gdf.sort_values(by="date_obj", ascending=False) + + logger.info("{:,} event(s) -ingested-> gdf.".format(len(gdf))) + metadata["ingested_count"] = len(gdf) + if failure_count: + logger.error(f"{failure_count:,} event(s) failed to ingest.") + metadata["failure_count"] = failure_count + + histogram = Counter(list(gdf["date_obj"].values)) + + dates = sorted(histogram.keys()) + logger.info( + "{:,} day(s) of events, starting {}, until {}.".format( + len(dates), + min(dates), + max(dates), + ) + ) + metadata["range"] = [min(dates), max(dates)] + + if do_visualize: + values = [histogram[date] for date in dates] + + plt.figure(figsize=(10, 5)) + plt.bar(dates, values, color="blue") + plt.xlabel( + " | ".join( + [ + "Date", + object_name, + f"{NAME}-{VERSION}.{fullname()}", + ] + ) + ) + plt.ylabel("# Events / Day") + plt.title(DESCRIPTION) + + date_count = 20 + if len(dates) > date_count: + selected_dates = [ + dates[i] for i in range(0, len(dates), len(dates) // date_count) + ] + if dates[-1] not in selected_dates: + selected_dates.append(dates[-1]) + else: + selected_dates = dates + plt.xticks(selected_dates, rotation=45) + + plt.tight_layout() + plt.grid(True) + + if do_save: + file.save_fig( + objects.path_of("ukraine_timemap.png", object_name), + 
log=log, + ) + + gdf["date"] = gdf["date_obj"].apply(lambda d: f"{d.year}/{d.month:02}/{d.day:02}") + + gdf = gdf.drop(columns=["date_obj"]) + + if do_save and not gdf.empty: + if not file.save_geojson( + objects.path_of("ukraine_timemap.geojson", object_name), + gdf, + log=log, + ) or not file.save_yaml( + objects.path_of("metadata.yaml", object_name), + metadata, + log=log, + ): + return False, gdf + + return True, gdf