diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 46567b15..2449f4a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: "v0.9.1" + rev: "v0.9.2" hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/scripts/cligen/assign_climate_file.py b/scripts/cligen/assign_climate_file.py index 54ef2eed..72698ef1 100644 --- a/scripts/cligen/assign_climate_file.py +++ b/scripts/cligen/assign_climate_file.py @@ -2,7 +2,8 @@ import sys -from pyiem.util import get_dbconn, logger +from pyiem.database import get_dbconn +from pyiem.util import logger from pydep.util import get_cli_fname @@ -23,24 +24,26 @@ def main(argv): cursor.execute( """ SELECT st_x(st_pointn(st_transform(geom, 4326), 1)), - st_y(st_pointn(st_transform(geom, 4326), 1)), fid, climate_file - from flowpaths WHERE scenario = %s + st_y(st_pointn(st_transform(geom, 4326), 1)), fid + from flowpaths WHERE scenario = %s and climate_file_id is null """, (scenario,), ) ok = wrong = updated = 0 for row in cursor: fn = get_cli_fname(row[0], row[1], clscenario) - if row[3] == fn: - ok += 1 - continue - if row[3] is not None and row[3] != fn: - wrong += 1 cursor2.execute( - "UPDATE flowpaths SET climate_file = %s where fid = %s", - (fn, row[2]), + "update flowpaths SET " + "climate_file_id = (select id from climate_files " + "where filepath = %s and scenario = %s) WHERE fid = %s", + (fn, clscenario, row[2]), ) updated += 1 + if updated % 1000 == 0: + LOG.info("updated %s rows", updated) + cursor2.close() + pgconn.commit() + cursor2 = pgconn.cursor() LOG.info( "%s rows updated: %s ok: %s wrong: %s", cursor.rowcount, diff --git a/scripts/cligen/locate_clifile.py b/scripts/cligen/locate_clifile.py index 944abf0f..e7300ac7 100644 --- a/scripts/cligen/locate_clifile.py +++ b/scripts/cligen/locate_clifile.py @@ -2,11 +2,12 @@ import os import subprocess -import sys import click -from pyiem.database import get_dbconn +import pandas as pd +from pyiem.database import get_sqlalchemy_conn from pyiem.util import logger +from sqlalchemy import text from pydep.util import get_cli_fname @@ -34,46 +35,39 @@ def finder(lon, lat, clscenario): @click.command() -@click.option("--scenario", type=int, required=True, help="Scenario ID") +@click.option("--scenario", "-s", type=int, required=True, help="Scenario ID") def main(scenario: int): """Go Main Go.""" - pgconn = get_dbconn("idep") - cursor = pgconn.cursor() - # This given scenario may use a different climate scenario's files. - cursor.execute( - "SELECT climate_scenario from scenarios where id = %s", (scenario,) - ) - clscenario = cursor.fetchone()[0] - LOG.info("DEP scenario: %s uses clscenario: %s", scenario, clscenario) + # Load up the climate_files + with get_sqlalchemy_conn("idep") as conn: + clidf = pd.read_sql( + text(""" + select filepath, st_x(geom) as lon, st_y(geom) as lat + from climate_files WHERE scenario = :scenario + """), + conn, + params={"scenario": scenario}, + index_col=None, + ) + LOG.info("Found %s climate files", len(clidf.index)) - cursor.execute( - "SELECT climate_file, fid from flowpaths where scenario = %s", - (scenario,), - ) created = 0 - for row in cursor: - fn = row[0] - if fn is None: - LOG.error("FATAL, found null climate_file, run assign first") - return + for _, row in clidf.iterrows(): + fn = row["filepath"] if os.path.isfile(fn): continue - created += 1 - # /i/0/cli/092x041/092.30x041.22.cli - lon, lat = fn.split("/")[-1][:-4].split("x") - lon = 0 - float(lon) - lat = float(lat) - copyfn, xoff, yoff = finder(lon, lat, clscenario) + copyfn, _, _ = finder(row["lon"], row["lat"], scenario) if copyfn is None: - LOG.info("missing %s for fid: %s ", fn, row[1]) - sys.exit() - LOG.info("cp %s %s xoff: %s yoff: %s", copyfn, fn, xoff, yoff) + LOG.info("FATAL: Failed to find a file for %s", row["filepath"]) + return mydir = os.path.dirname(fn) if not os.path.isdir(mydir): os.makedirs(mydir) + LOG.info("Copying %s to %s", copyfn, fn) subprocess.call(["cp", copyfn, fn]) # Now fix the header to match its location subprocess.call(["python", "edit_cli_header.py", fn]) + created += 1 LOG.info("added %s files", created) diff --git a/scripts/import/README.md b/scripts/import/README.md index 188ec27a..9a7ea6c5 100644 --- a/scripts/import/README.md +++ b/scripts/import/README.md @@ -22,6 +22,19 @@ is one file per HUC12. 1. On IEM run `cligen/locate_clifile.py --scenario=` 1. On IEM run `util/make_dirs.py --scenario=` +Local laptop queries to support the above + + copy (select * from fields where scenario = 0) to '/tmp/fields.db'; + copy (select o.* from flowpath_ofes o, fields f where o.field_id = f.field_id and scenario = 0) to '/tmp/ofes.db'; + copy (select * from flowpaths where scenario = 0) to '/tmp/flowpaths.db'; + +and corresponding server side queries: + + delete from field_operations o USING fields f WHERE o.field_id = f.field_id and f.scenario = 0; + delete from flowpath_ofes o USING fields f WHERE o.field_id = f.field_id and f.scenario = 0; + delete from fields where scenario = 0; + delete from flowpaths where scenario = 0; + This query finds any new HUC12s and inserts the geometry into a table. insert into huc12 diff --git a/scripts/import/flowpath_importer.py b/scripts/import/flowpath_importer.py index ab924c19..ccf91d8c 100644 --- a/scripts/import/flowpath_importer.py +++ b/scripts/import/flowpath_importer.py @@ -293,7 +293,7 @@ def insert_ofe(cursor, gdf, db_fid, ofe, ofe_starts): groupid = "_".join( [ str(get_slope_class(bulk_slope * 100.0)), - str(get_kwfact_class(kwfact)), + "N" if kwfact is None else str(get_kwfact_class(kwfact)), firstpt["management"][0], firstpt["GenLU"], ] @@ -321,7 +321,7 @@ def insert_ofe(cursor, gdf, db_fid, ofe, ofe_starts): ) -def get_cli_fname_and_id(cursor, lon, lat, scenario): +def get_cli_fname_and_id(cursor, lon, lat, scenario) -> tuple[str, int]: """Get database entry or add one.""" clifn = get_cli_fname(lon, lat, scenario) cursor.execute( @@ -329,12 +329,19 @@ def get_cli_fname_and_id(cursor, lon, lat, scenario): (scenario, clifn), ) if cursor.rowcount == 0: + # Danger, we are doing lame things from the database on my laptop to + # how it syncs to the server, so we need to manually track the next + # sequence value + cursor.execute("select max(id) + 1 from climate_files") + cli_id = cursor.fetchone()[0] cursor.execute( - "INSERT into climate_files(scenario, filepath) values (%s, %s) " - "RETURNING id", - (scenario, clifn), + "INSERT into climate_files(id, scenario, filepath, geom) " + "values (%s, %s, %s, ST_Point(%s, %s, 4326))", + (cli_id, scenario, clifn, lon, lat), ) - return clifn, cursor.fetchone()[0] + else: + cli_id = cursor.fetchone()[0] + return clifn, cli_id def process_flowpath( @@ -586,7 +593,12 @@ def main(scenario, datadir, mpe: int, cl: Optional[str], cm: Optional[str]): if i > 0 and i % 100 == 0: pgconn.commit() cursor = pgconn.cursor() - fp_df, fld_df = get_data(fn) + try: + fp_df, fld_df = get_data(fn) + except Exception as exp: + logging.error(exp, exc_info=True) + print(huc12) + return # Sometimes we get no flowpaths, so skip writing those huc12 = process(cursor, scenario, fp_df, fld_df) if not fp_df.empty: diff --git a/scripts/import/package_myhucs.py b/scripts/import/package_myhucs.py index a2023561..217a2de9 100644 --- a/scripts/import/package_myhucs.py +++ b/scripts/import/package_myhucs.py @@ -21,7 +21,9 @@ def main(scenario: int): LOG.info("removed old dep.tar file") os.unlink("dep.tar") for huc in tqdm(myhucs): - cmd = f"tar -uf dep.tar {{man,slp,sol,meta}}/{huc[:8]}/{huc[8:]}" + cmd = ( + f"tar -uf dep.tar {{man,prj,rot,slp,sol,meta}}/{huc[:8]}/{huc[8:]}" + ) subprocess.call(cmd, shell=True)