From 3ee7f87f583e7687b6d4c3be0b91aa9253e591b8 Mon Sep 17 00:00:00 2001
From: Abi Hunter
Date: Fri, 2 Feb 2024 15:45:26 -0500
Subject: [PATCH 1/2] add test for dedup config validation

---
 Review notes (free-form text between the "---" line and the diff is
 commentary, not part of the change):
  * This patch adds validate_dedup_rules as a module-level function (its
    "def" is at column 0), while the new test calls
    ConfigManager.validate_dedup_rules. PATCH 2/2 moves the function onto
    the class as a @staticmethod.
  * The test loops with "for test in tests:", which yields the dict keys
    (strings), so test["data"] indexes a string. PATCH 2/2 changes the loop
    to tests.values().
  * The fixture is built with gpd.DataFrame; PATCH 2/2 changes it to
    gpd.GeoDataFrame.
  * Property names are looked up in gdf.attrs here; PATCH 2/2 switches the
    lookup to gdf.columns.
  * NOTE(review): line breaks and indentation in this copy were restored
    from a whitespace-collapsed source. Line counts match the hunk headers,
    but the indentation of the color_list_from_cmaps context lines is
    assumed (4 spaces) - confirm against the original commit before
    applying.

 pdgstaging/ConfigManager.py | 14 +++++++++++++-
 tests/test_deduplicator.py  | 16 ++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_deduplicator.py

diff --git a/pdgstaging/ConfigManager.py b/pdgstaging/ConfigManager.py
index 212c59f..d0bbfb7 100644
--- a/pdgstaging/ConfigManager.py
+++ b/pdgstaging/ConfigManager.py
@@ -1471,4 +1471,16 @@ def color_list_from_cmaps(cmap_name):
     pal_len = 10 if cmap.N > 10 else cmap.N
     rgb_vals = (cmap.discrete(pal_len).colors * 255).astype(int).tolist()
     rgb_hex = [f'#{i:02x}{j:02x}{k:02x}' for i, j, k in rgb_vals]
-    return rgb_hex
\ No newline at end of file
+    return rgb_hex
+
+
+def validate_dedup_rules(gdf, config):
+    gdf_attrs = gdf.attrs
+    allowed_comp_operators = ['smaller', 'larger']
+    for rule in config:
+        if rule[0] not in gdf_attrs.keys():
+            raise ValueError(
+                f'invalid deduplication rule: property {rule[0]} does not exist in dataset')
+        if rule[1] not in allowed_comp_operators:
+            raise ValueError(
+                f'invalid deduplication rule: second parameter must be one of {allowed_comp_operators}')
diff --git a/tests/test_deduplicator.py b/tests/test_deduplicator.py
new file mode 100644
index 0000000..9159578
--- /dev/null
+++ b/tests/test_deduplicator.py
@@ -0,0 +1,16 @@
+import geopandas as gpd
+import shapely
+from pdgstaging.ConfigManager import ConfigManager
+
+def test_validate_dedup_rules():
+    tests = {
+        "valid_rule1": {
+            "data": gpd.DataFrame({
+                "col1": ["a", "b"],
+                "geometry": [shapely.Point(1, 1), shapely.Point(1,2)]
+            }),
+            "rules": [["col1", "smaller"]]
+        }
+    }
+    for test in tests:
+        ConfigManager.validate_dedup_rules(test["data"], test["rules"])

From 0c42b6f2fd381b63e75828b6658a66b91e9074df Mon Sep 17 00:00:00 2001
From: Abi Hunter
Date: Mon, 5 Feb 2024 10:51:58 -0500
Subject: [PATCH 2/2] add dedup_rule validation and test for same

---
 Review notes (free-form text between the "---" line and the diff is
 commentary, not part of the change):
  * validate_dedup_rules becomes a @staticmethod of the class, placed just
    before validate_palette, and now checks rule[0] against gdf.columns
    instead of gdf.attrs. The module-level copy is removed.
  * The method prints "cols: ..." on every call. NOTE(review): this looks
    like leftover debugging output - confirm before merging.
  * rule[0] and rule[1] are indexed without first checking that each rule
    is a two-element sequence.
  * The test still exercises a single valid rule only; neither ValueError
    branch is covered.
  * NOTE(review): line breaks and indentation in this copy were restored
    from a whitespace-collapsed source. Line counts match the hunk headers,
    but the indentation of the color_list_from_cmaps context line is
    assumed (4 spaces) - confirm against the original commit before
    applying.

 pdgstaging/ConfigManager.py | 23 +++++++++++++----------
 tests/test_deduplicator.py  |  5 +++--
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/pdgstaging/ConfigManager.py b/pdgstaging/ConfigManager.py
index d0bbfb7..b78172f 100644
--- a/pdgstaging/ConfigManager.py
+++ b/pdgstaging/ConfigManager.py
@@ -1433,6 +1433,19 @@ def list_updates(self):
 
         return updates
 
+    @staticmethod
+    def validate_dedup_rules(gdf, config):
+        gdf_cols = gdf.columns
+        print("cols: " + str(gdf_cols))
+        allowed_comp_operators = ['smaller', 'larger']
+        for rule in config:
+            if rule[0] not in gdf_cols:
+                raise ValueError(
+                    f'invalid deduplication rule: property {rule[0]} does not exist in dataset')
+            if rule[1] not in allowed_comp_operators:
+                raise ValueError(
+                    f'invalid deduplication rule: second parameter must be one of {allowed_comp_operators}')
+
     @staticmethod
     def validate_palette(palette):
         if isinstance(palette, list):
@@ -1474,13 +1487,3 @@ def color_list_from_cmaps(cmap_name):
     return rgb_hex
 
 
-def validate_dedup_rules(gdf, config):
-    gdf_attrs = gdf.attrs
-    allowed_comp_operators = ['smaller', 'larger']
-    for rule in config:
-        if rule[0] not in gdf_attrs.keys():
-            raise ValueError(
-                f'invalid deduplication rule: property {rule[0]} does not exist in dataset')
-        if rule[1] not in allowed_comp_operators:
-            raise ValueError(
-                f'invalid deduplication rule: second parameter must be one of {allowed_comp_operators}')
diff --git a/tests/test_deduplicator.py b/tests/test_deduplicator.py
index 9159578..d931621 100644
--- a/tests/test_deduplicator.py
+++ b/tests/test_deduplicator.py
@@ -2,15 +2,16 @@
 import shapely
 from pdgstaging.ConfigManager import ConfigManager
 
+
 def test_validate_dedup_rules():
     tests = {
         "valid_rule1": {
-            "data": gpd.DataFrame({
+            "data": gpd.GeoDataFrame({
                 "col1": ["a", "b"],
                 "geometry": [shapely.Point(1, 1), shapely.Point(1,2)]
             }),
             "rules": [["col1", "smaller"]]
         }
     }
-    for test in tests:
+    for test in tests.values():
         ConfigManager.validate_dedup_rules(test["data"], test["rules"])