From 968b830c8d97fee15f1f6a5364db76f5bcd0672d Mon Sep 17 00:00:00 2001 From: "martin.holmer@gmail.com" Date: Thu, 24 Oct 2024 15:50:34 -0400 Subject: [PATCH] Ensure RECID values are unique --- Makefile | 4 +++- tmd/datasets/tmd.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f73f7d9e..1c2d7197 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,9 @@ tmd/storage/output/tmd_weights.csv.gz: \ tmd/storage/output/cached_files: \ tmd/storage/output/tmd.csv.gz \ tmd/storage/output/tmd_growfactors.csv \ - tmd/storage/output/tmd_weights.csv.gz + tmd/storage/output/tmd_weights.csv.gz \ + tmd/storage/__init__.py \ + tmd/create_taxcalc_cached_files.py python tmd/create_taxcalc_cached_files.py .PHONY=tmd_files diff --git a/tmd/datasets/tmd.py b/tmd/datasets/tmd.py index c7069aa1..87037ca6 100644 --- a/tmd/datasets/tmd.py +++ b/tmd/datasets/tmd.py @@ -29,6 +29,9 @@ def create_tmd_2021(): print("Combining PUF and CPS nonfilers...") combined = pd.concat([tc_puf_21, tc_cps_21], ignore_index=True) + # ensure RECID values are unique + combined["RECID"] = np.arange(1, len(combined) + 1, dtype=int) + trace1("A", combined) print("Adding Tax-Calculator outputs for 2021...")