diff --git a/tmd/create_taxcalc_cached_files.py b/tmd/create_taxcalc_cached_files.py index 1bc97942..8f292cbb 100644 --- a/tmd/create_taxcalc_cached_files.py +++ b/tmd/create_taxcalc_cached_files.py @@ -40,9 +40,11 @@ def create_cached_files(): varray = calc.array(vname) fpath = STORAGE_FOLDER / "output" / f"cached_{vname}.npy" np.save(fpath, varray, allow_pickle=False) + + # provide timestamp for Makefile fpath = STORAGE_FOLDER / "output" / "cached_files" with open(fpath, "w", encoding="utf-8") as cfiles: - cfiles.write(" ") # provides timestamp for Makefile + cfiles.write(" ") return 0 diff --git a/tmd/create_taxcalc_input_variables.py b/tmd/create_taxcalc_input_variables.py index fa0d1a43..18d49cab 100644 --- a/tmd/create_taxcalc_input_variables.py +++ b/tmd/create_taxcalc_input_variables.py @@ -16,6 +16,7 @@ TAXYEAR = 2021 +DUMP_ALL_UNROUNDED_VARIABLES = False def create_variable_file(write_file=True): @@ -33,10 +34,9 @@ def create_variable_file(write_file=True): vdf = create_tmd_2021() vdf.FLPDYR = TAXYEAR vdf.agi_bin = 0 - weights = vdf.s006.copy() - if write_file: - # save a copy containing both input and output variables - fname = STORAGE_FOLDER / "output" / "tmd_2021.csv" + # optionally dump all input and output variables unrounded + if write_file and DUMP_ALL_UNROUNDED_VARIABLES: + fname = STORAGE_FOLDER / "allvars_unrounded_2021.csv" print(f"Writing PUF+CPS file... [{fname}]") vdf.to_csv(fname, index=False) # streamline dataframe so that it includes only input variables @@ -52,6 +52,7 @@ def create_variable_file(write_file=True): ) vdf.drop(columns=rec.IGNORED_VARS, inplace=True) # round all float variables to nearest integer except for weights + weights = vdf.s006.copy() vdf = vdf.astype(int) vdf.s006 = weights for var in ["e00200", "e00900", "e02100"]: diff --git a/tmd/datasets/tmd.py b/tmd/datasets/tmd.py index 87037ca6..2b0cea2d 100644 --- a/tmd/datasets/tmd.py +++ b/tmd/datasets/tmd.py @@ -58,4 +58,3 @@ def create_tmd_2021(): if __name__ == "__main__": tmd = create_tmd_2021() - tmd.to_csv(STORAGE_FOLDER / "output" / "tmd_2021.csv", index=False) diff --git a/tmd/storage/output/README.md b/tmd/storage/output/README.md index 2c099bed..daa4d74c 100644 --- a/tmd/storage/output/README.md +++ b/tmd/storage/output/README.md @@ -4,15 +4,3 @@ Three national files suitable for input to Tax-Calculator: - tmd.csv.gz - tmd_weights.csv.gz - tmd_growfactors.csv - -## Warning about `tmd_2021.csv` file - -There is a special-purpose `tmd_2021.csv` file that includes 2021 -Tax-Calculator output variables and the pre-optimization weight, -`s006_original`. The weights and input variables in this file are not -rounded for Tax-Calculator input (as they are in the `tmd.csv` file), -and therefore, there has always been minor differences between the -content of `tmd_2021.csv` and `tmd.csv` files. As a result, using the -`tmd_2021.csv` file is not recommended. There are plans to remove the -`tmd_2021.csv` file in the future. -