Skip to content

Commit

Permalink
Merge pull request #278 from PSLmodels/simplify-output
Browse files Browse the repository at this point in the history
Simplify output in order to reduce potential confusion about tmd data files
  • Loading branch information
martinholmer authored Nov 5, 2024
2 parents 15f290d + 587bf8a commit 3aafa8d
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 18 deletions.
4 changes: 3 additions & 1 deletion tmd/create_taxcalc_cached_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ def create_cached_files():
varray = calc.array(vname)
fpath = STORAGE_FOLDER / "output" / f"cached_{vname}.npy"
np.save(fpath, varray, allow_pickle=False)

# provide timestamp for Makefile
fpath = STORAGE_FOLDER / "output" / "cached_files"
with open(fpath, "w", encoding="utf-8") as cfiles:
cfiles.write(" ") # provides timestamp for Makefile
cfiles.write(" ")

return 0

Expand Down
9 changes: 5 additions & 4 deletions tmd/create_taxcalc_input_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@


TAXYEAR = 2021
DUMP_ALL_UNROUNDED_VARIABLES = False


def create_variable_file(write_file=True):
Expand All @@ -33,10 +34,9 @@ def create_variable_file(write_file=True):
vdf = create_tmd_2021()
vdf.FLPDYR = TAXYEAR
vdf.agi_bin = 0
weights = vdf.s006.copy()
if write_file:
# save a copy containing both input and output variables
fname = STORAGE_FOLDER / "output" / "tmd_2021.csv"
# optionally dump all input and output variables unrounded
if write_file and DUMP_ALL_UNROUNDED_VARIABLES:
fname = STORAGE_FOLDER / "allvars_unrounded_2021.csv"
print(f"Writing PUF+CPS file... [{fname}]")
vdf.to_csv(fname, index=False)
# streamline dataframe so that it includes only input variables
Expand All @@ -52,6 +52,7 @@ def create_variable_file(write_file=True):
)
vdf.drop(columns=rec.IGNORED_VARS, inplace=True)
# round all float variables to nearest integer except for weights
weights = vdf.s006.copy()
vdf = vdf.astype(int)
vdf.s006 = weights
for var in ["e00200", "e00900", "e02100"]:
Expand Down
1 change: 0 additions & 1 deletion tmd/datasets/tmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,3 @@ def create_tmd_2021():

if __name__ == "__main__":
tmd = create_tmd_2021()
tmd.to_csv(STORAGE_FOLDER / "output" / "tmd_2021.csv", index=False)
12 changes: 0 additions & 12 deletions tmd/storage/output/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,3 @@ Three national files suitable for input to Tax-Calculator:
- tmd.csv.gz
- tmd_weights.csv.gz
- tmd_growfactors.csv

## Warning about `tmd_2021.csv` file

There is a special-purpose `tmd_2021.csv` file that includes 2021
Tax-Calculator output variables and the pre-optimization weight,
`s006_original`. The weights and input variables in this file are not
rounded for Tax-Calculator input (as they are in the `tmd.csv` file),
and therefore, there have always been minor differences between the
contents of the `tmd_2021.csv` and `tmd.csv` files. As a result, using the
`tmd_2021.csv` file is not recommended. There are plans to remove the
`tmd_2021.csv` file in the future.

0 comments on commit 3aafa8d

Please sign in to comment.