Skip to content

Commit

Permalink
Merge pull request #348 from PSLmodels/precision
Browse files Browse the repository at this point in the history
Add tmd/examination/2022/precision.sh script and associated files
  • Loading branch information
martinholmer authored Jan 17, 2025
2 parents d4a67f9 + f507418 commit 0c0ee44
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 0 deletions.
67 changes: 67 additions & 0 deletions tmd/examination/2022/bootstrap_sampling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Generate sample precision estimates using bootstrap sampling methods.
"""

import sys
import numpy as np
import pandas as pd

USAGE = "USAGE: python bootstrap_sampling.py tc_dump_output_file_name\n"

BS_SAMPLES = 1000
BS_RNSEED = 192837465
BS_DUMP = False
BS_CI = True


def bootstrap_sampling(outfile):
"""
High-level logic of the script.
"""
# read odf from outfile
odf = pd.read_csv(outfile)
print("len(odf) =", len(odf))
fdf = odf[(odf["data_source"] == 1) & (odf["iitax"] > 0)]
print("len(fdf) =", len(fdf))
print(f"FILE wght (#M) = {fdf['s006'].sum() * 1e-6:.3f}")
print(f"FILE itax ($B) = {(fdf['s006'] * fdf['iitax']).sum() * 1e-9:.3f}")

# compute sum of wght and wght*itax for each bootstrap sample
xdf = pd.DataFrame({"wght": fdf["s006"], "itax": fdf["iitax"]})
rng = np.random.default_rng(seed=BS_RNSEED)
wght_list = []
itax_list = []
for bss in range(1, BS_SAMPLES + 1):
bsdf = xdf.sample(n=len(xdf), replace=True, random_state=rng)
wght_value = bsdf["wght"].sum() * 1e-6
itax_value = (bsdf["wght"] * bsdf["itax"]).sum() * 1e-9
if BS_DUMP:
print(f"{itax_value:9.3f} {wght_value:7.3f} {bss:4d}")
wght_list.append(wght_value)
itax_list.append(itax_value)
wght = np.sort(np.array(wght_list))
itax = np.sort(np.array(itax_list))
print(
f"BS:wght num,mean,stdev = {BS_SAMPLES:4d} "
f"{wght.mean():9.3f} {wght.std():7.3f}"
)
if BS_CI and BS_SAMPLES == 1000:
print(f"BS:wght median = {wght[499]:9.3f}")
print(f"BS:wght 95%_conf_intv = {wght[24]:9.3f} , {wght[974]:9.3f}")
print(
f"BS:itax num,mean,stdev = {BS_SAMPLES:4d} "
f"{itax.mean():9.3f} {itax.std():7.3f}"
)
if BS_CI and BS_SAMPLES == 1000:
print(f"BS:itax median = {itax[499]:9.3f}")
print(f"BS:itax 95%_conf_intv = {itax[24]:9.3f} , {itax[974]:9.3f}")

return 0


if __name__ == "__main__":
if len(sys.argv) - 1 != 1:
sys.stderr.write("ERROR: one command-line argument not specified\n")
sys.stderr.write(USAGE)
sys.exit(1)
sys.exit(bootstrap_sampling(sys.argv[1]))
43 changes: 43 additions & 0 deletions tmd/examination/2022/precision.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# The examination/2022/precision.sh script using bootstapping methods to
# estimate the sampling precision of the weighted positive iitax estimate.
# PREREQUISITE:
# (taxcalc-dev) tax-microdata-benchmarking% make test
# USAGE:
# (taxcalc-dev) 2022% ./precision.sh

# === SETUP ===
TMD=../..
cp $TMD/storage/output/tmd*csv* .
gunzip -f tmd.csv.gz
STATES="ak mn nj nm sc va"

# === WEIGHTS ===
for S in $STATES; do
echo Generating weights for $S ...
unzip -oq phase6-state-targets.zip ${S}_targets.csv
mv -f ${S}_targets.csv $TMD/areas/targets
pushd $TMD/areas > /dev/null
rm -f weights/${S}_tmd_weights.csv.gz
python create_area_weights.py $S > ${S}_local.log
mv -f ${S}_local.log ../examination/2022
mv -f weights/${S}_tmd_weights.csv.gz ../examination/2022
popd > /dev/null
done

# === RESULTS ===
cd $TMD/examination/2022
TC_OPTIONS="--exact --dump --dvars precvars"
echo Generating results for US ...
tc tmd.csv 2022 $TC_OPTIONS | grep -v data
python bootstrap_sampling.py tmd-22-#-#-#.csv
for S in $STATES; do
echo Generating results for $S ...
TMD_AREA=$S tc tmd.csv 2022 $TC_OPTIONS | grep -v data
python bootstrap_sampling.py tmd_${S}-22-#-#-#.csv
done

# === CLEANUP ===
rm -f ./*tmd*csv*
rm -f ./tmd*-22-*
rm -f ./*.log
exit 0
3 changes: 3 additions & 0 deletions tmd/examination/2022/precvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
iitax
s006
data_source

0 comments on commit 0c0ee44

Please sign in to comment.