Skip to content

Commit

Permalink
Merge pull request #345 from PSLmodels/examination-2022
Browse files Browse the repository at this point in the history
Add 2022 US and selected states examination results
  • Loading branch information
martinholmer authored Jan 16, 2025
2 parents 9ad5a91 + 23a6281 commit 6a1d66b
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 0 deletions.
47 changes: 47 additions & 0 deletions tmd/examination/2022/dbtab.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
-- Tabulate iitax component results using only PUF records
-- (rows with data_source = 1).  Each column is the s006-weighted sum of
-- one variable, scaled by 1e-9 and rounded to three decimal places.
SELECT
    ROUND(SUM(s006 * e00200) * 1e-9, 3),  -- wage and salary (ALL)
    ROUND(SUM(s006 * e00600) * 1e-9, 3),  -- ordinary dividends
    ROUND(SUM(s006 * e00900) * 1e-9, 3),  -- Sch C net business income
    ROUND(SUM(s006 * e02000) * 1e-9, 3),  -- rent,partnership,S-Corp income (Sch E)
    ROUND(SUM(s006 * e02300) * 1e-9, 3),  -- unemployment compensation
    ROUND(SUM(s006 * c02500) * 1e-9, 3),  -- taxable social security benefits
    ROUND(SUM(s006 * c00100) * 1e-9, 3),  -- adjusted gross income (AGI)
    ROUND(SUM(s006 * c04800) * 1e-9, 3),  -- taxable income
    ROUND(SUM(s006 * refund) * 1e-9, 3)   -- refundable credits
    -- ROUND(SUM(s006 * iitax) * 1e-9, 3) -- total iitax liability (after credits)
FROM baseline
WHERE data_source = 1;

-- Same record filter as above, further restricted to rows whose iitax
-- is not negative (iitax >= 0).
SELECT
    ROUND(SUM(s006 * iitax) * 1e-9, 3)    -- total POSITIVE iitax liability
FROM baseline
WHERE data_source = 1
  AND iitax >= 0;


-- USING TC_OPTIONS="--exact --dump --dvars outvars --sqldb"
-- 2022 US targets from SOI Bulletin (Spring 2024) in $B:
-- < https://www.irs.gov/pub/irs-soi/soi-a-inpre-id2401.pdf >
-- SOI TMD (TMD/SOI-1)*100(%)
--: wage_and_salary 9648.553 9654.475 +0.1
--: ordin_dividends 420.403 420.299 -0.0
--: SchC_net_income 395.136 396.303 +0.3
--: SchE_part_Scorp 1108.445 1114.474 +0.5
--: unemploy_compen 29.554 29.909 +1.2
--: taxable_soc_sec 471.017 473.755 +0.6
--: adj_gross_income 15142.763 14851.081 -1.9
--: taxable_income 11954.522 11842.505 -0.9
--: refundable_credits 106.380 116.717 +9.7
--: itax_after_credits 2285.496 2289.792 +0.2
--
-- % awk '$1~/--:/{print 100*($4/$3-1)}' dbtab.sql
-- 0.0613771
-- -0.0247382
-- 0.295341
-- 0.543915
-- 1.20119
-- 0.581295
-- -1.92621
-- -0.937026
-- 9.71705
-- 0.187968
122 changes: 122 additions & 0 deletions tmd/examination/2022/generate_tmd_results.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#!/usr/bin/env bash
# The examination/2022/generate_tmd_results.sh script.
#
# Generates area weights for a fixed list of states, then runs tc on
# tmd.csv for the US and for each state and tabulates every resulting
# SQLite database with dbtab.sql.
#
# PREREQUISITE:
#   (taxcalc-dev) tax-microdata-benchmarking% make test
# USAGE:
#   (taxcalc-dev) 2022% ./generate_tmd_results.sh
#
# NOTE: this script uses pushd/popd and arrays, so it must be run by bash
# (hence the shebang above); all relative paths assume it is started from
# the examination/2022 directory.

# Print an error message on stderr and stop the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# === SETUP ===
TMD=../..
cp "$TMD"/storage/output/tmd*csv* .
gunzip -f tmd.csv.gz
STATES=(ak mn nj nm sc va)

# === WEIGHTS ===
for S in "${STATES[@]}"; do
  echo "Generating weights for $S ..."
  unzip -oq phase6-state-targets.zip "${S}_targets.csv"
  mv -f "${S}_targets.csv" "$TMD/areas/targets"
  # Stop if the directory change fails: the rm/mv commands below use
  # relative paths and must never run from the wrong directory.
  pushd "$TMD/areas" > /dev/null || die "ERROR: cannot enter $TMD/areas"
  rm -f "weights/${S}_tmd_weights.csv.gz"
  python create_area_weights.py "$S" > "${S}_local.log"
  mv -f "${S}_local.log" ../examination/2022
  mv -f "weights/${S}_tmd_weights.csv.gz" ../examination/2022
  popd > /dev/null || die "ERROR: cannot return from $TMD/areas"
  # Show only the log lines selected by log_extract.awk.
  awk -f log_extract.awk "${S}_local.log"
done

# === RESULTS ===
cd "$TMD/examination/2022" || die "ERROR: cannot enter $TMD/examination/2022"
TC_OPTIONS=(--exact --dump --dvars outvars --sqldb)
echo "Generating results for US ..."
tc tmd.csv 2022 "${TC_OPTIONS[@]}" | grep -v data
# The '#' characters are a literal part of the database file name; the
# name is quoted so they cannot be read as the start of a comment.
sqlite3 'tmd-22-#-#-#.db' < dbtab.sql
for S in "${STATES[@]}"; do
  echo "Generating results for $S ..."
  TMD_AREA=$S tc tmd.csv 2022 "${TC_OPTIONS[@]}" | grep -v data
  sqlite3 "tmd_${S}-22-#-#-#.db" < dbtab.sql
done

# === CLEANUP ===
# Remove the copied input data, the state weights files, the tc output
# files, and the weights logs from the current directory.
rm -f ./*tmd*csv*
rm -f ./tmd*-22-*
rm -f ./*.log
exit 0


# WE HAVE THESE RESULTS ON 2025-01-16:
#
# (taxcalc-dev) 2022% time ./generate_tmd_results.sh
# Generating weights for ak ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 401.1
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=146):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.996000, < 1.004000: 146 146 100.00% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 0.998
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.001
# Generating weights for mn ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 240.2
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=147):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.996000, < 1.004000: 147 147 100.00% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 0.999
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.001
# Generating weights for nj ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 180.3
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=147):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.996000, < 1.004000: 147 147 100.00% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 0.999
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.000
# Generating weights for nm ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 168.4
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=147):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.996000, < 1.004000: 147 147 100.00% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 1.000
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.001
# Generating weights for sc ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 0 231.2
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=147):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.996000, < 1.004000: 147 147 100.00% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 0.999
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.000
# Generating weights for va ...
# ::loop,delta,misses,exectime(secs): 1 1.000000e-09 2 456.7
# DISTRIBUTION OF TARGET ACT/EXP RATIOS (n=147):
# with REGULARIZATION_DELTA= 1.000000e-09
# low bin ratio high bin ratio bin # cum # bin % cum %
# >= 0.900000, < 0.990000: 1 1 0.68% 0.68%
# >= 0.990000, < 0.996000: 0 1 0.00% 0.68%
# >= 0.996000, < 1.004000: 145 146 98.64% 99.32%
# >= 1.004000, < 1.010000: 0 146 0.00% 99.32%
# >= 1.010000, < 1.100000: 1 147 0.68% 100.00%
# MINIMUM VALUE OF TARGET ACT/EXP RATIO = 0.948
# MAXIMUM VALUE OF TARGET ACT/EXP RATIO = 1.050
# Generating results for US ...
# 9654.475|420.299|396.303|1114.474|29.909|473.755|14851.081|11842.505|116.717
# 2289.792
# Generating results for ak ...
# 19.843|0.554|0.607|1.911|0.109|0.906|29.261|22.975|0.217
# 3.796
# Generating results for mn ...
# 179.963|6.321|6.362|20.908|0.71|9.72|267.728|212.373|1.392
# 38.722
# Generating results for nj ...
# 346.77|16.023|10.934|32.693|1.049|16.227|506.593|415.249|2.642
# 82.935
# Generating results for nm ...
# 41.203|1.354|2.04|3.184|0.199|2.932|63.316|46.637|0.947
# 7.416
# Generating results for sc ...
# 119.387|3.966|4.309|14.67|0.429|8.407|185.522|141.508|2.133
# 24.706
# Generating results for va ...
# 276.779|10.304|8.814|28.721|0.767|13.378|408.918|327.695|2.597
# 61.405
# ./generate_tmd_results.sh 6396.15s user 93.15s system 310% cpu 34:52.81 total
6 changes: 6 additions & 0 deletions tmd/examination/2022/log_extract.awk
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Extract the summary lines from a log file read by this program.

# A line whose first field contains "::loop" is printed and switches on
# scanning for the ratio-distribution table.
$1 ~ /::loop/ {
    in_loop_section = 1
    print
    next
}

# Lines mentioning the final delta loop iterations, and lines whose
# first field contains "message:", are printed unconditionally.
/final delta loop iterations/ || $1 ~ /message:/ {
    print
    next
}

# Once scanning is on, printing starts at the "DISTRIBUTION OF TARGET"
# line (inclusive) and stops at the "AREA-OPTIMIZED" line (exclusive).
in_loop_section == 1 {
    if ($0 ~ /DISTRIBUTION OF TARGET/) {
        show = 1
    }
    if ($0 ~ /AREA-OPTIMIZED/) {
        show = 0
    }
}

show == 1 {
    print
}
12 changes: 12 additions & 0 deletions tmd/examination/2022/outvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
e00200
e00600
e00900
e02000
e02300
c02500
c00100
c04800
refund
iitax
s006
data_source
Binary file added tmd/examination/2022/phase6-state-targets.zip
Binary file not shown.
35 changes: 35 additions & 0 deletions tmd/examination/2022/results.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
Phase 6 National and Selected State Data Examination Results
============================================================

**RESULTS AS OF 2025-01-16**

In the table below, Phase 6 US and selected state TMD estimates for
2022 are compared with IRS/SOI tabulations of 2022 sampled income tax
returns. See the Sources section below the table for details on the
TMD and IRS/SOI estimates.

<br>

**CY2022 Total Income Tax Liability** ($ billion)<br>
| Area | TMD | SOI | PctDiff |
| ---: | ---: | ---: | ---: |
| US | 2289.792 | 2250.798 | +1.7 |
| | | | |
| AK | 3.796 | 4.114 | -7.7 |
| MN | 38.722 | 37.221 | +4.0 |
| NJ | 82.935 | 84.148 | -1.4 |
| NM | 7.416 | 7.840 | -5.4 |
| SC | 24.706 | 25.518 | -3.2 |
| VA | 61.405 | 61.516 | -0.2 |

<br>

**Sources**:

The 2022 TMD estimates are generated by the
[`generate_tmd_results.sh`](./generate_tmd_results.sh) script.

The SOI estimates are taken directly from the "All returns" "Total tax
liability: Amount" cell (row 157, column B) in the [Tax Year 2022:
Historic Table 2 (SOI Bulletin) XLSX
files](https://www.irs.gov/statistics/soi-tax-stats-historic-table-2).

0 comments on commit 6a1d66b

Please sign in to comment.