-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #304 from PSLmodels/pr-initial-setup-prepare-state…
…-targets Pr initial setup prepare state targets
- Loading branch information
Showing
17 changed files
with
3,910 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
source("renv/activate.R") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# prepare_states | ||
|
||
# ignore quarto at the root level | ||
/.quarto/ | ||
|
||
# folders to ignore (anywhere in project since not preceded by root /) | ||
.Rproj.user/ | ||
#_docs/ | ||
#_targetprep/ | ||
#_freeze/ | ||
_web/ | ||
#libs/ | ||
# Local Netlify folder | ||
.netlify | ||
|
||
# Ignore `renv` directories that are system-specific | ||
renv/library/ | ||
renv/cache/ | ||
|
||
# Track the lockfile and settings file for reproducibility | ||
!renv.lock | ||
!renv/settings.dcf | ||
|
||
# file types to ignore (unless re-included by a negation rule elsewhere in this file) | ||
~* | ||
*.csv | ||
*.html | ||
*.rds | ||
|
||
# specific files to ignore regardless of how file types are treated | ||
.Rhistory | ||
|
||
# Do not ignore anything in raw data folder, including files or folders nested within | ||
!states/raw_data/** | ||
# but continue to ignore Word temp files | ||
states/raw_data/~* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
|
||
# Download locations on the IRS site (irs-soi) for the 2021 Congressional
# District zip archive and its documentation guide.
CDZIPURL <- "https://www.irs.gov/pub/irs-soi/congressional2021.zip"
CDDOCURL <- "https://www.irs.gov/pub/irs-soi/21incddocguide.docx"

# Working folders, all located under <project root>/cds.
CDDIR <- here::here("cds")
CDRAW <- fs::path(CDDIR, "raw_data")
CDINTERMEDIATE <- fs::path(CDDIR, "intermediate")
CDFINAL <- fs::path(CDDIR, "final")

# Spreadsheet file name; presumably the documentation extracted from the
# .docx guide above -- confirm against the code that reads or writes it.
CDDOCEXTRACT <- "cd_documentation_extracted_from_21incddocguide.docx.xlsx"

# TMDHOME sits five directory levels above the project root that
# here::here() reports; TMDDATA is tmd/storage/output beneath it.
TMDHOME <- fs::path(here::here(), "..", "..", "..", "..", "..")
# normalizePath(TMDHOME)
TMDDATA <- fs::path(TMDHOME, "tmd", "storage", "output")
# normalizePath(TMDDATA)

# Break points: -Inf, 1, then 10k/25k/50k/75k/100k/200k/500k, then Inf.
# NOTE(review): presumably AGI range boundaries -- confirm against usage.
CDAGICUTS <- c(-Inf, 1, c(10, 25, 50, 75, 100, 200, 500) * 1e3, Inf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
|
||
|
||
# Print the first and then the last `nrecs` elements/rows of `df`.
#
# df:    any object supported by utils::head() and utils::tail().
# nrecs: how many leading and trailing records to show (default 6).
#
# Returns (invisibly, via print()) the tail portion that was printed last.
ht <- function(df, nrecs = 6) {
  first_part <- utils::head(df, nrecs)
  print(first_part)
  last_part <- utils::tail(df, nrecs)
  print(last_part)
}
|
||
# Return the names of `obj` in sorted order.
#
# obj: any object with a names() attribute (list, data frame, named vector).
ns <- function(obj) {
  obj_names <- names(obj)
  sort(obj_names)
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
# libraries --------------------------------------------------------------- | ||
|
||
library(renv) | ||
library(here) | ||
|
||
library(DT) | ||
library(fs) | ||
library(gt) | ||
library(knitr) | ||
library(readxl) | ||
library(skimr) | ||
library(stringr) | ||
library(tidyverse) | ||
# includes: dplyr, forcats, ggplot2, lubridate, purrr, stringr, tibble, tidyr | ||
|
||
tprint <- 75 # default tibble print | ||
options(tibble.print_max = tprint, tibble.print_min = tprint) # show up to tprint rows | ||
|
||
library(vroom) | ||
|
||
# census_api_key("<YOUR_CENSUS_API_KEY>", install = TRUE) # run once per machine; stores the key in .Renviron -- never commit a real key | ||
# libraries needed for census population | ||
library(sf) | ||
library(tidycensus) | ||
library(tigris) | ||
options(tigris_use_cache = TRUE) | ||
|
||
|
||
# possible libraries ------------------------------------------------------ | ||
|
||
# library(rlang) | ||
# library(tidyverse) | ||
# tprint <- 75 # default tibble print | ||
# options(tibble.print_max = tprint, tibble.print_min = tprint) # show up to tprint rows | ||
# | ||
# library(fs) | ||
|
||
# tools | ||
# library(vroom) | ||
# library(readxl) | ||
# library(openxlsx) # for writing xlsx files | ||
# library(lubridate) | ||
# library(RColorBrewer) | ||
# library(RcppRoll) | ||
# library(fredr) | ||
# library(tidycensus) | ||
# library(googledrive) | ||
# library(arrow) | ||
# | ||
# library(jsonlite) | ||
# library(tidyjson) | ||
# | ||
# | ||
# # boyd libraries | ||
# # library(btools) | ||
# # library(bdata) | ||
# # library(bggtools) | ||
# # library(bmaps) | ||
# | ||
# # graphics | ||
# library(scales) | ||
# library(ggbeeswarm) | ||
# library(patchwork) | ||
# library(gridExtra) | ||
# library(ggrepel) | ||
# library(ggbreak) | ||
# | ||
# # tables | ||
# library(knitr) | ||
# library(kableExtra) | ||
# library(DT) | ||
# library(gt) | ||
# library(gtExtras) | ||
# library(janitor) | ||
# library(skimr) | ||
# library(vtable) | ||
# | ||
# # maps | ||
# library(maps) | ||
# # https://cran.r-project.org/web/packages/usmap/vignettes/mapping.html | ||
# library(usmap) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
project: | ||
type: book | ||
output-dir: _web | ||
|
||
# https://prerelease.quarto.org/ # quarto documentation at this link | ||
|
||
|
||
# site info: | ||
# OLD id: 4d646266-9d1f-4d69-acb4-b9a17b63a5ff | ||
# Unique deploy URL: https://671e13320a7e7cfb68b1ba7d--tmd-areas-prepare-targets.netlify.app | ||
# url: https://tmd-areas-targets-prepare.netlify.app | ||
|
||
# publishing with netlify cli: | ||
# open terminal in prepare | ||
# quarto render && netlify deploy --prod --dir=_web | ||
|
||
# quarto render # inspect to be sure it is as desired | ||
# netlify deploy --prod --dir=_web | ||
|
||
# or step by step | ||
# netlify deploy # to test it, give _web (the output-dir above) as publish directory | ||
# netlify deploy --prod # to deploy, give _web (the output-dir above) as publish directory | ||
|
||
execute: | ||
eval: true | ||
echo: true | ||
output: true | ||
freeze: false # false: always re-execute code; auto: during global project renders, re-render only when source changes | ||
|
||
book: | ||
title: "Develop targets for States" | ||
subtitle: "Create csv target files for use by area targeting routines" | ||
# author: "Don Boyd" | ||
date: today | ||
date-format: long | ||
chapters: | ||
- index.qmd | ||
- part: "Usage" | ||
chapters: | ||
- usage.qmd | ||
- part: "IRS SOI State data" | ||
chapters: | ||
# - cd_download_and_clean_census_population_data.qmd | ||
- download_soi_data.qmd | ||
- explore_soi_data.qmd | ||
# - cd_construct_soi_variable_documentation.qmd | ||
# - cd_construct_long_soi_data_file.qmd | ||
# - cd_create_basefile_for_117Congress_cd_target_files.qmd | ||
# - cd_create_cd_117_118_crosswalk_and_cdbasefile_118.qmd | ||
# - cd_create_basefile_multiple_sessions.qmd | ||
# - cd_create_variable_mapping.qmd | ||
# - cd_compare_us_totals_tmd_vs_irs_published.qmd | ||
# - cd_enhance_basefile_with_special_targets.qmd | ||
# appendices: | ||
# - cd_issues_and_TODOs.qmd | ||
# - cd_IRS_documentation.qmd | ||
|
||
format: | ||
html: | ||
theme: cosmo | ||
code-fold: true | ||
|
||
editor_options: | ||
chunk_output_type: console | ||
|
||
# rendering commands | ||
# quarto render | ||
# quarto publish netlify --no-prompt --no-render --no-browser | ||
|
||
# possibly use this at start of each doc | ||
# --- | ||
# output: html_document | ||
# editor_options: | ||
# chunk_output_type: console | ||
# --- | ||
|
||
|
||
|
100 changes: 100 additions & 0 deletions
100
tmd/areas/targets/prepare/prepare_states/cd_download_and_clean_census_population_data.qmd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
--- | ||
output: html_document | ||
editor_options: | ||
chunk_output_type: console | ||
--- | ||
|
||
# Get Congressional District Census population data | ||
|
||
## Setup | ||
|
||
```{r} | ||
#| label: setup | ||
source(here::here("R", "libraries.R")) | ||
source(here::here("R", "constants.R")) | ||
source(here::here("R", "functions.R")) | ||
# 334283385.27000004 national pop | ||
``` | ||
|
||
## Get Congressional District population | ||
|
||
The quarto chunk `cdpop-download` will, if a user sets the chunk `eval` option to `eval: true`, download and save Congressional District population data, based on the 116th Congress, from the American Community Survey (ACS) for 2021. | ||
|
||
Ordinarily this will not be necessary because previously downloaded files are included with the project in the "../cds/raw_data" folder. Thus, the default chunk option is `eval: false` and the chunk will not be run when this project is rendered. | ||
|
||
```{r}
#| label: cdpop-download
#| eval: false

# The ACS year determines which Congress's districts are returned:
# 2021 gets data for the 116th Congress and 2022 gets the 118th Congress.
cdpop1year <- get_acs(
  survey = "acs1", # 1-year estimates; consider 5-year estimates for better coverage, especially in smaller areas
  year = 2021,
  geography = "congressional district",
  variables = "B01003_001" # total population variable
)

# Save the download in the raw-data folder.
cdpop1year |>
  write_csv(fs::path(CDRAW, "cdpop1year_acs.csv"))

# Optionally get 5-year ACS data - possibly useful in the future.
# cdpop5year <- get_acs(
#   geography = "congressional district",
#   variables = "B01003_001",  # Total population variable
#   year = 2021,
#   survey = "acs5"  # Use 5-year estimates for better coverage, especially in smaller areas
# )
# write_csv(cdpop5year, fs::path(CDRAW, "cdpop5year_acs.csv"))
```
|
||
## Clean Congressional District population | ||
|
||
This chunk gets previously saved Congressional District population data, does minor cleaning, and saves the cleaned file to the "../cds/intermediate" folder. | ||
|
||
```{r}
#| label: cdpop-clean
#| output: false

# Read the saved ACS download. GEOID is forced to character because it is
# sliced by position below (chars 1-2 = state FIPS, chars 3-4 = district);
# if it were parsed as a number the leading zeros would be lost and the
# slices would be wrong.
cdpop1year <- read_csv(
  fs::path(CDRAW, "cdpop1year_acs.csv"),
  col_types = cols(GEOID = col_character())
)
cdpop1year |> summarise(estimate = sum(estimate)) # 335157329

# cdpop5year <- read_csv(fs::path(CDRAW, "cdpop5year_acs.csv"))
# cdpop5year |> summarise(estimate=sum(estimate)) # 333036755

# Lookup table: state FIPS code -> state postal abbreviation.
stcodes <- tigris::states() |>
  as.data.frame() |>
  select(STATEFIPS = STATEFP, STATE = STUSPS)

cdpop1 <- cdpop1year |>
  mutate(
    STATEFIPS = str_sub(GEOID, 1, 2),
    CONG_DISTRICT = str_sub(GEOID, 3, 4)
  ) |>
  # many-to-one: each district row may match at most one state row, so an
  # accidental duplicate in the lookup raises an error instead of silently
  # duplicating population rows
  left_join(
    stcodes,
    by = join_by(STATEFIPS),
    relationship = "many-to-one"
  ) |>
  # we're not using Puerto Rico; note that filter() also drops any row whose
  # STATE is NA (i.e., a STATEFIPS with no match in stcodes)
  filter(STATE != "PR") |>
  mutate(CONG_DISTRICT = ifelse(STATE == "DC",
                                "00", # Census data has 98 for DC
                                CONG_DISTRICT)) |>
  select(STATEFIPS, STATE, CONG_DISTRICT, cdname = NAME, pop2021 = estimate)

# Interactive checks (not shown when rendered because output is false).
count(cdpop1, STATEFIPS, STATE)
cdpop1 |> filter(STATE == "NY")
cdpop1 |> filter(STATE == "AK")
cdpop1 |> filter(STATE == "DC")
cdpop1 |> filter(STATE == "WY")
count(cdpop1, CONG_DISTRICT)
glimpse(cdpop1)
sum(cdpop1$pop2021) # 331,893,745 compared to Martin's 334,283,385

write_csv(cdpop1, fs::path(CDINTERMEDIATE, "cdpop1year.csv"))
```
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# Ignore everything in this directory | ||
* | ||
|
||
# Allow the .gitignore file itself | ||
!.gitignore |
Oops, something went wrong.