Skip to content

Commit

Permalink
Add data-raw files
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Nov 22, 2024
1 parent f82e244 commit e2408aa
Show file tree
Hide file tree
Showing 71 changed files with 54,833 additions and 0 deletions.
815 changes: 815 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_100

Large diffs are not rendered by default.

818 changes: 818 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_101

Large diffs are not rendered by default.

821 changes: 821 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_110

Large diffs are not rendered by default.

824 changes: 824 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_111

Large diffs are not rendered by default.

827 changes: 827 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_120

Large diffs are not rendered by default.

829 changes: 829 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_121

Large diffs are not rendered by default.

830 changes: 830 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_130

Large diffs are not rendered by default.

830 changes: 830 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_131

Large diffs are not rendered by default.

831 changes: 831 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_140

Large diffs are not rendered by default.

833 changes: 833 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_141

Large diffs are not rendered by default.

834 changes: 834 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_150

Large diffs are not rendered by default.

838 changes: 838 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_151

Large diffs are not rendered by default.

845 changes: 845 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_160

Large diffs are not rendered by default.

848 changes: 848 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_161

Large diffs are not rendered by default.

851 changes: 851 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_170

Large diffs are not rendered by default.

853 changes: 853 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_171

Large diffs are not rendered by default.

854 changes: 854 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_180

Large diffs are not rendered by default.

855 changes: 855 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_181

Large diffs are not rendered by default.

856 changes: 856 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_190

Large diffs are not rendered by default.

862 changes: 862 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_191

Large diffs are not rendered by default.

862 changes: 862 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_200

Large diffs are not rendered by default.

862 changes: 862 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_201

Large diffs are not rendered by default.

865 changes: 865 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_210

Large diffs are not rendered by default.

866 changes: 866 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_211

Large diffs are not rendered by default.

868 changes: 868 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_220

Large diffs are not rendered by default.

868 changes: 868 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_221

Large diffs are not rendered by default.

873 changes: 873 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_230

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_231

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_240

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_241

Large diffs are not rendered by default.

810 changes: 810 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_90

Large diffs are not rendered by default.

814 changes: 814 additions & 0 deletions data-raw/clean/csvs/nucc_taxonomy_91

Large diffs are not rendered by default.

85 changes: 85 additions & 0 deletions data-raw/nucc_files.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# "https://www.nucc.org/index.php/code-sets-mainmenu-41/provider-taxonomy-mainmenu-40/csv-mainmenu-57"

# Collect the download URL of every NUCC taxonomy CSV release.
# The work is wrapped in local() so that only `urls` is assigned in the
# script environment.
urls <- local({
  nucc_root <- "https://www.nucc.org"

  # Walk the site menu (Code Sets > Taxonomy > CSV) and grab every href on
  # the page that is reached.
  menu_hrefs <- rvest::session(nucc_root) |>
    rvest::session_follow_link("Code Sets") |>
    rvest::session_follow_link("Taxonomy") |>
    rvest::session_follow_link("CSV") |>
    rvest::html_elements("a") |>
    rvest::html_attr("href")

  # Keep the href(s) mentioning both "taxonomy" and "csv"; the site root is
  # prepended, so these are presumably site-relative paths (confirm).
  csv_page_href <- stringr::str_subset(
    stringr::str_subset(menu_hrefs, "taxonomy"),
    "csv"
  )

  # Read that page and keep only the links to the nucc_taxonomy_* files.
  file_hrefs <- paste0(nucc_root, csv_page_href) |>
    rvest::read_html() |>
    rvest::html_elements("a") |>
    rvest::html_attr("href") |>
    stringr::str_subset("nucc_taxonomy")

  paste0(nucc_root, file_hrefs)
})

# Build the release lookup table (filename, version, release_date, file_url)
# from the saved `<li><a href=...>Version x.y, m/d/yy</a></li>` lines of the
# NUCC CSV download page.
infotable <- readLines(paste0(here::here(), "/posts/taxonomy/data/nucc_csv_titles_dates.txt")) |>
  # Everything after "/CSV" holds the file name, version and date.
  stringr::str_split("/CSV", simplify = TRUE) |>
  as.data.frame() |>
  dplyr::select(V2) |>
  dplyr::mutate(
    # Strip the markup so each row reads
    # "nucc_taxonomy_NNN.csv Version x.y m/d/yy".
    V2 = stringr::str_remove(V2, '/') |>
      stringr::str_remove('"') |>
      stringr::str_replace('>', " ") |>
      stringr::str_remove("</a></li>") |>
      stringr::str_remove(",")
  ) |>
  tidyr::separate_wider_regex(
    V2,
    c(
      # `[.]` matches a literal dot (a bare `.` would match any character).
      filename = "nucc_taxonomy_[0-9]{2,3}[.]csv",
      " Version ",
      version = "[0-9]{1,2}[.][0-9]{1}",
      " ",
      # Allow one- or two-digit month and day so dates such as 10/15/25
      # parse as well as 7/1/24.
      release_date = "[0-9]{1,2}[/][0-9]{1,2}[/][0-9]{2}"
    )
  ) |>
  dplyr::mutate(
    release_date = readr::parse_date(release_date, format = "%m/%d/%y"),
    # Pair each row with its URL by file name rather than by position, so a
    # different ordering of the scraped links cannot mislabel a release.
    # A file name with no scraped link gets NA.
    file_url = urls[match(filename, basename(urls))]
  )

# Make sure the download folder exists before fetching anything.
glue::glue("{here::here()}/posts/taxonomy/data/csvs") |>
  fs::dir_create()

# Download every release listed in `infotable` into that folder, one
# destination file per `filename`.
curl::multi_download(
  urls     = infotable$file_url,
  destfile = glue::glue("{here::here()}/posts/taxonomy/data/csvs/{infotable$filename}"),
  resume   = TRUE
)

###################################################################

# Reload the release lookup table from disk.
# col_types "ccDc": character, character, Date, character.
infotable <- readr::read_csv(
  glue::glue("{here::here()}/posts/taxonomy/data/infotable.csv"),
  show_col_types = FALSE,
  col_types = "ccDc"
)

# Column cleaner: empty strings become NA, whitespace is squished, then
# fuimus::remove_quotes() is applied (presumably strips quote characters;
# confirm against the fuimus package).
clean_cols <- \(x) fuimus::remove_quotes(stringr::str_squish(dplyr::na_if(x, "")))

# Preferred leading column order, passed to dplyr::any_of() in
# parse_nucc_csvs(). The named entry `"type" = "grouping"` asks tidyselect to
# select `grouping` under the name `type`.
# NOTE(review): `grouping` is also listed unnamed right after; confirm which
# name the output is meant to carry.
selcols <- c(
  "version",
  "release_date",
  "code",
  "type" = "grouping",
  "grouping",
  "classification",
  "specialization",
  "definition",
  "notes"
)

# Pattern = replacement pairs applied to the `notes` column with
# stringr::str_replace_all(). The former trailing `"" = ""` entry is omitted:
# a zero-length name / empty pattern is not a usable rule.
notes_regs <- c(
  "http[s]?:" = "",
  "//" = "",
  "<br/>" = " ",
  "<br><br>" = " "
  # "([0-9]{1,2})//([0-9]{1,2})//([0-9]{4})" = ""
)

# Paths of all downloaded raw CSV files.
nucc_paths <- fs::dir_info(glue::glue("{here::here()}/posts/taxonomy/data/csvs"))$path

# Clean one raw NUCC taxonomy CSV and write the result to the `cleaned`
# folder.
#
# Reads `path` with cleaned column names, drops the first row, cleans every
# column with `clean_cols`, applies `notes_regs` to `notes`, joins `version`
# and `release_date` from `infotable` by file name, reorders the columns
# according to `selcols`, sorts by `code`, and writes the result under the
# input's base name without its extension.
#
# Relies on the script-level objects `infotable`, `clean_cols`, `notes_regs`
# and `selcols`.
#
# @param path Path to a single raw CSV file.
# @return The value of readr::write_csv(); called for its side effect.
parse_nucc_csvs <- function(path) {

  # The script only creates the `csvs` folder, so make sure the output folder
  # exists too (a no-op when it is already there).
  out_dir <- glue::glue("{here::here()}/posts/taxonomy/data/cleaned")
  fs::dir_create(out_dir)

  suppressWarnings(
    readr::read_csv(
      file = path,
      id = "filename",
      show_col_types = FALSE,
      col_types = "c",
      name_repair = janitor::make_clean_names
    )
  ) |>
    # NOTE(review): drops the first row of every file; presumably a non-data
    # row in the raw CSVs. Confirm against the raw files.
    dplyr::slice(-1) |>
    dplyr::mutate(
      # `id = "filename"` stores the full path; keep only the file name so it
      # matches `infotable$filename` in the join below.
      filename = basename(filename),
      dplyr::across(dplyr::everything(), clean_cols),
      notes = stringr::str_replace_all(notes, notes_regs) |> stringr::str_squish()
    ) |>
    dplyr::left_join(infotable, by = "filename") |>
    dplyr::select(
      dplyr::any_of(selcols),
      dplyr::everything(),
      -c(filename, file_url)
    ) |>
    dplyr::arrange(code) |>
    readr::write_csv(
      file = glue::glue("{out_dir}/{tools::file_path_sans_ext(basename(path))}"),
      num_threads = 4L
    )

}

# Clean every downloaded CSV in turn; called only for the files it writes.
nucc_paths |>
  purrr::walk(parse_nucc_csvs)

10 changes: 10 additions & 0 deletions data-raw/nucc_pins.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Load the pin helpers (pin_update(), delete_pins()).
source(here::here("data-raw", "pins_internal.R"))

# -- Data wrangling code here -- #

# Template call: `dataset` and the strings below are placeholders to be
# replaced with the real object and its metadata.
pin_update(
  x           = dataset,
  name        = "object_name",
  title       = "Short Description",
  description = "Long Description"
)
23 changes: 23 additions & 0 deletions data-raw/pins_internal.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Write (or overwrite) a pin on the package's folder board and refresh the
# board manifest.
#
# @param x Object to pin.
# @param name Pin name.
# @param title Short title stored with the pin.
# @param description Longer description stored with the pin.
# @return The value of pins::write_board_manifest().
pin_update <- function(x, name, title, description) {

  pin_board <- pins::board_folder(here::here("inst/extdata/pins"))

  # Store the object as a "qs" pin together with its metadata.
  pins::pin_write(
    pin_board,
    x,
    name = name,
    title = title,
    description = description,
    type = "qs"
  )

  # Regenerate the manifest so it lists the pin that was just written.
  pins::write_board_manifest(pin_board)
}

# Delete one or more pins from the package's folder board and refresh the
# board manifest so it no longer lists them.
#
# @param pin_names Character vector of pin names to delete.
# @return The board, invisibly.
delete_pins <- function(pin_names) {

  board <- pins::board_folder(
    here::here("inst/extdata/pins"))

  pins::pin_delete(board, names = pin_names)

  # Rewrite the manifest after deleting, mirroring pin_update(); otherwise it
  # would keep listing the removed pins.
  pins::write_board_manifest(board)

  invisible(board)
}
245 changes: 245 additions & 0 deletions data-raw/raw/National Uniform Claim Committee - CSV.htm

Large diffs are not rendered by default.

245 changes: 245 additions & 0 deletions data-raw/raw/National_Uniform_Claim_Committee_CSV.txt

Large diffs are not rendered by default.

816 changes: 816 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_100.csv

Large diffs are not rendered by default.

819 changes: 819 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_101.csv

Large diffs are not rendered by default.

822 changes: 822 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_110.csv

Large diffs are not rendered by default.

825 changes: 825 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_111.csv

Large diffs are not rendered by default.

828 changes: 828 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_120.csv

Large diffs are not rendered by default.

830 changes: 830 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_121.csv

Large diffs are not rendered by default.

831 changes: 831 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_130.csv

Large diffs are not rendered by default.

831 changes: 831 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_131.csv

Large diffs are not rendered by default.

832 changes: 832 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_140.csv

Large diffs are not rendered by default.

834 changes: 834 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_141.csv

Large diffs are not rendered by default.

835 changes: 835 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_150.csv

Large diffs are not rendered by default.

839 changes: 839 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_151.csv

Large diffs are not rendered by default.

846 changes: 846 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_160.csv

Large diffs are not rendered by default.

849 changes: 849 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_161.csv

Large diffs are not rendered by default.

852 changes: 852 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_170.csv

Large diffs are not rendered by default.

854 changes: 854 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_171.csv

Large diffs are not rendered by default.

855 changes: 855 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_180.csv

Large diffs are not rendered by default.

856 changes: 856 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_181.csv

Large diffs are not rendered by default.

857 changes: 857 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_190.csv

Large diffs are not rendered by default.

863 changes: 863 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_191.csv

Large diffs are not rendered by default.

863 changes: 863 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_200.csv

Large diffs are not rendered by default.

863 changes: 863 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_201.csv

Large diffs are not rendered by default.

866 changes: 866 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_210.csv

Large diffs are not rendered by default.

867 changes: 867 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_211.csv

Large diffs are not rendered by default.

869 changes: 869 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_220.csv

Large diffs are not rendered by default.

869 changes: 869 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_221.csv

Large diffs are not rendered by default.

874 changes: 874 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_230.csv

Large diffs are not rendered by default.

875 changes: 875 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_231.csv

Large diffs are not rendered by default.

875 changes: 875 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_240.csv

Large diffs are not rendered by default.

875 changes: 875 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_241.csv

Large diffs are not rendered by default.

811 changes: 811 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_90.csv

Large diffs are not rendered by default.

815 changes: 815 additions & 0 deletions data-raw/raw/csvs/nucc_taxonomy_91.csv

Large diffs are not rendered by default.

33 changes: 33 additions & 0 deletions data-raw/raw/infotable.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
filename,version,release_date,file_url
nucc_taxonomy_241.csv,24.1,2024-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_241.csv
nucc_taxonomy_240.csv,24.0,2024-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_240.csv
nucc_taxonomy_231.csv,23.1,2023-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_231.csv
nucc_taxonomy_230.csv,23.0,2023-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_230.csv
nucc_taxonomy_221.csv,22.1,2022-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_221.csv
nucc_taxonomy_220.csv,22.0,2022-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_220.csv
nucc_taxonomy_211.csv,21.1,2021-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_211.csv
nucc_taxonomy_210.csv,21.0,2021-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_210.csv
nucc_taxonomy_201.csv,20.1,2020-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_201.csv
nucc_taxonomy_200.csv,20.0,2020-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_200.csv
nucc_taxonomy_191.csv,19.1,2019-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_191.csv
nucc_taxonomy_190.csv,19.0,2019-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_190.csv
nucc_taxonomy_181.csv,18.1,2018-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_181.csv
nucc_taxonomy_180.csv,18.0,2018-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_180.csv
nucc_taxonomy_171.csv,17.1,2017-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_171.csv
nucc_taxonomy_170.csv,17.0,2017-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_170.csv
nucc_taxonomy_161.csv,16.1,2016-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_161.csv
nucc_taxonomy_160.csv,16.0,2016-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_160.csv
nucc_taxonomy_151.csv,15.1,2015-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_151.csv
nucc_taxonomy_150.csv,15.0,2015-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_150.csv
nucc_taxonomy_141.csv,14.1,2014-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_141.csv
nucc_taxonomy_140.csv,14.0,2014-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_140.csv
nucc_taxonomy_131.csv,13.1,2013-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_131.csv
nucc_taxonomy_130.csv,13.0,2013-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_130.csv
nucc_taxonomy_121.csv,12.1,2012-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_121.csv
nucc_taxonomy_120.csv,12.0,2012-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_120.csv
nucc_taxonomy_111.csv,11.1,2011-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_111.csv
nucc_taxonomy_110.csv,11.0,2011-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_110.csv
nucc_taxonomy_101.csv,10.1,2010-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_101.csv
nucc_taxonomy_100.csv,10.0,2010-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_100.csv
nucc_taxonomy_91.csv,9.1,2009-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_91.csv
nucc_taxonomy_90.csv,9.0,2009-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_90.csv
32 changes: 32 additions & 0 deletions data-raw/raw/nucc_csv_titles_dates.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<li><a href="/images/stories/CSV/nucc_taxonomy_241.csv">Version 24.1, 7/1/24</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_240.csv">Version 24.0, 1/1/24</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_231.csv">Version 23.1, 7/1/23</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_230.csv">Version 23.0, 1/1/23</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_221.csv">Version 22.1, 7/1/22</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_220.csv">Version 22.0, 1/1/22</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_211.csv">Version 21.1, 7/1/21</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_210.csv">Version 21.0, 1/1/21</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_201.csv">Version 20.1, 7/1/20</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_200.csv">Version 20.0, 1/1/20</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_191.csv">Version 19.1, 7/1/19</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_190.csv">Version 19.0, 1/1/19</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_181.csv">Version 18.1, 7/1/18</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_180.csv">Version 18.0, 1/1/18</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_171.csv">Version 17.1, 7/1/17</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_170.csv">Version 17.0, 1/1/17</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_161.csv">Version 16.1, 7/1/16</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_160.csv">Version 16.0, 1/1/16</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_151.csv">Version 15.1, 7/1/15</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_150.csv">Version 15.0, 1/1/15</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_141.csv">Version 14.1, 7/1/14</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_140.csv">Version 14.0, 1/1/14</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_131.csv">Version 13.1, 7/1/13</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_130.csv">Version 13.0, 1/1/13</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_121.csv">Version 12.1, 7/1/12</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_120.csv">Version 12.0, 1/1/12</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_111.csv">Version 11.1, 7/1/11</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_110.csv">Version 11.0, 1/1/11</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_101.csv">Version 10.1, 7/1/10</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_100.csv">Version 10.0, 1/1/10</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_91.csv">Version 9.1, 7/1/09</a></li>
<li><a href="/images/stories/CSV/nucc_taxonomy_90.csv">Version 9.0, 1/1/09</a></li>

0 comments on commit e2408aa

Please sign in to comment.