-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# "https://www.nucc.org/index.php/code-sets-mainmenu-41/provider-taxonomy-mainmenu-40/csv-mainmenu-57" | ||
|
||
# Scrape the download links for every published NUCC taxonomy CSV.
# Wrapped in local() so only `urls` is left behind in the calling environment.
urls <- local({
  # Follow the site navigation: Code Sets > Taxonomy > CSV.
  landing <- rvest::session("https://www.nucc.org")
  landing <- rvest::session_follow_link(landing, "Code Sets")
  landing <- rvest::session_follow_link(landing, "Taxonomy")
  landing <- rvest::session_follow_link(landing, "CSV")

  # Keep the hrefs on that page that mention both "taxonomy" and "csv".
  page_hrefs <- rvest::html_attr(rvest::html_elements(landing, "a"), "href")
  page_hrefs <- stringr::str_subset(page_hrefs, "taxonomy")
  page_hrefs <- stringr::str_subset(page_hrefs, "csv")

  # Prefix the host onto the matched href and read the page it points to.
  listing <- rvest::read_html(paste0("https://www.nucc.org", page_hrefs))

  # From that page, keep the hrefs of the individual nucc_taxonomy files.
  file_hrefs <- rvest::html_attr(rvest::html_elements(listing, "a"), "href")
  file_hrefs <- stringr::str_subset(file_hrefs, "nucc_taxonomy")

  paste0("https://www.nucc.org", file_hrefs)
})
|
||
# Build the release lookup table (filename, version, release_date, file_url)
# from the saved `<li><a href=...>Version X.Y, m/d/yy</a></li>` lines.
infotable <- readLines(
  here::here("posts", "taxonomy", "data", "nucc_csv_titles_dates.txt")
) |>
  # Everything after "/CSV" holds the filename, version and date.
  stringr::str_split("/CSV", simplify = TRUE) |>
  as.data.frame() |>
  dplyr::select(V2) |>
  dplyr::mutate(
    # Strip the markup so each row reads
    # "<filename> Version <version> <date>".
    V2 = V2 |>
      stringr::str_remove("/") |>
      stringr::str_remove('"') |>
      stringr::str_replace(">", " ") |>
      stringr::str_remove("</a></li>") |>
      stringr::str_remove(",")
  ) |>
  tidyr::separate_wider_regex(
    V2,
    c(
      # "[.]" matches a literal dot (a bare "." matches any character).
      filename = "nucc_taxonomy_[0-9]{2,3}[.]csv",
      " Version ",
      version = "[0-9]{1,2}[.][0-9]{1}",
      " ",
      # Accept one- or two-digit month and day (e.g. 7/1/24 or 10/15/25).
      release_date = "[0-9]{1,2}/[0-9]{1,2}/[0-9]{2}"
    )
  ) |>
  dplyr::mutate(
    release_date = readr::parse_date(release_date, format = "%m/%d/%y"),
    # Pair each file with its scraped URL by filename rather than by position,
    # so a different order or length of `urls` cannot misalign the rows.
    file_url = urls[match(filename, basename(urls))]
  )

# Fail loudly here instead of passing NA URLs on to the download step.
if (anyNA(infotable$file_url)) {
  stop(
    "No scraped download URL for: ",
    paste(infotable$filename[is.na(infotable$file_url)], collapse = ", "),
    call. = FALSE
  )
}
|
||
# Make sure the download folder exists before fetching anything.
glue::glue("{here::here()}/posts/taxonomy/data/csvs") |>
  fs::dir_create()

# Download every release listed in `infotable`, one destination per filename;
# resume = TRUE is passed so partial downloads can be continued.
curl::multi_download(
  urls = infotable[["file_url"]],
  destfile = glue::glue(
    "{here::here()}/posts/taxonomy/data/csvs/{infotable$filename}"
  ),
  resume = TRUE
)
|
||
################################################################### | ||
|
||
# Reload the release lookup table from disk.
# Column types: character, character, Date, character.
infotable <- readr::read_csv(
  file = glue::glue("{here::here()}/posts/taxonomy/data/infotable.csv"),
  col_types = "ccDc",
  show_col_types = FALSE
)

# Column cleaner: empty strings become NA, whitespace is squished, then
# fuimus::remove_quotes() is applied (presumably strips quote characters;
# confirm against the fuimus documentation).
clean_cols <- function(x) {
  x |>
    dplyr::na_if("") |>
    stringr::str_squish() |>
    fuimus::remove_quotes()
}

# Preferred leading column order for the cleaned files. The named element
# selects `grouping` under the name `type` when used with dplyr::any_of().
selcols <- c(
  "version",
  "release_date",
  "code",
  type = "grouping",
  "grouping",
  "classification",
  "specialization",
  "definition",
  "notes"
)

# Pattern = replacement pairs applied to the `notes` column.
notes_regs <- c(
  "http[s]?:" = "",
  "//" = "",
  "<br/>" = " ",
  "<br><br>" = " ",
  # "([0-9]{1,2})//([0-9]{1,2})//([0-9]{4})" = "",
  "�" = ""
)

# Paths of every entry in the raw download folder.
nucc_paths <- fs::dir_info(
  glue::glue("{here::here()}/posts/taxonomy/data/csvs")
)[["path"]]
|
||
# Clean one raw NUCC taxonomy CSV and write the result to the `cleaned` folder.
#
# path: path to a raw CSV file (one element of `nucc_paths`).
#
# Relies on the script-level objects `clean_cols`, `notes_regs`, `selcols` and
# `infotable`. The value is whatever readr::write_csv() returns; the function
# is called for its side effect of writing the cleaned file.
parse_nucc_csvs <- function(path) {

  # Create the output folder up front, mirroring what the script does for the
  # raw `csvs` folder; fs::dir_create() is a no-op when it already exists.
  out_dir <- glue::glue("{here::here()}/posts/taxonomy/data/cleaned")
  fs::dir_create(out_dir)

  suppressWarnings(
    readr::read_csv(
      file = path,
      id = "filename",
      show_col_types = FALSE,
      # Force every column to character. A bare "c" is a one-column compact
      # spec, which leaves the remaining columns to type guessing; a column
      # guessed as non-character would break na_if(x, "") in `clean_cols`.
      col_types = readr::cols(.default = readr::col_character()),
      name_repair = janitor::make_clean_names)
  ) |>
    # NOTE(review): drops the first data row of every file, presumably a
    # non-data line in the raw CSVs; confirm against the source files.
    dplyr::slice(-1) |>
    dplyr::mutate(
      filename = basename(filename),
      dplyr::across(dplyr::everything(), clean_cols),
      notes = stringr::str_replace_all(notes, notes_regs) |>
        stringr::str_squish()) |>
    # Attach version and release_date for this file.
    dplyr::left_join(infotable, by = "filename") |>
    # Preferred columns first, then the rest, minus the join bookkeeping.
    dplyr::select(
      dplyr::any_of(selcols),
      dplyr::everything(),
      -c(filename, file_url)) |>
    dplyr::arrange(code) |>
    # NOTE(review): the output name has no ".csv" extension because
    # file_path_sans_ext() strips it; confirm this is intended.
    readr::write_csv(
      file = glue::glue(
        "{out_dir}/{tools::file_path_sans_ext(basename(path))}"
      ),
      num_threads = 4L)

}
|
||
# Clean and write every raw file; walk() is used for the side effect only.
nucc_paths |>
  purrr::walk(parse_nucc_csvs)
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Load the pin helpers (this script expects them to provide pin_update()).
source(file = here::here("data-raw", "pins_internal.R"))

# -- Data wrangling code here -- #

# Template call: replace `dataset` and the placeholder strings below.
pin_update(
  x = dataset,
  name = "object_name",
  title = "Short Description",
  description = "Long Description"
)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Write `x` as a "qs" pin to the folder board at inst/extdata/pins, then
# rewrite the board manifest.
#
# x:           object to pin.
# name:        pin name.
# title:       pin title.
# description: pin description.
#
# Returns the result of pins::write_board_manifest().
pin_update <- function(x, name, title, description) {

  pin_board <- pins::board_folder(here::here("inst/extdata/pins"))

  pins::pin_write(
    board = pin_board,
    x = x,
    name = name,
    title = title,
    description = description,
    type = "qs"
  )

  pins::write_board_manifest(pin_board)
}
|
||
# Delete the named pins from the folder board at inst/extdata/pins.
#
# pin_names: names of the pins to remove, passed to pins::pin_delete().
delete_pins <- function(pin_names) {

  pin_board <- pins::board_folder(here::here("inst/extdata/pins"))

  pins::pin_delete(board = pin_board, names = pin_names)
}
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
filename,version,release_date,file_url | ||
nucc_taxonomy_241.csv,24.1,2024-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_241.csv | ||
nucc_taxonomy_240.csv,24.0,2024-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_240.csv | ||
nucc_taxonomy_231.csv,23.1,2023-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_231.csv | ||
nucc_taxonomy_230.csv,23.0,2023-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_230.csv | ||
nucc_taxonomy_221.csv,22.1,2022-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_221.csv | ||
nucc_taxonomy_220.csv,22.0,2022-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_220.csv | ||
nucc_taxonomy_211.csv,21.1,2021-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_211.csv | ||
nucc_taxonomy_210.csv,21.0,2021-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_210.csv | ||
nucc_taxonomy_201.csv,20.1,2020-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_201.csv | ||
nucc_taxonomy_200.csv,20.0,2020-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_200.csv | ||
nucc_taxonomy_191.csv,19.1,2019-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_191.csv | ||
nucc_taxonomy_190.csv,19.0,2019-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_190.csv | ||
nucc_taxonomy_181.csv,18.1,2018-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_181.csv | ||
nucc_taxonomy_180.csv,18.0,2018-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_180.csv | ||
nucc_taxonomy_171.csv,17.1,2017-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_171.csv | ||
nucc_taxonomy_170.csv,17.0,2017-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_170.csv | ||
nucc_taxonomy_161.csv,16.1,2016-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_161.csv | ||
nucc_taxonomy_160.csv,16.0,2016-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_160.csv | ||
nucc_taxonomy_151.csv,15.1,2015-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_151.csv | ||
nucc_taxonomy_150.csv,15.0,2015-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_150.csv | ||
nucc_taxonomy_141.csv,14.1,2014-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_141.csv | ||
nucc_taxonomy_140.csv,14.0,2014-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_140.csv | ||
nucc_taxonomy_131.csv,13.1,2013-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_131.csv | ||
nucc_taxonomy_130.csv,13.0,2013-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_130.csv | ||
nucc_taxonomy_121.csv,12.1,2012-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_121.csv | ||
nucc_taxonomy_120.csv,12.0,2012-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_120.csv | ||
nucc_taxonomy_111.csv,11.1,2011-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_111.csv | ||
nucc_taxonomy_110.csv,11.0,2011-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_110.csv | ||
nucc_taxonomy_101.csv,10.1,2010-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_101.csv | ||
nucc_taxonomy_100.csv,10.0,2010-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_100.csv | ||
nucc_taxonomy_91.csv,9.1,2009-07-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_91.csv | ||
nucc_taxonomy_90.csv,9.0,2009-01-01,https://www.nucc.org/images/stories/CSV/nucc_taxonomy_90.csv |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_241.csv">Version 24.1, 7/1/24</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_240.csv">Version 24.0, 1/1/24</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_231.csv">Version 23.1, 7/1/23</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_230.csv">Version 23.0, 1/1/23</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_221.csv">Version 22.1, 7/1/22</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_220.csv">Version 22.0, 1/1/22</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_211.csv">Version 21.1, 7/1/21</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_210.csv">Version 21.0, 1/1/21</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_201.csv">Version 20.1, 7/1/20</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_200.csv">Version 20.0, 1/1/20</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_191.csv">Version 19.1, 7/1/19</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_190.csv">Version 19.0, 1/1/19</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_181.csv">Version 18.1, 7/1/18</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_180.csv">Version 18.0, 1/1/18</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_171.csv">Version 17.1, 7/1/17</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_170.csv">Version 17.0, 1/1/17</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_161.csv">Version 16.1, 7/1/16</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_160.csv">Version 16.0, 1/1/16</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_151.csv">Version 15.1, 7/1/15</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_150.csv">Version 15.0, 1/1/15</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_141.csv">Version 14.1, 7/1/14</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_140.csv">Version 14.0, 1/1/14</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_131.csv">Version 13.1, 7/1/13</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_130.csv">Version 13.0, 1/1/13</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_121.csv">Version 12.1, 7/1/12</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_120.csv">Version 12.0, 1/1/12</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_111.csv">Version 11.1, 7/1/11</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_110.csv">Version 11.0, 1/1/11</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_101.csv">Version 10.1, 7/1/10</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_100.csv">Version 10.0, 1/1/10</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_91.csv">Version 9.1, 7/1/09</a></li> | ||
<li><a href="/images/stories/CSV/nucc_taxonomy_90.csv">Version 9.0, 1/1/09</a></li> |