diff --git a/DESCRIPTION b/DESCRIPTION index 74f483e..fad8dc4 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,10 +10,13 @@ URL: https://github.com/andrewallenbruce/arktax, https://andrewallenbruce.github.io/arktax/ BugReports: https://github.com/andrewallenbruce/arktax/issues Imports: + cli, fs, fuimus, glue, - pins + pins, + rlang, + stringfish Suggests: qs, roxyglobals, diff --git a/NAMESPACE b/NAMESPACE index c1066cd..7bf9777 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,7 +4,10 @@ export(get_pin) export(gh_raw) export(list_pins) export(mount_board) -export(retrieve_ark) +export(retrieve_changelog) +export(retrieve_hierarchy) +export(retrieve_raw) +export(retrieve_sources) importFrom(fs,path_package) importFrom(fuimus,search_in_if) importFrom(glue,glue) diff --git a/NEWS.md b/NEWS.md index 5f3a322..99575be 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,3 @@ -# arktax (development version) +# arktax 0.1.0.9000 (2024-11-16) * Initialize package diff --git a/R/arktax.R b/R/arktax.R index 072c506..b0257b5 100644 --- a/R/arktax.R +++ b/R/arktax.R @@ -1,42 +1,129 @@ -#' Get Taxomony Source File +#' Retrieve Taxonomy Source File #' #' @param year `` year of source file release; options are `2009:2024` #' #' @param version `` version of source file; options are `0` or `1` #' -#' @param code `` Health Care Provider Taxonomy code, a unique alphanumeric code, ten characters in length -#' -#' @param which `` `wide` or `long` version of the taxonomy +#' @param code `` Health Care Provider Taxonomy code, a unique +#' alphanumeric code, ten characters in length #' #' @returns `` of search results #' #' @examples -#' retrieve_ark(year = 2024, code = "101Y00000X", which = "wide") +#' retrieve_raw(year = 2024, code = "101Y00000X") #' -#' retrieve_ark(code = "101Y00000X", which = "long") +#' retrieve_raw(code = "101Y00000X") #' #' @importFrom fuimus search_in_if #' #' @autoglobal #' #' @export -retrieve_ark <- function(year = NULL, +retrieve_raw <- function(year = NULL, version = NULL, - code = NULL, - which = c("wide", "long")) { - - ark <- switch( - which, - wide = get_pin("ark_taxonomy"), - long = get_pin("ark_long") - ) - - if (which == "wide") { - ark <- search_in_if(ark, ark[["year"]], year) - ark <- search_in_if(ark, ark[["version"]], version) - } + code = NULL) { + check_nchar(code, 10) + pin <- get_pin("ark_taxonomy") + pin <- search_in_if(pin, pin[["year"]], year) + pin <- search_in_if(pin, pin[["version"]], version) + pin <- search_in_if(pin, pin[["code"]], code) + return(pin) +} + +#' Retrieve Taxonomy Sources +#' +#' @param code `` Health Care Provider Taxonomy code, a unique +#' alphanumeric code, ten characters in length +#' +#' @returns `` of search results +#' +#' @examples +#' retrieve_sources(code = "101Y00000X") +#' +#' retrieve_sources(code = "103TA0400X") +#' +#' @importFrom fuimus search_in_if +#' +#' @autoglobal +#' +#' @export +retrieve_sources <- function(code = NULL) { + check_nchar(code, 10) + pin <- get_pin("sources") + search_in_if(pin, pin[["code"]], code) +} + +#' Retrieve Taxonomy Changelog +#' +#' @param code `` Health Care Provider Taxonomy code, a unique +#' alphanumeric code, ten characters in length +#' +#' @returns `` of search results +#' +#' @examples +#' retrieve_changelog(code = "103GC0700X") +#' +#' retrieve_changelog(code = "103G00000X") +#' +#' @importFrom fuimus search_in_if +#' +#' @autoglobal +#' +#' @export +retrieve_changelog <- function(code = NULL) { + check_nchar(code, 10) + pin <- get_pin("changelog") + search_in_if(pin, pin[["code"]], code) +} + +#' Retrieve Taxonomy Hierarchy +#' +#' @param code `` Health Care Provider Taxonomy code, a unique +#' alphanumeric code, ten characters in length +#' +#' @returns `` of search results +#' +#' @examples +#' retrieve_hierarchy(code = "101Y00000X") +#' +#' retrieve_hierarchy(code = "103TA0400X") +#' +#' @importFrom fuimus search_in_if +#' +#' @autoglobal +#' +#' @export +retrieve_hierarchy <- function(code = NULL) { + check_nchar(code, 10) + pin <- get_pin("ark_long") + search_in_if(pin, pin[["code"]], code) +} + + +#' Check that input is `n` character(s) long +#' +#' @param x `` string +#' +#' @param n `` number of characters +#' +#' @autoglobal +#' +#' @noRd +check_nchar <- function(x, n) { + + if (!is.null(x)) { - ark <- search_in_if(ark, ark[["code"]], code) + stopifnot(rlang::is_integerish(n), n > 0) - return(ark) + arg <- rlang::caller_arg(x) + call <- rlang::caller_env() + + if (any(stringfish::sf_nchar(x) != n, na.rm = TRUE)) { + cli::cli_abort( + "{.arg {arg}} must be {.val {n}} character{?s} long.", + arg = arg, + call = call) + } + stringfish::sf_toupper(x) + } } diff --git a/data-raw/nucc_pins.R b/data-raw/nucc_pins.R index fa6e8d1..b3ce29e 100644 --- a/data-raw/nucc_pins.R +++ b/data-raw/nucc_pins.R @@ -165,6 +165,12 @@ pin_update( description = "Health Care Provider Taxonomy Code Set Archive 2009-2024 (Long)" ) +get_pin("ark_long") |> + dplyr::count(code, level, sort = TRUE) |> + dplyr::filter(n > 1) |> + dplyr::pull(code) + + ark_long |> dplyr::group_by(code) |> dplyr::mutate(years = ivs::iv(min(year), max(year))) |> diff --git a/data-raw/splitting.R b/data-raw/splitting.R index 8654d4f..eed3188 100644 --- a/data-raw/splitting.R +++ b/data-raw/splitting.R @@ -1,11 +1,21 @@ source(here::here("data-raw", "pins_internal.R")) -wide <- arktax::retrieve_ark(year = 2024, which = "wide") |> - dplyr::select(-modified, -effective, -deactivated) +# Use wide version +wide <- arktax::retrieve_ark( + year = 2024, + version = 1, + which = "wide") |> + dplyr::select(-year, -version) |> + fuimus::remove_quiet() +#------------ BASE base <- wide |> dplyr::select(code, notes) |> - dplyr::distinct(code, notes, .keep_all = TRUE) |> + dplyr::distinct( + code, + notes, + .keep_all = TRUE) |> + dplyr::filter(codex::not_na(notes)) |> dplyr::mutate( .id = dplyr::row_number(), updates = stringr::str_extract_all(notes, "\\[.*\\]"), @@ -13,9 +23,65 @@ base <- wide |> notes = NULL) |> tidyr::unnest(updates, keep_empty = TRUE) +#--------- SOURCES +sources <- base |> + dplyr::select(-updates, -.id) |> + dplyr::filter(codex::not_na(sources)) |> + dplyr::distinct() |> + dplyr::mutate( + sources = dplyr::case_when( + code == "2085U0001X" ~ stringr::str_glue( + "{sources}. Additional Resources: See 2085R0202X. ", + "The American Osteopathic Board of Radiology no longer offers a certificate in this specialty ", + "(Diagnostic Ultrasound is part of the scope of a Diagnostic Radiologist)."), + .default = sources)) |> + dplyr::reframe( + code, + type = "primary", + source = stringr::str_remove(sources, "^[Ss]ources?: "), + addition = stringr::str_detect(sources, " Additional Resources: ")) + +sources <- vctrs::vec_rbind( + sources |> + dplyr::filter(!addition), + sources |> + dplyr::filter(addition) |> + tidyr::separate_longer_delim( + cols = source, + delim = stringr::fixed(" Additional Resources: ")) |> + dplyr::mutate( + id = dplyr::row_number(), + .before = 1, + .by = code) |> + dplyr::mutate( + type = dplyr::if_else( + id == 1, + "primary", + "additional"), + id = NULL)) |> + dplyr::select(-addition) |> + dplyr::mutate( + type = factor(type, + levels = c("primary", "additional"), ordered = TRUE)) |> + dplyr::arrange(code, type) + +# 2085U0001X +# 2008-07-01 +# Additional Resources: The American Osteopathic Board of Radiology no longer offers a certificate in this specialty. +# Note: In medical practice, Diagnostic Ultrasound is part of the scope of training and practice of a Diagnostic Radiologist - see Taxonomy Code 2085R0202X. + +pin_update( + sources, + "sources", + "NUCC Taxonomy Sources 2009-2024", + "Health Care Provider Taxonomy Code Set Sources 2009-2024" +) + + +#----------------------------- UPDATES updates <- base |> dplyr::select(-sources) |> - dplyr::filter(not_na(updates)) |> + dplyr::filter(codex::not_na(updates)) |> tidyr::separate_longer_delim( cols = updates, delim = stringr::fixed("; ")) |> @@ -24,8 +90,8 @@ updates <- base |> first = stringr::str_extract_all(updates, ", \\d{1,2}")) |> tidyr::unnest(first, keep_empty = TRUE) |> dplyr::mutate( - first = str_remove(first, ", "), - multi = str_count(updates, ", \\d{1}/\\d{1}/\\d{4}")) + first = stringr::str_remove(first, ", "), + multi = stringr::str_count(updates, ", \\d{1,2}/\\d{1,2}/\\d{2,4}")) updates <- vctrs::vec_rbind( updates |> dplyr::filter(multi == 0), @@ -36,21 +102,70 @@ updates <- vctrs::vec_rbind( delim = stringr::regex(", \\d{1,2}")) |> dplyr::mutate(updates = dplyr::if_else( stringr::str_detect(updates, "^[/]"), - stringr::str_glue("{first}{updates}"), updates)) -) |> - dplyr::select(-first, -multi) |> + stringr::str_glue("{first}{updates}"), updates))) |> + dplyr::select(-multi, -first) |> dplyr::mutate( - dates = stringr::str_extract_all(updates, "\\d{1,2}[/]\\d{1,2}[/]\\d{4}"), - updates = stringr::str_remove_all(updates, "\\d{1,2}[/]\\d{1,2}[/]\\d{4}[:, ]\\s?")) |> + dates = stringr::str_extract_all(updates, "\\d{1,2}[/]\\d{1,2}[/]\\d{2,4}"), + updates = stringr::str_remove_all(updates, "\\d{1,2}[/]\\d{1,2}[/]\\d{2,4}[:]\\s?"), + updates = dplyr::if_else(updates == "New", "new", updates), + updates = stringr::str_remove_all(updates, "\\d{1}[/]\\d{1}[/]\\d{4},?\\s?"), + updates = dplyr::if_else(updates == "42 U.S.C. 1396d(1)(3)(B)] [added definition", "added definition", updates)) |> tidyr::unnest(dates, keep_empty = TRUE) |> - dplyr::mutate(dates = clock::date_parse(dates, format = "%m/%d/%Y")) - -updates |> dplyr::mutate( - date_type = dplyr::case_when( - stringr::str_detect(updates, "[Nn]ew") ~ "effective", + dates = dplyr::if_else(dates == "7/1/24", "7/1/2024", dates), + dates = clock::date_parse(dates, format = "%m/%d/%Y"), + updates = dplyr::case_when( + code == "207RX0202X" & dates == "2007-07-01" ~ "added definition, added source", + code == "207RX0202X" & dates == "2007-11-05" ~ "corrected definition", + code == "320600000X" & dates == "2003-07-01" ~ "new", + code == "320600000X" & dates == "2021-01-01" ~ "modified title and definition", + code == "2085U0001X" & dates == "2008-07-01" ~ "definition added, source added", + .default = updates)) |> + dplyr::distinct() |> + dplyr::mutate( + category = dplyr::case_when( + stringr::str_detect(updates, "new") ~ "effective", stringr::str_detect(updates, "inactive") ~ "deactivated", - .default = "modified") |> forcats::as_factor()) |> - dplyr::arrange(code, dates) |> - dplyr::count(updates, sort = TRUE) |> - print(n = 50) + .default = "modified"), + category = factor( + category, + levels = c("effective", "modified", "deactivated"), ordered = TRUE) + ) |> + dplyr::arrange(code, dates, category) |> + dplyr::reframe( + code, + date_type = category, + date = dates, + change = stringr::str_replace(updates, "^marked inactive, use value ", "marked inactive, use ")) + +pin_update( + updates, + "changelog", + "NUCC Taxonomy Changelog 2009-2024", + "Health Care Provider Taxonomy Code Set Changelog 2009-2024" +) + +#--------- Comparing raw dates to the updates +updates |> dplyr::filter(type == "modified") + +arktax::retrieve_ark(which = "wide") |> + dplyr::select(code, modified) |> + dplyr::filter(codex::not_na(modified)) |> + dplyr::distinct() |> + print(n = 200) + +updates |> dplyr::filter(type == "effective") + +arktax::retrieve_ark(which = "wide") |> + dplyr::select(code, effective) |> + dplyr::filter(codex::not_na(effective)) |> + dplyr::distinct() + +updates |> + dplyr::filter(type == "deactivated") |> + print(n = 30) + +arktax::retrieve_ark(which = "wide") |> + dplyr::select(code, deactivated) |> + dplyr::filter(codex::not_na(deactivated)) |> + dplyr::distinct() diff --git a/inst/extdata/pins/_pins.yaml b/inst/extdata/pins/_pins.yaml index 5fe81ca..c1bca94 100644 --- a/inst/extdata/pins/_pins.yaml +++ b/inst/extdata/pins/_pins.yaml @@ -2,3 +2,7 @@ ark_long: - ark_long/20241129T111715Z-13827/ ark_taxonomy: - ark_taxonomy/20241124T214441Z-05252/ +changelog: +- changelog/20241129T163133Z-a31f6/ +sources: +- sources/20241129T163125Z-e153d/ diff --git a/inst/extdata/pins/changelog/20241129T163133Z-a31f6/changelog.qs b/inst/extdata/pins/changelog/20241129T163133Z-a31f6/changelog.qs new file mode 100644 index 0000000..3623fee Binary files /dev/null and b/inst/extdata/pins/changelog/20241129T163133Z-a31f6/changelog.qs differ diff --git a/inst/extdata/pins/changelog/20241129T163133Z-a31f6/data.txt b/inst/extdata/pins/changelog/20241129T163133Z-a31f6/data.txt new file mode 100644 index 0000000..8feca47 --- /dev/null +++ b/inst/extdata/pins/changelog/20241129T163133Z-a31f6/data.txt @@ -0,0 +1,10 @@ +file: changelog.qs +file_size: 4175 +pin_hash: a31f66067c8d6c9b +type: qs +title: NUCC Taxonomy Changelog 2009-2024 +description: Health Care Provider Taxonomy Code Set Changelog 2009-2024 +tags: ~ +urls: ~ +created: 20241129T163133Z +api_version: 1 diff --git a/inst/extdata/pins/sources/20241129T163125Z-e153d/data.txt b/inst/extdata/pins/sources/20241129T163125Z-e153d/data.txt new file mode 100644 index 0000000..2e5ac00 --- /dev/null +++ b/inst/extdata/pins/sources/20241129T163125Z-e153d/data.txt @@ -0,0 +1,10 @@ +file: sources.qs +file_size: 15091 +pin_hash: e153d50e1f21ad6c +type: qs +title: NUCC Taxonomy Sources 2009-2024 +description: Health Care Provider Taxonomy Code Set Sources 2009-2024 +tags: ~ +urls: ~ +created: 20241129T163125Z +api_version: 1 diff --git a/inst/extdata/pins/sources/20241129T163125Z-e153d/sources.qs b/inst/extdata/pins/sources/20241129T163125Z-e153d/sources.qs new file mode 100644 index 0000000..938b607 Binary files /dev/null and b/inst/extdata/pins/sources/20241129T163125Z-e153d/sources.qs differ diff --git a/man/retrieve_ark.Rd b/man/retrieve_ark.Rd deleted file mode 100644 index 9902039..0000000 --- a/man/retrieve_ark.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/arktax.R -\name{retrieve_ark} -\alias{retrieve_ark} -\title{Get Taxomony Source File} -\usage{ -retrieve_ark( - year = NULL, - version = NULL, - code = NULL, - which = c("wide", "long") -) -} -\arguments{ -\item{year}{\verb{} year of source file release; options are \code{2009:2024}} - -\item{version}{\verb{} version of source file; options are \code{0} or \code{1}} - -\item{code}{\verb{} Health Care Provider Taxonomy code, a unique alphanumeric code, ten characters in length} - -\item{which}{\verb{} \code{wide} or \code{long} version of the taxonomy} -} -\value{ -\verb{} of search results -} -\description{ -Get Taxomony Source File -} -\examples{ -retrieve_ark(year = 2024, code = "101Y00000X", which = "wide") - -retrieve_ark(code = "101Y00000X", which = "long") - -} diff --git a/man/retrieve_changelog.Rd b/man/retrieve_changelog.Rd new file mode 100644 index 0000000..60c5b1b --- /dev/null +++ b/man/retrieve_changelog.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arktax.R +\name{retrieve_changelog} +\alias{retrieve_changelog} +\title{Retrieve Taxonomy Changelog} +\usage{ +retrieve_changelog(code = NULL) +} +\arguments{ +\item{code}{\verb{} Health Care Provider Taxonomy code, a unique +alphanumeric code, ten characters in length} +} +\value{ +\verb{} of search results +} +\description{ +Retrieve Taxonomy Changelog +} +\examples{ +retrieve_changelog(code = "103GC0700X") + +retrieve_changelog(code = "103G00000X") + +} diff --git a/man/retrieve_hierarchy.Rd b/man/retrieve_hierarchy.Rd new file mode 100644 index 0000000..2ccab13 --- /dev/null +++ b/man/retrieve_hierarchy.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arktax.R +\name{retrieve_hierarchy} +\alias{retrieve_hierarchy} +\title{Retrieve Taxonomy Hierarchy} +\usage{ +retrieve_hierarchy(code = NULL) +} +\arguments{ +\item{code}{\verb{} Health Care Provider Taxonomy code, a unique +alphanumeric code, ten characters in length} +} +\value{ +\verb{} of search results +} +\description{ +Retrieve Taxonomy Hierarchy +} +\examples{ +retrieve_hierarchy(code = "101Y00000X") + +retrieve_hierarchy(code = "103TA0400X") + +} diff --git a/man/retrieve_raw.Rd b/man/retrieve_raw.Rd new file mode 100644 index 0000000..1ea736a --- /dev/null +++ b/man/retrieve_raw.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arktax.R +\name{retrieve_raw} +\alias{retrieve_raw} +\title{Retrieve Taxonomy Source File} +\usage{ +retrieve_raw(year = NULL, version = NULL, code = NULL) +} +\arguments{ +\item{year}{\verb{} year of source file release; options are \code{2009:2024}} + +\item{version}{\verb{} version of source file; options are \code{0} or \code{1}} + +\item{code}{\verb{} Health Care Provider Taxonomy code, a unique +alphanumeric code, ten characters in length} +} +\value{ +\verb{} of search results +} +\description{ +Retrieve Taxonomy Source File +} +\examples{ +retrieve_raw(year = 2024, code = "101Y00000X") + +retrieve_raw(code = "101Y00000X") + +} diff --git a/man/retrieve_sources.Rd b/man/retrieve_sources.Rd new file mode 100644 index 0000000..c992a98 --- /dev/null +++ b/man/retrieve_sources.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/arktax.R +\name{retrieve_sources} +\alias{retrieve_sources} +\title{Retrieve Taxonomy Sources} +\usage{ +retrieve_sources(code = NULL) +} +\arguments{ +\item{code}{\verb{} Health Care Provider Taxonomy code, a unique +alphanumeric code, ten characters in length} +} +\value{ +\verb{} of search results +} +\description{ +Retrieve Taxonomy Sources +} +\examples{ +retrieve_sources(code = "101Y00000X") + +retrieve_sources(code = "103TA0400X") + +}