Skip to content

Commit

Permalink
updates to taxonomy functions
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewallenbruce committed Dec 2, 2024
1 parent 96001fe commit 950034f
Show file tree
Hide file tree
Showing 17 changed files with 146 additions and 64 deletions.
25 changes: 20 additions & 5 deletions R/taxonomy.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,40 @@ taxonomy_changelog <- function(taxonomy_code = NULL) {
#' @param taxonomy_code `<chr>` Health Care Provider Taxonomy code, a unique
#' alphanumeric code, ten characters in length
#'
#' @param taxonomy_level `<chr>` Taxonomy level; options are `"I. Section"`,
#' `"II. Grouping"`, `"III. Classification"` and `"IV. Specialization"`
#'
#' @param taxonomy_level_title `<chr>` Taxonomy level title
#'
#' @returns `<tibble>` of search results
#'
#' @examples
#' taxonomy_hierarchy(taxonomy_code = "101Y00000X")
#'
#' taxonomy_hierarchy(taxonomy_code = "103TA0400X")
#'
#' taxonomy_hierarchy(taxonomy_level = "I. Section")
#'
#' taxonomy_hierarchy(taxonomy_level_title = "Allopathic & Osteopathic Physicians")
#'
#' @importFrom fuimus search_in_if
#'
#' @autoglobal
#'
#' @export
taxonomy_hierarchy <- function(taxonomy_code = NULL) {
taxonomy_hierarchy <- function(taxonomy_code = NULL,
taxonomy_level = NULL,
taxonomy_level_title = NULL) {

check_nchar(taxonomy_code, 10)

pin <- get_pin("tax_hierarchy")

search_in_if(pin, pin[["code"]], taxonomy_code)
pin <- search_in_if(pin, pin[["taxonomy_code"]], taxonomy_code)
pin <- search_in_if(pin, pin[["taxonomy_level"]], taxonomy_level)
pin <- search_in_if(pin, pin[["taxonomy_level_title"]], taxonomy_level_title)

return(pin)
}

#' Taxonomy Display Names
Expand All @@ -137,7 +152,7 @@ taxonomy_display <- function(taxonomy_code = NULL) {

pin <- get_pin("tax_display")

search_in_if(pin, pin[["code"]], taxonomy_code)
search_in_if(pin, pin[["taxonomy_code"]], taxonomy_code)
}

#' Taxonomy Definitions
Expand All @@ -161,7 +176,7 @@ taxonomy_definition <- function(taxonomy_code = NULL) {

check_nchar(taxonomy_code, 10)

pin <- get_pin("tax_definitions")
pin <- get_pin("tax_definition")

search_in_if(pin, pin[["code"]], taxonomy_code)
search_in_if(pin, pin[["taxonomy_code"]], taxonomy_code)
}
15 changes: 3 additions & 12 deletions data-raw/02_nucc_pins.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ readr::read_csv(
name_repair = janitor::make_clean_names)

deactivated <- ark_spt |>
dplyr::filter(
codex::not_na(deactivated)
) |>
dplyr::filter(codex::not_na(deactivated)) |>
dplyr::pull(code)

ark_spt
Expand Down Expand Up @@ -150,19 +148,12 @@ ark_long <- ark_long |>
tidyr::fill(nlevels, nyears)


hierarchy <- get_pin("ark_long") |>
hierarchy <- get_pin("tax_hierarchy") |>
dplyr::mutate(id = dplyr::consecutive_id(level), .by = code) |>
dplyr::filter(id != dplyr::lag(id, default = 0)) |>
dplyr::select(-id)

pin_update(
hierarchy,
name = "hierarchy",
title = "NUCC Taxonomy Gierarchy",
description = "Health Care Provider Taxonomy Code Set Archive 2009-2024 (Hierarchy)"
)

get_pin("ark_long") |>
get_pin("tax_hierarchy") |>
dplyr::count(code, level, sort = TRUE) |>
dplyr::filter(n > 1) |>
dplyr::pull(code)
Expand Down
34 changes: 3 additions & 31 deletions data-raw/03_nucc_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,15 +189,15 @@ pin_update(
#--------- Comparing raw dates to the updates
updates |> dplyr::filter(type == "modified")

arktax::retrieve_ark(which = "wide") |>
arktax::taxonomy_raw() |>
dplyr::select(code, modified) |>
dplyr::filter(codex::not_na(modified)) |>
dplyr::distinct() |>
print(n = 200)

updates |> dplyr::filter(type == "effective")

arktax::retrieve_ark(which = "wide") |>
arktax::taxonomy_raw() |>
dplyr::select(code, effective) |>
dplyr::filter(codex::not_na(effective)) |>
dplyr::distinct()
Expand All @@ -206,35 +206,7 @@ updates |>
dplyr::filter(type == "deactivated") |>
print(n = 30)

arktax::retrieve_ark(which = "wide") |>
arktax::taxonomy_raw() |>
dplyr::select(code, deactivated) |>
dplyr::filter(codex::not_na(deactivated)) |>
dplyr::distinct()


#-------DISPLAY NAME
display <- arktax::taxonomy_raw(year = 2024, version = 1) |>
dplyr::select(code, display_name)

pin_update(
display,
"display",
"NUCC Taxonomy Display Names 2009-2024",
"Health Care Provider Taxonomy Code Set Display Names 2009-2024"
)

#-------DEFINITION
definitions <- arktax::taxonomy_raw(year = 2024, version = 1) |>
dplyr::select(code, definition) |>
dplyr::mutate(
definition = dplyr::if_else(
codex::na(definition),
"None",
definition))

pin_update(
definitions,
"definitions",
"NUCC Taxonomy Definitions 2009-2024",
"Health Care Provider Taxonomy Code Set Definitions 2009-2024"
)
29 changes: 29 additions & 0 deletions data-raw/04_nucc_display.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
source(here::here("data-raw", "pins_internal.R"))

raw_display <- arktax::taxonomy_raw() |>
dplyr::select(code, year, version, display_name)

no_display <- raw_display |>
dplyr::filter(is.na(display_name)) |>
dplyr::pull(code) |>
fuimus::uniq_rmna()

raw_display |>
dplyr::filter(!is.na(display_name)) |>
# dplyr::filter(code %in% no_display) |>
dplyr::filter(.by = code,
year == max(year),
version == max(version)) |>
dplyr::count(code, sort = TRUE)

tax_display <- get_pin("tax_display") |>
dplyr::select(
taxonomy_code = code,
taxonomy_display = display_name)

pin_update(
tax_display,
"tax_display",
"NUCC Taxonomy Display Names 2009-2024",
"Health Care Provider Taxonomy Code Set Display Names 2009-2024"
)
47 changes: 47 additions & 0 deletions data-raw/05_nucc_definition.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
source(here::here("data-raw", "pins_internal.R"))

raw_definition <- arktax::taxonomy_raw() |>
dplyr::select(code, year, version, definition) |>
dplyr::mutate(definition = stringr::str_replace_all(definition, notes_regs) |>
stringr::str_squish())

no_definition <- raw_definition |>
dplyr::filter(is.na(definition)) |>
dplyr::pull(code) |>
fuimus::uniq_rmna()

has_definition <- raw_definition |>
dplyr::filter(code %in% no_definition) |>
dplyr::filter(!is.na(definition)) |>
dplyr::pull(code) |>
fuimus::uniq_rmna()

vctrs::vec_slice(no_definition, vctrs::vec_in(no_definition, has_definition))
vctrs::vec_slice(has_definition, vctrs::vec_in(has_definition, no_definition))

raw_definition |>
dplyr::filter(.by = code,
!is.na(definition),
year == max(year),
version == max(version)) |>
dplyr::arrange(code, year, version) |>
dplyr::filter(year < 2024)
print(n = 200)
dplyr::filter(is.na(definition))
dplyr::select(code, definition)



definitions <- get_pin("tax_definitions") |>
dplyr::reframe(
taxonomy_code = code,
taxonomy_definition = dplyr::if_else(definition == "None", NA_character_, definition)
)


pin_update(
definitions,
"tax_definition",
"NUCC Taxonomy Definitions 2009-2024",
"Health Care Provider Taxonomy Code Set Definitions 2009-2024"
)
15 changes: 15 additions & 0 deletions data-raw/06_nucc_hierarchy.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
source(here::here("data-raw", "pins_internal.R"))

hierarchy <- get_pin("tax_hierarchy") |>
dplyr::select(
taxonomy_code = code,
taxonomy_level = level,
taxonomy_level_title = description
)

pin_update(
hierarchy,
name = "tax_hierarchy",
title = "NUCC Taxonomy Hierarchy",
description = "Health Care Provider Taxonomy Code Set Hierarchy 2009-2024"
)
8 changes: 4 additions & 4 deletions inst/extdata/pins/_pins.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ cross_tax:
- cross_tax/20241202T105926Z-6e97a/
tax_changelog:
- tax_changelog/20241202T101138Z-342e2/
tax_definitions:
- tax_definitions/20241202T101218Z-79f37/
tax_definition:
- tax_definition/20241202T165819Z-5bf90/
tax_display:
- tax_display/20241202T101342Z-6657b/
- tax_display/20241202T170112Z-8d652/
tax_hierarchy:
- tax_hierarchy/20241202T101433Z-582c1/
- tax_hierarchy/20241202T170833Z-7cc99/
tax_raw:
- tax_raw/20241202T101525Z-05252/
tax_sources:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
file: tax_definitions.qs
file_size: 75021
pin_hash: 79f378e9d0fd77bd
file: tax_definition.qs
file_size: 75008
pin_hash: 5bf90ad3e935769f
type: qs
title: NUCC Taxonomy Definitions 2009-2024
description: Health Care Provider Taxonomy Code Set Definitions 2009-2024
tags: ~
urls: ~
created: 20241202T101218Z
created: 20241202T165819Z
api_version: 1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
file: tax_display.qs
file_size: 11358
pin_hash: 6657beed09bdcfc0
file_size: 11365
pin_hash: 8d652affb5c85d60
type: qs
title: NUCC Taxonomy Display Names 2009-2024
description: Health Care Provider Taxonomy Code Set Display Names 2009-2024
tags: ~
urls: ~
created: 20241202T101342Z
created: 20241202T170112Z
api_version: 1
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
file: tax_hierarchy.qs
file_size: 13373
pin_hash: 582c1ecdec65c40d
file_size: 13384
pin_hash: 7cc99691802694b7
type: qs
title: NUCC Taxonomy Hierarchy 2009-2024
title: NUCC Taxonomy Hierarchy
description: Health Care Provider Taxonomy Code Set Hierarchy 2009-2024
tags: ~
urls: ~
created: 20241202T101433Z
created: 20241202T170833Z
api_version: 1
Binary file not shown.
15 changes: 14 additions & 1 deletion man/taxonomy_hierarchy.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 950034f

Please sign in to comment.