diff --git a/DESCRIPTION b/DESCRIPTION index 8f672f94..0ae35533 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,13 +3,30 @@ Package: osdc Title: Open Source Diabetes Classifier (OSCD) for Danish Registers Version: 0.0.1.9000 Authors@R: c( - # cre = maintainer, even though it translates to "creator" - person(c("Luke", "William"), "Johnston", , "lwjohnst@gmail.com", - comment = c(ORCID = "0000-0003-4169-2616"), role = c("aut", "cre")), - person(c("Signe", "Kirk"), "Brødbæk", , "signekb@clin.au.dk", role = "aut"), - person(c("Anders", "Aasted"), "Isaksen", , "andaas@rm.dk", role = "aut"), - person("Steno Diabetes Center Aarhus", role = "cph"), - person("Aarhus University", role = "cph") + person( + c("Luke", "William"), "Johnston", + email = "lwjohnst@gmail.com", + role = c("aut", "cre"), + comment = c(ORCID = "0000-0003-4169-2616") + ), + person( + c("Signe", "Kirk"), "Brødbæk", + email = "signekb@clin.au.dk", + role = "aut" + ), + person( + c("Anders", "Aasted"), "Isaksen", + email = "andaas@rm.dk", + role = "aut" + ), + person( + "Steno Diabetes Center Aarhus", + role = "cph" + ), + person( + "Aarhus University", + role = "cph" + ) ) Description: This classifier first identifies a population of individuals with any type of diabetes mellitus and then splits this population diff --git a/R/get-variables.R b/R/get-variables.R index e9cc945c..26a0a89a 100644 --- a/R/get-variables.R +++ b/R/get-variables.R @@ -2,10 +2,31 @@ #' Get a list of the registers' abbreviations. #' #' @return A character string. -#' @export +#' @keywords internal #' #' @examples #' get_register_abbrev() get_register_abbrev <- function() { - unique(required_variables$register_abbrev) + unique(variable_description$register_abbrev) +} + +#' Get a list of required variables from a specific register. +#' +#' @param register The abbreviation of the register name. See list of +#' abbreviations in [get_register_abbrev()]. +#' +#' @return A character vector of variable names. +#' @keywords internal +#' +#' @examples +#' get_required_variables("bef") +get_required_variables <- function(register) { + if (!checkmate::test_scalar(register)) { + cli::cli_abort("You are giving too many registers, please give only one.") + } + checkmate::assert_choice(register, get_register_abbrev()) + register <- rlang::arg_match(register, get_register_abbrev()) + variable_description |> + dplyr::filter(.data$register_abbrev == register) |> + dplyr::pull(.data$variable_name) } diff --git a/R/sysdata.rda b/R/sysdata.rda deleted file mode 100644 index f86c22a8..00000000 Binary files a/R/sysdata.rda and /dev/null differ diff --git a/R/variable-description.R b/R/variable-description.R new file mode 100644 index 00000000..468cdd36 --- /dev/null +++ b/R/variable-description.R @@ -0,0 +1,17 @@ +#' Variables from registers and their descriptions that are required for the +#' osdc algorithm. +#' +#' @format ## `variable_description` +#' A data frame with 39 rows and 6 columns: +#' \describe{ +#' \item{register_name}{The official, full Danish name of the register.} +#' \item{register_abbrev}{The official abbreviation for the register.} +#' \item{variable_name}{The official name of the variable found in the register.} +#' \item{years_covered}{The years when the variable is available from.} +#' \item{danish_description}{The official description in Danish for the variable.} +#' \item{english_description}{The translated description in English for the variable.} +#' } +#' @source Many of the details within the `variable_description` dataset come +#' from the full official list of registers from Statistics Denmark (DST): +#' +"variable_description" diff --git a/R/verify-variables.R b/R/verify-variables.R index 48d55fe5..73877075 100644 --- a/R/verify-variables.R +++ b/R/verify-variables.R @@ -1,8 +1,12 @@ #' Verify that the dataset has the required variables for the algorithm. #' +#' Use this function inside another function within an `if` condition to provide an +#' informative error message within the function used. This is done to make the +#' error message more informative to the location that the error actually +#' occurs, rather than within this function. +#' #' @param data The dataset to check. -#' @param register The abbreviation of the register name. See list of -#' abbreviations in [get_register_abbrev()]. +#' @inheritParams get_required_variables #' #' @return Either TRUE if the verification passes, or a character string if #' there is an error. @@ -16,10 +20,12 @@ #' verify_required_variables(example_bef_data, "bef") verify_required_variables <- function(data, register) { checkmate::assert_choice(register, get_register_abbrev()) - expected_variables <- required_variables |> - dplyr::filter(.data$register_abbrev == register) |> - dplyr::pull(.data$variable_name) + + # TODO: Consider using/looking into rlang::try_fetch() to provide contextual error messages. + expected_variables <- get_required_variables(register) + actual_variables <- colnames(data) + checkmate::check_names( x = actual_variables, must.include = expected_variables diff --git a/data-raw/variable-description.R b/data-raw/variable-description.R index a96544fa..b355d7fa 100644 --- a/data-raw/variable-description.R +++ b/data-raw/variable-description.R @@ -2,7 +2,15 @@ library(tidyverse) -required_variables <- read_csv(here::here("data-raw/variable_description.csv")) |> - select(register_abbrev = raw_register_filename, variable_name) +variable_description <- here::here("data-raw/variable_description.csv") |> + read_csv() |> + select( + register_name, + register_abbrev = raw_register_filename, + variable_name, + years_covered, + danish_description, + english_description + ) -usethis::use_data(required_variables, overwrite = TRUE, internal = TRUE) +usethis::use_data(variable_description, overwrite = TRUE) diff --git a/data/variable_description.rda b/data/variable_description.rda new file mode 100644 index 00000000..d663147a Binary files /dev/null and b/data/variable_description.rda differ diff --git a/tests/testthat/test-get-variables.R b/tests/testthat/test-get-variables.R new file mode 100644 index 00000000..e1d9aac2 --- /dev/null +++ b/tests/testthat/test-get-variables.R @@ -0,0 +1,12 @@ +test_that("internal `get_` variable helper functions give correct output", { + + # Should be character. Not sure if other tests are needed here. + expect_type(get_register_abbrev(), "character") + expect_type(get_required_variables("bef"), "character") + + # Only able to use register ids that are real. + expect_error(get_required_variables("fake")) + + # Only allows a vector of one. + expect_error(get_required_variables(c("bef", "atc"))) +})