From 376f2de3666197bfbc641e802cb8e73a45f7fc9f Mon Sep 17 00:00:00 2001 From: Ernest Guevarra Date: Sun, 30 Jun 2024 05:54:30 +0100 Subject: [PATCH] add cod_check_summary function and refactor outputs --- NAMESPACE | 1 + R/cod_check_code.R | 92 +++++++++++---- R/cod_structure_input.R | 2 +- R/cod_summary.R | 57 ++++++++++ README.Rmd | 43 ++++++- README.md | 205 +++++++++++++++++++++++++++++++++- man/cod_check_code_summary.Rd | 27 +++++ 7 files changed, 401 insertions(+), 26 deletions(-) create mode 100644 R/cod_summary.R create mode 100644 man/cod_check_code_summary.Rd diff --git a/NAMESPACE b/NAMESPACE index ccf42c3..999e850 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ export(cod_check_code_sex_icd11) export(cod_check_code_sex_icd11_) export(cod_check_code_structure_icd10) export(cod_check_code_structure_icd11) +export(cod_check_code_summary) export(cod_check_code_unlikely_icd10) export(cod_check_code_unlikely_icd11) export(cod_check_codedit_input) diff --git a/R/cod_check_code.R b/R/cod_check_code.R index 98f6d65..2f7f5af 100644 --- a/R/cod_check_code.R +++ b/R/cod_check_code.R @@ -50,7 +50,7 @@ cod_check_code <- function(cod, version = c("icd10", "icd11"), sex) { cod_check_code_unlikely, cod_check_code_sex ) |> dplyr::mutate( - cod_check = rowSums( + cod_check_code = rowSums( data.frame( .data$cod_check_structure, .data$cod_check_ill_defined, @@ -59,7 +59,15 @@ cod_check_code <- function(cod, version = c("icd10", "icd11"), sex) { ), na.rm = TRUE ) |> - (\(x) ifelse(x == 0, 0, 1))() + (\(x) ifelse(x == 0, 0, 1))(), + cod_check_code_note = ifelse( + cod_check_code == 0, + "No issues found in CoD code", + "Issues found in CoD code" + ) |> + factor( + levels = c("No issues found in CoD code", "Issues found in CoD code") + ) ) } @@ -239,20 +247,6 @@ cod_check_code_structure_icd11 <- function(cod) { #' @export #' cod_check_code_ill_defined_icd10 <- function(cod) { - # I46.1 - # I46.9 - # I50.- - # I95.9 - # I99 - # J96.0 - # J96.9 - # P28.5 - # R00-R57.1, - 
# R57.8-R64, - # R65.2-R65.3, - # R68.0-R94, - # R96-R99 - set1 <- ifelse( cod %in% c("I46.1", "I46.9", "I95.9", "I99", "J96.0", "P28.5"), 1L, 0L ) @@ -274,7 +268,16 @@ cod_check_code_ill_defined_icd10 <- function(cod) { "CoD code is an ill-defined code" ) - tibble(cod_check, cod_check_note) + tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is an ill-defined code" + ) + ) + ) } @@ -294,7 +297,16 @@ cod_check_code_ill_defined_icd11 <- function(cod) { "CoD code is an ill-defined code" ) - tibble::tibble(cod_check, cod_check_note) + tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is an ill-defined code" + ) + ) + ) } @@ -312,7 +324,16 @@ cod_check_code_unlikely_icd10 <- function(cod) { "CoD code is an unlikely cause-of-death" ) - tibble(cod_check, cod_check_note) + tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is an unlikely cause-of-death" + ) + ) + ) } #' @@ -331,7 +352,16 @@ cod_check_code_unlikely_icd11 <- function(cod) { "CoD code is an unlikely cause-of-death" ) - tibble::tibble(cod_check, cod_check_note) + tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is an unlikely cause-of-death" + ) + ) + ) } #' @@ -356,7 +386,16 @@ cod_check_code_sex_icd10_ <- function(cod, sex) { "CoD code is not appropriate for person's sex" ) - tibble::tibble(cod_check, cod_check_note) + tibble::tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is not appropriate for person's sex" + ) + ) + ) } @@ -397,7 +436,16 @@ 
cod_check_code_sex_icd11_ <- function(cod, sex) { "CoD code is not appropriate for person's sex" ) - tibble::tibble(cod_check, cod_check_note) + tibble::tibble(cod_check, cod_check_note) |> + dplyr::mutate( + cod_check_note = factor( + x = cod_check_note, + levels = c( + "No issues found in CoD code", + "CoD code is not appropriate for person's sex" + ) + ) + ) } diff --git a/R/cod_structure_input.R b/R/cod_structure_input.R index 792897e..5b80730 100644 --- a/R/cod_structure_input.R +++ b/R/cod_structure_input.R @@ -43,7 +43,7 @@ cod_structure_input <- function(df, sex, sex_code = c(1, 2), if (is.null(id)) { FreeId <- seq(from = 1, to = nrow(df)) } else { - FreeId <- id + FreeId <- df[[id]] } Sex <- cod_recode_sex(sex_value = df[[sex]], sex_code = sex_code) diff --git a/R/cod_summary.R b/R/cod_summary.R new file mode 100644 index 0000000..02c19cc --- /dev/null +++ b/R/cod_summary.R @@ -0,0 +1,57 @@ +#' +#' Summarise cause-of-death check results +#' +#' @param cod_check A data.frame output of the various `cod_check_code_*` +#' functions +#' @param simplify Logical. Should output be converted into a data.frame? +#' Default is FALSE. +#' +#' @returns If `simplify` is FALSE (default), a list of summary check outputs. +#' Otherwise, a tabulated summary of check outputs. 
+#'
+#' @examples
+#' cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") |>
+#'   cod_check_code_summary()
+#'
+#' @rdname cod_check_code_summary
+#' @export
+#'
+
+cod_check_code_summary <- function(cod_check, simplify = FALSE) {
+  # Exact note columns to tabulate, named by the label of each summary
+  note_cols <- c(
+    "Code structure" = "cod_check_note_structure",
+    "Ill-defined code" = "cod_check_note_ill_defined",
+    "Unlikely cause-of-death code" = "cod_check_note_unlikely",
+    "Code not appropriate for sex" = "cod_check_note_sex",
+    "Overall" = "cod_check_code_note"
+  )
+
+  # Fail early with a clear message when expected note columns are absent
+  missing_cols <- setdiff(note_cols, names(cod_check))
+
+  if (length(missing_cols) > 0) {
+    stop(
+      "`cod_check` is missing column(s): ",
+      paste(missing_cols, collapse = ", "),
+      call. = FALSE
+    )
+  }
+
+  # One count table per note column; `.drop = FALSE` keeps empty levels
+  cod_check_summary <- lapply(
+    X = note_cols,
+    FUN = function(note_col) {
+      cod_check |>
+        dplyr::select(cod_check_note = dplyr::all_of(note_col)) |>
+        dplyr::count(dplyr::across(dplyr::everything()), .drop = FALSE)
+    }
+  )
+
+  if (simplify) {
+    cod_check_summary <- cod_check_summary |>
+      dplyr::bind_rows(.id = "cod_check_type")
+  }
+
+  cod_check_summary
+}
diff --git a/README.Rmd b/README.Rmd
index 379d7d3..9a29d87 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -74,14 +74,55 @@ library(codeditr)
 Using the `icd10_example` dataset which is a dataset already formatted into a compatible structure required by the CoDEdit tool, we can perform a check on this dataset to see possible issues in its formatting and structure before using with the CoDEdit tool.
-```{r use-case-1}
+```{r use-case-1a}
 cod_check_codedit_input(icd10_example)
 ```
+2. Structure raw cause-of-death data for input into CoDEdit tool
+Using the `cod_data_raw_example` dataset, we can format it into a compatible structure required by the CoDEdit tool.
+ +```{r use-case-2a} +cod_structure_input( + df = cod_data_raw_example, + sex = "sex", dob = "dob", dod = "dod", code = "code", id = "id" +) +``` + +The output is a data.frame that can then be saved as an `.xlsx` file for use as input into the CoDEdit tool. ### CoDEdit tool replacement workflow +1. Perform all checks on cause-of-death data + +The `cod_check_code()` function performs all the checks implemented by the CoDEdit tool. + +```{r use-case-3} +cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") +``` + +Results of the per row cause-of-death checks can also be summarised to give a count of issues found in the dataset. + +```{r use-case-4} +cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") |> + cod_check_code_summary() +``` + +2. Perform specific check types on cause-of-death data + +The family of `cod_check_code_*` functions can be used to perform specific check types on the cause-of-death data. + +```{r use-case-5} +### Perform code structure check on cause-of-death data ---- +cod_check_code_structure_icd10(icd10_example$Code) + +### Perform check for ill-defined codes on cause-of-death data ---- +cod_check_code_ill_defined_icd11(cod_data_raw_example$code) + +### Perform check for unlikely cause-of-death codes ---- +cod_check_code_unlikely_icd11(cod_data_raw_example$code) +``` + ## Citation If you find the `codeditr` package useful please cite using the suggested citation provided by a call to the `citation()` function as follows: diff --git a/README.md b/README.md index 9a2f0dd..576143e 100644 --- a/README.md +++ b/README.md @@ -112,7 +112,7 @@ structure before using with the CoDEdit tool. 
cod_check_codedit_input(icd10_example) #> # A tibble: 3,613 × 8 #> sex_check sex_check_note age_check age_check_note code_check code_check_note -#> +#> #> 1 0 No issues with… 0 No issues wit… 0 Cause of death… #> 2 0 No issues with… 0 No issues wit… 0 Cause of death… #> 3 0 No issues with… 0 No issues wit… 0 Cause of death… @@ -124,11 +124,212 @@ cod_check_codedit_input(icd10_example) #> 9 0 No issues with… 0 No issues wit… 0 Cause of death… #> 10 0 No issues with… 0 No issues wit… 0 Cause of death… #> # ℹ 3,603 more rows -#> # ℹ 2 more variables: dod_check , dod_check_note +#> # ℹ 2 more variables: dod_check , dod_check_note ``` +2. Structure raw cause-of-death data for input into CoDEdit tool + +Using the `cod_data_raw_example` dataset, we can format it into a +compatible structure required by the CoDEdit tool. + +``` r +cod_structure_input( + df = cod_data_raw_example, + sex = "sex", dob = "dob", dod = "dod", code = "code", id = "id" +) +#> # A tibble: 20 × 6 +#> FreeId Sex `Age Value` `Age Type` Code `Death Date` +#> +#> 1 4136 1 1318 Y NE84&XA6KU8 2023 +#> 2 4137 2 1318 Y 2B6D&XS9R 2023 +#> 3 4138 1 1318 Y 2C82&XS9R 2023 +#> 4 4139 1 1318 Y CA40.Z&XK9J 2023 +#> 5 4140 2 1318 Y 6C40.3&XS25 2023 +#> 6 4141 1 1318 Y 6C40.3&XS25 2023 +#> 7 4142 1 1318 Y DB94.1&XT8W 2023 +#> 8 4143 2 1318 Y BD40.Z 2023 +#> 9 4144 2 1318 Y 2C76.Z&XA8QA8 2023 +#> 10 4145 1 1318 Y 6C40.3&XS25 2023 +#> 11 4146 2 1318 Y 8B11.5Z 2023 +#> 12 4147 1 1318 Y 2B90.Y&XH74S1 2023 +#> 13 4148 1 1318 Y BD10&XT5R 2023 +#> 14 4149 1 1318 Y 1G41 2023 +#> 15 4150 1 1318 Y BD10&XT5R 2023 +#> 16 4151 2 1318 Y CA40.Z&XB25 2023 +#> 17 4152 2 1318 Y BA01 2023 +#> 18 4153 1 1318 Y 1G41 2023 +#> 19 4154 2 1318 Y BB40 2023 +#> 20 4155 1 1318 Y 1B91 2023 +``` + +The output is a data.frame that can then be saved as an `.xlsx` file for +use as input into the CoDEdit tool. + ### CoDEdit tool replacement workflow +1. 
Perform all checks on cause-of-death data + +The `cod_check_code()` function performs all the checks implemented by +the CoDEdit tool. + +``` r +cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") +#> # A tibble: 20 × 10 +#> cod_check_structure cod_check_note_structure cod_check_ill_defined +#> +#> 1 0 No issues found in CoD code 0 +#> 2 0 No issues found in CoD code 0 +#> 3 0 No issues found in CoD code 0 +#> 4 0 No issues found in CoD code 0 +#> 5 0 No issues found in CoD code 0 +#> 6 0 No issues found in CoD code 0 +#> 7 0 No issues found in CoD code 0 +#> 8 0 No issues found in CoD code 0 +#> 9 0 No issues found in CoD code 0 +#> 10 0 No issues found in CoD code 0 +#> 11 0 No issues found in CoD code 0 +#> 12 0 No issues found in CoD code 0 +#> 13 0 No issues found in CoD code 0 +#> 14 0 No issues found in CoD code 0 +#> 15 0 No issues found in CoD code 0 +#> 16 0 No issues found in CoD code 0 +#> 17 0 No issues found in CoD code 0 +#> 18 0 No issues found in CoD code 0 +#> 19 0 No issues found in CoD code 0 +#> 20 0 No issues found in CoD code 0 +#> # ℹ 7 more variables: cod_check_note_ill_defined , +#> # cod_check_unlikely , cod_check_note_unlikely , +#> # cod_check_sex , cod_check_note_sex , cod_check_code , +#> # cod_check_code_note +``` + +Results of the per row cause-of-death checks can also be summarised to +give a count of issues found in the dataset. 
+ +``` r +cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") |> + cod_check_code_summary() +#> $`Code structure` +#> # A tibble: 65 × 2 +#> cod_check_note n +#> +#> 1 No issues found in CoD code 20 +#> 2 CoD code has a period (`.`) character in the wrong place 0 +#> 3 CoD code starts with `O` or `I` 0 +#> 4 CoD code has a period (`.`) character in the wrong place; CoD code sta… 0 +#> 5 CoD code has a number as its second value 0 +#> 6 CoD code has a period (`.`) character in the wrong place; CoD code has… 0 +#> 7 CoD code starts with `O` or `I`; CoD code has a number as its second v… 0 +#> 8 CoD code has a period (`.`) character in the wrong place; CoD code sta… 0 +#> 9 CoD code has `O` or `I` as its second value 0 +#> 10 CoD code has a period (`.`) character in the wrong place; CoD code has… 0 +#> # ℹ 55 more rows +#> +#> $`Ill-defined code` +#> # A tibble: 2 × 2 +#> cod_check_note n +#> +#> 1 No issues found in CoD code 20 +#> 2 CoD code is an ill-defined code 0 +#> +#> $`Unlikely cause-of-death code` +#> # A tibble: 2 × 2 +#> cod_check_note n +#> +#> 1 No issues found in CoD code 20 +#> 2 CoD code is an unlikely cause-of-death 0 +#> +#> $`Code not appropriate for sex` +#> # A tibble: 2 × 2 +#> cod_check_note n +#> +#> 1 No issues found in CoD code 20 +#> 2 CoD code is not appropriate for person's sex 0 +#> +#> $Overall +#> # A tibble: 2 × 2 +#> cod_check_note n +#> +#> 1 No issues found in CoD code 20 +#> 2 Issues found in CoD code 0 +``` + +2. Perform specific check types on cause-of-death data + +The family of `cod_check_code_*` functions can be used to perform +specific check types on the cause-of-death data. 
+ +``` r +### Perform code structure check on cause-of-death data ---- +cod_check_code_structure_icd10(icd10_example$Code) +#> # A tibble: 3,613 × 2 +#> cod_check cod_check_note +#> +#> 1 0 No issues found in CoD code +#> 2 0 No issues found in CoD code +#> 3 0 No issues found in CoD code +#> 4 0 No issues found in CoD code +#> 5 0 No issues found in CoD code +#> 6 0 No issues found in CoD code +#> 7 0 No issues found in CoD code +#> 8 0 No issues found in CoD code +#> 9 0 No issues found in CoD code +#> 10 0 No issues found in CoD code +#> # ℹ 3,603 more rows + +### Perform check for ill-defined codes on cause-of-death data ---- +cod_check_code_ill_defined_icd11(cod_data_raw_example$code) +#> # A tibble: 20 × 2 +#> cod_check cod_check_note +#> +#> 1 0 No issues found in CoD code +#> 2 0 No issues found in CoD code +#> 3 0 No issues found in CoD code +#> 4 0 No issues found in CoD code +#> 5 0 No issues found in CoD code +#> 6 0 No issues found in CoD code +#> 7 0 No issues found in CoD code +#> 8 0 No issues found in CoD code +#> 9 0 No issues found in CoD code +#> 10 0 No issues found in CoD code +#> 11 0 No issues found in CoD code +#> 12 0 No issues found in CoD code +#> 13 0 No issues found in CoD code +#> 14 0 No issues found in CoD code +#> 15 0 No issues found in CoD code +#> 16 0 No issues found in CoD code +#> 17 0 No issues found in CoD code +#> 18 0 No issues found in CoD code +#> 19 0 No issues found in CoD code +#> 20 0 No issues found in CoD code + +### Perform check for unlikely cause-of-death codes ---- +cod_check_code_unlikely_icd11(cod_data_raw_example$code) +#> # A tibble: 20 × 2 +#> cod_check cod_check_note +#> +#> 1 0 No issues found in CoD code +#> 2 0 No issues found in CoD code +#> 3 0 No issues found in CoD code +#> 4 0 No issues found in CoD code +#> 5 0 No issues found in CoD code +#> 6 0 No issues found in CoD code +#> 7 0 No issues found in CoD code +#> 8 0 No issues found in CoD code +#> 9 0 No issues found in CoD code +#> 10 0 No 
issues found in CoD code +#> 11 0 No issues found in CoD code +#> 12 0 No issues found in CoD code +#> 13 0 No issues found in CoD code +#> 14 0 No issues found in CoD code +#> 15 0 No issues found in CoD code +#> 16 0 No issues found in CoD code +#> 17 0 No issues found in CoD code +#> 18 0 No issues found in CoD code +#> 19 0 No issues found in CoD code +#> 20 0 No issues found in CoD code +``` + ## Citation If you find the `codeditr` package useful please cite using the diff --git a/man/cod_check_code_summary.Rd b/man/cod_check_code_summary.Rd new file mode 100644 index 0000000..3e14e07 --- /dev/null +++ b/man/cod_check_code_summary.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cod_summary.R +\name{cod_check_code_summary} +\alias{cod_check_code_summary} +\title{Summarise cause-of-death check results} +\usage{ +cod_check_code_summary(cod_check, simplify = FALSE) +} +\arguments{ +\item{cod_check}{A data.frame output of the various \verb{cod_check_code_*} +functions} + +\item{simplify}{Logical. Should output be converted into a data.frame? +Default is FALSE.} +} +\value{ +If \code{simplify} is FALSE (default), a list of summary check outputs. +Otherwise, a tabulated summary of check outputs. +} +\description{ +Summarise cause-of-death check results +} +\examples{ +cod_check_code(cod_data_raw_example$code, version = "icd11", sex = "sex") |> + cod_check_code_summary() + +}