From c1357357cf196b1fed8ab2da91fe47be80c1de37 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 11:33:31 +0200 Subject: [PATCH 01/22] refactor: quiet the reading message --- data-raw/variable-description.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data-raw/variable-description.R b/data-raw/variable-description.R index 56a80ecb..2e596484 100644 --- a/data-raw/variable-description.R +++ b/data-raw/variable-description.R @@ -1,6 +1,6 @@ create_variable_description_data <- function(path) { variable_description <- path |> - readr::read_csv() |> + readr::read_csv(show_col_types = FALSE) |> dplyr::select( "register_name", "register_abbrev", From 24ebe2631bf10a6c3e6026348e71fdac8901908c Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:12:51 +0200 Subject: [PATCH 02/22] chore: ignore these files in the build process --- .Rbuildignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.Rbuildignore b/.Rbuildignore index 1aaf0d9a..19a046da 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -10,3 +10,5 @@ ^pkgdown$ ^\.vscode$ ^_targets$ +^_targets\.R$ +^justfile$ From 69b8bc7b404ae8e2c47308ef90296af3246ffab2 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:13:23 +0200 Subject: [PATCH 03/22] refactor: these functions should only output data --- data-raw/algorithm.R | 6 ++---- data-raw/variable-description.R | 11 ++--------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/data-raw/algorithm.R b/data-raw/algorithm.R index 8dcefd73..2dba3df7 100644 --- a/data-raw/algorithm.R +++ b/data-raw/algorithm.R @@ -1,5 +1,3 @@ -create_algorithm_data <- function(path) { - algorithm <- readr::read_csv(path, show_col_types = FALSE) - usethis::use_data(algorithm, overwrite = TRUE) - fs::dir_ls(here::here("data"), regexp = "algorithm") +read_algorithm_data <- function(path) { + readr::read_csv(path, show_col_types = FALSE) } diff --git a/data-raw/variable-description.R b/data-raw/variable-description.R index 2e596484..19f0f213 100644 --- a/data-raw/variable-description.R +++ b/data-raw/variable-description.R @@ -1,5 +1,5 @@ -create_variable_description_data <- function(path) { - variable_description <- path |> +read_variable_description_data <- function(path) { + path |> readr::read_csv(show_col_types = FALSE) |> dplyr::select( "register_name", @@ -10,11 +10,4 @@ create_variable_description_data <- function(path) { "danish_description", "english_description" ) - - # Save to `data/` to give users access to descriptions - usethis::use_data(variable_description, overwrite = TRUE) - - # Save as internal as well to give our functions access it - usethis::use_data(variable_description, overwrite = TRUE, internal = TRUE) - fs::dir_ls(here::here("data"), regexp = "variable_description") } From e7e8201acfb76a56ab3b10724f698bed4f4ae7a1 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:15:34 +0200 Subject: [PATCH 04/22] build: update the targets pipeline to properly save sysdata --- _targets.R | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/_targets.R b/_targets.R index 28ce5a2c..6ce2035a 100644 --- a/_targets.R +++ b/_targets.R @@ -24,9 +24,16 @@ list( command = "data-raw/algorithm.csv", format = "file" ), + tar_target( + name = algorithm, + command = read_algorithm_data(algorithm_csv) + ), tar_target( name = algorithm_rda, - command = create_algorithm_data(algorithm_csv), + command = { + usethis::use_data(algorithm, overwrite = TRUE) + here::here("data/algorithm.rda") + }, format = "file" ), tar_target( @@ -34,9 +41,24 @@ list( command = "data-raw/variable-description.csv", format = "file" ), + tar_target( + name = variable_description, + command = read_variable_description_data(variable_description_csv) + ), tar_target( name = variable_description_rda, - command = create_variable_description_data(variable_description_csv), + command = { + usethis::use_data(variable_description, overwrite = TRUE) + here::here("data/variable_description.rda") + }, + format = "file" + ), + tar_target( + name = internal_rda, + command = { + usethis::use_data(algorithm, variable_description, overwrite = TRUE, internal = TRUE) + here::here("R/sysdata.rda") + }, format = "file" ) ) From 3216b9ccb8cfb8ee40ac0c509d4ffe77ab24d94d Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:16:00 +0200 Subject: [PATCH 05/22] chore: regenerate pipeline outputs --- R/sysdata.rda | Bin 1550 -> 1758 bytes _targets/meta/meta | 21 ++++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/R/sysdata.rda b/R/sysdata.rda index 00d3472f5a9ee1d5f6c54ff77612d9eb157de5ea..e04eb6cd8b71a9ca92e2ac62e088826a9a88ff80 100644 GIT binary patch literal 1758 zcmV<41|j)ET4*^jL0KkKS-~maH2?-B|M&m@|JQf#|HJGB-a^0c-{8PR00EEyKmY&` z1OPw*;0NA5(U7vXil6`rPyn!$lO;Ws14GIh2A+|ofHVMT13&-(+L;*u4j7Fz&}aid z0009)kZGW30MVcfLqKrEX{Lif8UO$Q8VrL?13(6i0Ad;gh9gZh8UWA$007Wr8fY2- zG-v}5&O0iXZ?00E|r zrkNqKqp)%IM;?1svYsXQIL9GwSzYsO)g9&nNdjW118G#;)!mj@TCCd3$gu>r+AUOK zDQ>+7QZCi&btXiSFnzz<>$gyDMHc#K6co9{t}<~NPhqSO9nEZJZ8{(7|Bi_&Cnrbn z#UDKG{$vOMA7C! z{z6*ES__6St4Gk)O4Vg9PLm;0J&y`>^~|xqj7V;y96-+9SkA;tT`KI8v_br*E?rWq zqcu9yWF*T@6t0!UJyHwi7O6&1jz!(-IAAJf0t)~DBFP{ST16T`L1_ep5Jdd?s4HYq z9ic`-LH~iEU(m;9X)pPbHkBYhL2|Zx$PR|vLhdGrVb#z4`g&kyVwLNj325iCHxm|A z3zSWK#R8aUfrA-wb1f)_F@{9Pqr&P8tP{Rd*{*b&S3zLKDi|1rrKqt53XptegChdd z5|6)E&|M&gIJK#(E)#=RFe^t}0dzP3PI&_Q{@uzEL}>h&2jbzCVzK${L}GxcY}f)P zQ+gVVo>dK zzAr13z-;a}noTKC72{fljj0G4`cK6B`zTZP%RL7iHCpS&m`zBSHZWrb zPEbvCi~?y?fdy|Jf`WnTy+HtSsb-QWGANe`0wDnNS<880xf(_ z&Bsp^!LXsJR3-2>^Vo1Y)Q6Bjax?&5Vgc}ka2gRJZH)w%KS*viVPnKx7aIi-4-L~C z9a<1vT($ww_e0xNxdBy*C5paDbc$q~w_zkJ0b&@tPT*1`R0{`?Ns|RKc#p=&3}d{a zS*`u2mv*|BGVwH&5QfrQ`YFg*nhGS1JW*SaP)!U$1Rx;JFsZSy=;cJY0^qqgq8M~m z_EQ+hMnIhgPf7!kN+ysJJU$R69W_pj6u$1Oc`J}D3+wC83(yABoqy>bC4Rk>B?LsNXQu1-X=m) z6A8zN}`DjM!_!n77PH)f~abKATl*#mZX=5 znfW}0DE+ToBYpsG&rzT=3N1l#tKUAC051So7lr{7&~U`tM7j64L6b_#>YGWI6o$x& z)K3)E6a(x{I*>60g>wUu;B37u8C*f2CvYJ0ns+5~pdx@s%}Q+wgEDBK?-LDo1}P{I zVjA#C@0z?~Qc}jptkjHgCX%xf)M-lq^k_JKA8Wq4#Ri@nCA|9|!1WjH4zwGCAg7uR z!JjxBM64?p14r#bJ!E~F=T%*l2dtyCHq)$ literal 1550 zcmV+p2J!hqT4*^jL0KkKS!xxPIsgSGfA|0Y|JQf#*Z}MWUO>O^-{8PR00EEyKmY&` z1OPw*;0E6Mg$V!v04M-Q20^1h88pN+&}0~f5HtpiOh$kJ4h(}vfHG-_X`sk43?OI? z7?_O!02~A} zJx%H(Oqyi$0W=#>U=tCD%>WvQkRGHGBnTrwYGfKArluoA+JU1(AkmX1Pf5Kb@?{5# zk9qL%9@{LIq}yxA_Kj7oQdKLc*a!e{u{rY+*GK?5rvj2fXhHX}eJoI6_xri)Q35AR zB*>DdzQQAo1;`efB_%})-1>b4zq0gQoKhpblJuo+|7vQT#ovu!@26BjeL^3o&`362 z3V1%PRIOH0Cn6N%+V|6|@|j~d19TYT27f&mO~#qg)hAek>zue}D!MaEtu?|-wBbtW zTvOK|zF|a&P6n&a9nB#JP$UU}keD?L0%%ZGyUQ|+p;sONDF{KYgMh92V3*2!Mjd8V zh0sk9!#iW_Uv7VF}-RlIfxYjXjT2dtU%ZRtm8 z%aneD%|-GZr^pvw@fX@w^VVKMJ*NXeNIk4b^c+H@jA#}V6avIc?%`Qp8jIir^nQ~; zAPJ9DB@Hjl@FPnX#$%H|!!9-?wCb;S>*g~Vvc{#rF$tVkIWXf(BL*p05uXggz&fPE zI0GQue54~4B?3njFT|DP&%@L}Yu;1s@IF9In#>NwrV8`zgR+Sb9JWa*s;VUpgC4ea6} z`-HHz>d_?m%4S1GH6mcx z!HgL>K{e1Y38hj56})x|3I~N|69&SD zN}(@_x3g)?bSV#5Ky#V^FEIf9K^)Bpkv7JHUE#RdeNMu`VYRzx@HluC#mi_OfcKS< z6ERl7T_jH=-Fe9&R0=~5gQOIR6#~KH5@f+lz^8?f7{?-r)w`ha{CcWf%f!-9LK{hM z>{F1jG!#i1d68R?P)!U$1Rx;dWRdLkuMWnGFtC=b?FWS{HYS>EZG-F?vq}O>UeR|9 zRxAM{0V@^`u&8w!0$@x9QWdRXoNyR2V4+-bfpGn%l2C?1+PQiX@+G4+GBvg_tY8;V z&_hUP5)=>;N<&HKz?U?FwGRmp4#oas;SYaaX=zEkLy~D2Vqju{F^D>*M8TE=H5ny9 z*1Wa?K|tLHDJ7Gh_YsgWuVFG0keE(grOFL|3{h+7zDFyj7bO-;C6k}QYK4j-$y8Ax zh}b2{g@XV#5LFI-05zFnmPsz(F)2RpWFvLJZN&G(P)%qH8a^ZQ<^|vjad==6pwAO* z((2^l2DX)!H8&G3DGiYmsvaq-Ce;-{miVeBvEYI2b9-jQ0V+O2V;lHTEGNVGlLUg?tbll^S0tUnLuai2sYZBAh5lH44g|0GLUZ Am;e9( diff --git a/_targets/meta/meta b/_targets/meta/meta index e9d4b955..54603f2e 100644 --- a/_targets/meta/meta +++ b/_targets/meta/meta @@ -1,18 +1,25 @@ name|type|data|command|depend|seed|path|time|size|bytes|format|repository|iteration|parent|children|seconds|warnings|error .__global__|object|87b8d4f266c27bf4||||||||||||||| -algorithm_csv|stem|98f92607dd48318b|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19893.417610553s|530f140998b50d1c|112|file|local|vector|||0.003|| -algorithm_rda|stem|66f7362503371611|299578725db47571|a73128224189f273|-930623318|/home/luke/Documents/organizations/steno-aarhus/osdc/data/algorithm.rda|t19893.4400342999s|a96add13dbb67aa3|376|file|local|vector|||0.166|| +algorithm|stem|10ca7cf5842c7665|e6ba0f398c6f1faa|60a40402a9fb99aa|-536052671||t19899.4087817699s|9fa82258c19da069|337|rds|local|vector|||0.001|| +algorithm_csv|stem|98f92607dd48318b|72b6696561a29259|2c530c1562a7fbd1|48686226|data-raw/algorithm.csv|t19893.5100784042s|530f140998b50d1c|112|file|local|vector|||0.003|| +algorithm_rda|stem|66f7362503371611|07904a5fa14d0614|9fd483aca6d9122e|-930623318|/home/luke/Documents/organizations/steno-aarhus/osdc/data/algorithm.rda|t19899.4272374029s|a96add13dbb67aa3|376|file|local|vector|||0.01|| column_names_to_lower|function|1f9ed89ec76f0ce4||||||||||||||| -create_algorithm_data|function|260984c321b10f83||||||||||||||| -create_variable_description_data|function|e6984763ad307bc4||||||||||||||| +create_algorithm_data|function|721802c40fcbce4e||||||||||||||| +create_variable_description_data|function|ede086e002225ffb||||||||||||||| get_algorithm_logic|function|87ff06f2c9ce0900||||||||||||||| get_register_abbrev|function|a82c561691b651df||||||||||||||| get_required_variables|function|986ccd76798db1ba||||||||||||||| -include_hba1c|function|dd23db245d8e630d||||||||||||||| +include_hba1c|function|e0b1243dd274269a||||||||||||||| +internal_rda|stem|7d593b3244eb6fb5|76d906cefcbfa41d|cbd1dbe82f00b76b|765355582|/home/luke/Documents/organizations/steno-aarhus/osdc/R/sysdata.rda|t19899.427237588s|f58dd1a2582f84fa|1758|file|local|vector|||0.008|| +read_algorithm_data|function|ef107d4466f53ae2||||||||||||||| +read_variable_description_data|function|bac5d03645e8328d||||||||||||||| register_as_md_header|function|1b9b0bb62cc1e264||||||||||||||| -register_data_as_md_table|function|27e92012b850a984||||||||||||||| +register_data_as_md_table|function|bbdda3785a7bdb63||||||||||||||| registers_as_md_table|function|ab56ce7262ba313b||||||||||||||| +variable_description|stem|6d3dd1f2baa26b82|c2153d67e5651737|a01d9e94aead7bb8|1287048845||t19899.4087817236s|b9834ab70a709536|1311|rds|local|vector|||0.133|| variable_description_csv|stem|900853341b756f57|2347307fa16d111b|2c530c1562a7fbd1|113481566|data-raw/variable-description.csv|t19845.4834059685s|75bbd1de9a7a9806|4683|file|local|vector|||0.005|| -variable_description_rda|stem|450aefe34ecbfb32|1742010767fda041|824677c99eb59944|-1262878967|/home/luke/Documents/organizations/steno-aarhus/osdc/data/variable_description.rda|t19893.4447566339s|5272409a13fc9df9|1550|file|local|vector|||0.496|Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use register_name instead of .dataregister_name. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use register_abbrev instead of .dataregister_abbrev. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use variable_name instead of .datavariable_name. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use start_year instead of .datastart_year. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use end_year instead of .dataend_year. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use danish_description instead of .datadanish_description. Use of .data in tidyselect expressions was deprecated in tidyselect 1.2.0.ℹ Please use english_description instead of .dataenglish_description| +variable_description_rda|stem|450aefe34ecbfb32|afe48e93eb17b9c8|92ce3d1b47fe163e|-1262878967|/home/luke/Documents/organizations/steno-aarhus/osdc/data/variable_description.rda|t19899.427237264s|5272409a13fc9df9|1550|file|local|vector|||0.058|| variables_as_md_table|function|bf10d1f0df6a170a||||||||||||||| verify_required_variables|function|52bf07b827f57531||||||||||||||| +write_external_rda|function|c0c90c7048a0d89b||||||||||||||| +write_internal_rda|function|95b61d841de16e96||||||||||||||| From d6fa1b2a42c1956513d045aec87a3802ca2d58f3 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:18:36 +0200 Subject: [PATCH 06/22] test: confirm that only the last two dates are selected --- tests/testthat/test-include-hba1c.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R index 91545b65..eaf7f8b5 100644 --- a/tests/testthat/test-include-hba1c.R +++ b/tests/testthat/test-include-hba1c.R @@ -1,6 +1,9 @@ lab_forsker <- tibble::tribble( ~patient_cpr, ~samplingdate, ~analysiscode, ~value, + # Three events, so only earliest two should be kept. "498718589800", "20230101", "NPU27300", 49, + "498718589800", "20210101", "NPU27300", 49, + "498718589800", "20220101", "NPU27300", 49, "498718589801", "20230101", "NPU03835", 6.6, "498718589802", "20230101", "NPU03835", 6.3, "498718589803", "20230101", "NPU27300", 47, @@ -22,7 +25,8 @@ lab_forsker <- tibble::tribble( expected <- tibble::tribble( ~pnr, ~date, ~included_hba1c, - "498718589800", "20230101", TRUE, + "498718589800", "20210101", TRUE, + "498718589800", "20220101", TRUE, "498718589801", "20230101", TRUE, "498718589803", "20210101", TRUE, "498718589803", "20220101", TRUE, From eb7446d1727e996165c09bf0ffb97d199998f945 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:18:54 +0200 Subject: [PATCH 07/22] test: include some missing values --- tests/testthat/test-include-hba1c.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R index eaf7f8b5..9c78f258 100644 --- a/tests/testthat/test-include-hba1c.R +++ b/tests/testthat/test-include-hba1c.R @@ -20,7 +20,12 @@ lab_forsker <- tibble::tribble( "498718589807", "20200101", "NPU03835", 6.6, "498718589807", "20200101", "NPU27300", 47, "498718589808", "20220101", "NPU00000", 100, - "498718589809", "20220101", "NPU00000", 5 + "498718589809", "20220101", "NPU00000", 5, + # If there are NA values, they should be ignored. + "498718589809", "20220101", "NPU00000", NA, + "498718589809", "20220101", NA, 5, + "498718589809", NA, "NPU00000", 5, + NA, "20220101", "NPU00000", 5 ) expected <- tibble::tribble( From 2d207988c56346fd9965e45bd0338c48f6a5f811 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:19:16 +0200 Subject: [PATCH 08/22] test: this was incorrect, need to remove a column to test it correctly --- tests/testthat/test-include-hba1c.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R index 9c78f258..138805b1 100644 --- a/tests/testthat/test-include-hba1c.R +++ b/tests/testthat/test-include-hba1c.R @@ -41,7 +41,7 @@ expected <- tibble::tribble( ) test_that("dataset needs expected variables", { - actual <- lab_forsker + actual <- lab_forsker[-2] expect_error(include_hba1c(actual)) }) From e5ac7d26f64d78fc6a0038983e695390200b67aa Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:19:35 +0200 Subject: [PATCH 09/22] test: import from dplyr to be explicit --- tests/testthat/test-include-hba1c.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R index 138805b1..5a7c1bf5 100644 --- a/tests/testthat/test-include-hba1c.R +++ b/tests/testthat/test-include-hba1c.R @@ -52,7 +52,7 @@ test_that("those with inclusion are kept", { test_that("casing of input variables doesn't matter", { actual <- lab_forsker |> - rename_with(\(columns) toupper(columns)) |> + dplyr::rename_with(\(columns) toupper(columns)) |> include_hba1c() expect_equal(actual, expected) }) From 155154c028d8debd32803b951cdb35955505ebe0 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:20:06 +0200 Subject: [PATCH 10/22] test: update the tests to correctly compare with other data object types --- tests/testthat/test-include-hba1c.R | 33 ++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff --git a/tests/testthat/test-include-hba1c.R b/tests/testthat/test-include-hba1c.R index 5a7c1bf5..a087b473 100644 --- a/tests/testthat/test-include-hba1c.R +++ b/tests/testthat/test-include-hba1c.R @@ -61,26 +61,49 @@ test_that("verification works for DuckDB Database", { actual <- arrow::to_duckdb(lab_forsker) |> include_hba1c() - expect_equal(actual, expected) + actual_rows <- actual |> + dplyr::count() |> + dplyr::pull(n) + + expect_equal(actual_rows, nrow(expected)) + expect_equal(colnames(actual), colnames(expected)) }) test_that("verification works for Arrow Tables (from Parquet)", { actual <- arrow::as_arrow_table(lab_forsker) |> - include_hba1c() + include_hba1c() |> + # TODO: Arrow doesn't like the `row_number()` function, find a fix? + # Ignoring the warning for now, low priority. + suppressWarnings() - expect_equal(actual, expected) + actual_rows <- actual |> + dplyr::count() |> + dplyr::pull(n) + + expect_equal(actual_rows, nrow(expected)) + expect_equal(colnames(actual), colnames(expected)) }) test_that("verification works for data.frame", { actual <- as.data.frame(lab_forsker) |> include_hba1c() - expect_equal(actual, expected) + actual_rows <- actual |> + dplyr::count() |> + dplyr::pull(n) + + expect_equal(actual_rows, nrow(expected)) + expect_equal(colnames(actual), colnames(expected)) }) test_that("verification works for data.table", { actual <- data.table::as.data.table(lab_forsker) |> include_hba1c() - expect_equal(actual, expected) + actual_rows <- actual |> + dplyr::count() |> + dplyr::pull(n) + + expect_equal(actual_rows, nrow(expected)) + expect_equal(colnames(actual), colnames(expected)) }) From 61cd93eb97d3d9320374e7d9704b01e6957a5657 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:21:06 +0200 Subject: [PATCH 11/22] refactor: take a dataframe rather than a string name of the register --- R/as-markdown.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/as-markdown.R b/R/as-markdown.R index 30b7495d..c0992aa0 100644 --- a/R/as-markdown.R +++ b/R/as-markdown.R @@ -50,18 +50,18 @@ register_as_md_header <- function(register) { #' Convert the fake register data into a Markdown table. #' -#' @param register The abbreviation of the register name. +#' @param data The data of a specific register from [register_data]. #' @param caption A caption to add to the table. #' #' @return A character vector as a Markdown table. #' @keywords internal #' -register_data_as_md_table <- function(register, caption = NULL) { +register_data_as_md_table <- function(data, caption = NULL) { rlang::check_installed("glue") rlang::check_installed("knitr") - register_data[[register]] |> - head(4) |> + data |> + utils::head(4) |> knitr::kable(caption = caption) } From 85c393be4ae0437aba1fd04afa15c62a74c44a10 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:21:22 +0200 Subject: [PATCH 12/22] docs: remove periods from headers --- R/as-markdown.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/as-markdown.R b/R/as-markdown.R index c0992aa0..3dddd4af 100644 --- a/R/as-markdown.R +++ b/R/as-markdown.R @@ -30,7 +30,7 @@ registers_as_md_table <- function(caption = NULL) { knitr::kable(caption = caption) } -#' Convert the register name into text to use in a Markdown header. +#' Convert the register name into text to use in a Markdown header #' #' @param register The abbreviation of the register name. #' @@ -48,7 +48,7 @@ register_as_md_header <- function(register) { ) } -#' Convert the fake register data into a Markdown table. +#' Convert the fake register data into a Markdown table #' #' @param data The data of a specific register from [register_data]. #' @param caption A caption to add to the table. @@ -65,7 +65,7 @@ register_data_as_md_table <- function(data, caption = NULL) { knitr::kable(caption = caption) } -#' Converts the variables for a register into a Markdown table. +#' Converts the variables for a register into a Markdown table #' #' @inheritParams register_data_as_md_table #' From 9a9c47566f6fb76292593bc990d12b305885d1f2 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:21:48 +0200 Subject: [PATCH 13/22] docs: internal function examples can't be run during checks --- R/get-algorithm.R | 2 ++ R/include-hba1c.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/get-algorithm.R b/R/get-algorithm.R index b3a292e3..93fcf25d 100644 --- a/R/get-algorithm.R +++ b/R/get-algorithm.R @@ -6,7 +6,9 @@ #' @keywords internal #' #' @examples +#' \dontrun{ #' get_algorithm_logic("hba1c") +#' } get_algorithm_logic <- function(criteria) { algorithm |> dplyr::filter(.data$name == criteria) |> diff --git a/R/include-hba1c.R b/R/include-hba1c.R index f044fe0f..ff7ab7e4 100644 --- a/R/include-hba1c.R +++ b/R/include-hba1c.R @@ -10,7 +10,9 @@ #' @keywords internal #' #' @examples +#' \dontrun{ #' register_data$lab_forsker |> include_hba1c() +#' } include_hba1c <- function(data) { verify_required_variables(data, "lab_forsker") hba1c_criteria <- get_algorithm_logic("hba1c") From d3f0d76db4bd0d84cd6f44a7bea36def0cafb233 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:22:23 +0200 Subject: [PATCH 14/22] fix: the logic criteria wasn't being correctly loaded, need to convert to expression --- R/include-hba1c.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/include-hba1c.R b/R/include-hba1c.R index ff7ab7e4..3180e7b1 100644 --- a/R/include-hba1c.R +++ b/R/include-hba1c.R @@ -15,22 +15,25 @@ #' } include_hba1c <- function(data) { verify_required_variables(data, "lab_forsker") - hba1c_criteria <- get_algorithm_logic("hba1c") + hba1c_criteria <- get_algorithm_logic("hba1c") |> + # To convert the string into an R expression. + rlang::parse_expr() data |> column_names_to_lower() |> - dplyr::filter({{ hba1c_criteria }}) |> + # Use !! to inject the expression into filter. + dplyr::filter(!!hba1c_criteria) |> # Keep only the columns we need. dplyr::mutate( pnr = .data$patient_cpr, - date == .data$samplingdate, + date = .data$samplingdate, included_hba1c = TRUE, .keep = "none" ) |> # Remove any duplicates dplyr::distinct() |> - dplyr::group_by(pnr) |> - # FIXME: This might not work with some databases + # FIXME: This might be computationally intensive. + dplyr::group_by(.data$pnr) |> # Keep earliest two dates. - dplyr::slice_min(date, n = 2) |> + dplyr::filter(dplyr::row_number(date) %in% 1:2) |> dplyr::ungroup() } From 16ea193dc548684d6b025d18f7d64f34625c35bc Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:22:33 +0200 Subject: [PATCH 15/22] docs: this object needs to be documented --- R/osdc-package.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/R/osdc-package.R b/R/osdc-package.R index 0c59f81a..6efccea0 100644 --- a/R/osdc-package.R +++ b/R/osdc-package.R @@ -19,3 +19,17 @@ utils::globalVariables(".data") #' @format #' Is a list with several registers. "register_data" + +#' Data frame of the logic for the OSDC algorithm +#' +#' This data frame contains the logic details of the algorithm for specific +#' inclusion and exclusion criteria. +#' +#' @format +#' Is a [tibble::tibble()] with two columns: +#' +#' \describe{ +#' \item{name}{The inclusion or exclusion criteria name.} +#' \item{logic}{The logic for the criteria.} +#' } +"algorithm" From 855722bd8ef47c0741ef67badfb979c22091936c Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:22:52 +0200 Subject: [PATCH 16/22] docs: typo in package title of DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5b989cf0..e11c5da9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Type: Package Package: osdc -Title: Open Source Diabetes Classifier (OSCD) for Danish Registers +Title: Open Source Diabetes Classifier (OSDC) for Danish Registers Version: 0.0.1.9000 Authors@R: c( person(c("Luke", "William"), "Johnston", , "lwjohnst@gmail.com", role = c("aut", "cre"), From cab4afe1d187c6b2240bd9cebf6df7327bef5ea8 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:23:34 +0200 Subject: [PATCH 17/22] build: move the needed packages for tests into Suggests ...was giving issues during testing --- DESCRIPTION | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e11c5da9..4dbc346e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,8 @@ Imports: cli, dplyr, lifecycle, - rlang + rlang, + utils Suggests: dbplyr, glue, @@ -34,14 +35,13 @@ Suggests: spelling, stringr, testthat (>= 3.0.0), - tibble -VignetteBuilder: - knitr -Config/testthat/edition: 3 -Config/Needs/tests: + tibble, data.table, arrow, duckdb +VignetteBuilder: + knitr +Config/testthat/edition: 3 Config/Needs/data: fabricatr, codeCollection, From 11704ce1218846f4af63b32a2106543b2d1aa53f Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:23:53 +0200 Subject: [PATCH 18/22] chore: regenerate the roxygen docs --- man/algorithm.Rd | 22 ++++++++++++++++++++++ man/get_algorithm_logic.Rd | 2 ++ man/include_hba1c.Rd | 2 ++ man/osdc-package.Rd | 2 +- man/register_as_md_header.Rd | 18 ++++++++++++++++++ man/register_data_as_md_table.Rd | 20 ++++++++++++++++++++ man/registers_as_md_table.Rd | 18 ++++++++++++++++++ man/variables_as_md_table.Rd | 18 ++++++++++++++++++ 8 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 man/algorithm.Rd create mode 100644 man/register_as_md_header.Rd create mode 100644 man/register_data_as_md_table.Rd create mode 100644 man/registers_as_md_table.Rd create mode 100644 man/variables_as_md_table.Rd diff --git a/man/algorithm.Rd b/man/algorithm.Rd new file mode 100644 index 00000000..9cfc10da --- /dev/null +++ b/man/algorithm.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/osdc-package.R +\docType{data} +\name{algorithm} +\alias{algorithm} +\title{Data frame of the logic for the OSDC algorithm} +\format{ +Is a \code{\link[tibble:tibble]{tibble::tibble()}} with two columns: + +\describe{ +\item{name}{The inclusion or exclusion criteria name.} +\item{logic}{The logic for the criteria.} +} +} +\usage{ +algorithm +} +\description{ +This data frame contains the logic details of the algorithm for specific +inclusion and exclusion criteria. +} +\keyword{datasets} diff --git a/man/get_algorithm_logic.Rd b/man/get_algorithm_logic.Rd index dfcb8948..59c44862 100644 --- a/man/get_algorithm_logic.Rd +++ b/man/get_algorithm_logic.Rd @@ -16,6 +16,8 @@ A character string. Get the criteria algorithmic logic and convert to an R logic condition. } \examples{ +\dontrun{ get_algorithm_logic("hba1c") } +} \keyword{internal} diff --git a/man/include_hba1c.Rd b/man/include_hba1c.Rd index 91e528c5..b0e8be92 100644 --- a/man/include_hba1c.Rd +++ b/man/include_hba1c.Rd @@ -18,6 +18,8 @@ In the \code{lab_forsker} register, NPU27300 is HbA1c in the modern units (IFCC) while NPU03835 is HbA1c in old units (DCCT). } \examples{ +\dontrun{ register_data$lab_forsker |> include_hba1c() } +} \keyword{internal} diff --git a/man/osdc-package.Rd b/man/osdc-package.Rd index fc732fc5..6c8a49c3 100644 --- a/man/osdc-package.Rd +++ b/man/osdc-package.Rd @@ -4,7 +4,7 @@ \name{osdc-package} \alias{osdc} \alias{osdc-package} -\title{osdc: Open Source Diabetes Classifier (OSCD) for Danish Registers} +\title{osdc: Open Source Diabetes Classifier (OSDC) for Danish Registers} \description{ This classifier first identifies a population of individuals with any type of diabetes mellitus and then splits this population into T1D and T2D by identifying individuals with T1D and classifying the remainder of the diabetes population as T2D. } diff --git a/man/register_as_md_header.Rd b/man/register_as_md_header.Rd new file mode 100644 index 00000000..4fefca7f --- /dev/null +++ b/man/register_as_md_header.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-markdown.R +\name{register_as_md_header} +\alias{register_as_md_header} +\title{Convert the register name into text to use in a Markdown header} +\usage{ +register_as_md_header(register) +} +\arguments{ +\item{register}{The abbreviation of the register name.} +} +\value{ +A character vector. +} +\description{ +Convert the register name into text to use in a Markdown header +} +\keyword{internal} diff --git a/man/register_data_as_md_table.Rd b/man/register_data_as_md_table.Rd new file mode 100644 index 00000000..df7989b6 --- /dev/null +++ b/man/register_data_as_md_table.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-markdown.R +\name{register_data_as_md_table} +\alias{register_data_as_md_table} +\title{Convert the fake register data into a Markdown table} +\usage{ +register_data_as_md_table(data, caption = NULL) +} +\arguments{ +\item{data}{The data of a specific register from \link{register_data}.} + +\item{caption}{A caption to add to the table.} +} +\value{ +A character vector as a Markdown table. +} +\description{ +Convert the fake register data into a Markdown table +} +\keyword{internal} diff --git a/man/registers_as_md_table.Rd b/man/registers_as_md_table.Rd new file mode 100644 index 00000000..08049d20 --- /dev/null +++ b/man/registers_as_md_table.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-markdown.R +\name{registers_as_md_table} +\alias{registers_as_md_table} +\title{Convert the register data sources} +\usage{ +registers_as_md_table(caption = NULL) +} +\arguments{ +\item{caption}{Caption to add to the table.} +} +\value{ +A character vector as a Markdown table. +} +\description{ +Convert the register data sources +} +\keyword{internal} diff --git a/man/variables_as_md_table.Rd b/man/variables_as_md_table.Rd new file mode 100644 index 00000000..b7cc319b --- /dev/null +++ b/man/variables_as_md_table.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-markdown.R +\name{variables_as_md_table} +\alias{variables_as_md_table} +\title{Converts the variables for a register into a Markdown table} +\usage{ +variables_as_md_table(register, caption = NULL) +} +\arguments{ +\item{caption}{A caption to add to the table.} +} +\value{ +A character vector as a Markdown table. +} +\description{ +Converts the variables for a register into a Markdown table +} +\keyword{internal} From 3aa70956eda48301fce55e50b89f8eda565da7b7 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:24:04 +0200 Subject: [PATCH 19/22] docs: fix code after change to function --- vignettes/data-sources.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/data-sources.Rmd b/vignettes/data-sources.Rmd index fb3075e1..b6bb617c 100644 --- a/vignettes/data-sources.Rmd +++ b/vignettes/data-sources.Rmd @@ -76,7 +76,7 @@ for (register in osdc:::get_register_abbrev()) { print() osdc:::register_data_as_md_table( - register, + register_data[[register]], caption = glue::glue("Simulated example of what the data looks like for the `{register}` register.") ) |> print() From c75c16049d70e755692a1cf561fec994653d607b Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:24:26 +0200 Subject: [PATCH 20/22] chore: update wordlist using `spelling::update_wordlist()` --- inst/WORDLIST | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index fc02524c..04791e12 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,19 +1,35 @@ Aarhus CMD +DCCT +GDM +GLD HbA -HbA1c +ICD +IFCC Isaksen's +LPR Lifecycle +Lægemiddelsdatabasen Metformin +NPU ORCID -OSDC +PNR +PPV RSCD Saxenda +Uncoloured +Wegovy +dapagliflozin dplyr +empagliflozin endocrinological +lmdb mellitus +metformin mmol mol onwards polycystic +pseudonymised reproducibility +semaglutid From 01692d758e362f6002beef09a8e7d1a1cb6f008a Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:24:38 +0200 Subject: [PATCH 21/22] docs: need to include the version number in NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index a5c44734..b3fc9003 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# osdc (development version) +# osdc 0.0.1.9000 (development version) ## General From c000e16c44bab24b302145e797cbddea12becb06 Mon Sep 17 00:00:00 2001 From: "Luke W. Johnston" Date: Tue, 25 Jun 2024 12:25:49 +0200 Subject: [PATCH 22/22] docs: update dependency install instructions --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b860c8ba..e82d10ea 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ more data or running the full test suite, use: ``` r pak::pak( "steno-aarhus/osdc", - dependencies = c("all", "Config/Needs/tests", "Config/Needs/data") + dependencies = c("all", "Config/Needs/data") ) ```