From 944c231401cb4b925609414e69dcb5ced1d5c77a Mon Sep 17 00:00:00 2001 From: Robin Denz Date: Thu, 7 Mar 2024 12:49:29 +0100 Subject: [PATCH] docs: minor changes, prepare for CRAN release --- DESCRIPTION | 2 +- NEWS.md | 22 ++++++++++--- codemeta.json | 51 +++++++++++++++++++++++++++++-- cran-comments.md | 2 +- man/simDAG.Rd | 2 +- man/sim_discrete_time.Rd | 2 +- vignettes/v_covid_example.Rmd | 20 ++++++++---- vignettes/v_sim_discrete_time.Rmd | 6 ++-- vignettes/v_sim_from_dag.Rmd | 3 +- 9 files changed, 89 insertions(+), 21 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index db3a657..7750198 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: simDAG Title: Simulate Data from a DAG and Associated Node Information -Version: 0.1.0.9000 +Version: 0.1.1 Authors@R: c( person("Robin", "Denz", , "robin.denz@rub.de", role = c("aut", "cre")), person("Katharina", "Meiszl", , "meiszl@amib.rub-uni-bochum.de", role = c("aut")) diff --git a/NEWS.md b/NEWS.md index e955d28..f76aa98 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,8 +4,20 @@ # simDAG 0.1.1 -* Minor changes to documentation -* dag2matrix() now returns a numeric matrix even if the dag object contains only root nodes -* node() and node_td() now support character vectors in the 'name' argument, allowing easy creation of multiple nodes with the same definition -* Add sim_n_datasets() function -* Fix Bug in node_time_to_event() function that lead to the `immunity_duration` parameter being used incorrectly +Enhancements + +* `node()` and `node_td()` now support character vectors in the 'name' argument, allowing easy creation of multiple nodes with the same definition + +Bug Fixes + +* There was a bug in the `node_time_to_event()` function that led to the `immunity_duration` parameter being used incorrectly. Since events were still recorded correctly, this was only apparent when using `save_states="all"`. Works correctly now. 
+* There was a small bug in `dag2matrix()` if the dag object contained only root nodes. In this case, a logical matrix was returned. Now it returns the correct numeric matrix. + +New Features + +* Added the `sim_n_datasets()` function to generate multiple datasets from a single dag object, possibly using multicore processing + +Documentation + +* Minor changes to documentation pages +* Minor changes to vignettes diff --git a/codemeta.json b/codemeta.json index cad5791..6f44105 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,13 +8,19 @@ "codeRepository": "https://github.com/RobinDenz1/simDAG", "issueTracker": "https://github.com/RobinDenz1/siMDAG/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "0.1.0", + "version": "0.1.1", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", "url": "https://r-project.org" }, "runtimePlatform": "R version 4.2.1 (2022-06-23 ucrt)", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, "author": [ { "@type": "Person", @@ -147,6 +153,47 @@ "url": "https://cran.r-project.org" }, "sameAs": "https://CRAN.R-project.org/package=covr" + }, + { + "@type": "SoftwareApplication", + "identifier": "foreach", + "name": "foreach", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=foreach" + }, + { + "@type": "SoftwareApplication", + "identifier": "doSNOW", + "name": "doSNOW", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=doSNOW" + }, + { + "@type": "SoftwareApplication", + "identifier": "doRNG", + "name": "doRNG", + "provider": { + "@id": 
"https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=doRNG" + }, + { + "@type": "SoftwareApplication", + "identifier": "parallel", + "name": "parallel" } ], "softwareRequirements": { @@ -200,7 +247,7 @@ }, "SystemRequirements": null }, - "fileSize": "924.187KB", + "fileSize": "943.984KB", "citation": [ { "@type": "CreativeWork", diff --git a/cran-comments.md b/cran-comments.md index eb3001f..6743c9b 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -2,7 +2,7 @@ 0 errors | 0 warnings | 0 notes -* This is the first release of this package +* This is an update The package was tested on Windows 10, macOS and ubuntu using github actions and rhub as well as local machines. There were no errors or warnings and no notes. There is currently no literature directly associated with this package. diff --git a/man/simDAG.Rd b/man/simDAG.Rd index 660c41f..d33d1f8 100644 --- a/man/simDAG.Rd +++ b/man/simDAG.Rd @@ -28,7 +28,7 @@ If you want to simulate data that is easily described using a standard DAG witho \strong{\emph{What features are missing from this package?}} -The package currently only implements some possible child nodes. In the future we would like to implement more child node types, such as nodes with generalised mixed linear models or more complex survival time models. +The package currently only implements some possible child nodes. In the future we would like to implement more child node types, such as nodes with generalized mixed linear models or more complex survival time models. 
\strong{\emph{Why should I use this package instead of the \pkg{simCausal} package?}} diff --git a/man/sim_discrete_time.Rd b/man/sim_discrete_time.Rd index 0e30274..a2fb13a 100644 --- a/man/sim_discrete_time.Rd +++ b/man/sim_discrete_time.Rd @@ -109,7 +109,7 @@ Returns a \code{simDT} object, containing some general information about the sim \item{\code{t0_var_names}: A character vector containing the names of all variable names that do not vary over time.} } -To obtain a single dataset from this function that can be processed futher, please use the \code{\link{sim2data}} function. +To obtain a single dataset from this function that can be processed further, please use the \code{\link{sim2data}} function. } \seealso{ \code{\link{empty_dag}}, \code{\link{node}}, \code{\link{node_td}}, \code{\link{sim2data}}, \code{\link{plot.simDT}} diff --git a/vignettes/v_covid_example.Rmd b/vignettes/v_covid_example.Rmd index 77c4302..d2b5d4b 100644 --- a/vignettes/v_covid_example.Rmd +++ b/vignettes/v_covid_example.Rmd @@ -62,7 +62,7 @@ Since this vignette is mostly concerned with the practical implementation of the ## Research goal -Our actual research goal was to identify a suitable data analysis strategy for the assessment of Covid-19 vaccine side-effects for a particular real-life data set. To do this we decided to simulate data that is as close to the real data as possible. Using this data we could then try out different analysis strategies and see which one performed adequately. The goal for this vignette is to use parts of this model to showcase the capabilities of the `simDAG` package. +Our actual research goal was to identify a suitable data analysis strategy for the assessment of Covid-19 vaccine side-effects for a particular real-life data set. To do this we decided to simulate data that is as close to the real data as possible. Using this data we could then try out different analysis strategies and see which one performed adequately. 
The goal for this vignette is to use parts of this model to showcase the capabilities of the `simDAG` package. More information on the actual simulation can be found in the first related publication (Denz et al. 2023). ## Theoretical model @@ -132,7 +132,7 @@ Instead of passing a constant value to the `prob_fun` argument, we are now passi ## Part 3: Making the vaccine useful -So far we assumed that it makes no difference whether the person received the vaccine or not. We will now change this by implementing a time-window after receiving the vaccine in which the person cannot develop a `covid` infection. Again, this can be done by defining an appropriate `prob_fun` function, this time for the `covid` node: +So far we assumed that the `covid` infection probability is unaffected by whether the person received the vaccine or not. We will now change this by implementing a time-window after receiving the vaccine in which the person cannot develop a `covid` infection. Again, this can be done by defining an appropriate `prob_fun` function, this time for the `covid` node: ```{r} prob_covid <- function(data, base_p, vacc_duration) { @@ -164,7 +164,7 @@ Instead of just updating the `parents` and `prob_fun` arguments of the `covid` n ## Part 4: Sick people don't get vaccinated -In reality, very little people who were currently experiencing a Covid-19 infection went and got the vaccine. In fact, this is absolutely discouraged by doctors world-wide. To add this circumstance to the mode, we once again simply have to update the probability of receiving a vaccination, by defining an appropriate `prob_fun`: +In reality, very few people who were currently experiencing a Covid-19 infection went and got the vaccine. In fact, this is absolutely discouraged by doctors world-wide. 
To add this circumstance to the model, we once again simply have to update the probability of receiving a vaccination, by defining an appropriate `prob_fun`: ```{r} prob_vaccination <- function(data, base_p) { @@ -200,11 +200,11 @@ Again we simply changed the `prob_fun` argument and added the correct `parents` plot(dag, mark_td_nodes=FALSE) ``` -Note that in this plot it doesn't look like a classic DAG anymore, because it has an bi-directional arrow between `covid` and `vaccination` due to the time-dependent nature of their relationship. +Note that in this plot it doesn't look like a classic DAG anymore, because it has a bi-directional arrow between `covid` and `vaccination` due to the time-dependent nature of their relationship. ## Generating Data using the final model -We are now pleased with the complexity of our data-generation algorithm and want to simulate data from it. We can do this by simply calling the `sim_discrete_time()` function on the specified DAG: +Suppose we are now pleased with the complexity of our data-generation algorithm and want to simulate data from it. We can do this by simply calling the `sim_discrete_time()` function on the specified DAG: ```{r} set.seed(42) @@ -218,7 +218,13 @@ For exemplary purposes, we kind of arbitrarily used 1000 individuals and let the plot(sim, box_text_size=4) ``` -As can be seen, we managed to implement a fairly complex data-generation mechanism using only a few small function definitions and a few lines of code, allowing us to generate a complex dataset with three interdependent time-varying variables with only minimal effort. By utilizing the `sim2data()` function, we could directly transform the `sim` object into a dataset in the start-stop format to run some statistical models on it (such as a cox-regression model). +A more useful output of the resulting data can be obtained using the `sim2data()` function. 
For example, we could transform the output to the start-stop format: + +```{r} +sim2data(sim, to="start_stop") +``` + +As can be seen, we managed to implement a fairly complex data-generation mechanism using only a few small function definitions and a few lines of code, allowing us to generate a complex dataset with three interdependent time-varying variables with only minimal effort. ## Going even further @@ -235,3 +241,5 @@ There are of course many more possible extensions, all of which can be implement # References Banks, Jerry, John S. Carson II, Barry L. Nelson, and David M. Nicol (2014). Discrete-Event System Simulation. Vol. 5. Edinburgh Gate: Pearson Education Limited. + +Denz, Robin, Katharina Meiszl, Peter Ihle, Doris F. Oberle, Ursula Drechsel-Bäuerle, Katrin Scholz, Ingo Meyer and Nina Timmesfeld (2023). "Impact of Record-Linkage Errors in Covid-19 Vaccine-Safety Analyses using German Health-Care Data: A Simulation Study". In: arXiv:2310.15016 diff --git a/vignettes/v_sim_discrete_time.Rmd b/vignettes/v_sim_discrete_time.Rmd index 4997073..28f3dfc 100644 --- a/vignettes/v_sim_discrete_time.Rmd +++ b/vignettes/v_sim_discrete_time.Rmd @@ -54,7 +54,7 @@ Let us consider a very simple example first. Suppose we want to generate data ac knitr::include_graphics("./images_v_sim_discrete_time/simple_dag.png") ``` -Here, `sex` is a time-invariant variables, whereas `age` and `death` are not. Suppose that each tick of the simulation corresponds to a duration of one year. Then, naturally, people will age one year on every simulation tick. We assume that `sex` and `age` have a direct causal effect on the probability of death, regardless of the time. Once people are dead, they stay dead (no reincarnation allowed). +Here, `sex` is a time-invariant variable, whereas `age` and `death` are not. Suppose that each tick of the simulation corresponds to a duration of one year. Then, naturally, people will age one year on every simulation tick. 
We assume that `sex` and `age` have a direct causal effect on the probability of death, regardless of the time. Once people are dead, they stay dead (no reincarnation allowed). If we want to use this structure in the `sim_discrete_time()` function, we first have to generate an initial dataset for the state of the population at $t = 0$ as described above. We do this by first specifying the `t0_root_nodes` as follows: @@ -68,7 +68,7 @@ dag <- empty_dag() + node("sex", type="rbernoulli", p=0.5) ``` -We assume that `age` is normally distributed and that we have equal numbers of each `sex`. This information is enough to specify the data set at $t = 0$. Now we only need to add additional time-dependent nodes using the `node_td()` function and were ready. First, we define a function that increases the age of all individuals by 1 at each step: +We assume that `age` is normally distributed and that we have equal numbers of each `sex`. This information is enough to specify the data set at $t = 0$. Now we only need to add additional time-dependent nodes using the `node_td()` function and we are ready. First, we define a function that increases the age of all individuals by 1 at each step: ```{r} node_advance_age <- function(data) { @@ -130,7 +130,7 @@ This particular example could be simulated in a much easier fashion, without rel # Extending the Simple Example - Recurrent Events -Suppose that the event of interest wasn't `death`, but a cardiovascular event (`cve`). For the case of simplicity we will assume that the same causal structure and causal coefficients from above still apply, but that the event is now no longer terminal and my reoccur arbitrary number of times. First, let's redefine the nodes to get the new name right: +Suppose that the event of interest wasn't `death`, but a cardiovascular event (`cve`). 
For the sake of simplicity we will assume that the same causal structure and causal coefficients from above still apply, but that the event is now no longer terminal and may re-occur an arbitrary number of times. First, let's redefine the nodes to get the new name right: ```{r} dag <- empty_dag() + diff --git a/vignettes/v_sim_from_dag.Rmd b/vignettes/v_sim_from_dag.Rmd index 67035e1..1eb7a5f 100644 --- a/vignettes/v_sim_from_dag.Rmd +++ b/vignettes/v_sim_from_dag.Rmd @@ -11,7 +11,8 @@ vignette: > ```{r, include=FALSE} knitr::opts_chunk$set( collapse=TRUE, - comment="#>" + comment="#>", + fig.align="center" ) ```