diff --git a/DESCRIPTION b/DESCRIPTION index da00fbb..0ede4d7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,9 +3,12 @@ Type: Package Title: Augmented Backward Elimination Version: 5.1.1 Date: 2022-11-30 -Authors@R: c(person(c("Rok","Blagus"),role=c("aut","cre"),email="rok.blagus@mf.uni-lj.si"),person("Sladana", "Babic", role = "ctb", - email = "sladja93babic@gmail.com")) -Author: Rok Blagus [aut, cre], Sladana Babic [ctb] +Authors@R: c(person(c("Rok","Blagus"), role=c("aut","cre"), email = "rok.blagus@mf.uni-lj.si"), + person("Sladana", "Babic", role = "ctb", email = "sladja93babic@gmail.com"), + person("Daniela", "Dunkler", role = "ctb", email = "daniela.dunkler@meduniwien.ac.at"), + person("Georg", "Heinze", role = "ctb", email = "georg.heinze@meduniwien.ac.at"), + person("Gregor", "Steiner", role = "ctb", email = "gregor.steiner@warwick.ac.uk")) +Author: Rok Blagus [aut, cre], Sladana Babic [ctb], Daniela Dunkler [ctb], Georg Heinze [ctb], Gregor Steiner [ctb] Maintainer: Rok Blagus Description: Performs augmented backward elimination and checks the stability of the obtained model. Augmented backward elimination combines significance or information based criteria with the change in estimate to either select the optimal model for prediction purposes or to serve as a tool to obtain a practically sound, highly interpretable model. More details can be found in Dunkler et al. (2014) . 
License: GPL-3 + file LICENSE diff --git a/R/abe.R b/R/abe.R index d27a77e..38736f7 100644 --- a/R/abe.R +++ b/R/abe.R @@ -338,6 +338,7 @@ if (class(bt0)=="try-error") bt else bt0 #' `id` the rows of the data which were used when refitting the model; the list with elements `id1` (the rows used to refit the model; when `type.resampling="Wallisch2021"` these are based on bootstrap) and `id2` (`NULL` unless when `type.resampling="Wallisch2021"` in which case these are the rows used to refit the models based on subsampling) #' #' @author Rok Blagus, \email{rok.blagus@@mf.uni-lj.si} +#' @author Daniela Dunkler #' @author Sladana Babic #' @details `type.resampling` can be `bootstrap` (n observations drawn from the original data with replacement), `mn.bootstrap` (m out of n observations drawn from the original data with replacement), `subsampling` (m out of n observations drawn from the original data without replacement, where m is `prop.sampling*n` ) and `"Wallisch2021"`. When using `"Wallisch2021"` the resampling is done twice: first time using bootstrap (these results are contained in `models`) and the second time using resampling with `prop.sampling` equal to 0.5 (these results are contained in `models.wallisch`); see Wallisch et al. (2021). #' @details When using `parallel=TRUE` parallel backend must be registered before using `abe.resampling`. The parallel backends available will be system-specific; see [foreach()] for more details. 
@@ -368,7 +369,11 @@ if (class(bt0)=="try-error") bt else bt0 #' criterion = "alpha", alpha = c(0.2, 0.05), type.test = "Chisq", #' num.resamples = 50, type.resampling = "Wallisch2021") #' -#' summary(fit.resample1) +#' names(summary(fit.resample1)) +#' summary(fit.resample1)$var.rel.frequencies +#' summary(fit.resample1)$model.rel.frequencies +#' summary(fit.resample1)$var.coefs[1] +#' summary(fit.resample1)$pair.rel.frequencies[1] #' print(fit.resample1) #' #' # use ABE on 50 bootstrap re-samples considering different @@ -1638,6 +1643,7 @@ if(type.boot.or!="Wallisch2021") {id1<-ids;id2<-NULL} else {id1<-idsb;id2<-idss} #' `misc` the parameters of the call to `abe.boot` #' #' @author Rok Blagus, \email{rok.blagus@@mf.uni-lj.si} +#' @author Daniela Dunkler #' @author Sladana Babic #' @details Used only for compatibility with the previous versions and will be removed at some point; see/use [abe.resampling()] instead. #' @references Daniela Dunkler, Max Plischke, Karen Lefondre, and Georg Heinze. Augmented backward elimination: a pragmatic and purposeful way to develop statistical models. PloS one, 9(11):e113677, 2014. @@ -2022,6 +2028,7 @@ abe.boot<-function(fit,data=NULL,include=NULL,active=NULL,tau=0.05,exp.beta=TRUE #' #' @author Rok Blagus, \email{rok.blagus@@mf.uni-lj.si} #' @author Sladana Babic +#' @author Daniela Dunkler #' @author Gregor Steiner #' @details Parameter `conf.level` defines the lower and upper quantile of the bootstrapped/resampled distribution such that equal proportion of values are smaller and larger than the lower and the upper quantile, respectively. #' @details The `models.n` parameter controls the number of models printed in `model.rel.frequencies`. One option is to directly specify the number of models to return (i.e. an integer larger than 1). Alternatively, if `models.n` is set to a number less than (or equal to) 1, the number of models returned is such that the cumulative frequency attains that value. 
By default (`models.n = NULL`), the top 20 models or all models up to a cumulative frequency of 0.8, whichever is shorter, are returned. The selected model is marked with an asterisk. If it is not among the printed models, it is added as the last model. @@ -2292,6 +2299,7 @@ summary.abe <- function(object, conf.level = 0.95, pval = 0.01, alpha = NULL, ta #' @param ... additional arguments affecting the summary produced. #' @author Rok Blagus, \email{rok.blagus@@mf.uni-lj.si} #' @author Sladana Babic +#' @author Daniela Dunkler #' @author Gregor Steiner #' @details When using `type.resampling="Wallisch2021"` in a call to [abe.resampling()], the results for the relative inclusion frequencies of the covariates from the initial model are based on subsampling with sampling proportion equal to 0.5 and the other results are based on bootstrap as suggested by Wallisch et al. (2021); otherwise all the results are obtained by using the method as specified in `type.resampling`. #' Parameter `conf.level` defines the lower and upper quantile of the bootstrapped/resampled distribution such that equal proportion of values are smaller and larger than the lower and the upper quantile, respectively. 
@@ -2301,21 +2309,20 @@ summary.abe <- function(object, conf.level = 0.95, pval = 0.01, alpha = NULL, ta #' @seealso [abe.resampling()], [summary.abe()], [plot.abe()], [pie.abe()] #' @export #' @examples -#' set.seed(1) -#' n=100 -#' x1<-runif(n) -#' x2<-runif(n) -#' x3<-runif(n) -#' y<--5+5*x1+5*x2+ rnorm(n,sd=5) -#' dd<-data.frame(y=y,x1=x1,x2=x2,x3=x3) -#' fit<-lm(y~x1+x2+x3,x=TRUE,y=TRUE,data=dd) +#' set.seed(100) +#' n = 100 +#' x1 <- runif(n) +#' x2 <- runif(n) +#' x3 <- runif(n) +#' y<- -5 + 5 * x1 + 5 * x2 + rnorm(n, sd = 5) +#' dd <- data.frame(y = y,x1 = x1, x2 = x2, x3 = x3) +#' fit <- lm(y ~ x1 + x2 + x3, x = TRUE, y = TRUE, data= dd) #' -#' fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -#' tau=c(0.05,0.1),exact=TRUE, -#' criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -#' num.resamples=50,type.resampling="Wallisch2021") +#' fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +#' tau = c(0.05, 0.1), exact = TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +#' type.test = "Chisq", num.resamples = 50, type.resampling = "Wallisch2021") #' -#' print(fit.resample,conf.level=0.95,alpha=0.2,tau=0.05) +#' print(fit.resample, conf.level = 0.95, alpha = 0.2, tau = 0.05) print.abe <- function(x, type = c("coefficients", "coefficients reporting", "models"), models.n = NULL, conf.level = 0.95, alpha = NULL, tau = NULL, digits = 3,...){ @@ -2377,13 +2384,14 @@ print.abe <- function(x, type = c("coefficients", "coefficients reporting", "mod #' @param ... Arguments to be passed to methods, such as graphical parameters. #' @author Rok Blagus, \email{rok.blagus@@mf.uni-lj.si} #' @author Sladana Babic +#' @author Daniela Dunkler #' @author Gregor Steiner #' @details When using `type.plot="coefficients"` the function plots a histogram of the estimated regression coefficients for the specified variables, alpha(s) and tau(s) obtained from different re-sampled datasets. 
#' When the variable is not included in the final model, its regression coefficient is set to zero. When using `type.resampling="Wallisch2021"` the plot is based on bootstrap, otherwise as specified in `type.resampling`. #' -#' When using `type.plot="variables"` the function plots a barplot of the relative inclusion frequencies of the specified variables, for the specified values of alpha and tau. When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. +#' When using \code{type.plot="variables"} the function plots a barplot of the relative inclusion frequencies of the specified variables, for the specified values of alpha and tau. When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. #' -#' When using `type.plot="models"` the function plots a barplot of the relative frequencies of the final models for specified alpha(s) and tau(s). When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. +#' When using \code{type.plot="models"} the function plots a barplot of the relative frequencies of the final models for specified alpha(s) and tau(s). When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. #' #' When using `type.plot="stability"` the function plots variable inclusion frequencies for each value of alpha. `type.stability` specifies if inclusion frequencies should be plotted as a function of alpha (default) or tau. 
#' @@ -2695,28 +2703,26 @@ plot.abe<-function(x,type.plot=c("coefficients", "variables", "models", "stabili #' @export #' @seealso [abe.resampling()], [summary.abe()], [plot.abe()] #' @examples -#' set.seed(1) -#' n=100 -#' x1<-runif(n) -#' x2<-runif(n) -#' x3<-runif(n) -#' y<--5+5*x1+5*x2+ rnorm(n,sd=5) -#' dd<-data.frame(y=y,x1=x1,x2=x2,x3=x3) -#' fit<-lm(y~x1+x2+x3,x=TRUE,y=TRUE,data=dd) +#' set.seed(10) +#' n = 100 +#' x1 <- runif(n) +#' x2 <- runif(n) +#' x3 <- runif(n) +#' y <- -5 + 5 * x1 + 5 * x2 + rnorm(n, sd = 5) +#' dd <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3) +#' fit <- lm(y ~ x1 + x2 + x3, x = TRUE, y = TRUE, data = dd) #' -#' fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -#' tau=c(0.05,0.1),exact=TRUE, -#' criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -#' num.resamples=50,type.resampling="Wallisch2021") +#' fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +#' tau = c(0.05, 0.1), exact = TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +#' type.test = "Chisq", num.resamples = 50, type.resampling = "Wallisch2021") #' -#' pie.abe(fit.resample, alpha=0.2,tau=0.1) +#' pie.abe(fit.resample, alpha = 0.2, tau = 0.1) #' -#' fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -#' tau=c(0.05,0.1),exact=TRUE, -#' criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -#' num.resamples=50,type.resampling="subsampling") +#' fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +#' tau= c(0.05, 0.1), exact=TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +#' type.test = "Chisq", num.resamples = 50, type.resampling = "subsampling") #' -#' pie.abe(fit.resample, alpha=0.2,tau=0.1) +#' pie.abe(fit.resample, alpha = 0.2, tau = 0.1) diff --git a/man/abe-package.Rd b/man/abe-package.Rd index e58d0be..510b61d 100644 --- a/man/abe-package.Rd +++ b/man/abe-package.Rd @@ -14,6 +14,9 @@ Performs augmented backward elimination and checks the stability of the obtained Other 
contributors: \itemize{ \item Sladana Babic \email{sladja93babic@gmail.com} [contributor] + \item Daniela Dunkler \email{daniela.dunkler@meduniwien.ac.at} [contributor] + \item Georg Heinze \email{georg.heinze@meduniwien.ac.at} [contributor] + \item Gregor Steiner \email{gregor.steiner@warwick.ac.uk} [contributor] } } diff --git a/man/abe.boot.Rd b/man/abe.boot.Rd index b1d7f04..f53a778 100644 --- a/man/abe.boot.Rd +++ b/man/abe.boot.Rd @@ -100,6 +100,8 @@ Riccardo De Bin, Silke Janitza, Willi Sauerbrei and Anne-Laure Boulesteix. Subsa \author{ Rok Blagus, \email{rok.blagus@mf.uni-lj.si} +Daniela Dunkler + Sladana Babic } \keyword{internal} diff --git a/man/abe.resampling.Rd b/man/abe.resampling.Rd index d85d137..9c7f493 100644 --- a/man/abe.resampling.Rd +++ b/man/abe.resampling.Rd @@ -98,7 +98,7 @@ A list with the following elements: Performs Augmented backward elimination on re-sampled data sets using different bootstrap and re-sampling techniques. } \details{ -`type.resampling` can be `bootstrap` (n observations drawn from the original data with replacement), `mn.bootstrap` (m out of n observations drawn from the original data with replacement), `subsampling` (m out of n observations drawn from the original data without replacement, where m is `prop.sampling*n` ) and `"Wallisch2021"`. When using `"Wallisch2021"` the resampling is done twice: first time using bootstrap (these results are contained in `models`) and the second time using resampling with `prop.sampling` equal to 0.5 (these results are contained in `models.wallisch`); see Walisch et al. (2021). +`type.resampling` can be `bootstrap` (n observations drawn from the original data with replacement), `mn.bootstrap` (m out of n observations drawn from the original data with replacement), `subsampling` (m out of n observations drawn from the original data without replacement, where m is `prop.sampling*n` ) and `"Wallisch2021"`. 
When using `"Wallisch2021"` the resampling is done twice: first time using bootstrap (these results are contained in `models`) and the second time using resampling with `prop.sampling` equal to 0.5 (these results are contained in `models.wallisch`); see Wallisch et al. (2021). When using `parallel=TRUE` parallel backend must be registered before using `abe.resampling`. The parallel backends available will be system-specific; see [foreach()] for more details. @@ -124,7 +124,11 @@ active = "x2", tau = c(0.05, 0.1), exact = TRUE, criterion = "alpha", alpha = c(0.2, 0.05), type.test = "Chisq", num.resamples = 50, type.resampling = "Wallisch2021") -summary(fit.resample1) +names(summary(fit.resample1)) +summary(fit.resample1)$var.rel.frequencies +summary(fit.resample1)$model.rel.frequencies +summary(fit.resample1)$var.coefs[1] +summary(fit.resample1)$pair.rel.frequencies[1] print(fit.resample1) # use ABE on 50 bootstrap re-samples considering different @@ -198,5 +202,7 @@ Wallisch Christine, Dunkler Daniela, Rauch Geraldine, de Bin Ricardo, Heinze Geo \author{ Rok Blagus, \email{rok.blagus@mf.uni-lj.si} +Daniela Dunkler + Sladana Babic } diff --git a/man/pie.abe.Rd b/man/pie.abe.Rd index 223e352..03c1897 100644 --- a/man/pie.abe.Rd +++ b/man/pie.abe.Rd @@ -24,28 +24,26 @@ Pie function for the resampled/bootstrapped version of ABE. Plots a pie chart of When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. 
} \examples{ -set.seed(1) -n=100 -x1<-runif(n) -x2<-runif(n) -x3<-runif(n) -y<--5+5*x1+5*x2+ rnorm(n,sd=5) -dd<-data.frame(y=y,x1=x1,x2=x2,x3=x3) -fit<-lm(y~x1+x2+x3,x=TRUE,y=TRUE,data=dd) - -fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -tau=c(0.05,0.1),exact=TRUE, -criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -num.resamples=50,type.resampling="Wallisch2021") - -pie.abe(fit.resample, alpha=0.2,tau=0.1) - -fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -tau=c(0.05,0.1),exact=TRUE, -criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -num.resamples=50,type.resampling="subsampling") - -pie.abe(fit.resample, alpha=0.2,tau=0.1) +set.seed(10) +n = 100 +x1 <- runif(n) +x2 <- runif(n) +x3 <- runif(n) +y <- -5 + 5 * x1 + 5 * x2 + rnorm(n, sd = 5) +dd <- data.frame(y = y, x1 = x1, x2 = x2, x3 = x3) +fit <- lm(y ~ x1 + x2 + x3, x = TRUE, y = TRUE, data = dd) + +fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +tau = c(0.05, 0.1), exact = TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +type.test = "Chisq", num.resamples = 50, type.resampling = "Wallisch2021") + +pie.abe(fit.resample, alpha = 0.2, tau = 0.1) + +fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +tau= c(0.05, 0.1), exact=TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +type.test = "Chisq", num.resamples = 50, type.resampling = "subsampling") + +pie.abe(fit.resample, alpha = 0.2, tau = 0.1) } \seealso{ [abe.resampling()], [summary.abe()], [plot.abe()] diff --git a/man/plot.abe.Rd b/man/plot.abe.Rd index d355e29..5ed2ef0 100644 --- a/man/plot.abe.Rd +++ b/man/plot.abe.Rd @@ -39,9 +39,9 @@ Plot function for the resampled/bootstrapped version of ABE. When using `type.plot="coefficients"` the function plots a histogram of the estimated regression coefficients for the specified variables, alpha(s) and tau(s) obtained from different re-sampled datasets. 
When the variable is not included in the final model, its regression coefficient is set to zero. When using `type.resampling="Wallisch2021"` the plot is based on bootstrap, otherwise as specified in `type.resampling`. -When using `type.plot="variables"` the function plots a barplot of the relative inclusion frequencies of the specified variables, for the specified values of alpha and tau. When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. +When using \code{type.plot="variables"} the function plots a barplot of the relative inclusion frequencies of the specified variables, for the specified values of alpha and tau. When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. -When using `type.plot="models"` the function plots a barplot of the relative frequencies of the final models for specified alpha(s) and tau(s). When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. +When using \code{type.plot="models"} the function plots a barplot of the relative frequencies of the final models for specified alpha(s) and tau(s). When using `type.resampling="Wallisch2021"` the plot is based on subsampling with sampling proportion equal to 0.5, otherwise as specified in `type.resampling`. When using `type.plot="stability"` the function plots variable inclusion frequencies for each value of alpha. `type.stability` specifies if inclusion frequencies should be plotted as a function of alpha (default) or tau. 
@@ -104,5 +104,7 @@ Rok Blagus, \email{rok.blagus@mf.uni-lj.si} Sladana Babic +Daniela Dunkler + Gregor Steiner } diff --git a/man/print.abe.Rd b/man/print.abe.Rd index d62c0d7..18405b8 100644 --- a/man/print.abe.Rd +++ b/man/print.abe.Rd @@ -40,27 +40,26 @@ resampled median and percentiles for the estimates of the regression coefficient root mean squared difference ratio (RMSD) and relative bias conditional on selection (RBCS), see `details`. } \details{ -When using `type.resampling="Wallisch2021"` in a call to [abe.resampling()], the results for the relative inclusion frequencies of the covariates from the initial model are based on subsampling with sampling propotion equal to 0.5 and the other results are based on bootstrap as suggested by Wallisch et al. (2021); otherwise all the results are obtained by using the method as specified in `type.resampling`. +When using `type.resampling="Wallisch2021"` in a call to [abe.resampling()], the results for the relative inclusion frequencies of the covariates from the initial model are based on subsampling with sampling proportion equal to 0.5 and the other results are based on bootstrap as suggested by Wallisch et al. (2021); otherwise all the results are obtained by using the method as specified in `type.resampling`. Parameter `conf.level` defines the lower and upper quantile of the bootstrapped/resampled distribution such that equal proportion of values are smaller and larger than the lower and the upper quantile, respectively. If `type = "models"`, the `models.n` parameter controls the number of models printed. One option is to directly specify the number of models to return (i.e. an integer larger than 1). Alternatively, if `models.n` is set to a number less than (or equal to) 1, the number of models returned is such that the cumulative frequency attains that value. By default (`models.n = NULL`), the top 20 models or all models up to a cumulative frequency of 0.8, whichever is shorter, are returned. 
The selected model is marked with an asterisk. If it is not among the printed models, it is added as the last model. } \examples{ -set.seed(1) -n=100 -x1<-runif(n) -x2<-runif(n) -x3<-runif(n) -y<--5+5*x1+5*x2+ rnorm(n,sd=5) -dd<-data.frame(y=y,x1=x1,x2=x2,x3=x3) -fit<-lm(y~x1+x2+x3,x=TRUE,y=TRUE,data=dd) +set.seed(100) +n = 100 +x1 <- runif(n) +x2 <- runif(n) +x3 <- runif(n) +y<- -5 + 5 * x1 + 5 * x2 + rnorm(n, sd = 5) +dd <- data.frame(y = y,x1 = x1, x2 = x2, x3 = x3) +fit <- lm(y ~ x1 + x2 + x3, x = TRUE, y = TRUE, data= dd) -fit.resample<-abe.resampling(fit,data=dd,include="x1",active="x2", -tau=c(0.05,0.1),exact=TRUE, -criterion="alpha",alpha=c(0.2,0.05),type.test="Chisq", -num.resamples=50,type.resampling="Wallisch2021") +fit.resample <- abe.resampling(fit, data = dd, include = "x1", active = "x2", +tau = c(0.05, 0.1), exact = TRUE, criterion = "alpha", alpha = c(0.2, 0.05), +type.test = "Chisq", num.resamples = 50, type.resampling = "Wallisch2021") -print(fit.resample,conf.level=0.95,alpha=0.2,tau=0.05) +print(fit.resample, conf.level = 0.95, alpha = 0.2, tau = 0.05) } \references{ Wallisch C, Dunkler D, Rauch G, de Bin R, Heinze G. Selection of variables for multivariable models: Opportunities and limitations in quantifying model stability by resampling. Statistics in Medicine 40:369-381, 2021. @@ -73,5 +72,7 @@ Rok Blagus, \email{rok.blagus@mf.uni-lj.si} Sladana Babic +Daniela Dunkler + Gregor Steiner } diff --git a/man/summary.abe.Rd b/man/summary.abe.Rd index 7b8d4f1..ed863e0 100644 --- a/man/summary.abe.Rd +++ b/man/summary.abe.Rd @@ -73,5 +73,7 @@ Rok Blagus, \email{rok.blagus@mf.uni-lj.si} Sladana Babic +Daniela Dunkler + Gregor Steiner }