Skip to content

Commit

Permalink
bug fixes for variance calculations to get edf; update docs
Browse files Browse the repository at this point in the history
  • Loading branch information
Nicholas Clark committed Mar 25, 2024
1 parent 5698279 commit 23933c2
Show file tree
Hide file tree
Showing 110 changed files with 616 additions and 516 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,18 @@ importFrom(bayesplot,neff_ratio)
importFrom(bayesplot,nuts_params)
importFrom(brms,bernoulli)
importFrom(brms,conditional_effects)
importFrom(brms,dbeta_binomial)
importFrom(brms,dstudent_t)
importFrom(brms,get_prior)
importFrom(brms,logm1)
importFrom(brms,lognormal)
importFrom(brms,mcmc_plot)
importFrom(brms,ndraws)
importFrom(brms,pbeta_binomial)
importFrom(brms,prior_string)
importFrom(brms,pstudent_t)
importFrom(brms,qstudent_t)
importFrom(brms,rbeta_binomial)
importFrom(brms,rstudent_t)
importFrom(brms,student)
importFrom(ggplot2,scale_colour_discrete)
Expand Down
7 changes: 3 additions & 4 deletions R/compute_edf.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){
names(random_sp) <- gsub('\\))', ')', names(random_sp))
}

mgcv_model$sp <- c(rho_sp, random_sp)
mgcv_model$sp <- exp(c(rho_sp, random_sp))

# Compute estimated degrees of freedom based on edf.type = 1 from
# https://github.com/cran/mgcv/blob/master/R/jagam.r
Expand Down Expand Up @@ -88,10 +88,9 @@ compute_edf = function(mgcv_model, object, rho_names, sigma_raw_names){
mu[which(mu_variance == 0)]
}

w <- as.numeric(mgcv_model$family$mu.eta(eta) / mu_variance)
w <- as.numeric(mgcv_model$family$mu.eta(eta)^2 / mu_variance)
XWX <- t(X) %*% (w * X)
rho <- mgcv_model$sp
lambda <- exp(rho)
lambda <- mgcv_model$sp
XWXS <- XWX

for(i in 1:length(lambda)){
Expand Down
24 changes: 16 additions & 8 deletions R/families.R
Original file line number Diff line number Diff line change
Expand Up @@ -401,8 +401,11 @@ mvgam_predict = function(Xp,
xpred <- extraDistr::rtpois(n = length(lambdas),
lambda = lambdas,
b = cap)
# Variance of a Binomial distribution
out <- xpred * p * (1 - p)

# Variance of a Binomial distribution using the
# weights convention from stats::glm()
mu <- p / xpred
out <- mu * (1 - mu)
} else {
# Expectations
xpred <- extraDistr::rtpois(n = length(lambdas),
Expand Down Expand Up @@ -478,8 +481,9 @@ mvgam_predict = function(Xp,
}

} else if(type == 'variance') {
out <- as.vector(family_pars$nu) /
(pmax(2.01, as.vector(family_pars$nu)) - 2)
out <- as.vector(family_pars$sigma_obs)^2 *
as.vector(family_pars$nu) /
pmax(1.01, (as.vector(family_pars$nu) - 2))

} else {
out <- rstudent_t(n = NROW(Xp),
Expand Down Expand Up @@ -568,8 +572,9 @@ mvgam_predict = function(Xp,
size = as.vector(family_pars$trials))
} else if(type == 'variance'){
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
out <- as.vector(family_pars$trials) * mu * (1 - mu)
betas) + attr(Xp, 'model.offset'))) /
as.vector(family_pars$trials)
out <- mu * (1 - mu)
} else {
out <- plogis(((matrix(Xp, ncol = NCOL(Xp)) %*%
betas)) +
Expand Down Expand Up @@ -669,7 +674,7 @@ mvgam_predict = function(Xp,
} else if(type == 'variance'){
mu <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
out <- mu * (1 - mu) / (1 + exp(as.vector(family_pars$phi)))
out <- mu * (1 - mu) / (1 + as.vector(family_pars$phi))
} else {
out <- plogis(as.vector((matrix(Xp, ncol = NCOL(Xp)) %*%
betas) + attr(Xp, 'model.offset')))
Expand Down Expand Up @@ -1658,7 +1663,10 @@ dsresids_vec = function(object){
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, series_obs])
} else {
family_pars[[j]][]
as.vector(matrix(rep(family_pars[[j]],
NCOL(preds)),
nrow = NROW(preds),
byrow = FALSE))
}
})
names(family_extracts) <- names(family_pars)
Expand Down
15 changes: 11 additions & 4 deletions R/forecast.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,25 @@ forecast.mvgam = function(object, newdata, data_test,
family_pars <- extract_family_pars(object = object)
par_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
family_pars[[j]][, series]
as.vector(matrix(rep(as.vector(family_pars[[j]][, series]),
NCOL(preds)),
nrow = NROW(preds),
byrow = FALSE))

} else {
family_pars[[j]]
as.vector(matrix(rep(family_pars[[j]],
NCOL(preds)),
nrow = NROW(preds),
byrow = FALSE))
}
})
names(par_extracts) <- names(family_pars)

# Add trial information if this is a Binomial model
if(object$family %in% c('binomial', 'beta_binomial')){
trials <- as.vector(matrix(rep(as.vector(attr(object$mgcv_model, 'trials')[,series]),
NROW(mus)),
nrow = NROW(mus),
NROW(preds)),
nrow = NROW(preds),
byrow = TRUE))
par_extracts$trials <- trials
}
Expand Down
11 changes: 9 additions & 2 deletions R/hindcast.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,16 @@ if(series == 'all'){
family_pars <- extract_family_pars(object = object)
par_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
family_pars[[j]][, series]
as.vector(matrix(rep(as.vector(family_pars[[j]][, series]),
NCOL(preds)),
nrow = NROW(preds),
byrow = FALSE))

} else {
family_pars[[j]]
as.vector(matrix(rep(family_pars[[j]],
NCOL(preds)),
nrow = NROW(preds),
byrow = FALSE))
}
})
names(par_extracts) <- names(family_pars)
Expand Down
5 changes: 4 additions & 1 deletion R/logLik.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,10 @@ logLik.mvgam = function(object,
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, series_obs])
} else {
family_pars[[j]][]
as.vector(matrix(rep(family_pars[[j]],
NCOL(mus)),
nrow = NROW(mus),
byrow = FALSE))
}
})
names(family_extracts) <- names(family_pars)
Expand Down
37 changes: 3 additions & 34 deletions R/mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -2164,11 +2164,9 @@ mvgam = function(formula,
}

# Add Bayesian coefficients to the mgcv model to help with plotting of
# smooths that aren't yet supported by mvgam plotting functions
# Extract median beta params for smooths and their covariances
# so that uncertainty from mgcv plots is reasonably accurate
if(all(run_model, !lfo, residuals)){
# Use the empirical covariance matrix from the fitted coefficients
# smooths that aren't yet supported by mvgam plotting functions; this is
# also necessary for computing EDFs and approximate p-values of smooths
if(!lfo){
V <- cov(mcmc_chains(out_gam_mod, 'b'))
ss_gam$Ve <- ss_gam$Vp <- ss_gam$Vc <- V

Expand All @@ -2181,49 +2179,20 @@ mvgam = function(formula,
names(p) <- names(ss_gam$coefficients)
ss_gam$coefficients <- p

# Compute estimated degrees of freedom for smooths
object = list(
model_output = out_gam_mod,
call = orig_formula,
model_data = if(use_stan){
vectorised$model_data
} else {
ss_jagam$jags.data
},
fit_engine = fit_engine,
family = family_char,
share_obs_params = share_obs_params,
obs_data = data_train,
test_data = data_test,
trend_model = trend_model,
mgcv_model = ss_gam,
ytimes = ytimes)
class(object) <- 'mvgam'
ss_gam <- compute_edf(ss_gam, object, 'rho', 'sigma_raw')

# Repeat for any trend-specific mgcv model
if(!missing(trend_formula)){
V <- cov(mcmc_chains(out_gam_mod, 'b_trend'))
trend_mgcv_model$Ve <- trend_mgcv_model$Vp <- trend_mgcv_model$Vc <- V

p <- mcmc_summary(out_gam_mod, 'b_trend',
variational = algorithm %in% c('meanfield',
'fullrank',
'pathfinder',
'laplace'))[,c(4)]
names(p) <- names(trend_mgcv_model$coefficients)
trend_mgcv_model$coefficients <- p

object$trend_mgcv_model <- trend_mgcv_model
trend_mgcv_model <- compute_edf(trend_mgcv_model,
object, 'rho_trend', 'sigma_raw_trend')
}
}

if(lfo){
ss_gam <- NULL
}

#### Return the output as class mvgam ####
trim_data <- list()
attr(trim_data, 'trend_model') <- trend_model
Expand Down
17 changes: 5 additions & 12 deletions R/predict.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -262,12 +262,15 @@ predict.mvgam = function(object, newdata,
# Determine which series each observation belongs to
series_ind <- as.numeric(newdata$series)

# Family parameters spread into a vector
# Family parameters spread into long vectors
family_extracts <- lapply(seq_along(family_pars), function(j){
if(is.matrix(family_pars[[j]])){
as.vector(family_pars[[j]][, series_ind])
} else {
family_pars[[j]]
as.vector(matrix(rep(family_pars[[j]],
NROW(Xp)),
nrow = NROW(betas),
byrow = FALSE))
}
})
names(family_extracts) <- names(family_pars)
Expand All @@ -283,16 +286,6 @@ predict.mvgam = function(object, newdata,
}

trials <- newdata[[trial_name]]
# trial_df <- data.frame(series = newdata$series,
# time = newdata$time,
# trial = newdata[[trial_name]])
# trials <- do.call(cbind, lapply(length(levels(newdata$series)), function(i){
# trial_df %>%
# dplyr::filter(series == levels(newdata$series)[i]) %>%
# dplyr::arrange(time) %>%
# dplyr::pull(trial)
# }))

trials <- as.vector(matrix(rep(as.vector(trials),
NROW(betas)),
nrow = NROW(betas),
Expand Down
31 changes: 26 additions & 5 deletions R/summary.mvgam.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
#'@param include_betas Logical. Print a summary that includes posterior summaries
#'of all linear predictor beta coefficients (including spline coefficients)?
#'Defaults to \code{TRUE} but use \code{FALSE} for a more concise summary
#'@param smooth_test Logical. Compute estimated degrees of freedom and approximate
#'p-values for smooth terms? Defaults to \code{TRUE}, but users may wish to set
#'to \code{FALSE} for complex models with many smooth terms
#'@param digits The number of significant digits for printing out the summary;
#' defaults to \code{2}.
#'@param ... Ignored
Expand All @@ -28,7 +31,25 @@
#'For `coef.mvgam`, either a \code{matrix} of posterior coefficient distributions
#'(if \code{summarise == FALSE} or \code{data.frame} of coefficient summaries)
#'@export
summary.mvgam = function(object, include_betas = TRUE, digits = 2, ...){
summary.mvgam = function(object,
include_betas = TRUE,
smooth_test = TRUE,
digits = 2, ...){

#### Smooth tests ####
if(smooth_test){
object$mgcv_model <- compute_edf(object$mgcv_model,
object,
'rho',
'sigma_raw')

if(!is.null(object$trend_call)){
object$trend_mgcv_model <- compute_edf(object$trend_mgcv_model,
object,
'rho_trend',
'sigma_raw_trend')
}
}

#### Standard summary of formula and model arguments ####
if(!is.null(object$trend_call)){
Expand Down Expand Up @@ -252,8 +273,8 @@ if(!is.null(attr(object$mgcv_model, 'gp_att_table'))){
print(rbind(alpha_summary, rho_summary))
}

if(any(!is.na(object$sp_names))){
gam_sig_table <- summary(object$mgcv_model)$s.table[, c(1,3,4), drop = FALSE]
if(any(!is.na(object$sp_names)) & smooth_test){
gam_sig_table <- summary(object$mgcv_model)$s.table[, c(1,2,3,4), drop = FALSE]
if(!is.null(attr(object$mgcv_model, 'gp_att_table'))){
gp_names <- unlist(purrr::map(attr(object$mgcv_model,
'gp_att_table'), 'name'))
Expand Down Expand Up @@ -676,8 +697,8 @@ if(!is.null(object$trend_call)){
print(rbind(alpha_summary, rho_summary))
}

if(any(!is.na(object$trend_sp_names))){
gam_sig_table <- summary(object$trend_mgcv_model)$s.table[, c(1,3,4), drop = FALSE]
if(any(!is.na(object$trend_sp_names)) & smooth_test){
gam_sig_table <- summary(object$trend_mgcv_model)$s.table[, c(1,2,3,4), drop = FALSE]
if(!is.null(attr(object$trend_mgcv_model, 'gp_att_table'))){
gp_names <- unlist(purrr::map(attr(object$trend_mgcv_model, 'gp_att_table'), 'name'))
if(all(rownames(gam_sig_table) %in% gsub('gp(', 's(', gp_names, fixed = TRUE))){
Expand Down
Loading

0 comments on commit 23933c2

Please sign in to comment.