diff --git a/.Rbuildignore b/.Rbuildignore index 4829d19..31a1cee 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -15,3 +15,4 @@ ^cran-comments\.md$ ^\.DS_Store$ Build_Code.R +^dev$ diff --git a/.github/workflows/rhub.yaml b/.github/workflows/rhub.yaml new file mode 100644 index 0000000..74ec7b0 --- /dev/null +++ b/.github/workflows/rhub.yaml @@ -0,0 +1,95 @@ +# R-hub's generic GitHub Actions workflow file. It's canonical location is at +# https://github.com/r-hub/actions/blob/v1/workflows/rhub.yaml +# You can update this file to a newer version using the rhub2 package: +# +# rhub::rhub_setup() +# +# It is unlikely that you need to modify this file manually. + +name: R-hub +run-name: "${{ github.event.inputs.id }}: ${{ github.event.inputs.name || format('Manually run by {0}', github.triggering_actor) }}" + +on: + workflow_dispatch: + inputs: + config: + description: 'A comma separated list of R-hub platforms to use.' + type: string + default: 'linux,windows,macos' + name: + description: 'Run name. You can leave this empty now.' + type: string + id: + description: 'Unique ID. You can leave this empty now.' 
+ type: string + +jobs: + + setup: + runs-on: ubuntu-latest + outputs: + containers: ${{ steps.rhub-setup.outputs.containers }} + platforms: ${{ steps.rhub-setup.outputs.platforms }} + + steps: + # NO NEED TO CHECKOUT HERE + - uses: r-hub/actions/setup@v1 + with: + config: ${{ github.event.inputs.config }} + id: rhub-setup + + linux-containers: + needs: setup + if: ${{ needs.setup.outputs.containers != '[]' }} + runs-on: ubuntu-latest + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.containers) }} + container: + image: ${{ matrix.config.container }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/run-check@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + + other-platforms: + needs: setup + if: ${{ needs.setup.outputs.platforms != '[]' }} + runs-on: ${{ matrix.config.os }} + name: ${{ matrix.config.label }} + strategy: + fail-fast: false + matrix: + config: ${{ fromJson(needs.setup.outputs.platforms) }} + + steps: + - uses: r-hub/actions/checkout@v1 + - uses: r-hub/actions/setup-r@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/platform-info@v1 + with: + token: ${{ secrets.RHUB_TOKEN }} + job-config: ${{ matrix.config.job-config }} + - uses: r-hub/actions/setup-deps@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} + - uses: r-hub/actions/run-check@v1 + with: + job-config: ${{ matrix.config.job-config }} + token: ${{ secrets.RHUB_TOKEN }} diff --git a/DESCRIPTION b/DESCRIPTION index 4d10a4d..d24eb10 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,33 +1,37 @@ Type: 
Package Package: hdflex Title: High-Dimensional Aggregate Density Forecasts -Version: 0.2.2 +Version: 0.3.0 Authors@R: c( person("Sven", "Lehmann", , "sven.lehmann@uni-rostock.de", role = c("aut", "cre", "cph")), person("Philipp", "Adämmer", , "philipp.adaemmer@uni-greifswald.de", role = "aut"), person("Rainer", "Schüssler", , "rainer.schüssler@uni-rostock.de", role = "aut") ) Maintainer: Sven Lehmann -Description: Provides a forecasting method that maps vast numbers of - (scalar-valued) signals of any type into an aggregate density forecast +Description: Provides a forecasting method that efficiently maps vast + numbers of (scalar-valued) signals into an aggregate density forecast in a time-varying and computationally fast manner. The method proceeds in two steps: First, it transforms a predictive signal into a density - forecast. Second, it combines the generated candidate density - forecasts into an ultimate density forecast. The methods are explained - in detail in Adaemmer et al. (2023) . + forecast and, second, it combines the resulting candidate density + forecasts into an ultimate aggregate density forecast. For a detailed + explanation of the method, please refer to Adaemmer et al. (2023) + . 
License: GPL (>= 2) +URL: https://github.com/lehmasve/hdflex +BugReports: https://github.com/lehmasve/hdflex/issues Depends: R (>= 4.3.0) Imports: checkmate (>= 2.3.1), - dplyr (>= 1.1.4), - parallel (>= 4.3.0), + ggplot2 (>= 3.5.1), + parallel, Rcpp, - roll (>= 1.1.6), - stats (>= 4.3.0), - stringr (>= 1.5.1) + reshape2 (>= 1.4.4), + stats, + utils Suggests: - testthat (>= 3.2.1) + testthat (>= 3.2.1), + cowplot (>= 1.1.3) LinkingTo: Rcpp, RcppArmadillo, @@ -37,5 +41,3 @@ Encoding: UTF-8 LazyData: true NeedsCompilation: Yes RoxygenNote: 7.3.2 -URL: https://github.com/lehmasve/hdflex -BugReports: https://github.com/lehmasve/hdflex/issues diff --git a/NAMESPACE b/NAMESPACE index 8fec091..46905ca 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,15 +1,16 @@ # Generated by roxygen2: do not edit by hand +S3method(summary,dsc_obj) +S3method(summary,stsc_obj) export(dsc) export(stsc) -export(summary_stsc) export(tvc) import(checkmate) -import(parallel) +import(ggplot2) importFrom(Rcpp,evalCpp) -importFrom(dplyr,lag) -importFrom(roll,roll_sum) +importFrom(reshape2,melt) +importFrom(stats,complete.cases) +importFrom(stats,dnorm) importFrom(stats,na.omit) -importFrom(stats,t.test) -importFrom(stringr,str_split) +importFrom(stats,pnorm) useDynLib(hdflex, .registration = TRUE) diff --git a/R/ClassMethods.R b/R/ClassMethods.R new file mode 100644 index 0000000..69adbd7 --- /dev/null +++ b/R/ClassMethods.R @@ -0,0 +1,398 @@ +######################### STSC - S3 Object ######################### +#' @name summary.stsc_obj +#' @title Plots the evolution of the tuning parameter for 'stsc' object +#' and returns basic performance metrics +#' @param object An object of type 'stsc' +#' @param eval_period (Optional) A vector of indices to specify the evaluation period +#' @param ... 
Additional arguments to be consistent with S3 print() function +#' @method summary stsc_obj +#' @import ggplot2 +#' @importFrom reshape2 melt +#' @importFrom stats complete.cases dnorm pnorm +#' @references +#' Gneiting, T., Raftery, A. E., Westveld, A. H., and Goldman, T. (2005): Calibrated Probabilistic Forecasting Using Ensemble Model Output Statistics and Minimum CRPS Estimation. \emph{Monthly Weather Review}, 133: 1098–1118. +#' Jordan, A., Krueger, F., and Lerch, S. (2019): "Evaluating Probabilistic Forecasts with scoringRules." \emph{Journal of Statistical Software}, 90(12): 1-37. +#' @export + +summary.stsc_obj <- function(object, eval_period = NULL, ...) { + + ### Data + # Extract realized values from object + y <- object$Forecasts$Realization + point_forecast <- object$Forecasts$Point_Forecasts + variance_forecast <- object$Forecasts$Variance_Forecasts + chosen_gamma <- object$Tuning_Parameters$Gamma + chosen_psi <- object$Tuning_Parameters$Psi + chosen_lambda <- object$Tuning_Parameters$Lambda + chosen_kappa <- object$Tuning_Parameters$Kappa + chosen_signals <- object$Tuning_Parameters$Signals + gamma_grid <- object$Model$Gamma_grid + psi_grid <- object$Model$Psi_grid + init <- object$Model$Init + burn_in <- object$Model$Burn_in + burn_in_dsc <- object$Model$Burn_in_dsc + + ### Evaluation + # Set Evaluation Period + if (is.null(eval_period)) { + start <- max(init, burn_in, burn_in_dsc) + 1 + eval_period <- start:nrow(y) + } + + # Check for NaNs in point_forecast within eval_period + if (any(is.na(point_forecast[eval_period]))) { + stop("Invalid 'eval_period': results contain NaNs. 
Please adjust 'eval_period'.") + } + + # Cut Objects to evaluation period + y <- y[eval_period, ] + point_forecast <- point_forecast[eval_period] + variance_forecast <- variance_forecast[eval_period] + chosen_gamma <- chosen_gamma[eval_period] + chosen_psi <- chosen_psi[eval_period] + chosen_lambda <- chosen_lambda[eval_period, , drop = FALSE] + chosen_kappa <- chosen_kappa[eval_period, , drop = FALSE] + chosen_signals <- chosen_signals[eval_period, , drop = FALSE] + + # Calculate MSE / SE + se <- (y - point_forecast)^2 + mse <- mean(se) + + # Calculate ACRPS / CRPS + z <- (y - point_forecast) / sqrt(variance_forecast) + pdf <- dnorm(z, 0.0, 1.0) + cdf <- pnorm(z, 0.0, 1.0) + pi_inv <- 1.0 / sqrt(pi) + crps <- sqrt(variance_forecast) * (z * (2.0 * cdf - 1.0) + 2.0 * pdf - pi_inv) + acrps <- mean(crps) + + # (Mean) Predictive Log-Likelihood + pll <- pnorm(y, point_forecast, sqrt(variance_forecast), log.p = TRUE) + apll <- mean(pll) + + ### Visualization + # Function to create a factor plot + plot_factor <- function(x, y, grid, main, xlab, ylab) { + + # Convert to factor for plotting + factor_y <- factor(y, levels = grid) + data <- data.frame(Time = x, Value = factor_y) + + # Ticks / Breaks for y-axis + max_ticks <- 30 + breaks <- if (length(levels(factor_y)) > max_ticks) { + pretty(seq_along(levels(factor_y)), n = max_ticks) + } else { + seq_along(levels(factor_y)) + } + breaks <- levels(factor_y)[breaks] + + # Create the ggplot + ggplot(data, aes(x = Time, y = Value)) + + geom_point() + + scale_y_discrete(drop = FALSE, breaks = breaks) + + labs(title = main, x = xlab, y = ylab) + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + } + + # Function to create an area plot + plot_area <- function(x, m, main, xlab, ylab, legend_title) { + + # Normalize 
matrix (adding up to 100%) + m_normalized <- sweep(m, 1, rowSums(m), FUN = "/") + + # Convert to data frame for ggplot + data <- as.data.frame(m_normalized) + data$Time <- x + data_long <- reshape2::melt(data, + id.vars = "Time", + variable.name = "Variable", + value.name = "Value" + ) + + # Create the ggplot + ggplot(data_long, aes(x = Time, y = Value, fill = Variable)) + + geom_area(position = "fill") + + labs(title = main, x = xlab, y = ylab, fill = legend_title) + + scale_fill_grey(start = 0.3, end = 0.9) + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5), + legend.position = "bottom", + legend.box = "horizontal" + ) + } + + # Function to create a signal plot + plot_signal <- function(x, signals, main, xlab, ylab) { + + # Dimension restriction + if (ncol(signals) > 5000) { + # Select top 5000 signals by non-zero counts + non_zero_counts <- colSums(signals != 0) + top_columns <- order(non_zero_counts, decreasing = TRUE)[1:5000] + signals <- signals[, sort(top_columns), drop = FALSE] + } + + # Prepare data for ggplot + mat <- signals %*% diag(seq_len(ncol(signals))) + mat[mat == 0] <- NA + data <- as.data.frame(mat) + data$Time <- x + data_long <- reshape2::melt(data, + id.vars = "Time", + variable.name = "Variable", + value.name = "Value" + ) + + # Create the ggplot + ggplot(data_long, aes(x = Time, y = Value)) + + geom_point(size = 0.5, na.rm = TRUE) + + labs(title = main, x = xlab, y = ylab) + + expand_limits(y = c(1, ncol(mat))) + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + } + + # List to store ggplot objects + 
plots_list <- list() + + ### Plot: Tuning Parameter Gamma + plots_list$Gamma <- plot_factor(eval_period, + chosen_gamma, + gamma_grid, + expression("Discount Factor" ~ gamma), + "Time Index", + expression(gamma)) + + ### Plot: Tuning Parameter Psi + plots_list$Psi <- plot_factor(eval_period, + chosen_psi, + psi_grid, + expression("Subset-Size" ~ psi), + "Time Index", + expression(psi)) + + ### Plot: Evolution of selected signals + plots_list$Signals <- plot_signal(eval_period, + chosen_signals, + "Selected Signals", + "Time Index", + "Predictive Signal") + + ### Plot: Tuning Parameter Lambda + plots_list$Lambda <- plot_area(eval_period, + chosen_lambda, + expression("Discount Factor" ~ lambda), + "Time Index", + expression(lambda), + "Values") + + ### Plot: Tuning Parameter Kappa + plots_list$Kappa <- plot_area(eval_period, + chosen_kappa, + expression("Discount Factor" ~ kappa), + "Time Index", + expression(kappa), + "Values") + + # Return + return( + list( + MSE = list(mse, se), + ACRPS = list(acrps, crps), + APLL = list(apll, pll), + Plots = plots_list + ) + ) +} + +######################### DSC - S3 Object ######################### +#' @name summary.dsc_obj +#' @title Plots the evolution of the tuning parameter for 'dsc' object +#' and returns basic performance metrics +#' @param object An object of type 'dsc' +#' @param eval_period (Optional) A vector of indices to specify the evaluation period +#' @param ... Additional arguments to be consistent with S3 print() function +#' @method summary dsc_obj +#' @import ggplot2 +#' @importFrom reshape2 melt +#' @importFrom stats complete.cases dnorm na.omit pnorm +#' @references +#' Gneiting, T., Raftery, A. E., Westveld, A. H., and Goldman, T. (2005): Calibrated Probabilistic Forecasting Using Ensemble Model Output Statistics and Minimum CRPS Estimation. \emph{Monthly Weather Review}, 133: 1098–1118. +#' Jordan, A., Krueger, F., and Lerch, S. (2019): "Evaluating Probabilistic Forecasts with scoringRules." 
\emph{Journal of Statistical Software}, 90(12): 1-37. +#' @export + +summary.dsc_obj <- function(object, eval_period = NULL, ...) { + + ### Data + # Extract realized values from object + y <- object$Forecasts$Realization + point_forecast <- object$Forecasts$Point_Forecasts + variance_forecast <- object$Forecasts$Variance_Forecasts + gamma_grid <- object$Model$Gamma_grid + psi_grid <- object$Model$Psi_grid + chosen_gamma <- object$Tuning_Parameters$Gamma + chosen_psi <- object$Tuning_Parameters$Psi + chosen_cfms <- object$Tuning_Parameters$CFM + burn_in <- object$Model$Burn_in + burn_in_dsc <- object$Model$Burn_in_dsc + + ### Evaluation + # Set Evaluation Period + if (is.null(eval_period)) { + start <- max(burn_in, burn_in_dsc) + 1 + eval_period <- start:nrow(y) + } + + # Check for NaNs in point_forecast within eval_period + if (any(is.na(point_forecast[eval_period]))) { + stop("Invalid 'eval_period': results contain NaNs. Please adjust 'eval_period'.") + } + + # Cut Objects to evaluation period + y <- y[eval_period, ] + point_forecast <- point_forecast[eval_period] + variance_forecast <- variance_forecast[eval_period] + chosen_gamma <- chosen_gamma[eval_period] + chosen_psi <- chosen_psi[eval_period] + chosen_cfms <- chosen_cfms[eval_period, , drop = FALSE] + + # Calculate MSE / SE + se <- (y - point_forecast)^2 + mse <- mean(se) + + # Calculate ACRPS / CRPS + z <- (y - point_forecast) / sqrt(variance_forecast) + pdf <- dnorm(z, 0.0, 1.0) + cdf <- pnorm(z, 0.0, 1.0) + pi_inv <- 1.0 / sqrt(pi) + crps <- sqrt(variance_forecast) * (z * (2.0 * cdf - 1.0) + 2.0 * pdf - pi_inv) + acrps <- mean(crps) + + # (Mean) Predictive Log-Likelihood + pll <- pnorm(y, point_forecast, sqrt(variance_forecast), log.p = TRUE) + apll <- mean(pll) + + ### Visualization + # Function to create a factor plot + plot_factor <- function(x, y, grid, main, xlab, ylab) { + + # Convert to factor for plotting + factor_y <- factor(y, levels = grid) + data <- data.frame(Time = x, Value = factor_y) + + 
# Ticks / Breaks for y-axis + max_ticks <- 30 + breaks <- if (length(levels(factor_y)) > max_ticks) { + pretty(seq_along(levels(factor_y)), n = max_ticks) + } else { + seq_along(levels(factor_y)) + } + breaks <- levels(factor_y)[breaks] + + # Create the ggplot + ggplot(data, aes(x = Time, y = Value)) + + geom_point() + + scale_y_discrete(drop = FALSE, breaks = breaks) + + labs(title = main, x = xlab, y = ylab) + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + } + + # Function to create a cfm plot + plot_cfm <- function(x, cfms, main, xlab, ylab) { + + # Dimension restriction + if (ncol(cfms) > 5000) { + # Select top 5000 CFMs by non-zero counts + non_zero_counts <- colSums(cfms != 0) + top_columns <- order(non_zero_counts, decreasing = TRUE)[1:5000] + cfms <- cfms[, sort(top_columns), drop = FALSE] + } + + # Prepare data for ggplot + mat <- cfms %*% diag(seq_len(ncol(cfms))) + mat[mat == 0] <- NA + data <- as.data.frame(mat) + data$Time <- x + data_long <- reshape2::melt(data, + id.vars = "Time", + variable.name = "Variable", + value.name = "Value" + ) + + # Create the ggplot + ggplot(data_long, aes(x = Time, y = Value)) + + geom_point(size = 0.5, na.rm = TRUE) + + labs(title = main, x = xlab, y = ylab) + + expand_limits(y = c(1, ncol(mat))) + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + } + + # List to store ggplot objects + plots_list <- list() + + ### Plot: Tuning Parameter Gamma + plots_list$Gamma <- plot_factor(eval_period, + chosen_gamma, + gamma_grid, + expression("Discount Factor" ~ gamma), + "Time Index", + 
expression(gamma)) + + ### Plot: Tuning Parameter Psi + plots_list$Psi <- plot_factor(eval_period, + chosen_psi, + psi_grid, + expression("Subset-Size" ~ psi), + "Time Index", + expression(psi)) + + ### Plot: Evolution of selected CFMs + plots_list$CFM <- plot_cfm(eval_period, + chosen_cfms, + "Selected CFMs", + "Time Index", + "Predictive Signal") + + # Return + return( + list( + MSE = list(mse, se), + ACRPS = list(acrps, crps), + APLL = list(apll, pll), + Plots = plots_list + ) + ) +} diff --git a/R/RcppExports.R b/R/RcppExports.R index 8407953..2c30d64 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,43 +1,19 @@ # Generated by using Rcpp::compileAttributes() -> do not edit by hand # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -init_dsc <- function(number_forecasts) { - .Call(`_hdflex_init_dsc`, number_forecasts) +dsc_ <- function(y, point_forecasts, variance_forecasts, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_) { + .Call(`_hdflex_dsc_`, y, point_forecasts, variance_forecasts, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_) } -forget_dsc <- function(weights, gamma) { - .Call(`_hdflex_forget_dsc`, weights, gamma) +stsc_loop_ <- function(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_) { + .Call(`_hdflex_stsc_loop_`, y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_) } -active_models_dsc <- function(weights, psi) { - .Call(`_hdflex_active_models_dsc`, weights, psi) +stsc_loop_par_ <- function(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, n_threads, portfolio_params_) { + .Call(`_hdflex_stsc_loop_par_`, y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, 
gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, n_threads, portfolio_params_) } -matrix_subset_idx <- function(mat, col_idx, t) { - .Call(`_hdflex_matrix_subset_idx`, mat, col_idx, t) -} - -agg_density_dsc <- function(active_weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, idx_sub, t) { - .Call(`_hdflex_agg_density_dsc`, active_weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, idx_sub, t) -} - -update_dsc <- function(weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, n_models, t) { - .Call(`_hdflex_update_dsc`, weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, n_models, t) -} - -dsc_loop <- function(weights, gamma, psi, oos_target_var, oos_forecast_tvp, oos_variance_tvp, len_para_grid, oos_length, n_models) { - .Call(`_hdflex_dsc_loop`, weights, gamma, psi, oos_target_var, oos_forecast_tvp, oos_variance_tvp, len_para_grid, oos_length, n_models) -} - -stsc_loop_ <- function(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, risk_aversion_, min_weight_, max_weight_) { - .Call(`_hdflex_stsc_loop_`, y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, risk_aversion_, min_weight_, max_weight_) -} - -stsc_loop_par_ <- function(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, n_threads, risk_aversion_, min_weight_, max_weight_) { - .Call(`_hdflex_stsc_loop_par_`, y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, n_threads, risk_aversion_, min_weight_, max_weight_) -} - -tvc_ <- function(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, bias) { - .Call(`_hdflex_tvc_`, y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, 
bias) +tvc_ <- function(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias) { + .Call(`_hdflex_tvc_`, y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias) } diff --git a/R/data-documentation.R b/R/data-documentation.R index ef690fb..6073346 100644 --- a/R/data-documentation.R +++ b/R/data-documentation.R @@ -1,93 +1,43 @@ -#' @title Dataset to estimate quarterly U.S. inflation -#' @description A novel, high-dimensional dataset built by Koop and Korobilis (2023) -#' that merges predictive signals from several mainstream aggregate -#' macroeconomic and financial datasets. The dataset includes -#' the FRED-QD dataset of McCracken and Ng (2020), -#' augment with portfolio data used in Jurado et al. (2015), -#' stock market predictors from Welch and Goyal (2008), -#' survey data from University of Michigan consumer surveys, -#' commodity prices from the World Bank’s Pink Sheet database, -#' and key macroeconomic indicators from the Federal Reserve Economic Data -#' for four economies (Canada, Germany, Japan, United Kingdom). -#' The data is already pre-processed to perform one-step-ahead forecasts -#' and augmented with (external) point forecasts from Koop & Korobilis (2023). -#' The dataset spans the period 1960-Q3 to 2021-Q4. -#' -#' @format A \link{matrix} with 245 quarterly observations (rows) and 516 variables (columns). +#' @title Quarterly U.S. Inflation Dataset (Total CPI) +#' @description A high-dimensional dataset created by \emph{Koop and Korobilis (2023)} +#' that integrates predictive signals from various macroeconomic and financial sources. 
+#' +#' @details The dataset includes data from the following sources: +#' - **FRED-QD dataset** (McCracken and Ng, 2020) +#' - **Portfolio data** (Jurado et al., 2015) +#' - **Stock market predictors** (Welch and Goyal, 2008) +#' - **University of Michigan consumer surveys** +#' - **World Bank’s Pink Sheet commodity prices** +#' - **Key macroeconomic indicators** from the Federal Reserve Economic Data for Canada, Germany, Japan, and the United Kingdom +#' +#' The dataset is pre-processed for one-step-ahead forecasts and includes external point forecasts. +#' It spans from 1960-Q3 to 2021-Q4. +#' @format A \link{matrix} with 245 quarterly observations (rows) and 462 signals (columns): #' \describe{ -#' -#' \item{Column 1:4}{Transformed target variables: -#' GDP deflator (GDPCTPI), PCE deflator (PCECTPI), -#' Total CPI (CPIAUCSL), Core CPI (CPILFESL)} -#' -#' \item{Column 5:8}{First lag of the target variables} -#' -#' \item{Column 9:12}{Second lag of the target variables} -#' -#' \item{Column 13:16}{All four (lagged) price series transformed with second differences of logarithms} -#' -#' \item{Column 17:452}{All remaining (lagged and transformed) signals from the -#' FRED-QD dataset of McCracken and Ng (2020), -#' portfolio data used in Jurado et al. (2015), -#' stock market predictors from Welch and Goyal (2008), -#' survey data from University of Michigan consumer surveys, -#' commodity prices from the World Bank’s Pink Sheet database, -#' and key macroeconomic indicators from the Federal Reserve Economic Data -#' for Canada, Germany, Japan & United Kingdom. } -#' -#' \item{Column 453:468}{External point forecasts for quarterly GDP deflator (GDPCTPI) -#' generated by the MatLab Code from Koop and Korobilis (2023). -#' The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} -#' -#' \item{Column 469:484}{External point forecasts for quarterly PCE deflator (PCECTPI) -#' generated by the MatLab Code from Koop and Korobilis (2023). 
-#' The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} -#' -#' \item{Column 485:500}{External point forecasts for quarterly Total CPI (CPIAUCSL) -#' generated by the MatLab Code from Koop and Korobilis (2023). -#' The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} -#' -#' \item{Column 501:516}{External point forecasts for quarterly Core CPI (CPILFESL) -#' generated by the MatLab Code from Koop and Korobilis (2023). -#' The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} +#' \item{Column 1}{Transformed target variable: Total CPI (CPIAUCSL)} +#' \item{Columns 2-3}{First and second lag of the target variable} +#' \item{Columns 4-442}{Lagged and transformed signals from the sources listed above} +#' \item{Columns 443-462}{External point forecasts available from 1976-Q1 to 2021-Q4 +#' for quarterly Total CPI (CPIAUCSL), including: +#' \describe{ +#' \item{First 12 forecasts}{Generated using regression trees, +#' ridge regressions, and elastic nets +#' over expanding and rolling windows} +#' \item{Remaining 8 forecasts}{Based on models discussed in Koop and Korobilis (2023) +#' such as Gaussian process regressions (GPR_FAC5), +#' Unobserved Component Stochastic Volatility (UCSV), +#' and Variational Bayes Dynamic Variable Selection (VBDVS_X)} +#' } +#' } #' } #' @references #' -#' Jurado, K., Ludvigson, S. C., and Ng, S. (2015) "Measuring uncertainty." -#' \emph{American Economic Review}, 105 (3): 1177–1216. -#' -#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -#' \emph{International Economic Review}. +#' Jurado, K., Ludvigson, S. C., and Ng, S. (2015) "Measuring uncertainty." \emph{American Economic Review}, 105 (3): 1177–1216. #' -#' McCracken, M., and S. Ng (2020) “FRED-QD: A Quarterly Database for Macroeconomic Research” -#' \emph{National Bureau of Economic Research}, Working Paper 26872. -#' -#' Welch, I. and Goyal, A. 
(2008) "A comprehensive look at the empirical performance of equity premium prediction." -#' \emph{The Review of Financial Studies}, 21 (4): 1455–1508. -#' -#' @source -"inflation_data" - - -#' @title AR(2) benchmark forecasts for quarterly U.S. inflation -#' @description Out-of-sample one-step-ahead AR(2) benchmark forecasts for the period -#' from 1991-Q2 to 2021-Q4. The AR(2) models are estimated with OLS and intercept. -#' -#' @format A \link{matrix} with 123 quarterly observations (rows) and 4 benchmarks (columns): -#' \describe{ -#' -#' \item{GDPCTPI}{OOS-AR2-benchmark forecast for quarterly GDP deflator (GDPCTPI).} -#' -#' \item{PCECTPI}{OOS-AR2-benchmark forecast for quarterly PCE deflator (PCECTPI).} -#' -#' \item{CPIAUCSL}{OOS-AR2-benchmark forecast for quarterly Total CPI (CPIAUCSL).} -#' -#' \item{CPILFESL}{OOS-AR2-benchmark forecast for quarterly Core CPI (CPILFESL).} -#' } -#' @references +#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. #' -#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -#' \emph{International Economic Review}. +#' McCracken, M., and S. Ng (2020) “FRED-QD: A Quarterly Database for Macroeconomic Research” \emph{National Bureau of Economic Research}, Working Paper 26872. #' +#' Welch, I. and Goyal, A. (2008) "A comprehensive look at the empirical performance of equity premium prediction." \emph{The Review of Financial Studies}, 21 (4): 1455–1508. #' @source -"benchmark_ar2" +"inflation_data" \ No newline at end of file diff --git a/R/dsc.R b/R/dsc.R index 566424c..7f5a381 100644 --- a/R/dsc.R +++ b/R/dsc.R @@ -1,61 +1,124 @@ #' @name dsc #' @title Generate dynamic subset forecast combinations -#' @description `dsc()` can be used to generate forecast combinations -#' from a set of candidate density forecasts. 
For each period, -#' `dsc()` selects a subset of predictive densities with highest ranks -#' regarding (local) predictive accuracy. -#' Both the identities of the candidate forecasts -#' that are used for building the combined forecast and -#' the subset sizes may vary over time based on the data. -#' If only one candidate forecast is picked, the approach (temporarily) -#' collapses to pure model selection. -#' @param gamma_grid A numerical vector that contains discount factors -#' to exponentially down-weight the past predictive performance -#' of the candidate forecasts. -#' @param psi_grid An integer vector that controls -#' the (possible) sizes of the active subsets. +#' @description The `dsc()` function generates +#' dynamic forecast combinations from a set of +#' candidate density forecasts. For each period, +#' it selects and combines a subset of predictive densities +#' with the highest ranks regarding local predictive accuracy. +#' The identities of the candidate forecasting models and +#' the subset sizes used for building the aggregate predictive density +#' may vary over time based on the data. +#' If only one candidate forecast is picked, +#' the approach temporarily collapses to pure model selection. #' @param y A matrix of dimension `T * 1` or numeric vector of length `T` #' containing the observations of the target variable. -#' @param mu_mat A matrix with `T` rows containing -#' the first moment of each predictive density in each column. -#' @param var_mat A matrix with `T` rows containing -#' the second moment of each predictive density in each column. -#' @param delta A numeric value denoting the discount factor used -#' to down-weight the past predictive performance of the subset combinations. -#' @param n_cores An integer that denotes the number of CPU-cores -#' used for the computational estimation. 
-#' @return A list that contains: -#' * (1) a vector with the first moments (point forecasts) of the STSC-Model, -#' * (2) a vector with the the second moments (variance) of the STSC-Model, -#' * (3) a vector that contains the selected values for gamma, -#' * (4) a vector that contains the selected values for psi and -#' * (5) a matrix that indicates the selected signals for every point in time. +#' @param point_forecasts A matrix with `T` rows containing +#' the first moments of (conditionally) normal distributed +#' predictive densities in each column. +#' @param variance_forecasts A matrix with `T` rows containing +#' the second moments of (conditionally) normal distributed +#' predictive densities in each column. +#' @param gamma_grid A numeric vector containing potential discount factors +#' between 0 and 1 to exponentially down-weight the past predictive performance +#' of the candidate forecasting models. The values of this tuning parameter are chosen +#' in a procedure that amounts to leave-one-out cross-validation, +#' taking into account the time series structure of the data. +#' For details, see Adaemmer et al. (2023). +#' @param psi_grid An integer vector that controls +#' the (possible) sizes of the subsets. The values of this tuning parameter are chosen +#' in a procedure that amounts to leave-one-out cross-validation, +#' taking taking into account the time series structure of the data. +#' For details, see Adaemmer et al. (2023). +#' @param delta A numeric value between 0 and 1 denoting the discount factor +#' applied to down-weight the past predictive performance of the +#' aggregate predictive densities. +#' @param burn_in An integer value `>= 1` that denotes the number of +#' observations used to 'initialize' the rankings. +#' After 'burn_in' observations, the rankings for both, +#' the candidate forecasting models and aggregate predictive densities +#' are reset. `burn_in = 1` means no burn-in period is applied. 
+#' @param burn_in_dsc An integer value `>= 1` that denotes the number of +#' observations used to 'initialize' the rankings. +#' After 'burn_in_dsc' observations, only the ranking of the +#' aggregate predictive densities is reset. +#' `burn_in_dsc = 1` means no burn-in period is applied. +#' @param metric An integer from the set `1, 2, 3, 4, 5` representing +#' the metric used to rank the candidate forecasting models (TV-C models) +#' and subset combinations based on their predictive performance. +#' The default value is `metric = 5` which ranks them according to the +#' sum of (discounted) Continuous-Ranked-Probability-Scores (CRPS). +#' `metric = 1` uses discounted Predictive Log-Likelihoods, +#' `metric = 2` uses discounted Squared-Errors, +#' `metric = 3` uses discounted Absolute-Errors, +#' `metric = 4` uses discounted Compounded-Returns (in this case the target variable +#' y has to be a time series of financial returns). +#' @param equal_weight A boolean that denotes whether equal weights are used to +#' combine the candidate forecasts within a subset. If `FALSE`, the weights are +#' calculated applying the softmax function on the ranking scores of +#' the candidate forecasting models. The method proposed in Adaemmer et al. (2023) uses +#' equal weights to combine the candidate forecasting models. +#' @param incl An optional integer vector that denotes signals that +#' must be included in the subset combinations. For example, `incl = c(1, 3)` +#' includes all candidate forecasting models generated by the first and third signals. +#' If `NULL`, no signal is forced to be included. +#' @param portfolio_params A numeric vector of length 3 +#' containing the following elements: +#' \describe{ +#' \item{risk_aversion}{A non-negative double representing the investor's risk aversion. +#' Higher values indicate more risk-averse behavior.} +#' \item{min_weight}{A double specifying the minimum weight allocated to the market. 
+#' A non-negative lower bound effectively rules out short sales.} +#' \item{max_weight}{A double specifying the maximum weight allocated to the market. +#' For example, a value of 2 allows for a maximum leverage ratio of two.} +#' } +#' This parameter is only required if `metric = 4`. +#' @return A list containing: +#' \describe{ +#' \item{Forecasts}{A list containing: +#' \describe{ +#' \item{Realization}{A vector with the actual values of the target variable.} +#' \item{Point_Forecasts}{A vector with the first moments of the aggregate predictive densities of the DSC model.} +#' \item{Variance_Forecasts}{A vector with the second moments of the aggregate predictive densities of the DSC model.} +#' } +#' } +#' \item{Tuning_Parameters}{A list containing: +#' \describe{ +#' \item{Gamma}{A vector containing the selected values for the tuning parameter gamma.} +#' \item{Psi}{A vector containing the selected values for the tuning parameter psi.} +#' \item{CFM}{A matrix containing the selected candidate forecasting models.} +#' } +#' } +#' \item{Model}{A list containing: +#' \describe{ +#' \item{Gamma_grid}{The grid of gamma values used in the model.} +#' \item{Psi_grid}{The grid of psi values used in the model.} +#' \item{Delta}{The delta value used in the model.} +#' \item{Burn_in}{The burn-in period used in the model.} +#' \item{Burn_in_dsc}{The burn-in period used in the model.} +#' \item{Metric}{The ranking metric used in the model.} +#' \item{Equal_weight}{A boolean indicating if equal weighting was used.} +#' \item{Incl}{Additional included parameters.} +#' } +#' } +#' } #' @export #' @seealso \url{https://github.com/lehmasve/hdflex#readme} #' @author Philipp Adämmer, Sven Lehmann, Rainer Schüssler #' @references -#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." -#' \emph{Journal of Applied Econometrics}, 35 (4): 410–421. +#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. 
A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. #' -#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -#' \emph{International Economic Review}, 53 (3): 867–886. +#' Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. #' -#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -#' \emph{International Economic Review}. +#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. #' -#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -#' \emph{Technometrics}, 52 (1): 52–66. -#' -#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." -#' \emph{Journal of Econometrics}, 192 (2): 391–405. +#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. #' -#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -#' \emph{Springer}, 2nd edn. -#' @import parallel +#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. +#' +#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." \emph{Technometrics}, 52 (1): 52–66. +#' +#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. 
#' @import checkmate -#' @importFrom stringr str_split -#' @importFrom dplyr lag -#' @importFrom roll roll_sum #' @examples #' \donttest{ #' @@ -64,217 +127,153 @@ #' } ### Dynamic Subset Combination -dsc <- function(gamma_grid, - psi_grid, - y, - mu_mat, - var_mat, - delta, - n_cores) { - +dsc <- function(y, + point_forecasts, + variance_forecasts, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params = NULL) { + + + # Convert y to matrix + y <- as.matrix(y, ncol = 1) + + ######################################################## ### Checkmate - # Check if y is numeric vector without missing / infinite values - checkmate::assertNumeric(y, - min.len = 1, - any.missing = FALSE, - finite = TRUE) + # Check if numeric vector without missing or infinite values + assertNumeric(y, min.len = 1, any.missing = FALSE, finite = TRUE) - # Check if mu_mat is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(mu_mat, - mode = "double", - any.missing = FALSE, - nrows = length(y)) + # Check if numeric matrix and same length as y + assertMatrix(point_forecasts, mode = "double", any.missing = FALSE, nrows = length(y)) + assertMatrix(variance_forecasts, mode = "double", any.missing = FALSE, nrows = length(y), ncols = ncol(point_forecasts)) + qassert(variance_forecasts, c("M+[0,]")) - # Check if var_mat is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(var_mat, - mode = "double", - any.missing = FALSE, - nrows = length(y), - ncols = ncol(mu_mat)) + # Check if numeric vectors with values between exp(-10) and 1 + assertNumeric(gamma_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) - # Check if gamma_grid is numeric vector with values between 0 and 1 - checkmate::assertNumeric(gamma_grid, - lower = 0, - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) + # Check if integer vector with values between 1 and 
ncol(point_forecasts) + assertIntegerish(psi_grid, lower = 1, upper = ncol(point_forecasts), min.len = 1, any.missing = FALSE) - # Check if psi_grid is Integer vector with values between 1 and ncol(mu_mat) - checkmate::assertIntegerish(psi_grid, - lower = 1, - upper = ncol(mu_mat), - min.len = 1, - any.missing = FALSE) + # Check if numeric value between exp(-10) and 1 + assertNumber(delta, lower = exp(-10), upper = 1, finite = TRUE, na.ok = FALSE, null.ok = FALSE) - # Check if var_mat has only non-negativ entries - checkmate::qassert(var_mat, - c("M+[0,]")) + # Check if integers between 1 and the length of y + assertInt(burn_in, lower = 1, upper = nrow(y), na.ok = FALSE, null.ok = FALSE) + assertInt(burn_in_dsc, lower = 1, upper = nrow(y), na.ok = FALSE, null.ok = FALSE) - # Check if delta is Numeric between 0 and 1 - checkmate::assertNumber(delta, - lower = exp(-10), - upper = 1, - na.ok = FALSE) + # Check if element of the set {1, 2, 3, 4, 5} + assertChoice(metric, c(1, 2, 3, 4, 5), null.ok = FALSE) - # Check if n_cores is Integer bigger or equal to 1 - checkmate::assertInt(n_cores, - lower = 1) + # Check if boolean + assertLogical(equal_weight, len = 1, any.missing = FALSE) - ### Parameter Grid - # Create List with Parameter Grids to use as ID - n_combs <- length(gamma_grid) * length(psi_grid) - parameter_grid <- vector(mode = "list", length = n_combs) - index <- 1L - for (i in seq_along(gamma_grid)) { - for (j in seq_along(psi_grid)) { - parameter_grid[[index]] <- c(i, j) - index <- index + 1 - } - } - - ### 1) Compute (all) Subset Combinations - # Set Size - n_models <- ncol(mu_mat) - len <- nrow(mu_mat) - - # Set up Backend for Parallel Processing - cores <- n_cores - cl <- parallel::makeCluster(cores, type = "PSOCK") - parallel::clusterExport(cl = cl, varlist = c("parameter_grid", - "n_models", - "n_combs", - "len", - "y", - "mu_mat", - "var_mat"), - envir = environment()) - - # Parallelize with parLapply - idx <- seq_along(parameter_grid) - dsc_tmp <- 
parallel::parLapply(cl, idx, function(i) { + # Additional checks if metric == 4 + if (metric == 4) { - # Set Gamma and Psi - gamma <- gamma_grid[parameter_grid[[i]][1]] - psi <- psi_grid[parameter_grid[[i]][2]] + # Check if "returns" + assertNumeric(y, lower = -1) - # Create Initial Weights - weights <- init_dsc(n_models) + # Check if numeric vector of length 3 + assertNumeric(portfolio_params, len = 3, any.missing = FALSE) - # Loop over DSC-Function - dsc_results <- dsc_loop(weights, - gamma, - psi, - y, - mu_mat, - var_mat, - n_combs, - len, - n_models) - # Return Results - return(list(cbind(dsc_results[[1]], - dsc_results[[2]], - dsc_results[[3]]), - dsc_results[[4]]))}) - - # Stop Cluster - parallel::stopCluster(cl) - - # Assign Results - tmp <- lapply(dsc_tmp, "[[", 1) - forecasts_comb <- sapply(tmp, function(x) x[, 1]) - variances_comb <- sapply(tmp, function(x) x[, 2]) - ln_scores <- sapply(tmp, function(x) x[, 3]) - models_idx <- lapply(dsc_tmp, "[[", 2) + # Extract values from portfolio_params + risk_aversion <- portfolio_params[1] + min_weight <- portfolio_params[2] + max_weight <- portfolio_params[3] - # Remove Objects - rm(list = c("dsc_tmp", "tmp")) + # Check if numeric value and at least 0.0 + assertNumber(risk_aversion, lower = 0, upper = Inf) - ### 2) Create Combination Names - # Set up Vector & Loop over Grid - model_names_comb <- rep(NA, n_combs) - for (i in as.integer(seq_along(parameter_grid))) { + # Check if min_weight is a number smaller than max_weight + assertNumber(min_weight, lower = -Inf, upper = max_weight) - # Set Gamma and Psi - gamma <- gamma_grid[parameter_grid[[i]][1]] - psi <- psi_grid[parameter_grid[[i]][2]] - - # Create Model Name - mod_name <- paste("gamma", gamma, "psi", psi, sep = "_") - - # Assign Name - model_names_comb[i] <- mod_name + # Check if max_weight is a number greater than min_weight + assertNumber(max_weight, lower = min_weight, upper = Inf) } - ### 3) Compute Dynamic Subset Combination - ### -> select subset 
combination for each point in time - # Compute exponentially discounted sum of predictive log-likelihoods (DPLL) - weights <- delta^(seq_len(len)) # - 1) - cum_ln_scores_lag <- dplyr::lag(roll::roll_sum(ln_scores, - weights = rev(weights), - width = len, min_obs = 1), n = 1L) #nolint - - # Select highest DPLL for each point in time - chosen_parameter <- matrix(FALSE, ncol = n_combs, nrow = len, dimnames = list(NULL, model_names_comb)) #nolint - chosen_parameter[cbind(seq_len(len), max.col(cum_ln_scores_lag, "first"))] <- TRUE #nolint - - # Set first Subset Combination deterministically - chosen_parameter[1, n_combs] <- TRUE - - # Compute DSC-Forecast and DSC-Variance - forecast_dsc <- rowSums(forecasts_comb * chosen_parameter) - variance_dsc <- rowSums(variances_comb * chosen_parameter) + # Check if nullable integer vector + assertIntegerish(incl, lower = 1, upper = ncol(point_forecasts), null.ok = TRUE) + # Check if minimum psi matches the keep argument + if (!is.null(incl)) { + assertTRUE(min(psi_grid) >= length(incl)) + } - ### 4) Diagnosis: Get selected values for gamma & psi - ### and the names of the candidate forecasts - # Get selected gamma & psi value for each point in time - gamma_psi <- apply(chosen_parameter == TRUE, 1, function(x) model_names_comb[x]) #nolint - val_gamma <- unname(sapply(gamma_psi, function(x) as.numeric(stringr::str_split(x, "_")[[1]][2]))) #nolint - val_psi <- unname(sapply(gamma_psi, function(x) as.numeric(stringr::str_split(x, "_")[[1]][4]))) #nolint + ######################################################## + # Apply Rcpp-Function + dsc_results <- dsc_(y, + point_forecasts, + variance_forecasts, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params) - # Get / Set the Candidate Forecast Names - if (!is.null(colnames(mu_mat))) { - model_names_tvc <- colnames(mu_mat) + # Assign Results + dsc_forecast <- as.numeric(dsc_results[[1]]) + dsc_variance <- 
as.numeric(dsc_results[[2]]) + dsc_comb_mod <- as.integer(dsc_results[[3]]) + dsc_cand_mod <- dsc_results[[4]] + + # Get Values for Gamma & Psi + para_grid <- expand.grid(psi_grid, gamma_grid) + chosen_psi <- para_grid[dsc_comb_mod + 1, 1] + chosen_gamma <- para_grid[dsc_comb_mod + 1, 2] + + # Candidate Forecast Model Names + if (!is.null(colnames(point_forecasts))) { + cfm_names <- colnames(point_forecasts) } else { - model_names_tvc <- paste0("C_", as.character(seq_len(n_models))) + cfm_names <- paste0("CFM", seq_len(ncol(point_forecasts))) } + # Set up matrix for selected CFM + chosen_cfm <- matrix(0, nrow = nrow(y), + ncol = length(cfm_names), + dimnames = list(NULL, cfm_names)) - # Get index of the selected Subset Combination at every point in time - ind <- which(chosen_parameter, arr.ind = TRUE, useNames = TRUE) - ind <- ind[order(ind[, "row"]), ] - - # Get the Candidate Forecast(s) in the Subset Combination - tmp <- lapply(seq_len(nrow(ind)), function(x) model_names_tvc[models_idx[[ind[x, 2]]][[ind[x, 1]]] + 1]) #nolint - - # Transform to 0 / 1 - Matrix - cand_models <- matrix(0, ncol = length(model_names_tvc), - nrow = nrow(chosen_parameter), - dimnames = list(NULL, model_names_tvc)) - for (i in seq_len(nrow(chosen_parameter))) { - col_idx <- match(tmp[[i]], model_names_tvc) - cand_models[i, col_idx] <- 1 + # Fill matrices + for (t in seq(max(burn_in, burn_in_dsc) + 1, nrow(y))) { + col_names <- cfm_names[dsc_cand_mod[[t]] + 1] + chosen_cfm[t, col_names] <- 1 } - # Clean Column Names - cols_clean <- sapply(stringr::str_split(model_names_tvc, "-"), `[`, 1) - preds <- unique(cols_clean) - n_preds <- length(preds) - - # Count how often a signal was selected - chosen_signals <- sapply(1:n_preds, function(x) rowSums(cand_models[, which(preds[x] == cols_clean), drop = FALSE])) #nolint - - # Change to Selected / not Selected -> 0 vs. 
1 - chosen_signals[chosen_signals > 1] <- 1 - - # Change Row and Column Names - colnames(chosen_signals) <- preds - # Return Results - return(list(forecast_dsc, - variance_dsc, - val_gamma, - val_psi, - chosen_signals)) -} \ No newline at end of file + return( + structure( + list( + Forecasts = list( + Realization = y, + Point_Forecasts = dsc_forecast, + Variance_Forecasts = dsc_variance + ), + Tuning_Parameters = list( + Gamma = chosen_gamma, + Psi = chosen_psi, + CFM = chosen_cfm + ), + Model = list( + Gamma_grid = gamma_grid, + Psi_grid = psi_grid, + Delta = delta, + Burn_in = burn_in, + Burn_in_dsc = burn_in_dsc, + Metric = metric, + Equal_weight = equal_weight, + Incl = incl + ) + ), class = "dsc_obj" + ) + ) +} diff --git a/R/stsc.R b/R/stsc.R index 27dc6b9..176f4e8 100644 --- a/R/stsc.R +++ b/R/stsc.R @@ -1,20 +1,19 @@ #' @name stsc -#' @title Signal-Transform-Subset-Combination (STSC) +#' @title Signal-Transformed-Subset-Combination (STSC) #' @description `stsc()` is a time series forecasting method designed to handle -#' vast sets of predictive signals, many of which are irrelevant or short-lived. -#' The method transforms heterogeneous scalar-valued signals into +#' vast sets of predictive signals, many of which may be irrelevant or short-lived. +#' This method transforms heterogeneous scalar-valued signals into #' candidate density forecasts via time-varying coefficient models (TV-C), -#' and subsequently, combines them into an ultimate aggregated density forecast +#' and subsequently, combines them into an ultimate aggregate density forecast #' via dynamic subset combinations (DSC). #' @param y A matrix of dimension `T * 1` or numeric vector of length `T` #' containing the observations of the target variable. -#' @param X A matrix with `T` rows containing -#' the lagged 'P' signals in each column. -#' Use NULL if no 'P' signal shall be included. -#' @param Ext_F A matrix with `T` rows containing -#' (external) 'F' signals for y in each column. 
-#' Use NULL if no (external) 'F' signal shall be included. -#' @param sample_length An integer that denotes the number of observations used +#' @param X A matrix with `T` rows containing the lagged 'P-signals' in each column. +#' Use `NULL` if no (external) 'P-signal' is to be included. +#' @param Ext_F A matrix with `T` rows containing the (external) 'F-signals' in each column. +#' For 'F-Signals', the slope of the TV-C models is fixed to 1. +#' Use `NULL` if no (external) 'F-signal' is to be included. +#' @param init An integer that denotes the number of observations used #' to initialize the observational variance and the coefficients' variance #' in the TV-C models. #' @param lambda_grid A numeric vector which takes values between 0 and 1 @@ -22,107 +21,130 @@ #' coefficients. Each signal in combination with each value of #' lambda provides a separate candidate forecast. #' Constant coefficients are nested for the case `lambda = 1`. -#' @param kappa_grid A numeric vector between 0 and 1 to accommodate -#' time-varying volatility in the TV-C models. The observational variance -#' is estimated via Exponentially Weighted Moving Average and kappa -#' denotes the underlying decay factor. +#' @param kappa_grid A numeric vector which takes values between 0 and 1 +#' to accommodate time-varying volatility in the TV-C models. +#' The observational variance is estimated via Exponentially Weighted Moving Average +#' and kappa denotes the underlying decay factor. #' Constant variance is nested for the case `kappa = 1`. #' Each signal in combination with each value of -#' kappa provides a separate candidate forecast. +#' kappa provides a separate candidate density forecast. #' For the values of kappa, we follow the recommendation #' of RiskMetrics (Reuters, 1996). -#' @param burn_in_tvc An integer value `>= 1` that denotes the number of -#' observations used to 'initialize' the TV-C models. 
-#' After 'burn_in_tvc' observations, the ranking for the candidate forecasting models -#' and aggregated predictive densities are resetted. -#' `burn_in_tvc = 1` means no burn-in period is applied. -#' @param bias A boolean to indicate whether the TVC-Models should -#' perform a bias correction to external point forecasts -#' (TRUE -> time-varying intercept) or -#' take it 'as is' (FALSE -> constant intercept of 0.0). -#' @param gamma_grid A numerical vector that contains discount factors +#' @param bias A boolean to indicate whether the TV-C-models +#' allow for a bias correction to F-signals. +#' `TRUE` allows for a time-varying intercept, and `FALSE` sets (and fixes) the intercept to 0. +#' @param gamma_grid A numeric vector containing potential discount factors #' between 0 and 1 to exponentially down-weight the past predictive performance -#' of the candidate forecasting models. +#' of the candidate forecasting models. The values of this tuning parameter are chosen +#' in a procedure that amounts to leave-one-out cross-validation, +#' taking into account the time series structure of the data. +#' For details, \emph{see Adaemmer et al. (2023)}. #' @param psi_grid An integer vector that controls -#' the (possible) sizes of the subsets. +#' the (possible) sizes of the subsets. The values of this tuning parameter are chosen +#' in a procedure that amounts to leave-one-out cross-validation, +#' taking into account the time series structure of the data. +#' For details, \emph{see Adaemmer et al. (2023)}. #' @param delta A numeric value between 0 and 1 denoting the discount factor -#' used to down-weight the past predictive performance of the -#' subset combinations. +#' applied to down-weight the past predictive performance of the +#' aggregate predictive densities. +#' @param burn_in An integer value `>= 1` that denotes the number of +#' observations used to 'initialize' the rankings. 
+#' After 'burn_in' observations, the rankings for both, +#' the candidate forecasting models and aggregate predictive densities +#' are reset. `burn_in = 1` means no burn-in period is applied. #' @param burn_in_dsc An integer value `>= 1` that denotes the number of -#' observations used to 'initialize' the Dynamic Subset Combinations. -#' After 'burn_in_dsc' observations the ranking of the -#' aggregated predictive densities is resetted. +#' observations used to 'initialize' the rankings. +#' After 'burn_in_dsc' observations, only the ranking of the +#' aggregate predictive densities is reset. #' `burn_in_dsc = 1` means no burn-in period is applied. -#' @param method An integer of the set `1, 2, 3, 4, 5` that denotes -#' the method used to rank the candidate forecasting models (TV-C models) -#' and subset combinations according to their performance. -#' Default is `method = 1` which ranks according to their -#' generated sum of discounted predictive log-likelihoods (DPLLs), -#' `method = 2` uses Squared-Errors (SE) instead of DPLLs, -#' `method = 3` uses Absolute-Errors (AE), -#' `method = 4` uses Compounded-Returns (in this case the target variable -#' y has to be a time series of financial returns) and -#' `method = 5` uses Continuous-Ranked-Probability-Scores (CRPS). +#' @param metric An integer from the set `1, 2, 3, 4, 5` representing +#' the metric used to rank the candidate forecasting models (TV-C models) +#' and subset combinations based on their predictive performance. +#' The default value is `metric = 5` which ranks them according to the +#' sum of (discounted) Continuous-Ranked-Probability-Scores (CRPS). +#' `metric = 1` uses discounted Predictive Log-Likelihoods, +#' `metric = 2` uses discounted Squared-Errors, +#' `metric = 3` uses discounted Absolute-Errors, +#' `metric = 4` uses discounted Compounded-Returns (in this case the target variable +#' y has to be a time series of financial returns). 
#' @param equal_weight A boolean that denotes whether equal weights are used to #' combine the candidate forecasts within a subset. If `FALSE`, the weights are -#' calculated using the softmax-function on the ranking scores of -#' the candidate forecasting models. The method proposed in Adaemmer et al (2023) uses -#' equal weights to combine the candidate forecasts. -#' @param incl An (optional) integer vector that denotes signals that -#' must be included in the subset combinations. E.g. `incl = c(1, 3)` -#' includes all TVC-Models generated by the first and third signal. +#' calculated applying the softmax function on the ranking scores of +#' the candidate forecasting models. The method proposed in Adaemmer et al. (2023) uses +#' equal weights to combine the candidate forecasting models. +#' @param incl An optional integer vector that denotes signals that +#' must be included in the subset combinations. For example, `incl = c(1, 3)` +#' includes all candidate forecasting models generated by the first and third signals. #' If `NULL`, no signal is forced to be included. -#' @param parallel A boolean that denotes whether the function should +#' @param parallel A boolean indicating whether the function should #' be parallelized. #' @param n_threads An integer that denotes the number of cores used -#' for parallelization. -#' @param risk_aversion A double `>= 0` that denotes the risk aversion -#' of an investor. A higher value indicates a risk avoiding behaviour. -#' Only necessary if `method = 4`. -#' @param min_weight A double that denotes the lower bound -#' for the weight placed on the market. -#' A non-negative value rules out short sales. -#' Only necessary if `method = 4`. -#' @param max_weight A double that denotes the upper bound -#' for the weight placed on the market. -#' A value of e.g. 2 allows for a maximum leverage ratio of two. -#' Only necessary if `method = 4`. 
-#' @return A list that contains: -#' * (1) a vector with the first moments (point forecasts) of the STSC-Model, -#' * (2) a vector with the second moments (variance) of the STSC-Model, -#' * (3) a vector that contains the selected values for gamma, -#' * (4) a vector that contains the selected values for psi, -#' * (5) a matrix that contains the selected signals, -#' * (6) a matrix that contains the selected values for lambda and -#' * (7) a matrix that contains the selected values for kappa -#' for every point in time. -#' +#' for parallelization. Only necessary if `parallel = TRUE`. +#' @param portfolio_params A numeric vector of length 3 +#' containing the following elements: +#' \describe{ +#' \item{risk_aversion}{A non-negative double representing the investor's risk aversion. +#' Higher values indicate more risk-averse behavior.} +#' \item{min_weight}{A double specifying the minimum weight allocated to the market. +#' A non-negative lower bound effectively rules out short sales.} +#' \item{max_weight}{A double specifying the maximum weight allocated to the market. +#' For example, a value of 2 allows for a maximum leverage ratio of two.} +#' } +#' This parameter is only required if `metric = 4`. 
+#' @return A list containing: +#' \describe{ +#' \item{Forecasts}{A list containing: +#' \describe{ +#' \item{Realization}{A vector with the actual values of the target variable.} +#' \item{Point_Forecast}{A vector with the first moments of the aggregate predictive densities of the STSC model.} +#' \item{Variance_Prediction}{A vector with the second moments of the aggregate predictive densities of the STSC model.} +#' } +#' } +#' \item{Tuning_Parameters}{A list containing: +#' \describe{ +#' \item{Gamma}{A vector containing the selected values for the tuning parameter gamma.} +#' \item{Psi}{A vector containing the selected values for the tuning parameter psi.} +#' \item{Signals}{A matrix containing the selected signals.} +#' \item{Lambda}{A matrix containing the selected values for the tuning parameter lambda.} +#' \item{Kappa}{A matrix containing the selected values for the tuning parameter kappa.} +#' } +#' } +#' \item{Model}{A list containing: +#' \describe{ +#' \item{Lambda_grid}{The grid of lambda values used in the model.} +#' \item{Kappa_grid}{The grid of kappa values used in the model.} +#' \item{Gamma_grid}{The grid of gamma values used in the model.} +#' \item{Psi_grid}{The grid of psi values used in the model.} +#' \item{Delta}{The delta value used in the model.} +#' \item{Init}{The init value used in the model.} +#' \item{Burn_in}{The burn-in period used in the model.} +#' \item{Burn_in_dsc}{The burn-in period used in the model.} +#' \item{metric}{The ranking metric used in the model.} +#' \item{Equal_weight}{A boolean indicating if equal weighting was used.} +#' \item{Bias}{A boolean indicating if bias correction was applied to F-signals.} +#' \item{Incl}{Additional included parameters.} +#' } +#' } +#' } #' @seealso \url{https://github.com/lehmasve/hdflex#readme} #' @author Philipp Adämmer, Sven Lehmann, Rainer Schüssler #' @references -#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. 
(2020) "Exchange rate predictability and dynamic bayesian learning." -#' \emph{Journal of Applied Econometrics}, 35 (4): 410–421. -#' -#' Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." -#' \emph{Journal of Financial Economics}, 106 (1): 157–181. +#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. #' -#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." -#' \emph{Journal of Econometrics}, 192 (2): 391–405. +#' Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. #' -#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -#' \emph{International Economic Review}, 53 (3): 867–886. +#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. #' -#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -#' \emph{International Economic Review}. +#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. #' -#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -#' \emph{Technometrics}, 52 (1): 52–66. +#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. #' -#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -#' \emph{Springer}, 2nd edn. +#' Raftery, A. 
E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." \emph{Technometrics}, 52 (1): 52–66. #' -#' @export +#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. #' @import checkmate +#' @importFrom stats na.omit +#' @export #' @examples #' \donttest{ #' @@ -132,365 +154,293 @@ #' #### details regarding the data & external forecasts #### #' ######################################################### #' -#' # Packages +#' # Load Package #' library("hdflex") +#' library("ggplot2") +#' library("cowplot") #' #' ########## Get Data ########## -#' # Load Data +#' # Load Package Data #' inflation_data <- inflation_data -#' benchmark_ar2 <- benchmark_ar2 #' -#' # Set Index for Target Variable -#' i <- 1 # (1 -> GDPCTPI; 2 -> PCECTPI; 3 -> CPIAUCSL; 4 -> CPILFESL) +#' # Set Target Variable +#' y <- inflation_data[, 1] +#' +#' # Set 'P-Signals' +#' X <- inflation_data[, 2:442] #' -#' # Subset Data (keep only data relevant for target variable i) -#' dataset <- inflation_data[, c(1+(i-1), # Target Variable -#' 5+(i-1), # Lag 1 -#' 9+(i-1), # Lag 2 -#' (13:16)[-i], # Remaining Price Series -#' 17:452, # Exogenous Predictor Variables -#' seq(453+(i-1)*16,468+(i-1)*16))] # Ext. 
Point Forecasts +#' # Set 'F-Signals' +#' Ext_F <- inflation_data[, 443:462] #' -#' ########## STSC ########## -#' # Set Target Variable -#' y <- dataset[, 1, drop = FALSE] +#' # Get Dates and Number of Observations +#' tdates <- rownames(inflation_data) +#' tlength <- length(tdates) +#' +#' # First complete observation (no missing values) +#' first_complete <- which(complete.cases(inflation_data))[1] +#' +#' ########## Rolling AR2-Benchmark ########## +#' # Set up matrix for predictions +#' benchmark <- matrix(NA, nrow = tlength, +#' ncol = 1, dimnames = list(tdates, "AR2")) +#' +#' # Set Window-Size (15 years of quarterly data) +#' window_size <- 15 * 4 #' -#' # Set 'Simple' Signals -#' X <- dataset[, 2:442, drop = FALSE] +#' # Time Sequence +#' t_seq <- seq(window_size, tlength - 1) #' -#' # Set External Point Forecasts (Koop & Korobilis 2023) -#' Ext_F <- dataset[, 443:458, drop = FALSE] +#' # Loop with rolling window +#' for (t in t_seq) { #' -#' # Set Dates -#' dates <- rownames(dataset) +#' # Split Data for Training +#' x_train <- cbind(int = 1, X[(t - window_size + 1):t, 1:2]) +#' y_train <- y[(t - window_size + 1):t] #' +#' # Split Data for Prediction +#' x_pred <- cbind(int = 1, X[t + 1, 1:2, drop = FALSE]) +#' +#' # Fit AR-Model +#' model_ar <- .lm.fit(x_train, y_train) +#' +#' # Predict and store in benchmark matrix +#' benchmark[t + 1, ] <- x_pred %*% model_ar$coefficients +#' } +#' +#' ########## STSC ########## #' # Set TV-C-Parameter -#' sample_length <- 4 * 5 -#' lambda_grid <- c(0.90, 0.95, 1) -#' kappa_grid <- 0.98 -#' burn_in_tvc <- 79 -#' bias <- TRUE +#' init <- 5 * 4 +#' lambda_grid <- c(0.90, 0.95, 1.00) +#' kappa_grid <- c(0.94, 0.96, 0.98) +#' bias <- TRUE #' #' # Set DSC-Parameter -#' gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, -#' 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, -#' 0.97, 0.98, 0.99, 1.00) -#' psi_grid <- c(1:100) -#' delta <- 0.95 -#' burn_in_dsc <- 1 -#' method <- 1 +#' gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 
0.80, 0.90, +#' 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) +#' n_tvc <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid) +#' psi_grid <- c(1:100, sapply(1:4, function(i) floor(i * n_tvc / 4))) +#' delta <- 0.95 +#' burn_in <- first_complete + init / 2 +#' burn_in_dsc <- 1 +#' metric <- 5 #' equal_weight <- TRUE -#' incl <- NULL -#' parallel <- FALSE -#' n_threads <- NULL +#' incl <- NULL +#' parallel <- FALSE +#' n_threads <- NULL #' #' # Apply STSC-Function #' results <- hdflex::stsc(y, #' X, #' Ext_F, -#' sample_length, +#' init, #' lambda_grid, #' kappa_grid, -#' burn_in_tvc, #' bias, #' gamma_grid, #' psi_grid, #' delta, +#' burn_in, #' burn_in_dsc, -#' method, +#' metric, #' equal_weight, #' incl, #' parallel, #' n_threads, -#' NULL, -#' NULL, #' NULL) #' -#' # Assign DSC-Results -#' forecast_stsc <- results[[1]] -#' variance_stsc <- results[[2]] -#' chosen_gamma <- results[[3]] -#' chosen_psi <- results[[4]] -#' chosen_signals <- results[[5]] -#' -#' # Define Evaluation Period -#' eval_date_start <- "1991-01-01" -#' eval_date_end <- "2021-12-31" -#' eval_period_idx <- which(dates > eval_date_start & dates <= eval_date_end) +#' ########## Evaluation ########## +#' # Define Evaluation Period (OOS-Period) +#' eval_period <- which(tdates >= "1991-04-01" & tdates <= "2021-12-01") #' -#' # Trim Objects -#' oos_y <- y[eval_period_idx, ] -#' oos_forecast_stsc <- forecast_stsc[eval_period_idx] -#' oos_variance_stsc <- variance_stsc[eval_period_idx] -#' oos_chosen_gamma <- chosen_gamma[eval_period_idx] -#' oos_chosen_psi <- chosen_psi[eval_period_idx] -#' oos_chosen_signals <- chosen_signals[eval_period_idx, , drop = FALSE] -#' oos_dates <- dates[eval_period_idx] +#' # Get Evaluation Summary for STSC +#' eval_results <- summary(obj = results, eval_period = eval_period) #' -#' # Add Dates -#' names(oos_forecast_stsc) <- oos_dates -#' names(oos_variance_stsc) <- oos_dates -#' names(oos_chosen_gamma) <- oos_dates -#' 
names(oos_chosen_psi) <- oos_dates -#' rownames(oos_chosen_signals) <- oos_dates +#' # Calculate (Mean-)Squared-Errors for AR2-Benchmark +#' se_ar2 <- (y[eval_period] - benchmark[eval_period, 1])^2 +#' mse_ar2 <- mean(se_ar2) #' -#' ### Part 2: Evaluation ### -#' # Apply Summary-Function -#' summary_results <- summary_stsc(oos_y, -#' benchmark_ar2[, i], -#' oos_forecast_stsc) +#' # Create CSSED-Plot +#' cssed <- cumsum(se_ar2 - eval_results$MSE[[2]]) +#' plot_cssed <- ggplot(data = data.frame(eval_period, cssed), aes(x = eval_period, y = cssed)) + +#' geom_line() + +#' ylim(-0.0008, 0.0008) + +#' ggtitle("Cumulative Squared Error Differences") + +#' xlab("Time Index") + +#' ylab("CSSED") + +#' geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") + +#' theme_minimal(base_size = 15) + +#' theme( +#' panel.grid.major = element_blank(), +#' panel.grid.minor = element_blank(), +#' panel.border = element_rect(colour = "black", fill = NA), +#' axis.ticks = element_line(colour = "black"), +#' plot.title = element_text(hjust = 0.5) +#' ) #' -#' # Assign Summary-Results -#' cssed <- summary_results[[3]] -#' mse <- summary_results[[4]] +#' # Show Plots +#' options(repr.plot.width = 15, repr.plot.height = 15) +#' plots_list <- eval_results$Plots +#' plots_list <- c(list(plot_cssed), plots_list) +#' cowplot::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv") #' -#' ########## Results ########## #' # Relative MSE -#' print(paste("Relative MSE:", round(mse[[1]] / mse[[2]], 4))) -#' -#' # Plot CSSED -#' plot(x = as.Date(oos_dates), -#' y = cssed, -#' ylim = c(-0.0008, 0.0008), -#' main = "Cumulated squared error differences", -#' type = "l", -#' lwd = 1.5, -#' xlab = "Date", -#' ylab = "CSSED") + abline(h = 0, lty = 2, col = "darkgray") -#' -#' # Plot Predictive Signals -#' vec <- seq_len(dim(oos_chosen_signals)[2]) -#' mat <- oos_chosen_signals %*% diag(vec) -#' mat[mat == 0] <- NA -#' matplot(x = as.Date(oos_dates), -#' y = mat, -#' cex = 0.4, -#' 
pch = 20, -#' type = "p", -#' main = "Evolution of selected signal(s)", -#' xlab = "Date", -#' ylab = "Predictive Signal") -#' -#' # Plot Psi -#' plot(x = as.Date(oos_dates), -#' y = oos_chosen_psi, -#' ylim = c(1, 100), -#' main = "Evolution of the subset size", -#' type = "p", -#' cex = 0.75, -#' pch = 20, -#' xlab = "Date", -#' ylab = "Psi") +#' print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4))) #' } -### New STSC-Function +### STSC-Function stsc <- function(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + metric, equal_weight, incl, parallel = FALSE, n_threads = parallel::detectCores() - 2, - risk_aversion = NULL, - min_weight = NULL, - max_weight = NULL) { + portfolio_params = NULL) { + + # Convert y to matrix + y <- as.matrix(y, ncol = 1) ######################################################## ### Checkmate - # Check if y is numeric vector without missing / infinite values - checkmate::assertNumeric(y, - min.len = 1, - any.missing = FALSE, - finite = TRUE) - - # Either x or f must not be null - checkmate::assert(checkmate::checkMatrix(X), - checkmate::checkMatrix(Ext_F), - combine = "or") - - # Check if x is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(X, - mode = "numeric", - nrow = length(y), - null.ok = TRUE) - - # Check if Ext_F is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(Ext_F, - mode = "numeric", - nrow = length(y), - null.ok = TRUE) - - # Check if sample_length is Integer between 2 and N - checkmate::assertInt(sample_length, - lower = 2, - upper = length(y)) - - # Check if lambda_grid is numeric vector with values between 0 and 1 - checkmate::assertNumeric(lambda_grid, - lower = exp(-10), - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) - - # Check if kappa_grid is numeric vector with values between 0 and 1 - 
checkmate::assertNumeric(kappa_grid, - lower = exp(-10), - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) - - # Check if bias is Boolean - checkmate::assertLogical(bias, - len = 1, - any.missing = FALSE) - - # Check if gamma_grid is numeric vector with values between 0 and 1 - checkmate::assertNumeric(gamma_grid, - lower = exp(-10), - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) - - # Check if psi_grid is Integer vector with values between 1 and ncol(mu_mat) - checkmate::assertIntegerish(psi_grid, - lower = 1, - upper = (ncol(cbind(if (exists("X")) X, - if (exists("Ext_F")) Ext_F)) * - length(lambda_grid) * length(kappa_grid)), - min.len = 1, - any.missing = FALSE, - null.ok = FALSE) - - # Check if delta is Numeric between 0 and 1 - checkmate::assertNumber(delta, - lower = exp(-10), - upper = 1, - finite = TRUE, - na.ok = FALSE, - null.ok = FALSE) - - # Check if burn_in_tvc is Integer between 1 and N - checkmate::assertInt(burn_in_tvc, - lower = 1, - upper = length(y), - na.ok = FALSE, - null.ok = FALSE) - - # Check if burn_in_dsc is Integer between 1 and N - checkmate::assertInt(burn_in_dsc, - lower = 1, - upper = length(y), - na.ok = FALSE, - null.ok = FALSE) - - # Check if method is element of set {1, 2, 3, 4, 5} - checkmate::assertChoice(method, - c(1, 2, 3, 4, 5), - null.ok = FALSE) - - # Check if equal_weight is Boolean - checkmate::assertLogical(equal_weight, - len = 1, - any.missing = FALSE) - - # Check if method == 4: risk_aversion, min_weight & max_weight are given ... - # ... 
& y are returns - if (method == 4) { - - # Check if returns - checkmate::assertNumeric(y, lower = -1) # -> check - - # Check if not NULL - checkmate::assert(checkmate::checkNumber(risk_aversion, na.ok = FALSE), - checkmate::checkNumber(min_weight, na.ok = FALSE), - checkmate::checkNumber(max_weight, na.ok = FALSE), - combine = "and") - } + # Check if numeric vector without missing or infinite values + assertNumeric(y, min.len = 1, any.missing = FALSE, finite = TRUE) + + # Either X or Ext_F must not be null + assert(checkMatrix(X), checkMatrix(Ext_F), combine = "or") + + # Check if numeric matrices and have same length as y + assertMatrix(X, mode = "numeric", nrows = nrow(y), null.ok = TRUE) + assertMatrix(Ext_F, mode = "numeric", nrows = nrow(y), null.ok = TRUE) + + # Check if integer between 2 and the length of y + assertInt(init, lower = 2, upper = nrow(y)) + + # Check if numeric vectors with values between exp(-10) and 1 + assertNumeric(lambda_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) + assertNumeric(kappa_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) + assertNumeric(gamma_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) + + # Check if boolean + assertLogical(bias, len = 1, any.missing = FALSE) + assertLogical(equal_weight, len = 1, any.missing = FALSE) + + # Check if integer vector with values between 1 and J + assertIntegerish( + psi_grid, + lower = 1, + upper = ( + ncol(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)) * + length(lambda_grid) * length(kappa_grid) + ), + min.len = 1, + any.missing = FALSE, + null.ok = FALSE + ) + + # Check if numeric value between exp(-10) and 1 + assertNumber(delta, lower = exp(-10), upper = 1, finite = TRUE, na.ok = FALSE, null.ok = FALSE) + + # Check if integers between 1 and the length of y + assertInt(burn_in, lower = 1, upper = nrow(y), na.ok = FALSE, null.ok = FALSE) + assertInt(burn_in_dsc, lower = 1, 
upper = nrow(y), na.ok = FALSE, null.ok = FALSE) + + # Check if element of the set {1, 2, 3, 4, 5} + assertChoice(metric, c(1, 2, 3, 4, 5), null.ok = FALSE) + + # Additional checks if metric == 4 + if (metric == 4) { - # Check if risk_aversion is Numeric and at least 0.0 - checkmate::assertNumber(risk_aversion, - lower = 0, - upper = Inf, - null.ok = TRUE, - na.ok = FALSE) - - # Check if min_weight Number smaller than max_weight - checkmate::assertNumber(min_weight, - lower = -Inf, - upper = max_weight, - null.ok = TRUE, - na.ok = FALSE) - - # Check if max_weight is Number greater than min_weight - checkmate::assertNumber(max_weight, - lower = min_weight, - upper = Inf, - null.ok = TRUE, - na.ok = FALSE) + # Check if "returns" + assertNumeric(y, lower = -1) + + # Check if numeric vector of length 3 + assertNumeric(portfolio_params, len = 3, any.missing = FALSE) + + # Extract values from portfolio_params + risk_aversion <- portfolio_params[1] + min_weight <- portfolio_params[2] + max_weight <- portfolio_params[3] + + # Check if numeric value and at least 0.0 + assertNumber(risk_aversion, lower = 0, upper = Inf) + + # Check if min_weight is a number smaller than max_weight + assertNumber(min_weight, lower = -Inf, upper = max_weight) + + # Check if max_weight is a number greater than min_weight + assertNumber(max_weight, lower = min_weight, upper = Inf) + } # Check if there are only NA values in any row - all_na <- any(apply(cbind(if (exists("X")) X, - if (exists("Ext_F")) Ext_F), - 1, function(x) sum(is.na(x)) == length(x))) - checkmate::assertFALSE(all_na) - - # Check if there are any Na-values not from the start - non_consec_na <- any(apply(is.na(cbind(if (exists("X")) X, - if (exists("Ext_F")) Ext_F)), - 2, function(x) { - sum(abs(diff(x))) > 1 | - sum(diff(x)) == 1 })) - checkmate::assertFALSE(non_consec_na) - - # Check Keep Argument I: Nullable Integer Vector - checkmate::assertIntegerish(incl, - lower = 1, - upper = (ncol(cbind(if (exists("X")) X, - if 
(exists("Ext_F")) Ext_F)) * - length(lambda_grid) * length(kappa_grid)), - null.ok = TRUE) - - # Check Keep Argument II: Minimum Psi must match keep argument + all_na <- any( + apply( + cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F), 1, + function(x) sum(is.na(x)) == length(x) + ) + ) + assertFALSE(all_na) + + # Check if there are any non-consecutive NA values + non_consec_na <- any( + apply( + is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)), 2, + function(x) sum(abs(diff(x))) > 1 | sum(diff(x)) == 1 + ) + ) + assertFALSE(non_consec_na) + + # Check if nullable integer vector + assertIntegerish( + incl, + lower = 1, + upper = ( + ncol(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)) * + length(lambda_grid) * length(kappa_grid) + ), + null.ok = TRUE + ) + + # Check if minimum psi matches the keep argument if (!is.null(incl)) { - checkmate::assertTRUE(min(psi_grid) >= length(incl) * length(lambda_grid) * length(kappa_grid)) + assertTRUE( + min(psi_grid) >= length(incl) * length(lambda_grid) * length(kappa_grid) + ) } - # Check Keep Argument III: No NA-Values in Keep-Columns + # Check if there are no NA values in the keep columns if (!is.null(incl)) { - checkmate::assertFALSE(any(is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)[, incl, drop = FALSE]))) + assertFALSE( + any( + is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)[, incl, drop = FALSE]) + ) + ) } # Check if any column in X is constant for the first observations if (!is.null(X)) { - if (any(apply(X, 2, function(x) length(unique(na.omit(x)[1:sample_length])) == 1))) { - print("One or more columns in X are constant for the first 1:sample_length observations.") + if (any(apply(X, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) { + print("One or more columns in X are constant for the first 1:init observations.") } } # Check if any column in Ext_F is constant for the first observations if (!is.null(Ext_F)) { - if (any(apply(Ext_F, 2, function(x) 
length(unique(na.omit(x)[1:sample_length])) == 1))) { - print("One or more columns in Ext_F are constant for the first 1:sample_length observations.") + if (any(apply(Ext_F, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) { + print("One or more columns in Ext_F are constant for the first 1:init observations.") } } - ######################################################## - - # Convert y to matrix - y <- as.matrix(y, ncol = 1) + ######################################################## # Parallel or Single Core if (parallel) { if (is.null(n_threads)) { @@ -500,43 +450,39 @@ stsc <- function(y, stsc_results <- stsc_loop_par_(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + metric, equal_weight, incl, n_threads, - risk_aversion, - min_weight, - max_weight) + portfolio_params) } else { # Apply Single-Core-Rcpp-Function stsc_results <- stsc_loop_(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + metric, equal_weight, incl, - risk_aversion, - min_weight, - max_weight) + portfolio_params) } # Assign Results @@ -549,8 +495,8 @@ stsc <- function(y, rm(list = c("stsc_results")) # Get Values for Gamma & Psi - para_grid <- expand.grid(psi_grid, gamma_grid) - chosen_psi <- para_grid[stsc_comb_mod + 1, 1] + para_grid <- expand.grid(psi_grid, gamma_grid) + chosen_psi <- para_grid[stsc_comb_mod + 1, 1] chosen_gamma <- para_grid[stsc_comb_mod + 1, 2] # P-Signal / F-Signal Names @@ -565,24 +511,21 @@ stsc <- function(y, stringsAsFactors = FALSE) # Set up matrix for selected signals - chosen_signals <- matrix(0, - nrow = nrow(y), + chosen_signals <- matrix(0, nrow = nrow(y), ncol = length(signal_names), dimnames = list(NULL, signal_names)) # Set up matrix for selected lambda - chosen_lambda <- matrix(0, - nrow = nrow(y), + chosen_lambda <- matrix(0, nrow = nrow(y), ncol = 
length(lambda_grid), dimnames = list(NULL, lambda_grid)) # Set up matrix for selected kappa - chosen_kappa <- matrix(0, - nrow = nrow(y), + chosen_kappa <- matrix(0, nrow = nrow(y), ncol = length(kappa_grid), dimnames = list(NULL, kappa_grid)) # Fill matrices - for (t in seq(max(burn_in_tvc, burn_in_dsc) + 1, nrow(y))) { + for (t in seq(max(burn_in, burn_in_dsc) + 1, nrow(y))) { # Select Signals col_names <- signal_grid[stsc_cand_mod[[t]] + 1, 1] @@ -598,11 +541,37 @@ stsc <- function(y, } # Return Results - return(list(Point_Forecast = stsc_forecast, - Variance = stsc_variance, - Gamma = chosen_gamma, - Psi = chosen_psi, - Signals = chosen_signals, - Lambda = chosen_lambda, - Kappa = chosen_kappa)) -} \ No newline at end of file + return( + structure( + list( + Forecasts = list( + Realization = y, + Point_Forecasts = stsc_forecast, + Variance_Forecasts = stsc_variance + ), + Tuning_Parameters = list( + Gamma = chosen_gamma, + Psi = chosen_psi, + Signals = chosen_signals, + Lambda = chosen_lambda, + Kappa = chosen_kappa + ), + Model = list( + Lambda_grid = lambda_grid, + Kappa_grid = kappa_grid, + Gamma_grid = gamma_grid, + Psi_grid = psi_grid, + Delta = delta, + Init = init, + Burn_in = burn_in, + Burn_in_dsc = burn_in_dsc, + Metric = metric, + Equal_weight = equal_weight, + Bias = bias, + Incl = incl + ) + ), + class = "stsc_obj" + ) + ) +} diff --git a/R/summary_stsc.R b/R/summary_stsc.R deleted file mode 100644 index 9c825ea..0000000 --- a/R/summary_stsc.R +++ /dev/null @@ -1,96 +0,0 @@ -#' @name summary_stsc -#' @title Statistical summary of the STSC-results -#' @description `summary_stsc()` returns a statistical summary -#' of the results from dsc(). It provides statistical measures -#' such as Clark-West-Statistic, OOS-R2, Mean-Squared-Error and -#' Cumulated Sum of Squared-Error-Differences. -#' @param oos_y A matrix of dimension `T * 1` or numeric vector of length `T` -#' containing the out-of-sample observations of the target variable. 
-#' @param oos_benchmark A matrix of dimension `T * 1` or -#' numeric vector of length `T` containing the -#' out-of-sample forecasts of an arbitrary benchmark -#' (i.e. prevailing historical mean). -#' @param oos_forecast_stsc A matrix of dimension `T * 1` -#' or numeric vector of length `T` containing the -#' out-of-sample forecasts of dsc(). -#' @export -#' @return List that contains: -#' * (1) the Clark-West-Statistic, -#' * (2) the Out-of-Sample R2, -#' * (3) a vector with the CSSED between the STSC-Forecast and the benchmark and -#' * (4) a list with the MSE of the STSC-Model and the benchmark. -#' @seealso \url{https://github.com/lehmasve/hdflex#readme} -#' @author Philipp Adämmer, Sven Lehmann, Rainer Schüssler -#' @references -#' Clark, T. E. and West, K. D. (2007) "Approximately normal tests for equal predictive accuracy in nested models." -#' \emph{Journal of Econometrics}, 138 (1): 291–311. -#' -#' @import checkmate -#' @importFrom stats t.test -#' @examples -#'\donttest{ -#' -#' # See example for tvc(). -#' -#' } - -### Evaluation -summary_stsc <- function(oos_y, - oos_benchmark, - oos_forecast_stsc) { - - ### Checkmate - # Check if oos_y is numeric vector without missing / infinite values - checkmate::assertNumeric(oos_y, - min.len = 1, - any.missing = FALSE, - finite = TRUE) - - # Check if oos_benchmark is numeric vector without missing / infinite values - checkmate::assertNumeric(oos_benchmark, - len = length(oos_y), - any.missing = FALSE, - finite = TRUE) - - # Check if oos_forecast_stsc is numeric vector without missing / infinite values - checkmate::assertNumeric(oos_forecast_stsc, - len = length(oos_y), - any.missing = FALSE, - finite = TRUE) - - ### 1) Clark-West and OOS-R2 - # Squared Error (SE) Target-Variable vs. Benchmark - se_benchmark <- (oos_y - oos_benchmark) ** 2 - - # SE Target-Variable vs. Dynamic Subset Combination - se_stsc <- (oos_y - oos_forecast_stsc) ** 2 - - # SE Benchmark vs. 
Dynamic Subset Combination - se_benchmark_stsc <- (oos_benchmark - oos_forecast_stsc) ** 2 - - # SED Benchmark vs. Dynamic Subset Combination - sed_stsc <- se_benchmark - se_stsc - - # Cumulated SED - cssed <- cumsum(sed_stsc) - - # Clark-West-Statistic - cw_statistic <- se_benchmark - se_stsc + se_benchmark_stsc - cw_t <- stats::t.test(cw_statistic, - mu = 0, - alternative = "greater")$statistic - - # Out-of-Sample R2 - oos_r2 <- 1 - sum(se_stsc) / sum(se_benchmark) - - # Mean-Squared Error - mse_stsc <- mean(se_stsc) - mse_benchmark <- mean(se_benchmark) - mse <- list(STSC = mse_stsc, Benchmark = mse_benchmark) - - # Return Results - return(list(cw_t, - oos_r2, - cssed, - mse)) -} \ No newline at end of file diff --git a/R/tvc.R b/R/tvc.R index 190fbb5..0fef8e8 100644 --- a/R/tvc.R +++ b/R/tvc.R @@ -1,68 +1,73 @@ #' @name tvc -#' @title Compute density forecasts based on univariate time-varying -#' coefficient (TV-C) models in state-space form -#' @description `tvc()` can be used to generate density forecasts based on -#' univariate time-varying coefficient models. In each forecasting model, -#' we include an intercept and one predictive signal. The predictive signal -#' either represents the value of a 'simple' signal -#' or the the value of an external point forecast. -#' All models are estimated independently from each other and +#' @title Compute density forecasts using univariate time-varying coefficient (TV-C) models +#' @description The `tvc()` function generates density forecasts +#' based on univariate time-varying coefficient models in state-space form. +#' Each forecasting model includes an intercept and one predictive signal, +#' which can either be a 'P-signal' or 'F-signal'. +#' All models are estimated independently and both #' estimation and forecasting are carried out recursively. #' @param y A matrix of dimension `T * 1` or numeric vector of length `T` #' containing the observations of the target variable. 
-#' @param X A matrix with `T` rows containing -#' the lagged 'simple' signals in each column. -#' Use NULL if no 'simple' signal shall be included. -#' @param Ext_F A matrix with `T` rows containing -#' point forecasts of y in each column. -#' Use NULL if no point forecasts shall be included. -#' @param sample_length An integer that denotes the number of observations used -#' to initialize the observational variance and the coefficients' variance. -#' @param lambda_grid A numeric vector denoting the discount factor(s) -#' that control the dynamics of the coefficients. -#' Each signal in combination with each value of +#' @param X A matrix with `T` rows containing the lagged 'P-signals' in each column. +#' Use `NULL` if no (external) 'P-signal' is to be included. +#' @param Ext_F A matrix with `T` rows containing the (external) 'F-signals' in each column. +#' For 'F-Signals', the slope of the TV-C models is fixed to 1. +#' Use `NULL` if no (external) 'F-signal' is to be included. +#' @param init An integer that denotes the number of observations used +#' to initialize the observational variance and the coefficients' variance +#' in the TV-C models. +#' @param lambda_grid A numeric vector which takes values between 0 and 1 +#' denoting the discount factor(s) that control the dynamics of the time-varying +#' coefficients. Each signal in combination with each value of #' lambda provides a separate candidate forecast. #' Constant coefficients are nested for the case `lambda = 1`. -#' @param kappa_grid A numeric vector to accommodate time-varying volatility. -#' The observational variance is estimated via -#' Exponentially Weighted Moving Average. +#' @param kappa_grid A numeric vector which takes values between 0 and 1 +#' to accommodate time-varying volatility in the TV-C models. +#' The observational variance is estimated via Exponentially Weighted Moving Average +#' and kappa denotes the underlying decay factor. 
#' Constant variance is nested for the case `kappa = 1`. #' Each signal in combination with each value of -#' kappa provides a separate forecast. -#' @param bias A boolean to indicate whether the TVC-Models should -#' perform a bias correction to external point forecasts -#' (TRUE -> time-varying intercept) or -#' take it 'as is' (FALSE -> constant intercept of 0.0). -#' @return A list that contains: -#' -#' * (1) a matrix with the first moments (point forecasts) -#' of the conditionally normal predictive distributions and -#' -#' * (2) a matrix with the second moments (variance) -#' of the conditionally normal predictive distributions. -#' +#' kappa provides a separate candidate density forecast. +#' For the values of kappa, we follow the recommendation +#' of RiskMetrics (Reuters, 1996). +#' @param bias A boolean to indicate whether the TV-C-models +#' allow for a bias correction to F-signals. +#' `TRUE` allows for a time-varying intercept, and `FALSE` sets (and fixes) the intercept to 0. +#' @return A list containing: +#' \describe{ +#' \item{Forecasts}{A list containing: +#' \describe{ +#' \item{Realization: }{A vector with the actual values of the target variable.} +#' \item{Point_Forecasts: }{A vector with the first moments of the predictive densities.} +#' \item{Variance_Forecasts: }{A vector with the second moments of the predictive densities.} +#' } +#' } +#' \item{Model}{A list containing: +#' \describe{ +#' \item{Lambda_grid}{The grid of lambda values used in the model.} +#' \item{Kappa_grid}{The grid of kappa values used in the model.} +#' \item{Init}{The init value used in the model.} +#' \item{Bias}{A boolean indicating if bias correction was applied to F-signals.} +#' } +#' } +#' } #' @seealso \url{https://github.com/lehmasve/hdflex#readme} #' @author Philipp Adämmer, Sven Lehmann, Rainer Schüssler #' @references -#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning."
-#' \emph{Journal of Applied Econometrics}, 35 (4): 410–421. +#' Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. +#' +#' Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. #' -#' Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." -#' \emph{Journal of Financial Economics}, 106 (1): 157–181. +#' Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. #' -#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -#' \emph{International Economic Review}, 53 (3): 867–886. +#' Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. #' -#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -#' \emph{International Economic Review}. +#' Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. #' -#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -#' \emph{Technometrics}, 52 (1): 52–66. +#' Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." \emph{Technometrics}, 52 (1): 52–66. #' -#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -#' \emph{Springer}, 2nd edn. +#' West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. 
#' @export -#' @import parallel #' @import checkmate #' @importFrom stats na.omit #' @examples @@ -74,236 +79,220 @@ #' #### details regarding the data & external forecasts #### #' ######################################################### #' -#' # Packages +#' # Load Package #' library("hdflex") +#' library("ggplot2") +#' library("cowplot") #' #' ########## Get Data ########## -#' # Load Data +#' # Load Package Data #' inflation_data <- inflation_data -#' benchmark_ar2 <- benchmark_ar2 #' -#' # Set Index for Target Variable -#' i <- 1 # (1 -> GDPCTPI; 2 -> PCECTPI; 3 -> CPIAUCSL; 4 -> CPILFESL) +#' # Set Target Variable +#' y <- inflation_data[, 1] #' -#' # Subset Data (keep only data relevant for target variable i) -#' dataset <- inflation_data[, c(1+(i-1), # Target Variable -#' 5+(i-1), # Lag 1 -#' 9+(i-1), # Lag 2 -#' (13:16)[-i], # Remaining Price Series -#' 17:452, # Exogenous Predictor Variables -#' seq(453+(i-1)*16,468+(i-1)*16))] # Ext. Point Forecasts +#' # Set 'P-Signals' +#' X <- inflation_data[, 2:442] #' -#' ########## STSC ########## -#' ### Part 1: TV-C Model ### -#' # Set Target Variable -#' y <- dataset[, 1, drop = FALSE] +#' # Set 'F-Signals' +#' Ext_F <- inflation_data[, 443:462] +#' +#' # Get Dates and Number of Observations +#' tdates <- rownames(inflation_data) +#' tlength <- length(tdates) +#' +#' # First complete observation (no missing values) +#' first_complete <- which(complete.cases(inflation_data))[1] +#' +#' ########## Rolling AR2-Benchmark ########## +#' # Set up matrix for predictions +#' benchmark <- matrix(NA, nrow = tlength, +#' ncol = 1, dimnames = list(tdates, "AR2")) +#' +#' # Set Window-Size (15 years of quarterly data) +#' window_size <- 15 * 4 +#' +#' # Time Sequence +#' t_seq <- seq(window_size, tlength - 1) +#' +#' # Loop with rolling window +#' for (t in t_seq) { +#' +#' # Split Data for Training Train Data +#' x_train <- cbind(int = 1, X[(t - window_size + 1):t, 1:2]) +#' y_train <- y[(t - window_size + 1):t] #' -#' # 
Set 'Simple' Signals -#' X <- dataset[, 2:442, drop = FALSE] +#' # Split Data for Prediction +#' x_pred <- cbind(int = 1, X[t + 1, 1:2, drop = FALSE]) #' -#' # Set External Point Forecasts (Koop & Korobilis 2023) -#' Ext_F <- dataset[, 443:458, drop = FALSE] +#' # Fit AR-Model +#' model_ar <- .lm.fit(x_train, y_train) #' +#' # Predict and store in benchmark matrix +#' benchmark[t + 1, ] <- x_pred %*% model_ar$coefficients +#' } +#' +#' ########## STSC ########## +#' ### Part 1: TVC-Function #' # Set TV-C-Parameter -#' sample_length <- 4 * 5 -#' lambda_grid <- c(0.90, 0.95, 1) -#' kappa_grid <- 0.98 -#' bias <- TRUE -#' -#' # Apply TV-C-Function -#' results <- hdflex::tvc(y, -#' X, -#' Ext_F, -#' sample_length, -#' lambda_grid, -#' kappa_grid, -#' bias) -#' -#' # Assign TV-C-Results -#' forecast_tvc <- results[[1]] -#' variance_tvc <- results[[2]] -#' -#' # Define Burn-In Period -#' sample_period_idx <- 80:nrow(dataset) -#' sub_forecast_tvc <- forecast_tvc[sample_period_idx, , drop = FALSE] -#' sub_variance_tvc <- variance_tvc[sample_period_idx, , drop = FALSE] -#' sub_y <- y[sample_period_idx, , drop = FALSE] -#' sub_dates <- rownames(dataset)[sample_period_idx] -#' -#' ### Part 2: Dynamic Subset Combination ### +#' init <- 5 * 4 +#' lambda_grid <- c(0.90, 0.95, 1.00) +#' kappa_grid <- c(0.94, 0.96, 0.98) +#' bias <- TRUE +#' +#' # Apply TVC-Function +#' tvc_results <- hdflex::tvc(y, +#' X, +#' Ext_F, +#' init, +#' lambda_grid, +#' kappa_grid, +#' bias) +#' +#' # Assign TVC-Results +#' forecast_tvc <- tvc_results$Forecasts$Point_Forecasts +#' variance_tvc <- tvc_results$Forecasts$Variance_Forecasts +#' +#' # First complete forecast period (no missing values) +#' sub_period <- seq(which(complete.cases(forecast_tvc))[1], tlength) +#' +#' ### Part 2: DSC-Function #' # Set DSC-Parameter -#' nr_mods <- ncol(sub_forecast_tvc) -#' gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, -#' 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) -#' psi_grid <- c(1:100) 
-#' delta <- 0.95 -#' n_cores <- 1 +#' gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, +#' 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) +#' psi_grid <- c(1:100, sapply(1:4, function(i) floor(i * ncol(forecast_tvc) / 4))) +#' delta <- 0.95 +#' burn_in_tvc <- (init / 2) + 1 +#' burn_in_dsc <- 1 +#' metric <- 5 +#' equal_weight <- TRUE +#' incl <- NULL #' #' # Apply DSC-Function -#' results <- hdflex::dsc(gamma_grid, -#' psi_grid, -#' sub_y, -#' sub_forecast_tvc, -#' sub_variance_tvc, -#' delta, -#' n_cores) +#' dsc_results <- hdflex::dsc(y[sub_period], +#' forecast_tvc[sub_period, , drop = FALSE], +#' variance_tvc[sub_period, , drop = FALSE], +#' gamma_grid, +#' psi_grid, +#' delta, +#' burn_in_tvc, +#' burn_in_dsc, +#' metric, +#' equal_weight, +#' incl, +#' NULL) #' #' # Assign DSC-Results -#' sub_forecast_stsc <- results[[1]] -#' sub_variance_stsc <- results[[2]] -#' sub_chosen_gamma <- results[[3]] -#' sub_chosen_psi <- results[[4]] -#' sub_chosen_signals <- results[[5]] -#' -#' # Define Evaluation Period -#' eval_date_start <- "1991-01-01" -#' eval_date_end <- "2021-12-31" -#' eval_period_idx <- which(sub_dates > eval_date_start & sub_dates <= eval_date_end) -#' -#' # Trim Objects -#' oos_y <- sub_y[eval_period_idx, ] -#' oos_forecast_stsc <- sub_forecast_stsc[eval_period_idx] -#' oos_variance_stsc <- sub_variance_stsc[eval_period_idx] -#' oos_chosen_gamma <- sub_chosen_gamma[eval_period_idx] -#' oos_chosen_psi <- sub_chosen_psi[eval_period_idx] -#' oos_chosen_signals <- sub_chosen_signals[eval_period_idx, , drop = FALSE] -#' oos_dates <- sub_dates[eval_period_idx] -#' -#' # Add Dates -#' names(oos_forecast_stsc) <- oos_dates -#' names(oos_variance_stsc) <- oos_dates -#' names(oos_chosen_gamma) <- oos_dates -#' names(oos_chosen_psi) <- oos_dates -#' rownames(oos_chosen_signals) <- oos_dates -#' -#' ### Part 3: Evaluation ### -#' # Apply Summary-Function -#' summary_results <- summary_stsc(oos_y, -#' benchmark_ar2[, i], -#' oos_forecast_stsc) 
-#' # Assign Summary-Results -#' cssed <- summary_results[[3]] -#' mse <- summary_results[[4]] -#' -#' ########## Results ########## +#' pred_stsc <- dsc_results$Forecasts$Point_Forecasts +#' var_stsc <- dsc_results$Forecasts$Variance_Forecasts +#' +#' ########## Evaluation ########## +#' # Define Evaluation Period (OOS-Period) +#' eval_period <- which(tdates[sub_period] >= "1991-04-01" & tdates[sub_period] <= "2021-12-01") +#' +#' # Get Evaluation Summary for STSC +#' eval_results <- summary(obj = dsc_results, eval_period = eval_period) +#' +#' # Calculate (Mean-)Squared-Errors for AR2-Benchmark +#' oos_y <- y[sub_period][eval_period] +#' oos_benchmark <- benchmark[sub_period[eval_period], , drop = FALSE] +#' se_ar2 <- (oos_y - oos_benchmark)^2 +#' mse_ar2 <- mean(se_ar2) +#' +#' # Create Cumulative Squared Error Differences (CSSED) Plot +#' cssed <- cumsum(se_ar2 - eval_results$MSE[[2]]) +#' plot_cssed <- ggplot( +#' data.frame(eval_period, cssed), +#' aes(x = eval_period, y = cssed) +#' ) + +#' geom_line() + +#' ylim(-0.0008, 0.0008) + +#' ggtitle("Cumulative Squared Error Differences") + +#' xlab("Time Index") + +#' ylab("CSSED") + +#' geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") + +#' theme_minimal(base_size = 15) + +#' theme( +#' panel.grid.major = element_blank(), +#' panel.grid.minor = element_blank(), +#' panel.border = element_rect(colour = "black", fill = NA), +#' axis.ticks = element_line(colour = "black"), +#' plot.title = element_text(hjust = 0.5) +#' ) +#' +#' # Show Plots +#' options(repr.plot.width = 15, repr.plot.height = 15) +#' plots_list <- eval_results$Plots +#' plots_list <- c(list(plot_cssed), plots_list) +#' cowplot::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv") +#' #' # Relative MSE -#' print(paste("Relative MSE:", round(mse[[1]] / mse[[2]], 4))) -#' -#' # Plot CSSED -#' plot(x = as.Date(oos_dates), -#' y = cssed, -#' ylim = c(-0.0008, 0.0008), -#' main = "Cumulated squared error differences", 
-#' type = "l", -#' lwd = 1.5, -#' xlab = "Date", -#' ylab = "CSSED") + abline(h = 0, lty = 2, col = "darkgray") -#' -#' # Plot Predictive Signals -#' vec <- seq_len(dim(oos_chosen_signals)[2]) -#' mat <- oos_chosen_signals %*% diag(vec) -#' mat[mat == 0] <- NA -#' matplot(x = as.Date(oos_dates), -#' y = mat, -#' cex = 0.4, -#' pch = 20, -#' type = "p", -#' main = "Evolution of selected signal(s)", -#' xlab = "Date", -#' ylab = "Predictive Signal") -#' -#' # Plot Psi -#' plot(x = as.Date(oos_dates), -#' y = oos_chosen_psi, -#' ylim = c(1, 100), -#' main = "Evolution of the subset size", -#' type = "p", -#' cex = 0.75, -#' pch = 20, -#' xlab = "Date", -#' ylab = "Psi") +#' print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4))) #' } ### Time-Varying Coefficient Model tvc <- function(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, bias) { + # Convert y to matrix + y <- as.matrix(y, ncol = 1) + ######################################################## ### Checkmate - # Check if y is numeric vector without missing / infinite values - checkmate::assertNumeric(y, - min.len = 1, - any.missing = FALSE, - finite = TRUE) + # Check if numeric vector without missing or infinite values + assertNumeric(y, min.len = 1, any.missing = FALSE, finite = TRUE) - # Either x or f must not be null - checkmate::assert(checkmate::checkMatrix(X), - checkmate::checkMatrix(Ext_F), - combine = "or") + # Either X or Ext_F must not be null + assert(checkMatrix(X), checkMatrix(Ext_F), combine = "or") - # Check if x is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(X, - mode = "numeric", - nrow = length(y), - null.ok = TRUE) + # Check if numeric matrices and have same length as y + assertMatrix(X, mode = "numeric", nrows = nrow(y), null.ok = TRUE) + assertMatrix(Ext_F, mode = "numeric", nrows = nrow(y), null.ok = TRUE) - # Check if F is numeric matrix and has the same number of observations as y - checkmate::assertMatrix(Ext_F, - 
mode = "numeric", - nrow = length(y), - null.ok = TRUE) + # Check if integer between 2 and the length of y + assertInt(init, lower = 2, upper = nrow(y)) - # Check if lambda_grid is numeric vector with values between 0 and 1 - checkmate::assertNumeric(lambda_grid, - lower = 0, - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) + # Check if numeric vectors with values between exp(-10) and 1 + assertNumeric(lambda_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) + assertNumeric(kappa_grid, lower = exp(-10), upper = 1, min.len = 1, any.missing = FALSE, finite = TRUE) - # Check if kappa_grid is numeric vector with values between 0 and 1 - checkmate::assertNumeric(kappa_grid, - lower = 0, - upper = 1, - min.len = 1, - any.missing = FALSE, - finite = TRUE) + # Check if boolean + assertLogical(bias, len = 1, any.missing = FALSE) - # Check if bias is Boolean - checkmate::assertLogical(bias, - len = 1, - any.missing = FALSE) - - # Check if sample_length is Integer between 2 and N - checkmate::assertInt(sample_length, - lower = 2, - upper = length(y)) + # Check if there are any non-consecutive NA values + non_consec_na <- any( + apply( + is.na(cbind(if (exists("X")) X, if (exists("Ext_F")) Ext_F)), 2, + function(x) sum(abs(diff(x))) > 1 | sum(diff(x)) == 1 + ) + ) + assertFALSE(non_consec_na) # Check if any column in X is constant for the first observations if (!is.null(X)) { - if (any(apply(X, 2, function(x) length(unique(na.omit(x)[1:sample_length])) == 1))) { - print("One or more columns in X are constant for the first 1:sample_length observations.") + if (any(apply(X, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) { + print("One or more columns in X are constant for the first 1:init observations.") } } # Check if any column in Ext_F is constant for the first observations if (!is.null(Ext_F)) { - if (any(apply(Ext_F, 2, function(x) length(unique(na.omit(x)[1:sample_length])) == 1))) { - print("One or more 
columns in Ext_F are constant for the first 1:sample_length observations.") + if (any(apply(Ext_F, 2, function(x) length(unique(na.omit(x)[1:init])) == 1))) { + print("One or more columns in Ext_F are constant for the first 1:init observations.") } } - ######################################################## + ######################################################## ### Apply Rcpp-Function tvc_results <- tvc_(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, bias) @@ -315,7 +304,7 @@ tvc <- function(y, ### Remove rm(list = c("tvc_results")) - ### Create / Get Raw-Signal Names + ### Create / Get P-Signal Names x_names <- if (!is.null(X)) { if (!is.null(colnames(X))) { colnames(X) @@ -325,7 +314,7 @@ tvc <- function(y, } if (!is.null(X)) { - + # Preallocate TVC-Model Names tvc_x_name <- vector("character", length(lambda_grid) * length(kappa_grid) * ncol(X)) @@ -342,7 +331,7 @@ tvc <- function(y, } } - ### Create / Get Point-Forecast Names + ### Create / Get F-Signal Names f_names <- if (!is.null(Ext_F)) { if (!is.null(colnames(Ext_F))) { colnames(Ext_F) @@ -370,13 +359,27 @@ tvc <- function(y, } # Combine Signal Names - model_names_tvc <- c(if (exists("tvc_x_name")) tvc_x_name, - if (exists("tvc_f_name")) tvc_f_name) + model_names_tvc <- c(if (exists("tvc_x_name")) tvc_x_name, + if (exists("tvc_f_name")) tvc_f_name) # Assign Model Names (-> Column Names) - colnames(forecast_tvc) <- model_names_tvc - colnames(variance_tvc) <- model_names_tvc + colnames(forecast_tvc) <- model_names_tvc + colnames(variance_tvc) <- model_names_tvc # Return Results - return(list(forecast_tvc, variance_tvc)) + return( + list( + Forecasts = list( + Realization = y, + Point_Forecasts = forecast_tvc, + Variance_Forecasts = variance_tvc + ), + Model = list( + Lambda_grid = lambda_grid, + Kappa_grid = kappa_grid, + Init = init, + Bias = bias + ) + ) + ) } \ No newline at end of file diff --git a/R/zzz.R b/R/zzz.R new file mode 100644 index 0000000..ad055d7 --- /dev/null +++ 
b/R/zzz.R @@ -0,0 +1,5 @@ +.onLoad <- function(libname, pkgname){ + + utils::globalVariables(c("Time", "Value", "Variable")) + +} \ No newline at end of file diff --git a/data/benchmark_ar2.rda b/data/benchmark_ar2.rda deleted file mode 100644 index ce38335..0000000 Binary files a/data/benchmark_ar2.rda and /dev/null differ diff --git a/data/inflation_data.rda b/data/inflation_data.rda index 69a947a..6321812 100644 Binary files a/data/inflation_data.rda and b/data/inflation_data.rda differ diff --git a/dev/config_attachment.yaml b/dev/config_attachment.yaml new file mode 100644 index 0000000..46e24ec --- /dev/null +++ b/dev/config_attachment.yaml @@ -0,0 +1,12 @@ +path.n: NAMESPACE +path.d: DESCRIPTION +dir.r: R +dir.v: vignettes +dir.t: tests +extra.suggests: ~ +pkg_ignore: ~ +document: yes +normalize: yes +inside_rmd: no +must.exist: yes +check_if_suggests_is_installed: yes diff --git a/man/benchmark_ar2.Rd b/man/benchmark_ar2.Rd deleted file mode 100644 index d479628..0000000 --- a/man/benchmark_ar2.Rd +++ /dev/null @@ -1,34 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data-documentation.R -\docType{data} -\name{benchmark_ar2} -\alias{benchmark_ar2} -\title{AR(2) benchmark forecasts for quarterly U.S. inflation} -\format{ -A \link{matrix} with 123 quarterly observations (rows) and 4 benchmarks (columns): -\describe{ - - \item{GDPCTPI}{OOS-AR2-benchmark forecast for quarterly GDP deflator (GDPCTPI).} - - \item{PCECTPI}{OOS-AR2-benchmark forecast for quarterly PCE deflator (PCECTPI).} - - \item{CPIAUCSL}{OOS-AR2-benchmark forecast for quarterly Total CPI (CPIAUCSL).} - - \item{CPILFESL}{OOS-AR2-benchmark forecast for quarterly Core CPI (CPILFESL).} -} -} -\source{ - -} -\usage{ -benchmark_ar2 -} -\description{ -Out-of-sample one-step-ahead AR(2) benchmark forecasts for the period - from 1991-Q2 to 2021-Q4. The AR(2) models are estimated with OLS and intercept. -} -\references{ -Koop, G. and Korobilis, D. 
(2023) "Bayesian dynamic variable selection in high dimensions." -\emph{International Economic Review}. -} -\keyword{datasets} diff --git a/man/dsc.Rd b/man/dsc.Rd index 1d5f6af..08fa04b 100644 --- a/man/dsc.Rd +++ b/man/dsc.Rd @@ -4,49 +4,138 @@ \alias{dsc} \title{Generate dynamic subset forecast combinations} \usage{ -dsc(gamma_grid, psi_grid, y, mu_mat, var_mat, delta, n_cores) +dsc( + y, + point_forecasts, + variance_forecasts, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params = NULL +) } \arguments{ -\item{gamma_grid}{A numerical vector that contains discount factors -to exponentially down-weight the past predictive performance -of the candidate forecasts.} +\item{y}{A matrix of dimension `T * 1` or numeric vector of length `T` +containing the observations of the target variable.} + +\item{point_forecasts}{A matrix with `T` rows containing +the first moments of (conditionally) normal distributed +predictive densities in each column.} + +\item{variance_forecasts}{A matrix with `T` rows containing +the second moments of (conditionally) normal distributed +predictive densities in each column.} + +\item{gamma_grid}{A numeric vector containing potential discount factors +between 0 and 1 to exponentially down-weight the past predictive performance +of the candidate forecasting models. The values of this tuning parameter are chosen +in a procedure that amounts to leave-one-out cross-validation, +taking into account the time series structure of the data. +For details, see Adaemmer et al. (2023).} \item{psi_grid}{An integer vector that controls -the (possible) sizes of the active subsets.} +the (possible) sizes of the subsets. The values of this tuning parameter are chosen +in a procedure that amounts to leave-one-out cross-validation, +taking taking into account the time series structure of the data. +For details, see Adaemmer et al. 
(2023).} -\item{y}{A matrix of dimension `T * 1` or numeric vector of length `T` -containing the observations of the target variable.} +\item{delta}{A numeric value between 0 and 1 denoting the discount factor +applied to down-weight the past predictive performance of the +aggregate predictive densities.} + +\item{burn_in}{An integer value `>= 1` that denotes the number of +observations used to 'initialize' the rankings. +After 'burn_in' observations, the rankings for both, +the candidate forecasting models and aggregate predictive densities +are reset. `burn_in = 1` means no burn-in period is applied.} + +\item{burn_in_dsc}{An integer value `>= 1` that denotes the number of +observations used to 'initialize' the rankings. +After 'burn_in_dsc' observations, only the ranking of the +aggregate predictive densities is reset. +`burn_in_dsc = 1` means no burn-in period is applied.} -\item{mu_mat}{A matrix with `T` rows containing -the first moment of each predictive density in each column.} +\item{metric}{An integer from the set `1, 2, 3, 4, 5` representing +the metric used to rank the candidate forecasting models (TV-C models) +and subset combinations based on their predictive performance. +The default value is `metric = 5` which ranks them according to the +sum of (discounted) Continuous-Ranked-Probability-Scores (CRPS). +`metric = 1` uses discounted Predictive Log-Likelihoods, +`metric = 2` uses discounted Squared-Errors, +`metric = 3` uses discounted Absolute-Errors, +`metric = 4` uses discounted Compounded-Returns (in this case the target variable +y has to be a time series of financial returns).} -\item{var_mat}{A matrix with `T` rows containing -the second moment of each predictive density in each column.} +\item{equal_weight}{A boolean that denotes whether equal weights are used to +combine the candidate forecasts within a subset. If `FALSE`, the weights are +calculated applying the softmax function on the ranking scores of +the candidate forecasting models. 
The method proposed in Adaemmer et al. (2023) uses +equal weights to combine the candidate forecasting models.} -\item{delta}{A numeric value denoting the discount factor used -to down-weight the past predictive performance of the subset combinations.} +\item{incl}{An optional integer vector that denotes signals that +must be included in the subset combinations. For example, `incl = c(1, 3)` +includes all candidate forecasting models generated by the first and third signals. +If `NULL`, no signal is forced to be included.} -\item{n_cores}{An integer that denotes the number of CPU-cores -used for the computational estimation.} +\item{portfolio_params}{A numeric vector of length 3 +containing the following elements: +\describe{ + \item{risk_aversion}{A non-negative double representing the investor's risk aversion. + Higher values indicate more risk-averse behavior.} + \item{min_weight}{A double specifying the minimum weight allocated to the market. + A non-negative lower bound effectively rules out short sales.} + \item{max_weight}{A double specifying the maximum weight allocated to the market. + For example, a value of 2 allows for a maximum leverage ratio of two.} +} +This parameter is only required if `metric = 4`.} } \value{ -A list that contains: -* (1) a vector with the first moments (point forecasts) of the STSC-Model, -* (2) a vector with the the second moments (variance) of the STSC-Model, -* (3) a vector that contains the selected values for gamma, -* (4) a vector that contains the selected values for psi and -* (5) a matrix that indicates the selected signals for every point in time. 
+A list containing: +\describe{ + \item{Forecasts}{A list containing: + \describe{ + \item{Realization}{A vector with the actual values of the target variable.} + \item{Point_Forecasts}{A vector with the first moments of the aggregate predictive densities of the DSC model.} + \item{Variance_Forecasts}{A vector with the second moments of the aggregate predictive densities of the DSC model.} + } + } + \item{Tuning_Parameters}{A list containing: + \describe{ + \item{Gamma}{A vector containing the selected values for the tuning parameter gamma.} + \item{Psi}{A vector containing the selected values for the tuning parameter psi.} + \item{CFM}{A matrix containing the selected candidate forecasting models.} + } + } + \item{Model}{A list containing: + \describe{ + \item{Gamma_grid}{The grid of gamma values used in the model.} + \item{Psi_grid}{The grid of psi values used in the model.} + \item{Delta}{The delta value used in the model.} + \item{Burn_in}{The burn-in period used in the model.} + \item{Burn_in_dsc}{The burn-in period used in the model.} + \item{Metric}{The ranking metric used in the model.} + \item{Equal_weight}{A boolean indicating if equal weighting was used.} + \item{Incl}{Additional included parameters.} + } + } +} } \description{ -`dsc()` can be used to generate forecast combinations -from a set of candidate density forecasts. For each period, -`dsc()` selects a subset of predictive densities with highest ranks -regarding (local) predictive accuracy. -Both the identities of the candidate forecasts -that are used for building the combined forecast and -the subset sizes may vary over time based on the data. -If only one candidate forecast is picked, the approach (temporarily) -collapses to pure model selection. +The `dsc()` function generates +dynamic forecast combinations from a set of +candidate density forecasts. For each period, +it selects and combines a subset of predictive densities +with the highest ranks regarding local predictive accuracy. 
+The identities of the candidate forecasting models and +the subset sizes used for building the aggregate predictive density +may vary over time based on the data. +If only one candidate forecast is picked, +the approach temporarily collapses to pure model selection. } \examples{ \donttest{ @@ -56,23 +145,19 @@ collapses to pure model selection. } } \references{ -Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." -\emph{Journal of Applied Econometrics}, 35 (4): 410–421. +Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. + +Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. -Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -\emph{International Economic Review}, 53 (3): 867–886. +Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. -Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -\emph{International Economic Review}. +Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. -Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -\emph{Technometrics}, 52 (1): 52–66. +Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. -Del Negro, M., Hasegawa, R. B., and Schorfheide, F. 
(2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." -\emph{Journal of Econometrics}, 192 (2): 391–405. +Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." \emph{Technometrics}, 52 (1): 52–66. -West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -\emph{Springer}, 2nd edn. +West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. } \seealso{ \url{https://github.com/lehmasve/hdflex#readme} diff --git a/man/inflation_data.Rd b/man/inflation_data.Rd index 3d3e9d0..1f71b69 100644 --- a/man/inflation_data.Rd +++ b/man/inflation_data.Rd @@ -3,45 +3,25 @@ \docType{data} \name{inflation_data} \alias{inflation_data} -\title{Dataset to estimate quarterly U.S. inflation} +\title{Quarterly U.S. Inflation Dataset (Total CPI)} \format{ -A \link{matrix} with 245 quarterly observations (rows) and 516 variables (columns). +A \link{matrix} with 245 quarterly observations (rows) and 462 signals (columns): \describe{ - - \item{Column 1:4}{Transformed target variables: - GDP deflator (GDPCTPI), PCE deflator (PCECTPI), - Total CPI (CPIAUCSL), Core CPI (CPILFESL)} - - \item{Column 5:8}{First lag of the target variables} - - \item{Column 9:12}{Second lag of the target variables} - - \item{Column 13:16}{All four (lagged) price series transformed with second differences of logarithms} - - \item{Column 17:452}{All remaining (lagged and transformed) signals from the - FRED-QD dataset of McCracken and Ng (2020), - portfolio data used in Jurado et al. (2015), - stock market predictors from Welch and Goyal (2008), - survey data from University of Michigan consumer surveys, - commodity prices from the World Bank’s Pink Sheet database, - and key macroeconomic indicators from the Federal Reserve Economic Data - for Canada, Germany, Japan & United Kingdom. 
} - - \item{Column 453:468}{External point forecasts for quarterly GDP deflator (GDPCTPI) - generated by the MatLab Code from Koop and Korobilis (2023). - The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} - - \item{Column 469:484}{External point forecasts for quarterly PCE deflator (PCECTPI) - generated by the MatLab Code from Koop and Korobilis (2023). - The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} - - \item{Column 485:500}{External point forecasts for quarterly Total CPI (CPIAUCSL) - generated by the MatLab Code from Koop and Korobilis (2023). - The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} - - \item{Column 501:516}{External point forecasts for quarterly Core CPI (CPILFESL) - generated by the MatLab Code from Koop and Korobilis (2023). - The forecasts were generated out-of-sample from 1976-Q1 to 2021-Q4.} + \item{Column 1}{Transformed target variable: Total CPI (CPIAUCSL)} + \item{Columns 2-3}{First and second lag of the target variable} + \item{Columns 4-442}{Lagged and transformed signals from the sources listed above} + \item{Columns 443-462}{External point forecasts available from 1976-Q1 to 2021-Q4 + for quarterly Total CPI (CPIAUCSL), including: + \describe{ + \item{First 12 forecasts}{Generated using regression trees, + ridge regressions, and elastic nets + over expanding and rolling windows} + \item{Remaining 8 forecasts}{Based on models discussed in Koop and Korobilis (2023) + such as Gaussian process regressions (GPR_FAC5), + Unobserved Component Stochastic Volatility (UCSV), + and Variational Bayes Dynamic Variable Selection (VBDVS_X)} + } + } } } \source{ @@ -51,31 +31,28 @@ A \link{matrix} with 245 quarterly observations (rows) and 516 variables (column inflation_data } \description{ -A novel, high-dimensional dataset built by Koop and Korobilis (2023) - that merges predictive signals from several mainstream aggregate - macroeconomic and financial datasets. 
The dataset includes - the FRED-QD dataset of McCracken and Ng (2020), - augment with portfolio data used in Jurado et al. (2015), - stock market predictors from Welch and Goyal (2008), - survey data from University of Michigan consumer surveys, - commodity prices from the World Bank’s Pink Sheet database, - and key macroeconomic indicators from the Federal Reserve Economic Data - for four economies (Canada, Germany, Japan, United Kingdom). - The data is already pre-processed to perform one-step-ahead forecasts - and augmented with (external) point forecasts from Koop & Korobilis (2023). - The dataset spans the period 1960-Q3 to 2021-Q4. +A high-dimensional dataset created by \emph{Koop and Korobilis (2023)} +that integrates predictive signals from various macroeconomic and financial sources. +} +\details{ +The dataset includes data from the following sources: +- **FRED-QD dataset** (McCracken and Ng, 2020) +- **Portfolio data** (Jurado et al., 2015) +- **Stock market predictors** (Welch and Goyal, 2008) +- **University of Michigan consumer surveys** +- **World Bank’s Pink Sheet commodity prices** +- **Key macroeconomic indicators** from the Federal Reserve Economic Data for Canada, Germany, Japan, and the United Kingdom + +The dataset is pre-processed for one-step-ahead forecasts and includes external point forecasts. +It spans from 1960-Q3 to 2021-Q4. } \references{ -Jurado, K., Ludvigson, S. C., and Ng, S. (2015) "Measuring uncertainty." -\emph{American Economic Review}, 105 (3): 1177–1216. +Jurado, K., Ludvigson, S. C., and Ng, S. (2015) "Measuring uncertainty." \emph{American Economic Review}, 105 (3): 1177–1216. -Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -\emph{International Economic Review}. +Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. -McCracken, M., and S. 
Ng (2020) “FRED-QD: A Quarterly Database for Macroeconomic Research” -\emph{National Bureau of Economic Research}, Working Paper 26872. +McCracken, M., and S. Ng (2020) “FRED-QD: A Quarterly Database for Macroeconomic Research” \emph{National Bureau of Economic Research}, Working Paper 26872. -Welch, I. and Goyal, A. (2008) "A comprehensive look at the empirical performance of equity premium prediction." -\emph{The Review of Financial Studies}, 21 (4): 1455–1508. +Welch, I. and Goyal, A. (2008) "A comprehensive look at the empirical performance of equity premium prediction." \emph{The Review of Financial Studies}, 21 (4): 1455–1508. } \keyword{datasets} diff --git a/man/stsc.Rd b/man/stsc.Rd index 39f49c9..00a6284 100644 --- a/man/stsc.Rd +++ b/man/stsc.Rd @@ -2,44 +2,41 @@ % Please edit documentation in R/stsc.R \name{stsc} \alias{stsc} -\title{Signal-Transform-Subset-Combination (STSC)} +\title{Signal-Transformed-Subset-Combination (STSC)} \usage{ stsc( y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + metric, equal_weight, incl, parallel = FALSE, n_threads = parallel::detectCores() - 2, - risk_aversion = NULL, - min_weight = NULL, - max_weight = NULL + portfolio_params = NULL ) } \arguments{ \item{y}{A matrix of dimension `T * 1` or numeric vector of length `T` containing the observations of the target variable.} -\item{X}{A matrix with `T` rows containing -the lagged 'P' signals in each column. -Use NULL if no 'P' signal shall be included.} +\item{X}{A matrix with `T` rows containing the lagged 'P-signals' in each column. +Use `NULL` if no (external) 'P-signal' is to be included.} -\item{Ext_F}{A matrix with `T` rows containing -(external) 'F' signals for y in each column. -Use NULL if no (external) 'F' signal shall be included.} +\item{Ext_F}{A matrix with `T` rows containing the (external) 'F-signals' in each column. 
+For 'F-Signals', the slope of the TV-C models is fixed to 1. +Use `NULL` if no (external) 'F-signal' is to be included.} -\item{sample_length}{An integer that denotes the number of observations used +\item{init}{An integer that denotes the number of observations used to initialize the observational variance and the coefficients' variance in the TV-C models.} @@ -49,103 +46,132 @@ coefficients. Each signal in combination with each value of lambda provides a separate candidate forecast. Constant coefficients are nested for the case `lambda = 1`.} -\item{kappa_grid}{A numeric vector between 0 and 1 to accommodate -time-varying volatility in the TV-C models. The observational variance -is estimated via Exponentially Weighted Moving Average and kappa -denotes the underlying decay factor. +\item{kappa_grid}{A numeric vector which takes values between 0 and 1 +to accommodate time-varying volatility in the TV-C models. +The observational variance is estimated via Exponentially Weighted Moving Average +and kappa denotes the underlying decay factor. Constant variance is nested for the case `kappa = 1`. Each signal in combination with each value of -kappa provides a separate candidate forecast. +kappa provides a separate candidate density forecast. For the values of kappa, we follow the recommendation of RiskMetrics (Reuters, 1996).} -\item{burn_in_tvc}{An integer value `>= 1` that denotes the number of -observations used to 'initialize' the TV-C models. -After 'burn_in_tvc' observations, the ranking for the candidate forecasting models -and aggregated predictive densities are resetted. -`burn_in_tvc = 1` means no burn-in period is applied.} +\item{bias}{A boolean to indicate whether the TV-C-models +allow for a bias correction to F-signals. 
+`TRUE` allows for a time-varying intercept, and `FALSE` sets (and fixes) the intercept to 0.} -\item{bias}{A boolean to indicate whether the TVC-Models should -perform a bias correction to external point forecasts -(TRUE -> time-varying intercept) or -take it 'as is' (FALSE -> constant intercept of 0.0).} - -\item{gamma_grid}{A numerical vector that contains discount factors +\item{gamma_grid}{A numeric vector containing potential discount factors between 0 and 1 to exponentially down-weight the past predictive performance -of the candidate forecasting models.} +of the candidate forecasting models. The values of this tuning parameter are chosen +in a procedure that amounts to leave-one-out cross-validation, +taking into account the time series structure of the data. +For details, \emph{see Adaemmer et al. (2023)}.} \item{psi_grid}{An integer vector that controls -the (possible) sizes of the subsets.} +the (possible) sizes of the subsets. The values of this tuning parameter are chosen +in a procedure that amounts to leave-one-out cross-validation, +taking into account the time series structure of the data. +For details, \emph{see Adaemmer et al. (2023)}.} \item{delta}{A numeric value between 0 and 1 denoting the discount factor -used to down-weight the past predictive performance of the -subset combinations.} +applied to down-weight the past predictive performance of the +aggregate predictive densities.} + +\item{burn_in}{An integer value `>= 1` that denotes the number of +observations used to 'initialize' the rankings. +After 'burn_in' observations, the rankings for both, +the candidate forecasting models and aggregate predictive densities +are reset. `burn_in = 1` means no burn-in period is applied.} \item{burn_in_dsc}{An integer value `>= 1` that denotes the number of -observations used to 'initialize' the Dynamic Subset Combinations. -After 'burn_in_dsc' observations the ranking of the -aggregated predictive densities is resetted. 
+observations used to 'initialize' the rankings. +After 'burn_in_dsc' observations, only the ranking of the +aggregate predictive densities is reset. `burn_in_dsc = 1` means no burn-in period is applied.} -\item{method}{An integer of the set `1, 2, 3, 4, 5` that denotes -the method used to rank the candidate forecasting models (TV-C models) -and subset combinations according to their performance. -Default is `method = 1` which ranks according to their -generated sum of discounted predictive log-likelihoods (DPLLs), -`method = 2` uses Squared-Errors (SE) instead of DPLLs, -`method = 3` uses Absolute-Errors (AE), -`method = 4` uses Compounded-Returns (in this case the target variable -y has to be a time series of financial returns) and -`method = 5` uses Continuous-Ranked-Probability-Scores (CRPS).} +\item{metric}{An integer from the set `1, 2, 3, 4, 5` representing +the metric used to rank the candidate forecasting models (TV-C models) +and subset combinations based on their predictive performance. +The default value is `metric = 5` which ranks them according to the +sum of (discounted) Continuous-Ranked-Probability-Scores (CRPS). +`metric = 1` uses discounted Predictive Log-Likelihoods, +`metric = 2` uses discounted Squared-Errors, +`metric = 3` uses discounted Absolute-Errors, +`metric = 4` uses discounted Compounded-Returns (in this case the target variable +y has to be a time series of financial returns).} \item{equal_weight}{A boolean that denotes whether equal weights are used to combine the candidate forecasts within a subset. If `FALSE`, the weights are -calculated using the softmax-function on the ranking scores of -the candidate forecasting models. The method proposed in Adaemmer et al (2023) uses -equal weights to combine the candidate forecasts.} +calculated applying the softmax function on the ranking scores of +the candidate forecasting models. The method proposed in Adaemmer et al. 
(2023) uses +equal weights to combine the candidate forecasting models.} -\item{incl}{An (optional) integer vector that denotes signals that -must be included in the subset combinations. E.g. `incl = c(1, 3)` -includes all TVC-Models generated by the first and third signal. +\item{incl}{An optional integer vector that denotes signals that +must be included in the subset combinations. For example, `incl = c(1, 3)` +includes all candidate forecasting models generated by the first and third signals. If `NULL`, no signal is forced to be included.} -\item{parallel}{A boolean that denotes whether the function should +\item{parallel}{A boolean indicating whether the function should be parallelized.} \item{n_threads}{An integer that denotes the number of cores used -for parallelization.} - -\item{risk_aversion}{A double `>= 0` that denotes the risk aversion -of an investor. A higher value indicates a risk avoiding behaviour. -Only necessary if `method = 4`.} - -\item{min_weight}{A double that denotes the lower bound -for the weight placed on the market. -A non-negative value rules out short sales. -Only necessary if `method = 4`.} - -\item{max_weight}{A double that denotes the upper bound -for the weight placed on the market. -A value of e.g. 2 allows for a maximum leverage ratio of two. -Only necessary if `method = 4`.} +for parallelization. Only necessary if `parallel = TRUE`.} + +\item{portfolio_params}{A numeric vector of length 3 +containing the following elements: + \describe{ + \item{risk_aversion}{A non-negative double representing the investor's risk aversion. + Higher values indicate more risk-averse behavior.} + \item{min_weight}{A double specifying the minimum weight allocated to the market. + A non-negative lower bound effectively rules out short sales.} + \item{max_weight}{A double specifying the maximum weight allocated to the market. 
+ For example, a value of 2 allows for a maximum leverage ratio of two.} +} +This parameter is only required if `metric = 4`.} } \value{ -A list that contains: -* (1) a vector with the first moments (point forecasts) of the STSC-Model, -* (2) a vector with the second moments (variance) of the STSC-Model, -* (3) a vector that contains the selected values for gamma, -* (4) a vector that contains the selected values for psi, -* (5) a matrix that contains the selected signals, -* (6) a matrix that contains the selected values for lambda and -* (7) a matrix that contains the selected values for kappa -for every point in time. +A list containing: +\describe{ +\item{Forecasts}{A list containing: + \describe{ + \item{Realization}{A vector with the actual values of the target variable.} + \item{Point_Forecast}{A vector with the first moments of the aggregate predictive densities of the STSC model.} + \item{Variance_Prediction}{A vector with the second moments of the aggregate predictive densities of the STSC model.} + } +} +\item{Tuning_Parameters}{A list containing: + \describe{ + \item{Gamma}{A vector containing the selected values for the tuning parameter gamma.} + \item{Psi}{A vector containing the selected values for the tuning parameter psi.} + \item{Signals}{A matrix containing the selected signals.} + \item{Lambda}{A matrix containing the selected values for the tuning parameter lambda.} + \item{Kappa}{A matrix containing the selected values for the tuning parameter kappa.} + } +} +\item{Model}{A list containing: + \describe{ + \item{Lambda_grid}{The grid of lambda values used in the model.} + \item{Kappa_grid}{The grid of kappa values used in the model.} + \item{Gamma_grid}{The grid of gamma values used in the model.} + \item{Psi_grid}{The grid of psi values used in the model.} + \item{Delta}{The delta value used in the model.} + \item{Init}{The init value used in the model.} + \item{Burn_in}{The burn-in period used in the model.} + \item{Burn_in_dsc}{The burn-in 
period used in the model.} + \item{metric}{The ranking metric used in the model.} + \item{Equal_weight}{A boolean indicating if equal weighting was used.} + \item{Bias}{A boolean indicating if bias correction was applied to F-signals.} + \item{Incl}{The `incl` value used in the model, i.e., the signals forced to be included in the subset combinations.} + } + } +} } \description{ `stsc()` is a time series forecasting method designed to handle -vast sets of predictive signals, many of which are irrelevant or short-lived. -The method transforms heterogeneous scalar-valued signals into +vast sets of predictive signals, many of which may be irrelevant or short-lived. +This method transforms heterogeneous scalar-valued signals into candidate density forecasts via time-varying coefficient models (TV-C), -and subsequently, combines them into an ultimate aggregated density forecast +and subsequently, combines them into an ultimate aggregate density forecast via dynamic subset combinations (DSC). } \examples{ @@ -157,178 +183,153 @@ via dynamic subset combinations (DSC). #### details regarding the data & external forecasts #### ######################################################### - # Packages + # Load Package library("hdflex") + library("ggplot2") + library("cowplot") ########## Get Data ########## - # Load Data + # Load Package Data inflation_data <- inflation_data - benchmark_ar2 <- benchmark_ar2 - # Set Index for Target Variable - i <- 1 # (1 -> GDPCTPI; 2 -> PCECTPI; 3 -> CPIAUCSL; 4 -> CPILFESL) + # Set Target Variable + y <- inflation_data[, 1] + + # Set 'P-Signals' + X <- inflation_data[, 2:442] - # Subset Data (keep only data relevant for target variable i) - dataset <- inflation_data[, c(1+(i-1), # Target Variable - 5+(i-1), # Lag 1 - 9+(i-1), # Lag 2 - (13:16)[-i], # Remaining Price Series - 17:452, # Exogenous Predictor Variables - seq(453+(i-1)*16,468+(i-1)*16))] # Ext. 
Point Forecasts + # Set 'F-Signals' + Ext_F <- inflation_data[, 443:462] - ########## STSC ########## - # Set Target Variable - y <- dataset[, 1, drop = FALSE] + # Get Dates and Number of Observations + tdates <- rownames(inflation_data) + tlength <- length(tdates) + + # First complete observation (no missing values) + first_complete <- which(complete.cases(inflation_data))[1] + + ########## Rolling AR2-Benchmark ########## + # Set up matrix for predictions + benchmark <- matrix(NA, nrow = tlength, + ncol = 1, dimnames = list(tdates, "AR2")) + + # Set Window-Size (15 years of quarterly data) + window_size <- 15 * 4 - # Set 'Simple' Signals - X <- dataset[, 2:442, drop = FALSE] + # Time Sequence + t_seq <- seq(window_size, tlength - 1) - # Set External Point Forecasts (Koop & Korobilis 2023) - Ext_F <- dataset[, 443:458, drop = FALSE] + # Loop with rolling window + for (t in t_seq) { - # Set Dates - dates <- rownames(dataset) + # Split Data for Training Train Data + x_train <- cbind(int = 1, X[(t - window_size + 1):t, 1:2]) + y_train <- y[(t - window_size + 1):t] + # Split Data for Prediction + x_pred <- cbind(int = 1, X[t + 1, 1:2, drop = FALSE]) + + # Fit AR-Model + model_ar <- .lm.fit(x_train, y_train) + + # Predict and store in benchmark matrix + benchmark[t + 1, ] <- x_pred \%*\% model_ar$coefficients + } + + ########## STSC ########## # Set TV-C-Parameter - sample_length <- 4 * 5 - lambda_grid <- c(0.90, 0.95, 1) - kappa_grid <- 0.98 - burn_in_tvc <- 79 - bias <- TRUE + init <- 5 * 4 + lambda_grid <- c(0.90, 0.95, 1.00) + kappa_grid <- c(0.94, 0.96, 0.98) + bias <- TRUE # Set DSC-Parameter - gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, - 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, - 0.97, 0.98, 0.99, 1.00) - psi_grid <- c(1:100) - delta <- 0.95 - burn_in_dsc <- 1 - method <- 1 + gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, + 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) + n_tvc <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * 
length(kappa_grid) + psi_grid <- c(1:100, sapply(1:4, function(i) floor(i * n_tvc / 4))) + delta <- 0.95 + burn_in <- first_complete + init / 2 + burn_in_dsc <- 1 + metric <- 5 equal_weight <- TRUE - incl <- NULL - parallel <- FALSE - n_threads <- NULL + incl <- NULL + parallel <- FALSE + n_threads <- NULL # Apply STSC-Function results <- hdflex::stsc(y, X, Ext_F, - sample_length, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + metric, equal_weight, incl, parallel, n_threads, - NULL, - NULL, NULL) - # Assign DSC-Results - forecast_stsc <- results[[1]] - variance_stsc <- results[[2]] - chosen_gamma <- results[[3]] - chosen_psi <- results[[4]] - chosen_signals <- results[[5]] - - # Define Evaluation Period - eval_date_start <- "1991-01-01" - eval_date_end <- "2021-12-31" - eval_period_idx <- which(dates > eval_date_start & dates <= eval_date_end) - - # Trim Objects - oos_y <- y[eval_period_idx, ] - oos_forecast_stsc <- forecast_stsc[eval_period_idx] - oos_variance_stsc <- variance_stsc[eval_period_idx] - oos_chosen_gamma <- chosen_gamma[eval_period_idx] - oos_chosen_psi <- chosen_psi[eval_period_idx] - oos_chosen_signals <- chosen_signals[eval_period_idx, , drop = FALSE] - oos_dates <- dates[eval_period_idx] - - # Add Dates - names(oos_forecast_stsc) <- oos_dates - names(oos_variance_stsc) <- oos_dates - names(oos_chosen_gamma) <- oos_dates - names(oos_chosen_psi) <- oos_dates - rownames(oos_chosen_signals) <- oos_dates - - ### Part 2: Evaluation ### - # Apply Summary-Function - summary_results <- summary_stsc(oos_y, - benchmark_ar2[, i], - oos_forecast_stsc) - - # Assign Summary-Results - cssed <- summary_results[[3]] - mse <- summary_results[[4]] - - ########## Results ########## + ########## Evaluation ########## + # Define Evaluation Period (OOS-Period) + eval_period <- which(tdates >= "1991-04-01" & tdates <= "2021-12-01") + + # Get Evaluation Summary for STSC + eval_results <- summary(obj = 
results, eval_period = eval_period) + + # Calculate (Mean-)Squared-Errors for AR2-Benchmark + se_ar2 <- (y[eval_period] - benchmark[eval_period, 1])^2 + mse_ar2 <- mean(se_ar2) + + # Create CSSED-Plot + cssed <- cumsum(se_ar2 - eval_results$MSE[[2]]) + plot_cssed <- ggplot(data = data.frame(eval_period, cssed), aes(x = eval_period, y = cssed)) + + geom_line() + + ylim(-0.0008, 0.0008) + + ggtitle("Cumulative Squared Error Differences") + + xlab("Time Index") + + ylab("CSSED") + + geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + + # Show Plots + options(repr.plot.width = 15, repr.plot.height = 15) + plots_list <- eval_results$Plots + plots_list <- c(list(plot_cssed), plots_list) + cowplot::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv") + # Relative MSE - print(paste("Relative MSE:", round(mse[[1]] / mse[[2]], 4))) - - # Plot CSSED - plot(x = as.Date(oos_dates), - y = cssed, - ylim = c(-0.0008, 0.0008), - main = "Cumulated squared error differences", - type = "l", - lwd = 1.5, - xlab = "Date", - ylab = "CSSED") + abline(h = 0, lty = 2, col = "darkgray") - - # Plot Predictive Signals - vec <- seq_len(dim(oos_chosen_signals)[2]) - mat <- oos_chosen_signals \%*\% diag(vec) - mat[mat == 0] <- NA - matplot(x = as.Date(oos_dates), - y = mat, - cex = 0.4, - pch = 20, - type = "p", - main = "Evolution of selected signal(s)", - xlab = "Date", - ylab = "Predictive Signal") - - # Plot Psi - plot(x = as.Date(oos_dates), - y = oos_chosen_psi, - ylim = c(1, 100), - main = "Evolution of the subset size", - type = "p", - cex = 0.75, - pch = 20, - xlab = "Date", - ylab = "Psi") + print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4))) } } \references{ 
-Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." -\emph{Journal of Applied Econometrics}, 35 (4): 410–421. +Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. -Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." -\emph{Journal of Financial Economics}, 106 (1): 157–181. +Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. -Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." -\emph{Journal of Econometrics}, 192 (2): 391–405. +Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. -Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -\emph{International Economic Review}, 53 (3): 867–886. +Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. -Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -\emph{International Economic Review}. +Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. -Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -\emph{Technometrics}, 52 (1): 52–66. +Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." 
\emph{Technometrics}, 52 (1): 52–66. -West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -\emph{Springer}, 2nd edn. +West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. } \seealso{ \url{https://github.com/lehmasve/hdflex#readme} diff --git a/man/summary.dsc_obj.Rd b/man/summary.dsc_obj.Rd new file mode 100644 index 0000000..2899e3f --- /dev/null +++ b/man/summary.dsc_obj.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ClassMethods.R +\name{summary.dsc_obj} +\alias{summary.dsc_obj} +\title{Plots the evolution of the tuning parameter for 'dsc' object +and returns basic performance metrics} +\usage{ +\method{summary}{dsc_obj}(object, eval_period = NULL, ...) +} +\arguments{ +\item{object}{An object of type 'dsc'} + +\item{eval_period}{(Optional) A vector of indices to specify the evaluation period} + +\item{...}{Additional arguments to be consistent with S3 print() function} +} +\description{ +Plots the evolution of the tuning parameter for 'dsc' object +and returns basic performance metrics +} +\references{ +Gneiting, T., Raftery, A. E., Westveld, A. H., and Goldman, T. (2005): Calibrated Probabilistic Forecasting Using Ensemble Model Output Statistics and Minimum CRPS Estimation. \emph{Monthly Weather Review}, 133: 1098–1118. +Jordan, A., Krueger, F., and Lerch, S. (2019): "Evaluating Probabilistic Forecasts with scoringRules." \emph{Journal of Statistical Software}, 90(12): 1-37. 
+} diff --git a/man/summary.stsc_obj.Rd b/man/summary.stsc_obj.Rd new file mode 100644 index 0000000..1fe2979 --- /dev/null +++ b/man/summary.stsc_obj.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ClassMethods.R +\name{summary.stsc_obj} +\alias{summary.stsc_obj} +\title{Plots the evolution of the tuning parameter for 'stsc' object +and returns basic performance metrics} +\usage{ +\method{summary}{stsc_obj}(object, eval_period = NULL, ...) +} +\arguments{ +\item{object}{An object of type 'stsc'} + +\item{eval_period}{(Optional) A vector of indices to specify the evaluation period} + +\item{...}{Additional arguments to be consistent with S3 print() function} +} +\description{ +Plots the evolution of the tuning parameter for 'stsc' object +and returns basic performance metrics +} +\references{ +Gneiting, T., Raftery, A. E., Westveld, A. H., and Goldman, T. (2005): Calibrated Probabilistic Forecasting Using Ensemble Model Output Statistics and Minimum CRPS Estimation. \emph{Monthly Weather Review}, 133: 1098–1118. +Jordan, A., Krueger, F., and Lerch, S. (2019): "Evaluating Probabilistic Forecasts with scoringRules." \emph{Journal of Statistical Software}, 90(12): 1-37. +} diff --git a/man/summary_stsc.Rd b/man/summary_stsc.Rd deleted file mode 100644 index 7e58a86..0000000 --- a/man/summary_stsc.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/summary_stsc.R -\name{summary_stsc} -\alias{summary_stsc} -\title{Statistical summary of the STSC-results} -\usage{ -summary_stsc(oos_y, oos_benchmark, oos_forecast_stsc) -} -\arguments{ -\item{oos_y}{A matrix of dimension `T * 1` or numeric vector of length `T` -containing the out-of-sample observations of the target variable.} - -\item{oos_benchmark}{A matrix of dimension `T * 1` or -numeric vector of length `T` containing the -out-of-sample forecasts of an arbitrary benchmark -(i.e. 
prevailing historical mean).} - -\item{oos_forecast_stsc}{A matrix of dimension `T * 1` -or numeric vector of length `T` containing the -out-of-sample forecasts of dsc().} -} -\value{ -List that contains: -* (1) the Clark-West-Statistic, -* (2) the Out-of-Sample R2, -* (3) a vector with the CSSED between the STSC-Forecast and the benchmark and -* (4) a list with the MSE of the STSC-Model and the benchmark. -} -\description{ -`summary_stsc()` returns a statistical summary -of the results from dsc(). It provides statistical measures -such as Clark-West-Statistic, OOS-R2, Mean-Squared-Error and -Cumulated Sum of Squared-Error-Differences. -} -\examples{ -\donttest{ - -# See example for tvc(). - -} -} -\references{ -Clark, T. E. and West, K. D. (2007) "Approximately normal tests for equal predictive accuracy in nested models." -\emph{Journal of Econometrics}, 138 (1): 291–311. -} -\seealso{ -\url{https://github.com/lehmasve/hdflex#readme} -} -\author{ -Philipp Adämmer, Sven Lehmann, Rainer Schüssler -} diff --git a/man/tvc.Rd b/man/tvc.Rd index 5e49c57..053bb8a 100644 --- a/man/tvc.Rd +++ b/man/tvc.Rd @@ -2,60 +2,71 @@ % Please edit documentation in R/tvc.R \name{tvc} \alias{tvc} -\title{Compute density forecasts based on univariate time-varying -coefficient (TV-C) models in state-space form} +\title{Compute density forecasts using univariate time-varying coefficient (TV-C) models} \usage{ -tvc(y, X, Ext_F, sample_length, lambda_grid, kappa_grid, bias) +tvc(y, X, Ext_F, init, lambda_grid, kappa_grid, bias) } \arguments{ \item{y}{A matrix of dimension `T * 1` or numeric vector of length `T` containing the observations of the target variable.} -\item{X}{A matrix with `T` rows containing -the lagged 'simple' signals in each column. -Use NULL if no 'simple' signal shall be included.} +\item{X}{A matrix with `T` rows containing the lagged 'P-signals' in each column. 
+Use `NULL` if no (external) 'P-signal' is to be included.} -\item{Ext_F}{A matrix with `T` rows containing -point forecasts of y in each column. -Use NULL if no point forecasts shall be included.} +\item{Ext_F}{A matrix with `T` rows containing the (external) 'F-signals' in each column. +For 'F-Signals', the slope of the TV-C models is fixed to 1. +Use `NULL` if no (external) 'F-signal' is to be included.} -\item{sample_length}{An integer that denotes the number of observations used -to initialize the observational variance and the coefficients' variance.} +\item{init}{An integer that denotes the number of observations used +to initialize the observational variance and the coefficients' variance +in the TV-C models.} -\item{lambda_grid}{A numeric vector denoting the discount factor(s) -that control the dynamics of the coefficients. -Each signal in combination with each value of +\item{lambda_grid}{A numeric vector which takes values between 0 and 1 +denoting the discount factor(s) that control the dynamics of the time-varying +coefficients. Each signal in combination with each value of lambda provides a separate candidate forecast. Constant coefficients are nested for the case `lambda = 1`.} -\item{kappa_grid}{A numeric vector to accommodate time-varying volatility. -The observational variance is estimated via -Exponentially Weighted Moving Average. +\item{kappa_grid}{A numeric vector which takes values between 0 and 1 +to accommodate time-varying volatility in the TV-C models. +The observational variance is estimated via Exponentially Weighted Moving Average +and kappa denotes the underlying decay factor. Constant variance is nested for the case `kappa = 1`. Each signal in combination with each value of -kappa provides a separate forecast.} +kappa provides a separate candidate density forecast. 
+For the values of kappa, we follow the recommendation +of RiskMetrics (Reuters, 1996).} -\item{bias}{A boolean to indicate whether the TVC-Models should -perform a bias correction to external point forecasts -(TRUE -> time-varying intercept) or -take it 'as is' (FALSE -> constant intercept of 0.0).} +\item{bias}{A boolean to indicate whether the TV-C-models +allow for a bias correction to F-signals. +`TRUE` allows for a time-varying intercept, and `FALSE` sets (and fixes) the intercept to 0.} } \value{ -A list that contains: - -* (1) a matrix with the first moments (point forecasts) -of the conditionally normal predictive distributions and - -* (2) a matrix with the second moments (variance) -of the conditionally normal predictive distributions. +A list containing: +\describe{ +\item{Forecasts}{A list containing: + \describe{ + \item{Realization: }{A vector with the actual values of the target variable.} + \item{Point_Forecasts: }{A vector with the first moments of the predictive densities.} + \item{Variance_Forecasts: }{A vector with the second moments of the predictive densities.} + } +} +\item{Model}{A list containing: + \describe{ + \item{Lambda_grid}{The grid of lambda values used in the model.} + \item{Kappa_grid}{The grid of kappa values used in the model.} + \item{Init}{The init value used in the model.} + \item{Bias}{A boolean indicating if bias correction was applied to F-signals.} + } + } +} } \description{ -`tvc()` can be used to generate density forecasts based on -univariate time-varying coefficient models. In each forecasting model, -we include an intercept and one predictive signal. The predictive signal -either represents the value of a 'simple' signal -or the the value of an external point forecast. -All models are estimated independently from each other and +The `tvc()` function generates density forecasts +based on univariate time-varying coefficient models in state-space form. 
+Each forecasting model includes an intercept and one predictive signal, +which can either be a 'P-signal' or 'F-signal'. +All models are estimated independently and both estimation and forecasting are carried out recursively. } \examples{ @@ -67,174 +78,171 @@ estimation and forecasting are carried out recursively. #### details regarding the data & external forecasts #### ######################################################### - # Packages + # Load Package library("hdflex") + library("ggplot2") + library("cowplot") ########## Get Data ########## - # Load Data + # Load Package Data inflation_data <- inflation_data - benchmark_ar2 <- benchmark_ar2 - # Set Index for Target Variable - i <- 1 # (1 -> GDPCTPI; 2 -> PCECTPI; 3 -> CPIAUCSL; 4 -> CPILFESL) + # Set Target Variable + y <- inflation_data[, 1] - # Subset Data (keep only data relevant for target variable i) - dataset <- inflation_data[, c(1+(i-1), # Target Variable - 5+(i-1), # Lag 1 - 9+(i-1), # Lag 2 - (13:16)[-i], # Remaining Price Series - 17:452, # Exogenous Predictor Variables - seq(453+(i-1)*16,468+(i-1)*16))] # Ext. 
Point Forecasts + # Set 'P-Signals' + X <- inflation_data[, 2:442] - ########## STSC ########## - ### Part 1: TV-C Model ### - # Set Target Variable - y <- dataset[, 1, drop = FALSE] + # Set 'F-Signals' + Ext_F <- inflation_data[, 443:462] + + # Get Dates and Number of Observations + tdates <- rownames(inflation_data) + tlength <- length(tdates) - # Set 'Simple' Signals - X <- dataset[, 2:442, drop = FALSE] + # First complete observation (no missing values) + first_complete <- which(complete.cases(inflation_data))[1] - # Set External Point Forecasts (Koop & Korobilis 2023) - Ext_F <- dataset[, 443:458, drop = FALSE] + ########## Rolling AR2-Benchmark ########## + # Set up matrix for predictions + benchmark <- matrix(NA, nrow = tlength, + ncol = 1, dimnames = list(tdates, "AR2")) + # Set Window-Size (15 years of quarterly data) + window_size <- 15 * 4 + + # Time Sequence + t_seq <- seq(window_size, tlength - 1) + + # Loop with rolling window + for (t in t_seq) { + + # Split Data for Training Train Data + x_train <- cbind(int = 1, X[(t - window_size + 1):t, 1:2]) + y_train <- y[(t - window_size + 1):t] + + # Split Data for Prediction + x_pred <- cbind(int = 1, X[t + 1, 1:2, drop = FALSE]) + + # Fit AR-Model + model_ar <- .lm.fit(x_train, y_train) + + # Predict and store in benchmark matrix + benchmark[t + 1, ] <- x_pred \%*\% model_ar$coefficients + } + + ########## STSC ########## + ### Part 1: TVC-Function # Set TV-C-Parameter - sample_length <- 4 * 5 - lambda_grid <- c(0.90, 0.95, 1) - kappa_grid <- 0.98 - bias <- TRUE - - # Apply TV-C-Function - results <- hdflex::tvc(y, - X, - Ext_F, - sample_length, - lambda_grid, - kappa_grid, - bias) - - # Assign TV-C-Results - forecast_tvc <- results[[1]] - variance_tvc <- results[[2]] - - # Define Burn-In Period - sample_period_idx <- 80:nrow(dataset) - sub_forecast_tvc <- forecast_tvc[sample_period_idx, , drop = FALSE] - sub_variance_tvc <- variance_tvc[sample_period_idx, , drop = FALSE] - sub_y <- y[sample_period_idx, , 
drop = FALSE] - sub_dates <- rownames(dataset)[sample_period_idx] - - ### Part 2: Dynamic Subset Combination ### + init <- 5 * 4 + lambda_grid <- c(0.90, 0.95, 1.00) + kappa_grid <- c(0.94, 0.96, 0.98) + bias <- TRUE + + # Apply TVC-Function + tvc_results <- hdflex::tvc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + + # Assign TVC-Results + forecast_tvc <- tvc_results$Forecasts$Point_Forecasts + variance_tvc <- tvc_results$Forecasts$Variance_Forecasts + + # First complete forecast period (no missing values) + sub_period <- seq(which(complete.cases(forecast_tvc))[1], tlength) + + ### Part 2: DSC-Function # Set DSC-Parameter - nr_mods <- ncol(sub_forecast_tvc) - gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, - 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) - psi_grid <- c(1:100) - delta <- 0.95 - n_cores <- 1 + gamma_grid <- c(0.40, 0.50, 0.60, 0.70, 0.80, 0.90, + 0.91, 0.92, 0.93, 0.94, 0.95, 0.96, 0.97, 0.98, 0.99, 1.00) + psi_grid <- c(1:100, sapply(1:4, function(i) floor(i * ncol(forecast_tvc) / 4))) + delta <- 0.95 + burn_in_tvc <- (init / 2) + 1 + burn_in_dsc <- 1 + metric <- 5 + equal_weight <- TRUE + incl <- NULL # Apply DSC-Function - results <- hdflex::dsc(gamma_grid, - psi_grid, - sub_y, - sub_forecast_tvc, - sub_variance_tvc, - delta, - n_cores) + dsc_results <- hdflex::dsc(y[sub_period], + forecast_tvc[sub_period, , drop = FALSE], + variance_tvc[sub_period, , drop = FALSE], + gamma_grid, + psi_grid, + delta, + burn_in_tvc, + burn_in_dsc, + metric, + equal_weight, + incl, + NULL) # Assign DSC-Results - sub_forecast_stsc <- results[[1]] - sub_variance_stsc <- results[[2]] - sub_chosen_gamma <- results[[3]] - sub_chosen_psi <- results[[4]] - sub_chosen_signals <- results[[5]] - - # Define Evaluation Period - eval_date_start <- "1991-01-01" - eval_date_end <- "2021-12-31" - eval_period_idx <- which(sub_dates > eval_date_start & sub_dates <= eval_date_end) - - # Trim Objects - oos_y <- sub_y[eval_period_idx, ] - 
oos_forecast_stsc <- sub_forecast_stsc[eval_period_idx] - oos_variance_stsc <- sub_variance_stsc[eval_period_idx] - oos_chosen_gamma <- sub_chosen_gamma[eval_period_idx] - oos_chosen_psi <- sub_chosen_psi[eval_period_idx] - oos_chosen_signals <- sub_chosen_signals[eval_period_idx, , drop = FALSE] - oos_dates <- sub_dates[eval_period_idx] - - # Add Dates - names(oos_forecast_stsc) <- oos_dates - names(oos_variance_stsc) <- oos_dates - names(oos_chosen_gamma) <- oos_dates - names(oos_chosen_psi) <- oos_dates - rownames(oos_chosen_signals) <- oos_dates - - ### Part 3: Evaluation ### - # Apply Summary-Function - summary_results <- summary_stsc(oos_y, - benchmark_ar2[, i], - oos_forecast_stsc) - # Assign Summary-Results - cssed <- summary_results[[3]] - mse <- summary_results[[4]] - - ########## Results ########## + pred_stsc <- dsc_results$Forecasts$Point_Forecasts + var_stsc <- dsc_results$Forecasts$Variance_Forecasts + + ########## Evaluation ########## + # Define Evaluation Period (OOS-Period) + eval_period <- which(tdates[sub_period] >= "1991-04-01" & tdates[sub_period] <= "2021-12-01") + + # Get Evaluation Summary for STSC + eval_results <- summary(obj = dsc_results, eval_period = eval_period) + + # Calculate (Mean-)Squared-Errors for AR2-Benchmark + oos_y <- y[sub_period][eval_period] + oos_benchmark <- benchmark[sub_period[eval_period], , drop = FALSE] + se_ar2 <- (oos_y - oos_benchmark)^2 + mse_ar2 <- mean(se_ar2) + + # Create Cumulative Squared Error Differences (CSSED) Plot + cssed <- cumsum(se_ar2 - eval_results$MSE[[2]]) + plot_cssed <- ggplot( + data.frame(eval_period, cssed), + aes(x = eval_period, y = cssed) + ) + + geom_line() + + ylim(-0.0008, 0.0008) + + ggtitle("Cumulative Squared Error Differences") + + xlab("Time Index") + + ylab("CSSED") + + geom_hline(yintercept = 0, linetype = "dashed", color = "darkgray") + + theme_minimal(base_size = 15) + + theme( + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.border = 
element_rect(colour = "black", fill = NA), + axis.ticks = element_line(colour = "black"), + plot.title = element_text(hjust = 0.5) + ) + + # Show Plots + options(repr.plot.width = 15, repr.plot.height = 15) + plots_list <- eval_results$Plots + plots_list <- c(list(plot_cssed), plots_list) + cowplot::plot_grid(plotlist = plots_list, ncol = 2, nrow = 3, align = "hv") + # Relative MSE - print(paste("Relative MSE:", round(mse[[1]] / mse[[2]], 4))) - - # Plot CSSED - plot(x = as.Date(oos_dates), - y = cssed, - ylim = c(-0.0008, 0.0008), - main = "Cumulated squared error differences", - type = "l", - lwd = 1.5, - xlab = "Date", - ylab = "CSSED") + abline(h = 0, lty = 2, col = "darkgray") - - # Plot Predictive Signals - vec <- seq_len(dim(oos_chosen_signals)[2]) - mat <- oos_chosen_signals \%*\% diag(vec) - mat[mat == 0] <- NA - matplot(x = as.Date(oos_dates), - y = mat, - cex = 0.4, - pch = 20, - type = "p", - main = "Evolution of selected signal(s)", - xlab = "Date", - ylab = "Predictive Signal") - - # Plot Psi - plot(x = as.Date(oos_dates), - y = oos_chosen_psi, - ylim = c(1, 100), - main = "Evolution of the subset size", - type = "p", - cex = 0.75, - pch = 20, - xlab = "Date", - ylab = "Psi") + print(paste("Relative MSE:", round(eval_results$MSE[[1]] / mse_ar2, 4))) } } \references{ -Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." -\emph{Journal of Applied Econometrics}, 35 (4): 410–421. +Beckmann, J., Koop, G., Korobilis, D., and Schüssler, R. A. (2020) "Exchange rate predictability and dynamic bayesian learning." \emph{Journal of Applied Econometrics}, 35 (4): 410–421. + +Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." \emph{Journal of Financial Economics}, 106 (1): 157–181. -Dangl, T. and Halling, M. (2012) "Predictive regressions with time-varying coefficients." -\emph{Journal of Financial Economics}, 106 (1): 157–181. 
+Del Negro, M., Hasegawa, R. B., and Schorfheide, F. (2016) "Dynamic prediction pools: An investigation of financial frictions and forecasting performance." \emph{Journal of Econometrics}, 192 (2): 391–405. -Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." -\emph{International Economic Review}, 53 (3): 867–886. +Koop, G. and Korobilis, D. (2012) "Forecasting inflation using dynamic model averaging." \emph{International Economic Review}, 53 (3): 867–886. -Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." -\emph{International Economic Review}. +Koop, G. and Korobilis, D. (2023) "Bayesian dynamic variable selection in high dimensions." \emph{International Economic Review}. -Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." -\emph{Technometrics}, 52 (1): 52–66. +Raftery, A. E., Kárn`y, M., and Ettler, P. (2010) "Online prediction under model uncertainty via dynamic model averaging: Application to a cold rolling mill." \emph{Technometrics}, 52 (1): 52–66. -West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" -\emph{Springer}, 2nd edn. +West, M. and Harrison, J. (1997) "Bayesian forecasting and dynamic models" \emph{Springer}, 2nd edn. 
} \seealso{ \url{https://github.com/lehmasve/hdflex#readme} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 6c936c9..88be101 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -12,176 +12,95 @@ Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); #endif -// init_dsc -NumericVector init_dsc(int number_forecasts); -RcppExport SEXP _hdflex_init_dsc(SEXP number_forecastsSEXP) { +// dsc_ +List dsc_(const arma::vec& y, const arma::mat& point_forecasts, const arma::mat& variance_forecasts, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, int burn_in, int burn_in_dsc, int metric, bool equal_weight, Nullable incl_, Nullable portfolio_params_); +RcppExport SEXP _hdflex_dsc_(SEXP ySEXP, SEXP point_forecastsSEXP, SEXP variance_forecastsSEXP, SEXP gamma_gridSEXP, SEXP psi_gridSEXP, SEXP deltaSEXP, SEXP burn_inSEXP, SEXP burn_in_dscSEXP, SEXP metricSEXP, SEXP equal_weightSEXP, SEXP incl_SEXP, SEXP portfolio_params_SEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< int >::type number_forecasts(number_forecastsSEXP); - rcpp_result_gen = Rcpp::wrap(init_dsc(number_forecasts)); - return rcpp_result_gen; -END_RCPP -} -// forget_dsc -NumericVector forget_dsc(NumericVector weights, double gamma); -RcppExport SEXP _hdflex_forget_dsc(SEXP weightsSEXP, SEXP gammaSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericVector >::type weights(weightsSEXP); - Rcpp::traits::input_parameter< double >::type gamma(gammaSEXP); - rcpp_result_gen = Rcpp::wrap(forget_dsc(weights, gamma)); - return rcpp_result_gen; -END_RCPP -} -// active_models_dsc -List active_models_dsc(NumericVector weights, int psi); -RcppExport SEXP _hdflex_active_models_dsc(SEXP weightsSEXP, SEXP psiSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - 
Rcpp::traits::input_parameter< NumericVector >::type weights(weightsSEXP); - Rcpp::traits::input_parameter< int >::type psi(psiSEXP); - rcpp_result_gen = Rcpp::wrap(active_models_dsc(weights, psi)); - return rcpp_result_gen; -END_RCPP -} -// matrix_subset_idx -NumericVector matrix_subset_idx(NumericMatrix mat, IntegerVector col_idx, int t); -RcppExport SEXP _hdflex_matrix_subset_idx(SEXP matSEXP, SEXP col_idxSEXP, SEXP tSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericMatrix >::type mat(matSEXP); - Rcpp::traits::input_parameter< IntegerVector >::type col_idx(col_idxSEXP); - Rcpp::traits::input_parameter< int >::type t(tSEXP); - rcpp_result_gen = Rcpp::wrap(matrix_subset_idx(mat, col_idx, t)); - return rcpp_result_gen; -END_RCPP -} -// agg_density_dsc -List agg_density_dsc(NumericVector active_weights, NumericVector oos_target_var, NumericMatrix oos_forecast_tvp, NumericMatrix oos_variance_tvp, IntegerVector idx_sub, int t); -RcppExport SEXP _hdflex_agg_density_dsc(SEXP active_weightsSEXP, SEXP oos_target_varSEXP, SEXP oos_forecast_tvpSEXP, SEXP oos_variance_tvpSEXP, SEXP idx_subSEXP, SEXP tSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericVector >::type active_weights(active_weightsSEXP); - Rcpp::traits::input_parameter< NumericVector >::type oos_target_var(oos_target_varSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type oos_forecast_tvp(oos_forecast_tvpSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type oos_variance_tvp(oos_variance_tvpSEXP); - Rcpp::traits::input_parameter< IntegerVector >::type idx_sub(idx_subSEXP); - Rcpp::traits::input_parameter< int >::type t(tSEXP); - rcpp_result_gen = Rcpp::wrap(agg_density_dsc(active_weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, idx_sub, t)); - return rcpp_result_gen; -END_RCPP -} -// update_dsc -NumericVector 
update_dsc(NumericVector weights, NumericVector oos_target_var, NumericMatrix oos_forecast_tvp, NumericMatrix oos_variance_tvp, int n_models, int t); -RcppExport SEXP _hdflex_update_dsc(SEXP weightsSEXP, SEXP oos_target_varSEXP, SEXP oos_forecast_tvpSEXP, SEXP oos_variance_tvpSEXP, SEXP n_modelsSEXP, SEXP tSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericVector >::type weights(weightsSEXP); - Rcpp::traits::input_parameter< NumericVector >::type oos_target_var(oos_target_varSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type oos_forecast_tvp(oos_forecast_tvpSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type oos_variance_tvp(oos_variance_tvpSEXP); - Rcpp::traits::input_parameter< int >::type n_models(n_modelsSEXP); - Rcpp::traits::input_parameter< int >::type t(tSEXP); - rcpp_result_gen = Rcpp::wrap(update_dsc(weights, oos_target_var, oos_forecast_tvp, oos_variance_tvp, n_models, t)); - return rcpp_result_gen; -END_RCPP -} -// dsc_loop -List dsc_loop(NumericVector weights, double gamma, int psi, NumericVector oos_target_var, NumericMatrix oos_forecast_tvp, NumericMatrix oos_variance_tvp, int len_para_grid, int oos_length, int n_models); -RcppExport SEXP _hdflex_dsc_loop(SEXP weightsSEXP, SEXP gammaSEXP, SEXP psiSEXP, SEXP oos_target_varSEXP, SEXP oos_forecast_tvpSEXP, SEXP oos_variance_tvpSEXP, SEXP len_para_gridSEXP, SEXP oos_lengthSEXP, SEXP n_modelsSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< NumericVector >::type weights(weightsSEXP); - Rcpp::traits::input_parameter< double >::type gamma(gammaSEXP); - Rcpp::traits::input_parameter< int >::type psi(psiSEXP); - Rcpp::traits::input_parameter< NumericVector >::type oos_target_var(oos_target_varSEXP); - Rcpp::traits::input_parameter< NumericMatrix >::type oos_forecast_tvp(oos_forecast_tvpSEXP); - Rcpp::traits::input_parameter< NumericMatrix 
>::type oos_variance_tvp(oos_variance_tvpSEXP); - Rcpp::traits::input_parameter< int >::type len_para_grid(len_para_gridSEXP); - Rcpp::traits::input_parameter< int >::type oos_length(oos_lengthSEXP); - Rcpp::traits::input_parameter< int >::type n_models(n_modelsSEXP); - rcpp_result_gen = Rcpp::wrap(dsc_loop(weights, gamma, psi, oos_target_var, oos_forecast_tvp, oos_variance_tvp, len_para_grid, oos_length, n_models)); + Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type point_forecasts(point_forecastsSEXP); + Rcpp::traits::input_parameter< const arma::mat& >::type variance_forecasts(variance_forecastsSEXP); + Rcpp::traits::input_parameter< arma::rowvec >::type gamma_grid(gamma_gridSEXP); + Rcpp::traits::input_parameter< arma::irowvec >::type psi_grid(psi_gridSEXP); + Rcpp::traits::input_parameter< double >::type delta(deltaSEXP); + Rcpp::traits::input_parameter< int >::type burn_in(burn_inSEXP); + Rcpp::traits::input_parameter< int >::type burn_in_dsc(burn_in_dscSEXP); + Rcpp::traits::input_parameter< int >::type metric(metricSEXP); + Rcpp::traits::input_parameter< bool >::type equal_weight(equal_weightSEXP); + Rcpp::traits::input_parameter< Nullable >::type incl_(incl_SEXP); + Rcpp::traits::input_parameter< Nullable >::type portfolio_params_(portfolio_params_SEXP); + rcpp_result_gen = Rcpp::wrap(dsc_(y, point_forecasts, variance_forecasts, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_)); return rcpp_result_gen; END_RCPP } // stsc_loop_ -List stsc_loop_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int sample_length, arma::vec lambda_grid, arma::vec kappa_grid, int burn_in_tvc, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, int burn_in_dsc, int method, bool equal_weight, Nullable incl_, Nullable risk_aversion_, Nullable min_weight_, Nullable max_weight_); -RcppExport SEXP _hdflex_stsc_loop_(SEXP ySEXP, SEXP 
X_SEXP, SEXP Ext_F_SEXP, SEXP sample_lengthSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP burn_in_tvcSEXP, SEXP biasSEXP, SEXP gamma_gridSEXP, SEXP psi_gridSEXP, SEXP deltaSEXP, SEXP burn_in_dscSEXP, SEXP methodSEXP, SEXP equal_weightSEXP, SEXP incl_SEXP, SEXP risk_aversion_SEXP, SEXP min_weight_SEXP, SEXP max_weight_SEXP) { +List stsc_loop_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int init, arma::vec lambda_grid, arma::vec kappa_grid, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, int burn_in, int burn_in_dsc, int metric, bool equal_weight, Nullable incl_, Nullable portfolio_params_); +RcppExport SEXP _hdflex_stsc_loop_(SEXP ySEXP, SEXP X_SEXP, SEXP Ext_F_SEXP, SEXP initSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP biasSEXP, SEXP gamma_gridSEXP, SEXP psi_gridSEXP, SEXP deltaSEXP, SEXP burn_inSEXP, SEXP burn_in_dscSEXP, SEXP metricSEXP, SEXP equal_weightSEXP, SEXP incl_SEXP, SEXP portfolio_params_SEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< Nullable >::type X_(X_SEXP); Rcpp::traits::input_parameter< Nullable >::type Ext_F_(Ext_F_SEXP); - Rcpp::traits::input_parameter< int >::type sample_length(sample_lengthSEXP); + Rcpp::traits::input_parameter< int >::type init(initSEXP); Rcpp::traits::input_parameter< arma::vec >::type lambda_grid(lambda_gridSEXP); Rcpp::traits::input_parameter< arma::vec >::type kappa_grid(kappa_gridSEXP); - Rcpp::traits::input_parameter< int >::type burn_in_tvc(burn_in_tvcSEXP); Rcpp::traits::input_parameter< bool >::type bias(biasSEXP); Rcpp::traits::input_parameter< arma::rowvec >::type gamma_grid(gamma_gridSEXP); Rcpp::traits::input_parameter< arma::irowvec >::type psi_grid(psi_gridSEXP); Rcpp::traits::input_parameter< double >::type delta(deltaSEXP); + Rcpp::traits::input_parameter< int >::type burn_in(burn_inSEXP); Rcpp::traits::input_parameter< int 
>::type burn_in_dsc(burn_in_dscSEXP); - Rcpp::traits::input_parameter< int >::type method(methodSEXP); + Rcpp::traits::input_parameter< int >::type metric(metricSEXP); Rcpp::traits::input_parameter< bool >::type equal_weight(equal_weightSEXP); Rcpp::traits::input_parameter< Nullable >::type incl_(incl_SEXP); - Rcpp::traits::input_parameter< Nullable >::type risk_aversion_(risk_aversion_SEXP); - Rcpp::traits::input_parameter< Nullable >::type min_weight_(min_weight_SEXP); - Rcpp::traits::input_parameter< Nullable >::type max_weight_(max_weight_SEXP); - rcpp_result_gen = Rcpp::wrap(stsc_loop_(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, risk_aversion_, min_weight_, max_weight_)); + Rcpp::traits::input_parameter< Nullable >::type portfolio_params_(portfolio_params_SEXP); + rcpp_result_gen = Rcpp::wrap(stsc_loop_(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, portfolio_params_)); return rcpp_result_gen; END_RCPP } // stsc_loop_par_ -List stsc_loop_par_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int sample_length, arma::vec lambda_grid, arma::vec kappa_grid, int burn_in_tvc, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, int burn_in_dsc, int method, bool equal_weight, Nullable incl_, int n_threads, Nullable risk_aversion_, Nullable min_weight_, Nullable max_weight_); -RcppExport SEXP _hdflex_stsc_loop_par_(SEXP ySEXP, SEXP X_SEXP, SEXP Ext_F_SEXP, SEXP sample_lengthSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP burn_in_tvcSEXP, SEXP biasSEXP, SEXP gamma_gridSEXP, SEXP psi_gridSEXP, SEXP deltaSEXP, SEXP burn_in_dscSEXP, SEXP methodSEXP, SEXP equal_weightSEXP, SEXP incl_SEXP, SEXP n_threadsSEXP, SEXP risk_aversion_SEXP, SEXP min_weight_SEXP, SEXP max_weight_SEXP) { +List stsc_loop_par_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int init, 
arma::vec lambda_grid, arma::vec kappa_grid, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, int burn_in, int burn_in_dsc, int metric, bool equal_weight, Nullable incl_, int n_threads, Nullable portfolio_params_); +RcppExport SEXP _hdflex_stsc_loop_par_(SEXP ySEXP, SEXP X_SEXP, SEXP Ext_F_SEXP, SEXP initSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP biasSEXP, SEXP gamma_gridSEXP, SEXP psi_gridSEXP, SEXP deltaSEXP, SEXP burn_inSEXP, SEXP burn_in_dscSEXP, SEXP metricSEXP, SEXP equal_weightSEXP, SEXP incl_SEXP, SEXP n_threadsSEXP, SEXP portfolio_params_SEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< Nullable >::type X_(X_SEXP); Rcpp::traits::input_parameter< Nullable >::type Ext_F_(Ext_F_SEXP); - Rcpp::traits::input_parameter< int >::type sample_length(sample_lengthSEXP); + Rcpp::traits::input_parameter< int >::type init(initSEXP); Rcpp::traits::input_parameter< arma::vec >::type lambda_grid(lambda_gridSEXP); Rcpp::traits::input_parameter< arma::vec >::type kappa_grid(kappa_gridSEXP); - Rcpp::traits::input_parameter< int >::type burn_in_tvc(burn_in_tvcSEXP); Rcpp::traits::input_parameter< bool >::type bias(biasSEXP); Rcpp::traits::input_parameter< arma::rowvec >::type gamma_grid(gamma_gridSEXP); Rcpp::traits::input_parameter< arma::irowvec >::type psi_grid(psi_gridSEXP); Rcpp::traits::input_parameter< double >::type delta(deltaSEXP); + Rcpp::traits::input_parameter< int >::type burn_in(burn_inSEXP); Rcpp::traits::input_parameter< int >::type burn_in_dsc(burn_in_dscSEXP); - Rcpp::traits::input_parameter< int >::type method(methodSEXP); + Rcpp::traits::input_parameter< int >::type metric(metricSEXP); Rcpp::traits::input_parameter< bool >::type equal_weight(equal_weightSEXP); Rcpp::traits::input_parameter< Nullable >::type incl_(incl_SEXP); Rcpp::traits::input_parameter< int >::type n_threads(n_threadsSEXP); 
- Rcpp::traits::input_parameter< Nullable >::type risk_aversion_(risk_aversion_SEXP); - Rcpp::traits::input_parameter< Nullable >::type min_weight_(min_weight_SEXP); - Rcpp::traits::input_parameter< Nullable >::type max_weight_(max_weight_SEXP); - rcpp_result_gen = Rcpp::wrap(stsc_loop_par_(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, burn_in_tvc, bias, gamma_grid, psi_grid, delta, burn_in_dsc, method, equal_weight, incl_, n_threads, risk_aversion_, min_weight_, max_weight_)); + Rcpp::traits::input_parameter< Nullable >::type portfolio_params_(portfolio_params_SEXP); + rcpp_result_gen = Rcpp::wrap(stsc_loop_par_(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias, gamma_grid, psi_grid, delta, burn_in, burn_in_dsc, metric, equal_weight, incl_, n_threads, portfolio_params_)); return rcpp_result_gen; END_RCPP } // tvc_ -List tvc_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int sample_length, const arma::vec& lambda_grid, const arma::vec& kappa_grid, bool bias); -RcppExport SEXP _hdflex_tvc_(SEXP ySEXP, SEXP X_SEXP, SEXP Ext_F_SEXP, SEXP sample_lengthSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP biasSEXP) { +List tvc_(const arma::vec& y, Nullable X_, Nullable Ext_F_, int init, const arma::vec& lambda_grid, const arma::vec& kappa_grid, bool bias); +RcppExport SEXP _hdflex_tvc_(SEXP ySEXP, SEXP X_SEXP, SEXP Ext_F_SEXP, SEXP initSEXP, SEXP lambda_gridSEXP, SEXP kappa_gridSEXP, SEXP biasSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const arma::vec& >::type y(ySEXP); Rcpp::traits::input_parameter< Nullable >::type X_(X_SEXP); Rcpp::traits::input_parameter< Nullable >::type Ext_F_(Ext_F_SEXP); - Rcpp::traits::input_parameter< int >::type sample_length(sample_lengthSEXP); + Rcpp::traits::input_parameter< int >::type init(initSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type lambda_grid(lambda_gridSEXP); Rcpp::traits::input_parameter< const arma::vec& >::type 
kappa_grid(kappa_gridSEXP); Rcpp::traits::input_parameter< bool >::type bias(biasSEXP); - rcpp_result_gen = Rcpp::wrap(tvc_(y, X_, Ext_F_, sample_length, lambda_grid, kappa_grid, bias)); + rcpp_result_gen = Rcpp::wrap(tvc_(y, X_, Ext_F_, init, lambda_grid, kappa_grid, bias)); return rcpp_result_gen; END_RCPP } diff --git a/src/dsc.cpp b/src/dsc.cpp index 7416274..1d10dfe 100644 --- a/src/dsc.cpp +++ b/src/dsc.cpp @@ -1,201 +1,602 @@ -#include +#include using namespace Rcpp; // 2) Dynamic Subset Combination -// Function I - Initialize all Predictive Densities -//[[Rcpp::export]] - NumericVector init_dsc(int number_forecasts){ +// Function I - Initialize DSC-Parameter + arma::field> dsc_init(int n_cands, + int n_combs, + int n_gamma) { - // Define Variables - NumericVector weights(number_forecasts, 1.0 / number_forecasts); + // Define Variables + arma::field> ret(2); - // Return Vector - return weights; -} - -// Function II - Re-Weight Probabilities by Forgetting Factor Gamma -//[[Rcpp::export]] - NumericVector forget_dsc(NumericVector weights, - double gamma){ + // Initialize Vector for Performance-Score (Subset Combinations) -> Ranking + arma::rowvec score_combs(n_combs, arma::fill::zeros); + + // Initialize Vector for Performance-Score (Candidate Models) -> Ranking + arma::rowvec vec(n_cands, arma::fill::zeros); - // Define Variables - weights = pow(weights, gamma) / sum(pow(weights, gamma)); + // Fill Field for Candidate Models + arma::field score_cands(n_gamma); + for (unsigned int i = 0; i < n_gamma; ++i) { + score_cands(i) = vec; + } - // Return Updated Probabilities - return weights; + // Fill Return-Field + ret(0) = score_cands; + ret(1) = arma::field(1); + ret(1)(0) = score_combs; + + // Return + return ret; } -// Function III - Rank and Subset Forecasting Models (Active Models) -//[[Rcpp::export]] - List active_models_dsc(NumericVector weights, - int psi){ - - // Define Variables - List ret(2); - NumericVector active_weights(psi); - - // (Partial) Sort 
Weights - IntegerVector idx = seq(0, weights.size()-1); - std::partial_sort(idx.begin(), idx.begin()+psi, idx.end(), - [&](int i, int j) {return weights[i] > weights[j]; }); - - // Get index of the 'psi' highest weights - IntegerVector idx_sub = idx[seq(0, psi-1)]; - - // Calculate Active Weights - active_weights.fill(1.0 / psi); - //active_weights = weights[idx_sub]; - //active_weights = active_weights / sum(active_weights); - - // Fill list - ret[0] = active_weights; - ret[1] = idx_sub; +// Function II - Rank and Set Active Model Set (Active Models) + arma::field dsc_active_models(const arma::rowvec& score_cands_gamma, + int psi) { + + // Define Variables + arma::field ret(2); + const int n_elem = score_cands_gamma.n_elem; + const int psi_ = std::min(n_elem, psi); + + // Check if all elements are equal + const bool isequal = arma::all(score_cands_gamma == score_cands_gamma(0)); + + // If all elements are equal, return the indices from left to right + arma::uvec idx; + if (isequal) { + idx = arma::regspace(0, psi_ - 1); + } else { + // Get psi-highest values (-> indices) + const arma::uvec sorted_idx = arma::sort_index(score_cands_gamma, "descend"); + idx = sorted_idx.head(psi_); + } - // Return list - return ret; + // Fill Return-Field + ret(0) = idx; + ret(1) = arma::uvec(1).fill(n_elem); + + // Return + return ret; } -//----------------- -// Helper Function - Subset Matrix based on Column- and Row-Index -// [[Rcpp::export]] - NumericVector matrix_subset_idx(NumericMatrix mat, - IntegerVector col_idx, - int t) { +// Function III - Compute Aggregated Predictive Distribution + arma::field dsc_agg_density(const arma::rowvec& active_weights, + const arma::rowvec& forecast_tvc_t, + const arma::rowvec& variance_tvc_t, + const arma::uvec& idx_sub) { + + // Define Variables + arma::field ret(2); - // Determine Number of Columns - int n_cols_out = col_idx.size(); + // Subset Matrices (select only active models) + const arma::rowvec oos_forecast_tvp_sub = 
arma::conv_to::from(forecast_tvc_t(idx_sub)); + const arma::rowvec oos_variance_tvp_sub = arma::conv_to::from(variance_tvc_t(idx_sub)); - // Create Output Vector - NumericVector out(n_cols_out); + // Calculate Combined Predictive Density (Logarithmic Combination Rule) + const double variance_comb = 1.0 / accu(active_weights / oos_variance_tvp_sub); + const double mu_comb = accu(active_weights % oos_forecast_tvp_sub / oos_variance_tvp_sub) * + variance_comb; + + // Fill Field + ret(0) = mu_comb; + ret(1) = variance_comb; - // Loop through each column and copy data - for(int i = 0; i < n_cols_out; ++i) { - out(i) = mat(t, col_idx[i]); - } - return out; + // Return + return ret; } -// ----------------- - -// Function IV - Compute Combined Predictive Distribution -//[[Rcpp::export]] - List agg_density_dsc(NumericVector active_weights, - NumericVector oos_target_var, - NumericMatrix oos_forecast_tvp, - NumericMatrix oos_variance_tvp, - IntegerVector idx_sub, - int t) { + + +// Function IV - Calculate (exponentially down-weighted) Performance Scores (-> Ranking) for Candidate Forecasts + void dsc_score_cands(arma::rowvec& score_cands_gamma, + double y_t, + const arma::rowvec& forecast_tvc_t, + const arma::rowvec& variance_tvc_t, + double gamma, + int metric, + double risk_aversion, + double min_weight, + double max_weight) { - // Define Variables - List ret(3); - double mu_agg, variance_agg, ln_score; + // Define Variables + const int n_cands = score_cands_gamma.n_elem; + arma::rowvec performance_score(n_cands); performance_score.fill(arma::datum::nan); - // Subset Matrices - NumericVector oos_forecast_tvp_sub = matrix_subset_idx(oos_forecast_tvp, idx_sub, t); - NumericVector oos_variance_tvp_sub = matrix_subset_idx(oos_variance_tvp, idx_sub, t); + // Calculate Performance + for (unsigned int i=0; i rank_comb(const arma::rowvec& score_combs, + const arma::rowvec& mu_comb_vec, + const arma::rowvec& variance_comb_vec) { + + // Define Variables + arma::field ret(3); + + // 
Get index of highest performance score + const arma::uword best_idx = index_max(score_combs); + + // Select STSC-Forecast + auto it_mu = mu_comb_vec.begin() + best_idx; + auto it_var = variance_comb_vec.begin() + best_idx; + const double forecast_stsc = *it_mu; + const double variance_stsc = *it_var; + + // Fill Field + ret(0) = forecast_stsc; + ret(1) = variance_stsc; + ret(2) = best_idx; + + // Return + return ret; + } + + +// Function VI - Calculate (exponentially down-weighted) performance scores (-> ranking) for combinations (Aggregated Predictive Distributions) + void dsc_score_comb(arma::rowvec& score_combs, + double y_t, + const arma::rowvec& forecasts_comb, + const arma::rowvec& variances_comb, + double delta, + int metric, + double risk_aversion, + double min_weight, + double max_weight) { - // Define Variables - NumericVector pred_lik(n_models); - double oos_target_var_t = oos_target_var(t); - - // Calculate Predictive Likelihood - for( int i=0; i seen; + std::unordered_set target_values(values.begin(), values.end()); + std::vector result; + + // Remove duplicates for specified values + for (arma::uword i = 0; i < vec.n_elem; ++i) { + arma::uword val = vec[i]; + if (target_values.find(val) != target_values.end()) { + // Only check for duplicates for specified values + if (seen.find(val) == seen.end()) { + seen.insert(val); + result.push_back(val); + } + } else { + // Directly add other values + result.push_back(val); + } + } + + // Convert std::vector to arma::uvec and return + return arma::uvec(result); + } + +// Function VII - Loop over Gamma and Psi + arma::field dsc_loop(arma::field& score_cands, + arma::rowvec& score_combs, + arma::rowvec gamma_grid, + arma::irowvec psi_grid, + double y_t, + const arma::rowvec& forecast_tvc_t, + const arma::rowvec& variance_tvc_t, + double delta, + int metric, + bool equal_weight, + arma::uvec incl_idx, + double risk_aversion, + double min_weight, + double max_weight) { + + // Define Variables + const int n_combs = 
score_combs.n_cols; + arma::rowvec forecasts_comb(n_combs); forecasts_comb.fill(arma::datum::nan); + arma::rowvec variances_comb(n_combs); variances_comb.fill(arma::datum::nan); + arma::field chosen_cands(n_combs); + arma::field active_models(2); + arma::field agg_density(2); + arma::field stsc_results(3); + arma::field ret(4); + + // Set highest value for psi + const int psi_max = max(psi_grid); + + // Loop over Gamma and Psi + int ctr = 0; + for (unsigned int g=0; g psi_max) { + throw std::invalid_argument("Error in Active Models Selection"); + } + + // Add Keep-Index & Remove Duplicates + if (incl_idx.n_elem > 0) { + active_idx = arma::join_cols(incl_idx, active_idx); + active_idx = remove_duplicates(active_idx, incl_idx); + } + + // Loop over Psi + for (unsigned int p=0; p(0, psi - 1); + + // Select Active Set of Candidate Models + const arma::uvec active_idx_uvec = arma::conv_to::from(active_idx.elem(seq_psi)); + + // Save Active Set of Candidate Models + chosen_cands(ctr) = arma::conv_to::from(active_idx_uvec); + + // Create Active Weight Vector + arma::rowvec active_weights(psi); + if (equal_weight) { + active_weights.fill(1.0 / psi); + } else { + const arma::rowvec raw_weights = arma::conv_to::from(score_cands(g).elem(active_idx.elem(seq_psi))); + const arma::rowvec exp_raw_weights = exp(raw_weights); + active_weights = exp_raw_weights / accu(exp_raw_weights); + } + + // Calculate Aggregated Predictive Density + agg_density = dsc_agg_density(active_weights, + forecast_tvc_t, + variance_tvc_t, + active_idx_uvec); + + // Assign Results + forecasts_comb(ctr) = agg_density(0); + variances_comb(ctr) = agg_density(1); + ctr++; + } + + // Update score for Candidate Models + dsc_score_cands(score_cands(g), + y_t, + forecast_tvc_t, + variance_tvc_t, + gamma, + metric, + risk_aversion, + min_weight, + max_weight); + } + + // Select Aggregated Forecast + stsc_results = rank_comb(score_combs, + forecasts_comb, + variances_comb); + + // Assign Results + const double 
stsc_forecast = stsc_results(0); + const double stsc_variance = stsc_results(1); + const int stsc_idx = stsc_results(2); + + // Update score for Combinations (Aggregated Predictive Distributions) + dsc_score_comb(score_combs, + y_t, + forecasts_comb, + variances_comb, + delta, + metric, + risk_aversion, + min_weight, + max_weight); + + // Fill field + ret(0) = stsc_forecast; + ret(1) = stsc_variance; + ret(2) = stsc_idx; + ret(3) = chosen_cands(stsc_idx); + + // Return + return ret; } -// Function VI - Loop over Predictive and Update Step +// 3.) Wrapper Dynamic Subset Combination +// Function I - Loop over t // [[Rcpp::export]] - List dsc_loop(NumericVector weights, - double gamma, - int psi, - NumericVector oos_target_var, - NumericMatrix oos_forecast_tvp, - NumericMatrix oos_variance_tvp, - int len_para_grid, - int oos_length, - int n_models) { - - // Define Variables - List active_results(1), agg_density(1), ret(4); - NumericVector active_weights(psi), forecasts_agg(oos_length), variances_agg(oos_length), ln_scores(oos_length); - List selected_models(oos_length); - - // Start loop - for (int t = 0; t < oos_length; t++) { - - // Forget Weights - weights = forget_dsc(weights, gamma); - - // Active Models - active_results = active_models_dsc(weights, psi); - active_weights = active_results(0); - selected_models(t) = active_results(1); - - - // Aggregated Predictive Density and Log Score - agg_density = agg_density_dsc(active_weights, - oos_target_var, - oos_forecast_tvp, - oos_variance_tvp, - active_results(1), - t); - - // Assign Results - forecasts_agg(t) = agg_density(0); - variances_agg(t) = agg_density(1); - ln_scores(t) = agg_density(2); - - // Update Weights - weights = update_dsc(weights, - oos_target_var, - oos_forecast_tvp, - oos_variance_tvp, - n_models, - t); - } + List dsc_(const arma::vec& y, + const arma::mat& point_forecasts, + const arma::mat& variance_forecasts, + arma::rowvec gamma_grid, + arma::irowvec psi_grid, + double delta, + int burn_in, + 
int burn_in_dsc, + int metric, + bool equal_weight, + Nullable incl_, + Nullable portfolio_params_) { + + // Check Nullable Objects for metric 4 + if (metric == 4 && portfolio_params_.isNull()) { + throw std::invalid_argument("Error: Relevant parameter not provided!"); + } + + // Cast Nullable Objects for metric 4 + double risk_aversion = arma::datum::nan; + double min_weight = arma::datum::nan; + double max_weight = arma::datum::nan; + if (metric == 4) { + // Cast to NumericVector and extract values + NumericVector combined_params = as(portfolio_params_.get()); + if (combined_params.size() != 3) { + throw std::invalid_argument("Error: portfolio_params_ must contain exactly 3 elements!"); + } + risk_aversion = combined_params[0]; + min_weight = combined_params[1]; + max_weight = combined_params[2]; + } - // Fill list - ret[0] = forecasts_agg; - ret[1] = variances_agg; - ret[2] = ln_scores; - ret[3] = selected_models; - - // Return list - return ret; + // Define Variables for Dynamic Subset Combinations + const int tlength = y.n_elem; + const int n_cands = point_forecasts.n_cols; + const int n_combs = gamma_grid.n_elem * psi_grid.n_elem; + arma::vec stsc_forecast(tlength); stsc_forecast.fill(arma::datum::nan); + arma::vec stsc_variance(tlength); stsc_variance.fill(arma::datum::nan); + arma::vec stsc_idx(tlength); stsc_idx.fill(arma::datum::nan); + arma::field score_cands(gamma_grid.n_elem); + arma::rowvec score_combs(n_combs); + List chosen_cands(tlength); + + // Include: CFM that must be included in the Subsets + arma::uvec incl_idx; + if (incl_.isNotNull()) { + + // Cast Values to uvec incl + arma::uvec incl = as(incl_.get()); + + // Number of CFM per Signal + int grid_size = 1; + + // Resize incl_idx (-> Number of CFM that must be included) + incl_idx.set_size(grid_size * incl.n_elem); + + // Calculate the Indices of the CFM + int ctr = 0; + for (arma::uword k = 0; k < incl.n_elem; ++k) { + for (int i = 0; i < grid_size; ++i) { + arma::uword index = (incl[k] - 
1); + incl_idx(ctr) = index; + ctr++; + } + } + } + + // --- + // Apply DSC-Init-Function + arma::field> init_dsc_results(2); + init_dsc_results = dsc_init(n_cands, + n_combs, + gamma_grid.n_elem); + + // Assign Results + score_cands = init_dsc_results(0); + score_combs = init_dsc_results(1)(0); + + // --- + // Loop over t + for (unsigned int t=0; t> init_dsc_results_after_burn_in = dsc_init(n_cands, n_combs, gamma_grid.n_elem); + score_cands = init_dsc_results_after_burn_in(0); + score_combs = init_dsc_results_after_burn_in(1)(0); + stsc_forecast.fill(arma::datum::nan); + stsc_variance.fill(arma::datum::nan); + stsc_idx.fill(arma::datum::nan); + IntegerVector idx = Range(0, t); + chosen_cands[idx] = NA_INTEGER; + } + + if (t == (burn_in_dsc-1)) { + + arma::field> init_dsc_results_after_burn_in = dsc_init(n_cands, n_combs, gamma_grid.n_elem); + score_combs = init_dsc_results_after_burn_in(1)(0); + stsc_forecast.fill(arma::datum::nan); + stsc_variance.fill(arma::datum::nan); + stsc_idx.fill(arma::datum::nan); + IntegerVector idx = Range(0, t); + chosen_cands[idx] = NA_INTEGER; + } + + // Apply DSC-Function + arma::field dsc_results(4); + dsc_results = dsc_loop(score_cands, + score_combs, + gamma_grid, + psi_grid, + y_t, + point_forecasts_t, + variance_forecasts_t, + delta, + metric, + equal_weight, + incl_idx, + risk_aversion, + min_weight, + max_weight); + + // Assign Results + stsc_forecast(t) = dsc_results(0)(0); + stsc_variance(t) = dsc_results(1)(0); + stsc_idx(t) = dsc_results(2)(0); + chosen_cands(t) = dsc_results(3); + } + + // Fill list + List ret(4); + ret[0] = stsc_forecast; + ret[1] = stsc_variance; + ret[2] = stsc_idx; + ret[3] = chosen_cands; + + // Return list + return ret; } diff --git a/src/init.c b/src/init.c index b841c06..931687f 100644 --- a/src/init.c +++ b/src/init.c @@ -8,28 +8,16 @@ */ /* .Call calls */ -extern SEXP _hdflex_active_models_dsc(void *, void *); -extern SEXP _hdflex_agg_density_dsc(void *, void *, void *, void *, void *, void 
*); -extern SEXP _hdflex_dsc_loop(void *, void *, void *, void *, void *, void *, void *, void *, void *); -extern SEXP _hdflex_forget_dsc(void *, void *); -extern SEXP _hdflex_init_dsc(void *); -extern SEXP _hdflex_matrix_subset_idx(void *, void *, void *); -extern SEXP _hdflex_stsc_loop_(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -extern SEXP _hdflex_stsc_loop_par_(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); -extern SEXP _hdflex_tvc_(void *, void *, void *, void *, void *, void *, void *); -extern SEXP _hdflex_update_dsc(void *, void *, void *, void *, void *, void *); +extern SEXP _hdflex_dsc_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP _hdflex_stsc_loop_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP _hdflex_stsc_loop_par_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); +extern SEXP _hdflex_tvc_(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { - {"_hdflex_active_models_dsc", (DL_FUNC) &_hdflex_active_models_dsc, 2}, - {"_hdflex_agg_density_dsc", (DL_FUNC) &_hdflex_agg_density_dsc, 6}, - {"_hdflex_dsc_loop", (DL_FUNC) &_hdflex_dsc_loop, 9}, - {"_hdflex_forget_dsc", (DL_FUNC) &_hdflex_forget_dsc, 2}, - {"_hdflex_init_dsc", (DL_FUNC) &_hdflex_init_dsc, 1}, - {"_hdflex_matrix_subset_idx", (DL_FUNC) &_hdflex_matrix_subset_idx, 3}, - {"_hdflex_stsc_loop_", (DL_FUNC) &_hdflex_stsc_loop_, 18}, - {"_hdflex_stsc_loop_par_", (DL_FUNC) &_hdflex_stsc_loop_par_, 19}, - {"_hdflex_tvc_", (DL_FUNC) &_hdflex_tvc_, 7}, - {"_hdflex_update_dsc", (DL_FUNC) &_hdflex_update_dsc, 6}, + {"_hdflex_dsc_", (DL_FUNC) &_hdflex_dsc_, 12}, + {"_hdflex_stsc_loop_", (DL_FUNC) 
&_hdflex_stsc_loop_, 16}, + {"_hdflex_stsc_loop_par_", (DL_FUNC) &_hdflex_stsc_loop_par_, 17}, + {"_hdflex_tvc_", (DL_FUNC) &_hdflex_tvc_, 7}, {NULL, NULL, 0} }; @@ -38,3 +26,4 @@ void R_init_hdflex(DllInfo *dll) R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } + diff --git a/src/stsc.cpp b/src/stsc.cpp index 1a98d56..721a136 100644 --- a/src/stsc.cpp +++ b/src/stsc.cpp @@ -6,7 +6,7 @@ using namespace Rcpp; List init_tvc_(const arma::vec& y, const arma::mat& S, int n_raw_sig, - int sample_length, + int init, arma::vec lambda_grid, arma::vec kappa_grid, bool bias) { @@ -16,15 +16,15 @@ using namespace Rcpp; const int n_cands = n_signal * lambda_grid.n_elem * kappa_grid.n_elem; // Define Variables - arma::cube theta_cube(2, 1, n_cands); - arma::cube cov_mat_cube(2, 2, n_cands); + arma::cube theta_cube(2, 1, n_cands); + arma::cube cov_mat_cube(2, 2, n_cands); arma::rowvec h_vec(n_cands); - arma::vec y_sample, x, cm_na_idx(n_cands); cm_na_idx.fill(arma::datum::nan); - arma::uvec non_finite, init_idx; - arma::mat x_sample_one, theta, cov_mat; + arma::vec y_sample, x, cm_na_idx(n_cands); cm_na_idx.fill(arma::datum::nan); + arma::uvec non_finite, init_idx; + arma::mat x_sample_one, theta, cov_mat; arma::colvec coef; - double intercept, var_y, var_x, h; - List ret_all(4); + double intercept, var_y, var_x, h; + List ret_all(4); // Loop over all candidates models int ctr = 0; @@ -43,7 +43,7 @@ using namespace Rcpp; } // Index for subsetting the initialisation sample - init_idx = arma::regspace(0 + na_ctr, na_ctr + sample_length - 1); + init_idx = arma::regspace(0 + na_ctr, na_ctr + init - 1); // Define and prepare matrices for regression y_sample = y.elem(init_idx); @@ -60,15 +60,13 @@ using namespace Rcpp; var_y = arma::var(y_sample); var_x = arma::var(x_sample.col(1)); cov_mat = arma::zeros(2, 2); + + // Set Intercept Variance cov_mat(0, 0) = pow(intercept, 2) + var_y; // Set to Zero for Constant Intercept - // Distinguish between 
raw and processed signals + // Distinguish between P- and F-Signals if(j < n_raw_sig) { - if(var_x != 0.0) { - cov_mat(1, 1) = var_y / var_x; - } else { - cov_mat(1, 1) = var_y; - } + cov_mat(1, 1) = (var_x != 0.0) ? var_y / var_x : var_y; } else { theta(1, 0) = 1.0; // -> Slope Coefficient 1.0 cov_mat(1, 1) = 0.0; // -> Constant Slope Coefficient @@ -81,9 +79,9 @@ using namespace Rcpp; h = var_y; // Fill Cubes - theta_cube.slice(ctr) = theta; + theta_cube.slice(ctr) = theta; cov_mat_cube.slice(ctr) = cov_mat; - h_vec(ctr) = h; + h_vec(ctr) = h; ctr++; } } @@ -262,6 +260,7 @@ using namespace Rcpp; } +// 2) Dynamic Subset Combination // Function II - Rank and Set Active Model Set (Active Models) arma::field dsc_active_models_(const arma::rowvec& score_cands_gamma, int psi) { @@ -296,6 +295,8 @@ using namespace Rcpp; return ret; } + +// 2) Dynamic Subset Combination // Function III - Compute Aggregated Predictive Distribution arma::field dsc_agg_density_(const arma::rowvec& active_weights, const arma::rowvec& forecast_tvc_t, @@ -323,13 +324,14 @@ using namespace Rcpp; } +// 2) Dynamic Subset Combination // Function IV - Calculate (exponentially down-weighted) Performance Scores (-> Ranking) for Candidate Forecasts void dsc_score_cands_(arma::rowvec& score_cands_gamma, double y_t, const arma::rowvec& forecast_tvc_t, const arma::rowvec& variance_tvc_t, double gamma, - int method, + int metric, double risk_aversion, double min_weight, double max_weight) { @@ -344,8 +346,8 @@ using namespace Rcpp; // Check for NA value if (arma::is_finite(forecast_tvc_t(i))) { - // Optimization-Method - switch (method) { + // Optimization-metric + switch (metric) { case 1: { // Predictive-Log-Likelihoods performance_score(i) = arma::log_normpdf(y_t, @@ -406,7 +408,7 @@ using namespace Rcpp; break; } default: - throw std::invalid_argument("Error: Method not available"); + throw std::invalid_argument("Error: Metric not available"); } } } @@ -416,6 +418,7 @@ using namespace Rcpp; } +// 2) 
Dynamic Subset Combination // Function V - Rank and Select Aggregated Forecast arma::field rank_comb_(const arma::rowvec& score_combs, const arma::rowvec& mu_comb_vec, @@ -443,13 +446,14 @@ using namespace Rcpp; } +// 2) Dynamic Subset Combination // Function VI - Calculate (exponentially down-weighted) performance scores (-> ranking) for combinations (Aggregated Predictive Distributions) void dsc_score_comb_(arma::rowvec& score_combs, double y_t, const arma::rowvec& forecasts_comb, const arma::rowvec& variances_comb, double delta, - int method, + int metric, double risk_aversion, double min_weight, double max_weight) { @@ -461,8 +465,8 @@ using namespace Rcpp; // Calculate Performance for (unsigned int i=0; i seen; @@ -559,6 +565,8 @@ using namespace Rcpp; return arma::uvec(result); } + +// 2) Dynamic Subset Combination // Function VII - Loop over Gamma and Psi arma::field dsc_loop_(arma::field& score_cands, arma::rowvec& score_combs, @@ -568,7 +576,7 @@ using namespace Rcpp; const arma::rowvec& forecast_tvc_t, const arma::rowvec& variance_tvc_t, double delta, - int method, + int metric, bool equal_weight, arma::uvec incl_idx, double risk_aversion, @@ -608,7 +616,7 @@ using namespace Rcpp; // Add Keep-Index & Remove Duplicates if (incl_idx.n_elem > 0) { active_idx = arma::join_cols(incl_idx, active_idx); // -> Keep must be empty arma::uvec if no keep - active_idx = remove_duplicates(active_idx, incl_idx); + active_idx = remove_duplicates_(active_idx, incl_idx); } // Loop over Psi @@ -652,7 +660,7 @@ using namespace Rcpp; forecast_tvc_t, variance_tvc_t, gamma, - method, + metric, risk_aversion, min_weight, max_weight); @@ -674,7 +682,7 @@ using namespace Rcpp; forecasts_comb, variances_comb, delta, - method, + metric, risk_aversion, min_weight, max_weight); @@ -694,7 +702,7 @@ using namespace Rcpp; // ################################################################################### // Helper Function -- Replication of Rcpp::setdiff() - arma::uvec 
my_setdiff(arma::uvec y, arma::uvec x) { + arma::uvec my_setdiff_(arma::uvec y, arma::uvec x) { std::sort(x.begin(), x.end()); std::sort(y.begin(), y.end()); arma::uvec diff(x.n_elem); @@ -704,7 +712,7 @@ using namespace Rcpp; } // Helper Function -- Median with NAs - double compute_median(arma::rowvec vec) { + double compute_median_(arma::rowvec vec) { // Filter out NA values arma::vec finiteVec = vec(arma::find_finite(vec)); @@ -729,21 +737,19 @@ using namespace Rcpp; List stsc_loop_(const arma::vec& y, Nullable X_, Nullable Ext_F_, - int sample_length, + int init, arma::vec lambda_grid, arma::vec kappa_grid, - int burn_in_tvc, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, + int burn_in, int burn_in_dsc, - int method, + int metric, bool equal_weight, Nullable incl_, - Nullable risk_aversion_, - Nullable min_weight_, - Nullable max_weight_) { + Nullable portfolio_params_) { // Check whether Simple Signals and / or Point Forecasts are provided and create combined Signal-Matrix arma::mat S; @@ -763,20 +769,24 @@ using namespace Rcpp; S = as(Ext_F_.get()); } - // Check Nullable Objects for method 4 - if (method == 4 && (risk_aversion_.isNull() || min_weight_.isNull() || max_weight_.isNull())) { + // Check Nullable Objects for metric 4 + if (metric == 4 && portfolio_params_.isNull()) { throw std::invalid_argument("Error: Relevant parameter not provided!"); } - // Cast Nullable Objects for method 4 + // Cast Nullable Objects for metric 4 double risk_aversion = arma::datum::nan; double min_weight = arma::datum::nan; double max_weight = arma::datum::nan; - if (method == 4) { - // Cast to double - risk_aversion = as(risk_aversion_.get()); - min_weight = as(min_weight_.get()); - max_weight = as(max_weight_.get()); + if (metric == 4) { + // Cast to NumericVector and extract values + NumericVector combined_params = as(portfolio_params_.get()); + if (combined_params.size() != 3) { + throw std::invalid_argument("Error: portfolio_params_ must contain 
exactly 3 elements!"); + } + risk_aversion = combined_params[0]; + min_weight = combined_params[1]; + max_weight = combined_params[2]; } // Number of Candiate Models and Signals @@ -830,7 +840,7 @@ using namespace Rcpp; init_tvc_results = init_tvc_(y, S, n_raw_sig, - sample_length, + init, lambda_grid, kappa_grid, bias); @@ -866,34 +876,34 @@ using namespace Rcpp; // Check for NA-Values in Candidate Models in t new_na_cm.clear(); int ctr = 0; - for (unsigned int l = 0; l < lambda_grid.n_elem; l++) { - for (unsigned int k = 0; k < kappa_grid.n_elem; k++) { - for (unsigned int j = 0; j < S.n_cols; j++) { + for (unsigned int l = 0; l < lambda_grid.n_elem; l++) { + for (unsigned int k = 0; k < kappa_grid.n_elem; k++) { + for (unsigned int j = 0; j < S.n_cols; j++) { - // Check and Count for NA-Values - if (!arma::is_finite(s_t(j))) { - new_na_cm.insert_rows(new_na_cm.n_rows, 1); - new_na_cm(new_na_cm.n_rows-1) = ctr; - } - ctr++; + // Check and Count for NA-Values + if (!arma::is_finite(s_t(j))) { + new_na_cm.insert_rows(new_na_cm.n_rows, 1); + new_na_cm(new_na_cm.n_rows-1) = ctr; } - } - } + ctr++; + } + } + } // Identify Candidate Models that went to Non-Na if (new_na_cm.n_elem < current_na_cm.n_elem) { // Get the Index for the Signals that are not NA anymore - arma::uvec vec_diff = my_setdiff(new_na_cm, current_na_cm); + arma::uvec vec_diff = my_setdiff_(new_na_cm, current_na_cm); current_na_cm = new_na_cm; for (unsigned int g=0; g::from(score_cands(g))); // 0.0; // -> Insert Value !!! + score_cands(g)(i) = compute_median_(arma::conv_to::from(score_cands(g))); // 0.0; // -> Insert Value !!! 
} } } // Check for Burn-In-Period - if (t == (burn_in_tvc-1)) { + if (t == (burn_in-1)) { arma::field> init_dsc_results_after_burn_in = dsc_init_(n_cands, n_combs, gamma_grid.n_elem, current_na_cm); score_cands = init_dsc_results_after_burn_in(0); @@ -941,7 +951,7 @@ using namespace Rcpp; forecast_tvc_pred, variance_tvc_pred, delta, - method, + metric, equal_weight, incl_idx, risk_aversion, diff --git a/src/stsc_parallel.cpp b/src/stsc_parallel.cpp index 3c76f4c..1dee4fe 100644 --- a/src/stsc_parallel.cpp +++ b/src/stsc_parallel.cpp @@ -9,7 +9,7 @@ using namespace RcppThread; List init_tvc_par_(const arma::vec& y, const arma::mat& S, int n_raw_sig, - int sample_length, + int init, arma::vec lambda_grid, arma::vec kappa_grid, bool bias, @@ -17,19 +17,20 @@ using namespace RcppThread; // Get Dimensions const int n_signal = S.n_cols; - const int n_cands = n_signal * lambda_grid.n_elem * kappa_grid.n_elem; + const int n_cands = n_signal * lambda_grid.n_elem * kappa_grid.n_elem; // Define Variables - arma::cube theta_cube(2, 1, n_cands); - arma::cube cov_mat_cube(2, 2, n_cands); + arma::cube theta_cube(2, 1, n_cands); + arma::cube cov_mat_cube(2, 2, n_cands); arma::rowvec h_vec(n_cands); - arma::vec y_sample, cm_na_idx(n_cands); cm_na_idx.fill(arma::datum::nan); // x - List ret_all(4); + arma::vec cm_na_idx(n_cands); cm_na_idx.fill(arma::datum::nan); + List ret_all(4); // Define Counter Cube arma::cube ctr_cube(lambda_grid.n_elem, kappa_grid.n_elem, n_signal); unsigned int ctr = 0; - // fill ctr cube according to loop + + // Fill ctr cube according to loop for (unsigned int l = 0; l < lambda_grid.n_elem; l++) { for (unsigned int k = 0; k < kappa_grid.n_elem; k++) { for (unsigned int j = 0; j < n_signal; j++) { @@ -40,18 +41,18 @@ using namespace RcppThread; } // Parallel Loop over lambdas - parallelFor(0, lambda_grid.n_elem, [&y, &S, &n_cands, &sample_length, + parallelFor(0, lambda_grid.n_elem, [&y, &S, &init, &kappa_grid, &bias, &n_signal, &n_raw_sig, - &theta_cube, 
&cov_mat_cube, &h_vec, + &theta_cube, &cov_mat_cube, &h_vec, &cm_na_idx, &ctr_cube] (unsigned int l) { // Define variables - arma::vec x; - arma::uvec init_idx, non_finite; - arma::vec y_sample, cm_na_idx(n_cands); cm_na_idx.fill(arma::datum::nan); - arma::mat x_sample_one, theta, cov_mat; + arma::vec x; + arma::uvec init_idx, non_finite; + arma::vec y_sample; + arma::mat x_sample_one, theta, cov_mat; arma::colvec coef; - double intercept, var_y, var_x, h; + double intercept, var_y, var_x, h; // Loop over Kappas and Sinals for (unsigned int k = 0; k < kappa_grid.n_elem; k++) { @@ -64,12 +65,11 @@ using namespace RcppThread; non_finite = arma::find_nonfinite(x); int na_ctr = non_finite.n_elem; if (na_ctr > 0) { - // get lth lement from ctr cm_na_idx(ctr_cube(l, k, j)) = ctr_cube(l, k, j); } // Index for subsetting the initialisation sample - init_idx = arma::regspace(0 + na_ctr, na_ctr + sample_length - 1); + init_idx = arma::regspace(0 + na_ctr, na_ctr + init - 1); // Define and prepare matrices for regression y_sample = y.elem(init_idx); @@ -81,20 +81,18 @@ using namespace RcppThread; theta = arma::zeros(2,1); // Initialize - System Covariance - coef = solve(x_sample, y_sample); + coef = solve(x_sample, y_sample); intercept = coef(0); - var_y = arma::var(y_sample); - var_x = arma::var(x_sample.col(1)); - cov_mat = arma::zeros(2, 2); + var_y = arma::var(y_sample); + var_x = arma::var(x_sample.col(1)); + cov_mat = arma::zeros(2, 2); + + // Set Intercept Variance cov_mat(0, 0) = pow(intercept, 2) + var_y; // Set to Zero for Constant Intercept - // Distinguish between raw and processed signals + // Distinguish between P- and F-Signals if(j < n_raw_sig) { - if(var_x != 0.0) { - cov_mat(1, 1) = var_y / var_x; - } else { - cov_mat(1, 1) = var_y; - } + cov_mat(1, 1) = (var_x != 0.0) ? 
var_y / var_x : var_y; } else { theta(1, 0) = 1.0; // -> Slope Coefficient 1.0 cov_mat(1, 1) = 0.0; // -> Constant Slope Coefficient @@ -108,9 +106,9 @@ using namespace RcppThread; // Fill Cubes unsigned int index = ctr_cube(l, k, j); - theta_cube.slice(index) = theta; + theta_cube.slice(index) = theta; cov_mat_cube.slice(index) = cov_mat; - h_vec(index) = h; + h_vec(index) = h; } } }, n_threads); @@ -142,28 +140,28 @@ using namespace RcppThread; arma::field ret(2); // Get Signal for time t and t + 1 - const arma::mat z_t = {1.0, s_t_j}; - const arma::mat z_pred = {1.0, s_pred_j}; + arma::rowvec z_t = {1.0, s_t_j}; + arma::rowvec z_pred = {1.0, s_pred_j}; // Add noise to uncertainty of coefficients in time t (see Equation 5) - const arma::mat r_upt = cov_mat / lambda; + arma::mat r_upt = cov_mat / lambda; // Calculate (OOS) Forecast Error for time t (see Equation 7) - const double e_t = arma::as_scalar(y_t - z_t * theta); + double e_t = y_t - arma::dot(z_t, theta); // Update Observational Variance in time t (see Equation 10 and 11) h = arma::as_scalar(kappa * h + (1 - kappa) * pow(e_t, 2)); // Update Coefficients in time t (see Equation 7) - const double inv_tvar = arma::as_scalar(1.0 / (h + z_t * r_upt * z_t.t())); - theta = theta + r_upt * z_t.t() * inv_tvar * e_t; + double inv_tvar = arma::as_scalar(1.0 / (h + z_t * r_upt * z_t.t())); + theta += r_upt * z_t.t() * inv_tvar * e_t; // Update Uncertainty of Coefficients in time t (see Equation 8) cov_mat = r_upt - r_upt * z_t.t() * inv_tvar * (z_t * r_upt); // Get Predictive Density for Predicting t + 1 (see Equation 9) - const double mu = arma::as_scalar(z_pred * theta); - const double variance = arma::as_scalar(h + z_pred * ((1.0 / lambda) * cov_mat) * z_pred.t()); + double mu = arma::dot(z_pred, theta); + double variance = arma::as_scalar(h + z_pred * ((1.0 / lambda) * cov_mat) * z_pred.t()); // Fill Return-Field ret(0) = mu; @@ -194,7 +192,6 @@ using namespace RcppThread; arma::rowvec mu_vec(n_cands); 
arma::rowvec variance_vec(n_cands); - // Loop over Lambda // Define Counter Cube arma::cube ctr_cube(lambda_grid.n_elem, kappa_grid.n_elem, s_t.n_elem); unsigned int ctr = 0; @@ -222,7 +219,7 @@ using namespace RcppThread; for (unsigned int k = 0; k < kappa_grid.n_elem; k++) { // Set Kappa - const double kappa = kappa_grid(k); + const double kappa = kappa_grid(k); // Loop over all candidates for (unsigned int j = 0; j < s_t.n_elem; j++) { @@ -231,12 +228,11 @@ using namespace RcppThread; unsigned int counter = ctr_cube(l, k, j); // Set Signals - const double s_t_j = s_t(j); + const double s_t_j = s_t(j); const double s_pred_j = s_pred(j); // Check if signal is NA or not - const bool is_na = !arma::is_finite(s_t_j); - if(!is_na) { + if(arma::is_finite(s_t_j)) { // Apply TVC-Function const arma::field tvc_results = tvc_model_par_(y_t, @@ -248,13 +244,11 @@ using namespace RcppThread; cov_mat_cube.slice(counter), h_vec(counter)); // Assign TVC-Model-Results - mu_vec(counter) = tvc_results(0); + mu_vec(counter) = tvc_results(0); variance_vec(counter) = tvc_results(1); - } else { - // Assign TVC-Model-Results - mu_vec(counter) = arma::datum::nan; + mu_vec(counter) = arma::datum::nan; variance_vec(counter) = arma::datum::nan; } @@ -279,15 +273,15 @@ using namespace RcppThread; arma::field> dsc_init_par_(int n_cands, int n_combs, int n_gamma, - arma::uvec na_idx) { + arma::uvec& na_idx) { // Define Variables arma::field> ret(2); - // Initialize Vector for Performance-Score (Subset Combinations) -> Ranking + // Initialize Vector for Performance-Score (Subset Combinations) arma::rowvec score_combs(n_combs, arma::fill::zeros); - // Initialize Vector for Performance-Score (Candidate Models) -> Ranking + // Initialize Vector for Performance-Score (Candidate Models) arma::rowvec vec(n_cands, arma::fill::zeros); vec.elem(na_idx).fill(arma::datum::nan); @@ -306,6 +300,8 @@ using namespace RcppThread; return ret; } + +// 2) Dynamic Subset Combination // Function II - Rank and Set 
Active Model Set (Active Models) arma::field dsc_active_models_par_(const arma::rowvec& score_cands_gamma, int psi) { @@ -340,6 +336,8 @@ using namespace RcppThread; return ret; } + +// 2) Dynamic Subset Combination // Function III - Compute Aggregated Predictive Distribution arma::field dsc_agg_density_par_(const arma::rowvec& active_weights, const arma::rowvec& forecast_tvc_t, @@ -367,13 +365,14 @@ using namespace RcppThread; } +// 2) Dynamic Subset Combination // Function IV - Calculate (exponentially down-weighted) Performance Scores (-> Ranking) for Candidate Forecasts void dsc_score_cands_par_(arma::rowvec& score_cands_gamma, double y_t, const arma::rowvec& forecast_tvc_t, const arma::rowvec& variance_tvc_t, double gamma, - int method, + int metric, double risk_aversion, double min_weight, double max_weight) { @@ -388,8 +387,8 @@ using namespace RcppThread; // Check for NA value if (arma::is_finite(forecast_tvc_t(i))) { - // Optimization-Method - switch (method) { + // Optimization-metric + switch (metric) { case 1: { // Predictive-Log-Likelihoods performance_score(i) = arma::log_normpdf(y_t, @@ -416,22 +415,16 @@ using namespace RcppThread; double weight = std::min(std::max(w, min_weight), max_weight); // Returns - if (weight * y_t <= -1.0) { - performance_score(i) = -10000; - } else { - performance_score(i) = log(1.0 + weight * y_t); - } + performance_score(i) = (weight * y_t <= -1.0) ? 
-10000 : std::log(1.0 + weight * y_t); break; } case 5: { // Continuous-Ranked-Probability-Scores // Convert - double obs = y_t; - double mu = forecast_tvc_t(i); double sig = pow(variance_tvc_t(i), 0.5); // Standardize Prediction Error - double z = (obs - mu) / sig; + double z = (y_t - forecast_tvc_t(i)) / sig; // PDF evaluated at normalized Prediction Error double pdf = arma::normpdf(z); @@ -450,7 +443,7 @@ using namespace RcppThread; break; } default: - throw std::invalid_argument("Error: Method not available"); + throw std::invalid_argument("Error: Metric not available"); } } } @@ -459,6 +452,8 @@ using namespace RcppThread; score_cands_gamma = score_cands_gamma * gamma + performance_score * gamma; } + +// 2) Dynamic Subset Combination // Function V - Rank and Select Aggregated Forecast arma::field rank_comb_par_(const arma::rowvec& score_combs, const arma::rowvec& mu_comb_vec, @@ -471,10 +466,8 @@ using namespace RcppThread; const arma::uword best_idx = index_max(score_combs); // Select STSC-Forecast - auto it_mu = mu_comb_vec.begin() + best_idx; - auto it_var = variance_comb_vec.begin() + best_idx; - const double forecast_stsc = *it_mu; - const double variance_stsc = *it_var; + const double forecast_stsc = mu_comb_vec(best_idx); + const double variance_stsc = variance_comb_vec(best_idx); // Fill Field ret(0) = forecast_stsc; @@ -486,13 +479,14 @@ using namespace RcppThread; } +// 2) Dynamic Subset Combination // Function VI - Calculate (exponentially down-weighted) performance scores (-> ranking) for combinations (Aggregated Predictive Distributions) void dsc_score_comb_par_(arma::rowvec& score_combs, double y_t, const arma::rowvec& forecasts_comb, const arma::rowvec& variances_comb, double delta, - int method, + int metric, double risk_aversion, double min_weight, double max_weight) { @@ -504,8 +498,8 @@ using namespace RcppThread; // Calculate Performance for (unsigned int i=0; i dsc_loop_par_(arma::field& score_cands, arma::rowvec& score_combs, @@ -611,7 
+603,7 @@ using namespace RcppThread; const arma::rowvec& forecast_tvc_t, const arma::rowvec& variance_tvc_t, double delta, - int method, + int metric, bool equal_weight, arma::uvec incl_idx, double risk_aversion, @@ -645,14 +637,14 @@ using namespace RcppThread; &gamma_grid, &chosen_cands, &equal_weight, &forecasts_comb, &variances_comb, &forecast_tvc_t, &variance_tvc_t, - &y_t, &method, &incl_idx, &risk_aversion, + &y_t, &metric, &incl_idx, &risk_aversion, &min_weight, &max_weight] (unsigned int g) { // Set Gamma const double gamma = gamma_grid(g); unsigned int ctr; - - // Define variables for local scope + + // Define variables for local scope arma::field active_models(2); arma::field agg_density(2); @@ -710,7 +702,7 @@ using namespace RcppThread; forecast_tvc_t, variance_tvc_t, gamma, - method, + metric, risk_aversion, min_weight, max_weight); @@ -732,7 +724,7 @@ using namespace RcppThread; forecasts_comb, variances_comb, delta, - method, + metric, risk_aversion, min_weight, max_weight); @@ -787,22 +779,20 @@ using namespace RcppThread; List stsc_loop_par_(const arma::vec& y, Nullable X_, Nullable Ext_F_, - int sample_length, + int init, arma::vec lambda_grid, arma::vec kappa_grid, - int burn_in_tvc, bool bias, arma::rowvec gamma_grid, arma::irowvec psi_grid, double delta, + int burn_in, int burn_in_dsc, - int method, + int metric, bool equal_weight, Nullable incl_, int n_threads, - Nullable risk_aversion_, - Nullable min_weight_, - Nullable max_weight_) { + Nullable portfolio_params_) { // Check whether Simple Signals and / or Point Forecasts are provided and create combined Signal-Matrix arma::mat S; @@ -822,20 +812,24 @@ using namespace RcppThread; S = as(Ext_F_.get()); } - // Check Nullable Objects for method 4 - if (method == 4 && (risk_aversion_.isNull() || min_weight_.isNull() || max_weight_.isNull())) { + // Check Nullable Objects for metric 4 + if (metric == 4 && portfolio_params_.isNull()) { throw std::invalid_argument("Error: Relevant parameter not 
provided!"); } - // Cast Nullable Objects for method 4 + // Cast Nullable Objects for metric 4 double risk_aversion = arma::datum::nan; double min_weight = arma::datum::nan; double max_weight = arma::datum::nan; - if (method == 4) { - // Cast to double - risk_aversion = as(risk_aversion_.get()); - min_weight = as(min_weight_.get()); - max_weight = as(max_weight_.get()); + if (metric == 4) { + // Cast to NumericVector and extract values + NumericVector combined_params = as(portfolio_params_.get()); + if (combined_params.size() != 3) { + throw std::invalid_argument("Error: portfolio_params_ must contain exactly 3 elements!"); + } + risk_aversion = combined_params[0]; + min_weight = combined_params[1]; + max_weight = combined_params[2]; } // Number of Candiate Models and Signals @@ -889,7 +883,7 @@ using namespace RcppThread; init_tvc_results = init_tvc_par_(y, S, n_raw_sig, - sample_length, + init, lambda_grid, kappa_grid, bias, @@ -942,9 +936,9 @@ using namespace RcppThread; // Identify Candidate Models that went to Non-Na if (new_na_cm.n_elem < current_na_cm.n_elem) { - // Get the Index for the Signals that are not NA anymore + // Get the Index for the Signals that are not NA anymore arma::uvec vec_diff = my_setdiff_par(new_na_cm, current_na_cm); - current_na_cm = new_na_cm; + current_na_cm = new_na_cm; for (unsigned int g=0; g::from(score_cands(g))); // 0.0; // -> Insert Value !!! 
@@ -953,7 +947,7 @@ using namespace RcppThread; } // Check for Burn-In-Period - if (t == (burn_in_tvc-1)) { + if (t == (burn_in-1)) { arma::field> init_dsc_results_after_burn_in = dsc_init_par_(n_cands, n_combs, gamma_grid.n_elem, current_na_cm); score_cands = init_dsc_results_after_burn_in(0); @@ -1002,7 +996,7 @@ using namespace RcppThread; forecast_tvc_pred, variance_tvc_pred, delta, - method, + metric, equal_weight, incl_idx, risk_aversion, @@ -1026,4 +1020,4 @@ using namespace RcppThread; // Return list return ret; -} \ No newline at end of file +} diff --git a/src/tvc.cpp b/src/tvc.cpp index a5b102b..dab635d 100644 --- a/src/tvc.cpp +++ b/src/tvc.cpp @@ -7,7 +7,7 @@ using namespace Rcpp; List init_tvc(const arma::vec& y, const arma::mat& S, int n_raw_sig, - int sample_length, + int init, arma::vec lambda_grid, arma::vec kappa_grid, bool bias) { @@ -41,7 +41,7 @@ using namespace Rcpp; int na_ctr = non_finite.n_elem; // Index for subsetting the Initialisation Sample - init_idx = arma::regspace(0 + na_ctr, na_ctr + sample_length - 1); + init_idx = arma::regspace(0 + na_ctr, na_ctr + init - 1); // Define and prepare matrices for regression y_sample = y.elem(init_idx); @@ -225,7 +225,7 @@ using namespace Rcpp; List tvc_(const arma::vec& y, Nullable X_, Nullable Ext_F_, - int sample_length, + int init, const arma::vec& lambda_grid, const arma::vec& kappa_grid, bool bias) { @@ -266,7 +266,7 @@ using namespace Rcpp; init_tvc_results = init_tvc(y, S, n_raw_sig, - sample_length, + init, lambda_grid, kappa_grid, bias); diff --git a/tests/testthat/test-dsc.R b/tests/testthat/test-dsc.R index 8129347..2c915c4 100644 --- a/tests/testthat/test-dsc.R +++ b/tests/testthat/test-dsc.R @@ -1,154 +1,365 @@ +########################################################### ### Simulate Data +# Set Seed set.seed(123) # Set Dimensions -numb_obs <- 500 -numb_mods <- 50 - -# Create Random Target-Variable -target_var <- rnorm(n = numb_obs, mean = 0, sd = 1) - -# Create Random 
Candidate-Forecast-Matrix -forecast_tvc <- replicate(numb_mods, rnorm(n = numb_obs, mean = 0, sd = 1), ) -f_tvc_names <- paste0("X", as.character(seq_len(ncol(forecast_tvc)))) -colnames(forecast_tvc) <- f_tvc_names - -# Create Random Candidate-Variance-Matrix -variance_tvc <- replicate(numb_mods, abs(rnorm(n = numb_obs, mean = 0, sd = 1)), ) -v_tvc_names <- paste0("F", as.character(seq_len(ncol(variance_tvc)))) -colnames(variance_tvc) <- v_tvc_names - -# Set DSC Parameter -nr_mods <- numb_mods -gamma_grid <- c(0.8, 0.9, 0.95, 0.99, 1) -psi_grid <- c(1, 2, 3) -delta <- 0.99 -n_cores <- 1 - -### Test for no NULLs -test_that("Test whether every input parameter is specified.", { - - testthat::expect_error(dsc(NULL, - psi_grid, - target_var, - forecast_tvc, - variance_tvc, - delta, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - NULL, - target_var, - forecast_tvc, - variance_tvc, - delta, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - psi_grid, - NULL, - forecast_tvc, - variance_tvc, - delta, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - psi_grid, - target_var, - NULL, - variance_tvc, - delta, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - psi_grid, - target_var, - forecast_tvc, - NULL, - delta, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - psi_grid, - target_var, - forecast_tvc, - variance_tvc, - NULL, - n_cores), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(dsc(gamma_grid, - psi_grid, - target_var, - forecast_tvc, - variance_tvc, - delta, - NULL), - "not 'NULL'.", fixed = TRUE) -}) +n_obs <- 500 +n_sigs <- 90 -### Test on Dimnames -test_that("Test whether Code still works without dimnames", { - - colnames(forecast_tvc) <- NULL - colnames(variance_tvc) <- NULL - testthat::expect_no_error(dsc(gamma_grid, - psi_grid, - target_var, - forecast_tvc, - 
variance_tvc, - delta, - n_cores)) -}) +### Simulate Data +# Generate Covariates +X <- matrix(rnorm(n_obs * n_sigs), nrow = n_obs, ncol = n_sigs) + +# Generate Beta-Coefficients +n_relevant <- 10 +beta <- runif(n_relevant, -1.0, 1.0) + +# Compute f(x) +f_x <- X[, seq(n_relevant)] %*% beta + +# Generate Error-Term +eps <- rnorm(n_obs) + +# Calculate Response +y <- as.matrix(f_x + eps, ncol = 1) +returns <- as.matrix(exp(f_x + eps), ncol = 1) + +# F-Signals +Ext_F <- matrix(rep(y, 10), nrow = n_obs, ncol = 10) + rnorm(n_obs * 10) + +# Add Names +colnames(X) <- paste0("X", seq_len(n_sigs)) +colnames(y) <- "response" +colnames(Ext_F) <- paste0("F", seq_len(10)) + +########################################################### +### STSC Parameter +# TV-C-Parameter +init <- 10 +lambda_grid <- c(0.95, 1.00) +kappa_grid <- c(0.95, 0.97) +bias <- TRUE + +# Set DSC-Parameter +gamma_grid <- c(0.9, 0.95, 1) +psi_grid <- c(1:10) +delta <- 0.95 +burn_in <- 5 +burn_in_dsc <- 10 +metric <- 5 +equal_weight <- TRUE +incl <- NULL + +# Parallel-Parameter +parallel <- FALSE +n_threads <- NULL + +# Set Portfolio-Parameter +portfolio_params <- c(3, 0, 2) + +########################################################### +### Create Density Forecast +# Apply TVC-Function +tvc_results <- tvc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + +# Assign TVC-Results +forecast_tvc <- tvc_results$Forecasts$Point_Forecasts +variance_tvc <- tvc_results$Forecasts$Variance_Forecasts + +# Remove NAs +y <- y[-1, , drop = FALSE] +returns <- returns[-1, , drop = FALSE] +forecast_tvc <- forecast_tvc[-1, ] +variance_tvc <- variance_tvc[-1, ] + +########################################################### +### Test DSC (with test_that) +test_that("DSC-Function works correctly", { + + # Apply DSC-Function + results <- dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + 1, + equal_weight, + incl, + portfolio_params) + + # Apply DSC-Function + results <- 
dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + 2, + equal_weight, + incl, + portfolio_params) -### Output -test_that("Test whether the output has the right format", { + # Apply DSC-Function + results <- dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + 3, + equal_weight, + incl, + portfolio_params) - # Apply TVP-Function - results <- dsc(gamma_grid, - psi_grid, - target_var, - forecast_tvc, - variance_tvc, - delta, - n_cores) + # Apply DSC-Function + results <- dsc(returns, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + 4, + equal_weight, + incl, + portfolio_params) - # List Contains Five Elements - testthat::expect_equal(length(results), 5) + # Apply DSC-Function + results <- dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params) - # Number of Forecasts - checkmate::expect_numeric(results[[1]], - len = numb_obs, - any.missing = FALSE, + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs - 1, finite = TRUE) - # Number of Variances - checkmate::expect_numeric(results[[2]], - len = numb_obs, - any.missing = FALSE, + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs - 1, lower = 0, finite = TRUE) - # Length of Gamma-Vector - checkmate::expect_numeric(results[[3]], - len = numb_obs, - any.missing = FALSE, + + # Realization + expect_numeric(results$Forecasts$Realization, + len = n_obs - 1, + finite = TRUE) + + # Tuning Parameters List Contains three Elements + expect_equal(length(results$Tuning_Parameters), 3) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs 
- 1, + lower = min(gamma_grid), + upper = max(gamma_grid), + finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs - 1, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # CFM + expect_matrix(results$Tuning_Parameters$CFM, + mode = "integerish", + nrows = n_obs - 1, + ncols = ncol(forecast_tvc)) + + # Model List Contains 8 Elements + expect_equal(length(results$Model), 8) + + # Gamma Grid + expect_numeric(results$Model$Gamma_grid, + len = length(gamma_grid), + finite = TRUE) + + # Psi Grid + expect_numeric(results$Model$Psi_grid, + len = length(psi_grid), + finite = TRUE) + + # Delta + expect_numeric(results$Model$Delta, + len = 1, + finite = TRUE) + + # Burn-in + expect_numeric(results$Model$Burn_in, + len = 1, + finite = TRUE) + + # Burn-in DSC + expect_numeric(results$Model$Burn_in_dsc, + len = 1, + finite = TRUE) + + # Metric + expect_numeric(results$Model$Metric, + len = 1, + finite = TRUE) + + # Equal Weight + expect_equal(results$Model$Equal_weight, equal_weight) + + # Incl + expect_equal(results$Model$Incl, incl) +}) + +########################################################### +### Test DSC with inclusion +########################################################### +### Test STSC with inclusion +test_that("Test whether the STSC-Function works with inclusion", { + + # Set Inclusion + incl <- c(1, 2) + psi_grid <- c(8:20) + + # Apply DSC-Function + results <- dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params) + + # Cut-Off + cut_off <- seq(max(burn_in, burn_in_dsc)) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs - 1, + finite = TRUE) + + # Variance Forecasts + 
expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs - 1, lower = 0, finite = TRUE) - # Length of Psi-Vector - checkmate::expect_numeric(results[[4]], - len = numb_obs, - any.missing = FALSE, + + # Tuning Parameters List Contains three Elements + expect_equal(length(results$Tuning_Parameters), 3) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs - 1, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # CFM + expect_matrix(results$Tuning_Parameters$CFM, + mode = "integerish", + nrows = n_obs - 1, + ncols = ncol(forecast_tvc)) + + # Check if the CFMs in incl were really selected + for (i in incl) { + expect_true(all(results$Tuning_Parameters$CFM[-cut_off, i] > 0), + info = paste("Column", i, "contains zeros")) + } +}) + +########################################################### +### Test STSC with equal weight option +test_that("Test whether the STSC-Function works with equal weight option", { + + # Apply DSC-Function + results <- dsc(y, + forecast_tvc, + variance_tvc, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + FALSE, + incl, + portfolio_params) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs - 1, + finite = TRUE) + + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs - 1, lower = 0, finite = TRUE) - # Dimension of selected Candidate Forecasts - checkmate::expect_matrix(results[[5]], + # Tuning Parameters List Contains three Elements + expect_equal(length(results$Tuning_Parameters), 3) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs - 1, + lower = min(gamma_grid), + upper = max(gamma_grid), + finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs - 1, + lower = 
min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # CFM + expect_matrix(results$Tuning_Parameters$CFM, mode = "integerish", - nrows = numb_obs, - ncols = numb_mods) + nrows = n_obs - 1, + ncols = ncol(forecast_tvc)) }) diff --git a/tests/testthat/test-stsc.R b/tests/testthat/test-stsc.R index e4621d7..c3f3cc8 100644 --- a/tests/testthat/test-stsc.R +++ b/tests/testthat/test-stsc.R @@ -1,1034 +1,1097 @@ ########################################################### ### Simulate Data -# Set Seed -set.seed(123) - # Set Dimensions -numb_obs <- 500 -numb_noise <- 49 -numb_signals <- numb_noise + 1 -numb_forc <- 50 - -# Set up Coefficient-Matrix -theta <- matrix(NA, nrow = numb_obs, ncol = numb_signals) +n_obs <- 500 +n_sigs <- 90 -# Loop over Time -for (t in seq_len(numb_obs)) { +### Simulate Data +# Generate Covariates +X <- matrix(rnorm(n_obs * n_sigs), nrow = n_obs, ncol = n_sigs) - ### Theta: Abrupt Change - theta[t, 1] <- ifelse((t > 200 & t < 450), -0.30, 0.30) +# Generate Beta-Coefficients +n_relevant <- 10 +beta <- runif(n_relevant, -1.0, 1.0) - ### Noise Predictors - theta[t, -1] <- 0 -} +# Compute f(x) +f_x <- X[, seq(n_relevant)] %*% beta -### Draw Simple Signals -raw_signals <- replicate(numb_signals, rnorm(numb_obs, 0, 0.5)) -colnames(raw_signals) <- paste0("X", as.character(seq_len(numb_signals))) +# Generate Error-Term +eps <- rnorm(n_obs) -### Draw Noise -eps <- rnorm(numb_obs, 0, 0.1) +# Calculate Response +y <- as.matrix(f_x + eps, ncol = 1) +returns <- as.matrix(exp(f_x + eps), ncol = 1) -### Compute Target Variable -y <- rowSums(cbind(theta * raw_signals, eps)) +# F-Signals +Ext_F <- matrix(rep(y, 10), nrow = n_obs, ncol = 10) + rnorm(n_obs * 10) -# Create 'External' Forecasts -f_signals <- y + replicate(numb_forc, rnorm(numb_obs, 0, 0.5)) -colnames(f_signals) <- paste0("F", as.character(seq_len(numb_forc))) -########################################################### +# Add Names +colnames(X) <- paste0("X", seq_len(n_sigs)) +colnames(y) <- 
"response" +colnames(Ext_F) <- paste0("F", seq_len(10)) ########################################################### ### STSC Parameter # TV-C-Parameter -sample_length <- 100 -lambda_grid <- c(0.95, 1.00) -kappa_grid <- 0.97 -bias <- TRUE +init <- 10 +lambda_grid <- c(0.95, 1.00) +kappa_grid <- c(0.95, 0.97) +bias <- TRUE # Set DSC-Parameter -gamma_grid <- c(0.9, 0.95, 1) -psi_grid <- c(1:10) -delta <- 0.95 -incl <- NULL - -# Set Method-Parameter -burn_in_tvc <- 5 -burn_in_dsc <- 5 -method <- 1 -equal_weight <- TRUE -parallel <- FALSE -n_threads <- NULL +gamma_grid <- c(0.9, 0.95, 1) +psi_grid <- c(1:10) +delta <- 0.95 +burn_in <- 5 +burn_in_dsc <- 10 +metric <- 5 +equal_weight <- TRUE +incl <- NULL + +# Parallel-Parameter +parallel <- FALSE +n_threads <- 1 # Set Portfolio-Parameter -risk_aversion <- 3 -min_weight <- 0 -max_weight <- 2 +portfolio_params <- c(3, 0, 2) ########################################################### +### Test STSC +test_that("Test whether the STSC-Function works", { -########################################################### -### Tests on Y -test_that("Test whether y is Numeric Vector", { - - y <- as.data.frame(y) - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'numeric', not 'data.frame'.", fixed = TRUE) -}) + apply_stsc <- function(y, metric) { + stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + } -test_that("Test whether y is not NULL", { - - y <- NULL - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, 
- method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'numeric', not 'NULL'.", fixed = TRUE) -}) + check_results <- function(results, y) { + # List Contains three Elements + expect_equal(length(results), 3) -test_that("Test whether y has only numeric values", { - - y[10] <- "test" - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'numeric', not 'character'.", fixed = TRUE) -}) + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) -test_that("Test whether y has no NA-Values", { - - y[10] <- NA - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Contains missing values", fixed = TRUE) -}) + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, len = n_obs, finite = TRUE) -### Tests on X -test_that("Test whether x is matrix", { - - raw_signals <- as.data.frame(raw_signals) - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'matrix' (or 'NULL')", fixed = TRUE) -}) + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, len = n_obs, lower = 0, finite = TRUE) -test_that("Test whether x has the same number of observations as y", { - - raw_signals <- raw_signals[1:10, ] - testthat::expect_error(stsc(y, - 
raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must have exactly", fixed = TRUE) -}) + # Realization + expect_equal(results$Forecasts$Realization, y) -test_that("Test whether exception works when cov_mat cannot be initialised", { - - raw_signals[1:100, 10] <- 0 - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - f_signals[1:100, 10] <- 0 - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) -### Tests on f -test_that("Test whether f is matrix", { - - f_signals <- as.data.frame(f_signals) - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'matrix' (or 'NULL')", fixed = TRUE) -}) + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, len = n_obs, lower = min(gamma_grid), upper = max(gamma_grid), finite = TRUE) -test_that("Test whether f has the same number of observations as y", { - - f_signals <- f_signals[1:10, ] - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, 
- bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must have exactly", fixed = TRUE) -}) + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, len = n_obs, lower = min(psi_grid), upper = max(psi_grid), finite = TRUE) -### Tests on x and f -test_that("Test whether either x or f is provided", { - - raw_signals <- NULL - f_signals <- NULL - testthat::expect_error(stsc_loop_(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - risk_aversion, - min_weight, - max_weight), - "Error: No signals provided", - fixed = TRUE) - - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Assertion failed. 
One of the following must apply: - * checkmate::checkMatrix(X): Must be of type 'matrix', not 'NULL' - * checkmate::checkMatrix(Ext_F): Must be of type 'matrix', not 'NULL'", - fixed = TRUE) -}) + # Signals + expect_matrix(results$Tuning_Parameters$Signals, mode = "integerish", nrows = n_obs, ncols = (ncol(X) + ncol(Ext_F))) -test_that("Test whether Code still works with only raw signals / only point forecasts", { - - testthat::expect_no_error(stsc(y, - raw_signals, - NULL, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - NULL, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, mode = "integerish", nrows = n_obs, ncols = length(lambda_grid)) -test_that("Test whether Code still works with NA-values", { - - raw_signals[1:20, c(1, 3, 5)] <- NA - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, mode = "integerish", nrows = n_obs, ncols = length(kappa_grid)) -test_that("Test whether Code still works without dimnames", { - - colnames(raw_signals) <- NULL - colnames(f_signals) <- NULL - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - 
parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Model List Contains 12 Elements + expect_equal(length(results$Model), 12) -### Tests on Methods -test_that("Test whether Code still works with different methods", { - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method = 2, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method = 3, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method = 4, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method = 5, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) + # Lambda Grid + expect_equal(results$Model$Lambda_grid, lambda_grid) -}) + # Kappa Grid + expect_equal(results$Model$Kappa_grid, kappa_grid) -test_that("Test whether method is of given set", { - - method <- 6 - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be element of set {'1','2','3','4','5'}", - fixed = TRUE) - - 
testthat::expect_error(stsc_loop_(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - risk_aversion, - min_weight, - max_weight), - "Error: Method not available", fixed = TRUE) -}) + # Gamma Grid + expect_equal(results$Model$Gamma_grid, gamma_grid) -### Tests on Equal_weight -test_that("Tests on equal_weight", { - - equal_weight <- "True" - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'logical'", fixed = TRUE) - - equal_weight <- FALSE - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Psi Grid + expect_equal(results$Model$Psi_grid, psi_grid) + + # Delta + expect_equal(results$Model$Delta, delta) + + # Init + expect_equal(results$Model$Init, init) + + # Burn-in + expect_equal(results$Model$Burn_in, burn_in) -### Tests on Return Parameter -test_that("Test whether relevant parameter are provided", { - - method <- 4 - risk_aversion <- NULL - min_weight <- NULL - max_weight <- NULL - testthat::expect_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight), - "Must be of type 'number', not 'NULL'", - fixed = TRUE) - - testthat::expect_error(stsc_loop_(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, 
- bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - risk_aversion, - min_weight, - max_weight), - "Error: Relevant parameter not provided!", - fixed = TRUE) + # Burn-in DSC + expect_equal(results$Model$Burn_in_dsc, burn_in_dsc) + + # Metric + expect_numeric(results$Model$Metric, len = 1, lower = 1, upper = 5) + + # Equal Weight + expect_equal(results$Model$Equal_weight, equal_weight) + + # Bias + expect_equal(results$Model$Bias, bias) + + # Incl + expect_equal(results$Model$Incl, incl) + } + + # Apply STSC-Function + results1 <- apply_stsc(y, 1) + results2 <- apply_stsc(y, 2) + results3 <- apply_stsc(y, 3) + results4 <- apply_stsc(returns, 4) + results5 <- apply_stsc(y, 5) + + # Check results + check_results(results1, y) + check_results(results2, y) + check_results(results3, y) + check_results(results4, returns) + check_results(results5, y) }) +########################################################### ### Test STSC-Parallel -test_that("Test parallel function", { - - parallel <- TRUE - n_threads <- NULL - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - 2, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - 3, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) - - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - 
sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - 4, - equal_weight, - incl, - parallel, - n_threads, - 3, - 0, - 2)) - - # Apply Parallel-STSC-Function - parallel <- TRUE - n_threads <- 1 - stsc_par_results <- stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight) - - # Apply Parallel-STSC-Function - parallel <- FALSE - n_threads <- NULL - stsc_results <- stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight) +test_that("Test whether the STSC-Parallel-Function works", { - # Compare Forecasts - testthat::expect_equal(round(sum(na.omit(stsc_results[[1]])), 20), - round(sum(na.omit(stsc_par_results[[1]])), 20)) + apply_stsc <- function(y, metric) { + stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + 1, + portfolio_params) + } - # Compare Variances - testthat::expect_equal(round(sum(na.omit(stsc_results[[2]])), 20), - round(sum(na.omit(stsc_par_results[[2]])), 20)) + check_results <- function(results, y) { + # List Contains three Elements + expect_equal(length(results), 3) - # Compare Gammas - testthat::expect_equal(na.omit(stsc_results[[3]]), - na.omit(stsc_par_results[[3]])) + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) - # Compare Psis - testthat::expect_equal(na.omit(stsc_results[[4]]), - na.omit(stsc_par_results[[4]])) -}) + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, len = n_obs, finite = TRUE) 
-test_that("Test parallel function", { - - parallel <- TRUE - n_threads <- 1 - raw_signals[1:10, 1] <- 0 - raw_signals[1:100, 3] <- 0 - raw_signals[1:20, c(2, 5, 7)] <- NA - f_signals[1:30, 1] <- NA - testthat::expect_no_error(stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight)) -}) + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, len = n_obs, lower = 0, finite = TRUE) + + # Realization + expect_equal(results$Forecasts$Realization, y) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, len = n_obs, lower = min(gamma_grid), upper = max(gamma_grid), finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, len = n_obs, lower = min(psi_grid), upper = max(psi_grid), finite = TRUE) + + # Signals + expect_matrix(results$Tuning_Parameters$Signals, mode = "integerish", nrows = n_obs, ncols = (ncol(X) + ncol(Ext_F))) + + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, mode = "integerish", nrows = n_obs, ncols = length(lambda_grid)) + + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, mode = "integerish", nrows = n_obs, ncols = length(kappa_grid)) + + # Model List Contains 12 Elements + expect_equal(length(results$Model), 12) + + # Lambda Grid + expect_equal(results$Model$Lambda_grid, lambda_grid) + + # Kappa Grid + expect_equal(results$Model$Kappa_grid, kappa_grid) + + # Gamma Grid + expect_equal(results$Model$Gamma_grid, gamma_grid) + + # Psi Grid + expect_equal(results$Model$Psi_grid, psi_grid) + + # Delta + expect_equal(results$Model$Delta, delta) + + # Init + expect_equal(results$Model$Init, init) -### Output -test_that("Test whether the output has the right 
format", { + # Burn-in + expect_equal(results$Model$Burn_in, burn_in) + + # Burn-in DSC + expect_equal(results$Model$Burn_in_dsc, burn_in_dsc) + + # Metric + expect_numeric(results$Model$Metric, len = 1, lower = 1, upper = 5) + + # Equal Weight + expect_equal(results$Model$Equal_weight, equal_weight) + + # Bias + expect_equal(results$Model$Bias, bias) + + # Incl + expect_equal(results$Model$Incl, incl) + } # Apply STSC-Function - results <- stsc(y, - raw_signals, - f_signals, - sample_length, + results1 <- apply_stsc(y, 1) + results2 <- apply_stsc(y, 2) + results3 <- apply_stsc(y, 3) + results4 <- apply_stsc(returns, 4) + results5 <- apply_stsc(y, 5) + + # Check results + check_results(results1, y) + check_results(results2, y) + check_results(results3, y) + check_results(results4, returns) + check_results(results5, y) +}) + +############################################################# +### Test for same results between STSC and STSC-Parallel for different metrics +test_that("Test whether the STSC-Function and STSC-Parallel-Function return the same results", { + + for (m in seq(5)) { + + # Use returns instead of y if m == 4 + y_input <- if (m == 4) returns else y + + # Apply STSC-Function + results <- stsc(y_input, + X, + Ext_F, + init, lambda_grid, kappa_grid, - burn_in_tvc, bias, gamma_grid, psi_grid, delta, + burn_in, burn_in_dsc, - method, + m, equal_weight, incl, parallel, n_threads, - risk_aversion, - min_weight, - max_weight) + portfolio_params) + + # Apply STSC-Parallel-Function + results_par <- stsc(y_input, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + m, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) + + # Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts, + info = paste("Mismatch in Point_Forecasts for m =", m)) + + expect_equal(results$Forecasts$Variance_Forecasts, + results_par$Forecasts$Variance_Forecasts, + 
info = paste("Mismatch in Variance_Forecasts for m =", m)) + + expect_equal(results$Forecasts$Realization, + results_par$Forecasts$Realization, + info = paste("Mismatch in Realization for m =", m)) + + # Tuning Parameters + expect_equal(results$Tuning_Parameters$Gamma, + results_par$Tuning_Parameters$Gamma, + info = paste("Mismatch in Gamma for m =", m)) + + expect_equal(results$Tuning_Parameters$Psi, + results_par$Tuning_Parameters$Psi, + info = paste("Mismatch in Psi for m =", m)) + + expect_equal(results$Tuning_Parameters$Signals, + results_par$Tuning_Parameters$Signals, + info = paste("Mismatch in Signals for m =", m)) + + expect_equal(results$Tuning_Parameters$Lambda, + results_par$Tuning_Parameters$Lambda, + info = paste("Mismatch in Lambda for m =", m)) + + expect_equal(results$Tuning_Parameters$Kappa, + results_par$Tuning_Parameters$Kappa, + info = paste("Mismatch in Kappa for m =", m)) + } +}) + +########################################################### +### Test same results between STSC and TVC/DSC +test_that("Test whether the STSC-Function and TVC/DSC-Function return the same results", { + + # Apply STSC-Function + results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) + + # Apply TVC-Function + tvc_results <- hdflex::tvc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + + # Assign TVC-Results + forecast_tvc <- tvc_results$Forecasts$Point_Forecasts + variance_tvc <- tvc_results$Forecasts$Variance_Forecasts + + # First complete forecast period (no missing values) + sub_period <- seq(which(complete.cases(forecast_tvc))[1], nrow(y)) + + 
### Part 2: DSC-Function + # Apply DSC-Function + dsc_results <- hdflex::dsc(y[sub_period], + forecast_tvc[sub_period, , drop = FALSE], + variance_tvc[sub_period, , drop = FALSE], + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + NULL) + + # Forecasts + expect_true( + all( + all.equal( + na.omit(results$Forecasts$Point_Forecasts), + na.omit(results_par$Forecasts$Point_Forecasts), + check.attributes = FALSE, + ) == TRUE, + all.equal( + na.omit(results$Forecasts$Point_Forecasts), + na.omit(dsc_results$Forecasts$Point_Forecasts), + check.attributes = FALSE, + ) == TRUE + ) + ) + + expect_true( + all( + all.equal( + na.omit(results$Forecasts$Variance_Forecasts), + na.omit(results_par$Forecasts$Variance_Forecasts), + check.attributes = FALSE, + ) == TRUE, + all.equal( + na.omit(results$Forecasts$Variance_Forecasts), + na.omit(dsc_results$Forecasts$Variance_Forecasts), + check.attributes = FALSE, + ) == TRUE + ) + ) + + # Tuning Parameters + expect_true( + all( + all.equal( + na.omit(results$Tuning_Parameters$Gamma), + na.omit(dsc_results$Tuning_Parameters$Gamma), + check.attributes = FALSE + ) == TRUE, + all.equal( + na.omit(results$Tuning_Parameters$Gamma), + na.omit(results_par$Tuning_Parameters$Gamma), + check.attributes = FALSE + ) == TRUE + ) + ) + + expect_true( + all( + all.equal( + na.omit(results$Tuning_Parameters$Psi), + na.omit(dsc_results$Tuning_Parameters$Psi), + check.attributes = FALSE + ) == TRUE, + all.equal( + na.omit(results$Tuning_Parameters$Psi), + na.omit(results_par$Tuning_Parameters$Psi), + check.attributes = FALSE + ) == TRUE + ) + ) +}) + +########################################################### +### Test STSC with missing values +test_that("Test whether the STSC-Function works with missing values", { + + # Set Missing Values + X[1:20, 1] <- NA + Ext_F[1:15, 1] <- NA + + # Apply STSC-Function + results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + 
psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + NULL, + portfolio_params) + + # Compare Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs, + finite = TRUE) + + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, + lower = 0, + finite = TRUE) +}) + +########################################################### +### Test STSC with inclusion +test_that("Test whether the STSC-Function works with inclusion", { + + # Set Inclusion + incl <- c(1, 2) + psi_grid <- c(8:20) + + # Apply STSC-Function + results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) + + # Compare Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) + + # Cut-Off + cut_off <- seq(max(burn_in, burn_in_dsc)) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + 
len = n_obs, + finite = TRUE) + + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, + lower = 0, + finite = TRUE) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # Signals + expect_matrix(results$Tuning_Parameters$Signals, + mode = "integerish", + nrows = n_obs, + ncols = (ncol(X) + ncol(Ext_F))) + + # Check if the Signals in incl were really selected + for (i in incl) { + expect_true(all(results$Tuning_Parameters$Signals[-cut_off, i] > 0), + info = paste("Column", i, "contains zeros")) + } + + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, + mode = "integerish", + nrows = n_obs, + ncols = length(lambda_grid)) - # List Contains seven Elements - testthat::expect_equal(length(results), 7) + # Check that the Lambda matrix does not contain any zeros + expect_true(all(results$Tuning_Parameters$Lambda[-cut_off, ] > 0), + info = "Lambda matrix contains zeros") + + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, + mode = "integerish", + nrows = n_obs, + ncols = length(kappa_grid)) + + # Check that the Kappa matrix does not contain any zeros + expect_true(all(results$Tuning_Parameters$Kappa[-cut_off, ] > 0), + info = "Kappa matrix contains zeros") +}) + +########################################################### +### Test STSC with X +test_that("Test whether the STSC-Function works with X", { + + # Apply STSC-Function + results <- stsc(y, + X, + NULL, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + NULL, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + 
burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) + + # Compare Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) # Point Forecasts - checkmate::expect_numeric(results[[1]], - len = numb_obs, + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs, finite = TRUE) - # Variances - checkmate::expect_numeric(results[[2]], - len = numb_obs, + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, lower = 0, finite = TRUE) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + # Gamma-Vector - checkmate::expect_numeric(results[[3]], - len = numb_obs, + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs, lower = min(gamma_grid), upper = max(gamma_grid), finite = TRUE) + # Psi-Vector - checkmate::expect_numeric(results[[4]], - len = numb_obs, + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs, lower = min(psi_grid), upper = max(psi_grid), finite = TRUE) - # Candidate Forecasting Models - checkmate::expect_matrix(results[[5]], + # Signals + expect_matrix(results$Tuning_Parameters$Signals, mode = "integerish", - nrows = numb_obs, - ncols = (ncol(raw_signals) + ncol(f_signals))) + nrows = n_obs, + ncols = ncol(X)) # Lambda-Vector - checkmate::expect_matrix(results[[6]], - nrows = numb_obs, + expect_matrix(results$Tuning_Parameters$Lambda, + nrows = n_obs, + mode = "integerish", ncols = length(lambda_grid)) # Kappa-Vector - checkmate::expect_matrix(results[[7]], - nrows = numb_obs, + expect_matrix(results$Tuning_Parameters$Kappa, + nrows = n_obs, + mode = "integerish", ncols = length(kappa_grid)) }) -### Guarantee same results between tvc() & dsc() vs. 
stsc() -test_that("Test whether the different implementations give same results", { +### Test STSC with Ext_F +test_that("Test whether the STSC-Function works with Ext_F", { # Apply STSC-Function - stsc_results <- hdflex::stsc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - burn_in_tvc, - bias, - gamma_grid, - psi_grid, - delta, - burn_in_dsc, - method, - equal_weight, - incl, - parallel, - n_threads, - risk_aversion, - min_weight, - max_weight) + results <- stsc(y, + NULL, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) - # Apply TVC-Function - tvc_results <- hdflex::tvc(y, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias) + # Apply STSC-Function + results_par <- stsc(y, + NULL, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) + + # Compare Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) - # Assign Results - forecast_tvc <- tvc_results[[1]] - variance_tvc <- tvc_results[[2]] + # List Contains three Elements + expect_equal(length(results), 3) - # Cut Initialization-Period - sample_period_idx <- (burn_in_tvc+1):numb_obs - sub_forecast_tvc <- forecast_tvc[sample_period_idx, , drop = FALSE] - sub_variance_tvc <- variance_tvc[sample_period_idx, , drop = FALSE] - sub_y <- y[sample_period_idx] + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) - # Apply DSC-Function - dsc_results <- hdflex::dsc(gamma_grid, - psi_grid, - sub_y, - sub_forecast_tvc, - sub_variance_tvc, - delta, - 1) + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs, + finite = TRUE) + + # Variance Forecasts + 
expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, + lower = 0, + finite = TRUE) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs, + lower = min(gamma_grid), + upper = max(gamma_grid), + finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # Signals + expect_matrix(results$Tuning_Parameters$Signals, + mode = "integerish", + nrows = n_obs, + ncols = ncol(Ext_F)) + + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, + nrows = n_obs, + ncols = length(lambda_grid)) + + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, + nrows = n_obs, + ncols = length(kappa_grid)) +}) + +########################################################### +### Test STSC without Bias Correction +test_that("Test whether the STSC-Function works without Bias", { + + # Set Signal constant for init periods + X[1:10, 1] <- 0 + Ext_F[1:10, 1] <- 0 + + # Apply STSC-Function + results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + FALSE, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + FALSE, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + TRUE, + n_threads, + portfolio_params) # Compare Forecasts - testthat::expect_equal(round(sum(na.omit(stsc_results[[1]])[-1]), 20), - round(sum( dsc_results[[1]][-1]), 20)) + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) - # Compare Variances - testthat::expect_equal(round(sum(na.omit(stsc_results[[2]])[-1]), 20), - round(sum( dsc_results[[2]][-1]), 20)) + # List Contains three 
Elements + expect_equal(length(results), 3) - # Compare Gammas - testthat::expect_equal(na.omit(stsc_results[[3]])[-1], - dsc_results[[3]][-1]) + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) - # Compare Psis - testthat::expect_equal(na.omit(stsc_results[[4]])[-1], - dsc_results[[4]][-1]) + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs, + finite = TRUE) + + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, + lower = 0, + finite = TRUE) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs, + lower = min(gamma_grid), + upper = max(gamma_grid), + finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + # Signals + expect_matrix(results$Tuning_Parameters$Signals, + mode = "integerish", + nrows = n_obs, + ncols = (ncol(X) + ncol(Ext_F))) + + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, + mode = "integerish", + nrows = n_obs, + ncols = length(lambda_grid)) + + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, + mode = "integerish", + nrows = n_obs, + ncols = length(kappa_grid)) +}) + +########################################################### +### Test STSC with equal weight option +test_that("Test whether the STSC-Function works with equal weight option", { + + # Apply STSC-Function + results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + FALSE, + incl, + parallel, + n_threads, + portfolio_params) + + # Apply STSC-Function + results_par <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + FALSE, + 
incl, + TRUE, + n_threads, + portfolio_params) + + # Compare Forecasts + expect_equal(results$Forecasts$Point_Forecasts, + results_par$Forecasts$Point_Forecasts) + + # List Contains three Elements + expect_equal(length(results), 3) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_numeric(results$Forecasts$Point_Forecasts, + len = n_obs, + finite = TRUE) + + # Variance Forecasts + expect_numeric(results$Forecasts$Variance_Forecasts, + len = n_obs, + lower = 0, + finite = TRUE) + + # Tuning Parameters List Contains five Elements + expect_equal(length(results$Tuning_Parameters), 5) + + # Gamma-Vector + expect_numeric(results$Tuning_Parameters$Gamma, + len = n_obs, + lower = min(gamma_grid), + upper = max(gamma_grid), + finite = TRUE) + + # Psi-Vector + expect_numeric(results$Tuning_Parameters$Psi, + len = n_obs, + lower = min(psi_grid), + upper = max(psi_grid), + finite = TRUE) + + # Signals + expect_matrix(results$Tuning_Parameters$Signals, + mode = "integerish", + nrows = n_obs, + ncols = (ncol(X) + ncol(Ext_F))) + + # Lambda-Vector + expect_matrix(results$Tuning_Parameters$Lambda, + mode = "integerish", + nrows = n_obs, + ncols = length(lambda_grid)) + + # Kappa-Vector + expect_matrix(results$Tuning_Parameters$Kappa, + mode = "integerish", + nrows = n_obs, + ncols = length(kappa_grid)) }) diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R new file mode 100644 index 0000000..cb82aad --- /dev/null +++ b/tests/testthat/test-summary.R @@ -0,0 +1,174 @@ +########################################################### +### Simulate Data +# Set Seed +set.seed(123) + +# Set Dimensions +n_obs <- 500 +n_sigs <- 90 + +### Simulate Data +# Generate Covariates +X <- matrix(rnorm(n_obs * n_sigs), nrow = n_obs, ncol = n_sigs) + +# Generate Beta-Coefficients +n_relevant <- 10 +beta <- runif(n_relevant, -1.0, 1.0) + +# Compute f(x) +f_x <- X[, seq(n_relevant)] %*% beta + +# Generate 
Error-Term +eps <- rnorm(n_obs) + +# Calculate Response +y <- as.matrix(f_x + eps, ncol = 1) + +# F-Signals +Ext_F <- matrix(rep(y, 10), nrow = n_obs, ncol = 10) + rnorm(n_obs * 10) + +# Add Names +colnames(X) <- paste0("X", seq_len(n_sigs)) +colnames(y) <- "response" +colnames(Ext_F) <- paste0("F", seq_len(10)) + +########################################################### +### STSC Parameter +# TV-C-Parameter +init <- 10 +lambda_grid <- c(0.95, 1.00) +kappa_grid <- c(0.95, 0.97) +bias <- TRUE + +# Set DSC-Parameter +gamma_grid <- c(0.9, 0.95, 1) +psi_grid <- c(1:10) +delta <- 0.95 +burn_in <- 5 +burn_in_dsc <- 10 +metric <- 5 +equal_weight <- TRUE +incl <- NULL + +# Parallel-Parameter +parallel <- FALSE +n_threads <- 1 + +# Set Portfolio-Parameter +portfolio_params <- NULL + +########################################################### +### Create Density Forecast +# Apply TVC-Function +tvc_results <- hdflex::tvc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + +# Assign TVC-Results +forecast_tvc <- tvc_results$Forecasts$Point_Forecasts +variance_tvc <- tvc_results$Forecasts$Variance_Forecasts + +########################################################### +### Create STSC and DSC-Objects +# Apply DSC-Function +dsc_results <- dsc(y[-1, , drop = FALSE], + forecast_tvc[-1, ], + variance_tvc[-1, ], + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + portfolio_params) + +# Apply STSC-Function +stsc_results <- stsc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias, + gamma_grid, + psi_grid, + delta, + burn_in, + burn_in_dsc, + metric, + equal_weight, + incl, + parallel, + n_threads, + portfolio_params) + +########################################################### +### STSC - Object +# Test Metrics +test_that("summary calculates metrics correctly", { + result <- summary(stsc_results, eval_period = 50:500) + + eval_length <- length(50:500) + + expect_true(is.list(result)) + + expect_true("MSE" %in% 
names(result)) + expect_numeric(result$MSE[[1]], lower = 0, len = 1, any.missing = FALSE) + expect_numeric(result$MSE[[2]], lower = 0, len = eval_length, any.missing = FALSE) + + expect_true("ACRPS" %in% names(result)) + expect_numeric(result$ACRPS[[1]], lower = 0, len = 1, any.missing = FALSE) + expect_numeric(result$ACRPS[[2]], lower = 0, len = eval_length, any.missing = FALSE) + + expect_true("APLL" %in% names(result)) + expect_numeric(result$APLL[[1]], len = 1, any.missing = FALSE) + expect_numeric(result$APLL[[2]], len = eval_length, any.missing = FALSE) +}) + +# Test Plots +test_that("summary generates plots", { + result <- summary(stsc_results) + expect_true("Plots" %in% names(result)) + expect_true(is.ggplot(result$Plots$Gamma)) + expect_true(is.ggplot(result$Plots$Psi)) + expect_true(is.ggplot(result$Plots$Signals)) + expect_true(is.ggplot(result$Plots$Lambda)) + expect_true(is.ggplot(result$Plots$Kappa)) +}) + +### DSC - Object +# Test Metrics +test_that("summary calculates metrics correctly", { + result <- summary(dsc_results, eval_period = 50:499) + + eval_length <- length(50:499) + + expect_true(is.list(result)) + + expect_true("MSE" %in% names(result)) + expect_numeric(result$MSE[[1]], lower = 0, len = 1, any.missing = FALSE) + expect_numeric(result$MSE[[2]], lower = 0, len = eval_length, any.missing = FALSE) + + expect_true("ACRPS" %in% names(result)) + expect_numeric(result$ACRPS[[1]], lower = 0, len = 1, any.missing = FALSE) + expect_numeric(result$ACRPS[[2]], lower = 0, len = eval_length, any.missing = FALSE) + + expect_true("APLL" %in% names(result)) + expect_numeric(result$APLL[[1]], len = 1, any.missing = FALSE) + expect_numeric(result$APLL[[2]], len = eval_length, any.missing = FALSE) +}) + +# Test Plots +test_that("summary generates plots", { + result <- summary(dsc_results) + expect_true("Plots" %in% names(result)) + expect_true(is.ggplot(result$Plots$Gamma)) + expect_true(is.ggplot(result$Plots$Psi)) + 
expect_true(is.ggplot(result$Plots$CFM)) +}) \ No newline at end of file diff --git a/tests/testthat/test-summary_stsc.R b/tests/testthat/test-summary_stsc.R deleted file mode 100644 index fd714d4..0000000 --- a/tests/testthat/test-summary_stsc.R +++ /dev/null @@ -1,146 +0,0 @@ -### Simulate Data -set.seed(123) - -# Set Dimensions -numb_obs <- 500 -numb_pred <- 50 -numb_forc <- 10 - -# Create Random Target-Variable -target_var <- rnorm(n = numb_obs, mean = 0, sd = 1) - -# Create Random Simple Signals -raw_signals <- replicate(numb_pred, sample(0:10, numb_obs, rep = TRUE), ) -raw_names <- paste0("X", as.character(seq_len(ncol(raw_signals)))) -colnames(raw_signals) <- raw_names - -# Create Random (External) Point Forecasts -f_signals <- replicate(10, rnorm(n = numb_obs, mean = 0, sd = 0.5), ) -f_names <- paste0("F", as.character(seq_len(ncol(f_signals)))) -colnames(f_signals) <- f_names - -# Benchmark -benchmark <- dplyr::lag(roll::roll_mean(target_var, - width = length(target_var), - min_obs = 1), n = 1) - -# Specify TV-C-Parameter -sample_length <- floor(numb_obs / 10) -lambda_grid <- c(0.99, 0.999, 1.000) -kappa_grid <- c(0.94) -bias <- TRUE - -# Apply TVP-Function -results <- hdflex::tvc(target_var, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias) - -# Assign Results -forecast_tvc <- results[[1]] -variance_tvc <- results[[2]] - -# Define Cut Length -sample_period_idx <- (sample_length + 1):numb_obs - -# Trim Objects -sub_forecast_tvc <- forecast_tvc[sample_period_idx, , drop = FALSE] -sub_variance_tvc <- variance_tvc[sample_period_idx, , drop = FALSE] -sub_benchmark <- benchmark[sample_period_idx] -sub_target_var <- target_var[sample_period_idx] - -# Remove Objects -rm(list = c("results", "forecast_tvc", "variance_tvc")) - -##### Dynamic Subset Combination ##### -# Set DSC Parameter -nr_mods <- ncol(sub_forecast_tvc) -gamma_grid <- c(0.8, 0.9, 0.95, 0.99, 1) -psi_grid <- c(1:25) -delta <- 0.99 -n_cores <- 1 - -# Apply DSC-Function 
-results <- hdflex::dsc(gamma_grid, - psi_grid, - sub_target_var, - sub_forecast_tvc, - sub_variance_tvc, - delta, - n_cores) - - # Assign Results - sub_forecast_stsc <- results[[1]] - sub_variance_stsc <- results[[2]] - sub_chosen_gamma <- results[[3]] - sub_chosen_psi <- results[[4]] - sub_pred_pockets <- results[[5]] - - # Define Evaluation Period - eval_period_idx <- 51:length(sub_target_var) - - # Trim Objects - oos_target_var <- sub_target_var[eval_period_idx] - oos_benchmark <- sub_benchmark[eval_period_idx] - oos_forecast_stsc <- sub_forecast_stsc[eval_period_idx] - oos_variance_stsc <- sub_variance_stsc[eval_period_idx] - oos_chosen_gamma <- sub_chosen_gamma[eval_period_idx] - oos_chosen_psi <- sub_chosen_psi[eval_period_idx] - oos_pred_pockets <- sub_pred_pockets[eval_period_idx, , drop = FALSE] - oos_length <- length(eval_period_idx) - -### Test for no NULLs -test_that("Test whether every input parameter is specified.", { - - testthat::expect_error(summary_stsc(NULL, - oos_benchmark, - oos_forecast_stsc), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(summary_stsc(oos_target_var, - NULL, - oos_forecast_stsc), - "not 'NULL'.", fixed = TRUE) - - testthat::expect_error(summary_stsc(oos_target_var, - oos_benchmark, - NULL), - "not 'NULL'.", fixed = TRUE) -}) - -### Output -test_that("Test whether the output has the right format", { - - # Apply Statistial-Evaluation-Function - summary_results <- summary_stsc(oos_target_var, - oos_benchmark, - oos_forecast_stsc) - - # List Contains Four Elements - testthat::expect_equal(length(summary_results), 4) - - # Clark-West - checkmate::expect_number(summary_results[[1]], - na.ok = FALSE, - finite = TRUE) - - # OOS-R2 - checkmate::expect_number(summary_results[[2]], - na.ok = FALSE, - upper = 1, - finite = TRUE) - - # CSSED - checkmate::expect_numeric(summary_results[[3]], - len = length(oos_target_var), - any.missing = FALSE, - finite = TRUE) - - # MSE - checkmate::expect_list(summary_results[[4]], - types = 
"numeric", - len = 2) -}) \ No newline at end of file diff --git a/tests/testthat/test-tvc.R b/tests/testthat/test-tvc.R index 0a96a32..465c14f 100644 --- a/tests/testthat/test-tvc.R +++ b/tests/testthat/test-tvc.R @@ -1,249 +1,278 @@ +########################################################### ### Simulate Data +# Set Seed set.seed(123) # Set Dimensions -numb_obs <- 500 -numb_pred <- 50 -numb_forc <- 10 - -# Create Random Target-Variable -target_var <- rnorm(n = numb_obs, mean = 0, sd = 1) - -# Create Random Simple Signals -raw_signals <- replicate(numb_pred, sample(0:10, numb_obs, rep = TRUE), ) -raw_names <- paste0("X", as.character(seq_len(ncol(raw_signals)))) -colnames(raw_signals) <- raw_names - -# Create Random (External) Point Forecasts -f_signals <- replicate(10, rnorm(n = numb_obs, mean = 0, sd = 0.5), ) -f_names <- paste0("F", as.character(seq_len(ncol(f_signals)))) -colnames(f_signals) <- f_names - -# Specify TV-C-Parameter -sample_length <- 100 -lambda_grid <- c(0.99, 0.999, 1.000) -kappa_grid <- c(0.94) -bias <- TRUE - -### Tests on Y -test_that("Test whether y is Numeric Vector", { - - target_var <- as.data.frame(target_var) - testthat::expect_error(tvc(target_var, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias), - "Must be of type 'numeric', not 'data.frame'.", fixed = TRUE) -}) +n_obs <- 500 +n_sigs <- 90 -test_that("Test whether y is not NULL", { - - target_var <- NULL - testthat::expect_error(tvc(target_var, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias), - "Must be of type 'numeric', not 'NULL'.", fixed = TRUE) +### Simulate Data +# Generate Covariates +X <- matrix(rnorm(n_obs * n_sigs), nrow = n_obs, ncol = n_sigs) + +# Generate Beta-Coefficients +n_relevant <- 10 +beta <- runif(n_relevant, -1.0, 1.0) + +# Compute f(x) +f_x <- X[, seq(n_relevant)] %*% beta + +# Generate Error-Term +eps <- rnorm(n_obs) + +# Calculate Response +y <- as.matrix(f_x + eps, ncol = 1) + +# F-Signals 
+Ext_F <- matrix(rep(y, 10), nrow = n_obs, ncol = 10) + rnorm(n_obs * 10) + +# Add Names +colnames(X) <- paste0("X", seq_len(n_sigs)) +colnames(y) <- "response" +colnames(Ext_F) <- paste0("F", seq_len(10)) + +########################################################### +### STSC Parameter +# TV-C-Parameter +init <- 10 +lambda_grid <- c(0.95, 1.00) +kappa_grid <- c(0.95, 0.97) +bias <- TRUE + +########################################################### +### Test TVC +test_that("Test TVC", { + + # Apply TVC-Function + results <- hdflex::tvc(y, + X, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + + # List Contains three Elements + expect_equal(length(results), 2) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_matrix(results$Forecasts$Point_Forecasts, + nrow = n_obs, + ncol = (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)) + + # Variance Forecasts + expect_matrix(results$Forecasts$Variance_Forecasts, + nrow = n_obs, + ncol = (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)) + + # Ensure minimum value is 0 + expect_true(all(results$Forecasts$Variance_Forecasts[-1, ] >= 0)) + + # Realization + expect_numeric(results$Forecasts$Realization, + len = n_obs, + finite = TRUE) + + # Model List Contains 4 Elements + expect_equal(length(results$Model), 4) + + # Lambda Grid + expect_numeric(results$Model$Lambda_grid, + len = length(lambda_grid), + finite = TRUE) + + # Kappa Grid + expect_numeric(results$Model$Kappa_grid, + len = length(kappa_grid), + finite = TRUE) + + # Init + expect_numeric(results$Model$Init, + len = 1, + finite = TRUE) + + # Bias + expect_logical(results$Model$Bias, + len = 1, + any.missing = FALSE) }) -test_that("Test whether y has only numeric values", { - - target_var[10] <- "test" - testthat::expect_error(tvc(target_var, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias), - "Must be of type 'numeric', not 
'character'.", fixed = TRUE) +########################################################### +### Test STSC with X +test_that("Test whether the STSC-Function works with X", { + + # Apply TVC-Function + results <- hdflex::tvc(y, + X, + NULL, + init, + lambda_grid, + kappa_grid, + bias) + + # List Contains three Elements + expect_equal(length(results), 2) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_matrix(results$Forecasts$Point_Forecasts, + nrow = n_obs, + ncol = ncol(X) * length(lambda_grid) * length(kappa_grid)) + + # Variance Forecasts + expect_matrix(results$Forecasts$Variance_Forecasts, + nrow = n_obs, + ncol = ncol(X) * length(lambda_grid) * length(kappa_grid)) + + # Ensure minimum value is 0 + expect_true(all(results$Forecasts$Variance_Forecasts[-1, ] >= 0)) }) -test_that("Test whether y has no NA-Values", { - - target_var[10] <- NA - testthat::expect_error(tvc(target_var, - raw_signals, - f_signals, - sample_length, - lambda_grid, - kappa_grid, - bias), - "Contains missing values", fixed = TRUE) +########################################################### +### Test STSC with Ext_F +test_that("Test whether the STSC-Function works with Ext_F", { + + # Apply TVC-Function + results <- hdflex::tvc(y, + NULL, + Ext_F, + init, + lambda_grid, + kappa_grid, + bias) + + # List Contains three Elements + expect_equal(length(results), 2) + + # Forecasts List Contains three Elements + expect_equal(length(results$Forecasts), 3) + + # Point Forecasts + expect_matrix(results$Forecasts$Point_Forecasts, + nrow = n_obs, + ncol = ncol(Ext_F) * length(lambda_grid) * length(kappa_grid)) + + # Variance Forecasts + expect_matrix(results$Forecasts$Variance_Forecasts, + nrow = n_obs, + ncol = ncol(Ext_F) * length(lambda_grid) * length(kappa_grid)) + + # Ensure minimum value is 0 + expect_true(all(results$Forecasts$Variance_Forecasts[-1, ] >= 0)) }) -### Tests on X -test_that("Test whether x is matrix", { - - 
###########################################################
### Test TVC without Bias Correction
test_that("Test TVC without Bias Correction", {

  # Apply TVC-Function with bias correction switched off
  results <- hdflex::tvc(y,
                         X,
                         Ext_F,
                         init,
                         lambda_grid,
                         kappa_grid,
                         FALSE)

  # Result list contains two elements (Forecasts and Model).
  # The original comment claimed "three Elements", contradicting this check.
  expect_equal(length(results), 2)

  # Forecasts list contains three elements
  expect_equal(length(results$Forecasts), 3)

  # One candidate model per signal and (lambda, kappa) pair
  n_mods <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)

  # Point forecasts (`nrows`/`ncols` spelled out; the original `nrow`/`ncol`
  # relied on partial argument matching against checkmate::expect_matrix)
  expect_matrix(results$Forecasts$Point_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts
  expect_matrix(results$Forecasts$Variance_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts must be non-negative (first row excluded)
  expect_true(all(results$Forecasts$Variance_Forecasts[-1, ] >= 0))
})

###########################################################
### Test TVC with Missing Values
test_that("Test TVC with Missing / Constant Values", {

  # Inject missing values into the first row of the first ten signals
  X[1, 1:10] <- NA
  Ext_F[1, 1:10] <- NA

  # Inject constant values into the second row of the first ten signals
  X[2, 1:10] <- 1
  Ext_F[2, 1:10] <- 1

  # Apply TVC-Function
  results <- hdflex::tvc(y,
                         X,
                         Ext_F,
                         init,
                         lambda_grid,
                         kappa_grid,
                         bias)

  # Result list contains two elements (Forecasts and Model)
  expect_equal(length(results), 2)

  # Forecasts list contains three elements
  expect_equal(length(results$Forecasts), 3)

  # One candidate model per signal and (lambda, kappa) pair
  n_mods <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)

  # Point forecasts
  expect_matrix(results$Forecasts$Point_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts
  expect_matrix(results$Forecasts$Variance_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts must be non-negative; the first eleven rows are
  # excluded (presumably the init window plus the rows perturbed above --
  # TODO confirm against tvc()'s warm-up behavior)
  expect_true(all(results$Forecasts$Variance_Forecasts[-c(1:11), ] >= 0))
})

###########################################################
### Test TVC without providing colnames
test_that("Test TVC without providing colnames", {

  # Remove colnames.
  # NOTE(review): Ext_F keeps its colnames here, so the no-colnames path is
  # only exercised for X and y -- consider also setting colnames(Ext_F) <- NULL.
  colnames(X) <- NULL
  colnames(y) <- NULL

  # Apply TVC-Function
  results <- hdflex::tvc(y,
                         X,
                         Ext_F,
                         init,
                         lambda_grid,
                         kappa_grid,
                         bias)

  # Result list contains two elements (Forecasts and Model)
  expect_equal(length(results), 2)

  # Forecasts list contains three elements
  expect_equal(length(results$Forecasts), 3)

  # One candidate model per signal and (lambda, kappa) pair
  n_mods <- (ncol(X) + ncol(Ext_F)) * length(lambda_grid) * length(kappa_grid)

  # Point forecasts
  expect_matrix(results$Forecasts$Point_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts
  expect_matrix(results$Forecasts$Variance_Forecasts,
                nrows = n_obs,
                ncols = n_mods)

  # Variance forecasts must be non-negative (first row excluded)
  expect_true(all(results$Forecasts$Variance_Forecasts[-1, ] >= 0))
})