diag_util_functions.R

# LEGAL NOTICE
# This computer software was prepared by Battelle Memorial Institute,
# hereinafter the Contractor, under Contract No. DE-AC05-76RL0 1830
# with the Department of Energy (DOE). NEITHER THE GOVERNMENT NOR THE
# CONTRACTOR MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY
# LIABILITY FOR THE USE OF THIS SOFTWARE. This notice including this
# sentence must appear on any copies of this computer software.
# 
# EXPORT CONTROL
# User agrees that the Software will not be shipped, transferred or
# exported into any country or used in any manner prohibited by the
# United States Export Administration Act or any other applicable
# export laws, restrictions or regulations (collectively the "Export Laws").
# Export of the Software may require some form of license or other
# authority from the U.S. Government, and failure to obtain such
# export control license may result in criminal liability under
# U.S. laws. In addition, if the Software is identified as export controlled
# items under the Export Laws, User represents and warrants that User
# is not a citizen, or otherwise located within, an embargoed nation
# (including without limitation Iran, Syria, Sudan, Cuba, and North Korea)
#     and that User is not otherwise prohibited
# under the Export Laws from receiving the Software.
# 
# Copyright 2011 Battelle Memorial Institute.  All Rights Reserved.
# Distributed as open-source under the terms of the Educational Community 
# License version 2.0 (ECL 2.0). http://www.opensource.org/licenses/ecl2.php
# 
# For further details, see: http://www.globalchange.umd.edu/models/gcam/
#

# diag_util_functions.R
#
# An automated graphing system to process GCAM output data (as generated by the
# ModelInterface) and generate both standard and user-defined graphs.
#
# Ben Bond-Lamberty, November 2012

# -----------------------------------------------------------------------------
# A library of useful utility methods that can be useful to get GCAM data,
# aggregate regions, compute differences, etc.

# -----------------------------------------------------------------------------
# Adds a Global region which is the sum of all the regions in d
add_global_sum <- function(d) {
    names_keep <- names(d)[ !(names(d) %in% c("region", "value")) ]
    agg_formula <- as.formula(paste("value ~", paste(names_keep, collapse=" + ")))
    d.global <- aggregate(agg_formula, d, FUN=sum)
    d.global$region <- "Global"
    d.global <- d.global[, names(d)]
    d <- rbind(d, d.global)
    return(d)
}

# -----------------------------------------------------------------------------
# Aggregate regions given a region mapping data.frame.  The "aggregate" region
# definition will be looked for in colname of the mapping data.frame.
aggregate_regions <- function(d, mapping, colname="agg_region") {
    d <- merge(d, mapping[, c("region", colname)], all.x=TRUE)
    # If a region was not including in the mapping use the original name
    d[is.na(d[,colname]), colname] <- d[is.na(d[,colname]), "region"]
    names_keep <- names(d)[ !(names(d) %in% c("region", "value")) ]
    agg_formula <- as.formula(paste("value ~", paste(names_keep, collapse=" + ")))
    d.agg <- aggregate(agg_formula, d, FUN=sum)
    d.agg$region <- d.agg[, colname]
    d.agg <- d.agg[, names(d)]
    d.agg[, colname] <- NULL
    return(d.agg)
}

# same as above but calculating average of aggregated regions instead of sum
aggregate_regions_avg <- function(d, mapping, colname="agg_region") {
    d <- merge(d, mapping[, c("region", colname)], all.x=TRUE)
    # If a region was not including in the mapping use the original name
    d[is.na(d[,colname]), colname] <- d[is.na(d[,colname]), "region"]
    names_keep <- names(d)[ !(names(d) %in% c("region", "value")) ]
    agg_formula <- as.formula(paste("value ~", paste(names_keep, collapse=" + ")))
    d.agg <- aggregate(agg_formula, d, FUN=mean)
    d.agg$region <- d.agg[, colname]
    d.agg <- d.agg[, names(d)]
    d.agg[, colname] <- NULL
    return(d.agg)
}

# -----------------------------------------------------------------------------
# Compute "energy reduction" or more generically the difference in the total sum
# of some variable of interest named by var_name between scenarios from the base
# scenario named by base_scn_name
compute_energy_reduction <- function(d, var_name, base_scn_name=BASE_SCENARIO_NAME) {
    d.reduction <- aggregate(value ~ scenario + region + year, d, FUN=sum)
    d.reduction <- dcast(d.reduction, region + year ~ scenario)
    d.reduction[, !(names(d.reduction) %in% c("region", "year"))] <- 
         d.reduction[,base_scn_name] - d.reduction[, !(names(d.reduction) %in% c("region", "year"))]
    d.reduction <- melt(d.reduction, id.vars=c("region", "year"), variable.name="scenario")
    d.reduction[, var_name] <- "energy reduction"
    d.reduction$Units <- d$Units[1]
    #for(scn in unique(d.reduction$scenario)) {
        #d.reduction[, c("date", "file")] <- subset(d, scenario == scn, select=c("date", "file"))[1,]
    #}
    d.reduction[, names(d)[!(names(d) %in% names(d.reduction))]] <- NA
    d <- rbind(d, d.reduction)
    return(d)
}

# -----------------------------------------------------------------------------
# A helper function to generate a bar graph that will have negative values by
# spliting the plot into: positive values and negative.
split_neg_geom_bar <- function(p) {
    # The subset functionality has been removed from ggplot. With no
    # reliable way of doing this in a way that will be compatible with
    # do_graph's page_variables we will have to simply set a flag for
    # now and let do_graph. detect it and do the split then.
    p$do_neg_split <- TRUE
    return(p)
}

# -----------------------------------------------------------------------------
# A helper function to subset the years to plot before continuing with the
# do_graph.
do_graph_yearSubset <- function(p, year_subset=2035, ...) {
    p$data <- subset(p$data, year <= year_subset)
    p$data[TITLE_FIELD_NAME] <- paste(year_subset, p$data[1,TITLE_FIELD_NAME], sep=OUTPUT_FILENAME_SEP)
    p[[ "filter_data" ]][[ "year_subset" ]] <- year_subset
    # TODO: adjust plot dimensions?
    do_graph(p, ...)
}

# -----------------------------------------------------------------------------
# Compute the difference of some variable of interest named by var_name between
# scenarios from the base scenario named by base_scn_name. The difference can
# optionally be calculated as relative.  A "diff" column will be added to the
# given data.frame.
compute_diff <- function(d, var_name="value", base.scenario=BASE_SCENARIO_NAME, relative.diff=FALSE) {
    d.base <- subset(d, scenario == base.scenario)
    names(d.base)[names(d.base) == var_name] <- "base"
    d.base <- d.base[, !(names(d.base) %in% c("scenario", "date", "file"))]
    d <- merge(d[d$scenario != base.scenario, ], d.base, all=TRUE)
    d[is.na(d$value), var_name] <- 0
    d[is.na(d$base), "base"] <- 0
    d$diff <- d[, var_name] - d$base
    if(relative.diff) {
        d$diff <- d$diff / d$base * 100
        d[is.na(d$diff), "diff"] <- 1
        d$Units <- "%"
    }
    return(d)
}

# -----------------------------------------------------------------------------
# Compute the difference of some variable between scenarios (see compute_diff)
# of the data in the given plot and generate a new figure where the difference
# is plotted instead.  Differences will be plots as "geom_line".
create_diff_plot <- function(p, var_name="value", base.scenario=BASE_SCENARIO_NAME, relative.diff=FALSE) {
    p$data <- compute_diff(p$data, var_name, base.scenario, relative.diff)
    p$data[TITLE_FIELD_NAME] <- paste("Diff", p$data[1,TITLE_FIELD_NAME], sep=OUTPUT_FILENAME_SEP)
    p[[ "filter_data" ]][[ "diff_plot" ]] <- TRUE
    p$layers[[1]]$mapping$y <- as.symbol("diff")
    if("fill" %in% names(p$layers[[1]]$mapping)) {
        p$layers[[1]]$mapping$colour <- p$layers[[1]]$mapping$fill
        p$layers[[1]]$mapping$fill <- NULL
        p$labels$colour <- p$labels$fill
        p$labels$fill <- NULL
        p$scales$scales[[1]]$aesthetics <- "colour"
    }
    p$layers[[1]] <- geom_line(p$layers[[1]]$mapping, size=1.5)
    return(p)
}

# -----------------------------------------------------------------------------
# Call do graph however allow some page vars to remain free while fixing some
# others.
do_graph_some_scale_fixed <- function(p, ylabel, page_free, page_fixed) {
    for(i in unique(p$data[, page_free])) {
        p_temp <- p
        p_temp$data <- p_temp$data[p_temp$data[, page_free] == i, ]
        p_temp$labels$title <- paste(i, p_temp$labels$title)
        do_graph(p_temp, ylab=ylabel, page_variables=page_fixed, scales="fixed_y")
    }
}