diff --git a/DESCRIPTION b/DESCRIPTION index dc9cc87..1aaa15c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -21,7 +21,7 @@ BugReports: https://github.com/lanl/NEONiso/issues URL: https://github.com/lanl/NEONiso Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.0 +RoxygenNote: 7.2.1 Imports: dplyr, zoo, diff --git a/NEWS.md b/NEWS.md index 2023071..da34082 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,7 +5,7 @@ have some known issues (e.g., no correction is made for concentration dependence of the analyzers yet), and any data produced from this function should be considered provisional. * Added capability to plot data used in carbon calibration regression in order -to help identify periods where calibration paramters seem to be okay, but +to help identify periods where calibration parameters seem to be okay, but quality of calibrated data is degraded. * Added cross-validation error estimates to carbon calibration routines. * The calibrate_carbon_bymonth() function has been marked as deprecated, but will diff --git a/R/calibrate_ambient_carbon_Bowling2003.R b/R/calibrate_ambient_carbon_Bowling2003.R index ec46273..bf6020e 100644 --- a/R/calibrate_ambient_carbon_Bowling2003.R +++ b/R/calibrate_ambient_carbon_Bowling2003.R @@ -2,7 +2,7 @@ #' #' @author Rich Fiorella \email{rfiorella@@lanl.gov} #' -#' Function called by `calibrate_carbon_bymoth()` to apply +#' Function called by `calibrate_carbon_bymonth()` to apply #' gain and offset parameters to the ambient datasets (000_0x0_09m and #' 000_0x0_30m). This function should generally not be used independently, #' but should be used in coordination with @@ -155,8 +155,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list, # extract 12CO2 and 13CO2 concentrations from the ambient data, # set as NA, unless overwritten - mean12c <- max12c <- min12c <- cv5rmse12c <- loocv12c <- rep(NA, length(amb_delta$mean)) # placeholders for 12CO2 vecs - mean13c <- max13c <- min13c <- cv5rmse13c <- loocv13c <- rep(NA, length(amb_delta$mean)) # placeholders for 13CO2 vecs + mean12c <- max12c <- min12c <- cv5rmse12c <- cvloo12c <- rep(NA, length(amb_delta$mean)) # placeholders for 12CO2 vecs + mean13c <- max13c <- min13c <- cv5rmse13c <- cvloo13c <- rep(NA, length(amb_delta$mean)) # placeholders for 13CO2 vecs for (i in seq_len(length(var_inds_in_calperiod))) { # calculate calibrated 12CO2 concentrations @@ -166,8 +166,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list, amb_12CO2$min[var_inds_in_calperiod[[i]]] + caldf$offset12C[i] max12c[var_inds_in_calperiod[[i]]] <- caldf$gain12C[i] * amb_12CO2$max[var_inds_in_calperiod[[i]]] + caldf$offset12C[i] - cv5rmse12c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_12C[i] - loocv12c[var_inds_in_calperiod[[i]]] <- caldf$loocv_12C[i] + # cv5rmse12c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_12C[i] + # cvloo12c[var_inds_in_calperiod[[i]]] <- caldf$cvloo_12C[i] # calculate calibrated 13CO2 concentrations mean13c[var_inds_in_calperiod[[i]]] <- caldf$gain13C[i] * @@ -176,8 +176,8 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list, amb_13CO2$min[var_inds_in_calperiod[[i]]] + caldf$offset13C[i] max13c[var_inds_in_calperiod[[i]]] <- caldf$gain13C[i] * amb_13CO2$max[var_inds_in_calperiod[[i]]] + caldf$offset13C[i] - cv5rmse13c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_13C[i] - loocv13c[var_inds_in_calperiod[[i]]] <- caldf$loocv_13C[i] + # cv5rmse13c[var_inds_in_calperiod[[i]]] <- caldf$cv5rmse_13C[i] + # cvloo13c[var_inds_in_calperiod[[i]]] <- caldf$cvloo_13C[i] } @@ -198,12 +198,13 @@ calibrate_ambient_carbon_Bowling2003 <- function(amb_data_list, } # calculate uncertainties: - amb_delta$CVcalUcrt <- round(abs(amb_delta$mean_cal) * - sqrt((cv5rmse12c/mean12c)^2 + (cv5rmse13c/mean13c)^2), 3) - amb_delta$LOOcalUcrt <- round(abs(amb_delta$mean_cal) * - sqrt((loocv12c/mean12c)^2 + (loocv13c/mean13c)^2), 3) - amb_co2$CVcalUcrt <- round(sqrt(cv5rmse12c^2 + cv5rmse13c^2), 3) - amb_co2$LOOcalUcrt <- round(sqrt(loocv12c^2 + loocv13c^2), 3) + # save this ucrt propogation for later version. + #amb_delta$CVcalUcrt <- round(abs(amb_delta$mean_cal) * + # sqrt((cv5rmse12c/mean12c)^2 + (cv5rmse13c/mean13c)^2), 3) + #amb_delta$LOOcalUcrt <- round(abs(amb_delta$mean_cal) * + # sqrt((cvloo12c/mean12c)^2 + (cvloo13c/mean13c)^2), 3) + #amb_co2$CVcalUcrt <- round(sqrt(cv5rmse12c^2 + cv5rmse13c^2), 3) + #amb_co2$LOOcalUcrt <- round(sqrt(cvloo12c^2 + cvloo13c^2), 3) # replace ambdf in amb_data_list, return amb_data_list amb_data_list$dlta13CCo2 <- amb_delta diff --git a/R/calibrate_ambient_carbon_linreg.R b/R/calibrate_ambient_carbon_linreg.R index 7851d9d..c9702c8 100644 --- a/R/calibrate_ambient_carbon_linreg.R +++ b/R/calibrate_ambient_carbon_linreg.R @@ -123,7 +123,16 @@ calibrate_ambient_carbon_linreg <- function(amb_data_list, # extract d13C and CO2 concentrations from the ambient data d13C_ambdf$mean_cal <- d13C_ambdf$mean co2_ambdf$mean_cal <- co2_ambdf$mean - + + # add columns to d13C_ambdf and co2_ambdf for uncertainty calculation + d13C_ambdf$cvloo <- d13C_ambdf$mean + d13C_ambdf$cv5rmse <- d13C_ambdf$mean + d13C_ambdf$cv5mae <- d13C_ambdf$mean + + co2_ambdf$cvloo <- co2_ambdf$mean + co2_ambdf$cv5rmse <- co2_ambdf$mean + co2_ambdf$cv5mae <- co2_ambdf$mean + for (i in 1:length(var_inds_in_calperiod)) { d13C_ambdf$mean_cal[var_inds_in_calperiod[[i]]] <- caldf$d13C_intercept[i] + @@ -135,6 +144,8 @@ calibrate_ambient_carbon_linreg <- function(amb_data_list, d13C_ambdf$max[var_inds_in_calperiod[[i]]] <- caldf$d13C_intercept[i] + d13C_ambdf$max[var_inds_in_calperiod[[i]]] * caldf$d13C_slope[i] + # d13C_ambdf$cvloo[var_inds_in_calperiod[[i]]] <- + co2_ambdf$mean_cal[var_inds_in_calperiod[[i]]] <- caldf$co2_intercept[i] + co2_ambdf$mean[var_inds_in_calperiod[[i]]] * caldf$co2_slope[i] diff --git a/R/calibrate_carbon.R b/R/calibrate_carbon.R index f7937ea..304405f 100644 --- a/R/calibrate_carbon.R +++ b/R/calibrate_carbon.R @@ -153,7 +153,7 @@ calibrate_carbon <- function(inname, tmp_names <- names(ciso$reference) - print("correcting reference output df.,..") + print("correcting reference output df...") #apply seems to strip names from ciso$reference, so need to save above # and reassign below. ciso$reference <- lapply(names(ciso$reference), diff --git a/R/output_functions.R b/R/output_functions.R index 36ec313..890a55c 100644 --- a/R/output_functions.R +++ b/R/output_functions.R @@ -296,9 +296,6 @@ write_carbon_ambient_data <- function(outname, site, amb_data_list) { for (i in 1:length(amb_data_list)) { amb_data_subset <- amb_data_list[i] - print(names(amb_data_subset)) - print(site) - co2_data_outloc <- rhdf5::H5Gcreate(fid, paste0("/", site, "/dp01/data/isoCo2/", names(amb_data_subset))) diff --git a/R/reference_data_regression.R b/R/reference_data_regression.R index b7ff3bf..507a05d 100644 --- a/R/reference_data_regression.R +++ b/R/reference_data_regression.R @@ -104,8 +104,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, offset13C = as.numeric(NA), r2_12C = as.numeric(NA), r2_13C = as.numeric(NA), - loocv_12C = as.numeric(NA), - loocv_13C = as.numeric(NA), + cvloo_12C = as.numeric(NA), + cvloo_13C = as.numeric(NA), cv5mae_12C = as.numeric(NA), cv5mae_13C = as.numeric(NA), cv5rmse_12C = as.numeric(NA), @@ -118,8 +118,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, offset13C = numeric(length = 2e5), r2_12C = numeric(length = 2e5), r2_13C = numeric(length = 2e5), - loocv_12C = numeric(length = 2e5), - loocv_13C = numeric(length = 2e5), + cvloo_12C = numeric(length = 2e5), + cvloo_13C = numeric(length = 2e5), cv5mae_12C = numeric(length = 2e5), cv5mae_13C = numeric(length = 2e5), cv5rmse_12C = numeric(length = 2e5), @@ -183,8 +183,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, out$r2_13C[i] <- summary(tmpmod13C)$r.squared # extract leave-one-out CV value - out$loocv_12C[i] <- loocv(tmpmod12C) - out$loocv_13C[i] <- loocv(tmpmod13C) + out$cvloo_12C[i] <- loocv(tmpmod12C) + out$cvloo_13C[i] <- loocv(tmpmod13C) # get cv5 values tmp <- stats::formula(conc12CCO2_ref ~ conc12CCO2_obs) @@ -206,8 +206,8 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, out$offset13C[i] <- NA out$r2_12C[i] <- NA out$r2_13C[i] <- NA - out$loocv_12C[i] <- NA - out$loocv_13C[i] <- NA + out$cvloo_12C[i] <- NA + out$cvloo_13C[i] <- NA out$cv5mae_12C[i] <- NA out$cv5mae_13C[i] <- NA out$cv5rmse_12C[i] <- NA @@ -225,9 +225,9 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, # re-order columns to ensure that they are consistent across methods out <- out[, c("timeBgn", "timeEnd", "gain12C", "offset12C", "r2_12C", - "loocv_12C", "cv5mae_12C", "cv5rmse_12C", + "cvloo_12C", "cv5mae_12C", "cv5rmse_12C", "gain13C", "offset13C", "r2_13C", - "loocv_13C", "cv5mae_13C", "cv5rmse_13C")] + "cvloo_13C", "cv5mae_13C", "cv5rmse_13C")] } } else if (method == "linreg") { @@ -245,18 +245,30 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, d13C_slope = as.numeric(NA), d13C_intercept = as.numeric(NA), d13C_r2 = as.numeric(NA), + d13C_cvloo = as.numeric(NA), + d13C_cv5mae = as.numeric(NA), + d13C_cv5rmse = as.numeric(NA), co2_slope = as.numeric(NA), co2_intercept = as.numeric(NA), - co2_r2 = as.numeric(NA)) + co2_r2 = as.numeric(NA), + co2_cvloo = as.numeric(NA), + co2_cv5mae = as.numeric(NA), + co2_cv5rmse = as.numeric(NA)) } else { # output dataframe giving valid time range, slopes, intercepts, rsquared. out <- data.frame(d13C_slope = numeric(length = 2e5), d13C_intercept = numeric(length = 2e5), d13C_r2 = numeric(length = 2e5), + d13C_cvloo = numeric(length = 2e5), + d13C_cv5mae = numeric(length = 2e5), + d13C_cv5rmse = numeric(length = 2e5), co2_slope = numeric(length = 2e5), co2_intercept = numeric(length = 2e5), - co2_r2 = numeric(length = 2e5)) + co2_r2 = numeric(length = 2e5), + co2_cvloo = numeric(length = 2e5), + co2_cv5mae = numeric(length = 2e5), + co2_cv5rmse = numeric(length = 2e5)) # get start and end days. start_date <- as.Date(min(ref_data$timeBgn)) @@ -302,6 +314,7 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, tmpmod_co2 <- stats::lm(rtioMoleDryCo2Refe.mean ~ rtioMoleDryCo2.mean, data = cal_subset) + out$d13C_slope[i] <- coef(tmpmod_d13)[[2]] out$d13C_intercept[i] <- coef(tmpmod_d13)[[1]] out$d13C_r2[i] <- summary(tmpmod_d13)$r.squared @@ -310,6 +323,23 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, out$co2_intercept[i] <- coef(tmpmod_co2)[[1]] out$co2_r2[i] <- summary(tmpmod_co2)$r.squared + # extract uncertainties: + # extract leave-one-out CV value + out$d13C_cvloo[i] <- loocv(tmpmod_d13) + out$co2_cvloo[i] <- loocv(tmpmod_co2) + + # get cv5 values + tmp <- stats::formula(dlta13CCo2Refe.mean ~ dlta13CCo2.mean) + cv_d13C <- estimate_calibration_error(tmp, cal_subset) + tmp <- stats::formula(rtioMoleDryCo2Refe.mean ~ rtioMoleDryCo2.mean) + cv_co2 <- estimate_calibration_error(tmp, cal_subset) + + # assign cv values: + out$d13C_cv5mae[i] <- cv_d13C$MAE + out$co2_cv5mae[i] <- cv_co2$MAE + out$d13C_cv5rmse[i] <- cv_d13C$RMSE + out$co2_cv5rmse[i] <- cv_co2$RMSE + } else { out$d13C_slope[i] <- NA @@ -320,6 +350,14 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, out$co2_intercept[i] <- NA out$co2_r2[i] <- NA + # set uncertainty values to NA as well. + out$d13C_cvloo[i] <- NA + out$d13C_cv5mae[i] <- NA + out$d13C_cv5rmse[i] <- NA + out$co2_cvloo[i] <- NA + out$co2_cv5mae[i] <- NA + out$co2_cv5rmse[i] <- NA + } } @@ -333,7 +371,9 @@ fit_carbon_regression <- function(ref_data, method, calibration_half_width, # re-order columns to ensure that they are consistent across methods out <- out[, c("timeBgn", "timeEnd", "d13C_slope", "d13C_intercept", "d13C_r2", - "co2_slope", "co2_intercept", "co2_r2")] + "d13C_cvloo", "d13C_cv5mae", "d13C_cv5rmse", + "co2_slope", "co2_intercept", "co2_r2", + "co2_cvloo", "co2_cv5mae", "co2_cv5rmse")] } } diff --git a/R/restructure_data.R b/R/restructure_data.R index 7ac8e5e..baf1888 100644 --- a/R/restructure_data.R +++ b/R/restructure_data.R @@ -88,9 +88,6 @@ ingest_data <- function(inname, analyte, name_fix = TRUE) { ambi_by_height <- base::split(ambient, factor(ambient$verticalPosition)) refe_by_height <- base::split(reference, factor(reference$verticalPosition)) - - print(names(ambi_by_height)) - print(names(refe_by_height)) #------------------------- # RESTRUCTURE AMBIENT @@ -144,9 +141,6 @@ ingest_data <- function(inname, analyte, name_fix = TRUE) { names(refe_out) <- paste0(names(refe_out), "_09m") } - print(names(refe_out)) - print(names(ambi_out)) - output <- list(ambi_out, refe_out, reference) names(output) <- c("ambient", "reference", "refe_stacked") diff --git a/cran-comments.md b/cran-comments.md index 7858802..8e235f4 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,19 +1,29 @@ ## Test environments -* local R installation: R 4.1.2 +* local R installation: R 4.2.1 * GitHub Actions (ubuntu-18.04): devel, release, oldrel * GitHub Actions (windows-latest): release * GitHub Actions (macOS-latest): devel, release, oldrel -* win-builder: devel +* win-builder: devel, release +* R-hub (Windows Server 2022): devel +* R-hub (ubuntu-20.04.1 LTS): release +* R-hub (fedora): devel ## R CMD check results * No errors, warnings, or notes in local environment. -* No errors, warnings, or notes in GitHub Actions, except for windows-latest where -R CMD check does not run due to an error in 'session info' -(Error: invalid version specification '). This error does not seem to be related -to the package itself. + +* No errors, warnings, or notes in GitHub Actions, except for macOS-latest (devel), +which failed to download R-devel due to a 404 HTTP error + * No errors or warnings on win-builder. +* One error and one note issued on R-hub/Windows Server (devel): error is about a +bioconductor dependency not being available, note is about change in maintainer +(my email address has changed) and invalid URLs (URLs are correct however). +Same note is also provided on ubuntu-20.04.1 LTS. + +* No errors, warnings, or notes on R-hub fedora. + * Auto-check will issue a warning about misspelled words in DESCRIPTION, but the words flagged are all correct. diff --git a/extract_data_for_Gabe.R b/extract_data_for_Gabe.R deleted file mode 100644 index b49eb5e..0000000 --- a/extract_data_for_Gabe.R +++ /dev/null @@ -1,38 +0,0 @@ -# extract_data_for_Gabe.R -# ONAQ data for Gabe and Paige. - -library(neonUtilities) -library(dplyr) -library(magrittr) - -data <- neonUtilities::stackEddy("~/Downloads/NEON.D15.ONAQ.DP4.00200.001.nsae.all.basic.wiso.calibrated.2e+05dayWindow.h5", - level = 'dp01', - avg = 9) - -data_red <- data[["ONAQ"]] %>% - dplyr::select(verticalPosition,timeBgn,timeEnd,data.isoH2o.dlta18OH2o.mean_cal, - data.isoH2o.dlta2HH2o.mean_cal, data.isoH2o.rtioMoleDryH2o.mean, - data.isoH2o.rtioMoleWetH2o.mean) %>% - dplyr::rename(d18O=data.isoH2o.dlta18OH2o.mean_cal, - d2H =data.isoH2o.dlta2HH2o.mean_cal, - spec_hum=data.isoH2o.rtioMoleWetH2o.mean, - mixing_ratio=data.isoH2o.rtioMoleDryH2o.mean) - -# get heights -attrs <- rhdf5::h5readAttributes("~/Downloads/NEON.D15.ONAQ.DP4.00200.001.nsae.all.basic.wiso.calibrated.2e+05dayWindow.h5", "ONAQ") - -heights <- as.numeric(attrs$DistZaxsLvlMeasTow) - -data_red <- data_red %>% - dplyr::mutate(level = as.numeric(verticalPosition)/10, - height = heights[level]) %>% - dplyr::select(-verticalPosition) %>% - dplyr::filter(lubridate::year(timeBgn) >= 2020 & lubridate::year(timeEnd) < 2021) - -# round delta, q, and w: -data_red$d18O <- round(data_red$d18O, digits = 2) -data_red$d2H <- round(data_red$d2H, digits = 2) -data_red$mixing_ratio <- round(data_red$mixing_ratio, digits = 3) -data_red$spec_hum <- round(data_red$spec_hum, digits = 3) - -write.table(data_red, file = "~/Desktop/ONAQ_wiso_2020.csv", sep = ',', row.names = FALSE) diff --git a/man/calibrate_ambient_carbon_Bowling2003.Rd b/man/calibrate_ambient_carbon_Bowling2003.Rd index 050bce0..296f3e4 100644 --- a/man/calibrate_ambient_carbon_Bowling2003.Rd +++ b/man/calibrate_ambient_carbon_Bowling2003.Rd @@ -54,7 +54,7 @@ calibrate_ambient_carbon_Bowling2003 \author{ Rich Fiorella \email{rfiorella@lanl.gov} -Function called by \code{calibrate_carbon_bymoth()} to apply +Function called by \code{calibrate_carbon_bymonth()} to apply gain and offset parameters to the ambient datasets (000_0x0_09m and 000_0x0_30m). This function should generally not be used independently, but should be used in coordination with diff --git a/setup_testing_data.R b/setup_testing_data.R index f59f465..6a7ac93 100644 --- a/setup_testing_data.R +++ b/setup_testing_data.R @@ -8,9 +8,9 @@ library(rhdf5) # exists in rbuildignore, so should not be bundled with package. # requires install and restart w/ NEONiso. -master_file <- "/Volumes/GradSchoolBackup/NEON/DP4_00200_001/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.h5" -master_ccalB03_file <- "~/Desktop/NEONcal/210303_CisoEC/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5" -master_ccalLR_file <- "~/Desktop/NEONcal/210303_CisoLR/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5" +master_file <- "NEON/DP4_00200_001/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.h5" +master_ccalB03_file <- "210303_CisoEC/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5" +master_ccalLR_file <- "210303_CisoLR/ONAQ/NEON.D15.ONAQ.DP4.00200.001.nsae.2019-06.basic.20201020T212232Z.calibrated.h5" #--------------------------------------- # extract co2 data for unit testing: @@ -462,5 +462,5 @@ h5delete(fid, '/YELL/dp01/data/isoCo2/co2High_30m') H5Fclose(fid) -system('/Users/rfiorella/opt/anaconda3/bin/h5repack inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.20210209T161116Z.h5 inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.packed.h5') +system('~/opt/anaconda3/bin/h5repack inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.20210209T161116Z.h5 inst/extdata/NEON.D12.YELL.DP4.00200.001.nsae.2020-11.basic.packed.h5') diff --git a/test_workflows.R b/test_workflows.R index b772700..c141c17 100644 --- a/test_workflows.R +++ b/test_workflows.R @@ -14,7 +14,7 @@ # 8) calibrate_water_linreg_bysite works # where does uncalibrated data live? -data.dir <- '~/airflow/data/01-DP4_00200_001' +data.dir <- '~/airflow/data/01-DP4_00200_001/' # set test_date #test_date <- "2022-01-01" @@ -27,15 +27,15 @@ devtools::load_all() dir.create(paste0('~/NEONcal/',test_date,"_tests")) # which tests to run? -run_test1 <- FALSE +run_test1 <- TRUE run_test2 <- FALSE -run_test3 <- TRUE -run_test4 <- TRUE -run_test5 <- TRUE -run_test6 <- TRUE -run_test7 <- TRUE -run_test8 <- TRUE -rapid_test <- TRUE # if rapid, only run ~5% of possible site months. +run_test3 <- FALSE +run_test4 <- FALSE +run_test5 <- FALSE +run_test6 <- FALSE +run_test7 <- FALSE +run_test8 <- FALSE +rapid_test <- FALSE # if rapid, only run ~5% of possible site months. # load required packages: library(rhdf5) diff --git a/test_workflows_parallel.R b/test_workflows_parallel.R index 5dba1cd..86e0be0 100644 --- a/test_workflows_parallel.R +++ b/test_workflows_parallel.R @@ -33,14 +33,14 @@ test_date <- Sys.Date() dir.create(paste0('~/NEONcal/',test_date,"_parallel")) # which tests to run? -run_test1 <- FALSE -run_test2 <- FALSE -run_test3 <- FALSE +run_test1 <- TRUE +run_test2 <- TRUE +run_test3 <- TRUE run_test4 <- TRUE -run_test5 <- FALSE -run_test6 <- FALSE -run_test7 <- FALSE -run_test8 <- FALSE +run_test5 <- TRUE +run_test6 <- TRUE +run_test7 <- TRUE +run_test8 <- TRUE rapid_test <- FALSE # if rapid, only run ~5% of possible site months. # load required packages: diff --git a/tests/testthat/test-data_regression.R b/tests/testthat/test-data_regression.R index 07e8164..5e21613 100644 --- a/tests/testthat/test-data_regression.R +++ b/tests/testthat/test-data_regression.R @@ -19,10 +19,10 @@ test_that("fit_carbon_regression returns data.frame", { expect_s3_class(fit_carbon_regression(co2data, method = "linreg", calibration_half_width = 2), "data.frame") }) -test_that("calibration data frames have 8 columns", { +test_that("calibration data frames have 14 columns", { skip_on_cran() expect_equal(ncol(calDf_B03), 14) - expect_equal(ncol(calDf_LR), 8) + expect_equal(ncol(calDf_LR), 14) }) diff --git a/uncertainty_analysis.R b/uncertainty_analysis.R deleted file mode 100644 index 196dcbe..0000000 --- a/uncertainty_analysis.R +++ /dev/null @@ -1,129 +0,0 @@ -# uncertainty_analysis.R - -# test integration period on one site first. -library(rhdf5) -library(tidyverse) -library(neonUtilities) - - -time_windows <- c(1, 2, 3, 4, 7, 14, 21, 28, 90, 180, 365, 100000) - -data <- list() -plot_out <- list() -plot_amb <- list() - -sites <- c(NEONiso::terrestrial_core_sites(),NEONiso::terrestrial_relocatable_sites()) - -for (j in 1:length(sites)) { - - - #j <- 1 - for (i in 1:length(time_windows)) { - - - # get list of files. - flist <- list.files(paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/',time_windows[i],'/'), - full.names = TRUE) - - glist <- list.files(paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/',time_windows[i],'/')) - slist <- substr(glist, 10, 13) - - - # figure out which site in slist matches sites[j] - k <- which(sites[j] == slist) - - print(slist) - print(paste(i,j,k,sites[j])) - - tmp0 <- h5read(flist[k], name = paste0('/',slist[k],'/dp01/data/isoCo2/')) - - # select just the standards - tmp1 <- tmp0[c("co2High_09m", "co2Med_09m", "co2Low_09m")] - - # this is annoying! - tmp2 <- list() - tmp3 <- list() - - print("rearrange data") - - for (z in 1:length(tmp1)) { - tmp2[[z]] <- tmp1[[z]]$dlta13CCo2 %>% - dplyr::select(timeBgn, mean_cal, numSamp, vari, CVcalUcrt, LOOcalUcrt) - - tmp3[[z]] <- tmp1[[z]]$dlta13CCo2Refe %>% - dplyr::select(timeBgn, mean) - } - - print('merging data') - tmp2 <- do.call(rbind, tmp2) - tmp3 <- do.call(rbind, tmp3) - - print("continue merging data") - tmp4 <- merge(tmp2, tmp3, by = 'timeBgn', all = TRUE) - - plot_out[[i]] <- tmp4 %>% - dplyr::select(-timeBgn) %>% - dplyr::rename(cal = mean_cal, ref = mean) %>% - mutate(diff1 = CVcalUcrt, diff2 = cal - ref, diff3 = LOOcalUcrt, intNum = time_windows[i]) - - # also collect ambient data: - tmp91 <- tmp0[!(names(tmp0) %in% c("co2High_09m", "co2Med_09m", "co2Low_09m", "calData"))] - # this is annoying! - tmp92 <- list() - - for (y in 1:length(tmp91)) { - tmp92[[y]] <- tmp91[[y]]$dlta13CCo2 %>% - dplyr::select(mean_cal, mean) - } - - tmp92 <- do.call(rbind, tmp92) - - plot_amb[[i]] <- tmp92 %>% - dplyr::mutate(not_na_cal = !is.na(mean_cal), not_na_obs = !is.na(mean), - na_cal = is.na(mean_cal), na_obs = is.na(mean), - window = time_windows[i]) %>% - dplyr::summarize(total_obs_frac = sum(not_na_cal)/sum(not_na_obs), window = mean(window)) - } - - # collapse plot out - plot_out <- do.call(rbind, plot_out) - plot_amb <- do.call(rbind, plot_amb) - - # filter out really high variance measurements - plot_out <- plot_out %>% - dplyr::filter(vari < 0.5) - - # make a plot? - p1 <- ggplot(data = plot_out, aes(x = factor(intNum), y = diff1)) + - # geom_violin() + - # geom_line(aes(x = intNum, y = mean(diff1))) + - geom_boxplot() + - scale_y_continuous(name = "calibration uncertainty (permil)", limits=c(1e-4,10), trans = "log10") + - scale_x_discrete(name = 'calibration window (days)') - - print(p1) - - p2 <- ggplot(data = plot_amb, aes(x = factor(window), y = total_obs_frac)) + - geom_point() - - print(p2) - - p3 <- ggplot(data = plot_out, aes(x = factor(intNum), y = abs(diff2))) + - geom_violin() + - scale_y_continuous(name = "calibration uncertainty (permil)", limits=c(1e-4,10), trans = "log10") + - geom_boxplot() - - print(p3) - - p4 <- ggplot(data = plot_out, aes(x = factor(intNum), y = abs(diff3))) + - geom_violin() + - scale_y_continuous(name = "calibration uncertainty (permil)", limits=c(1e-4,10), trans = "log10") + - geom_boxplot() - - print(p4) - - pdf(paste0('~/Desktop/testing/',sites[j],'.pdf'), width = 12, height = 8) - gridExtra::grid.arrange(p1, p2, p3, p4, nrow = 2) - dev.off() - -} \ No newline at end of file diff --git a/uncertainty_testing.R b/uncertainty_testing.R deleted file mode 100644 index e561bee..0000000 --- a/uncertainty_testing.R +++ /dev/null @@ -1,88 +0,0 @@ -# uncertainty_testing.R -# rich fiorella -# 9 december 2021 -# load required packages: -library(rhdf5) -library(dplyr) -library(lubridate) -library(doParallel) -library(foreach) -# script to drive multiple tests of half_width size. - -devtools::load_all() - -# where does uncalibrated data live? -data.dir <- '~/airflow/data/01-DP4_00200_001/' - -which_test <- 'medlow' - -# set test_date -# make output directory structure: -dir.create(paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/'), recursive = TRUE) - -csites <- c(NEONiso:::terrestrial_core_sites(), NEONiso:::terrestrial_relocatable_sites()) - -# set up a function to drive calibration across sites: - -test_half_width <- function(ndays) { - dir.create(paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/',ndays)) - - for (i in 19:length(csites)) { - - print(paste(csites[i], ndays)) - - fin <- list.files(paste0(data.dir,csites[i],'/') , pattern = '.h5', full.names=TRUE) - fout <- list.files(paste0(data.dir,csites[i],'/'), pattern = '.h5', full.names=FALSE) - - calibrate_carbon(fin, - paste0(paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/',ndays,'/'),fout[1]), - site=csites[i], r2_thres = 0.99, - calibration_half_width = ndays, - plot_regression_data = TRUE, # VERYYYY SLOW - plot_directory = paste0('~/NEONcal/r2_99/carbon_halfwidth_tests/',ndays,'/')) - } -} - -# set up a cluster to run across possible nday options -nday_list <- c(1, 2, 3, 4, 7, 14, 21, 28, 90, 180, 365, 100000) - -test_half_width(nday_list[12]) - -#local.cluster <- parallel::makeCluster(12, type = "FORK", outfile="~/Desktop/cluster_output.txt") -#doParallel::registerDoParallel(cl = local.cluster) -#foreach::getDoParRegistered() - -#foreach::registerDoSEQ() - -#foreach (i = 1:12) %dopar% {test_half_width(nday_list[i])} - -#stopCluster(local.cluster) - - -# # if doing one time, parallelize over sites instead: -# test_half_width_parallel <- function(ndays, ncores) { -# dir.create(paste0('~/NEONcal/carbon_halfwidth_tests/',which_test,'/',ndays)) -# -# # start cluster -# local.cluster <- parallel::makeCluster(ncores, type = "FORK") -# doParallel::registerDoParallel(cl = local.cluster) -# foreach::getDoParRegistered() -# -# foreach (i = 1:47) %dopar% { -# -# print(paste(csites[i], ndays)) -# -# fin <- list.files(paste0(data.dir,csites[i],'/') , pattern = '.h5', full.names=TRUE) -# fout <- list.files(paste0(data.dir,csites[i],'/'), pattern = '.h5', full.names=FALSE) -# -# calibrate_carbon(fin, -# paste0(paste0('~/NEONcal/carbon_halfwidth_tests/',which_test,'/',ndays,'/'),fout[1]), -# site=csites[i], r2_thres = 0.95, -# calibration_half_width = ndays) -# } -# -# stopCluster(local.cluster) -# } - - -