diff --git a/install_container_bash b/bash/install_container similarity index 100% rename from install_container_bash rename to bash/install_container diff --git a/install_packages_bash b/bash/install_packages similarity index 100% rename from install_packages_bash rename to bash/install_packages diff --git a/install_sem_devel_bash b/bash/install_sem_devel similarity index 100% rename from install_sem_devel_bash rename to bash/install_sem_devel diff --git a/cfa/analyze_results.R b/cfa/analyze_results.R new file mode 100644 index 0000000..b5cb8a3 --- /dev/null +++ b/cfa/analyze_results.R @@ -0,0 +1,135 @@ +library(ggplot2) +library(readr) +library(stringr) +library(lubridate) +library(dplyr) +library(purrr) + +# command line arguments +args <- commandArgs(trailingOnly = TRUE) +compare_lavaan <- tolower(args[1]) == "true" + +# list all result files + +files <- list.files("cfa/results") + +files_julia <- str_subset(files, "julia") +files_lavaan <- str_subset(files, "lavaan") + +# extract dates +dates_julia <- str_remove_all(files_julia, "benchmarks_julia_|.csv") +dates_lavaan <- str_remove_all(files_lavaan, "benchmarks_lavaan_|.csv") + +dates_julia <- lubridate::ymd_hm(dates_julia) +dates_lavaan <- lubridate::ymd_hm(dates_lavaan) + +files_julia <- data.frame(filename = files_julia, date = dates_julia) +files_lavaan <- data.frame(filename = files_lavaan, date = dates_lavaan) + +files_julia <- arrange(files_julia, desc(date)) +files_lavaan <- arrange(files_lavaan, desc(date)) + +# extract only two recent files +dates_julia <- files_julia$date[1:2] +files_julia <- files_julia$filename[1:2] +data_julia <- map(files_julia, ~read_delim(paste("cfa/results/", .x, sep = ""), delim = ";", locale = locale(decimal_mark = "."))) + +data_julia[[1]] <- + mutate(data_julia[[1]], + datetime = dates_julia[1], + package = "julia") +data_julia[[2]] <- + mutate(data_julia[[2]], + datetime = dates_julia[2], + package = "julia") + +data_julia <- bind_rows(data_julia) + +# print if all models are correct +print(paste("All models are correct:", all(data_julia$correct))) + +data <- mutate(data_julia, across(c(median_time, mean_time, sd_time), function(x){x/1e9})) + +if (compare_lavaan) { + date_lavaan <- files_lavaan$date[1] + file_lavaan <- files_lavaan$filename[1] + data_lavaan <- read_csv2(paste("cfa/results/", file_lavaan, sep = "")) + data_lavaan <- + mutate(data_lavaan, + datetime = date_lavaan, + package = "lavaan" + ) + if (any(!is.na(data_lavaan$error))) {print(data_lavaan$error)} + if (any(!is.na(data_lavaan$warnings))) {print(data_lavaan$warnings)} + if (any(!is.na(data_lavaan$messages))) {print(data_lavaan$messages)} + data <- bind_rows(data, data_lavaan) +} + +data <- mutate( + data, + n_parameters = 2*(n_factors*n_items) + n_factors*(n_factors-1)/2, + se = sd_time/sqrt(n_repetitions) + ) + + +# plots +# compare julia versions +p1 <- data %>% + filter(package == "julia") %>% + ggplot(aes( + x = n_parameters, + y = median_time, + color = interaction(as.factor(datetime), package))) + + geom_point() + + geom_line() + + geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) + + labs(color = "time:package") + + theme_minimal() + + theme(text = element_text(color = "white")) + +p2 <- data %>% + filter(package == "julia") %>% + ggplot(aes( + x = n_parameters, + y = median_time, + color = interaction(as.factor(datetime), package))) + + geom_point() + + geom_line() + + geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) + + labs(color="time:package") + + theme_minimal() + + scale_y_log10() + + theme(text = element_text(color = "white")) + +# compare with lavaan +if (compare_lavaan) { + p3 <- data %>% + ggplot(aes( + x = n_parameters, + y = median_time, + color = interaction(as.factor(datetime), package))) + + geom_point() + + geom_line() + + geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) + + labs(color="time:package") + + theme_minimal() + + theme(text = element_text(color = "white")) + + p4 <- data %>% + ggplot(aes( + x = n_parameters, + y = median_time, + color = interaction(as.factor(datetime), package))) + + geom_point() + + geom_line() + + geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) + + labs(color = "time:package") + + theme_minimal() + + scale_y_log10() + + theme(text = element_text(color = "white")) +} + +ggsave("cfa/results/plots/julia.png", p1) +ggsave("cfa/results/plots/julia_log.png", p2) +ggsave("cfa/results/plots/lavaan_julia.png", p3) +ggsave("cfa/results/plots/lavaan_julia_log.png", p4) \ No newline at end of file diff --git a/cfa/bash/analyze_results b/cfa/bash/analyze_results new file mode 100644 index 0000000..cd4d9a0 --- /dev/null +++ b/cfa/bash/analyze_results @@ -0,0 +1,3 @@ +#!/bin/bash + +singularity exec --writable sem-writ.simg Rscript cfa/analyze_results.R TRUE \ No newline at end of file diff --git a/cfa/julia_bash b/cfa/bash/julia similarity index 100% rename from cfa/julia_bash rename to cfa/bash/julia diff --git a/cfa/lavaan_bash b/cfa/bash/lavaan similarity index 100% rename from cfa/lavaan_bash rename to cfa/bash/lavaan diff --git a/cfa/benchmark_julia.jl b/cfa/benchmark_julia.jl index 405dc56..b0f00ca 100644 --- a/cfa/benchmark_julia.jl +++ b/cfa/benchmark_julia.jl @@ -1,6 +1,6 @@ using DataFrames, StructuralEquationModels, Symbolics, LinearAlgebra, SparseArrays, Optim, LineSearches, - BenchmarkTools, CSV + BenchmarkTools, CSV, Statistics date = string(ARGS...) @@ -38,7 +38,11 @@ benchmarks = benchmark_models(models) results = select(config, :Estimator, :n_factors, :n_items, :meanstructure, :backend, :correct) -results.median_time_jl = median.(getfield.(benchmarks, :times)) -results.n_par = 2*(results.n_factors*results.n_items) + results.n_factors*(results.n_factors-1)/2, +results.median_time = median.(getfield.(benchmarks, :times)) +results.mean_time = median.(getfield.(benchmarks, :times)) +results.sd_time = std.(getfield.(benchmarks, :times)) +results.n_repetitions = getfield.(getfield.(benchmarks, :params), :evals).*getfield.(getfield.(benchmarks, :params), :samples) + +# results.n_par = 2*(results.n_factors.*results.n_items) + results.n_factors.*(results.n_factors.-1)/2, CSV.write("results/benchmarks_julia_"*date*".csv", results, delim = ";") \ No newline at end of file diff --git a/cfa/benchmark_lavaan.R b/cfa/benchmark_lavaan.R index 2056e4d..3cd3121 100644 --- a/cfa/benchmark_lavaan.R +++ b/cfa/benchmark_lavaan.R @@ -8,7 +8,8 @@ source("cfa/functions.R") args <- commandArgs(trailingOnly = TRUE) date <- args[1] -results <- readr::read_rds("cfa/results.rds") +results <- read_csv("cfa/config.csv") +results <- filter(results, meanstructure == 0) results <- mutate(results, @@ -19,6 +20,25 @@ results <- list(...), lavaan_model(n_factors, n_items, meanstructure)))) +results <- + mutate(results, + data = pmap( + results, + ~with( + list(...), + read_csv(paste( + "cfa/data/", + "n_factors_", + n_factors, + "_n_items_", + n_items, + "_meanstructure_", + meanstructure, + ".csv", sep = "")) + ) + ) + ) + # results$model_lavaan[[24]] <- str_remove_all(results$model_lavaan[[24]], "NA") # results$model_lavaan[[12]] <- str_remove_all(results$model_lavaan[[12]], "NA") @@ -42,6 +62,15 @@ results <- start, parTable)) +results <- mutate( + results, + n_par = map2_dbl( + n_factors, + n_items, + ~ 2*(.x*.y) + .x*(.x-1)/2 + ) + ) + const <- 3*(results$n_par[length(results$n_par)]^2) results <- mutate( @@ -50,8 +79,6 @@ results <- mutate( #!!! # results$n_repetitions <- 10 - -results <- filter(results, meanstructure == 0) ## benchmarks <- pmap( @@ -67,25 +94,22 @@ benchmarks <- pmap( benchmark_summary <- map_dfr(benchmarks, extract_results) -# benchmark_summary <- rename_with(benchmark_summary, ~str_c(.x, "_lav")) -# results <- bind_cols(results, benchmark_summary) +results <- bind_cols(results, benchmark_summary) write_csv2( select( - results, - Estimator, - n_factors, - n_items, + results, + Estimator, + n_factors, + n_items, meanstructure, n_repetitions, - n_obs, - mean_time_lav, - median_time_lav, - sd_time_lav, - n_par, - error_lav, - warnings_lav, - messages_lav), - paste("cfa/results/benchmarks_lavaan_", date, ".csv", sep = "")) + mean_time, + median_time, + sd_time, + error, + warnings, + messages), + paste("cfa/results/benchmarks_lavaan_", date, ".csv", sep = "")) # write_rds(results, "results.rds") \ No newline at end of file diff --git a/cfa/estimates_lavaan.R b/cfa/estimates_lavaan.R index 839b0e5..cebbe50 100644 --- a/cfa/estimates_lavaan.R +++ b/cfa/estimates_lavaan.R @@ -6,7 +6,8 @@ library(readr) set.seed(73647820) source("functions.R") -results <- readr::read_rds("results.rds") +results <- read_csv("cfa/config.csv") +results <- filter(results, meanstructure == 0) results <- mutate(results, @@ -17,8 +18,24 @@ results <- list(...), lavaan_model(n_factors, n_items, meanstructure)))) -# results$model_lavaan[[24]] <- str_remove_all(results$model_lavaan[[24]], "NA") -results$model_lavaan[[12]] <- str_remove_all(results$model_lavaan[[12]], "NA") +results <- + mutate(results, + data = pmap( + results, + ~with( + list(...), + read_csv(paste( + "cfa/data/", + "n_factors_", + n_factors, + "_n_items_", + n_items, + "_meanstructure_", + meanstructure, + ".csv", sep = "")) + ) + ) + ) results <- mutate(results, @@ -42,13 +59,13 @@ results <- estimate, parTable)) -pwalk(results, +pwalk(results, ~with( - list(...), + list(...), write_csv( - estimate, + estimate, str_c( - "parest/", + "cfa/parest/", "n_factors_", n_factors, "_n_items_", @@ -57,5 +74,5 @@ pwalk(results, meanstructure, ".csv") ) - ) ) + ) diff --git a/install_packages.jl b/install_packages.jl index 8750aa2..5f23b10 100644 --- a/install_packages.jl +++ b/install_packages.jl @@ -5,4 +5,5 @@ Pkg.add("DataFrames") Pkg.add("Symbolics") Pkg.add("Optim") Pkg.add("LineSearches") -Pkg.add("BenchmarkTools") \ No newline at end of file +Pkg.add("BenchmarkTools") +Pkg.add("Statistics") \ No newline at end of file