Skip to content

Commit

Permalink
finalize first benchmarks and restructure files
Browse files Browse the repository at this point in the history
  • Loading branch information
Maximilian-Stefan-Ernst committed Feb 14, 2022
1 parent 5a1ae77 commit 8bcdb86
Show file tree
Hide file tree
Showing 11 changed files with 214 additions and 30 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
135 changes: 135 additions & 0 deletions cfa/analyze_results.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
library(ggplot2)
library(readr)
library(stringr)
library(lubridate)
library(dplyr)
library(purrr)

# command line arguments
args <- commandArgs(trailingOnly = TRUE)
compare_lavaan <- tolower(args[1]) == "true"

# list all result files

files <- list.files("cfa/results")

files_julia <- str_subset(files, "julia")
files_lavaan <- str_subset(files, "lavaan")

# extract dates
dates_julia <- str_remove_all(files_julia, "benchmarks_julia_|.csv")
dates_lavaan <- str_remove_all(files_lavaan, "benchmarks_lavaan_|.csv")

dates_julia <- lubridate::ymd_hm(dates_julia)
dates_lavaan <- lubridate::ymd_hm(dates_lavaan)

files_julia <- data.frame(filename = files_julia, date = dates_julia)
files_lavaan <- data.frame(filename = files_lavaan, date = dates_lavaan)

files_julia <- arrange(files_julia, desc(date))
files_lavaan <- arrange(files_lavaan, desc(date))

# extract only two recent files
dates_julia <- files_julia$date[1:2]
files_julia <- files_julia$filename[1:2]
data_julia <- map(files_julia, ~read_delim(paste("cfa/results/", .x, sep = ""), delim = ";", locale = locale(decimal_mark = ".")))

data_julia[[1]] <-
mutate(data_julia[[1]],
datetime = dates_julia[1],
package = "julia")
data_julia[[2]] <-
mutate(data_julia[[2]],
datetime = dates_julia[2],
package = "julia")

data_julia <- bind_rows(data_julia)

# print if all models are correct
print(paste("All models are correct:", all(data_julia$correct)))

data <- mutate(data_julia, across(c(median_time, mean_time, sd_time), function(x){x/1e9}))

if (compare_lavaan) {
date_lavaan <- files_lavaan$date[1]
file_lavaan <- files_lavaan$filename[1]
data_lavaan <- read_csv2(paste("cfa/results/", file_lavaan, sep = ""))
data_lavaan <-
mutate(data_lavaan,
datetime = date_lavaan,
package = "lavaan"
)
if (any(!is.na(data_lavaan$error))) {print(data_lavaan$error)}
if (any(!is.na(data_lavaan$warnings))) {print(data_lavaan$warnings)}
if (any(!is.na(data_lavaan$messages))) {print(data_lavaan$messages)}
data <- bind_rows(data, data_lavaan)
}

data <- mutate(
data,
n_parameters = 2*(n_factors*n_items) + n_factors*(n_factors-1)/2,
se = sd_time/sqrt(n_repetitions)
)


# plots
# compare julia versions
p1 <- data %>%
filter(package == "julia") %>%
ggplot(aes(
x = n_parameters,
y = median_time,
color = interaction(as.factor(datetime), package))) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) +
labs(color = "time:package") +
theme_minimal() +
theme(text = element_text(color = "white"))

p2 <- data %>%
filter(package == "julia") %>%
ggplot(aes(
x = n_parameters,
y = median_time,
color = interaction(as.factor(datetime), package))) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) +
labs(color="time:package") +
theme_minimal() +
scale_y_log10() +
theme(text = element_text(color = "white"))

# compare with lavaan
if (compare_lavaan) {
p3 <- data %>%
ggplot(aes(
x = n_parameters,
y = median_time,
color = interaction(as.factor(datetime), package))) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) +
labs(color="time:package") +
theme_minimal() +
theme(text = element_text(color = "white"))

p4 <- data %>%
ggplot(aes(
x = n_parameters,
y = median_time,
color = interaction(as.factor(datetime), package))) +
geom_point() +
geom_line() +
geom_errorbar(aes(ymin = median_time - se, ymax = median_time + se)) +
labs(color = "time:package") +
theme_minimal() +
scale_y_log10() +
theme(text = element_text(color = "white"))
}

ggsave("cfa/results/plots/julia.png", p1)
ggsave("cfa/results/plots/julia_log.png", p2)
ggsave("cfa/results/plots/lavaan_julia.png", p3)
ggsave("cfa/results/plots/lavaan_julia_log.png", p4)
3 changes: 3 additions & 0 deletions cfa/bash/analyze_results
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

singularity exec --writable sem-writ.simg Rscript cfa/analyze_results.R TRUE
File renamed without changes.
File renamed without changes.
10 changes: 7 additions & 3 deletions cfa/benchmark_julia.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using DataFrames, StructuralEquationModels, Symbolics,
LinearAlgebra, SparseArrays, Optim, LineSearches,
BenchmarkTools, CSV
BenchmarkTools, CSV, Statistics

date = string(ARGS...)

Expand Down Expand Up @@ -38,7 +38,11 @@ benchmarks = benchmark_models(models)

results = select(config, :Estimator, :n_factors, :n_items, :meanstructure, :backend, :correct)

results.median_time_jl = median.(getfield.(benchmarks, :times))
results.n_par = 2*(results.n_factors*results.n_items) + results.n_factors*(results.n_factors-1)/2,
results.median_time = median.(getfield.(benchmarks, :times))
results.mean_time = median.(getfield.(benchmarks, :times))
results.sd_time = std.(getfield.(benchmarks, :times))
results.n_repetitions = getfield.(getfield.(benchmarks, :params), :evals).*getfield.(getfield.(benchmarks, :params), :samples)

# results.n_par = 2*(results.n_factors.*results.n_items) + results.n_factors.*(results.n_factors.-1)/2,

CSV.write("results/benchmarks_julia_"*date*".csv", results, delim = ";")
60 changes: 42 additions & 18 deletions cfa/benchmark_lavaan.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ source("cfa/functions.R")
args <- commandArgs(trailingOnly = TRUE)
date <- args[1]

results <- readr::read_rds("cfa/results.rds")
results <- read_csv("cfa/config.csv")
results <- filter(results, meanstructure == 0)

results <-
mutate(results,
Expand All @@ -19,6 +20,25 @@ results <-
list(...),
lavaan_model(n_factors, n_items, meanstructure))))

results <-
mutate(results,
data = pmap(
results,
~with(
list(...),
read_csv(paste(
"cfa/data/",
"n_factors_",
n_factors,
"_n_items_",
n_items,
"_meanstructure_",
meanstructure,
".csv", sep = ""))
)
)
)

# results$model_lavaan[[24]] <- str_remove_all(results$model_lavaan[[24]], "NA")
# results$model_lavaan[[12]] <- str_remove_all(results$model_lavaan[[12]], "NA")

Expand All @@ -42,6 +62,15 @@ results <-
start,
parTable))

results <- mutate(
results,
n_par = map2_dbl(
n_factors,
n_items,
~ 2*(.x*.y) + .x*(.x-1)/2
)
)

const <- 3*(results$n_par[length(results$n_par)]^2)

results <- mutate(
Expand All @@ -50,8 +79,6 @@ results <- mutate(

#!!!
# results$n_repetitions <- 10

results <- filter(results, meanstructure == 0)
##

benchmarks <- pmap(
Expand All @@ -67,25 +94,22 @@ benchmarks <- pmap(

benchmark_summary <- map_dfr(benchmarks, extract_results)

# benchmark_summary <- rename_with(benchmark_summary, ~str_c(.x, "_lav"))
# results <- bind_cols(results, benchmark_summary)
results <- bind_cols(results, benchmark_summary)

write_csv2(
select(
results,
Estimator,
n_factors,
n_items,
results,
Estimator,
n_factors,
n_items,
meanstructure,
n_repetitions,
n_obs,
mean_time_lav,
median_time_lav,
sd_time_lav,
n_par,
error_lav,
warnings_lav,
messages_lav),
paste("cfa/results/benchmarks_lavaan_", date, ".csv", sep = ""))
mean_time,
median_time,
sd_time,
error,
warnings,
messages),
paste("cfa/results/benchmarks_lavaan_", date, ".csv", sep = ""))

# write_rds(results, "results.rds")
33 changes: 25 additions & 8 deletions cfa/estimates_lavaan.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ library(readr)
set.seed(73647820)
source("functions.R")

results <- readr::read_rds("results.rds")
results <- read_csv("cfa/config.csv")
results <- filter(results, meanstructure == 0)

results <-
mutate(results,
Expand All @@ -17,8 +18,24 @@ results <-
list(...),
lavaan_model(n_factors, n_items, meanstructure))))

# results$model_lavaan[[24]] <- str_remove_all(results$model_lavaan[[24]], "NA")
results$model_lavaan[[12]] <- str_remove_all(results$model_lavaan[[12]], "NA")
results <-
mutate(results,
data = pmap(
results,
~with(
list(...),
read_csv(paste(
"cfa/data/",
"n_factors_",
n_factors,
"_n_items_",
n_items,
"_meanstructure_",
meanstructure,
".csv", sep = ""))
)
)
)

results <-
mutate(results,
Expand All @@ -42,13 +59,13 @@ results <-
estimate,
parTable))

pwalk(results,
pwalk(results,
~with(
list(...),
list(...),
write_csv(
estimate,
estimate,
str_c(
"parest/",
"cfa/parest/",
"n_factors_",
n_factors,
"_n_items_",
Expand All @@ -57,5 +74,5 @@ pwalk(results,
meanstructure,
".csv")
)
)
)
)
3 changes: 2 additions & 1 deletion install_packages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ Pkg.add("DataFrames")
Pkg.add("Symbolics")
Pkg.add("Optim")
Pkg.add("LineSearches")
Pkg.add("BenchmarkTools")
Pkg.add("BenchmarkTools")
Pkg.add("Statistics")

0 comments on commit 8bcdb86

Please sign in to comment.