Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing seed for jobs #257

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions ..Rcheck/00check.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
* using log directory ‘/home/damir/software/batchtools/..Rcheck’
* using R version 3.6.2 (2019-12-12)
* using platform: x86_64-pc-linux-gnu (64-bit)
* using session charset: UTF-8
* checking for file ‘./DESCRIPTION’ ... ERROR
Required fields missing or empty:
‘Author’ ‘Maintainer’
* DONE
Status: 1 ERROR
11 changes: 11 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
reticulate::repl_python()
setwd("./software/batchtools/")
library(devtools)
document()
document(".")
document("batchtools")
?document
pwd()
getwd()
devtools::check()
devtools::check(".")
4 changes: 2 additions & 2 deletions R/ExperimentRegistry.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
#' tab = ljoin(pars, results)
#' tab[, list(mres = mean(res)), by = c("n", "algorithm")]
makeExperimentRegistry = function(file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L),
source = character(0L), load = character(0L), seed = NULL, make.default = TRUE) {
source = character(0L), load = character(0L), seed = NULL, fix.seed = FALSE, make.default = TRUE) {

reg = makeRegistry(file.dir = file.dir, work.dir = work.dir, conf.file = conf.file,
packages = packages, namespaces = namespaces, source = source, load = load, seed = seed, make.default = make.default)
packages = packages, namespaces = namespaces, source = source, load = load, seed = seed, fix.seed = fix.seed, make.default = make.default)

fs::dir_create(fs::path(reg$file.dir, c("problems", "algorithms")))

Expand Down
24 changes: 18 additions & 6 deletions R/Job.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,19 @@ Experiment = R6Class("Experiment", cloneable = FALSE, inherit = BaseJob,
return(result)
}
}
seed = if (is.null(p$seed)) self$seed else getSeed(p$seed, self$repl - 1L)
seed = if(p$fix.seed) {
if(is.null(p$seed)) {
self$seed
} else {
getSeed(p$seed, self$repl - 1L, p$fix.seed)
}
} else {
if(is.null(p$seed)) {
getSeed(self$seed, self$repl - 1L)
} else {
getSeed(p$seed, self$repl - 1L)
}
}
wrapper = function(...) p$fun(job = self, data = p$data, ...)
result = with_seed(seed, do.call(wrapper, self$prob.pars, envir = .GlobalEnv))
if (p$cache)
Expand Down Expand Up @@ -167,15 +179,14 @@ makeJob = function(id, reader = NULL, reg = getDefaultRegistry()) {
makeJob.Registry = function(id, reader = NULL, reg = getDefaultRegistry()) {
row = mergedJobs(reg, convertId(reg, id), c("job.id", "job.pars", "resource.id"))
resources = reg$resources[row, "resources", on = "resource.id", nomatch = NA]$resources[[1L]] %??% list()
Job$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(reg$seed, row$job.id),
resources = resources)
Job$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(reg$seed, row$job.id, reg$fix.seed), resources = resources)
}

#' @export
makeJob.ExperimentRegistry = function(id, reader = NULL, reg = getDefaultRegistry()) {
row = mergedJobs(reg, convertId(reg, id), c("job.id", "problem", "prob.pars", "algorithm", "algo.pars", "repl", "resource.id"))
resources = reg$resources[row, "resources", on = "resource.id", nomatch = NA]$resources[[1L]] %??% list()
Experiment$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, prob.pars = row$prob.pars[[1L]], algo.pars = row$algo.pars[[1L]], seed = getSeed(reg$seed, row$job.id),
Experiment$new(file.dir = reg$file.dir, reader %??% RDSReader$new(FALSE), id = row$job.id, prob.pars = row$prob.pars[[1L]], algo.pars = row$algo.pars[[1L]], seed = getSeed(reg$seed, row$job.id, reg$fix.seed),
repl = row$repl, resources = resources, prob.name = row$problem, algo.name = row$algorithm)
}

Expand All @@ -185,12 +196,13 @@ getJob = function(jc, i, reader = NULL) {

getJob.JobCollection = function(jc, i, reader = RDSReader$new(FALSE)) {
row = jc$jobs[i]
Job$new(file.dir = jc$file.dir, reader = reader, id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(jc$seed, row$job.id), resources = jc$resources)
Job$new(file.dir = jc$file.dir, reader = reader, id = row$job.id, job.pars = row$job.pars[[1L]], seed = getSeed(jc$seed, row$job.id, jc$fix.seed), resources = jc$resources)
}

#get back to this
getJob.ExperimentCollection = function(jc, i, reader = RDSReader$new(FALSE)) {
row = jc$jobs[i]
Experiment$new(file.dir = jc$file.dir, reader = reader, id = row$job.id, prob.pars = row$prob.pars[[1L]],
algo.pars = row$algo.pars[[1L]], seed = getSeed(jc$seed, row$job.id), repl = row$repl,
algo.pars = row$algo.pars[[1L]], seed = getSeed(jc$seed, row$job.id, jc$fix.seed), repl = row$repl,
resources = jc$resources, prob.name = row$problem, algo.name = row$algorithm, compress = jc$compress)
}
2 changes: 2 additions & 0 deletions R/JobCollection.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#' \item{resources:}{Named list of specified computational resources.}
#' \item{uri}{Location of the job description file (saved with \code{\link[base]{saveRDS}}) on the file system.}
#' \item{seed}{\code{integer(1)} Seed of the \link{Registry}.}
#' \item{fix.seed}{\code{logical(1)} The \code{fix.seed} setting of the \link{Registry}, i.e. whether all jobs use the same seed.}
#' \item{packages}{\code{character} with required packages to load via \code{\link[base]{require}}.}
#' \item{namespaces}{\code{character} with required packages to load via \code{\link[base]{requireNamespace}}.}
#' \item{source}{\code{character} with list of files to source before execution.}
Expand Down Expand Up @@ -54,6 +55,7 @@ createCollection = function(jobs, resources = list(), reg = getDefaultRegistry()
jc$file.dir = reg$file.dir
jc$work.dir = reg$work.dir
jc$seed = reg$seed
jc$fix.seed = reg$fix.seed
jc$uri = getJobFiles(reg, hash = jc$job.hash)
jc$log.file = fs::path(reg$file.dir, "logs", sprintf("%s.log", jc$job.hash))
jc$packages = reg$packages
Expand Down
15 changes: 13 additions & 2 deletions R/Problem.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@
#' see \code{\link{ExperimentRegistry}}.
#' If \code{seed} is set to \code{NULL} (default), the job seed is used to instantiate the problem and
#' different algorithms see different stochastic instances of the same problem.
#' @param fix.seed [\code{logical(1)}]\cr
#' Whether to fix the seed for this problem. This allows using the same seed for all problem instances so that
#' all algorithms are evaluated on the same stochastic instance. There are four cases for \code{fix.seed} and \code{seed}:
#' (1) If \code{fix.seed} is \code{TRUE} and \code{seed} is specified, then all stochastic instances are the same
#' and are seeded with \code{seed}.
#' (2) If \code{fix.seed} is \code{TRUE} and \code{seed} is not specified, then all stochastic instances are the same
#' and are seeded with the registry seed.
#' (3) If \code{fix.seed} is \code{FALSE} (default) and \code{seed} is specified, then the seeding strategy is the one
#' described for \code{seed}.
#' (4) If \code{fix.seed} is \code{FALSE} (default) and \code{seed} is not specified, then the seeding strategy is likewise
#' the one described for \code{seed}.
#' @param cache [\code{logical(1)}]\cr
#' If \code{TRUE} and \code{seed} is set, problem instances will be cached on the file system.
#' This assumes that each problem instance is deterministic for each combination of hyperparameter setting
Expand Down Expand Up @@ -64,7 +75,7 @@
#' tmp$problems
#' tmp$algorithms
#' getJobPars(reg = tmp)
addProblem = function(name, data = NULL, fun = NULL, seed = NULL, cache = FALSE, reg = getDefaultRegistry()) {
addProblem = function(name, data = NULL, fun = NULL, seed = NULL, fix.seed = FALSE, cache = FALSE, reg = getDefaultRegistry()) {
assertRegistry(reg, class = "ExperimentRegistry", writeable = TRUE)
assertString(name, min.chars = 1L)
if (!stri_detect_regex(name, "^[[:alnum:]_.-]+$"))
Expand All @@ -82,7 +93,7 @@ addProblem = function(name, data = NULL, fun = NULL, seed = NULL, cache = FALSE,
}

info("Adding problem '%s'", name)
prob = setClasses(list(name = name, seed = seed, cache = cache, data = data, fun = fun), "Problem")
prob = setClasses(list(name = name, seed = seed, fix.seed = fix.seed, cache = cache, data = data, fun = fun), "Problem")
writeRDS(prob, file = getProblemURI(reg, name), compress = reg$compress)
reg$problems = union(reg$problems, name)
cache.dir = getProblemCacheDir(reg, name)
Expand Down
9 changes: 7 additions & 2 deletions R/Registry.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@
#' Files which should be loaded on the slaves prior to executing a job.
#' Calls \code{\link[base]{load}} using the \code{\link[base]{.GlobalEnv}}.
#' @param seed [\code{integer(1)}]\cr
#' Start seed for jobs. Each job uses the (\code{seed} + \code{job.id}) as seed.
#' Start seed for jobs. Each job uses (\code{seed} + \code{job.id}) as seed if \code{fix.seed} is \code{FALSE}.
#' Default is a random integer between 1 and 32768.
#' @param fix.seed [\code{logical(1)}]\cr
#' Fix seed for jobs. If set to \code{TRUE}, each job uses the same seed.
#' Default is \code{FALSE}, which means each job uses (\code{seed} + \code{job.id}) as seed.
#' @param make.default [\code{logical(1)}]\cr
#' If set to \code{TRUE}, the created registry is saved inside the package
#' namespace and acts as default registry. You might want to switch this
Expand All @@ -98,6 +101,7 @@
#' \item{\code{packages} [character()]:}{Packages to load on the slaves.}
#' \item{\code{namespaces} [character()]:}{Namespaces to load on the slaves.}
#' \item{\code{seed} [integer(1)]:}{Registry seed. Before each job is executed, the seed \code{seed + job.id} is set.}
#' \item{\code{fix.seed} [logical(1)]:}{Fix seed. Determines whether the same seed is used for all computational jobs.}
#' \item{\code{cluster.functions} [cluster.functions]:}{Usually set in your \code{conf.file}. Set via a call to \code{\link{makeClusterFunctions}}. See example.}
#' \item{\code{default.resources} [named list()]:}{Usually set in your \code{conf.file}. Named list of default resources.}
#' \item{\code{max.concurrent.jobs} [integer(1)]:}{Usually set in your \code{conf.file}. Maximum number of concurrent jobs for a single user and current registry on the system.
Expand All @@ -123,7 +127,7 @@
#' tmp$packages = c("MASS")
#' saveRegistry(reg = tmp)
makeRegistry = function(file.dir = "registry", work.dir = getwd(), conf.file = findConfFile(), packages = character(0L), namespaces = character(0L),
source = character(0L), load = character(0L), seed = NULL, make.default = TRUE) {
source = character(0L), load = character(0L), seed = NULL, fix.seed = FALSE, make.default = TRUE) {
assertString(file.dir, na.ok = TRUE)
if (!is.na(file.dir))
assertPathForOutput(file.dir, overwrite = FALSE)
Expand All @@ -145,6 +149,7 @@ makeRegistry = function(file.dir = "registry", work.dir = getwd(), conf.file = f
reg$source = source
reg$load = load
reg$seed = seed
reg$fix.seed = fix.seed
reg$writeable = TRUE
reg$version = packageVersion("batchtools")

Expand Down
14 changes: 9 additions & 5 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,15 @@ Rscript = function() {
fs::path(R.home("bin"), ifelse(testOS("windows"), "Rscript.exe", "Rscript"))
}

getSeed = function(start.seed, id) {
if (id > .Machine$integer.max - start.seed)
start.seed - .Machine$integer.max + id
else
start.seed + id
# Derive a seed from a start seed and an integer offset.
#
# start.seed: base seed.
# id:         offset added to the base seed (callers pass a job id or a
#             replication index).
# fix.seed:   if TRUE, the start seed is returned unchanged so that every
#             id maps to the same seed. A missing flag (NULL / zero-length)
#             or NA is treated as FALSE.
#
# Returns start.seed if the seed is fixed, otherwise start.seed + id,
# wrapped around so the result does not exceed .Machine$integer.max.
getSeed = function(start.seed, id, fix.seed = FALSE) {
  # Callers forward the flag straight from list-like objects (e.g.
  # reg$fix.seed); if the field is absent this yields NULL, and `if (NULL)`
  # would abort with "argument is of length zero". Fall back to the
  # non-fixed behaviour instead of failing.
  if (length(fix.seed) == 0L || anyNA(fix.seed))
    fix.seed = FALSE

  if (fix.seed)
    return(start.seed)

  # Wrap around instead of overflowing the integer range.
  if (id > .Machine$integer.max - start.seed)
    start.seed - .Machine$integer.max + id
  else
    start.seed + id
}

chsetdiff = function(x, y) {
Expand Down
7 changes: 4 additions & 3 deletions man/JobCollection.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/Worker.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 4 additions & 10 deletions man/addExperiments.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 14 additions & 8 deletions man/addProblem.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 2 additions & 7 deletions man/assertRegistry.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions man/batchExport.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 2 additions & 7 deletions man/batchMap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 5 additions & 12 deletions man/batchMapResults.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 2 additions & 8 deletions man/batchReduce.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading