Skip to content

Commit

Permalink
Merge pull request #4494 from vgteam/sniff-slurm
Browse files Browse the repository at this point in the history
Magically pick up Slurm CPU limits via SLURM_JOB_CPUS_PER_NODE
  • Loading branch information
adamnovak authored Jan 17, 2025
2 parents 6554344 + 1bb0dd3 commit e91be89
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 3 deletions.
33 changes: 32 additions & 1 deletion src/utility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
#include <fstream>
#include <iostream>
#include <cctype>
// We don't define _GNU_SOURCE ourselves to get the cpuset functions, since
// it will already be defined for libstdc++ on the platforms where we need them.
#include <sched.h>


// For setting the temporary directory in submodules.
Expand Down Expand Up @@ -118,7 +121,7 @@ void choose_good_thread_count() {
if (count == 0) {
// First priority: OMP_NUM_THREADS
const char* value = getenv("OMP_NUM_THREADS");
if (value) {
if (value && *value != '\0') {
// Read the value. Throws if it isn't a legit number.
count = std::stoi(value);
}
Expand All @@ -144,6 +147,34 @@ void choose_good_thread_count() {
}
}

#if !defined(__APPLE__) && defined(_GNU_SOURCE)
if (count == 0) {
// Next priority: CPU affinity mask (used by Slurm)
cpu_set_t mask;
if (sched_getaffinity(getpid(), sizeof(cpu_set_t), &mask)) {
// TODO: If the affinity mask has more than 1024 bits, it does not fit in glibc's fixed-size cpu_set_t and sched_getaffinity() fails with EINVAL.
// We're supposed to then retry with increasingly large dynamically-allocated CPU sets (CPU_ALLOC) until we find a size that works.
auto problem = errno;
std::cerr << "warning[vg]: Cannot determine CPU count from affinity mask: " << strerror(problem) << std::endl;
} else {
// We're also supposed to intersect this mask with the set of processors
// that actually exist, in case the mask has bits set for more processors
// than are present. But Linux doesn't seem to set such bits by default,
// so we don't worry about it.
count = CPU_COUNT(&mask);
}
}
#endif

if (count == 0) {
// Next priority: SLURM_JOB_CPUS_PER_NODE
const char* value = getenv("SLURM_JOB_CPUS_PER_NODE");
if (value && *value != '\0') {
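// Read the leading integer; Slurm may append a suffix such as "(x2)" or further comma-separated entries, which std::stoi ignores. Throws if the value doesn't start with a number.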
count = std::stoi(value);
}
}

if (count == 0) {
// Next priority: hardware concurrency as reported by the STL.
// This may itself be 0 if ungettable.
Expand Down
5 changes: 3 additions & 2 deletions src/utility.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@ double get_fraction_of_ns(const string& seq);
/// TODO: Assumes that this is the same for every parallel section.
int get_thread_count(void);
/// Decide on and apply a sensible OMP thread count. Pay attention to
/// OMP_NUM_THREADS if set, the "hardware concurrency", and container limit
/// information that may be available in /proc.
/// OMP_NUM_THREADS and SLURM_JOB_CPUS_PER_NODE if set, the CPU affinity mask
/// where available, the "hardware concurrency", and container limit
/// information that may be available in /proc.
void choose_good_thread_count();
string wrap_text(const string& str, size_t width);
bool is_number(const string& s);
Expand Down

1 comment on commit e91be89

@adamnovak
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

vg CI tests complete for merge to master. View the full report here.

16 tests passed, 0 tests failed and 0 tests skipped in 17540 seconds

Please sign in to comment.