diff --git a/apps/io/shm_io.h b/apps/io/shm_io.h
index ae8b97d5..7136ce87 100644
--- a/apps/io/shm_io.h
+++ b/apps/io/shm_io.h
@@ -13,6 +13,7 @@
 #include "kaminpar-shm/datastructures/compressed_graph.h"
 #include "kaminpar-shm/datastructures/csr_graph.h"
+#include "kaminpar-shm/datastructures/graph.h"
 #include "kaminpar-shm/kaminpar.h"
 
 #include "kaminpar-common/datastructures/static_array.h"
diff --git a/apps/tools/shm_graph_properties_tool.cc b/apps/tools/shm_graph_properties_tool.cc
index 82bd4d7d..35c2e82a 100644
--- a/apps/tools/shm_graph_properties_tool.cc
+++ b/apps/tools/shm_graph_properties_tool.cc
@@ -76,17 +76,30 @@ void print_graph_properties(const Graph &graph, const Context ctx, std::ostream
 int main(int argc, char *argv[]) {
   Context ctx = create_default_context();
   std::string graph_filename;
+  io::GraphFileFormat graph_file_format = io::GraphFileFormat::METIS;
 
   CLI::App app("Shared-memory graph properties tool");
   app.add_option("-G,--graph", graph_filename, "Input graph in METIS format")->required();
   app.add_option("-t,--threads", ctx.parallel.num_threads, "Number of threads");
+  app.add_option("-f,--graph-file-format", graph_file_format)
+      ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description(""))
+      ->description(R"(Graph file formats:
+  - metis
+  - parhip)")
+      ->capture_default_str();
   create_graph_compression_options(&app, ctx);
   CLI11_PARSE(app, argc, argv);
 
   tbb::global_control gc(tbb::global_control::max_allowed_parallelism, ctx.parallel.num_threads);
 
-  Graph graph =
-      io::read(graph_filename, ctx.compression.enabled, ctx.compression.may_dismiss, false, false);
+  Graph graph = io::read(
+      graph_filename,
+      graph_file_format,
+      ctx.compression.enabled,
+      ctx.compression.may_dismiss,
+      false,
+      false
+  );
 
   ctx.debug.graph_name = str::extract_basename(graph_filename);
   ctx.compression.setup(graph);
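For readers unfamiliar with the CLI11 idiom used above: `CLI::CheckedTransformer` validates an option string against a name-to-value map and writes the mapped enum back into the bound variable. Here is a minimal, self-contained sketch of the pattern, with a hypothetical `Format` enum standing in for `io::GraphFileFormat` and an inline map standing in for `io::get_graph_file_formats()`:

```cpp
// Minimal sketch of the CLI11 enum-option pattern used above; Format and the
// inline map are illustrative stand-ins, not KaMinPar types.
#include <CLI/CLI.hpp>

#include <string>
#include <unordered_map>

enum class Format { METIS, PARHIP };

int main(int argc, char *argv[]) {
  Format format = Format::METIS;

  CLI::App app("demo");
  // CheckedTransformer rejects unknown strings and converts valid ones to the
  // mapped enum value before assignment.
  app.add_option("-f,--format", format)
      ->transform(CLI::CheckedTransformer(std::unordered_map<std::string, Format>{
          {"metis", Format::METIS}, {"parhip", Format::PARHIP}}));
  CLI11_PARSE(app, argc, argv);
}
```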
diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc
index 427483eb..1f67d8e4 100644
--- a/kaminpar-cli/kaminpar_arguments.cc
+++ b/kaminpar-cli/kaminpar_arguments.cc
@@ -140,7 +140,7 @@ CLI::Option_group *create_coarsening_options(CLI::App *app, Context &ctx) {
       ->capture_default_str();
 
   // Clustering options:
-  coarsening->add_option("--c-clustering-algorithm", ctx.coarsening.algorithm)
+  coarsening->add_option("--c-clustering-algorithm", ctx.coarsening.clustering.algorithm)
       ->transform(CLI::CheckedTransformer(get_clustering_algorithms()).description(""))
       ->description(R"(One of the following options:
   - noop: disable coarsening
@@ -239,10 +239,17 @@ Options are:
       ->description(
          R"(Determines the mode for aggregating ratings in the second phase of label propagation.
Options are:
+  - none:     Skip the second phase
  - direct:   Write the ratings directly into the global vector (shared between threads)
  - buffered: Write the ratings into a thread-local buffer and then copy them into the global vector when the buffer is full
)"
       );
+  lp->add_option(
+        "--c-lp-second-phase-relabel",
+        ctx.coarsening.clustering.lp.relabel_before_second_phase,
+        "Relabel the clusters before running the second phase"
+  )
+      ->capture_default_str();
 
   lp->add_option("--c-lp-two-hop-strategy", ctx.coarsening.clustering.lp.two_hop_strategy)
       ->transform(CLI::CheckedTransformer(get_two_hop_strategies()).description(""))
@@ -365,6 +372,36 @@ CLI::Option_group *create_lp_refinement_options(CLI::App *app, Context &ctx) {
   )
       ->capture_default_str();
 
+  lp->add_option(
+      "--r-lp-two-phases",
+      ctx.refinement.lp.use_two_phases,
+      "Uses two phases in each iteration, where in the second phase the high-degree nodes are "
+      "treated separately"
+  )
+      ->capture_default_str();
+  lp->add_option("--r-lp-second-phase-select-mode", ctx.refinement.lp.second_phase_select_mode)
+      ->transform(CLI::CheckedTransformer(get_second_phase_select_modes()).description(""))
+      ->description(
+          R"(Determines the mode for selecting nodes for the second phase of label propagation.
+Options are:
+  - high-degree:     Select nodes with high degree
+  - full-rating-map: Select nodes which have a full rating map in the first phase
+  )"
+      )
+      ->capture_default_str();
+  lp->add_option(
+      "--r-lp-second-phase-aggregation-mode", ctx.refinement.lp.second_phase_aggregation_mode
+  )
+      ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_modes()).description(""))
+      ->description(
+          R"(Determines the mode for aggregating ratings in the second phase of label propagation.
+Options are:
+  - none:     Skip the second phase
+  - direct:   Write the ratings directly into the global vector (shared between threads)
+  - buffered: Write the ratings into a thread-local buffer and then copy them into the global vector when the buffer is full
+  )"
+      );
+
   return lp;
 }
diff --git a/kaminpar-common/datastructures/concurrent_two_level_vector.h b/kaminpar-common/datastructures/concurrent_two_level_vector.h
index 31203b53..056f2c9f 100644
--- a/kaminpar-common/datastructures/concurrent_two_level_vector.h
+++ b/kaminpar-common/datastructures/concurrent_two_level_vector.h
@@ -55,7 +55,10 @@ class ConcurrentTwoLevelVector {
    *
    * @param capacity The capacity of the vector.
    */
-  ConcurrentTwoLevelVector(const Size capacity = 0) : _values(capacity), _table(0) {}
+  ConcurrentTwoLevelVector(const Size capacity = 0)
+      : _capacity(capacity),
+        _values(capacity),
+        _table(0) {}
 
   ConcurrentTwoLevelVector(const ConcurrentTwoLevelVector &) = delete;
   ConcurrentTwoLevelVector &operator=(const ConcurrentTwoLevelVector &) = delete;
@@ -63,6 +66,15 @@ class ConcurrentTwoLevelVector {
   ConcurrentTwoLevelVector(ConcurrentTwoLevelVector &&) noexcept = default;
   ConcurrentTwoLevelVector &operator=(ConcurrentTwoLevelVector &&) noexcept = default;
 
+  /*!
+   * Returns the number of elements that this vector can hold.
+   *
+   * @return The number of elements that this vector can hold.
+   */
+  [[nodiscard]] Size capacity() const {
+    return _capacity;
+  }
+
   /*!
    * Resizes the vector.
    *
@@ -70,6 +82,7 @@ class ConcurrentTwoLevelVector {
    */
   void resize(const Size capacity) {
     _values.resize(capacity);
+    _capacity = capacity;
   }
 
   /*!
@@ -78,16 +91,49 @@ class ConcurrentTwoLevelVector {
   void free() {
     _values.free();
     _table = ConcurrentHashTable(0);
+    _capacity = 0;
   }
 
   /*!
   * Resets the vector such that new elements can be inserted.
    */
   void reset() {
-    // As Growt does not provide a clear function, just create a new hash table.
+    // As growt does not provide a clear function, just create a new hash table.
     _table = ConcurrentHashTable(0);
   }
 
+  /**
+   * Reassigns stored values according to a provided mapping.
+   *
+   * @param mapping The mapping according to which the values are reassigned.
+   * @param new_size The new size of the vector.
+   */
+  void reassign(const StaticArray<Size> &mapping, const Size new_size) {
+    StaticArray<FirstValue> new_values(new_size);
+    ConcurrentHashTable new_table(0);
+
+    tbb::parallel_for(tbb::blocked_range<Size>(0, _values.size()), [&](const auto &r) {
+      for (Size pos = r.begin(); pos != r.end(); ++pos) {
+        const Value value = _values[pos];
+
+        if (value == kMaxFirstValue) {
+          Size new_pos = mapping[pos] - 1;
+          new_values[new_pos] = kMaxFirstValue;
+
+          const Value actual_value = (*_table.get_handle().find(pos)).second;
+          new_table.get_handle().insert(new_pos, actual_value);
+        } else if (value != 0) {
+          Size new_pos = mapping[pos] - 1;
+          new_values[new_pos] = value;
+        }
+      }
+    });
+
+    _values = std::move(new_values);
+    _table = std::move(new_table);
+    _capacity = new_size;
+  }
+
   /*!
    * Accesses a value at a given position.
    *
@@ -194,6 +240,7 @@
   }
 
 private:
+  Size _capacity;
   StaticArray<FirstValue> _values;
   ConcurrentHashTable _table;
 };
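The two-level scheme that `reassign` has to preserve works as follows: small values live inline in the packed `_values` array, and a slot equal to `kMaxFirstValue` is a sentinel meaning the actual value has overflowed into the hash table. Below is a sequential sketch of that idea with illustrative types; the real class does all of this with atomics and a concurrent table:

```cpp
// Sequential sketch of the two-level storage idea; names and types are
// illustrative, not the ConcurrentTwoLevelVector API.
#include <cstdint>
#include <limits>
#include <unordered_map>
#include <vector>

class TwoLevelCounter {
  static constexpr std::uint8_t kMaxFirst = std::numeric_limits<std::uint8_t>::max();

public:
  explicit TwoLevelCounter(std::size_t capacity) : _first(capacity) {}

  void add(std::size_t pos, std::uint64_t delta) {
    if (_first[pos] == kMaxFirst) {         // already spilled: update second level
      _second[pos] += delta;
    } else if (_first[pos] + delta < kMaxFirst) {
      _first[pos] += static_cast<std::uint8_t>(delta); // fits in the first level
    } else {                                // overflow: spill the total, mark slot
      _second[pos] = _first[pos] + delta;
      _first[pos] = kMaxFirst;              // sentinel: value lives in _second
    }
  }

  std::uint64_t get(std::size_t pos) const {
    return _first[pos] == kMaxFirst ? _second.at(pos) : _first[pos];
  }

private:
  std::vector<std::uint8_t> _first;                       // packed common case
  std::unordered_map<std::size_t, std::uint64_t> _second; // rare large values
};
```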
@@ -225,7 +272,7 @@
    *
    * @param capacity The capacity of the vector.
    */
-  ConcurrentTwoLevelVector(const Size capacity = 0) : _values(capacity) {}
+  ConcurrentTwoLevelVector(const Size capacity = 0) : _capacity(capacity), _values(capacity) {}
 
   ConcurrentTwoLevelVector(const ConcurrentTwoLevelVector &) = delete;
   ConcurrentTwoLevelVector &operator=(const ConcurrentTwoLevelVector &) = delete;
@@ -233,6 +280,15 @@ class ConcurrentTwoLevelVector {
   ConcurrentTwoLevelVector(ConcurrentTwoLevelVector &&) noexcept = default;
   ConcurrentTwoLevelVector &operator=(ConcurrentTwoLevelVector &&) noexcept = default;
 
+  /*!
+   * Returns the number of elements that this vector can hold.
+   *
+   * @return The number of elements that this vector can hold.
+   */
+  [[nodiscard]] Size capacity() const {
+    return _capacity;
+  }
+
   /*!
    * Resizes the vector.
    *
@@ -240,6 +296,7 @@ class ConcurrentTwoLevelVector {
    */
   void resize(const Size capacity) {
     _values.resize(capacity);
+    _capacity = capacity;
   }
 
   /*!
@@ -248,6 +305,7 @@ class ConcurrentTwoLevelVector {
   void free() {
     _values.free();
     _table.clear();
+    _capacity = 0;
   }
 
   /*!
@@ -257,6 +315,45 @@ class ConcurrentTwoLevelVector {
     _table.clear();
   }
 
+  /**
+   * Reassigns stored values according to a provided mapping.
+   *
+   * @param mapping The mapping according to which the values are reassigned.
+   * @param new_size The new size of the vector.
+   */
+  void reassign(const StaticArray<Size> &mapping, const Size new_size) {
+    StaticArray<FirstValue> new_values(new_size);
+    ConcurrentHashTable new_table;
+
+    tbb::parallel_for(tbb::blocked_range<Size>(0, _values.size()), [&](const auto &r) {
+      for (Size pos = r.begin(); pos != r.end(); ++pos) {
+        const Value value = _values[pos];
+
+        if (value == kMaxFirstValue) {
+          Size new_pos = mapping[pos] - 1;
+          new_values[new_pos] = kMaxFirstValue;
+
+          const Value actual_value = [&] {
+            typename ConcurrentHashTable::const_accessor entry;
+            _table.find(entry, pos);
+            return entry->second;
+          }();
+
+          typename ConcurrentHashTable::accessor entry;
+          new_table.insert(entry, new_pos);
+          entry->second = actual_value;
+        } else if (value != 0) {
+          Size new_pos = mapping[pos] - 1;
+          new_values[new_pos] = value;
+        }
+      }
+    });
+
+    _values = std::move(new_values);
+    _table = std::move(new_table);
+    _capacity = new_size;
+  }
+
   /*!
    * Accesses a value at a given position.
    *
@@ -309,7 +406,7 @@
   void atomic_add(const Size pos, const Value delta) {
     KASSERT(pos < _values.size());
 
-    Value value = _values[pos];
+    FirstValue value = _values[pos];
     bool success;
     do {
       if (value == kMaxFirstValue) {
@@ -323,7 +420,7 @@
         break;
       }
 
-      const Value new_value = value + delta;
+      const Value new_value = static_cast<Value>(value) + delta;
       if (new_value < kMaxFirstValue) {
         success = __atomic_compare_exchange_n(
             &_values[pos], &value, new_value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED
@@ -357,7 +454,7 @@
   void atomic_sub(const Size pos, const Value delta) {
     KASSERT(pos < _values.size());
 
-    Value value = _values[pos];
+    FirstValue value = _values[pos];
     bool success;
     do {
       if (value == kMaxFirstValue) {
@@ -378,6 +475,7 @@
   }
 
 private:
+  Size _capacity;
   StaticArray<FirstValue> _values;
   ConcurrentHashTable _table;
 };
diff --git a/kaminpar-shm/coarsening/cluster_coarsener.cc b/kaminpar-shm/coarsening/cluster_coarsener.cc
index 690c1551..d512568c 100644
--- a/kaminpar-shm/coarsening/cluster_coarsener.cc
+++ b/kaminpar-shm/coarsening/cluster_coarsener.cc
@@ -32,6 +32,7 @@ bool ClusteringCoarsener::coarsen() {
   SCOPED_TIMER("Level", std::to_string(_hierarchy.size()));
 
   if (_clustering.size() < current().n()) {
+    SCOPED_HEAP_PROFILER("Allocation");
     SCOPED_TIMER("Allocation");
     _clustering.resize(current().n());
   }
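The `SCOPED_HEAP_PROFILER`/`SCOPED_TIMER` macros added above follow the usual RAII guard pattern: construction starts a measurement and destruction at scope exit stops and records it. A generic sketch of the technique follows; this is not KaMinPar's actual timer, which additionally maintains a hierarchical timer tree:

```cpp
// Generic sketch of the RAII scope-timer pattern behind macros like
// SCOPED_TIMER; the reporting here is deliberately simplistic.
#include <chrono>
#include <iostream>
#include <string>

class ScopedTimer {
public:
  explicit ScopedTimer(std::string name)
      : _name(std::move(name)), _start(std::chrono::steady_clock::now()) {}

  ~ScopedTimer() {
    const auto elapsed = std::chrono::steady_clock::now() - _start;
    std::cout << _name << ": "
              << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count()
              << " ms\n";
  }

private:
  std::string _name;
  std::chrono::steady_clock::time_point _start;
};

void coarsen_level() {
  ScopedTimer timer("Allocation"); // reported automatically at scope exit
  // ... allocate buffers ...
}
```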
diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc
index 3ce6f05c..bdbf8095 100644
--- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc
+++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc
@@ -7,70 +7,359 @@
  ******************************************************************************/
 #include "kaminpar-shm/coarsening/clustering/lp_clusterer.h"
 
+#include "kaminpar-shm/label_propagation.h"
+
+#include "kaminpar-common/heap_profiler.h"
+#include "kaminpar-common/timer.h"
+
 namespace kaminpar::shm {
-LPClustering::LPClustering(const NodeID max_n, const CoarseningContext &c_ctx)
-    : _csr_core(std::make_unique<LPClusteringImpl<CSRGraph>>(max_n, c_ctx)),
-      _compact_csr_core(std::make_unique<LPClusteringImpl<CompactCSRGraph>>(max_n, c_ctx)),
-      _compressed_core(std::make_unique<LPClusteringImpl<CompressedGraph>>(max_n, c_ctx)) {}
+
+//
+// Actual implementation -- not exposed in header
+//
+
+struct LPClusteringConfig : public LabelPropagationConfig {
+  using ClusterID = NodeID;
+  using ClusterWeight = BlockWeight;
+  static constexpr bool kTrackClusterCount = true;
+  static constexpr bool kUseTwoHopClustering = true;
+};
+
+template <typename Graph>
+class LPClusteringImpl final
+    : public ChunkRandomLabelPropagation<LPClusteringImpl<Graph>, LPClusteringConfig, Graph>,
+      public OwnedRelaxedClusterWeightVector<NodeID, NodeWeight>,
+      public NonatomicClusterVectorRef<NodeID, NodeID> {
+  SET_DEBUG(false);
+
+  using Base = ChunkRandomLabelPropagation<LPClusteringImpl<Graph>, LPClusteringConfig, Graph>;
+  using ClusterWeightBase = OwnedRelaxedClusterWeightVector<NodeID, NodeWeight>;
+  using ClusterBase = NonatomicClusterVectorRef<NodeID, NodeID>;
+
+public:
+  using Permutations = Base::Permutations;
+
+  LPClusteringImpl(const CoarseningContext &c_ctx, Permutations &permutations)
+      : Base(permutations),
+        ClusterWeightBase(c_ctx.clustering.lp.use_two_level_cluster_weight_vector),
+        _lp_ctx(c_ctx.clustering.lp) {
+    Base::set_max_degree(_lp_ctx.large_degree_threshold);
+    Base::set_max_num_neighbors(_lp_ctx.max_num_neighbors);
+    Base::set_use_two_phases(_lp_ctx.use_two_phases);
+    Base::set_second_phase_select_mode(_lp_ctx.second_phase_select_mode);
+    Base::set_second_phase_aggregation_mode(_lp_ctx.second_phase_aggregation_mode);
+    Base::set_relabel_before_second_phase(_lp_ctx.relabel_before_second_phase);
+  }
+
+  void set_max_cluster_weight(const NodeWeight max_cluster_weight) {
+    _max_cluster_weight = max_cluster_weight;
+  }
+
+  void preinitialize(const NodeID num_nodes) {
+    Base::preinitialize(num_nodes, num_nodes);
+  }
+
+  void allocate(const NodeID num_clusters) {
+    SCOPED_HEAP_PROFILER("Allocation");
+    SCOPED_TIMER("Allocation");
+
+    Base::allocate();
+    ClusterWeightBase::allocate_cluster_weights(num_clusters);
+  }
+
+  void free() {
+    SCOPED_HEAP_PROFILER("Free");
+    SCOPED_TIMER("Free");
+
+    Base::free();
+    ClusterWeightBase::free();
+  }
+
+  void compute_clustering(StaticArray<NodeID> &clustering, const Graph &graph) {
+    ClusterWeightBase::reset_cluster_weights();
+    ClusterBase::init_clusters_ref(clustering);
+    Base::initialize(&graph, graph.n());
+
+    for (std::size_t iteration = 0; iteration < _lp_ctx.num_iterations; ++iteration) {
+      SCOPED_TIMER("Iteration", std::to_string(iteration));
+      if (Base::perform_iteration() == 0) {
+        break;
+      }
+
+      // Only relabel during the first iteration because afterwards the memory for the second phase
+      // is already allocated.
+      if (iteration == 0) {
+        Base::set_relabel_before_second_phase(false);
+      }
+    }
+
+    cluster_isolated_nodes();
+    cluster_two_hop_nodes();
+  }
+
+private:
+  void cluster_two_hop_nodes() {
+    SCOPED_HEAP_PROFILER("Handle two-hop nodes");
+    SCOPED_TIMER("Handle two-hop nodes");
+
+    if (!should_handle_two_hop_nodes()) {
+      return;
+    }
+
+    switch (_lp_ctx.two_hop_strategy) {
+    case TwoHopStrategy::MATCH:
+      Base::match_two_hop_nodes();
+      break;
+    case TwoHopStrategy::MATCH_THREADWISE:
+      Base::match_two_hop_nodes_threadwise();
+      break;
+    case TwoHopStrategy::CLUSTER:
+      Base::cluster_two_hop_nodes();
+      break;
+    case TwoHopStrategy::CLUSTER_THREADWISE:
+      Base::cluster_two_hop_nodes_threadwise();
+      break;
+    case TwoHopStrategy::LEGACY:
+      handle_two_hop_clustering_legacy();
+      break;
+    case TwoHopStrategy::DISABLE:
+      break;
+    }
+  }
+
+  void cluster_isolated_nodes() {
+    SCOPED_HEAP_PROFILER("Handle isolated nodes");
+    SCOPED_TIMER("Handle isolated nodes");
+
+    switch (_lp_ctx.isolated_nodes_strategy) {
+    case IsolatedNodesClusteringStrategy::MATCH:
+      Base::match_isolated_nodes();
+      break;
+    case IsolatedNodesClusteringStrategy::CLUSTER:
+      Base::cluster_isolated_nodes();
+      break;
+    case IsolatedNodesClusteringStrategy::MATCH_DURING_TWO_HOP:
+      if (should_handle_two_hop_nodes()) {
+        Base::match_isolated_nodes();
+      }
+      break;
+    case IsolatedNodesClusteringStrategy::CLUSTER_DURING_TWO_HOP:
+      if (should_handle_two_hop_nodes()) {
+        Base::cluster_isolated_nodes();
+      }
+      break;
+    case IsolatedNodesClusteringStrategy::KEEP:
+      break;
+    }
+  }
+
+  [[nodiscard]] bool should_handle_two_hop_nodes() const {
+    return (1.0 - 1.0 * _current_num_clusters / _graph->n()) <= _lp_ctx.two_hop_threshold;
+  }
+
+  // @todo: old implementation that should no longer be used
+  void handle_two_hop_clustering_legacy() {
+    // Reset _favored_clusters entries for nodes that are not considered for
+    // 2-hop clustering, i.e., nodes that are already clustered with at least one other node or
+    // nodes that have more weight than max_weight/2.
+    // Set _favored_clusters to dummy entry _graph->n() for isolated nodes
+    tbb::parallel_for<NodeID>(0, _graph->n(), [&](const NodeID u) {
+      if (u != cluster(u)) {
+        Base::_favored_clusters[u] = u;
+      } else {
+        const auto initial_weight = initial_cluster_weight(u);
+        const auto current_weight = ClusterWeightBase::cluster_weight(u);
+        const auto max_weight = max_cluster_weight(u);
+        if (current_weight != initial_weight || current_weight > max_weight / 2) {
+          Base::_favored_clusters[u] = u;
+        }
+      }
+    });
+
+    tbb::parallel_for<NodeID>(0, _graph->n(), [&](const NodeID u) {
+      // Abort once we have merged enough clusters to achieve the configured minimum shrink factor
+      if (Base::should_stop()) {
+        return;
+      }
-
-// We must declare the destructor explicitly here, otherwise, it is implicitly generated before
-// LabelPropagationClusterCore is complete.
+      // Skip nodes that should not be considered during 2-hop clustering
+      const NodeID favored_leader = Base::_favored_clusters[u];
+      if (favored_leader == u) {
+        return;
+      }
+
+      do {
+        // If this works, we set ourself as clustering partners for nodes that have the same favored
+        // cluster we have
+        NodeID expected_value = favored_leader;
+        if (__atomic_compare_exchange_n(
+                &Base::_favored_clusters[favored_leader],
+                &expected_value,
+                u,
+                false,
+                __ATOMIC_SEQ_CST,
+                __ATOMIC_SEQ_CST
+            )) {
+          break;
+        }
+
+        // If this did not work, there is another node that has the same favored cluster
+        // Try to join the cluster of that node
+        const NodeID partner = expected_value;
+        if (__atomic_compare_exchange_n(
+                &Base::_favored_clusters[favored_leader],
+                &expected_value,
+                favored_leader,
+                false,
+                __ATOMIC_SEQ_CST,
+                __ATOMIC_SEQ_CST
+            )) {
+          if (ClusterWeightBase::move_cluster_weight(
+                  u, partner, ClusterWeightBase::cluster_weight(u), max_cluster_weight(partner)
+              )) {
+            move_node(u, partner);
+            --_current_num_clusters;
+          }
+
+          break;
+        }
+      } while (true);
+    });
+  }
+
+public:
+  [[nodiscard]] NodeID initial_cluster(const NodeID u) {
+    return u;
+  }
+
+  [[nodiscard]] NodeWeight initial_cluster_weight(const NodeID cluster) {
+    return _graph->node_weight(cluster);
+  }
+
+  [[nodiscard]] NodeWeight max_cluster_weight(const NodeID /* cluster */) {
+    return _max_cluster_weight;
+  }
+
+  [[nodiscard]] bool accept_cluster(const Base::ClusterSelectionState &state) {
+    return (state.current_gain > state.best_gain ||
+            (state.current_gain == state.best_gain && state.local_rand.random_bool())) &&
+           (state.current_cluster_weight + state.u_weight <=
+                max_cluster_weight(state.current_cluster) ||
+            state.current_cluster == state.initial_cluster);
+  }
+
+  using Base::_current_num_clusters;
+  using Base::_graph;
+
+  const LabelPropagationCoarseningContext &_lp_ctx;
+  NodeWeight _max_cluster_weight = kInvalidBlockWeight;
+};
+
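`accept_cluster` above encodes the LP move rule: a candidate cluster wins if it strictly improves the gain (ties broken by a coin flip) and the move keeps the candidate under the weight limit, where the node's current cluster is always weight-feasible. Restated as a standalone predicate with simplified stand-in types:

```cpp
// Standalone restatement of the acceptance rule from accept_cluster(); the
// Candidate struct and RNG are simplified stand-ins.
#include <cstdint>
#include <random>

struct Candidate {
  std::int64_t gain;   // rating of the candidate cluster
  std::int64_t weight; // current weight of the candidate cluster
  std::uint32_t id;
};

bool accept(const Candidate &cand, const Candidate &best, std::int64_t node_weight,
            std::int64_t max_cluster_weight, std::uint32_t initial_cluster,
            std::mt19937 &rng) {
  std::bernoulli_distribution coin(0.5);
  const bool better_gain =
      cand.gain > best.gain || (cand.gain == best.gain && coin(rng));
  // A node may always stay in its own cluster, regardless of the weight limit.
  const bool feasible = cand.weight + node_weight <= max_cluster_weight ||
                        cand.id == initial_cluster;
  return better_gain && feasible;
}
```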
+class LPClusteringImplWrapper {
+public:
+  LPClusteringImplWrapper(const CoarseningContext &c_ctx)
+      : _csr_core(std::make_unique<LPClusteringImpl<CSRGraph>>(c_ctx, _permutations)),
+        _compact_csr_core(std::make_unique<LPClusteringImpl<CompactCSRGraph>>(c_ctx, _permutations)
+        ),
+        _compressed_core(std::make_unique<LPClusteringImpl<CompressedGraph>>(c_ctx, _permutations)
+        ) {}
+
+  void set_max_cluster_weight(const NodeWeight max_cluster_weight) {
+    _csr_core->set_max_cluster_weight(max_cluster_weight);
+    _compact_csr_core->set_max_cluster_weight(max_cluster_weight);
+    _compressed_core->set_max_cluster_weight(max_cluster_weight);
+  }
+
+  void set_desired_cluster_count(const NodeID count) {
+    _csr_core->set_desired_num_clusters(count);
+    _compact_csr_core->set_desired_num_clusters(count);
+    _compressed_core->set_desired_num_clusters(count);
+  }
+
+  void compute_clustering(
+      StaticArray<NodeID> &clustering, const Graph &graph, const bool free_memory_afterwards
+  ) {
+    // Compute a clustering and setup/release the data structures used by the core, so that they
+    // can be shared by all implementations.
+    const auto compute = [&](auto &core, auto &graph) {
+      if (_freed) {
+        _freed = false;
+        core.allocate(graph.n());
+      } else {
+        core.setup(std::move(_structs));
+        core.setup_cluster_weights(std::move(_cluster_weights));
+      }
+
+      core.compute_clustering(clustering, graph);
+
+      if (free_memory_afterwards) {
+        _freed = true;
+        core.free();
+      } else {
+        _structs = core.release();
+        _cluster_weights = core.take_cluster_weights();
+      }
+    };
+
+    const NodeID num_nodes = graph.n();
+    _csr_core->preinitialize(num_nodes);
+    _compact_csr_core->preinitialize(num_nodes);
+    _compressed_core->preinitialize(num_nodes);
+
+    if (auto *csr_graph = dynamic_cast<const CSRGraph *>(graph.underlying_graph());
+        csr_graph != nullptr) {
+      compute(*_csr_core, *csr_graph);
+    } else if (auto *compact_csr_graph =
+                   dynamic_cast<const CompactCSRGraph *>(graph.underlying_graph());
+               compact_csr_graph != nullptr) {
+      compute(*_compact_csr_core, *compact_csr_graph);
+    } else if (auto *compressed_graph =
+                   dynamic_cast<const CompressedGraph *>(graph.underlying_graph());
+               compressed_graph != nullptr) {
+      compute(*_compressed_core, *compressed_graph);
+    }
+
+    // Only relabel clusters during the first iteration
+    _csr_core->set_relabel_before_second_phase(false);
+    _compact_csr_core->set_relabel_before_second_phase(false);
+    _compressed_core->set_relabel_before_second_phase(false);
+  }
+
+private:
+  std::unique_ptr<LPClusteringImpl<CSRGraph>> _csr_core;
+  std::unique_ptr<LPClusteringImpl<CompactCSRGraph>> _compact_csr_core;
+  std::unique_ptr<LPClusteringImpl<CompressedGraph>> _compressed_core;
+
+  // The data structures that are used by the LP clusterer and are shared between the
+  // different implementations.
+  bool _freed = true;
+  LPClusteringImpl<CSRGraph>::Permutations _permutations;
+  LPClusteringImpl<CSRGraph>::DataStructures _structs;
+  LPClusteringImpl<CSRGraph>::ClusterWeights _cluster_weights;
+};
+
+//
+// Exposed wrapper
+//
+
+LPClustering::LPClustering(const CoarseningContext &c_ctx)
+    : _impl_wrapper(std::make_unique<LPClusteringImplWrapper>(c_ctx)) {}
+
+// We must declare the destructor explicitly here; otherwise, it is implicitly
+// generated before LPClusteringImplWrapper is complete.
 LPClustering::~LPClustering() = default;
 
 void LPClustering::set_max_cluster_weight(const NodeWeight max_cluster_weight) {
-  _csr_core->set_max_cluster_weight(max_cluster_weight);
-  _compact_csr_core->set_max_cluster_weight(max_cluster_weight);
-  _compressed_core->set_max_cluster_weight(max_cluster_weight);
+  _impl_wrapper->set_max_cluster_weight(max_cluster_weight);
 }
 
 void LPClustering::set_desired_cluster_count(const NodeID count) {
-  _csr_core->set_desired_num_clusters(count);
-  _compact_csr_core->set_desired_num_clusters(count);
-  _compressed_core->set_desired_num_clusters(count);
+  _impl_wrapper->set_desired_cluster_count(count);
 }
 
 void LPClustering::compute_clustering(
     StaticArray<NodeID> &clustering, const Graph &graph, const bool free_memory_afterwards
 ) {
-  // Compute a clustering and setup/release the data structures used by the core, so that they can
-  // be shared by all implementations.
-  const auto compute = [&](auto &core, auto &graph) {
-    if (_freed) {
-      _freed = false;
-      core.allocate();
-    } else {
-      core.setup(std::move(_structs));
-      core.setup_cluster_weights(std::move(_cluster_weights));
-    }
-
-    core.compute_clustering(clustering, graph);
-
-    if (free_memory_afterwards) {
-      _freed = true;
-      core.free();
-    } else {
-      _structs = core.release();
-      _cluster_weights = core.take_cluster_weights();
-    }
-  };
-
-  const NodeID num_nodes = graph.n();
-  _csr_core->preinitialize(num_nodes);
-  _compact_csr_core->preinitialize(num_nodes);
-  _compressed_core->preinitialize(num_nodes);
-
-  if (auto *csr_graph = dynamic_cast<const CSRGraph *>(graph.underlying_graph());
-      csr_graph != nullptr) {
-    compute(*_csr_core, *csr_graph);
-  } else if (auto *compact_csr_graph =
-                 dynamic_cast<const CompactCSRGraph *>(graph.underlying_graph());
-             compact_csr_graph != nullptr) {
-    compute(*_compact_csr_core, *compact_csr_graph);
-  } else if (auto *compressed_graph =
-                 dynamic_cast<const CompressedGraph *>(graph.underlying_graph());
-             compressed_graph != nullptr) {
-    compute(*_compressed_core, *compressed_graph);
-  }
+  return _impl_wrapper->compute_clustering(clustering, graph, free_memory_afterwards);
 }
 } // namespace kaminpar::shm
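The wrapper dispatches to one statically compiled implementation per graph representation by probing `underlying_graph()` with `dynamic_cast`, which keeps virtual calls out of the hot loops. A sketch of the pattern with made-up graph types:

```cpp
// Sketch of the dispatch-on-concrete-representation pattern used above; the
// graph types here are illustrative, not KaMinPar's.
#include <iostream>
#include <memory>

struct AbstractGraph {
  virtual ~AbstractGraph() = default;
};
struct CsrGraph : AbstractGraph {};
struct CompressedGraph : AbstractGraph {};

template <typename ConcreteGraph> void run_impl(const ConcreteGraph &) {
  // Instantiated once per representation: no virtual calls inside the kernel.
  std::cout << "running specialized implementation\n";
}

void run(const AbstractGraph &graph) {
  if (const auto *csr = dynamic_cast<const CsrGraph *>(&graph)) {
    run_impl(*csr);
  } else if (const auto *compressed = dynamic_cast<const CompressedGraph *>(&graph)) {
    run_impl(*compressed);
  }
}

int main() {
  const std::unique_ptr<AbstractGraph> g = std::make_unique<CsrGraph>();
  run(*g); // resolves to the CsrGraph specialization at runtime
}
```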
diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.h b/kaminpar-shm/coarsening/clustering/lp_clusterer.h
index 07a837f0..7e81bf06 100644
--- a/kaminpar-shm/coarsening/clustering/lp_clusterer.h
+++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.h
@@ -10,237 +10,13 @@
 #include <memory>
 
 #include "kaminpar-shm/coarsening/clusterer.h"
-#include "kaminpar-shm/context.h"
 #include "kaminpar-shm/datastructures/graph.h"
-#include "kaminpar-shm/label_propagation.h"
-
-#include "kaminpar-common/heap_profiler.h"
-#include "kaminpar-common/timer.h"
 
 namespace kaminpar::shm {
-template <typename Graph> struct LPClusteringConfig : public LabelPropagationConfig<Graph> {
-  using ClusterID = NodeID;
-  using ClusterWeight = BlockWeight;
-  static constexpr bool kTrackClusterCount = true;
-  static constexpr bool kUseTwoHopClustering = true;
-};
-
-template <typename Graph>
-class LPClusteringImpl final
-    : public ChunkRandomdLabelPropagation<LPClusteringImpl<Graph>, LPClusteringConfig, Graph>,
-      public OwnedRelaxedClusterWeightVector<NodeID, NodeWeight>,
-      public NonatomicClusterVectorRef<NodeID, NodeID> {
-  SET_DEBUG(false);
-
-  using Base = ChunkRandomdLabelPropagation<LPClusteringImpl<Graph>, LPClusteringConfig, Graph>;
-  using ClusterWeightBase = OwnedRelaxedClusterWeightVector<NodeID, NodeWeight>;
-  using ClusterBase = NonatomicClusterVectorRef<NodeID, NodeID>;
-
-public:
-  LPClusteringImpl(const NodeID max_n, const CoarseningContext &c_ctx)
-      : ClusterWeightBase(c_ctx.clustering.lp.use_two_level_cluster_weight_vector),
-        _lp_ctx(c_ctx.clustering.lp),
-        _max_n(max_n) {
-    this->set_max_degree(_lp_ctx.large_degree_threshold);
-    this->set_max_num_neighbors(_lp_ctx.max_num_neighbors);
-    this->set_use_two_phases(_lp_ctx.use_two_phases);
-    this->set_second_phase_select_mode(_lp_ctx.second_phase_select_mode);
-    this->set_second_phase_aggregation_mode(_lp_ctx.second_phase_aggregation_mode);
-  }
-
-  void set_max_cluster_weight(const NodeWeight max_cluster_weight) {
-    _max_cluster_weight = max_cluster_weight;
-  }
-
-  void preinitialize(const NodeID num_nodes) {
-    Base::preinitialize(num_nodes, num_nodes);
-  }
-
-  void allocate() {
-    SCOPED_HEAP_PROFILER("Allocation");
-    SCOPED_TIMER("Allocation");
-
-    Base::allocate();
-    ClusterWeightBase::allocate(_max_n);
-  }
-
-  void free() {
-    SCOPED_HEAP_PROFILER("Free");
-    SCOPED_TIMER("Free");
-
-    Base::free();
-    ClusterWeightBase::free();
-  }
-
-  void compute_clustering(StaticArray<NodeID> &clustering, const Graph &graph) {
-    init_clusters_ref(clustering);
-
-    START_HEAP_PROFILER("Initialization");
-    this->reset_cluster_weights();
-    this->initialize(&graph, graph.n());
-    STOP_HEAP_PROFILER();
-
-    for (std::size_t iteration = 0; iteration < _lp_ctx.num_iterations; ++iteration) {
-      SCOPED_HEAP_PROFILER("Iteration", std::to_string(iteration));
-      SCOPED_TIMER("Iteration", std::to_string(iteration));
-      if (this->perform_iteration() == 0) {
-        break;
-      }
-    }
-
-    cluster_isolated_nodes();
-    cluster_two_hop_nodes();
-  }
-
-private:
-  void cluster_two_hop_nodes() {
-    SCOPED_HEAP_PROFILER("Handle two-hop nodes");
-    SCOPED_TIMER("Handle two-hop nodes");
-
-    if (!should_handle_two_hop_nodes()) {
-      return;
-    }
-
-    switch (_lp_ctx.two_hop_strategy) {
-    case TwoHopStrategy::MATCH:
-      this->match_two_hop_nodes();
-      break;
-    case TwoHopStrategy::MATCH_THREADWISE:
-      this->match_two_hop_nodes_threadwise();
-      break;
-    case TwoHopStrategy::CLUSTER:
-      this->cluster_two_hop_nodes();
-      break;
-    case TwoHopStrategy::CLUSTER_THREADWISE:
-      this->cluster_two_hop_nodes_threadwise();
-      break;
-    case TwoHopStrategy::LEGACY:
-      this->handle_two_hop_clustering_legacy();
-      break;
-    case TwoHopStrategy::DISABLE:
-      break;
-    }
-  }
-
-  void cluster_isolated_nodes() {
-    SCOPED_HEAP_PROFILER("Handle isolated nodes");
-    SCOPED_TIMER("Handle isolated nodes");
-
-    switch (_lp_ctx.isolated_nodes_strategy) {
-    case IsolatedNodesClusteringStrategy::MATCH:
-      this->match_isolated_nodes();
-      break;
-    case IsolatedNodesClusteringStrategy::CLUSTER:
-      this->cluster_isolated_nodes();
-      break;
-    case IsolatedNodesClusteringStrategy::MATCH_DURING_TWO_HOP:
-      if (should_handle_two_hop_nodes()) {
-        this->match_isolated_nodes();
-      }
-      break;
-    case IsolatedNodesClusteringStrategy::CLUSTER_DURING_TWO_HOP:
-      if (should_handle_two_hop_nodes()) {
-        this->cluster_isolated_nodes();
-      }
-      break;
-    case IsolatedNodesClusteringStrategy::KEEP:
-      break;
-    }
-  }
-
-  [[nodiscard]] bool should_handle_two_hop_nodes() const {
-    return (1.0 - 1.0 * _current_num_clusters / _graph->n()) <= _lp_ctx.two_hop_threshold;
-  }
-
-  // @todo: old implementation that should no longer be used
-  void handle_two_hop_clustering_legacy() {
-    // Reset _favored_clusters entries for nodes that are not considered for
-    // 2-hop clustering, i.e., nodes that are already clustered with at least one other node or
-    // nodes that have more weight than max_weight/2.
-    // Set _favored_clusters to dummy entry _graph->n() for isolated nodes
-    tbb::parallel_for<NodeID>(0, _graph->n(), [&](const NodeID u) {
-      if (u != cluster(u)) {
-        Base::_favored_clusters[u] = u;
-      } else {
-        const auto initial_weight = initial_cluster_weight(u);
-        const auto current_weight = cluster_weight(u);
-        const auto max_weight = max_cluster_weight(u);
-        if (current_weight != initial_weight || current_weight > max_weight / 2) {
-          Base::_favored_clusters[u] = u;
-        }
-      }
-    });
-
-    tbb::parallel_for<NodeID>(0, _graph->n(), [&](const NodeID u) {
-      // Abort once we have merged enough clusters to achieve the configured minimum shrink factor
-      if (this->should_stop()) {
-        return;
-      }
-
-      // Skip nodes that should not be considered during 2-hop clustering
-      const NodeID favored_leader = Base::_favored_clusters[u];
-      if (favored_leader == u) {
-        return;
-      }
-
-      do {
-        // If this works, we set ourself as clustering partners for nodes that have the same favored
-        // cluster we have
-        NodeID expected_value = favored_leader;
-        if (Base::_favored_clusters[favored_leader].compare_exchange_strong(expected_value, u)) {
-          break;
-        }
-
-        // If this did not work, there is another node that has the same favored cluster
-        // Try to join the cluster of that node
-        const NodeID partner = expected_value;
-        if (Base::_favored_clusters[favored_leader].compare_exchange_strong(
-                expected_value, favored_leader
-            )) {
-          if (move_cluster_weight(u, partner, cluster_weight(u), max_cluster_weight(partner))) {
-            move_node(u, partner);
-            --_current_num_clusters;
-          }
-
-          break;
-        }
-      } while (true);
-    });
-  }
-
-public:
-  [[nodiscard]] NodeID initial_cluster(const NodeID u) {
-    return u;
-  }
-
-  [[nodiscard]] NodeWeight initial_cluster_weight(const NodeID cluster) {
-    return _graph->node_weight(cluster);
-  }
-
-  [[nodiscard]] NodeWeight max_cluster_weight(const NodeID /* cluster */) {
-    return _max_cluster_weight;
-  }
-
-  [[nodiscard]] bool accept_cluster(const typename Base::ClusterSelectionState &state) {
-    return (state.current_gain > state.best_gain ||
-            (state.current_gain == state.best_gain && state.local_rand.random_bool())) &&
-           (state.current_cluster_weight + state.u_weight <=
-                max_cluster_weight(state.current_cluster) ||
-            state.current_cluster == state.initial_cluster);
-  }
-
-  using Base::_current_num_clusters;
-  using Base::_graph;
-
-  const LabelPropagationCoarseningContext &_lp_ctx;
-  const NodeID _max_n;
-  NodeWeight _max_cluster_weight = kInvalidBlockWeight;
-};
-
 class LPClustering : public Clusterer {
 public:
-  LPClustering(NodeID preallocate_n, const CoarseningContext &c_ctx);
+  LPClustering(const CoarseningContext &c_ctx);
 
   LPClustering(const LPClustering &) = delete;
   LPClustering &operator=(const LPClustering &) = delete;
@@ -254,19 +30,11 @@ class LPClustering : public Clusterer {
   void set_desired_cluster_count(NodeID count) final;
 
   void compute_clustering(
-      StaticArray<NodeID> &clustering, const Graph &graph, const bool free_memory_afterwards
+      StaticArray<NodeID> &clustering, const Graph &graph, bool free_memory_afterwards
   ) final;
 
 private:
-  std::unique_ptr<LPClusteringImpl<CSRGraph>> _csr_core;
-  std::unique_ptr<LPClusteringImpl<CompactCSRGraph>> _compact_csr_core;
-  std::unique_ptr<LPClusteringImpl<CompressedGraph>> _compressed_core;
-
-  // The data structures which are used by the LP clusterer and are shared between the
-  // different implementations.
-  bool _freed = true;
-  LPClusteringImpl<CSRGraph>::DataStructures _structs;
-  LPClusteringImpl<CSRGraph>::ClusterWeights _cluster_weights;
+  std::unique_ptr<LPClusteringImplWrapper> _impl_wrapper;
 };
 } // namespace kaminpar::shm
diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc
index c1b9ae73..a5f604d6 100644
--- a/kaminpar-shm/context_io.cc
+++ b/kaminpar-shm/context_io.cc
@@ -310,6 +310,8 @@ std::unordered_map<std::string, SecondPhaseSelectMode> get_second_phase_select_modes() {
 std::ostream &operator<<(std::ostream &out, SecondPhaseAggregationMode strategy) {
   switch (strategy) {
+  case SecondPhaseAggregationMode::NONE:
+    return out << "none";
   case SecondPhaseAggregationMode::DIRECT:
     return out << "direct";
   case SecondPhaseAggregationMode::BUFFERED:
@@ -321,6 +323,7 @@
 std::unordered_map<std::string, SecondPhaseAggregationMode> get_second_phase_aggregation_modes() {
   return {
+      {"none", SecondPhaseAggregationMode::NONE},
       {"direct", SecondPhaseAggregationMode::DIRECT},
       {"buffered", SecondPhaseAggregationMode::BUFFERED}
   };
@@ -462,6 +465,8 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) {
   if (lp_ctx.use_two_phases) {
     out << "    Select mode:              " << lp_ctx.second_phase_select_mode << '\n';
     out << "    Aggregation mode:         " << lp_ctx.second_phase_aggregation_mode << '\n';
+    out << "    Relabel:                  " << (lp_ctx.relabel_before_second_phase ? "yes" : "no")
+        << '\n';
   }
   out << "  2-hop clustering:           " << lp_ctx.two_hop_strategy << ", if |Vcoarse| > "
       << std::setw(2) << std::fixed << lp_ctx.two_hop_threshold << " * |V|\n";
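context_io.cc keeps a printing `operator<<` and a string-to-enum map in sync, so a new enumerator such as `NONE` has to be added to both, as in the hunks above. The convention in miniature, with a stand-in `Mode` enum:

```cpp
// Sketch of the paired printer/parser convention from context_io.cc; Mode is
// a stand-in for enums like SecondPhaseAggregationMode.
#include <iostream>
#include <string>
#include <unordered_map>

enum class Mode { NONE, DIRECT, BUFFERED };

std::ostream &operator<<(std::ostream &out, const Mode mode) {
  switch (mode) {
  case Mode::NONE:
    return out << "none";
  case Mode::DIRECT:
    return out << "direct";
  case Mode::BUFFERED:
    return out << "buffered";
  }
  return out << "<invalid>";
}

// The same names feed the CLI parser (via CLI::CheckedTransformer), so the two
// tables must stay consistent.
std::unordered_map<std::string, Mode> get_modes() {
  return {{"none", Mode::NONE}, {"direct", Mode::DIRECT}, {"buffered", Mode::BUFFERED}};
}
```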
"yes" : "no") << "\n"; + if (r_ctx.lp.use_two_phases) { + out << " Select mode: " << r_ctx.lp.second_phase_select_mode << '\n'; + out << " Aggregation mode: " << r_ctx.lp.second_phase_aggregation_mode << '\n'; + } } if (r_ctx.includes_algorithm(RefinementAlgorithm::KWAY_FM)) { out << "k-way FM:\n"; diff --git a/kaminpar-shm/factories.cc b/kaminpar-shm/factories.cc index 930dc06a..5a8d5357 100644 --- a/kaminpar-shm/factories.cc +++ b/kaminpar-shm/factories.cc @@ -57,7 +57,7 @@ std::unique_ptr create_clusterer(const Context &ctx) { return std::make_unique(); case ClusteringAlgorithm::LABEL_PROPAGATION: - return std::make_unique(ctx.partition.n, ctx.coarsening); + return std::make_unique(ctx.coarsening); case ClusteringAlgorithm::LEGACY_LABEL_PROPAGATION: return std::make_unique(ctx.coarsening); diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h index 27908f93..d1718711 100644 --- a/kaminpar-shm/kaminpar.h +++ b/kaminpar-shm/kaminpar.h @@ -101,6 +101,7 @@ enum class SecondPhaseSelectMode { }; enum class SecondPhaseAggregationMode { + NONE, DIRECT, BUFFERED }; @@ -139,7 +140,7 @@ struct LabelPropagationCoarseningContext { bool use_two_phases; SecondPhaseSelectMode second_phase_select_mode; SecondPhaseAggregationMode second_phase_aggregation_mode; - bool use_second_phase_filter; + bool relabel_before_second_phase; TwoHopStrategy two_hop_strategy; double two_hop_threshold; @@ -205,6 +206,10 @@ struct LabelPropagationRefinementContext { std::size_t num_iterations; NodeID large_degree_threshold; NodeID max_num_neighbors; + + bool use_two_phases; + SecondPhaseSelectMode second_phase_select_mode; + SecondPhaseAggregationMode second_phase_aggregation_mode; }; struct KwayFMRefinementContext { diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h index c73904e1..c5298024 100644 --- a/kaminpar-shm/label_propagation.h +++ b/kaminpar-shm/label_propagation.h @@ -1,7 +1,7 @@ /******************************************************************************* * Generic implementation of parallel label propagation. * - * @file: parallel_label_propagation.h + * @file: label_propagation.h * @author: Daniel Seemaier * @date: 21.09.2021 ******************************************************************************/ @@ -21,8 +21,6 @@ #include "kaminpar-common/datastructures/concurrent_two_level_vector.h" #include "kaminpar-common/datastructures/dynamic_map.h" #include "kaminpar-common/datastructures/rating_map.h" -#include "kaminpar-common/datastructures/scalable_vector.h" -#include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/heap_profiler.h" #include "kaminpar-common/logger.h" #include "kaminpar-common/parallel/atomic.h" @@ -31,26 +29,22 @@ #include "kaminpar-common/timer.h" namespace kaminpar { -template struct LabelPropagationConfig { - using Graph = TGraph; - +struct LabelPropagationConfig { // Data structure used to accumulate edge weights for gain value calculation - using RatingMap = ::kaminpar::RatingMap< - typename Graph::EdgeWeight, - typename Graph::NodeID, - FastResetArray>; + using RatingMap = + ::kaminpar::RatingMap>; // Data type for cluster IDs and weights using ClusterID = tag::Mandatory; using ClusterWeight = tag::Mandatory; // Approx. 
diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h
index c73904e1..c5298024 100644
--- a/kaminpar-shm/label_propagation.h
+++ b/kaminpar-shm/label_propagation.h
@@ -1,7 +1,7 @@
 /*******************************************************************************
  * Generic implementation of parallel label propagation.
  *
- * @file:   parallel_label_propagation.h
+ * @file:   label_propagation.h
  * @author: Daniel Seemaier
  * @date:   21.09.2021
  ******************************************************************************/
@@ -21,8 +21,6 @@
 #include "kaminpar-common/datastructures/concurrent_two_level_vector.h"
 #include "kaminpar-common/datastructures/dynamic_map.h"
 #include "kaminpar-common/datastructures/rating_map.h"
-#include "kaminpar-common/datastructures/scalable_vector.h"
-#include "kaminpar-common/datastructures/static_array.h"
 #include "kaminpar-common/heap_profiler.h"
 #include "kaminpar-common/logger.h"
 #include "kaminpar-common/parallel/atomic.h"
@@ -31,26 +29,22 @@
 #include "kaminpar-common/timer.h"
 
 namespace kaminpar {
-template <typename TGraph> struct LabelPropagationConfig {
-  using Graph = TGraph;
-
+struct LabelPropagationConfig {
   // Data structure used to accumulate edge weights for gain value calculation
-  using RatingMap = ::kaminpar::RatingMap<
-      typename Graph::EdgeWeight,
-      typename Graph::NodeID,
-      FastResetArray<typename Graph::EdgeWeight, typename Graph::NodeID>>;
+  using RatingMap =
+      ::kaminpar::RatingMap<shm::EdgeWeight, shm::NodeID, FastResetArray<shm::EdgeWeight, shm::NodeID>>;
 
   // Data type for cluster IDs and weights
   using ClusterID = tag::Mandatory;
   using ClusterWeight = tag::Mandatory;
 
   // Approx. number of edges per work unit
-  static constexpr Graph::NodeID kMinChunkSize = 1024;
+  static constexpr shm::NodeID kMinChunkSize = 1024;
 
   // Nodes per permutation unit: when iterating over nodes in a chunk, we divide
   // them into permutation units, iterate over permutation orders in random
   // order, and iterate over nodes inside a permutation unit in random order.
-  static constexpr Graph::NodeID kPermutationSize = 64;
+  static constexpr shm::NodeID kPermutationSize = 64;
 
   // When randomizing the node order inside a permutation unit, we pick a random
   // permutation from a pool of permutations. This constant determines the pool
@@ -81,23 +75,20 @@
  * @tparam Derived Derived class for static polymorphism.
  * @tparam Config Algorithmic configuration and data types.
  */
-template <typename Derived, template <typename> typename TConfig, typename TGraph>
-class LabelPropagation {
-  static_assert(std::is_base_of_v<LabelPropagationConfig<TGraph>, TConfig<TGraph>>);
+template <typename Derived, typename Config, typename Graph> class LabelPropagation {
+  static_assert(std::is_base_of_v<LabelPropagationConfig, Config>);
 
   SET_DEBUG(false);
   SET_STATISTICS_FROM_GLOBAL();
 
 protected:
-  using Config = TConfig<TGraph>;
-  using RatingMap = typename Config::RatingMap;
-  using Graph = typename Config::Graph;
   using NodeID = typename Graph::NodeID;
   using NodeWeight = typename Graph::NodeWeight;
   using EdgeID = typename Graph::EdgeID;
   using EdgeWeight = typename Graph::EdgeWeight;
   using ClusterID = typename Config::ClusterID;
   using ClusterWeight = typename Config::ClusterWeight;
+  using RatingMap = typename Config::RatingMap;
 
   using SecondPhaseSelectMode = shm::SecondPhaseSelectMode;
   using SecondPhaseAggregationMode = shm::SecondPhaseAggregationMode;
@@ -117,6 +108,13 @@
     return _max_num_neighbors;
   }
 
+  void set_desired_num_clusters(const ClusterID desired_num_clusters) {
+    _desired_num_clusters = desired_num_clusters;
+  }
+  [[nodiscard]] ClusterID desired_num_clusters() const {
+    return _desired_num_clusters;
+  }
+
   void set_use_two_phases(const bool use_two_phases) {
     _use_two_phases = use_two_phases;
   }
@@ -138,11 +136,11 @@
     return _second_phase_aggregation_mode;
   }
 
-  void set_desired_num_clusters(const ClusterID desired_num_clusters) {
-    _desired_num_clusters = desired_num_clusters;
+  void set_relabel_before_second_phase(const bool relabel) {
+    _relabel_before_second_phase = relabel;
   }
-  [[nodiscard]] ClusterID desired_num_clusters() const {
-    return _desired_num_clusters;
+  [[nodiscard]] bool relabel_before_second_phase() const {
+    return _relabel_before_second_phase;
   }
 
   [[nodiscard]] EdgeWeight expected_total_gain() const {
@@ -186,19 +184,19 @@
    */
  void allocate() {
     if constexpr (Config::kUseLocalActiveSetStrategy) {
-      if (_active.capacity() < _num_nodes) {
+      if (_active.size() < _num_nodes) {
         _active.resize(_num_nodes);
       }
     }
 
     if constexpr (Config::kUseActiveSetStrategy) {
-      if (_active.capacity() < _num_active_nodes) {
+      if (_active.size() < _num_active_nodes) {
         _active.resize(_num_active_nodes);
       }
     }
 
     if constexpr (Config::kUseTwoHopClustering) {
-      if (_favored_clusters.capacity() < _num_active_nodes) {
+      if (_favored_clusters.size() < _num_active_nodes) {
         _favored_clusters.resize(_num_active_nodes);
       }
     }
@@ -208,12 +206,29 @@
       tbb::enumerable_thread_specific([&_num_clusters = _num_clusters] {
         return RatingMap(_num_clusters);
       });
+    } else {
+      for (auto &rating_map : _rating_map_ets) {
+        rating_map.change_max_size(_num_clusters);
+      }
     }
   }
 
+  void free() {
+    // No shrink-to-fit call is needed (nor provided by the ETS interface) since clear() already
+    // frees the memory.
+    _rating_map_ets.clear();
+    _prev_num_clusters = 0;
+
+    _active.free();
+    _favored_clusters.free();
+    _moved.free();
+
+    _second_phase_nodes.clear();
+    _second_phase_nodes.shrink_to_fit();
+  }
+
   /*!
    * Initialize label propagation. Must be called after \c allocate().
-   * @param graph Graph for label propagation.
    * @param num_clusters Number of different clusters the nodes are placed in
    * initially. When using label propagation as refinement graphutils, this is
@@ -232,22 +247,6 @@
     reset_state();
   }
 
-  void free() {
-    // No shrink-to-fit call is needed (and provided by the ets-interface) since the clear already
-    // frees the memory.
-    _rating_map_ets.clear();
-    _prev_num_clusters = 0;
-
-    _active.clear();
-    _active.shrink_to_fit();
-
-    _favored_clusters.clear();
-    _favored_clusters.shrink_to_fit();
-
-    _second_phase_nodes.clear();
-    _second_phase_nodes.shrink_to_fit();
-  }
-
   /*!
    * Determines whether we should stop label propagation because the number of
    * non-empty clusters has been reduced sufficiently.
@@ -260,20 +259,80 @@
     return false;
   }
 
+  /*!
+   * Relabel the clusters such that afterwards the cluster IDs are consecutive in the range [0,
+   * num_actual_clusters), where num_actual_clusters is the number of clusters that have at least
+   * one member.
+   */
+  void relabel_clusters() {
+    SCOPED_HEAP_PROFILER("Relabel");
+    SCOPED_TIMER("Relabel");
+
+    // Update initial num clusters since the maximum cluster ID is now different.
+    ClusterID num_actual_clusters = _current_num_clusters;
+    _initial_num_clusters = num_actual_clusters;
+
+    // Store for each node whether it joined another cluster, as this information gets lost. This
+    // information is needed only by 2-hop clustering.
+    if constexpr (Config::kUseTwoHopClustering) {
+      if (_moved.size() < _graph->n()) {
+        _moved.resize(_graph->n());
+      }
+    }
+
+    // Compute a mapping from old cluster IDs to new cluster IDs.
+    RECORD("mapping") StaticArray<ClusterID> mapping(_graph->n());
+    tbb::parallel_for(tbb::blocked_range<NodeID>(0, _graph->n()), [&](const auto &r) {
+      for (NodeID u = r.begin(); u != r.end(); ++u) {
+        const ClusterID c_u = derived_cluster(u);
+        __atomic_store_n(&mapping[c_u], 1, __ATOMIC_RELAXED);
+
+        if constexpr (Config::kUseTwoHopClustering) {
+          if (u != c_u) {
+            _moved[u] = 1;
+          }
+        }
+      }
+    });
+
+    parallel::prefix_sum(mapping.begin(), mapping.end(), mapping.begin());
+    KASSERT(num_actual_clusters == mapping[_graph->n() - 1]);
+
+    tbb::parallel_invoke(
+        // Relabel the cluster stored for each node.
+        [&] {
+          tbb::parallel_for(tbb::blocked_range<NodeID>(0, _graph->n()), [&](const auto &r) {
+            for (NodeID u = r.begin(); u != r.end(); ++u) {
+              derived_move_node(u, mapping[derived_cluster(u)] - 1);
+            }
+          });
+        },
+        // Relabel the clusters stored in the favored clusters vector.
+        [&] {
+          tbb::parallel_for(tbb::blocked_range<NodeID>(0, _graph->n()), [&](const auto &r) {
+            for (NodeID u = r.begin(); u != r.end(); ++u) {
+              _favored_clusters[u] = mapping[_favored_clusters[u]] - 1;
+            }
+          });
+        },
+        // Reassign the cluster weights such that they match the new cluster IDs.
+        [&] {
+          static_cast<Derived *>(this)->reassign_cluster_weights(mapping, num_actual_clusters);
+        }
+    );
+    _relabeled = true;
+  }
+
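`relabel_clusters` computes the new consecutive IDs by marking every non-empty cluster with a 1, taking a prefix sum over the marks, and using `mapping[c] - 1` as the new ID of cluster `c`. A sequential sketch of that scheme (the real code parallelizes both passes; it assumes cluster IDs are smaller than the number of nodes):

```cpp
// Sequential sketch of the prefix-sum relabeling in relabel_clusters().
#include <cstdint>
#include <numeric>
#include <vector>

std::vector<std::uint32_t> relabel(std::vector<std::uint32_t> &clusters) {
  const std::size_t n = clusters.size();

  // Mark every cluster ID that has at least one member.
  std::vector<std::uint32_t> mapping(n, 0);
  for (const std::uint32_t c : clusters) {
    mapping[c] = 1;
  }

  // After the inclusive prefix sum, mapping[c] - 1 is the consecutive new ID.
  std::inclusive_scan(mapping.begin(), mapping.end(), mapping.begin());

  for (std::uint32_t &c : clusters) {
    c = mapping[c] - 1;
  }
  return mapping; // also needed to reassign the cluster weights
}
```

For example, cluster IDs {0, 2, 2, 5} yield marks {1, 0, 1, 0, 0, 1}, prefix sums {1, 1, 2, 2, 2, 3}, and thus the consecutive relabeling {0, 1, 1, 2}.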
   /*!
    * Move a single node to a new cluster.
    *
-   * @tparam parallel Whether the best cluster for the node should be computed in parallel.
-   * @tparam RatingMap The rating map used for computing the best cluster for the node.
-   *
    * @param u The node that is moved.
    * @param rand (Thread-local) \c Random object.
-   * @param rating_map (Thread-local) rating map for gain computation.
-   *
-   * @return Pair with: whether the node was moved to another cluster, whether the previous cluster
-   * is now empty.
+   * @param rating_map (Thread-local) Rating map for gain computation.
+   * @return Pair with: whether the node was moved to another cluster, whether
+   * the previous cluster is now empty.
    */
-  template <bool parallel, typename RatingMap>
+  template <bool first_phase, typename RatingMap>
   std::pair<bool, bool> handle_node(const NodeID u, Random &rand, RatingMap &rating_map) {
     if (derived_skip_node(u)) {
       return {false, false};
     }
 
     const NodeWeight u_weight = _graph->node_weight(u);
     const ClusterID u_cluster = derived_cluster(u);
 
-    std::optional<std::pair<ClusterID, EdgeWeight>> best_cluster_opt;
-    if constexpr (parallel) {
-      best_cluster_opt = find_best_cluster<parallel>(u, u_weight, u_cluster, rand, rating_map);
-    } else {
-      std::size_t upper_bound_size = std::min(_graph->degree(u), _initial_num_clusters);
-      if (_use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP) {
-        upper_bound_size = std::min(upper_bound_size, Config::kRatingMapThreshold);
-      }
-
-      best_cluster_opt = rating_map.execute(upper_bound_size, [&](auto &map) {
-        return find_best_cluster<parallel>(u, u_weight, u_cluster, rand, map);
-      });
-    }
-
-    if (const auto best_cluster = best_cluster_opt) {
-      const auto [new_cluster, new_gain] = *best_cluster;
-
+    auto move_node = [&](const ClusterID new_cluster, EdgeWeight gain) -> std::pair<bool, bool> {
       if (derived_cluster(u) != new_cluster) {
-        if (derived_move_cluster_weight(
-                u_cluster, new_cluster, u_weight, derived_max_cluster_weight(new_cluster)
-            )) {
+        const bool successful_weight_move = derived_move_cluster_weight(
+            u_cluster, new_cluster, u_weight, derived_max_cluster_weight(new_cluster)
+        );
+
+        if (successful_weight_move) {
           derived_move_node(u, new_cluster);
           activate_neighbors(u);
-          IFSTATS(_expected_total_gain += new_gain);
+          IFSTATS(_expected_total_gain += gain);
 
           const bool decrement_cluster_count =
               Config::kTrackClusterCount && derived_cluster_weight(u_cluster) == 0;
@@ -314,9 +359,31 @@
           // cluster count?
         }
       }
+
+      // did not move, did not reduce cluster count
+      return {false, false};
+    };
+
+    if constexpr (first_phase) {
+      std::size_t upper_bound_size = std::min(_graph->degree(u), _initial_num_clusters);
+      if (_use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP) {
+        upper_bound_size = std::min(upper_bound_size, Config::kRatingMapThreshold);
+      }
+
+      const auto opt_move = rating_map.execute(upper_bound_size, [&](auto &map) {
+        return find_best_cluster<first_phase>(u, u_weight, u_cluster, rand, map);
+      });
+
+      if (opt_move.has_value()) {
+        const auto [best_cluster, gain] = *opt_move;
+        return move_node(best_cluster, gain);
+      }
+    } else {
+      const auto [best_cluster, gain] =
+          *find_best_cluster<false>(u, u_weight, u_cluster, rand, rating_map);
+      return move_node(best_cluster, gain);
     }
 
-    // did not move, did not reduce cluster count
     return {false, false};
   }
 
@@ -337,18 +404,15 @@
   /*!
    * Computes the best feasible cluster for a node.
    *
-   * @tparam parallel Whether the best cluster for the node should be computed in parallel.
-   * @tparam RatingMap The rating map used for computing the best cluster for the node.
-   *
    * @param u The node for which the cluster is computed.
    * @param u_weight The weight of the node.
    * @param u_cluster The current cluster of the node.
    * @param rand (Thread-local) \c Random object.
-   * @param map (Thread-local) rating map to compute gain values.
-   *
-   * @return Pair with: new cluster of the node, gain value for the move to the new cluster.
+   * @param rating_map (Thread-local) rating map to compute gain values.
+   * @return Pair with: new cluster of the node, gain value for the move to the
+   * new cluster.
    */
-  template <bool parallel, typename RatingMap>
+  template <bool first_phase, typename RatingMap>
   std::optional<std::pair<ClusterID, EdgeWeight>> find_best_cluster(
       const NodeID u,
       const NodeWeight u_weight,
@@ -373,7 +437,42 @@
 
     bool is_interface_node = false;
 
-    if constexpr (parallel) {
+    if constexpr (first_phase) {
+      const bool use_frm_selection =
+          _use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP;
+      const bool aggregate_during_second_phase =
+          _second_phase_aggregation_mode != SecondPhaseAggregationMode::NONE;
+
+      bool second_phase_node = false;
+      _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) {
+        if (derived_accept_neighbor(u, v)) {
+          const ClusterID v_cluster = derived_cluster(v);
+          const EdgeWeight rating = _graph->edge_weight(e);
+
+          map[v_cluster] += rating;
+
+          if (use_frm_selection && map.size() >= Config::kRatingMapThreshold) {
+            if (aggregate_during_second_phase) {
+              _second_phase_nodes.push_back(u);
+            }
+
+            second_phase_node = true;
+            return true;
+          }
+
+          if constexpr (Config::kUseLocalActiveSetStrategy) {
+            is_interface_node |= v >= _num_active_nodes;
+          }
+        }
+
+        return false;
+      });
+
+      if (second_phase_node) {
+        map.clear();
+        return std::nullopt;
+      }
+    } else {
       switch (_second_phase_aggregation_mode) {
       case SecondPhaseAggregationMode::DIRECT: {
         _graph->pfor_neighbors(u, _max_num_neighbors, 2000, [&](const EdgeID e, const NodeID v) {
@@ -434,47 +533,18 @@
         });
         break;
       }
-      }
-    } else {
-      const bool use_frm_two_phases =
-          _use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP;
-      bool second_phase_node = false;
-
-      _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) {
-        if (derived_accept_neighbor(u, v)) {
-          const ClusterID v_cluster = derived_cluster(v);
-          const EdgeWeight rating = _graph->edge_weight(e);
-
-          map[v_cluster] += rating;
-
-          if (use_frm_two_phases && map.size() >= Config::kRatingMapThreshold) {
-            _second_phase_nodes.push_back(u);
-            map.clear();
-
-            second_phase_node = true;
-            return true;
-          }
-
-          if constexpr (Config::kUseLocalActiveSetStrategy) {
-            is_interface_node |= v >= _num_active_nodes;
-          }
-        }
-
-        return false;
-      });
-
-      if (second_phase_node) {
-        return std::nullopt;
+      case SecondPhaseAggregationMode::NONE:
+        __builtin_unreachable();
       }
     }
 
     if constexpr (Config::kUseLocalActiveSetStrategy) {
       if (!is_interface_node) {
-        _active[u] = 0;
+        __atomic_store_n(&_active[u], 0, __ATOMIC_RELAXED);
       }
     }
     if constexpr (Config::kUseActiveSetStrategy) {
-      _active[u] = 0;
+      __atomic_store_n(&_active[u], 0, __ATOMIC_RELAXED);
     }
 
     // After LP, we might want to use 2-hop clustering to merge nodes that
@@ -490,7 +560,23 @@
     const EdgeWeight gain_delta = (Config::kUseActualGain) ? map[u_cluster] : 0;
 
-    if constexpr (parallel) {
+    if constexpr (first_phase) {
+      for (const auto [cluster, rating] : map.entries()) {
+        state.current_cluster = cluster;
+        state.current_gain = rating - gain_delta;
+        state.current_cluster_weight = derived_cluster_weight(cluster);
+
+        if (store_favored_cluster && state.current_gain > state.best_gain) {
+          favored_cluster = state.current_cluster;
+        }
+
+        if (derived_accept_cluster(state)) {
+          state.best_cluster = state.current_cluster;
+          state.best_cluster_weight = state.current_cluster_weight;
+          state.best_gain = state.current_gain;
+        }
+      }
+    } else {
       std::vector<std::pair<ClusterID, EdgeWeight>> local_values;
       local_values.resize(tbb::this_task_arena::max_concurrency());
 
@@ -551,22 +637,6 @@
           favored_cluster = local_favored_cluster;
         }
       }
-    } else {
-      for (const auto [cluster, rating] : map.entries()) {
-        state.current_cluster = cluster;
-        state.current_gain = rating - gain_delta;
-        state.current_cluster_weight = derived_cluster_weight(cluster);
-
-        if (store_favored_cluster && state.current_gain > state.best_gain) {
-          favored_cluster = state.current_cluster;
-        }
-
-        if (derived_accept_cluster(state)) {
-          state.best_cluster = state.current_cluster;
-          state.best_cluster_weight = state.current_cluster_weight;
-          state.best_gain = state.current_gain;
-        }
-      }
     }
 
     // if we couldn't join any cluster, we store the favored cluster
@@ -575,10 +645,9 @@
     }
 
     const EdgeWeight actual_gain = IFSTATS(state.best_gain - map[state.initial_cluster]);
-    if constexpr (!parallel) {
+    if constexpr (first_phase) {
       map.clear();
     }
-
     return std::make_pair(state.best_cluster, actual_gain);
   }
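Under `FULL_RATING_MAP` selection, the first phase aggregates ratings into a bounded map and bails out to the second phase once the map holds `kRatingMapThreshold` distinct clusters, as implemented above. Here is that control flow in isolation, with simplified stand-in types for the rating-map machinery:

```cpp
// Sketch of the first-phase bail-out under FULL_RATING_MAP selection; map type
// and threshold are simplified stand-ins.
#include <cstdint>
#include <optional>
#include <unordered_map>
#include <vector>

constexpr std::size_t kRatingMapThreshold = 128; // assumed capacity limit

struct Neighbor {
  std::uint32_t cluster;
  std::int64_t weight;
};

std::optional<std::uint32_t> first_phase_best_cluster(
    const std::vector<Neighbor> &neighbors,
    std::vector<std::uint32_t> &second_phase_nodes, std::uint32_t node) {
  std::unordered_map<std::uint32_t, std::int64_t> ratings;
  for (const Neighbor &n : neighbors) {
    ratings[n.cluster] += n.weight;
    if (ratings.size() >= kRatingMapThreshold) {
      second_phase_nodes.push_back(node); // defer this node to the second phase
      return std::nullopt;
    }
  }

  // The map stayed small: pick the best-rated cluster directly.
  std::optional<std::uint32_t> best;
  std::int64_t best_rating = 0;
  for (const auto &[cluster, rating] : ratings) {
    if (rating > best_rating) {
      best = cluster;
      best_rating = rating;
    }
  }
  return best;
}
```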
@@ -588,13 +657,13 @@
    * @param u Node that was moved.
    */
   void activate_neighbors(const NodeID u) {
-    _graph->adjacent_nodes(u, [this](const NodeID v) {
+    _graph->adjacent_nodes(u, [&](const NodeID v) {
       // call derived_activate_neighbor() even if we do not use the active set
       // strategy since the function might have side effects; the compiler
       // should remove it if it has no side effects
       if (derived_activate_neighbor(v)) {
         if constexpr (Config::kUseActiveSetStrategy || Config::kUseLocalActiveSetStrategy) {
-          _active[v].store(1, std::memory_order_relaxed);
+          __atomic_store_n(&_active[v], 1, __ATOMIC_RELAXED);
         }
       }
     });
@@ -668,37 +737,54 @@
     tbb::enumerable_thread_specific<DynamicFlatMap<ClusterID, ClusterID>> matching_map_ets;
 
     auto is_considered_for_two_hop_clustering = [&](const NodeID u) {
-      // Skip nodes not considered for two-hop clustering
+      // Not considered: isolated node
       if (_graph->degree(u) == 0) {
-        // Not considered: isolated node
-        return false;
-      } else if (u != derived_cluster(u)) {
-        // Not considered: joined another cluster
         return false;
-      } else {
-        // If u did not join another cluster, there could still be other nodes that joined this
-        // node's cluster: find out by checking the cluster weight
-        const ClusterWeight current_weight = derived_cluster_weight(u);
-        if (current_weight > derived_max_cluster_weight(u) / 2 ||
-            current_weight != derived_initial_cluster_weight(u)) {
+      }
+
+      // If u did not join another cluster, there could still be other nodes that joined this
+      // node's cluster: find out by checking the cluster weight
+      auto check_cluster_weight = [&](const NodeID c_u) {
+        const ClusterWeight current_weight = derived_cluster_weight(c_u);
+
+        if (current_weight > derived_max_cluster_weight(c_u) / 2 ||
+            current_weight != derived_initial_cluster_weight(c_u)) {
           // Not considered: not a singleton cluster; or its weight is too heavy
           return false;
         }
-      }
 
-      return true;
+        return true;
+      };
+
+      // Not considered: joined another cluster
+      if (_relabeled) {
+        if (_moved[u]) {
+          return false;
+        }
+
+        const ClusterID c_u = derived_cluster(u);
+        return check_cluster_weight(c_u);
+      } else {
+        if (u != derived_cluster(u)) {
+          return false;
+        }
+
+        // In this case c_u == u holds.
+        return check_cluster_weight(u);
+      }
     };
 
     auto handle_node = [&](DynamicFlatMap<ClusterID, ClusterID> &matching_map, const NodeID u) {
+      const ClusterID c_u = derived_cluster(u);
       ClusterID &rep_key = matching_map[_favored_clusters[u]];
 
       if (rep_key == 0) {
-        rep_key = u + 1;
+        rep_key = c_u + 1;
       } else {
         const ClusterID rep = rep_key - 1;
 
         const bool could_move_u_to_rep = derived_move_cluster_weight(
-            u, rep, derived_cluster_weight(u), derived_max_cluster_weight(rep)
+            c_u, rep, derived_cluster_weight(c_u), derived_max_cluster_weight(rep)
         );
 
         if constexpr (match) {
@@ -709,7 +795,7 @@
           if (could_move_u_to_rep) {
             derived_move_node(u, rep);
           } else {
-            rep_key = u + 1;
+            rep_key = c_u + 1;
           }
         }
       }
@@ -839,14 +925,16 @@
       // Conclusion:
       // We can use _favored_clusters[u] to build the two-hop clusters.
 
-      const NodeID C = _favored_clusters[u];
+      const NodeID C = __atomic_load_n(&_favored_clusters[u], __ATOMIC_RELAXED);
       auto &sync = _favored_clusters[C];
 
       do {
         NodeID cluster = sync;
 
         if (cluster == C) {
-          if (sync.compare_exchange_strong(cluster, u)) {
+          if (__atomic_compare_exchange_n(
+                  &sync, &cluster, u, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST
+              )) {
             // We are done: other nodes will join our cluster
             break;
           }
@@ -857,14 +945,16 @@
           // Invariant: cluster is a node with favored cluster C
           KASSERT(
-              _favored_clusters[cluster] == C,
+              __atomic_load_n(&_favored_clusters[cluster], __ATOMIC_RELAXED) == C,
               "invariant violated by: " << V(u) << V(cluster) << V(C) << V(_favored_clusters[C])
           );
 
           // Try to join the cluster:
           if constexpr (match) {
             // Matching mode: try to build a cluster only containing nodes "cluster" and "u"
-            if (sync.compare_exchange_strong(cluster, C)) {
+            if (__atomic_compare_exchange_n(
+                    &sync, &cluster, C, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST
+                )) {
               [[maybe_unused]] const bool success = derived_move_cluster_weight(
                   u, cluster, derived_cluster_weight(u), derived_max_cluster_weight(cluster)
               );
@@ -890,8 +980,10 @@
               // We are done: joined cluster "cluster"
               break;
-            } else if (sync.compare_exchange_strong(cluster, u)) {
-              // We are done: other nodes will join our cluster
+            } else if (__atomic_compare_exchange_n(
+                           &sync, &cluster, C, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST
+                       )) {
+              // We are done: start a new cluster
               break;
             }
           }
@@ -925,6 +1017,7 @@
     );
     IFSTATS(_expected_total_gain = 0);
     _current_num_clusters = _initial_num_clusters;
+    _relabeled = false;
   }
 
 private: // CRTP calls
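The matching logic above lets nodes with the same favored cluster `C` rendezvous on the slot `_favored_clusters[C]`: the first arrival parks itself there via CAS, and a later arrival claims the parked node as its partner by swapping the slot back. A sketch of that rendezvous with `std::atomic`; the real code uses GCC `__atomic` builtins on the raw array, and the clustering (non-matching) variant updates the slot differently:

```cpp
// Sketch of the rendezvous-by-CAS idea behind the two-hop matching; the slot
// for favored cluster C is assumed to be initialized to C itself ("empty").
#include <atomic>
#include <cstdint>
#include <optional>

std::optional<std::uint32_t> rendezvous(std::atomic<std::uint32_t> &slot,
                                        std::uint32_t C, std::uint32_t u) {
  std::uint32_t seen = C;
  if (slot.compare_exchange_strong(seen, u)) {
    return std::nullopt; // first arrival: park and wait for a partner
  }
  // Another node is parked in the slot: try to claim it as our partner.
  if (slot.compare_exchange_strong(seen, C)) {
    return seen; // matched with the parked node; slot is "empty" again
  }
  return std::nullopt; // lost the race; the caller would retry
}
```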
+  bool _relabel_before_second_phase;
+
   //! Thread-local map to compute gain values.
   tbb::enumerable_thread_specific<RatingMap> _rating_map_ets;

   //! Flags nodes with at least one node in its neighborhood that changed
   //! clusters during the last iteration. Nodes without this flag set must not
   //! be considered in the next iteration.
-  scalable_vector<parallel::Atomic<std::uint8_t>> _active;
+  StaticArray<std::uint8_t> _active;
+
+  //! Flags nodes that joined another cluster. This information is used during 2-hop clustering
+  //! when we relabel the clusters.
+  StaticArray<std::uint8_t> _moved;
+
+  //! Whether we relabeled the clusters and thus have to use the information of the _moved
+  //! array for 2-hop clustering.
+  bool _relabeled;

   //! If a node cannot join any cluster during an iteration, this vector stores
   //! the node's highest rated cluster independent of the maximum cluster
   //! weight. This information is used during 2-hop clustering.
-  scalable_vector<parallel::Atomic<ClusterID>> _favored_clusters;
+  StaticArray<ClusterID> _favored_clusters;

   //! The nodes which should be processed in the second phase.
   tbb::concurrent_vector<NodeID> _second_phase_nodes;
@@ -1095,22 +1198,20 @@ class LabelPropagation {
  * @tparam Derived Derived subclass for static polymorphism.
  * @tparam Config Algorithmic configuration and data types.
  */
-template <typename Derived, template <typename> typename TConfig, typename TGraph>
-class InOrderLabelPropagation : public LabelPropagation<Derived, TConfig<TGraph>, TGraph> {
-  static_assert(std::is_base_of_v<LabelPropagationConfig<TGraph>, TConfig<TGraph>>);
+template <typename Derived, typename Config, typename Graph>
+class InOrderLabelPropagation : public LabelPropagation<Derived, Config, Graph> {
+  static_assert(std::is_base_of_v<LabelPropagationConfig, Config>);
   SET_DEBUG(false);

-protected:
-  using Config = TConfig<TGraph>;
-  using Base = LabelPropagation<Derived, Config, TGraph>;
+  using Base = LabelPropagation<Derived, Config, Graph>;

-  using Graph = typename Base::Graph;
-  using ClusterID = typename Base::ClusterID;
-  using ClusterWeight = typename Base::ClusterWeight;
-  using EdgeID = typename Base::EdgeID;
-  using EdgeWeight = typename Base::EdgeWeight;
+protected:
   using NodeID = typename Base::NodeID;
   using NodeWeight = typename Base::NodeWeight;
+  using EdgeID = typename Base::EdgeID;
+  using EdgeWeight = typename Base::EdgeWeight;
+  using ClusterID = typename Base::ClusterID;
+  using ClusterWeight = typename Base::ClusterWeight;

   using Base::handle_node;
   using Base::set_max_degree;
@@ -1137,7 +1238,7 @@ class InOrderLabelPropagation : public LabelPropagation
+template <typename NodeID> struct AbstractChunk {
+  NodeID start;
+  NodeID end;
+};
+
+struct Bucket {
+  std::size_t start;
+  std::size_t end;
+};
+
 /*!
  * Parallel label propagation template that iterates over nodes in chunk random
  * order.
  * @tparam Derived Derived subclass for static polymorphism.
  * @tparam Config Algorithmic configuration and data types.
  */
-template <typename Derived, template <typename> typename TConfig, typename TGraph>
-class ChunkRandomdLabelPropagation : public LabelPropagation<Derived, TConfig<TGraph>, TGraph> {
-  using Config = TConfig<TGraph>;
-  using Base = LabelPropagation<Derived, Config, TGraph>;
-  static_assert(std::is_base_of_v<LabelPropagationConfig<TGraph>, TConfig<TGraph>>);
-
+template <typename Derived, typename Config, typename Graph>
+class ChunkRandomLabelPropagation : public LabelPropagation<Derived, Config, Graph> {
+  static_assert(std::is_base_of_v<LabelPropagationConfig, Config>);
   SET_DEBUG(false);

+  using Base = LabelPropagation<Derived, Config, Graph>;
+
 protected:
-  using Graph = typename Base::Graph;
-  using ClusterID = typename Base::ClusterID;
-  using ClusterWeight = typename Base::ClusterWeight;
-  using EdgeID = typename Base::EdgeID;
-  using EdgeWeight = typename Base::EdgeWeight;
   using NodeID = typename Base::NodeID;
   using NodeWeight = typename Base::NodeWeight;
+  using EdgeID = typename Base::EdgeID;
+  using EdgeWeight = typename Base::EdgeWeight;
+  using ClusterID = typename Base::ClusterID;
+  using ClusterWeight = typename Base::ClusterWeight;
   using RatingMap = typename Base::RatingMap;
+  using SecondPhaseSelectMode = Base::SecondPhaseSelectMode;
+  using SecondPhaseAggregationMode = Base::SecondPhaseAggregationMode;

+  using Base::handle_node;
+  using Base::relabel_clusters;
   using Base::set_max_degree;
   using Base::set_max_num_neighbors;
   using Base::should_stop;

+  using Permutations =
+      RandomPermutations<NodeID, Config::kPermutationSize, Config::kNumberOfNodePermutations>;
+  using Chunk = AbstractChunk<NodeID>;
+
 public:
+  //! The data structures that are stored on the heap and used by label propagation.
   using DataStructures = std::tuple<
       tbb::enumerable_thread_specific<RatingMap>,
-      scalable_vector<parallel::Atomic<std::uint8_t>>,
-      scalable_vector<parallel::Atomic<ClusterID>>,
+      StaticArray<std::uint8_t>,
+      StaticArray<std::uint8_t>,
+      StaticArray<ClusterID>,
       tbb::concurrent_vector<NodeID>,
+      std::vector<Chunk>,
+      std::vector<Bucket>,
       ConcurrentFastResetArray<EdgeWeight, ClusterID>>;

+  /*!
+   * Sets the data structures to use, which can save memory space when (unused) data structures
+   * are already in memory.
+   *
+   * @param structs The data structures to use.
+   */
   void setup(DataStructures structs) {
-    auto [rating_map_ets, active, favored_clusters, second_phase_nodes, concurrent_rating_map] =
-        std::move(structs);
+    auto
+        [rating_map_ets,
+         active,
+         moved,
+         favored_clusters,
+         second_phase_nodes,
+         chunks,
+         buckets,
+         concurrent_rating_map] = std::move(structs);
     Base::_rating_map_ets = std::move(rating_map_ets);
     Base::_active = std::move(active);
+    Base::_moved = std::move(moved);
     Base::_favored_clusters = std::move(favored_clusters);
     Base::_second_phase_nodes = std::move(second_phase_nodes);
+    _chunks = std::move(chunks);
+    _buckets = std::move(buckets);
     _concurrent_rating_map = std::move(concurrent_rating_map);
   }

+  /*!
+   * Returns ownership of the data structures that are stored on the heap.
+   *
+   * @return Ownership of the data structures that are stored on the heap.
+ */ DataStructures release() { return std::make_tuple( std::move(Base::_rating_map_ets), std::move(Base::_active), + std::move(Base::_moved), std::move(Base::_favored_clusters), std::move(Base::_second_phase_nodes), + std::move(_chunks), + std::move(_buckets), std::move(_concurrent_rating_map) ); } protected: - void initialize(const Graph *graph, const ClusterID num_clusters) { - Base::initialize(graph, num_clusters); - _chunks.clear(); - _buckets.clear(); - } + ChunkRandomLabelPropagation(Permutations &permutations) : _random_permutations(permutations) {} void free() { Base::free(); @@ -1250,6 +1391,12 @@ class ChunkRandomdLabelPropagation : public LabelPropagation::max()) { - START_HEAP_PROFILER("Buckets"); if (from != 0 || to != std::numeric_limits::max()) { _chunks.clear(); } @@ -1282,142 +1428,41 @@ class ChunkRandomdLabelPropagation : public LabelPropagation num_first_phase_nodes_ets; - tbb::enumerable_thread_specific num_moved_nodes_ets; - parallel::Atomic next_chunk = 0; - - const bool use_high_degree_selection = - Base::_use_two_phases && Base::_initial_num_clusters >= Config::kRatingMapThreshold && - Base::_second_phase_select_mode == Base::SecondPhaseSelectMode::HIGH_DEGREE; - - START_HEAP_PROFILER("First phase"); - START_TIMER("First phase"); - tbb::parallel_for(static_cast(0), _chunks.size(), [&](const std::size_t) { - if (should_stop()) { - return; - } - - auto &local_num_first_phase_nodes = num_first_phase_nodes_ets.local(); - auto &local_num_moved_nodes = num_moved_nodes_ets.local(); - auto &local_rand = Random::instance(); - auto &local_rating_map = _rating_map_ets.local(); - NodeID num_removed_clusters = 0; - - const auto chunk_id = next_chunk.fetch_add(1, std::memory_order_relaxed); - const auto &chunk = _chunks[chunk_id]; - const auto &permutation = _random_permutations.get(local_rand); - - const std::size_t num_sub_chunks = - std::ceil(1.0 * (chunk.end - chunk.start) / Config::kPermutationSize); - std::vector sub_chunk_permutation(num_sub_chunks); - std::iota(sub_chunk_permutation.begin(), sub_chunk_permutation.end(), 0); - local_rand.shuffle(sub_chunk_permutation); - - for (std::size_t sub_chunk = 0; sub_chunk < num_sub_chunks; ++sub_chunk) { - for (std::size_t i = 0; i < Config::kPermutationSize; ++i) { - const NodeID u = chunk.start + - Config::kPermutationSize * sub_chunk_permutation[sub_chunk] + - permutation[i % Config::kPermutationSize]; - if (u >= chunk.end) { - continue; - } - - if constexpr (Config::kUseActiveSetStrategy || Config::kUseLocalActiveSetStrategy) { - if (!_active[u].load(std::memory_order_relaxed)) { - continue; - } - } - - const NodeID degree = _graph->degree(u); - if (degree < _max_degree) { - if (use_high_degree_selection && degree >= Config::kRatingMapThreshold) { - _second_phase_nodes.push_back(u); - continue; - } - const auto [moved_node, emptied_cluster] = handle_node(u, local_rand, local_rating_map); - - ++local_num_first_phase_nodes; - if (moved_node) { - ++local_num_moved_nodes; - } - if (emptied_cluster) { - ++num_removed_clusters; - } - } - } - } - - _current_num_clusters -= num_removed_clusters; - }); - - STOP_TIMER(); - STOP_HEAP_PROFILER(); + const NodeID initial_num_clusters = _initial_num_clusters; + const auto [num_processed_nodes, num_moved_nodes_first_phase] = perform_first_phase(); const NodeID num_second_phase_nodes = _second_phase_nodes.size(); if (num_second_phase_nodes > 0) { - SCOPED_HEAP_PROFILER("Second phase"); - SCOPED_TIMER("Second phase"); - - auto &num_moved_nodes = num_moved_nodes_ets.local(); - auto &rand = 
Random::instance(); - - if (_concurrent_rating_map.capacity() < Base::_initial_num_clusters) { - _concurrent_rating_map.resize(Base::_initial_num_clusters); - } - - for (const NodeID u : _second_phase_nodes) { - const auto [moved_node, emptied_cluster] = - Base::template handle_node(u, rand, _concurrent_rating_map); - - if (moved_node) { - ++num_moved_nodes; - } - - if (emptied_cluster) { - --_current_num_clusters; - } + if (_relabel_before_second_phase) { + relabel_clusters(); } - _second_phase_nodes.clear(); + perform_second_phase(); } + const NodeID num_moved_nodes = _num_moved_nodes_ets.combine(std::plus{}); if constexpr (kDebug) { - NodeID num_first_phase_nodes = 0; - for (NodeID local_num_first_phase_nodes : num_first_phase_nodes_ets) { - num_first_phase_nodes += local_num_first_phase_nodes; - } - LOG << "Label Propagation"; - LOG << " Initial clusters: " << Base::_initial_num_clusters; - LOG << " First Phase: " << num_first_phase_nodes << " nodes"; - LOG << " Second Phase: " << num_second_phase_nodes << " nodes"; + LOG << " Initial clusters: " << initial_num_clusters << " clusters"; + LOG << " First Phase:"; + LOG << " Processed: " << (num_processed_nodes - num_second_phase_nodes) << " nodes"; + LOG << " Moved: " << num_moved_nodes_first_phase << " nodes"; + if (_relabel_before_second_phase) { + LOG << " Clusters after relabeling: " << _initial_num_clusters << " clusters"; + } + LOG << " Second Phase:"; + LOG << " Processed: " << num_second_phase_nodes << " nodes"; + LOG << " Moved: " << (num_moved_nodes - num_moved_nodes_first_phase) << " nodes"; LOG; } - return num_moved_nodes_ets.combine(std::plus{}); + _num_processed_nodes_ets.clear(); + _num_moved_nodes_ets.clear(); + return num_moved_nodes; } private: - struct Chunk { - NodeID start; - NodeID end; - }; - - struct Bucket { - std::size_t start; - std::size_t end; - }; - - void shuffle_chunks() { - tbb::parallel_for(0, _buckets.size(), [&](const std::size_t i) { - const auto &bucket = _buckets[i]; - Random::instance().shuffle(_chunks.begin() + bucket.start, _chunks.begin() + bucket.end); - }); - } - void init_chunks(const NodeID from, NodeID to) { _chunks.clear(); _buckets.clear(); @@ -1540,173 +1585,212 @@ class ChunkRandomdLabelPropagation : public LabelPropagation(0, _buckets.size(), [&](const std::size_t i) { + const auto &bucket = _buckets[i]; + Random::instance().shuffle(_chunks.begin() + bucket.start, _chunks.begin() + bucket.end); + }); + } - RandomPermutations - _random_permutations{}; - std::vector _chunks; - std::vector _buckets; + std::pair perform_first_phase() { + SCOPED_HEAP_PROFILER("First phase"); + SCOPED_TIMER("First phase"); - ConcurrentFastResetArray _concurrent_rating_map; -}; + const bool use_high_degree_selection = + _use_two_phases && _initial_num_clusters >= Config::kRatingMapThreshold && + _second_phase_select_mode == SecondPhaseSelectMode::HIGH_DEGREE; + const bool aggregate_during_second_phase = + _second_phase_aggregation_mode != SecondPhaseAggregationMode::NONE; -template class NonatomicClusterVectorRef { -public: - void init_clusters_ref(StaticArray &clustering) { - _clusters = &clustering; - } + parallel::Atomic next_chunk = 0; + tbb::parallel_for(static_cast(0), _chunks.size(), [&](const std::size_t) { + if (should_stop()) { + return; + } - [[nodiscard]] auto &&take_clusters() { - return std::move(_clusters); - } + auto &local_num_processed_nodes = _num_processed_nodes_ets.local(); + auto &local_num_moved_nodes = _num_moved_nodes_ets.local(); + auto &local_rand = Random::instance(); + auto 
&local_rating_map = _rating_map_ets.local(); + NodeID num_removed_clusters = 0; - void init_cluster(const NodeID node, const ClusterID cluster) { - move_node(node, cluster); - } + const auto chunk_id = next_chunk.fetch_add(1, std::memory_order_relaxed); + const auto &chunk = _chunks[chunk_id]; + const auto &permutation = _random_permutations.get(local_rand); - [[nodiscard]] ClusterID cluster(const NodeID node) { - KASSERT(node < _clusters->size()); - return __atomic_load_n(&_clusters->at(node), __ATOMIC_RELAXED); - } + const std::size_t num_sub_chunks = + std::ceil(1.0 * (chunk.end - chunk.start) / Config::kPermutationSize); + std::vector sub_chunk_permutation(num_sub_chunks); + std::iota(sub_chunk_permutation.begin(), sub_chunk_permutation.end(), 0); + local_rand.shuffle(sub_chunk_permutation); - void move_node(const NodeID node, const ClusterID cluster) { - KASSERT(node < _clusters->size()); - __atomic_store_n(&_clusters->at(node), cluster, __ATOMIC_RELAXED); - } + for (std::size_t sub_chunk = 0; sub_chunk < num_sub_chunks; ++sub_chunk) { + for (std::size_t i = 0; i < Config::kPermutationSize; ++i) { + const NodeID u = chunk.start + + Config::kPermutationSize * sub_chunk_permutation[sub_chunk] + + permutation[i % Config::kPermutationSize]; + if (u >= chunk.end) { + continue; + } -private: - StaticArray *_clusters; -}; + if constexpr (Config::kUseActiveSetStrategy || Config::kUseLocalActiveSetStrategy) { + if (!__atomic_load_n(&_active[u], __ATOMIC_RELAXED)) { + continue; + } + } -template class OwnedClusterVector { -public: - using Clusters = scalable_vector>; + const NodeID degree = _graph->degree(u); + if (degree < _max_degree) { + ++local_num_processed_nodes; - explicit OwnedClusterVector(const NodeID max_num_nodes) : _max_num_nodes(max_num_nodes) { - RECORD_DATA_STRUCT(0, _struct); - } + if (use_high_degree_selection && degree >= Config::kRatingMapThreshold) { + if (aggregate_during_second_phase) { + _second_phase_nodes.push_back(u); + } - void allocate_clusters() { - IF_HEAP_PROFILING( - _struct->size = - std::max(_struct->size, _max_num_nodes * sizeof(parallel::Atomic)) - ); + continue; + } - _clusters.resize(_max_num_nodes); - } + const auto [moved_node, emptied_cluster] = handle_node(u, local_rand, local_rating_map); + if (moved_node) { + ++local_num_moved_nodes; - void setup_clusters(Clusters clusters) { - _clusters = std::move(clusters); - } + if (_relabeled) { + _moved[u] = 1; + } + } + if (emptied_cluster) { + ++num_removed_clusters; + } + } + } + } - [[nodiscard]] auto &&take_clusters() { - return std::move(_clusters); - } + _current_num_clusters -= num_removed_clusters; + }); - [[nodiscard]] auto &clusters() { - return _clusters; + return std::make_pair( + _num_processed_nodes_ets.combine(std::plus{}), _num_moved_nodes_ets.combine(std::plus{}) + ); } - void init_cluster(const NodeID node, const ClusterID cluster) { - _clusters[node] = cluster; - } + void perform_second_phase() { + SCOPED_HEAP_PROFILER("Second phase"); + SCOPED_TIMER("Second phase"); - [[nodiscard]] ClusterID cluster(const NodeID node) { - KASSERT(node < _clusters.size()); - return _clusters[node]; - } + const std::size_t num_clusters = _initial_num_clusters; + if (_concurrent_rating_map.capacity() < num_clusters) { + _concurrent_rating_map.resize(num_clusters); + } - void move_node(const NodeID node, const ClusterID cluster) { - KASSERT(node < _clusters.size()); - _clusters[node] = cluster; + auto &num_moved_nodes = _num_moved_nodes_ets.local(); + auto &rand = Random::instance(); + for (const NodeID u : 
_second_phase_nodes) { + const auto [moved_node, emptied_cluster] = + Base::template handle_node(u, rand, _concurrent_rating_map); + + if (moved_node) { + ++num_moved_nodes; + + if (_relabeled) { + _moved[u] = 1; + } + } + + if (emptied_cluster) { + --_current_num_clusters; + } + } + + _second_phase_nodes.clear(); } -private: - NodeID _max_num_nodes; - Clusters _clusters; +protected: + using Base::_active; + using Base::_current_num_clusters; + using Base::_graph; + using Base::_initial_num_clusters; + using Base::_max_degree; + using Base::_moved; + using Base::_rating_map_ets; + using Base::_relabel_before_second_phase; + using Base::_relabeled; + using Base::_second_phase_aggregation_mode; + using Base::_second_phase_nodes; + using Base::_second_phase_select_mode; + using Base::_use_two_phases; - IF_HEAP_PROFILING(heap_profiler::DataStructure *_struct); + Permutations &_random_permutations; + std::vector _chunks; + std::vector _buckets; + tbb::enumerable_thread_specific _num_processed_nodes_ets; + tbb::enumerable_thread_specific _num_moved_nodes_ets; + ConcurrentFastResetArray _concurrent_rating_map; }; template class OwnedRelaxedClusterWeightVector { using FirstLevelClusterWeight = typename std:: conditional_t, std::int16_t, std::int32_t>; - using ClusterWeightsVec = scalable_vector>; - using ClusterWeightsTwoLevelVec = + using ClusterWeightVec = StaticArray; + using ClusterWeightTwoLevelVec = ConcurrentTwoLevelVector; public: - using ClusterWeights = std::tuple; + using ClusterWeights = std::pair; - explicit OwnedRelaxedClusterWeightVector(const bool use_two_level_vector) - : _use_two_level_vector(use_two_level_vector) { - IF_HEAP_PROFILING(_struct = nullptr); - } + OwnedRelaxedClusterWeightVector(const bool use_two_level_vector) + : _use_two_level_vector(use_two_level_vector) {} - void allocate(const ClusterID max_num_clusters) { + void allocate_cluster_weights(const ClusterID num_clusters) { if (_use_two_level_vector) { - _cluster_weights_tlvec.resize(max_num_clusters); + if (_two_level_cluster_weights.capacity() < num_clusters) { + _two_level_cluster_weights.resize(num_clusters); + } } else { - IF_HEAP_PROFILING( // - if (_struct == nullptr) { - RECORD_DATA_STRUCT(max_num_clusters * sizeof(parallel::Atomic), _struct); - } else { - _struct->size = - std::max(_struct->size, max_num_clusters * sizeof(parallel::Atomic)); - } - ) - - if (_cluster_weights_vec.capacity() < max_num_clusters) { - _cluster_weights_vec.resize(max_num_clusters); + if (_cluster_weights.size() < num_clusters) { + _cluster_weights.resize(num_clusters); } } } void free() { if (_use_two_level_vector) { - _cluster_weights_tlvec.free(); + _two_level_cluster_weights.free(); } else { - _cluster_weights_vec.clear(); - _cluster_weights_vec.shrink_to_fit(); + _cluster_weights.free(); } } void setup_cluster_weights(ClusterWeights weights) { - auto [cluster_weights_vec, cluster_weights_tlvec] = std::move(weights); - _cluster_weights_vec = std::move(cluster_weights_vec); - _cluster_weights_tlvec = std::move(cluster_weights_tlvec); + auto [cluster_weights, two_level_cluster_weights] = std::move(weights); + _cluster_weights = std::move(cluster_weights); + _two_level_cluster_weights = std::move(two_level_cluster_weights); } - [[nodiscard]] ClusterWeights take_cluster_weights() { - return std::make_tuple(std::move(_cluster_weights_vec), std::move(_cluster_weights_tlvec)); + ClusterWeights take_cluster_weights() { + return std::make_pair(std::move(_cluster_weights), std::move(_two_level_cluster_weights)); } void 
reset_cluster_weights() { if (_use_two_level_vector) { - _cluster_weights_tlvec.reset(); + _two_level_cluster_weights.reset(); } } void init_cluster_weight(const ClusterID cluster, const ClusterWeight weight) { if (_use_two_level_vector) { - _cluster_weights_tlvec.insert(cluster, weight); + _two_level_cluster_weights.insert(cluster, weight); } else { - _cluster_weights_vec[cluster] = weight; + _cluster_weights[cluster] = weight; } } ClusterWeight cluster_weight(const ClusterID cluster) { if (_use_two_level_vector) { - return _cluster_weights_tlvec[cluster]; + return _two_level_cluster_weights[cluster]; } else { - return _cluster_weights_vec[cluster]; + return __atomic_load_n(&_cluster_weights[cluster], __ATOMIC_RELAXED); } } @@ -1717,15 +1801,15 @@ template class OwnedRelaxedClusterW const ClusterWeight max_weight ) { if (_use_two_level_vector) { - if (_cluster_weights_tlvec[new_cluster] + delta <= max_weight) { - _cluster_weights_tlvec.atomic_add(new_cluster, delta); - _cluster_weights_tlvec.atomic_sub(old_cluster, delta); + if (_two_level_cluster_weights[new_cluster] + delta <= max_weight) { + _two_level_cluster_weights.atomic_add(new_cluster, delta); + _two_level_cluster_weights.atomic_sub(old_cluster, delta); return true; } } else { - if (_cluster_weights_vec[new_cluster] + delta <= max_weight) { - _cluster_weights_vec[new_cluster].fetch_add(delta, std::memory_order_relaxed); - _cluster_weights_vec[old_cluster].fetch_sub(delta, std::memory_order_relaxed); + if (_cluster_weights[new_cluster] + delta <= max_weight) { + __atomic_fetch_add(&_cluster_weights[new_cluster], delta, __ATOMIC_RELAXED); + __atomic_fetch_sub(&_cluster_weights[old_cluster], delta, __ATOMIC_RELAXED); return true; } } @@ -1733,11 +1817,59 @@ template class OwnedRelaxedClusterW return false; } + void reassign_cluster_weights( + const StaticArray &mapping, const ClusterID num_new_clusters + ) { + if (_use_two_level_vector) { + _two_level_cluster_weights.reassign(mapping, num_new_clusters); + } else { + RECORD("new_cluster_weights") ClusterWeightVec new_cluster_weights(num_new_clusters); + + tbb::parallel_for( + tbb::blocked_range(0, _cluster_weights.size()), + [&](const auto &r) { + for (ClusterID u = r.begin(); u != r.end(); ++u) { + ClusterWeight weight = _cluster_weights[u]; + + if (weight != 0) { + ClusterID new_cluster_id = mapping[u] - 1; + new_cluster_weights[new_cluster_id] = weight; + } + } + } + ); + + _cluster_weights = std::move(new_cluster_weights); + } + } + private: const bool _use_two_level_vector; - ClusterWeightsVec _cluster_weights_vec; - ClusterWeightsTwoLevelVec _cluster_weights_tlvec; + ClusterWeightVec _cluster_weights; + ClusterWeightTwoLevelVec _two_level_cluster_weights; +}; + +template class NonatomicClusterVectorRef { +public: + void init_clusters_ref(StaticArray &clustering) { + _clusters = &clustering; + } - IF_HEAP_PROFILING(heap_profiler::DataStructure *_struct); + void init_cluster(const NodeID node, const ClusterID cluster) { + move_node(node, cluster); + } + + [[nodiscard]] ClusterID cluster(const NodeID node) { + KASSERT(node < _clusters->size()); + return __atomic_load_n(&_clusters->at(node), __ATOMIC_RELAXED); + } + + void move_node(const NodeID node, const ClusterID cluster) { + KASSERT(node < _clusters->size()); + __atomic_store_n(&_clusters->at(node), cluster, __ATOMIC_RELAXED); + } + +private: + StaticArray *_clusters = nullptr; }; } // namespace kaminpar diff --git a/kaminpar-shm/partitioning/deep/deep_multilevel.cc b/kaminpar-shm/partitioning/deep/deep_multilevel.cc index 
83bbc4f6..944c5d46 100644 --- a/kaminpar-shm/partitioning/deep/deep_multilevel.cc +++ b/kaminpar-shm/partitioning/deep/deep_multilevel.cc @@ -144,6 +144,10 @@ const Graph *DeepMultilevelPartitioner::coarsen() { EdgeID prev_c_graph_m = c_graph->m(); bool shrunk = true; + bool search_subgraph_memory_size = true; + NodeID subgraph_memory_n; + EdgeID subgraph_memory_m; + while (shrunk && c_graph->n() > initial_partitioning_threshold()) { // If requested, dump graph before each coarsening step + after coarsening // converged. This way, we also have a dump of the (reordered) input graph, @@ -164,9 +168,11 @@ const Graph *DeepMultilevelPartitioner::coarsen() { // bipartitioning // To avoid repeated allocation, we pre-allocate the memory during coarsening for the largest // coarse graph for which we still need recursive bipartitioning - if (_subgraph_memory.empty() && + if (search_subgraph_memory_size && helper::compute_k_for_n(c_graph->n(), _input_ctx) < _input_ctx.partition.k) { - _subgraph_memory.resize(prev_c_graph_n, _input_ctx.partition.k, prev_c_graph_m, true, true); + search_subgraph_memory_size = false; + subgraph_memory_n = prev_c_graph_n; + subgraph_memory_m = prev_c_graph_m; } // Print some metrics for the coarse graphs @@ -189,9 +195,11 @@ const Graph *DeepMultilevelPartitioner::coarsen() { LOG; } - if (_subgraph_memory.empty()) { - _subgraph_memory.resize(prev_c_graph_n, _input_ctx.partition.k, prev_c_graph_m, true, true); + if (search_subgraph_memory_size) { + subgraph_memory_n = prev_c_graph_n; + subgraph_memory_m = prev_c_graph_m; } + _subgraph_memory.resize(subgraph_memory_n, _input_ctx.partition.k, subgraph_memory_m, true, true); if (shrunk) { LOG << "==> Coarsening terminated with less than " << initial_partitioning_threshold() diff --git a/kaminpar-shm/presets.cc b/kaminpar-shm/presets.cc index f1977358..55c790a1 100644 --- a/kaminpar-shm/presets.cc +++ b/kaminpar-shm/presets.cc @@ -83,6 +83,7 @@ Context create_default_context() { .use_two_phases = false, .second_phase_select_mode = SecondPhaseSelectMode::FULL_RATING_MAP, .second_phase_aggregation_mode = SecondPhaseAggregationMode::BUFFERED, + .relabel_before_second_phase = false, .two_hop_strategy = TwoHopStrategy::MATCH_THREADWISE, .two_hop_threshold = 0.5, .isolated_nodes_strategy = @@ -142,6 +143,9 @@ Context create_default_context() { .num_iterations = 5, .large_degree_threshold = 1000000, .max_num_neighbors = std::numeric_limits::max(), + .use_two_phases = false, + .second_phase_select_mode = SecondPhaseSelectMode::FULL_RATING_MAP, + .second_phase_aggregation_mode = SecondPhaseAggregationMode::BUFFERED, }, .kway_fm = { @@ -203,7 +207,7 @@ Context create_memory_context() { ctx.coarsening.clustering.algorithm = ClusteringAlgorithm::LABEL_PROPAGATION; ctx.coarsening.clustering.lp.use_two_phases = true; ctx.coarsening.clustering.lp.use_two_level_cluster_weight_vector = true; - ctx.coarsening.clustering.max_mem_free_coarsening_level = 100; + ctx.coarsening.clustering.max_mem_free_coarsening_level = 1; ctx.coarsening.contraction.mode = ContractionMode::UNBUFFERED; ctx.coarsening.contraction.use_compact_mapping = true; return ctx; diff --git a/kaminpar-shm/refinement/lp/lp_refiner.cc b/kaminpar-shm/refinement/lp/lp_refiner.cc index 006ed0a4..0c19160f 100644 --- a/kaminpar-shm/refinement/lp/lp_refiner.cc +++ b/kaminpar-shm/refinement/lp/lp_refiner.cc @@ -7,64 +7,243 @@ ******************************************************************************/ #include "kaminpar-shm/refinement/lp/lp_refiner.h" +#include 
"kaminpar-shm/label_propagation.h" + +#include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/timer.h" + namespace kaminpar::shm { -LabelPropagationRefiner::LabelPropagationRefiner(const Context &ctx) - : _csr_impl(std::make_unique>(ctx)), - _compact_csr_impl(std::make_unique>(ctx)), - _compressed_impl(std::make_unique>(ctx)) {} -LabelPropagationRefiner::~LabelPropagationRefiner() = default; +// +// Actual implementation -- not exposed in header +// -void LabelPropagationRefiner::initialize(const PartitionedGraph &p_graph) { - const Graph &graph = p_graph.graph(); - - if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - _csr_impl->initialize(csr_graph); - } else if (auto *compact_csr_graph = - dynamic_cast(graph.underlying_graph()); - compact_csr_graph != nullptr) { - _compact_csr_impl->initialize(compact_csr_graph); - } else if (auto *compressed_graph = - dynamic_cast(graph.underlying_graph()); - compressed_graph != nullptr) { - _compressed_impl->initialize(compressed_graph); +struct LPRefinerConfig : public LabelPropagationConfig { + using ClusterID = BlockID; + using ClusterWeight = BlockWeight; + using RatingMap = ::kaminpar::RatingMap>; + static constexpr bool kUseHardWeightConstraint = true; + static constexpr bool kReportEmptyClusters = false; +}; + +template +class LPRefinerImpl final + : public ChunkRandomLabelPropagation, LPRefinerConfig, Graph> { + using Base = ChunkRandomLabelPropagation, LPRefinerConfig, Graph>; + friend Base; + + static constexpr std::size_t kInfiniteIterations = std::numeric_limits::max(); + +public: + using Permutations = Base::Permutations; + + LPRefinerImpl(const Context &ctx, Permutations &permutations) + : Base(permutations), + _r_ctx(ctx.refinement) { + Base::preinitialize(ctx.partition.n, ctx.partition.k); + Base::set_max_degree(_r_ctx.lp.large_degree_threshold); + Base::set_max_num_neighbors(_r_ctx.lp.max_num_neighbors); + Base::set_use_two_phases(_r_ctx.lp.use_two_phases); + Base::set_second_phase_select_mode(_r_ctx.lp.second_phase_select_mode); + Base::set_second_phase_aggregation_mode(_r_ctx.lp.second_phase_aggregation_mode); + Base::set_relabel_before_second_phase(false); } -} -bool LabelPropagationRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) { - const auto specific_refine = [&](auto &impl) { - if (_freed) { - _freed = false; - impl.allocate(); - } else { - impl.setup(std::move(_structs)); + void allocate() { + SCOPED_HEAP_PROFILER("Allocation"); + SCOPED_TIMER("Allocation"); + + Base::allocate(); + } + + void initialize(const Graph *graph) { + _graph = graph; + } + + bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) { + KASSERT(_graph == p_graph.graph().underlying_graph()); + KASSERT(p_graph.k() <= p_ctx.k); + _p_graph = &p_graph; + _p_ctx = &p_ctx; + + Base::initialize(_graph, _p_ctx->k); + + const std::size_t max_iterations = + _r_ctx.lp.num_iterations == 0 ? 
kInfiniteIterations : _r_ctx.lp.num_iterations; + for (std::size_t iteration = 0; iteration < max_iterations; ++iteration) { + SCOPED_TIMER("Iteration", std::to_string(iteration)); + + if (Base::perform_iteration() == 0) { + return false; + } } - const bool found_improvement = impl.refine(p_graph, p_ctx); + return true; + } + + using Base::expected_total_gain; + +public: + [[nodiscard]] BlockID initial_cluster(const NodeID u) { + return _p_graph->block(u); + } + + [[nodiscard]] BlockWeight initial_cluster_weight(const BlockID b) { + return _p_graph->block_weight(b); + } + + [[nodiscard]] BlockWeight cluster_weight(const BlockID b) { + return _p_graph->block_weight(b); + } + + bool move_cluster_weight( + const BlockID old_block, + const BlockID new_block, + const BlockWeight delta, + const BlockWeight max_weight + ) { + return _p_graph->move_block_weight(old_block, new_block, delta, max_weight); + } + + void reassign_cluster_weights( + const StaticArray & /* mapping */, const BlockID /* num_new_clusters */ + ) {} + + void init_cluster(const NodeID /* u */, const BlockID /* b */) {} + + void init_cluster_weight(const BlockID /* b */, const BlockWeight /* weight */) {} + + [[nodiscard]] BlockID cluster(const NodeID u) { + return _p_graph->block(u); + } + void move_node(const NodeID u, const BlockID block) { + _p_graph->set_block(u, block); + } + [[nodiscard]] BlockID num_clusters() { + return _p_graph->k(); + } + [[nodiscard]] BlockWeight max_cluster_weight(const BlockID block) { + return _p_ctx->block_weights.max(block); + } - _structs = impl.release(); - return found_improvement; - }; + bool accept_cluster(const Base::ClusterSelectionState &state) { + static_assert(std::is_signed_v); - SCOPED_TIMER("Label Propagation"); - const Graph &graph = p_graph.graph(); + const NodeWeight current_max_weight = max_cluster_weight(state.current_cluster); + const NodeWeight best_overload = + state.best_cluster_weight - max_cluster_weight(state.best_cluster); + const NodeWeight current_overload = state.current_cluster_weight - current_max_weight; + const NodeWeight initial_overload = + state.initial_cluster_weight - max_cluster_weight(state.initial_cluster); - if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - return specific_refine(*_csr_impl); + return (state.current_gain > state.best_gain || + (state.current_gain == state.best_gain && + (current_overload < best_overload || + (current_overload == best_overload && state.local_rand.random_bool())))) && + (state.current_cluster_weight + state.u_weight < current_max_weight || + current_overload < initial_overload || state.current_cluster == state.initial_cluster); } - if (auto *compact_csr_graph = dynamic_cast(graph.underlying_graph()); - compact_csr_graph != nullptr) { - return specific_refine(*_compact_csr_impl); + const Graph *_graph = nullptr; + PartitionedGraph *_p_graph = nullptr; + + const PartitionContext *_p_ctx; + const RefinementContext &_r_ctx; +}; + +class LPRefinerImplWrapper { +public: + LPRefinerImplWrapper(const Context &ctx) + : _csr_impl(std::make_unique>(ctx, _permutations)), + _compact_csr_impl(std::make_unique>(ctx, _permutations)), + _compressed_impl(std::make_unique>(ctx, _permutations)) {} + + void initialize(const PartitionedGraph &p_graph) { + const Graph &graph = p_graph.graph(); + + if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); + csr_graph != nullptr) { + _csr_impl->initialize(csr_graph); + return; + } + + if (auto *compact_csr_graph = dynamic_cast(graph.underlying_graph()); 
+ compact_csr_graph != nullptr) { + _compact_csr_impl->initialize(compact_csr_graph); + return; + } + + if (auto *compressed_graph = dynamic_cast(graph.underlying_graph()); + compressed_graph != nullptr) { + _compressed_impl->initialize(compressed_graph); + return; + } + + __builtin_unreachable(); } - if (auto *compressed_graph = dynamic_cast(graph.underlying_graph()); - compressed_graph != nullptr) { - return specific_refine(*_compressed_impl); + bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) { + const auto specific_refine = [&](auto &impl) { + if (_freed) { + _freed = false; + impl.allocate(); + } else { + impl.setup(std::move(_structs)); + } + + const bool found_improvement = impl.refine(p_graph, p_ctx); + + _structs = impl.release(); + return found_improvement; + }; + + SCOPED_TIMER("Label Propagation"); + const Graph &graph = p_graph.graph(); + + if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); + csr_graph != nullptr) { + return specific_refine(*_csr_impl); + } + + if (auto *compact_csr_graph = dynamic_cast(graph.underlying_graph()); + compact_csr_graph != nullptr) { + return specific_refine(*_compact_csr_impl); + } + + if (auto *compressed_graph = dynamic_cast(graph.underlying_graph()); + compressed_graph != nullptr) { + return specific_refine(*_compressed_impl); + } + + __builtin_unreachable(); } - __builtin_unreachable(); +private: + std::unique_ptr> _csr_impl; + std::unique_ptr> _compact_csr_impl; + std::unique_ptr> _compressed_impl; + + // The data structures which are used by the LP refiner and are shared between the + // different implementations. + bool _freed = true; + LPRefinerImpl::Permutations _permutations; + LPRefinerImpl::DataStructures _structs; +}; + +// +// Exposed wrapper +// + +LabelPropagationRefiner::LabelPropagationRefiner(const Context &ctx) + : _impl_wrapper(std::make_unique(ctx)) {} + +LabelPropagationRefiner::~LabelPropagationRefiner() = default; + +void LabelPropagationRefiner::initialize(const PartitionedGraph &p_graph) { + _impl_wrapper->initialize(p_graph); +} + +bool LabelPropagationRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) { + return _impl_wrapper->refine(p_graph, p_ctx); } } // namespace kaminpar::shm diff --git a/kaminpar-shm/refinement/lp/lp_refiner.h b/kaminpar-shm/refinement/lp/lp_refiner.h index bbdde168..c9f653ac 100644 --- a/kaminpar-shm/refinement/lp/lp_refiner.h +++ b/kaminpar-shm/refinement/lp/lp_refiner.h @@ -7,144 +7,24 @@ ******************************************************************************/ #pragma once -#include "kaminpar-shm/context.h" +#include + +#include "kaminpar-shm/datastructures/graph.h" #include "kaminpar-shm/datastructures/partitioned_graph.h" -#include "kaminpar-shm/kaminpar.h" -#include "kaminpar-shm/label_propagation.h" #include "kaminpar-shm/refinement/refiner.h" -#include "kaminpar-common/timer.h" - namespace kaminpar::shm { -template -struct LabelPropagationRefinerConfig : public LabelPropagationConfig { - using ClusterID = BlockID; - using ClusterWeight = BlockWeight; - using RatingMap = ::kaminpar::RatingMap>; - static constexpr bool kUseHardWeightConstraint = true; - static constexpr bool kReportEmptyClusters = false; -}; - -template -class LabelPropagationRefinerImpl final : public ChunkRandomdLabelPropagation< - LabelPropagationRefinerImpl, - LabelPropagationRefinerConfig, - Graph> { - using Base = ChunkRandomdLabelPropagation< - LabelPropagationRefinerImpl, - LabelPropagationRefinerConfig, - Graph>; - friend Base; - - static constexpr 
std::size_t kInfiniteIterations = std::numeric_limits::max(); - -public: - LabelPropagationRefinerImpl(const Context &ctx) : _r_ctx{ctx.refinement} { - Base::preinitialize(ctx.partition.n, ctx.partition.k); - this->set_max_degree(_r_ctx.lp.large_degree_threshold); - this->set_max_num_neighbors(_r_ctx.lp.max_num_neighbors); - } - - void initialize(const Graph *graph) { - _graph = graph; - } - - void allocate() { - SCOPED_HEAP_PROFILER("Allocation"); - SCOPED_TIMER("Allocation"); - - Base::allocate(); - } - - bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) { - KASSERT(_graph == p_graph.graph().underlying_graph()); - KASSERT(p_graph.k() <= p_ctx.k); - _p_graph = &p_graph; - _p_ctx = &p_ctx; - Base::initialize(_graph, _p_ctx->k); - - const std::size_t max_iterations = - _r_ctx.lp.num_iterations == 0 ? kInfiniteIterations : _r_ctx.lp.num_iterations; - for (std::size_t iteration = 0; iteration < max_iterations; ++iteration) { - SCOPED_TIMER("Iteration", std::to_string(iteration)); - if (this->perform_iteration() == 0) { - return false; - } - } - - return true; - } - - using Base::expected_total_gain; - -public: - [[nodiscard]] BlockID initial_cluster(const NodeID u) { - return _p_graph->block(u); - } - - [[nodiscard]] BlockWeight initial_cluster_weight(const BlockID b) { - return _p_graph->block_weight(b); - } - - [[nodiscard]] BlockWeight cluster_weight(const BlockID b) { - return _p_graph->block_weight(b); - } - - bool move_cluster_weight( - const BlockID old_block, - const BlockID new_block, - const BlockWeight delta, - const BlockWeight max_weight - ) { - return _p_graph->move_block_weight(old_block, new_block, delta, max_weight); - } - - void init_cluster(const NodeID /* u */, const BlockID /* b */) {} - - void init_cluster_weight(const BlockID /* b */, const BlockWeight /* weight */) {} - - [[nodiscard]] BlockID cluster(const NodeID u) { - return _p_graph->block(u); - } - void move_node(const NodeID u, const BlockID block) { - _p_graph->set_block(u, block); - } - [[nodiscard]] BlockID num_clusters() { - return _p_graph->k(); - } - [[nodiscard]] BlockWeight max_cluster_weight(const BlockID block) { - return _p_ctx->block_weights.max(block); - } - - bool accept_cluster(const typename Base::ClusterSelectionState &state) { - static_assert(std::is_signed_v); - - const NodeWeight current_max_weight = max_cluster_weight(state.current_cluster); - const NodeWeight best_overload = - state.best_cluster_weight - max_cluster_weight(state.best_cluster); - const NodeWeight current_overload = state.current_cluster_weight - current_max_weight; - const NodeWeight initial_overload = - state.initial_cluster_weight - max_cluster_weight(state.initial_cluster); - - return (state.current_gain > state.best_gain || - (state.current_gain == state.best_gain && - (current_overload < best_overload || - (current_overload == best_overload && state.local_rand.random_bool())))) && - (state.current_cluster_weight + state.u_weight < current_max_weight || - current_overload < initial_overload || state.current_cluster == state.initial_cluster); - } - - const Graph *_graph{nullptr}; - PartitionedGraph *_p_graph{nullptr}; - const PartitionContext *_p_ctx; - const RefinementContext &_r_ctx; -}; - class LabelPropagationRefiner : public Refiner { public: LabelPropagationRefiner(const Context &ctx); + LabelPropagationRefiner(const LabelPropagationRefiner &) = delete; + LabelPropagationRefiner &operator=(const LabelPropagationRefiner &) = delete; + + LabelPropagationRefiner(LabelPropagationRefiner &&) noexcept = 
default;
+  LabelPropagationRefiner &operator=(LabelPropagationRefiner &&) noexcept = default;
+
   ~LabelPropagationRefiner() override;

   void initialize(const PartitionedGraph &p_graph) override;
@@ -152,13 +32,7 @@ class LabelPropagationRefiner : public Refiner {
   bool refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx) override;

 private:
-  std::unique_ptr<LabelPropagationRefinerImpl<CSRGraph>> _csr_impl;
-  std::unique_ptr<LabelPropagationRefinerImpl<CompactCSRGraph>> _compact_csr_impl;
-  std::unique_ptr<LabelPropagationRefinerImpl<CompressedGraph>> _compressed_impl;
-
-  // The data structures which are used by the LP refiner and are shared between the
-  // different implementations.
-  bool _freed = true;
-  LabelPropagationRefinerImpl<CSRGraph>::DataStructures _structs;
+  std::unique_ptr<class LPRefinerImplWrapper> _impl_wrapper;
 };
+
 } // namespace kaminpar::shm
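
A note on the setup()/release() pair introduced in ChunkRandomLabelPropagation above: successive label propagation runs hand their heap buffers to each other instead of reallocating them. Below is a reduced, self-contained model of that hand-over, assuming nothing beyond the standard library; the member names mirror the patch, everything else is simplified.

#include <cstddef>
#include <cstdint>
#include <tuple>
#include <utility>
#include <vector>

// Two phases share one set of heap buffers by moving them through a std::tuple.
class Phase {
public:
  using DataStructures = std::tuple<std::vector<std::uint8_t>, std::vector<std::uint32_t>>;

  void allocate(const std::size_t n) {
    _active.resize(n);
    _favored_clusters.resize(n);
  }

  void setup(DataStructures structs) {
    auto [active, favored_clusters] = std::move(structs);
    _active = std::move(active);
    _favored_clusters = std::move(favored_clusters);
  }

  DataStructures release() {
    return std::make_tuple(std::move(_active), std::move(_favored_clusters));
  }

private:
  std::vector<std::uint8_t> _active;
  std::vector<std::uint32_t> _favored_clusters;
};

int main() {
  Phase first;
  first.allocate(1'000'000);
  // ... run the first phase ...
  Phase second;
  second.setup(first.release()); // takes over the buffers: no second allocation
  // ... run the second phase ...
}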
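
The patch also repeatedly swaps scalable_vector<parallel::Atomic<...>> for plain arrays accessed through the GCC/Clang __atomic builtins. A minimal sketch of that idiom, with std::vector standing in for StaticArray:

#include <cstdint>
#include <vector>

int main() {
  std::vector<std::uint8_t> active(1024);

  // Mark node 42 active with a relaxed atomic store ...
  __atomic_store_n(&active[42], std::uint8_t(1), __ATOMIC_RELAXED);

  // ... and test the flag with a relaxed atomic load.
  const bool is_active = __atomic_load_n(&active[42], __ATOMIC_RELAXED);
  return is_active ? 0 : 1;
}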
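
The first phase visits nodes in "chunk random" order: each chunk is cut into sub-chunks of Config::kPermutationSize nodes, the sub-chunks are shuffled per thread, and nodes inside a sub-chunk follow a precomputed permutation. A serial sketch of the index arithmetic used in perform_first_phase(); the constants and the RNG are stand-ins, and the real code draws the node permutation from RandomPermutations:

#include <algorithm>
#include <cstddef>
#include <numeric>
#include <random>
#include <vector>

constexpr std::size_t kPermutationSize = 4; // Config::kPermutationSize in the patch

template <typename Visit>
void visit_chunk(const unsigned start, const unsigned end,
                 const std::vector<unsigned> &permutation, // one entry per sub-chunk slot
                 std::mt19937 &rng, Visit &&visit) {
  // Number of kPermutationSize-sized sub-chunks needed to cover [start, end).
  const std::size_t num_sub_chunks = (end - start + kPermutationSize - 1) / kPermutationSize;

  // Visit the sub-chunks in random order ...
  std::vector<std::size_t> sub_chunk_permutation(num_sub_chunks);
  std::iota(sub_chunk_permutation.begin(), sub_chunk_permutation.end(), 0);
  std::shuffle(sub_chunk_permutation.begin(), sub_chunk_permutation.end(), rng);

  for (std::size_t sub_chunk = 0; sub_chunk < num_sub_chunks; ++sub_chunk) {
    // ... and the nodes of each sub-chunk in the order given by `permutation`.
    for (std::size_t i = 0; i < kPermutationSize; ++i) {
      const unsigned u = start + kPermutationSize * sub_chunk_permutation[sub_chunk] +
                         permutation[i % kPermutationSize];
      if (u < end) { // the last sub-chunk may be partial
        visit(u);    // corresponds to handle_node(u, ...) in the patch
      }
    }
  }
}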
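
The two-phase mechanism defers nodes whose neighborhoods would overflow the thread-local rating map: under HIGH_DEGREE selection, any node with degree >= Config::kRatingMapThreshold is pushed into _second_phase_nodes during the first phase and only processed afterwards, against the single shared ConcurrentFastResetArray. A reduced serial sketch of that split, with stand-in types and callables:

#include <cstddef>
#include <vector>

constexpr std::size_t kRatingMapThreshold = 1u << 14; // stand-in for the Config constant

template <typename Degree, typename HandleNode>
void first_phase(const std::size_t n, Degree &&degree, HandleNode &&handle_node,
                 std::vector<std::size_t> &second_phase_nodes) {
  for (std::size_t u = 0; u < n; ++u) {
    if (degree(u) >= kRatingMapThreshold) {
      // Defer: this neighborhood would overflow the thread-local rating map.
      second_phase_nodes.push_back(u);
      continue;
    }
    handle_node(u); // small neighborhood: the thread-local rating map suffices
  }
  // The deferred nodes are then aggregated against one shared, concurrent rating
  // map -- see perform_second_phase() in the patch.
}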
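
When the clusters are relabeled before the second phase, the cluster weights have to follow: reassign_cluster_weights() above scatters the old weights through the relabeling mapping, where mapping[c] stores the new label of cluster c plus one (zero meaning the slot stayed empty). A serial model of that compaction; the patch runs the same loop under tbb::parallel_for:

#include <cstddef>
#include <vector>

// mapping[c] == 0 means cluster c is empty; otherwise mapping[c] - 1 is its new label.
std::vector<long> reassign(const std::vector<long> &weights,
                           const std::vector<std::size_t> &mapping,
                           const std::size_t num_new_clusters) {
  std::vector<long> new_weights(num_new_clusters);
  for (std::size_t c = 0; c < weights.size(); ++c) {
    if (weights[c] != 0) {
      new_weights[mapping[c] - 1] = weights[c]; // the mapping is shifted by one
    }
  }
  return new_weights;
}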
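
Finally, LPRefinerImplWrapper dispatches from the type-erased graph to one of three fully typed implementations via dynamic_cast, so each instantiation is compiled against its concrete representation. A self-contained model of the pattern; CsrGraph and CompressedGraph here are stand-ins, not the KaMinPar types:

#include <memory>

struct AbstractGraph {
  virtual ~AbstractGraph() = default;
};
struct CsrGraph final : AbstractGraph {};
struct CompressedGraph final : AbstractGraph {};

template <typename Graph> struct Impl {
  bool refine(const Graph &) { return false; }
};

struct Wrapper {
  Impl<CsrGraph> csr_impl;
  Impl<CompressedGraph> compressed_impl;

  bool refine(const AbstractGraph &graph) {
    // Each concrete representation gets its own, fully typed implementation;
    // the wrapper pays one dynamic_cast per call to pick it.
    if (const auto *csr = dynamic_cast<const CsrGraph *>(&graph)) {
      return csr_impl.refine(*csr);
    }
    if (const auto *compressed = dynamic_cast<const CompressedGraph *>(&graph)) {
      return compressed_impl.refine(*compressed);
    }
    __builtin_unreachable(); // as in the patch: no other representation exists
  }
};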