merge with parallel

KaHIP · Dec 8, 2024 · 11abf74 · 11abf74
2 parents 1b65774 + db5d3e5
commit 11abf74
Show file tree

Hide file tree

Showing 18 changed files with 691 additions and 208 deletions.
diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc
@@ -138,7 +138,8 @@ CLI::Option_group *create_coarsening_options(CLI::App *app, Context &ctx) {
   - effective-resistance, er: sample edges with relative effective-resistance above threshold
   - independent-random, ir: sample edges indepently with probabilites proportional to scores
   - random-with-replacement, rw/r: draw random edges WITH replacment and probailites proportinal to scores
-  - random-without-replacement, rw/or: draw random edges WITHOUT replacment and probailites proportinal to scores)")
+  - random-without-replacement, rw/or: draw random edges WITHOUT replacment and probailites proportinal to scores)"
+      )
       ->capture_default_str();
 
   coarsening->add_option("--s-score", ctx.sparsification.score_function)
@@ -164,6 +165,9 @@ CLI::Option_group *create_coarsening_options(CLI::App *app, Context &ctx) {
       ->check(CLI::PositiveNumber)
       ->description(R"(The factor c for the sparsification target, supplied with --s-target.)")
       ->default_val(1);
+  coarsening->add_flag("--s-no-approx", ctx.sparsification.no_approx)
+      ->description("Disables some approximations of sparsification algorithms.")
+      ->default_val(false);
 
   coarsening
       ->add_option(

diff --git a/kaminpar-shm/CMakeLists.txt b/kaminpar-shm/CMakeLists.txt
@@ -26,12 +26,15 @@ add_library(kaminpar_shm ${KAMINPAR_SHM_SOURCE_FILES}
         coarsening/sparsification/IndexDistributionWithoutReplacement.h
         coarsening/sparsification/IndexDistributionWithReplacement.h
         coarsening/sparsification/RandomWithoutReplacementSampler.h
-        coarsening/sparsification/WeightedForestFireScore.hpp
-        coarsening/sparsification/WeightedForestFireScore.cpp
+        coarsening/sparsification/NetworKitWeightedForestFireScore.cpp
+        coarsening/sparsification/NetworKitWeightedForestFireScore.hpp
         coarsening/sparsification/IndependentRandomSampler.h
         coarsening/sparsification/DistributionDecorator.h
         coarsening/sparsification/UnbiasedThesholdSampler.cpp
-        coarsening/sparsification/UnbiasedThesholdSampler.h)
+        coarsening/sparsification/UnbiasedThesholdSampler.h
+        coarsening/sparsification/WeightedForestFireScore.cpp
+        coarsening/sparsification/WeightedForestFireScore.h
+        coarsening/sparsification/IndependentRandomSampler.cpp)
 target_include_directories(kaminpar_shm PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../" ${JL_SHARE}/../../include/julia/)
 target_link_libraries(kaminpar_shm PUBLIC kaminpar_common networkit ${JL_SHARE}/../../lib/libjulia.so)
 target_compile_options(kaminpar_shm PRIVATE ${KAMINPAR_WARNING_FLAGS})

diff --git a/kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.cpp b/kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.cpp
@@ -0,0 +1,134 @@
+#include "IndependentRandomSampler.h"
+
+namespace kaminpar::shm::sparsification {
+/**
+ * Samples the edges visited by utils::parallel_for_upward_edges independently of each other:
+ * an edge keeps its original weight with probability `factor * score` and gets weight 0
+ * otherwise, where `factor` comes from normalizationFactor().
+ *
+ * @param g Graph whose edges are sampled.
+ * @param target_edge_amount Edge-count target forwarded to normalizationFactor().
+ * @return One weight per edge ID; entries the loop never writes keep their initial value 0.
+ */
+template <typename Score>
+StaticArray<EdgeWeight>
+IndependentRandomSampler<Score>::sample(const CSRGraph &g, EdgeID target_edge_amount) {
+  auto edge_scores = this->_score_function->scores(g);
+  const double scaling = normalizationFactor(g, edge_scores, target_edge_amount);
+
+  StaticArray<EdgeWeight> kept_weights(g.m(), 0);
+  utils::parallel_for_upward_edges(g, [&](EdgeID edge) {
+    if (Random::instance().random_bool(scaling * edge_scores[edge])) {
+      kept_weights[edge] = g.edge_weight(edge);
+    } else {
+      kept_weights[edge] = 0;
+    }
+  });
+  return kept_weights;
+}
+/// Normalization factor for EdgeWeight scores: the exact computation if `_noApprox` is set,
+/// the bucket-based approximation otherwise.
+template <>
+double IndependentRandomSampler<EdgeWeight>::normalizationFactor(
+    const CSRGraph &g, const StaticArray<EdgeWeight> &scores, EdgeID target
+) {
+  return _noApprox ? exactNormalizationFactor(g, scores, target)
+                   : approxNormalizationFactor(g, scores, target);
+}
+/// Normalization factor for EdgeID scores: the exact computation if `_noApprox` is set,
+/// the bucket-based approximation otherwise.
+template <>
+double IndependentRandomSampler<EdgeID>::normalizationFactor(
+    const CSRGraph &g, const StaticArray<EdgeID> &scores, EdgeID target
+) {
+  return _noApprox ? exactNormalizationFactor(g, scores, target)
+                   : approxNormalizationFactor(g, scores, target);
+}
+/// Normalization factor for double scores: always the exact computation, `_noApprox` is not
+/// consulted here.
+template <>
+double IndependentRandomSampler<double>::normalizationFactor(
+    const CSRGraph &g, const StaticArray<double> &scores, EdgeID target
+) {
+  const double exact_factor = exactNormalizationFactor(g, scores, target);
+  return exact_factor;
+}
+
+/**
+ * Approximates the factor c for which the expected number of sampled edges,
+ * sum_e min(1, c * scores[e]), is about `target`.
+ *
+ * Scores are grouped into exponential buckets (bucket i holds scores in [2^i, 2^(i+1))). Only
+ * the per-bucket edge count and score sum are kept, so the extra memory is proportional to the
+ * number of buckets rather than to the number of edges. The approximation is that all scores
+ * of the bucket in which the clamping threshold 1/c falls are treated as not clamped.
+ *
+ * @param g Graph; g.m() is the number of edge IDs iterated over.
+ * @param scores Score of every edge ID in [0, g.m()).
+ * @param target Desired expected number of sampled edge IDs; values above g.m() are treated
+ *        as g.m().
+ * @return The approximate factor, 0.0 for a graph without edges.
+ */
+template <typename Score>
+double IndependentRandomSampler<Score>::approxNormalizationFactor(
+    const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target
+) {
+  if (g.m() == 0) {
+    return 0.0; // nothing to sample; also avoids dereferencing max_element() of an empty range
+  }
+  // More than every edge cannot be sampled; capping also guarantees that bucket 0 (which
+  // stands for "all edges are kept") always satisfies the target.
+  const EdgeID capped_target = target < g.m() ? target : g.m();
+
+  // exponential_bucket() is built on __builtin_clz, which is undefined for 0. Non-positive
+  // scores are never sampled anyway, so they are put into bucket 0 where they contribute a
+  // score of 0.
+  const auto bucket_of = [](const Score score) -> EdgeID {
+    return score > 0 ? exponential_bucket(score) : 0;
+  };
+
+  // The i-th bucket contains scores in [2^i, 2^(i+1))
+  const Score max_score = *std::max_element(scores.begin(), scores.end());
+  const EdgeID number_of_buckets = bucket_of(max_score) + 1;
+  StaticArray<Score> buckets_score_prefixsum(number_of_buckets, 0);
+  StaticArray<EdgeID> buckets_size_prefixsum(number_of_buckets, 0);
+  tbb::parallel_for(static_cast<EdgeID>(0), g.m(), [&](EdgeID e) {
+    const Score score = scores[e];
+    const EdgeID bucket = bucket_of(score);
+    __atomic_add_fetch(&buckets_size_prefixsum[bucket], 1, __ATOMIC_RELAXED);
+    __atomic_add_fetch(&buckets_score_prefixsum[bucket], score, __ATOMIC_RELAXED);
+  });
+  parallel::prefix_sum(
+      buckets_score_prefixsum.begin(),
+      buckets_score_prefixsum.end(),
+      buckets_score_prefixsum.begin()
+  );
+  parallel::prefix_sum(
+      buckets_size_prefixsum.begin(), buckets_size_prefixsum.end(), buckets_size_prefixsum.begin()
+  );
+
+  // Expected number of sampled edges for the factor 1/s, where s = 2^bucket_index is the
+  // smallest possible score of the bucket:
+  //   #{e : s <= scores[e]} + 1/s * sum{scores[e] : scores[e] < s}
+  auto max_edges_with_factor_in_bucket = [&](EdgeID bucket_index) {
+    if (bucket_index == 0) {
+      return static_cast<double>(g.m());
+    }
+    // 64-bit shift: `1 << 31` would overflow a plain int.
+    const double smallest_score = static_cast<double>(1ULL << bucket_index);
+    return static_cast<double>(g.m() - buckets_size_prefixsum[bucket_index - 1]) +
+           static_cast<double>(buckets_score_prefixsum[bucket_index - 1]) / smallest_score;
+  };
+
+  // Largest bucket whose factor still yields at least the target; stops at bucket 0 instead of
+  // wrapping the unsigned index around.
+  EdgeID bucket_index = number_of_buckets - 1;
+  while (bucket_index > 0 && capped_target > max_edges_with_factor_in_bucket(bucket_index)) {
+    bucket_index -= 1;
+  }
+
+  // Edges in higher buckets are sampled with probability 1, the rest share the remaining
+  // budget proportionally to their scores.
+  const double certain_edges =
+      static_cast<double>(g.m() - buckets_size_prefixsum[bucket_index]);
+  const double scaled_score_sum = static_cast<double>(buckets_score_prefixsum[bucket_index]);
+  if (!(scaled_score_sum > 0)) {
+    // Only non-positive scores are left to scale. Any factor of at least 1 keeps every edge
+    // with a positive (integral) score and none of the others.
+    return 1.0;
+  }
+  return (static_cast<double>(capped_target) - certain_edges) / scaled_score_sum;
+}
+
+/// The bucket-based approximation is not available for Score=double: this specialization
+/// ignores its arguments and always throws std::logic_error.
+template <>
+double IndependentRandomSampler<double>::approxNormalizationFactor(
+    const CSRGraph & /*g*/, const StaticArray<double> & /*scores*/, EdgeID /*target*/
+) {
+  throw std::logic_error(
+      "no implementation of approxNormalizationFactor exists for Score=double."
+  );
+}
+
+/**
+ * Computes the factor c for which the expected number of sampled upward edges,
+ * sum_e min(1, c * scores[e]), equals target / 2.
+ *
+ * The scores of the upward edges are sorted ascendingly. expected_at_index(i) is the expected
+ * sample size for c = 1 / sorted_scores[i]; a binary search finds the largest index whose
+ * expectation still reaches the target, and c is then interpolated between that index and its
+ * successor.
+ *
+ * NOTE(review): scores are assumed to be strictly positive (expected_at_index divides by
+ * them) and parallel::prefix_sum is assumed to be inclusive, which is what the formula in
+ * expected_at_index requires -- confirm both.
+ *
+ * @param g Graph; g.m() / 2 upward edges are expected.
+ * @param scores Score per edge ID.
+ * @param target Desired number of sampled edge IDs (both directions); values above g.m() are
+ *        treated as g.m().
+ * @return The factor c, 0.0 for a graph without edges.
+ */
+template <typename Score>
+double IndependentRandomSampler<Score>::exactNormalizationFactor(
+    const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target
+) {
+  const EdgeID num_edges = g.m() / 2;
+  if (num_edges == 0) {
+    return 0.0; // nothing to sample; the search below needs at least one index
+  }
+  // expected_at_index(0) == num_edges is the largest reachable expectation, so capping the
+  // target guarantees that the search finds an index.
+  const EdgeID half_target = target / 2 < num_edges ? target / 2 : num_edges;
+
+  StaticArray<Score> sorted_scores(num_edges);
+  StaticArray<Score> prefix_sum(num_edges);
+  EdgeID end_of_sorted_scores = 0;
+  utils::for_upward_edges(g, [&](EdgeID e) {
+    sorted_scores[end_of_sorted_scores++] = static_cast<Score>(scores[e]);
+  });
+  tbb::parallel_sort(sorted_scores.begin(), sorted_scores.end());
+  parallel::prefix_sum(sorted_scores.begin(), sorted_scores.end(), prefix_sum.begin());
+
+  // Expected sample size for c = 1 / sorted_scores[i]: the num_edges - i - 1 edges after i are
+  // sampled with probability 1, the edges up to and including i proportionally to their score.
+  auto expected_at_index = [&](EdgeID i) {
+    return num_edges - i - 1 + 1 / static_cast<double>(sorted_scores[i]) * prefix_sum[i];
+  };
+
+  // Expectations decrease with the index, so the indices are searched in reverse order. The
+  // result is the largest index i with half_target <= expected_at_index(i).
+  auto possible_indices =
+      std::ranges::iota_view(static_cast<EdgeID>(0), num_edges) | std::views::reverse;
+  const auto found = std::upper_bound(
+      possible_indices.begin(),
+      possible_indices.end(),
+      half_target,
+      [&](EdgeID t, EdgeID i) { return t <= expected_at_index(i); }
+  );
+  // Never dereference end(); a failed search is reported by the assertion below.
+  const EdgeID index = found != possible_indices.end() ? *found : 0;
+  KASSERT(
+      (index + 1 >= num_edges || expected_at_index(index + 1) <= half_target) &&
+          half_target <= expected_at_index(index),
+      "binary search did not work: target/2=" << half_target << " is not in ["
+          << (index + 1 < num_edges ? expected_at_index(index + 1) : 0.0) << ", "
+          << expected_at_index(index) << "]",
+      assert::always
+  );
+
+  // c lies in [1 / sorted_scores[index + 1], 1 / sorted_scores[index]]: the edges after
+  // `index` are certain, the edges 0..index are scaled, hence
+  //   certain_edges + c * prefix_sum[index] = half_target.
+  // Computed in double so that neither index == 0 nor a tiny target can wrap around.
+  const double certain_edges = static_cast<double>(num_edges - index - 1);
+  return (static_cast<double>(half_target) - certain_edges) /
+         static_cast<double>(prefix_sum[index]);
+}
+}; // namespace kaminpar::shm::sparsification
diff --git a/kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.h b/kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.h
@@ -10,49 +10,29 @@
 namespace kaminpar::shm::sparsification {
 template <typename Score> class IndependentRandomSampler : public ScoreBacedSampler<Score> {
 public:
-  IndependentRandomSampler(std::unique_ptr<ScoreFunction<Score>> scoreFunction)
-      : ScoreBacedSampler<Score>(std::move(scoreFunction)) {}
-
-  StaticArray<EdgeWeight> sample(const CSRGraph &g, EdgeID target_edge_amount) override {
-    auto scores = this->_score_function->scores(g);
-    double factor = normalizationFactor(g, scores, target_edge_amount);
-
-    StaticArray<EdgeWeight> sample(g.m(), 0);
-    utils::for_upward_edges(g, [&](EdgeID e) {
-      sample[e] = Random::instance().random_bool(factor * scores[e]) ? g.edge_weight(e) : 0;
-    });
-    return sample;
+  IndependentRandomSampler(
+      std::unique_ptr<ScoreFunction<Score>> scoreFunction, bool noApprox = false
+  )
+      : ScoreBacedSampler<Score>(std::move(scoreFunction)),
+        _noApprox(noApprox) {}
+
+  double normalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);
+  double
+  exactNormalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);
+  double
+  approxNormalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);
+
+  StaticArray<EdgeWeight> sample(const CSRGraph &g, EdgeID target_edge_amount) override;
+
+  /**
+   * Index of the highest set bit of `score`, i.e. the i with 2^i <= score < 2^(i+1).
+   * A score of 0 has no such bit and __builtin_clz(0) is undefined, so 0 is mapped to 0.
+   *
+   * NOTE(review): __builtin_clz operates on unsigned int; if EdgeWeight is configured wider
+   * than 32 bits the upper bits are ignored -- confirm that scores stay below 2^32.
+   */
+  static EdgeID exponential_bucket(EdgeWeight score) {
+    if (score == 0) {
+      return 0;
+    }
+    return 31 - __builtin_clz(score);
   }
 
-  double normalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target) {
-    StaticArray<Score> sorted_scores(g.m() / 2);
-    StaticArray<Score> prefix_sum(g.m() / 2);
-    EdgeID end_of_sorted_scores = 0;
-    utils::for_upward_edges(g, [&](EdgeID e) {
-      sorted_scores[end_of_sorted_scores++] = static_cast<Score>(scores[e]);
-    });
-    std::sort(sorted_scores.begin(), sorted_scores.end());
-    parallel::prefix_sum(sorted_scores.begin(), sorted_scores.end(), prefix_sum.begin());
-
-    auto expected_at_index = [&](EdgeID i) {
-      return g.m() / 2 - i - 1 + 1 / static_cast<double>(sorted_scores[i]) * prefix_sum[i];
-    };
-
-    auto possible_indices =
-        std::ranges::iota_view(static_cast<EdgeID>(0), g.m() / 2) | std::views::reverse;
-    EdgeID index = *std::upper_bound(
-        possible_indices.begin(),
-        possible_indices.end(),
-        target / 2,
-        [&](EdgeID t, NodeID i) {
-          return t <= expected_at_index(i); // negated to make asc
-        }
-    );
-
-    double factor = static_cast<double>((target / 2 - (g.m() / 2 - index))) / prefix_sum[index - 1];
-
-
-    return factor;
-  }
+private:
+  bool _noApprox;
 };
+
+template class IndependentRandomSampler<EdgeWeight>;
+template class IndependentRandomSampler<EdgeID>;
+template class IndependentRandomSampler<double>;
 }; // namespace kaminpar::shm::sparsification
diff --git a/kaminpar-shm/coarsening/sparsification/NetworKitScoreAdapter.cpp b/kaminpar-shm/coarsening/sparsification/NetworKitScoreAdapter.cpp
@@ -6,8 +6,8 @@
 
 #include <networkit/graph/Graph.hpp>
 #include <networkit/sparsification/ForestFireScore.hpp>
-#include "WeightedForestFireScore.hpp"
 
+#include "NetworKitWeightedForestFireScore.hpp"
 #include "networkit_utils.h"
 #include "sparsification_utils.h"
 
@@ -38,6 +38,6 @@ StaticArray<Score> NetworKitScoreAdapter<EdgeScore, Score>::scores(const CSRGrap
   return scores;
 }
 template class NetworKitScoreAdapter<NetworKit::ForestFireScore, double>;
-template class NetworKitScoreAdapter<WeightedForestFireScore, double>;
+template class NetworKitScoreAdapter<NetworKitWeightedForestFireScore, double>;
 
 } // namespace kaminpar::shm::sparsification
diff --git a/kaminpar-shm/coarsening/sparsification/NetworKitWeightedForestFireScore.cpp b/kaminpar-shm/coarsening/sparsification/NetworKitWeightedForestFireScore.cpp
@@ -0,0 +1,110 @@
+/*
+ * NetworKitWeightedForestFireScore.cpp
+ *
+ *  Created on: 26.08.2014
+ *      Author: Gerd Lindner
+ */
+
+#include <limits>
+#include <queue>
+#include <set>
+
+#include <networkit/auxiliary/Log.hpp>
+#include <networkit/auxiliary/Parallel.hpp>
+#include <networkit/graph/GraphTools.hpp>
+
+#include "DistributionDecorator.h"
+#include "IndexDistributionWithoutReplacement.h"
+#include "NetworKitWeightedForestFireScore.hpp"
+
+namespace kaminpar::shm::sparsification {
+
+/**
+ * Stores the parameters only; the scores are computed by run().
+ *
+ * @param G Graph handed to the EdgeScore<double> base class.
+ * @param pf Threshold for the random draw in run(): a fire stops spreading from a node as
+ *        soon as a draw exceeds pf.
+ * @param targetBurntRatio run() starts new fires until the number of burnt edges reaches
+ *        targetBurntRatio times the number of edges.
+ */
+NetworKitWeightedForestFireScore::NetworKitWeightedForestFireScore(
+    const NetworKit::Graph &G, double pf, double targetBurntRatio
+)
+    : EdgeScore<double>(G), pf(pf), targetBurntRatio(targetBurntRatio) {}
+
+/**
+ * Computes the edge scores by simulating fires until at least
+ * targetBurntRatio * numberOfEdges() edge burns were counted.
+ *
+ * Every thread of the parallel region repeatedly starts a fire at a random node and spreads it
+ * with a queue: for the node taken from the queue, a distribution over the edges to not yet
+ * visited neighbors is built from the edge weights; edges are drawn from it until a random
+ * draw exceeds pf or the distribution is empty. Each drawn edge is counted as burnt and its
+ * endpoint is marked visited and queued. The score of an edge is its burn count divided by the
+ * largest burn count (all scores are 0 if nothing burnt).
+ *
+ * @throws std::runtime_error if the edges of the graph have not been indexed.
+ */
+void NetworKitWeightedForestFireScore::run() {
+  if (G->hasEdgeIds() == false) {
+    throw std::runtime_error("edges have not been indexed - call indexEdges first");
+  }
+
+  std::vector<count> burnt(G->upperEdgeIdBound(), 0);
+  count edgesBurnt = 0;
+  // Loop-invariant: number of burns after which no new fire is started.
+  const double burnTarget = targetBurntRatio * G->numberOfEdges();
+
+  // edgesBurnt is updated atomically by all threads, so the loop condition has to read it
+  // atomically as well; a plain read would be a data race.
+  const auto burntSoFar = [&edgesBurnt]() {
+    count snapshot;
+#pragma omp atomic read
+    snapshot = edgesBurnt;
+    return snapshot;
+  };
+
+#pragma omp parallel
+  while (burntSoFar() < burnTarget) {
+    // Start a new fire
+    std::queue<node> activeNodes;
+    std::vector<bool> visited(G->upperNodeIdBound(), false);
+    activeNodes.push(GraphTools::randomNode(*G));
+
+    // Distribution over the edges from u to neighbors this fire has not visited yet, weighted
+    // by edge weight.
+    auto forwardNeighborDistribution = [&](node u) {
+      std::vector<std::pair<node, edgeid>> validEdges;
+      std::vector<edgeweight> weights;
+      for (count i = 0; i < G->degree(u); i++) {
+        auto [v, e] = G->getIthNeighborWithId(u, i);
+        if (visited[v])
+          continue;
+        weights.push_back(G->getIthNeighborWeight(u, i));
+        validEdges.emplace_back(v, e);
+      }
+      return DistributionDecorator<std::pair<node, edgeid>, IndexDistributionWithoutReplacement>(
+          weights.begin(), weights.end(), validEdges.begin(), validEdges.end()
+      );
+    };
+
+    // Burns of this fire; added to the shared counter once the fire has died down.
+    count localEdgesBurnt = 0;
+
+    while (!activeNodes.empty()) {
+      node v = activeNodes.front();
+      activeNodes.pop();
+
+      auto validNeighborDistribution = forwardNeighborDistribution(v);
+
+      while (true) {
+        double q = Aux::Random::real(1.0);
+        if (q > pf || validNeighborDistribution.underlying_distribution().empty()) {
+          break;
+        }
+
+        { // mark node as visited, burn edge
+          auto [x, eid] = validNeighborDistribution();
+          activeNodes.push(x);
+#pragma omp atomic
+          burnt[eid]++;
+          localEdgesBurnt++;
+          visited[x] = true;
+        }
+      }
+    }
+
+#pragma omp atomic
+    edgesBurnt += localEdgesBurnt;
+  }
+
+  std::vector<double> burntNormalized(G->upperEdgeIdBound(), 0.0);
+  // For a graph without edge IDs `burnt` is empty and max_element would return end().
+  const double maxv =
+      burnt.empty()
+          ? 0.0
+          : static_cast<double>(*Aux::Parallel::max_element(std::begin(burnt), std::end(burnt)));
+
+  if (maxv > 0) {
+#pragma omp parallel for
+    for (omp_index i = 0; i < static_cast<omp_index>(burnt.size()); ++i) {
+      burntNormalized[i] = burnt[i] / maxv;
+    }
+  }
+
+  scoreData = std::move(burntNormalized);
+  hasRun = true;
+}
+
+/// Per-edge lookup by endpoints is not provided: always throws std::runtime_error.
+double NetworKitWeightedForestFireScore::score(node /*u*/, node /*v*/) {
+  throw std::runtime_error("Not implemented: Use scores() instead.");
+}
+
+/// Per-edge lookup by edge ID is not provided: always throws std::runtime_error.
+double NetworKitWeightedForestFireScore::score(edgeid /*eid*/) {
+  throw std::runtime_error("Not implemented: Use scores() instead.");
+}
+
+} // namespace kaminpar::shm::sparsification
diff --git a/...parsification/WeightedForestFireScore.hpp → ...tion/NetworKitWeightedForestFireScore.hpp b/...parsification/WeightedForestFireScore.hpp → ...tion/NetworKitWeightedForestFireScore.hpp
@@ -1,5 +1,5 @@
 /*
- * WeightedForestFireScore.hpp
+ * NetworKitWeightedForestFireScore.hpp
  *
  *  Created on: 26.08.2014
  *      Author: Gerd Lindner
@@ -16,11 +16,11 @@ using namespace NetworKit;
  * Based on the Forest Fire algorithm introduced by Leskovec et al.
  * The burn frequency of the edges is used as edge score.
  */
-class WeightedForestFireScore final : public EdgeScore<double> {
+class NetworKitWeightedForestFireScore final : public EdgeScore<double> {
 typedef std::tuple<node, edgeid, edgeweight> edgetriple;
 
 public:
-  WeightedForestFireScore(const NetworKit::Graph &graph, double pf, double targetBurntRatio);
+  NetworKitWeightedForestFireScore(const NetworKit::Graph &graph, double pf, double targetBurntRatio);
   double score(edgeid eid) override;
   double score(node u, node v) override;
   void run() override;