Skip to content

Commit

Permalink
merge with parallel
Browse files Browse the repository at this point in the history
  • Loading branch information
Dominik Rosch committed Dec 8, 2024
2 parents 1b65774 + db5d3e5 commit 11abf74
Show file tree
Hide file tree
Showing 18 changed files with 691 additions and 208 deletions.
6 changes: 5 additions & 1 deletion kaminpar-cli/kaminpar_arguments.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ CLI::Option_group *create_coarsening_options(CLI::App *app, Context &ctx) {
- effective-resistance, er: sample edges with relative effective-resistance above threshold
- independent-random, ir: sample edges indepently with probabilites proportional to scores
- random-with-replacement, rw/r: draw random edges WITH replacment and probailites proportinal to scores
- random-without-replacement, rw/or: draw random edges WITHOUT replacment and probailites proportinal to scores)")
- random-without-replacement, rw/or: draw random edges WITHOUT replacment and probailites proportinal to scores)"
)
->capture_default_str();

coarsening->add_option("--s-score", ctx.sparsification.score_function)
Expand All @@ -164,6 +165,9 @@ CLI::Option_group *create_coarsening_options(CLI::App *app, Context &ctx) {
->check(CLI::PositiveNumber)
->description(R"(The factor c for the sparsification target, supplied with --s-target.)")
->default_val(1);
coarsening->add_flag("--s-no-approx", ctx.sparsification.no_approx)
->description("Disables some approximations of sparsification algorithms.")
->default_val(false);

coarsening
->add_option(
Expand Down
9 changes: 6 additions & 3 deletions kaminpar-shm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,15 @@ add_library(kaminpar_shm ${KAMINPAR_SHM_SOURCE_FILES}
coarsening/sparsification/IndexDistributionWithoutReplacement.h
coarsening/sparsification/IndexDistributionWithReplacement.h
coarsening/sparsification/RandomWithoutReplacementSampler.h
coarsening/sparsification/WeightedForestFireScore.hpp
coarsening/sparsification/WeightedForestFireScore.cpp
coarsening/sparsification/NetworKitWeightedForestFireScore.cpp
coarsening/sparsification/NetworKitWeightedForestFireScore.hpp
coarsening/sparsification/IndependentRandomSampler.h
coarsening/sparsification/DistributionDecorator.h
coarsening/sparsification/UnbiasedThesholdSampler.cpp
coarsening/sparsification/UnbiasedThesholdSampler.h)
coarsening/sparsification/UnbiasedThesholdSampler.h
coarsening/sparsification/WeightedForestFireScore.cpp
coarsening/sparsification/WeightedForestFireScore.h
coarsening/sparsification/IndependentRandomSampler.cpp)
target_include_directories(kaminpar_shm PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../" ${JL_SHARE}/../../include/julia/)
target_link_libraries(kaminpar_shm PUBLIC kaminpar_common networkit ${JL_SHARE}/../../lib/libjulia.so)
target_compile_options(kaminpar_shm PRIVATE ${KAMINPAR_WARNING_FLAGS})
Expand Down
134 changes: 134 additions & 0 deletions kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#include "IndependentRandomSampler.h"

namespace kaminpar::shm::sparsification {
/// Samples edges independently of each other.
///
/// The score function is evaluated once, a normalization factor for the
/// requested number of edges is derived from the scores, and every edge visited
/// by utils::parallel_for_upward_edges is then kept with probability
/// `factor * score`.
///
/// @param g Graph to sample from.
/// @param target_edge_amount Number of edges the sample is normalized towards.
/// @return Array with g.m() entries: the edge weight for kept edges, 0 for
///         dropped edges and for edges that were not visited.
template <typename Score>
StaticArray<EdgeWeight>
IndependentRandomSampler<Score>::sample(const CSRGraph &g, EdgeID target_edge_amount) {
  const auto edge_scores = this->_score_function->scores(g);
  const double scaling = normalizationFactor(g, edge_scores, target_edge_amount);

  StaticArray<EdgeWeight> kept_weights(g.m(), 0);
  utils::parallel_for_upward_edges(g, [&](const EdgeID edge) {
    const double keep_probability = scaling * edge_scores[edge];
    if (Random::instance().random_bool(keep_probability)) {
      kept_weights[edge] = g.edge_weight(edge);
    } else {
      kept_weights[edge] = 0;
    }
  });
  return kept_weights;
}
/// Normalization factor for EdgeWeight-valued scores.
///
/// Uses the exact computation when the sampler was constructed with
/// `noApprox == true` and the bucket-based approximation otherwise.
template <>
double IndependentRandomSampler<EdgeWeight>::normalizationFactor(
    const CSRGraph &g, const StaticArray<EdgeWeight> &scores, EdgeID target
) {
  return _noApprox ? exactNormalizationFactor(g, scores, target)
                   : approxNormalizationFactor(g, scores, target);
}
/// Normalization factor for EdgeID-valued scores.
///
/// Uses the exact computation when the sampler was constructed with
/// `noApprox == true` and the bucket-based approximation otherwise.
template <>
double IndependentRandomSampler<EdgeID>::normalizationFactor(
    const CSRGraph &g, const StaticArray<EdgeID> &scores, EdgeID target
) {
  return _noApprox ? exactNormalizationFactor(g, scores, target)
                   : approxNormalizationFactor(g, scores, target);
}
/// Normalization factor for floating-point scores.
///
/// The exact computation is used regardless of the `_noApprox` setting.
template <>
double IndependentRandomSampler<double>::normalizationFactor(
    const CSRGraph &g, const StaticArray<double> &scores, EdgeID target
) {
  const double exact_factor = exactNormalizationFactor(g, scores, target);
  return exact_factor;
}

/// Approximates the normalization factor by grouping the scores into
/// power-of-two buckets instead of sorting them.
///
/// Bucket i collects the scores in [2^i, 2^(i+1)). For every bucket the number
/// of scores and their sum are accumulated and turned into prefix sums; the
/// buckets are then scanned from the largest one downwards until the target
/// fits, and the factor is derived from the prefix sums of that bucket.
///
/// @param g Graph whose edges are sampled; all g.m() edge slots are bucketed.
/// @param scores Score per edge slot (indexed by edge ID).
/// @param target Number of edges the sample should contain in expectation.
/// @return Factor the scores are multiplied with to obtain sampling
///         probabilities; 0 for a graph without edges.
template <typename Score>
double IndependentRandomSampler<Score>::approxNormalizationFactor(
    const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target
) {
  // Without edges there is neither a maximum score nor anything to normalize.
  if (g.m() == 0) {
    return 0.0;
  }

  // exponential_bucket() is based on __builtin_clz, which is undefined for 0;
  // zero scores are therefore filed under bucket 0.
  const auto bucket_of = [](const Score score) -> EdgeID {
    return score > 0 ? exponential_bucket(score) : static_cast<EdgeID>(0);
  };

  // The i-th bucket contains scores in [2^i, 2^(i+1))
  const Score max_score = *std::max_element(scores.begin(), scores.end());
  const EdgeID number_of_buckets = bucket_of(max_score) + 1;

  // Both arrays first hold per-bucket totals and are converted to prefix sums
  // below. They are accumulated into, so they must start at zero.
  StaticArray<Score> buckets_score_prefixsum(number_of_buckets, static_cast<Score>(0));
  StaticArray<EdgeID> buckets_size_prefixsum(number_of_buckets, static_cast<EdgeID>(0));
  tbb::parallel_for(static_cast<EdgeID>(0), g.m(), [&](EdgeID e) {
    // Bucket by the score itself: the bucket count was derived from the
    // maximum of `scores`, so any other quantity could index out of range.
    const Score score = scores[e];
    const EdgeID bucket = bucket_of(score);
    __atomic_add_fetch(&buckets_score_prefixsum[bucket], score, __ATOMIC_RELAXED);
    __atomic_add_fetch(&buckets_size_prefixsum[bucket], static_cast<EdgeID>(1), __ATOMIC_RELAXED);
  });
  parallel::prefix_sum(
      buckets_score_prefixsum.begin(),
      buckets_score_prefixsum.end(),
      buckets_score_prefixsum.begin()
  );
  parallel::prefix_sum(
      buckets_size_prefixsum.begin(), buckets_size_prefixsum.end(), buckets_size_prefixsum.begin()
  );

  auto max_edges_with_factor_in_bucket = [&](EdgeID bucket_index) {
    // s = smallest possible score in bucket = 2^bucket_index
    // #{e in Edges : s <= scores[e]} + 1/s scores{e in Edges : scores[e] < s}
    // NOTE(review): bucket 1 is treated like bucket 0 here (bound g.m()) although the
    // formula looks applicable for bucket_index == 1 as well - confirm the intended bound.
    if (bucket_index > 1)
      // 1ULL keeps the shift well-defined for bucket 31 (1 << 31 overflows int).
      return g.m() - buckets_size_prefixsum[bucket_index - 1] +
             1.0 / static_cast<double>(1ULL << bucket_index) *
                 buckets_score_prefixsum[bucket_index - 1];
    else
      return static_cast<double>(g.m());
  };

  // Scan downwards; stop at bucket 0 so that a target above g.m() cannot wrap
  // the unsigned index around.
  EdgeID bucket_index = number_of_buckets - 1;
  while (bucket_index > 0 && target > max_edges_with_factor_in_bucket(bucket_index))
    bucket_index -= 1;

  // Edges in higher buckets are counted as certain; the remaining part of the
  // target is spread over the score mass of the buckets up to bucket_index.
  const double factor = (target - (g.m() - buckets_size_prefixsum[bucket_index])) /
                        static_cast<double>(buckets_score_prefixsum[bucket_index]);
  return factor;
}

/// Bucket-based approximation is not available for floating-point scores.
///
/// @throws std::logic_error always.
template <>
double IndependentRandomSampler<double>::approxNormalizationFactor(
    const CSRGraph &g, const StaticArray<double> &scores, EdgeID target
) {
  const std::logic_error unsupported(
      "no implementation for of approxNormalizationFactor exists for Score=double."
  );
  throw unsupported;
}

/// Computes the normalization factor exactly from the sorted scores.
///
/// The scores of the edges visited by utils::for_upward_edges are sorted and
/// prefix-summed. expected_at_index(i) is the expected sample size when the
/// factor is 1 / sorted_scores[i]: the edges after i are kept with probability
/// 1 and the edges up to and including i contribute
/// prefix_sum[i] / sorted_scores[i]. The search picks the largest index whose
/// expected size still reaches target / 2, and the factor is chosen so that
/// the edges up to and including that index yield the remaining expectation.
///
/// NOTE(review): assumes for_upward_edges visits exactly g.m() / 2 edges and
/// that all scores are strictly positive (they are used as divisors) - confirm.
///
/// @param g Graph whose edges are sampled.
/// @param scores Score per edge slot (indexed by edge ID).
/// @param target Number of edge slots to keep in expectation; halved
///        internally because only g.m() / 2 edges are considered.
/// @return Factor the scores are multiplied with to obtain sampling
///         probabilities; 0 for a graph without edges.
template <typename Score>
double IndependentRandomSampler<Score>::exactNormalizationFactor(
    const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target
) {
  const EdgeID num_edges = g.m() / 2;
  const EdgeID half_target = target / 2;

  // Nothing to sample from: avoids searching an empty index range.
  if (num_edges == 0) {
    return 0.0;
  }

  StaticArray<Score> sorted_scores(num_edges);
  StaticArray<Score> prefix_sum(num_edges);
  EdgeID end_of_sorted_scores = 0;
  utils::for_upward_edges(g, [&](EdgeID e) {
    sorted_scores[end_of_sorted_scores++] = static_cast<Score>(scores[e]);
  });
  tbb::parallel_sort(sorted_scores.begin(), sorted_scores.end());
  parallel::prefix_sum(sorted_scores.begin(), sorted_scores.end(), prefix_sum.begin());

  auto expected_at_index = [&](EdgeID i) {
    return num_edges - i - 1 + 1 / static_cast<double>(sorted_scores[i]) * prefix_sum[i];
  };

  // Indices are searched in descending order, so the expected sizes are ascending
  // and the first index accepted by the predicate is the largest admissible one.
  auto possible_indices =
      std::ranges::iota_view(static_cast<EdgeID>(0), num_edges) | std::views::reverse;
  const auto found = std::upper_bound(
      possible_indices.begin(),
      possible_indices.end(),
      half_target,
      [&](EdgeID t, EdgeID i) { return t <= expected_at_index(i); }
  );

  // No admissible index: the target exceeds what even the largest expectation
  // provides. Fall back to index 0 instead of dereferencing the end iterator.
  const bool target_out_of_range = (found == possible_indices.end());
  const EdgeID index = target_out_of_range ? static_cast<EdgeID>(0) : *found;

  KASSERT(
      target_out_of_range ||
          ((index + 1 >= num_edges || expected_at_index(index + 1) <= half_target) &&
           half_target <= expected_at_index(index)),
      "binary search did not work: target/2=" << target / 2 << " is not in ["
                                              << expected_at_index(index + 1) << ", "
                                              << expected_at_index(index) << "]",
      assert::always
  );

  // The factor lies between 1 / sorted_scores[index + 1] and 1 / sorted_scores[index]:
  // the num_edges - index - 1 edges after `index` are kept with probability 1, and the
  // edges up to and including `index` (score mass prefix_sum[index]) must supply the
  // rest. Computed in double so the subtraction cannot wrap around.
  const double certain_edges = static_cast<double>(num_edges - index - 1);
  const double factor = (static_cast<double>(half_target) - certain_edges) /
                        static_cast<double>(prefix_sum[index]);

  return factor;
}
}; // namespace kaminpar::shm::sparsification
64 changes: 22 additions & 42 deletions kaminpar-shm/coarsening/sparsification/IndependentRandomSampler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,49 +10,29 @@
namespace kaminpar::shm::sparsification {
template <typename Score> class IndependentRandomSampler : public ScoreBacedSampler<Score> {
public:
IndependentRandomSampler(std::unique_ptr<ScoreFunction<Score>> scoreFunction)
: ScoreBacedSampler<Score>(std::move(scoreFunction)) {}

StaticArray<EdgeWeight> sample(const CSRGraph &g, EdgeID target_edge_amount) override {
auto scores = this->_score_function->scores(g);
double factor = normalizationFactor(g, scores, target_edge_amount);

StaticArray<EdgeWeight> sample(g.m(), 0);
utils::for_upward_edges(g, [&](EdgeID e) {
sample[e] = Random::instance().random_bool(factor * scores[e]) ? g.edge_weight(e) : 0;
});
return sample;
IndependentRandomSampler(
std::unique_ptr<ScoreFunction<Score>> scoreFunction, bool noApprox = false
)
: ScoreBacedSampler<Score>(std::move(scoreFunction)),
_noApprox(noApprox) {}

double normalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);
double
exactNormalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);
double
approxNormalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target);

StaticArray<EdgeWeight> sample(const CSRGraph &g, EdgeID target_edge_amount) override;

/// Index i of the power-of-two bucket [2^i, 2^(i+1)) that contains `score`.
///
/// A score of 0 is mapped to bucket 0, because __builtin_clz(0) is undefined.
/// NOTE(review): `31 - clz` presumes the value fits a 32-bit unsigned int -
/// confirm for builds with wider weight types.
static EdgeID exponential_bucket(EdgeWeight score) {
  if (score == 0) {
    return 0;
  }
  return 31 - __builtin_clz(score);
}

double normalizationFactor(const CSRGraph &g, const StaticArray<Score> &scores, EdgeID target) {
StaticArray<Score> sorted_scores(g.m() / 2);
StaticArray<Score> prefix_sum(g.m() / 2);
EdgeID end_of_sorted_scores = 0;
utils::for_upward_edges(g, [&](EdgeID e) {
sorted_scores[end_of_sorted_scores++] = static_cast<Score>(scores[e]);
});
std::sort(sorted_scores.begin(), sorted_scores.end());
parallel::prefix_sum(sorted_scores.begin(), sorted_scores.end(), prefix_sum.begin());

auto expected_at_index = [&](EdgeID i) {
return g.m() / 2 - i - 1 + 1 / static_cast<double>(sorted_scores[i]) * prefix_sum[i];
};

auto possible_indices =
std::ranges::iota_view(static_cast<EdgeID>(0), g.m() / 2) | std::views::reverse;
EdgeID index = *std::upper_bound(
possible_indices.begin(),
possible_indices.end(),
target / 2,
[&](EdgeID t, NodeID i) {
return t <= expected_at_index(i); // negated to make asc
}
);

double factor = static_cast<double>((target / 2 - (g.m() / 2 - index))) / prefix_sum[index - 1];


return factor;
}
private:
bool _noApprox;
};

template class IndependentRandomSampler<EdgeWeight>;
template class IndependentRandomSampler<EdgeID>;
template class IndependentRandomSampler<double>;
}; // namespace kaminpar::shm::sparsification
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#include <networkit/graph/Graph.hpp>
#include <networkit/sparsification/ForestFireScore.hpp>
#include "WeightedForestFireScore.hpp"

#include "NetworKitWeightedForestFireScore.hpp"
#include "networkit_utils.h"
#include "sparsification_utils.h"

Expand Down Expand Up @@ -38,6 +38,6 @@ StaticArray<Score> NetworKitScoreAdapter<EdgeScore, Score>::scores(const CSRGrap
return scores;
}
template class NetworKitScoreAdapter<NetworKit::ForestFireScore, double>;
template class NetworKitScoreAdapter<WeightedForestFireScore, double>;
template class NetworKitScoreAdapter<NetworKitWeightedForestFireScore, double>;

} // namespace kaminpar::shm::sparsification
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/*
* NetworKitWeightedForestFireScore.cpp
*
* Created on: 26.08.2014
* Author: Gerd Lindner
*/

#include <limits>
#include <queue>
#include <set>

#include <networkit/auxiliary/Log.hpp>
#include <networkit/auxiliary/Parallel.hpp>
#include <networkit/graph/GraphTools.hpp>

#include "DistributionDecorator.h"
#include "IndexDistributionWithoutReplacement.h"
#include "NetworKitWeightedForestFireScore.hpp"

namespace kaminpar::shm::sparsification {

// Creates a forest-fire edge score for graph G; no scores are computed until run() is called.
//
// G:                graph forwarded to the EdgeScore<double> base class.
// pf:               threshold used by run(): a node stops igniting further neighbors as soon
//                   as a value drawn with Aux::Random::real(1.0) exceeds pf.
// targetBurntRatio: run() keeps starting new fires while the number of burnt edges is below
//                   targetBurntRatio * G.numberOfEdges().
NetworKitWeightedForestFireScore::NetworKitWeightedForestFireScore(
const NetworKit::Graph &G, double pf, double targetBurntRatio
)
: EdgeScore<double>(G),
pf(pf),
targetBurntRatio(targetBurntRatio) {}

void NetworKitWeightedForestFireScore::run() {
if (G->hasEdgeIds() == false) {
throw std::runtime_error("edges have not been indexed - call indexEdges first");
}

std::vector<count> burnt(G->upperEdgeIdBound(), 0);
count edgesBurnt = 0;

#pragma omp parallel
while (edgesBurnt < targetBurntRatio * G->numberOfEdges()) {
// Start a new fire
std::queue<node> activeNodes;
std::vector<bool> visited(G->upperNodeIdBound(), false);
activeNodes.push(GraphTools::randomNode(*G));

auto forwardNeighborDistribution = [&](node u) {
std::vector<std::pair<node, edgeid>> validEdges;
std::vector<edgeweight> weights;
for (count i = 0; i < G->degree(u); i++) {
auto [v, e] = G->getIthNeighborWithId(u, i);
if (visited[v])
continue;
weights.push_back(G->getIthNeighborWeight(u, i));
validEdges.emplace_back(v, e);
}
return DistributionDecorator<std::pair<node, edgeid>, IndexDistributionWithoutReplacement>(
weights.begin(), weights.end(), validEdges.begin(), validEdges.end()
);
};

count localEdgesBurnt = 0;

while (!activeNodes.empty()) {
node v = activeNodes.front();
activeNodes.pop();

auto validNeighborDistribution = forwardNeighborDistribution(v);

while (true) {
double q = Aux::Random::real(1.0);
if (q > pf || validNeighborDistribution.underlying_distribution().empty()) {
break;
}

{ // mark node as visited, burn edge
auto [x, eid] = validNeighborDistribution();
activeNodes.push(x);
#pragma omp atomic
burnt[eid]++;
localEdgesBurnt++;
visited[x] = true;
}
}
}

#pragma omp atomic
edgesBurnt += localEdgesBurnt;
}

std::vector<double> burntNormalized(G->upperEdgeIdBound(), 0.0);
double maxv = (double)*Aux::Parallel::max_element(std::begin(burnt), std::end(burnt));

if (maxv > 0) {
#pragma omp parallel for
for (omp_index i = 0; i < static_cast<omp_index>(burnt.size()); ++i) {
burntNormalized[i] = burnt[i] / maxv;
}
}

scoreData = std::move(burntNormalized);
hasRun = true;
}

// Per-edge lookup by endpoints is not supported; always throws std::runtime_error.
double NetworKitWeightedForestFireScore::score(node, node) {
  const std::runtime_error not_implemented("Not implemented: Use scores() instead.");
  throw not_implemented;
}

// Per-edge lookup by edge id is not supported; always throws std::runtime_error.
double NetworKitWeightedForestFireScore::score(edgeid) {
  const std::runtime_error not_implemented("Not implemented: Use scores() instead.");
  throw not_implemented;
}

} // namespace kaminpar::shm::sparsification
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* WeightedForestFireScore.hpp
* NetworKitWeightedForestFireScore.hpp
*
* Created on: 26.08.2014
* Author: Gerd Lindner
Expand All @@ -16,11 +16,11 @@ using namespace NetworKit;
* Based on the Forest Fire algorithm introduced by Leskovec et al.
* The burn frequency of the edges is used as edge score.
*/
class WeightedForestFireScore final : public EdgeScore<double> {
class NetworKitWeightedForestFireScore final : public EdgeScore<double> {
typedef std::tuple<node, edgeid, edgeweight> edgetriple;

public:
WeightedForestFireScore(const NetworKit::Graph &graph, double pf, double targetBurntRatio);
NetworKitWeightedForestFireScore(const NetworKit::Graph &graph, double pf, double targetBurntRatio);
double score(edgeid eid) override;
double score(node u, node v) override;
void run() override;
Expand Down
Loading

0 comments on commit 11abf74

Please sign in to comment.