Skip to content

Commit

Permalink
Implementation of C++ metrics parallelization (#734)
Browse files Browse the repository at this point in the history
  • Loading branch information
dbukki authored Jun 17, 2024
1 parent e9f01db commit e86e58b
Show file tree
Hide file tree
Showing 5 changed files with 317 additions and 149 deletions.
9 changes: 0 additions & 9 deletions plugins/cpp/model/include/model/cppfunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,6 @@ struct CppFunctionParamCountWithId

#pragma db column("count(" + Parameters::id + ")")
std::size_t count;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand All @@ -80,9 +77,6 @@ struct CppFunctionMcCabe

#pragma db column(CppFunction::mccabe)
unsigned int mccabe;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand All @@ -99,9 +93,6 @@ struct CppFunctionBumpyRoad

#pragma db column(CppFunction::statementCount)
unsigned int statementCount;

#pragma db column(File::path)
std::string filePath;
};

}
Expand Down
3 changes: 0 additions & 3 deletions plugins/cpp_metrics/model/include/model/cppcohesionmetrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ struct CohesionCppRecordView

#pragma db column(CppEntity::astNodeId)
CppAstNodeId astNodeId;

#pragma db column(File::path)
std::string filePath;
};

#pragma db view \
Expand Down
149 changes: 147 additions & 2 deletions plugins/cpp_metrics/parser/include/cppmetricsparser/cppmetricsparser.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,43 @@
#include <model/cpprecord.h>
#include <model/cpprecord-odb.hxx>

#include <util/dbutil.h>
#include <util/parserutil.h>
#include <util/threadpool.h>
#include <util/odbtransaction.h>

namespace cc
{
namespace parser
{


/// @brief A lightweight, non-owning description of one batch of tasks:
/// a pair of const iterators into a std::vector<TTask> together with an
/// element count supplied by the creator of the batch.
/// @tparam TTask The element type of the underlying task vector.
template<typename TTask>
class MetricsTasks
{
public:
  using TTaskIter = typename std::vector<TTask>::const_iterator;

  /// @param begin_ Iterator to the first task of the batch.
  /// @param end_ Iterator one past the last task of the batch.
  /// @param size_ Number of tasks in the batch. It is stored as given;
  /// it is not checked against the distance between the iterators.
  MetricsTasks(
    const TTaskIter& begin_,
    const TTaskIter& end_,
    std::size_t size_)
    : _begin(begin_), _end(end_), _size(size_)
  {
  }

  /// @return Iterator to the first task of the batch.
  const TTaskIter& begin() const
  {
    return _begin;
  }

  /// @return Iterator one past the last task of the batch.
  const TTaskIter& end() const
  {
    return _end;
  }

  /// @return The task count that was passed to the constructor.
  std::size_t size() const
  {
    return _size;
  }

private:
  TTaskIter _begin;
  TTaskIter _end;
  std::size_t _size;
};


class CppMetricsParser : public AbstractParser
{
public:
Expand All @@ -41,10 +70,126 @@ class CppMetricsParser : public AbstractParser
// and member functions for every type.
void lackOfCohesion();


/// @brief Builds an ODB query that keeps only those database records of
/// the given parameter type whose path is rooted under any of this
/// parser's input paths.
/// @tparam TQueryParam The type of database records to query.
/// This type must represent an ODB view that has access to
/// (i.e. is also joined with) the File table.
/// @return A query containing the disjunction of filters.
template<typename TQueryParam>
odb::query<TQueryParam> getFilterPathsQuery() const
{
  // Delegate to the shared utility, handing over the full range of
  // input paths stored in this parser.
  return cc::util::getFilterPathsQuery<TQueryParam>(
    _inputPaths.cbegin(),
    _inputPaths.cend());
}

/// @brief Calculates a metric by querying all objects of the
/// specified parameter type, splitting the results into contiguous
/// batches, and passing each batch to the specified worker function
/// on parallel threads.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of parameters to query.
/// @param name_ The name of the metric (for progress logging).
/// @param partitions_ The number of jobs to partition the query into.
/// If the query yields fewer results than this, the number of jobs is
/// lowered to the number of results.
/// @param query_ A filter query for retrieving only
/// the eligible parameters that should be handed to workers.
/// @param worker_ The logic of the worker thread. It is invoked once
/// per job with the batch (a MetricsTasks range) assigned to that job.
template<typename TQueryParam>
void parallelCalcMetric(
  const char* name_,
  std::size_t partitions_,
  const odb::query<TQueryParam>& query_,
  const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
  typedef MetricsTasks<TQueryParam> TMetricsTasks;
  typedef typename TMetricsTasks::TTaskIter TTaskIter;
  typedef std::pair<std::size_t, TMetricsTasks> TJobParam;

  // The cached query results that will be dispatched to workers.
  // This is declared BEFORE the thread pool on purpose: local objects are
  // destroyed in reverse order of declaration, and every enqueued job holds
  // iterators into this vector. Declaring it first guarantees that it
  // outlives the pool even if an exception unwinds this function after
  // jobs have already been enqueued.
  std::vector<TQueryParam> tasks;

  // Define the thread pool and job wrapper function.
  LOG(info) << name_ << " : Collecting jobs from database...";
  std::unique_ptr<util::JobQueueThreadPool<TJobParam>> pool =
    util::make_thread_pool<TJobParam>(_threadCount,
    [&](const TJobParam& job)
    {
      LOG(info) << '(' << job.first << '/' << partitions_
        << ") " << name_;
      worker_(job.second);
    });

  // Cache the results of the query that will be dispatched to workers.
  util::OdbTransaction {_ctx.db} ([&, this]
  {
    // Storing the result directly and then calling odb::result<>::cache()
    // on it does not work: odb::result<>::size() will always throw
    // odb::result_not_cached. As of writing, this is a limitation of SQLite.
    // So we fall back to the old-fashioned way: std::vector<> in memory.
    for (const TQueryParam& param : _ctx.db->query<TQueryParam>(query_))
      tasks.emplace_back(param);
  });

  // Ensure that all workers receive at least one task.
  // (With no tasks at all this becomes zero and nothing is dispatched.)
  const std::size_t taskCount = tasks.size();
  if (partitions_ > taskCount)
    partitions_ = taskCount;

  // Dispatch jobs to workers in discrete packets.
  LOG(info) << name_ << " : Dispatching jobs on "
    << _threadCount << " thread(s)...";
  std::size_t prev = 0;
  TTaskIter it_prev = tasks.cbegin();

  // Jobs are numbered from 1 so that the progress log reads "(i/partitions)".
  for (std::size_t i = 1; i <= partitions_; ++i)
  {
    // Proportional split point: the i-th job ends at taskCount * i / partitions,
    // which spreads any remainder evenly and makes the last job end exactly
    // at taskCount.
    const std::size_t next = taskCount * i / partitions_;
    const std::size_t size = next - prev;
    TTaskIter it_next = it_prev;
    std::advance(it_next, size);

    pool->enqueue(TJobParam(i, TMetricsTasks(it_prev, it_next, size)));

    prev = next;
    it_prev = it_next;
  }

  // Await the termination of all workers.
  pool->wait();
  LOG(info) << name_ << " : Calculation finished.";
}

/// @brief Calculates a metric by querying all objects of the
/// specified parameter type and passing them one-by-one to the
/// specified worker function on parallel threads.
/// This call blocks the caller thread until all workers are finished.
/// @tparam TQueryParam The type of parameters to query.
/// @param name_ The name of the metric (for progress logging).
/// @param partitions_ The number of jobs to partition the query into.
/// @param worker_ The logic of the worker thread.
template<typename TQueryParam>
void parallelCalcMetric(
const char* name_,
std::size_t partitions_,
const std::function<void(const MetricsTasks<TQueryParam>&)>& worker_)
{
parallelCalcMetric<TQueryParam>(
name_,
partitions_,
odb::query<TQueryParam>(),
worker_);
}


// Number of threads passed to util::make_thread_pool() by
// parallelCalcMetric(); also printed in its progress log.
int _threadCount;
// Input paths of this parser; getFilterPathsQuery() passes this range to
// cc::util::getFilterPathsQuery() to build the path filter.
std::vector<std::string> _inputPaths;
// NOTE(review): the following three members are not used in the visible
// part of this header. Presumably they are caches and a path-based job pool
// used by the implementation file — confirm against the .cpp.
std::unordered_set<model::FileId> _fileIdCache;
std::unordered_map<model::CppAstNodeId, model::FileId> _astNodeIdCache;
std::unique_ptr<util::JobQueueThreadPool<std::string>> _pool;

// Per-metric partition multipliers.
// NOTE(review): presumably each is multiplied by the thread count to obtain
// the `partitions_` argument of parallelCalcMetric() for the metric named in
// the constant — the call sites are not visible here, confirm in the .cpp.
static const int functionParamsPartitionMultiplier = 5;
static const int functionMcCabePartitionMultiplier = 5;
static const int functionBumpyRoadPartitionMultiplier = 5;
static const int lackOfCohesionPartitionMultiplier = 25;
};

} // parser
Expand Down
Loading

0 comments on commit e86e58b

Please sign in to comment.