thesis version
This commit is contained in:
13
inc/alsk/executor/executor.h
Normal file
13
inc/alsk/executor/executor.h
Normal file
@ -0,0 +1,13 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_EXECUTOR_H
#define ALSK_ALSK_EXECUTOR_EXECUTOR_H

/*
 * Umbrella header for the executor module: includes every available
 * executor implementation so client code only needs this one header.
 */

#include "impl/dynamicpool.h"
#include "impl/firstlevel/equi.h"
#include "impl/firstlevel/greedy.h"
#include "impl/firstlevel/noopti.h"
#include "impl/sequential.h"
#include "impl/staticpool.h"
#include "impl/staticpoolid.h"
#include "impl/staticthread.h"

#endif
|
125
inc/alsk/executor/executorbase.h
Normal file
125
inc/alsk/executor/executorbase.h
Normal file
@ -0,0 +1,125 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_EXECUTORBASE_H
|
||||
#define ALSK_ALSK_EXECUTOR_EXECUTORBASE_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <thread>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "tags.h"
|
||||
#include "../impl/tags.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Common base for every executor: default configuration hooks,
 *        core count and repeatability settings.
 */
struct ExecutorBase {
	using IsExecutor = tag::Executor; // marker: identifies this type as an executor

public:
	// default per-execution context information: empty
	struct Info {};
|
||||
|
||||
struct RCores {
|
||||
std::vector<std::size_t> coresList;
|
||||
|
||||
RCores() { upTo(std::thread::hardware_concurrency()); }
|
||||
|
||||
/**
|
||||
* @brief disables repeatability
|
||||
*/
|
||||
void disabled() noexcept { coresList.clear(); }
|
||||
|
||||
/**
|
||||
* @brief defines possibles cores from min to n by given step
|
||||
* @param n possibly included upper bound, if 0 or 1, disables repeatability
|
||||
* @param min lower bound (at least 2, at most n)
|
||||
* @param step step (at least 1)
|
||||
*/
|
||||
void upTo(std::size_t n, std::size_t min = 2, std::size_t step = 1) {
|
||||
coresList.clear();
|
||||
if(n < 2) return;
|
||||
std::size_t k = (n-min+step) / step;
|
||||
coresList.resize(k);
|
||||
std::generate_n(std::begin(coresList), n-1, [i=0, &min, &step]() mutable { return (min+step*i++); });
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief defines possibles cores from min to n, multiplying by given step
|
||||
* @param n possibly included upper bound, if 0 or 1, disables repeatability
|
||||
* @param min lower bound (at least 2, at most n)
|
||||
* @param step step (at least 2)
|
||||
*/
|
||||
void expUpTo(std::size_t n, std::size_t min = 2, std::size_t step = 2) {
|
||||
coresList.clear();
|
||||
if(n < 2) return;
|
||||
while(min <= n) {
|
||||
coresList.push_back(min);
|
||||
min *= step;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief defines possibles cores from min to n, multiplying by given step
|
||||
* @param args all cores to support
|
||||
*/
|
||||
template<typename... Args>
|
||||
void forValues(Args&&... args) {
|
||||
coresList = {std::forward<Args>(args)...};
|
||||
}
|
||||
};
|
||||
|
||||
public:
	/**
	 * @brief set this variable to the number of allotted cores
	 */
	std::size_t cores;

	/**
	 * @brief this variable allows to configure repeatability
	 */
	RCores repeatability;

public:
	// by default, use every hardware thread available
	ExecutorBase(): cores{std::thread::hardware_concurrency()} {}

public:
	// default configuration hook: no-op; executors override when they need setup
	template<typename Impl>
	void config(Impl&) {}

	// default context mapping: identity (one context per id); executors that
	// group tasks per thread override these two functions
	template<typename Impl>
	std::size_t contextIdCount(Impl&, std::size_t id) { return id; }

	template<typename Impl>
	std::size_t contextId(Impl&, std::size_t id) { return id; }

	// public entry point: forwards to _execute with the implementation's executor info
	template<typename Task, typename Impl, typename BTask, typename Parameters, typename Results, typename... Args>
	decltype(auto) execute(Impl& impl, BTask& task, Parameters&& parameters, Results&& results, Args&&... args) {
		return _execute<Task>(impl, task, impl.executorInfo, std::forward<Parameters>(parameters), std::forward<Results>(results),
			std::forward<Args>(args)...);
	}

	// public entry point for plain sequential execution of n tasks
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeSequential(Impl& impl, BTask& task, Parameters&& parameters, std::size_t n) {
		return _executeSequential<Task>(impl, task, impl.executorInfo, std::forward<Parameters>(parameters), n);
	}

protected:
	// runs a single task (index 0) with the given context information
	template<typename Task, typename Impl, typename BTask, typename Info, typename Parameters, typename Results, typename... Args>
	decltype(auto) _execute(Impl& impl, BTask& task, Info&& info, Parameters&& parameters, Results&& results, Args&&... args) {
		return Task::execute(
			impl, task, 0, std::forward<Info>(info), std::forward<Parameters>(parameters), std::forward<Results>(results),
			std::forward<Args>(args)...
		);
	}

	// runs tasks 0..n-1 one after the other on the calling thread
	template<typename Task, typename Impl, typename BTask, typename Info, typename Parameters>
	void _executeSequential(Impl& impl, BTask& task, Info const& info, Parameters const& parameters, std::size_t n) {
		for(std::size_t i = 0; i < n; ++i)
			Task::execute(impl, task, i, info, parameters, std::tuple<>{});
	}
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
12
inc/alsk/executor/executorstate.h
Normal file
12
inc/alsk/executor/executorstate.h
Normal file
@ -0,0 +1,12 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_EXECUTORSTATE_H
#define ALSK_ALSK_EXECUTOR_EXECUTORSTATE_H

namespace alsk {
namespace exec {

/**
 * @brief Per-executor runtime state.
 *
 * The primary template is only declared: every executor provides a
 * specialization holding whatever state it needs (thread pool, splits, ...).
 */
template<typename> struct ExecutorState;

}
}

#endif
|
123
inc/alsk/executor/impl/dynamicpool.h
Normal file
123
inc/alsk/executor/impl/dynamicpool.h
Normal file
@ -0,0 +1,123 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_DYNAMICPOOL_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_DYNAMICPOOL_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <future>
|
||||
#include <vector>
|
||||
|
||||
#include "../executorbase.h"
|
||||
#include "../executorstate.h"
|
||||
#include "../../skeleton/traits.h"
|
||||
|
||||
#include "../utility/pool.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor submitting work to a shared dynamic thread pool.
 *
 * Parallel sections are packed into at most maxTaskCount pool tasks.
 */
template<typename S>
struct DynamicPool: ExecutorBase {
	using Tag = alsk::tag::Parallel;

public:
	// upper bound on the number of pool tasks created for one parallel section
	std::size_t maxTaskCount = 1'000;

public:
	// configures the underlying thread pool with the allotted number of cores
	template<typename Impl>
	void config(Impl& impl) {
		impl.state.executor.config(cores);
	}

	/**
	 * @brief runs tasks 0..n-1, possibly in parallel on the shared pool
	 *
	 * The first taskCount-1 pool tasks run `step` iterations each; the last
	 * one also absorbs the remainder, so exactly n iterations run in total.
	 */
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
		std::size_t taskCount = std::min(maxTaskCount, n);

		if(cores > 1 && taskCount > 1) {
			Info info;
			std::vector<std::future<void>> futures(taskCount);
			std::size_t const step = n/taskCount;
			std::size_t const remain = n - step*(taskCount-1); // size of the last chunk

			typename Impl::State& state = impl.state;

			// executes k consecutive iterations starting at index b
			auto run = [&](std::size_t b, std::size_t k) {
				for(std::size_t i = 0; i < k; ++i)
					Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
			};

			for(std::size_t i = 0; i < taskCount-1; ++i)
				futures[i] = state.executor.run([&, b=i*step, k=step]{ run(b, k); });
			futures[taskCount-1] = state.executor.run([&, b=(taskCount-1)*step, k=remain]{ run(b, k); });

			state.executor.wait(futures);
		} else {
			// not worth parallelizing: run everything on the calling thread
			Info info;
			for(std::size_t i = 0; i < n; ++i)
				Task::execute(impl, task, i, info, parameters, std::tuple<>{});
		}
	}

	/**
	 * @brief runs tasks 0..n-1 and reduces their results with Select
	 * @return the "best" value as defined by the Select operation
	 *
	 * Each pool task reduces its own chunk into a local accumulator; the
	 * partial results are reduced sequentially once all tasks completed.
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		std::size_t taskCount = std::min(maxTaskCount, n);

		Value best{};

		if(cores > 1 && taskCount > 1) {
			Info info;
			std::vector<std::future<void>> futures(taskCount);
			std::size_t const step = n/taskCount;
			std::size_t const remainBase = n - step*taskCount; // leftover after equal split
			std::size_t remain = remainBase;

			typename Impl::State& state = impl.state;

			// reduces k consecutive iterations starting at b into out
			auto run = [&](Value& out, std::size_t b, std::size_t k) {
				Value best{};

				if(k)
					best = Task::execute(impl, task, b+0, info, parameters, std::tuple<>{});
				for(std::size_t i = 1; i < k; ++i) {
					Value current = Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
					best = Select::execute(impl, select, b+i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
				}

				out = std::move(best);
			};

			// spread the remainder: the first remainBase tasks get one extra iteration
			std::size_t start{};
			std::vector<Value> bests(taskCount);
			for(std::size_t i = 0; i < taskCount-1; ++i) {
				std::size_t offset = !!remain;
				remain -= offset;
				futures[i] = state.executor.run([&, &best=bests[i], b=start, k=step+offset] { run(best, b, k); });
				start += step+offset;
			}

			futures[taskCount-1] = state.executor.run([&, &best=bests[taskCount-1], b=start, k=step] { run(best, b, k); });

			state.executor.wait(futures);

			// final sequential reduction of the per-task partial results
			if(taskCount) best = std::move(bests[0]);
			for(std::size_t i = 1; i < taskCount; ++i)
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			Info info;
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// the shared state of a DynamicPool executor is the thread pool itself
template<typename S>
struct ExecutorState<DynamicPool<S>>: util::Pool {};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
195
inc/alsk/executor/impl/firstlevel/equi.h
Normal file
195
inc/alsk/executor/impl/firstlevel/equi.h
Normal file
@ -0,0 +1,195 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_EQUI_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_EQUI_H
|
||||
|
||||
#include <cmath>
|
||||
#include <thread>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "../../executorbase.h"
|
||||
#include "../../executorstate.h"
|
||||
#include "../../../skeleton/traits.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor parallelizing only the first parallel level of the skeleton,
 *        splitting the iterations as evenly as possible.
 *
 * Context boundaries are precomputed for every core count configured in
 * `repeatability` so that results are reproducible across those core counts.
 */
template<typename S>
struct FirstLevelEqui: ExecutorBase {
	using Tag = alsk::tag::Parallel;

public:
	// per-execution context: tracks the nesting depth of parallel sections;
	// only the outermost level actually spawns threads (see threadLimit)
	struct Info {
		unsigned int parDepth;

		Info(unsigned int parDepth = 0) noexcept: parDepth{parDepth} {}

		Info par() const noexcept { return {parDepth+1}; } // entering a parallel section
		Info seq() const noexcept { return {parDepth}; }   // staying at the same depth
	};

private:
	/**
	 * @brief precomputes the task-id boundaries at which a thread change may occur
	 *
	 * For every core count k in repeatability.coresList, the first-level tasks
	 * are split as evenly as possible (remainder spread over the first chunks)
	 * and each chunk boundary, scaled to a global task id, is inserted into
	 * state.executor.split.
	 */
	template<typename Impl>
	void buildSplit(Impl& impl) {
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;

		split.clear();

		// computes the parallelizability of the outermost parallelizable level
		auto traverser = [](std::size_t, auto&& skl, auto&&... values) {
			using Skl = decltype(skl);
			using Traits = SkeletonTraitsT<std::decay_t<Skl>>;
			if(Traits::serial) return max(decltype(values)(values)...);
			return Traits::parallelizability(std::forward<Skl>(skl));
		};

		auto firstLevelPar = SkeletonTraversal<S>::execute(impl.skeleton, traverser, 1ul);

		split.insert(0);
		for(auto const& k: repeatability.coresList) {
			std::size_t start{};
			std::size_t const step = firstLevelPar/k;
			std::size_t remain = firstLevelPar - step*k; // spread over the first chunks

			for(unsigned int i = 0; i < k-1; ++i) {
				std::size_t offset = !!remain;
				remain -= offset;
				start += step+offset;
				// scale the first-level boundary to a global task id
				split.insert(start * (state.executor.parTasksCount/firstLevelPar));
			}
		}
	}

	// only the outermost parallel level may use more than one thread
	unsigned int threadLimit(Info const& info) const noexcept {
		return info.parDepth? 1 : cores;
	}

public:
	// caches the total parallel task count and precomputes the splits
	template<typename Impl>
	void config(Impl& impl) {
		typename Impl::State& state = impl.state;

		state.executor.parTasksCount = impl.parallelTasksCount();;
		buildSplit(impl);
	}

	// one context per split boundary
	template<typename Impl>
	std::size_t contextIdCount(Impl& impl, std::size_t) {
		typename Impl::State& state = impl.state;
		return state.executor.split.size();
	}

	// maps a task id to its context: index of the greatest boundary <= id
	template<typename Impl>
	std::size_t contextId(Impl& impl, std::size_t id) { // O(log(n))
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;
		return std::distance(std::begin(split), split.upper_bound(id)) - 1;
	}

	/**
	 * @brief runs tasks 0..n-1, in parallel at the first parallel level only
	 *
	 * Iterations are split evenly: chunk size n/nThreads, with the remainder
	 * distributed one-by-one over the first chunks. The calling thread runs
	 * the last chunk itself.
	 */
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo);
		std::size_t const nThreads = std::min(n, maxThreads);

		if(nThreads > 1) {
			Info info = impl.executorInfo.par();
			std::vector<std::thread> threads(nThreads-1);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			// executes k consecutive iterations starting at index b
			auto run = [&](std::size_t b, std::size_t k) {
				for(std::size_t i = 0; i < k; ++i)
					Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
			};

			{
				std::size_t start{};
				for(std::size_t i = 0; i < nThreads-1; ++i) {
					std::size_t offset = !!remain;
					remain -= offset;
					threads[i] = std::thread{run, start, step+offset};
					start += step+offset;
				}

				// last chunk runs on the calling thread
				run(start, step);
			}

			for(std::thread& thread: threads) thread.join();
		} else {
			Info info = impl.executorInfo.seq();
			for(std::size_t i = 0; i < n; ++i)
				Task::execute(impl, task, i, info, parameters, std::tuple<>{});
		}
	}

	/**
	 * @brief runs tasks 0..n-1 and reduces their results with Select
	 * @return the "best" value as defined by the Select operation
	 *
	 * Each thread reduces its own chunk locally; the per-thread partial
	 * results are then reduced sequentially after all threads joined.
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo); // TODO? fix neighbours

		Value best{};

		std::size_t const nThreads = std::min(n, maxThreads);
		if(nThreads > 1) {
			Info info = impl.executorInfo.par();
			std::vector<std::thread> threads(nThreads-1);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			// reduces k consecutive iterations starting at b into out
			auto run = [&](Value& out, std::size_t b, std::size_t k) {
				Value best{};

				if(k)
					best = Task::execute(impl, task, b+0, info, parameters, std::tuple<>{});
				for(std::size_t i = 1; i < k; ++i) {
					Value current = Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
					best = Select::execute(impl, select, b+i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
				}

				out = std::move(best);
			};

			std::size_t start{};
			std::vector<Value> bests(nThreads);

			{
				std::size_t i;
				for(i = 0; i < nThreads-1; ++i) {
					std::size_t offset = !!remain;
					remain -= offset;
					threads[i] = std::thread{run, std::ref(bests[i]), start, step+offset};
					start += step+offset;
				}

				// last chunk runs on the calling thread
				run(bests[i], start, step);
			}

			for(std::thread& thread: threads) thread.join();

			// final sequential reduction of the per-thread partial results
			if(nThreads) best = std::move(bests[0]);
			for(std::size_t i = 1; i < nThreads; ++i)
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			Info info = impl.executorInfo.seq();
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// state shared by FirstLevelEqui executors
template<typename S>
struct ExecutorState<FirstLevelEqui<S>> {
	std::size_t parTasksCount;   // total number of parallelizable tasks
	std::set<std::size_t> split; // precomputed context boundaries (sorted)
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
168
inc/alsk/executor/impl/firstlevel/greedy.h
Normal file
168
inc/alsk/executor/impl/firstlevel/greedy.h
Normal file
@ -0,0 +1,168 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_GREEDY_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_GREEDY_H
|
||||
|
||||
#include <cmath>
|
||||
#include <thread>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "../../executorbase.h"
|
||||
#include "../../executorstate.h"
|
||||
#include "../../../skeleton/traits.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor parallelizing only the first parallel level of the skeleton,
 *        using greedy (ceiling-division) chunking.
 */
template<typename S>
struct FirstLevelGreedy: ExecutorBase {
	using Tag = alsk::tag::Parallel;

public:
	// per-execution context: nesting depth of parallel sections
	struct Info {
		unsigned int parDepth;
	};

private:
	/**
	 * @brief precomputes context boundaries for every supported core count
	 *
	 * Unlike FirstLevelEqui, chunks are sized by ceiling division (greedy):
	 * every chunk gets ceil(firstLevelPar/k) tasks, the last one the rest.
	 */
	template<typename Impl>
	void buildSplit(Impl& impl) {
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;

		split.clear();

		// computes the parallelizability of the outermost parallelizable level
		auto traverser = [](std::size_t, auto&& skl, auto&&... values) {
			using Skl = decltype(skl);
			// NOTE(review): Skl is not decayed here, unlike FirstLevelEqui's
			// traverser — confirm SkeletonTraitsT handles reference types
			using Traits = alsk::SkeletonTraitsT<Skl>;
			if(Traits::serial) return max(values...);
			return Traits::parallelizability(std::forward<Skl>(skl));
		};

		auto firstLevelPar = SkeletonTraversal<S>::execute(impl.skeleton, traverser, 1ul);

		split.insert(0);
		for(auto const& k: repeatability.coresList) {
			std::size_t start{};
			std::size_t const step = (firstLevelPar + k-1)/k;     // ceil(firstLevelPar/k)
			std::size_t const rk = (firstLevelPar + step-1)/step; // chunks actually needed

			for(unsigned int i = 0; i < rk; ++i, start += step)
				split.insert(start * (state.executor.parTasksCount/firstLevelPar));
		}
	}

	// only the outermost parallel level may use more than one thread
	unsigned int threadLimit(unsigned int level) const { return level? 1 : cores; }

public:
	// caches the total parallel task count and precomputes the splits
	template<typename Impl>
	void config(Impl& impl) {
		typename Impl::State& state = impl.state;

		state.executor.parTasksCount = impl.parallelTasksCount();;
		buildSplit(impl);
	}

	// one context per split boundary
	template<typename Impl>
	std::size_t contextIdCount(Impl& impl, std::size_t) {
		typename Impl::State& state = impl.state;
		return state.executor.split.size();
	}

	// maps a task id to its context: index of the greatest boundary <= id
	template<typename Impl>
	std::size_t contextId(Impl& impl, std::size_t id) { // O(log(n))
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;
		return std::distance(std::begin(split), split.upper_bound(id)) - 1;
	}
|
||||
|
||||
template<typename Task, typename Impl, typename BTask, typename Parameters>
|
||||
void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
|
||||
auto const& parDepth = impl.executorInfo.parDepth;
|
||||
std::size_t const maxThreads = threadLimit(parDepth);
|
||||
|
||||
std::size_t const nThreads = std::min(n, maxThreads);
|
||||
if(nThreads > 1) {
|
||||
Info info{parDepth+1};
|
||||
std::vector<std::thread> threads(nThreads-1);
|
||||
std::size_t const step = std::round(static_cast<double>(n)/nThreads);
|
||||
|
||||
auto run = [&](std::size_t b, std::size_t k) {
|
||||
for(std::size_t i = 0; i < k; ++i)
|
||||
Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
|
||||
};
|
||||
|
||||
for(std::size_t i = 0; i < nThreads-1; ++i)
|
||||
threads[i] = std::thread{run, i*step, step};
|
||||
|
||||
run((nThreads-1)*step, n-(nThreads-1)*step);
|
||||
|
||||
for(std::thread& thread: threads) thread.join();
|
||||
} else {
|
||||
Info info{parDepth};
|
||||
for(std::size_t i = 0; i < n; ++i)
|
||||
Task::execute(impl, task, i, info, parameters, std::tuple<>{});
|
||||
}
|
||||
}
|
||||
|
||||
	/**
	 * @brief runs tasks 0..n-1 and reduces their results with Select
	 * @return the "best" value as defined by the Select operation
	 *
	 * Greedy chunking: chunk size is ceil(n/nThreadsBase) and the thread
	 * count is recomputed from it, so the last chunk holds the remainder.
	 * Each thread reduces its chunk locally; partial results are reduced
	 * sequentially after the join.
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		auto const& parDepth = impl.executorInfo.parDepth;
		std::size_t const maxThreads = threadLimit(parDepth); // TODO fix neighbours

		Value best{};

		std::size_t const nThreadsBase = std::min(n, maxThreads);
		if(nThreadsBase > 1) {
			Info info{parDepth+1};
			std::size_t const step = (n+nThreadsBase-1)/nThreadsBase; // ceil: greedy chunk size
			std::size_t const nThreads = (n+step-1)/step;             // chunks actually needed
			std::vector<std::thread> threads(nThreads-1);

			// reduces k consecutive iterations starting at b into out
			auto run = [&](Value& out, std::size_t b, std::size_t k) {
				Value best{};

				if(k)
					best = Task::execute(impl, task, b+0, info, parameters, std::tuple<>{});
				for(std::size_t i = 1; i < k; ++i) {
					Value current = Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
					best = Select::execute(impl, select, b+i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
				}

				out = std::move(best);
			};

			std::size_t start{};
			std::vector<Value> bests(nThreads);
			for(std::size_t i = 0; i < nThreads-1; ++i, start += step)
				threads[i] = std::thread{run, std::ref(bests[i]), start, step};

			// remainder chunk on the calling thread
			run(bests[nThreads-1], start, n - step*(nThreads-1));

			for(std::thread& thread: threads) thread.join();

			// final sequential reduction of the per-thread partial results
			if(nThreads) best = std::move(bests[0]);
			for(std::size_t i = 1; i < nThreads; ++i)
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			Info info{parDepth};
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// state shared by FirstLevelGreedy executors
template<typename S>
struct ExecutorState<FirstLevelGreedy<S>> {
	std::size_t parTasksCount;   // total number of parallelizable tasks
	std::set<std::size_t> split; // precomputed context boundaries (sorted)
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
129
inc/alsk/executor/impl/firstlevel/noopti.h
Normal file
129
inc/alsk/executor/impl/firstlevel/noopti.h
Normal file
@ -0,0 +1,129 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_NOOPTI_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_FIRSTLEVEL_NOOPTI_H
|
||||
|
||||
#include <thread>
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "../../executorbase.h"
|
||||
#include "../../executorstate.h"
|
||||
#include "../../../skeleton/traits.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor parallelizing only the first parallel level, without any
 *        repeatability optimization (no precomputed splits).
 */
template<typename S>
struct FirstLevelNoOpti: ExecutorBase {
	using Tag = alsk::tag::Parallel;

public:
	// per-execution context: nesting depth of parallel sections
	struct Info {
		unsigned int parDepth;
	};

private:
	// only the outermost parallel level may use more than one thread
	unsigned int threadLimit(unsigned int level) const { return level? 1 : cores; }

public:
	// no repeatability optimization: one context per possible task id
	template<typename Impl>
	std::size_t contextIdCount(Impl&, std::size_t count) { return count; }

	template<typename Impl>
	std::size_t contextId(Impl&, std::size_t id) { return id; }
|
||||
|
||||
template<typename Task, typename Impl, typename BTask, typename Parameters>
|
||||
void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
|
||||
auto const& parDepth = impl.executorInfo.parDepth;
|
||||
std::size_t const maxThreads = threadLimit(parDepth);
|
||||
|
||||
std::size_t const nThreads = std::min(n, maxThreads);
|
||||
if(nThreads > 1) {
|
||||
Info info{parDepth+1};
|
||||
std::vector<std::thread> threads(nThreads-1);
|
||||
std::size_t const step = std::round(static_cast<double>(n)/nThreads);
|
||||
|
||||
auto run = [&](std::size_t b, std::size_t k) {
|
||||
for(std::size_t i = 0; i < k; ++i)
|
||||
Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
|
||||
};
|
||||
|
||||
for(std::size_t i = 0; i < nThreads-1; ++i)
|
||||
threads[i] = std::thread{run, i*step, step};
|
||||
|
||||
run((nThreads-1)*step, n-(nThreads-1)*step);
|
||||
|
||||
for(std::thread& thread: threads) thread.join();
|
||||
|
||||
} else {
|
||||
Info info{parDepth};
|
||||
for(std::size_t i = 0; i < n; ++i)
|
||||
Task::execute(impl, task, i, info, parameters, std::tuple<>{});
|
||||
}
|
||||
}
|
||||
|
||||
	/**
	 * @brief runs tasks 0..n-1 and reduces their results with Select
	 * @return the "best" value as defined by the Select operation
	 *
	 * Iterations are split by floor division with the remainder spread over
	 * the first chunks; each thread reduces its chunk locally and the
	 * partial results are reduced sequentially after the join.
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		auto const& parDepth = impl.executorInfo.parDepth;
		std::size_t const maxThreads = threadLimit(parDepth); // TODO fix neighbours

		Value best{};

		std::size_t const nThreads = std::min(n, maxThreads);
		if(nThreads > 1) {
			Info info{parDepth+1};
			std::vector<std::thread> threads(nThreads-1);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads; // leftover after equal split
			std::size_t remain = remainBase;

			// reduces k consecutive iterations starting at b into out
			auto run = [&](Value& out, std::size_t b, std::size_t k) {
				Value best{};

				if(k)
					best = Task::execute(impl, task, b+0, info, parameters, std::tuple<>{});
				for(std::size_t i = 1; i < k; ++i) {
					Value current = Task::execute(impl, task, b+i, info, parameters, std::tuple<>{});
					best = Select::execute(impl, select, b+i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
				}

				out = std::move(best);
			};

			std::size_t start{};
			std::vector<Value> bests(nThreads);
			for(std::size_t i = 0; i < nThreads-1; ++i) {
				std::size_t offset = !!remain;
				remain -= offset;
				threads[i] = std::thread{run, std::ref(bests[i]), start, step+offset};
				start += step+offset;
			}

			// last chunk on the calling thread
			run(bests[nThreads-1], start, step);

			for(std::thread& thread: threads) thread.join();

			// final sequential reduction of the per-thread partial results
			if(nThreads) best = std::move(bests[0]);
			for(std::size_t i = 1; i < nThreads; ++i)
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			Info info{parDepth};
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// this executor requires no shared state
template<typename S>
struct ExecutorState<FirstLevelNoOpti<S>> {};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
55
inc/alsk/executor/impl/sequential.h
Normal file
55
inc/alsk/executor/impl/sequential.h
Normal file
@ -0,0 +1,55 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_SEQUENTIAL_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_SEQUENTIAL_H
|
||||
|
||||
#include <set>
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "../executorbase.h"
|
||||
#include "../executorstate.h"
|
||||
#include "../../skeleton/traits.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
template<typename S>
|
||||
struct Sequential: ExecutorBase {
|
||||
using Tag = alsk::tag::Sequential;
|
||||
|
||||
public:
|
||||
template<typename Impl>
|
||||
std::size_t contextIdCount(Impl&, std::size_t) { return 1; }
|
||||
|
||||
template<typename Impl>
|
||||
std::size_t contextId(Impl&, std::size_t) { return 0; }
|
||||
|
||||
template<typename Task, typename Impl, typename BTask, typename Parameters>
|
||||
void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
|
||||
Info info;
|
||||
for(std::size_t i = 0; i < n; ++i)
|
||||
Task::execute(impl, task, i, info, parameters, std::tuple<>{});
|
||||
}
|
||||
|
||||
template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
|
||||
Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
|
||||
Info info;
|
||||
Value best{};
|
||||
|
||||
if(n)
|
||||
best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
|
||||
for(std::size_t i = 1; i < n; ++i) {
|
||||
Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
|
||||
best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
};
|
||||
|
||||
// this executor requires no shared state
template<typename S>
struct ExecutorState<Sequential<S>> {};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
272
inc/alsk/executor/impl/staticpool.h
Normal file
272
inc/alsk/executor/impl/staticpool.h
Normal file
@ -0,0 +1,272 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_STATICPOOL_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_STATICPOOL_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <future>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "../executorbase.h"
|
||||
#include "../executorstate.h"
|
||||
#include "../../skeleton/traits.h"
|
||||
|
||||
#include "../utility/staticpool.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor running the parallel parts of a skeleton on a pool of
 *        threads created once up front ("static" pool).
 *
 * The skeleton S is traversed beforehand (buildSplitFor) to predict, for every
 * task id, which pool thread will run it; the resulting boundaries feed the
 * `split` set used by contextId() to give repeatable per-context ids.
 */
template<typename S>
struct StaticPool: ExecutorBase {
	// marks this executor as a parallel one
	using Tag = alsk::tag::Parallel;

public:
	/// Execution information attached to a (sub)task.
	struct Info {
		std::size_t cores;   // cores allotted to the task (0 means "inherit executor setting")
		std::size_t offset;  // index of the first pool thread dedicated to the task

		Info(std::size_t cores = 0, std::size_t offset = 0):
			cores{cores}, offset{offset} {}
	};

private:
	/**
	 * @brief Simulates the scheduling of skeleton s when run on `cores` cores.
	 * @return a map from task id to the pool thread expected to execute it
	 *
	 * Replays the same arithmetic as executeParallel() (step, remainder,
	 * coresA/coresB) so the produced mapping matches the real run.
	 */
	auto buildSplitFor(S& s, std::size_t cores) {
		std::map<std::size_t, std::size_t> ms;

		// Distributes n tasks (ids id, id+idStep, ...) over the available
		// threads and recurses into every subtask through buildSplitImpl.
		auto populateSplitImpl = [&ms, totalCores=cores](
			auto& buildSplitImpl, auto& s,
			std::size_t maxThreads, std::size_t thOffset,
			std::size_t n, std::size_t idStep, std::size_t id, bool isRemainder
		) {
			std::size_t const nThreads = std::min(n, maxThreads);
			if(nThreads > 0) {
				std::size_t const step = n/nThreads;              // base tasks per thread
				std::size_t const remainBase = n - step*nThreads; // leftover tasks
				std::size_t remain = remainBase;

				std::size_t const coresA = maxThreads/nThreads;                   // cores per regular subtask
				std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores per remainder subtask

				std::size_t start = 0;
				for(std::size_t i = 0; i < nThreads; ++i) {
					std::size_t thNum = thOffset + i*coresA; // first thread of this chunk
					std::size_t offset = !!remain;           // does this chunk take one extra task?
					remain -= offset;

					// record only the first thread assigned to a given id
					if(!ms.count(id+start*idStep))
						ms[id+start*idStep] = thNum;

					for(std::size_t j = 0; j < step; ++j)
						buildSplitImpl(s, coresA, thNum, id+(start+j)*idStep, false);
					if(offset)
						buildSplitImpl(s, coresB, thNum, id+(start+step)*idStep, true);

					start += step+offset;
				}

				// a remainder chunk ends where regular scheduling resumes
				if(isRemainder) ms[id+start*idStep] = totalCores;
			} else {
				// no task at this level gets its own thread: recurse as-is
				for(std::size_t i = 0; i < n; ++i)
					buildSplitImpl(s, maxThreads, thOffset, id+i*idStep, false);
			}
		};

		// Recursive traversal: for each node of the skeleton, populate the
		// mapping for its subtasks (skeletonStep/skeletonTraversal are
		// provided by skeleton/traits.h).
		auto buildSplitImpl = makeRecursiveLambda(
			[&populateSplitImpl](
				auto buildSplitImpl,
				auto& s, auto maxThreads, auto thOffset,
				auto id, bool isRemainder
			) {
				auto idStep = skeletonStep(s);
				auto populateSplit = [&](auto& s, std::size_t n) {
					if(!idStep) return; // no ids below this node
					populateSplitImpl(buildSplitImpl, s, maxThreads, thOffset, n, idStep, id, isRemainder);
				};


				skeletonTraversal(s, populateSplit);
			}
		);

		buildSplitImpl(s, cores, 0ul, 0ul, false);

		return ms;
	}

	/**
	 * @brief Builds the set of task ids at which the executing thread
	 *        changes, merged over every core count in
	 *        repeatability.coresList so contextId() is stable for all of
	 *        those configurations.
	 */
	template<typename Impl>
	void buildSplit(Impl& impl) {
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;

		split.clear();
		split.insert(0);

		for(auto cores: repeatability.coresList) {
			std::size_t curThread = 0;
			for(auto p: buildSplitFor(impl.skeleton, cores)) { // TODO: C++17
				// insert a boundary each time the assigned thread changes
				if(std::get<1>(p) != curThread) {
					curThread = std::get<1>(p);
					split.insert(std::get<0>(p));
				}
			}
		}
	}

	// Number of threads a task may use: its own allotment if set,
	// otherwise the executor-wide core count.
	std::size_t threadLimit(Info const& info) const {
		auto const& lCores = info.cores;
		return lCores? lCores : cores;
	}

public:
	/**
	 * @brief Prepares the implementation: sizes the pool, stores the number
	 *        of parallel tasks and precomputes the split points.
	 */
	template<typename Impl>
	void config(Impl& impl) {
		typename Impl::State& state = impl.state;

		impl.executorInfo.cores = cores;
		impl.executorInfo.offset = 0;
		state.executor.config(cores);

		state.executor.parTasksCount = impl.parallelTasksCount();;
		buildSplit(impl);
	}

	/// @return how many distinct context ids contextId() can produce
	template<typename Impl>
	std::size_t contextIdCount(Impl& impl, std::size_t) {
		typename Impl::State& state = impl.state;
		return state.executor.split.size();
	}

	/// @return index of the split segment containing the given task id
	template<typename Impl>
	std::size_t contextId(Impl& impl, std::size_t id) { // O(log(n))
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;
		return std::distance(std::begin(split), split.upper_bound(id)) - 1;
	}

	/**
	 * @brief Runs `task` n times in parallel on the pool.
	 *
	 * The n iterations are cut into nThreads contiguous chunks of `step`
	 * iterations; the first `remainBase` chunks take one extra iteration
	 * executed with its own core budget (coresB).
	 */
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo);
		std::size_t const nThreads = std::min(n, maxThreads);

		if(nThreads > 0) {
			std::vector<std::future<void>> futures(nThreads);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			std::size_t const coresA = maxThreads/nThreads; // cores for sub tasks in main cases
			std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores for remaining tasks

			typename Impl::State& state = impl.state;

			// runs iterations [b, b+k), plus one extra iteration if offset
			auto run = [&](std::size_t b, std::size_t k, bool offset, std::size_t thOffset) {
				Info infoA{coresA, thOffset}, infoB{coresB, thOffset};

				std::size_t i;
				for(i = 0; i < k; ++i)
					Task::execute(impl, task, b+i, infoA, parameters, std::tuple<>{});
				if(offset)
					Task::execute(impl, task, b+i, infoB, parameters, std::tuple<>{});
			};

			for(std::size_t i = 0, start = 0; i < nThreads; ++i) {
				// pin each chunk to the same thread buildSplitFor predicted
				std::size_t thNum = impl.executorInfo.offset + i*coresA;

				std::size_t offset = !!remain;
				remain -= offset;

				auto task = [&run, start, step, offset, thNum]{ run(start, step, offset, thNum); };
				futures[i] = state.executor.run(thNum, std::move(task));
				start += step+offset;
			}

			// the calling thread helps draining its queue while waiting
			state.executor.wait(futures);
		} else {
			// no parallelism available (n == 0 or no cores): run inline
			Info info{impl.executorInfo};
			for(std::size_t i = 0; i < n; ++i)
				Task::execute(impl, task, i, info, parameters, std::tuple<>{});
		}
	}

	/**
	 * @brief Runs `task` n times in parallel and reduces the produced values
	 *        with `select`.
	 *
	 * Each thread reduces its own chunk locally into a private slot of
	 * `bests` (no locking needed); the per-thread results are then reduced
	 * sequentially by the calling thread.
	 * @return the accumulated ("best") value
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo);

		Value best{};

		std::size_t const nThreads = std::min(n, maxThreads);
		if(nThreads > 0) {
			std::vector<std::future<void>> futures(nThreads);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			std::size_t const coresA = maxThreads/nThreads; // cores for sub tasks in main cases
			std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores for remaining tasks

			typename Impl::State& state = impl.state;

			// local reduction of iterations [b, b+k) (+1 if offset) into out
			auto run = [&](Value& out, std::size_t b, std::size_t k, bool offset, std::size_t thOffset) {
				Value best{};
				Info infoA{coresA, thOffset}, infoB{coresB, thOffset};

				if(k) {
					best = Task::execute(impl, task, b+0, infoA, parameters, std::tuple<>{});

					std::size_t i;
					for(i = 1; i < k; ++i) {
						Value current = Task::execute(impl, task, b+i, infoA, parameters, std::tuple<>{});
						best = Select::execute(impl, select, b+i, infoA, parameters, std::tuple<>{}, std::move(current), std::move(best));
					}

					if(offset) {
						Value current = Task::execute(impl, task, b+i, infoB, parameters, std::tuple<>{});
						best = Select::execute(impl, select, b+i, infoB, parameters, std::tuple<>{}, std::move(current), std::move(best));
					}
				}

				out = std::move(best);
			};

			// one result slot per thread: threads never share a slot
			std::vector<Value> bests(nThreads);
			for(std::size_t i = 0, start = 0; i < nThreads; ++i) {
				std::size_t thNum = impl.executorInfo.offset + i*coresA;

				std::size_t offset = !!remain;
				remain -= offset;

				auto task = [&, &best=bests[i], start, step, offset, thNum]{ run(best, start, step, offset, thNum); };
				futures[i] = state.executor.run(thNum, std::move(task));
				start += step+offset;
			}

			state.executor.wait(futures);

			// final sequential reduction of the per-thread results
			if(nThreads) best = std::move(bests[0]);
			for(std::size_t i = 1; i < nThreads; ++i)
				best = Select::execute(impl, select, i, impl.executorInfo, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			// sequential fallback
			Info info{impl.executorInfo};
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// Per-implementation state for the StaticPool executor: the underlying
// thread pool (inherited) plus the precomputed scheduling data.
template<typename S>
struct ExecutorState<StaticPool<S>>: util::StaticPool {
	std::size_t parTasksCount;   // total number of parallel tasks in the skeleton
	std::set<std::size_t> split; // task ids where the executing thread changes
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
110
inc/alsk/executor/impl/staticpoolid.h
Normal file
110
inc/alsk/executor/impl/staticpoolid.h
Normal file
@ -0,0 +1,110 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_STATICPOOLID_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_STATICPOOLID_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <future>
|
||||
#include <list>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "../executorbase.h"
|
||||
#include "../executorstate.h"
|
||||
#include "../../skeleton/traits.h"
|
||||
|
||||
#include "../utility/staticpool.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor dispatching every task to a pool thread chosen directly
 *        from the task's global id (id-proportional static scheduling).
 */
template<typename S>
struct StaticPoolId: ExecutorBase {
	// marks this executor as a parallel one
	using Tag = alsk::tag::Parallel;

private:
	/**
	 * @brief Precomputes context split points: for each core count in
	 *        repeatability.coresList the id range [0, upperId) is divided
	 *        into `cores` even slices; the boundaries of all configurations
	 *        are merged so contextId() is stable across them.
	 */
	template<typename Impl>
	void buildSplit(Impl& impl) {
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;

		split.clear();
		split.insert(0);

		auto const n = static_cast<double>(state.executor.upperId);
		for(auto cores: repeatability.coresList)
			for(std::size_t i = 1; i < cores; ++i)
				split.insert(std::ceil(n/cores * i));
	}

public:
	/// Prepares the pool, stores the id upper bound and builds split points.
	template<typename Impl>
	void config(Impl& impl) {
		typename Impl::State& state = impl.state;
		state.executor.config(cores);
		state.executor.upperId = impl.parallelTasksCount();
		buildSplit(impl);
	}

	/// @return how many distinct context ids contextId() can produce
	template<typename Impl>
	std::size_t contextIdCount(Impl& impl, std::size_t) {
		typename Impl::State& state = impl.state;
		return state.executor.split.size();
	}

	/// @return index of the split segment containing the given task id
	template<typename Impl>
	std::size_t contextId(Impl& impl, std::size_t id) { // O(log(n))
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;
		return std::distance(std::begin(split), split.upper_bound(id)) - 1;
	}

	/**
	 * @brief Runs `task` n times; iteration i is sent to the pool thread
	 *        owning global id (impl.id + skeleton.step*i), mapped linearly
	 *        onto [0, cores) by dividing by upperId.
	 */
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
		std::list<std::future<void>> futures;
		typename Impl::State& state = impl.state;

		for(std::size_t i = 0; i < n; ++i) {
			// id-proportional thread choice: keeps a given id on the same
			// thread regardless of where in the skeleton it is issued
			std::size_t thNum = cores * (impl.id + impl.skeleton.step * i) / state.executor.upperId;

			auto thTask = [&, i]{ Task::execute(impl, task, i, Info{}, parameters, std::tuple<>{}); };
			futures.emplace_back(state.executor.run(thNum, std::move(thTask)));
		}

		state.executor.wait(futures);
	}

	/**
	 * @brief Same dispatch as executeParallel(), but every task yields a
	 *        Value; all results are reduced with Select once complete.
	 * @return the accumulated ("best") value
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		Value best{};
		std::vector<Value> bests(n);

		std::list<std::future<void>> futures;
		typename Impl::State& state = impl.state;

		for(std::size_t i = 0; i < n; ++i) {
			std::size_t thNum = cores * (impl.id + impl.skeleton.step * i) / state.executor.upperId;

			// each task writes into its own slot of bests: no locking needed
			auto thTask = [&, &best = bests[i], i]{ best = Task::execute(impl, task, i, Info{}, parameters, std::tuple<>{}); };
			futures.emplace_back(state.executor.run(thNum, std::move(thTask)));
		}

		state.executor.wait(futures);

		// sequential reduction once every result is available
		if(n) best = std::move(bests[0]);
		for(std::size_t i = 1; i < n; ++i)
			best = Select::execute(impl, select, i, impl.executorInfo, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));

		return best;
	}
};
|
||||
|
||||
// Per-implementation state for StaticPoolId: the underlying thread pool
// (inherited), the task id upper bound and the context split points.
template<typename S>
struct ExecutorState<StaticPoolId<S>>: util::StaticPool {
	std::size_t upperId;         // exclusive upper bound of global task ids
	std::set<std::size_t> split; // ids where the executing thread may change
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
268
inc/alsk/executor/impl/staticthread.h
Normal file
268
inc/alsk/executor/impl/staticthread.h
Normal file
@ -0,0 +1,268 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_IMPL_STATICTHREAD_H
|
||||
#define ALSK_ALSK_EXECUTOR_IMPL_STATICTHREAD_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include "../executorbase.h"
|
||||
#include "../executorstate.h"
|
||||
#include "../../skeleton/traits.h"
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
|
||||
/**
 * @brief Executor spawning plain std::thread instances for each parallel
 *        section (no persistent pool); the calling thread runs the last
 *        chunk itself.
 *
 * The split-point computation mirrors StaticPool's: the skeleton is
 * traversed to predict thread boundaries so contextId() is repeatable.
 */
template<typename S>
struct StaticThread: ExecutorBase {
	// marks this executor as a parallel one
	using Tag = alsk::tag::Parallel;

public:
	/// Execution information attached to a (sub)task.
	struct Info {
		std::size_t cores; // cores allotted to the task (0 means "inherit executor setting")

		Info(std::size_t cores = 0):
			cores{cores} {}
	};

private:
	/**
	 * @brief Simulates the scheduling of skeleton s when run on `cores`
	 *        cores; replays the same step/remainder arithmetic as
	 *        executeParallel().
	 * @return a map from task id to the logical thread expected to run it
	 */
	auto buildSplitFor(S& s, std::size_t cores) {
		std::map<std::size_t, std::size_t> ms;

		// Distributes n tasks (ids id, id+idStep, ...) over the available
		// threads and recurses into every subtask through buildSplitImpl.
		auto populateSplitImpl = [&ms, totalCores=cores](
			auto& buildSplitImpl, auto& s,
			std::size_t maxThreads, std::size_t thOffset,
			std::size_t n, std::size_t idStep, std::size_t id, bool isRemainder
		) {
			std::size_t const nThreads = std::min(n, maxThreads);
			if(nThreads > 0) {
				std::size_t const step = n/nThreads;              // base tasks per thread
				std::size_t const remainBase = n - step*nThreads; // leftover tasks
				std::size_t remain = remainBase;

				std::size_t const coresA = maxThreads/nThreads;                   // cores per regular subtask
				std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores per remainder subtask

				std::size_t start = 0;
				for(std::size_t i = 0; i < nThreads; ++i) {
					std::size_t thNum = thOffset + i*coresA; // first thread of this chunk
					std::size_t offset = !!remain;           // does this chunk take one extra task?
					remain -= offset;

					// record only the first thread assigned to a given id
					if(!ms.count(id+start*idStep))
						ms[id+start*idStep] = thNum;

					for(std::size_t j = 0; j < step; ++j)
						buildSplitImpl(s, coresA, thNum, id+(start+j)*idStep, false);
					if(offset)
						buildSplitImpl(s, coresB, thNum, id+(start+step)*idStep, true);

					start += step+offset;
				}

				// a remainder chunk ends where regular scheduling resumes
				if(isRemainder) ms[id+start*idStep] = totalCores;
			} else {
				// no task at this level gets its own thread: recurse as-is
				for(std::size_t i = 0; i < n; ++i)
					buildSplitImpl(s, maxThreads, thOffset, id+i*idStep, false);
			}
		};

		// Recursive traversal: for each node of the skeleton, populate the
		// mapping for its subtasks (skeletonStep/skeletonTraversal are
		// provided by skeleton/traits.h).
		auto buildSplitImpl = makeRecursiveLambda(
			[&populateSplitImpl](
				auto buildSplitImpl,
				auto& s, auto maxThreads, auto thOffset,
				auto id, bool isRemainder
			) {
				auto idStep = skeletonStep(s);
				auto populateSplit = [&](auto& s, std::size_t n) {
					if(!idStep) return; // no ids below this node
					populateSplitImpl(buildSplitImpl, s, maxThreads, thOffset, n, idStep, id, isRemainder);
				};


				skeletonTraversal(s, populateSplit);
			}
		);

		buildSplitImpl(s, cores, 0ul, 0ul, false);

		return ms;
	}

	/**
	 * @brief Builds the set of task ids at which the executing thread
	 *        changes, merged over every core count in
	 *        repeatability.coresList so contextId() is stable for all of
	 *        those configurations.
	 */
	template<typename Impl>
	void buildSplit(Impl& impl) {
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;

		split.clear();
		split.insert(0);

		for(auto cores: repeatability.coresList) {
			std::size_t curThread = 0;
			for(auto p: buildSplitFor(impl.skeleton, cores)) { // TODO: C++17
				// insert a boundary each time the assigned thread changes
				if(std::get<1>(p) != curThread) {
					curThread = std::get<1>(p);
					split.insert(std::get<0>(p));
				}
			}
		}
	}

	// Number of threads a task may use: its own allotment if set,
	// otherwise the executor-wide core count.
	std::size_t threadLimit(Info const& info) const {
		auto const& lCores = info.cores;
		return lCores? lCores : cores;
	}

public:
	/**
	 * @brief Prepares the implementation: stores the core count, the number
	 *        of parallel tasks and precomputes the split points.
	 */
	template<typename Impl>
	void config(Impl& impl) {
		typename Impl::State& state = impl.state;
		impl.executorInfo.cores = cores;

		state.executor.parTasksCount = impl.parallelTasksCount();;
		buildSplit(impl);
	}

	/// @return how many distinct context ids contextId() can produce
	template<typename Impl>
	std::size_t contextIdCount(Impl& impl, std::size_t) {
		typename Impl::State& state = impl.state;
		return state.executor.split.size();
	}

	/// @return index of the split segment containing the given task id
	template<typename Impl>
	std::size_t contextId(Impl& impl, std::size_t id) { // O(log(n))
		typename Impl::State& state = impl.state;
		auto& split = state.executor.split;
		return std::distance(std::begin(split), split.upper_bound(id)) - 1;
	}

	/**
	 * @brief Runs `task` n times in parallel on freshly spawned threads.
	 *
	 * nThreads-1 threads are spawned for the first chunks; the calling
	 * thread executes the final chunk itself, then joins the others.
	 * The first `remainBase` chunks take one extra iteration (coresB); the
	 * remainder is always exhausted before the calling thread's own chunk.
	 */
	template<typename Task, typename Impl, typename BTask, typename Parameters>
	void executeParallel(Impl& impl, BTask& task, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo);
		std::size_t const nThreads = std::min(n, maxThreads);

		if(nThreads > 1) {
			std::vector<std::thread> threads(nThreads-1);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			std::size_t const coresA = maxThreads/nThreads; // cores for sub tasks in main cases
			std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores for remaining tasks

			// runs iterations [b, b+k), plus one extra iteration if offset
			auto run = [&](std::size_t b, std::size_t k, bool offset = false) {
				Info infoA{coresA}, infoB{coresB};

				std::size_t i;
				for(i = 0; i < k; ++i)
					Task::execute(impl, task, b+i, infoA, parameters, std::tuple<>{});
				if(offset)
					Task::execute(impl, task, b+i, infoB, parameters, std::tuple<>{});
			};

			{
				std::size_t start = 0;
				for(std::size_t i = 0; i < nThreads-1; ++i) {
					std::size_t offset = !!remain;
					remain -= offset;

					auto task = [&run, start, step, offset]{ run(start, step, offset); };
					threads[i] = std::thread{std::move(task)};
					start += step+offset;
				}

				// the calling thread runs the last chunk (never a remainder:
				// remainBase <= nThreads-1, so remain is 0 by now)
				run(start, step);
			}

			for(std::thread& thread: threads) thread.join();
		} else {
			// 0 or 1 thread available: run everything inline
			Info info{impl.executorInfo};
			for(std::size_t i = 0; i < n; ++i)
				Task::execute(impl, task, i, info, parameters, std::tuple<>{});
		}
	}

	/**
	 * @brief Runs `task` n times in parallel and reduces the produced
	 *        values with `select`.
	 *
	 * Each thread reduces its chunk locally into a private slot of `bests`
	 * (no locking needed); the calling thread runs the last chunk, joins,
	 * then reduces the per-thread results sequentially.
	 * @return the accumulated ("best") value
	 */
	template<typename Value, typename Task, typename Select, typename Impl, typename BTask, typename BSelect, typename Parameters>
	Value executeParallelAccumulate(Impl& impl, BTask& task, BSelect& select, Parameters const& parameters, std::size_t n) {
		std::size_t const maxThreads = threadLimit(impl.executorInfo);

		Value best{};

		std::size_t const nThreads = std::min(n, maxThreads);
		if(nThreads > 1) {
			std::vector<std::thread> threads(nThreads-1);
			std::size_t const step = n/nThreads;
			std::size_t const remainBase = n - step*nThreads;
			std::size_t remain = remainBase;

			std::size_t const coresA = maxThreads/nThreads; // cores for sub tasks in main cases
			std::size_t const coresB = remainBase? maxThreads/remainBase : 1; // cores for remaining tasks

			// local reduction of iterations [b, b+k) (+1 if offset) into out
			auto run = [&](Value& out, std::size_t b, std::size_t k, bool offset = false) {
				Value best{};
				Info infoA{coresA}, infoB{coresB};

				if(k) {
					best = Task::execute(impl, task, b+0, infoA, parameters, std::tuple<>{});

					std::size_t i;
					for(i = 1; i < k; ++i) {
						Value current = Task::execute(impl, task, b+i, infoA, parameters, std::tuple<>{});
						best = Select::execute(impl, select, b+i, infoA, parameters, std::tuple<>{}, std::move(current), std::move(best));
					}

					if(offset) {
						Value current = Task::execute(impl, task, b+i, infoB, parameters, std::tuple<>{});
						best = Select::execute(impl, select, b+i, infoB, parameters, std::tuple<>{}, std::move(current), std::move(best));
					}
				}

				out = std::move(best);
			};

			// one result slot per thread: threads never share a slot
			std::vector<Value> bests(nThreads);
			{
				std::size_t start = 0;
				for(std::size_t i = 0; i < nThreads-1; ++i) {
					std::size_t offset = !!remain;
					remain -= offset;

					auto task = [&, &best=bests[i], start, step, offset]{ run(best, start, step, offset); };
					threads[i] = std::thread{std::move(task)};
					start += step+offset;
				}

				// the calling thread fills the last slot itself
				run(bests[nThreads-1], start, step);
			}

			for(std::thread& thread: threads) thread.join();

			// final sequential reduction of the per-thread results
			if(nThreads) best = std::move(bests[0]);
			for(std::size_t i = 1; i < nThreads; ++i)
				best = Select::execute(impl, select, i, impl.executorInfo, parameters, std::tuple<>{}, std::move(bests[i]), std::move(best));
		} else {
			// sequential fallback
			Info info{impl.executorInfo};
			if(n)
				best = Task::execute(impl, task, 0, info, parameters, std::tuple<>{});
			for(std::size_t i = 1; i < n; ++i) {
				Value current = Task::execute(impl, task, i, info, parameters, std::tuple<>{});
				best = Select::execute(impl, select, i, info, parameters, std::tuple<>{}, std::move(current), std::move(best));
			}
		}

		return best;
	}
};
|
||||
|
||||
// Per-implementation state for StaticThread: no pool is needed (threads are
// spawned on demand), only the precomputed scheduling data is kept.
template<typename S>
struct ExecutorState<StaticThread<S>> {
	std::size_t parTasksCount;   // total number of parallel tasks in the skeleton
	std::set<std::size_t> split; // task ids where the executing thread changes
};
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
14
inc/alsk/executor/tags.h
Normal file
14
inc/alsk/executor/tags.h
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_TAGS_H
|
||||
#define ALSK_ALSK_EXECUTOR_TAGS_H
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
namespace tag {
|
||||
|
||||
// Tag type identifying executor classes (matched by alsk::isExecutor).
struct Executor {};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
20
inc/alsk/executor/traits.h
Normal file
20
inc/alsk/executor/traits.h
Normal file
@ -0,0 +1,20 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_TRAITS_H
|
||||
#define ALSK_ALSK_EXECUTOR_TRAITS_H
|
||||
|
||||
#include <type_traits>
|
||||
|
||||
#include "executorbase.h"
|
||||
|
||||
namespace alsk {
|
||||
|
||||
// Primary template: a type is not an executor unless proven otherwise below.
template<typename, typename=void> struct IsExecutorImpl: std::false_type {};

// Specialization selected when decay_t<T>::IsExecutor is exec::tag::Executor
// (the alias every executor gains by inheriting from ExecutorBase).
template<typename T>
struct IsExecutorImpl<T, std::enable_if_t<std::is_same<typename std::decay_t<T>::IsExecutor, exec::tag::Executor>{}>>: std::true_type {};

// Variable template: true iff T (after decay) is an executor type.
template<typename T>
constexpr bool isExecutor = IsExecutorImpl<T>::value;
|
||||
|
||||
}
|
||||
|
||||
#endif
|
154
inc/alsk/executor/utility/pool.h
Normal file
154
inc/alsk/executor/utility/pool.h
Normal file
@ -0,0 +1,154 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_UTILITY_POOL_H
|
||||
#define ALSK_ALSK_EXECUTOR_UTILITY_POOL_H
|
||||
|
||||
#include <atomic>
#include <condition_variable>
#include <functional>
#include <future>
#include <list>
#include <mutex>
#include <thread>
#include <tuple>
#include <vector>
|
||||
|
||||
#include <tmp/traits.h>
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
namespace util {
|
||||
|
||||
struct Pool {
|
||||
using Task = std::function<void()>;
|
||||
using TaskInfo = std::tuple<Task, std::promise<void>>;
|
||||
|
||||
private:
|
||||
std::atomic_bool _running;
|
||||
|
||||
std::vector<std::thread> _threads;
|
||||
std::list<TaskInfo> _tasks;
|
||||
std::mutex _mutexTasks;
|
||||
|
||||
std::condition_variable _cvTasks;
|
||||
|
||||
std::mutex _mutexProcessed;
|
||||
std::condition_variable _cvProcessed;
|
||||
|
||||
public:
|
||||
Pool(): _running{true} {}
|
||||
|
||||
Pool(Pool const& o): _running{true} {
|
||||
config(o._threads.size());
|
||||
}
|
||||
|
||||
Pool(Pool&& o): _running{true} {
|
||||
config(o._threads.size());
|
||||
}
|
||||
|
||||
~Pool() {
|
||||
terminate();
|
||||
}
|
||||
|
||||
Pool const& operator=(Pool const& o) {
|
||||
if(this == &o) return *this;
|
||||
config(o._threads.size());
|
||||
return *this;
|
||||
}
|
||||
|
||||
Pool const& operator=(Pool&& o) {
|
||||
if(this == &o) return *this;
|
||||
config(o._threads.size());
|
||||
return *this;
|
||||
}
|
||||
|
||||
void config(unsigned int cores) {
|
||||
terminate();
|
||||
_running = true;
|
||||
|
||||
if(cores == 0) return;
|
||||
--cores; // main thread will work too
|
||||
|
||||
_threads.reserve(cores);
|
||||
while(cores--)
|
||||
_threads.emplace_back([&]{ worker(); });
|
||||
}
|
||||
|
||||
template<typename F, typename R = tmp::invoke_result_t<F>, std::enable_if_t<std::is_same<R, void>{}>* = nullptr>
|
||||
std::future<void> run(F&& task) {
|
||||
std::future<void> future;
|
||||
{
|
||||
std::lock_guard<std::mutex> lg{_mutexTasks};
|
||||
_tasks.emplace_back(std::forward<F>(task), std::promise<void>{});
|
||||
future = std::get<1>(_tasks.back()).get_future();
|
||||
}
|
||||
_cvTasks.notify_one();
|
||||
|
||||
return future;
|
||||
}
|
||||
|
||||
template<typename F, typename R = tmp::invoke_result_t<F>, std::enable_if_t<not std::is_same<R, void>{}>* = nullptr>
|
||||
std::future<R> run(F&& task, std::promise<R>& promise) {
|
||||
std::future<R> future = promise.get_future();
|
||||
run([task=std::forward<F>(task), &promise]{ promise.set_value(task()); });
|
||||
return future;
|
||||
}
|
||||
|
||||
template<typename Futures>
|
||||
void wait(Futures& futures) {
|
||||
while(tryProcessOne());
|
||||
for(auto& future: futures) future.wait();
|
||||
}
|
||||
|
||||
protected:
|
||||
void terminate() {
|
||||
{
|
||||
std::lock_guard<std::mutex> lk{_mutexTasks};
|
||||
_running = false;
|
||||
}
|
||||
_cvTasks.notify_all();
|
||||
for(auto& thread: _threads) thread.join();
|
||||
_threads.clear();
|
||||
}
|
||||
|
||||
void worker() {
|
||||
auto test = [&]{ return !_running || _tasks.size(); };
|
||||
|
||||
for(;;) {
|
||||
TaskInfo taskInfo;
|
||||
{
|
||||
std::unique_lock<std::mutex> lk{_mutexTasks};
|
||||
|
||||
if(!test()) _cvTasks.wait(lk, test);
|
||||
if(!_running) return;
|
||||
|
||||
taskInfo = std::move(_tasks.front());
|
||||
_tasks.pop_front();
|
||||
}
|
||||
|
||||
process(taskInfo);
|
||||
}
|
||||
}
|
||||
|
||||
bool tryProcessOne() {
|
||||
TaskInfo taskInfo;
|
||||
{
|
||||
std::unique_lock<std::mutex> lk{_mutexTasks};
|
||||
if(_tasks.empty()) return false;
|
||||
taskInfo = std::move(_tasks.front());
|
||||
_tasks.pop_front();
|
||||
}
|
||||
|
||||
process(taskInfo);
|
||||
return true;
|
||||
}
|
||||
|
||||
void process(TaskInfo& taskInfo) {
|
||||
std::get<0>(taskInfo)();
|
||||
std::get<1>(taskInfo).set_value();
|
||||
|
||||
_cvProcessed.notify_all();
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
161
inc/alsk/executor/utility/staticpool.h
Normal file
161
inc/alsk/executor/utility/staticpool.h
Normal file
@ -0,0 +1,161 @@
|
||||
#ifndef ALSK_ALSK_EXECUTOR_UTILITY_STATICPOOL_H
|
||||
#define ALSK_ALSK_EXECUTOR_UTILITY_STATICPOOL_H
|
||||
|
||||
#include <atomic>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <future>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <thread>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <tmp/traits.h>
|
||||
|
||||
namespace alsk {
|
||||
namespace exec {
|
||||
namespace util {
|
||||
|
||||
/**
 * @brief Thread pool with one private task queue per thread: callers choose
 *        the executing thread explicitly via run(i, task).
 *
 * wait() lets a pool thread drain its own queue while waiting on futures,
 * so a pool task that submits subtasks and waits for them cannot deadlock
 * its own thread.
 */
struct StaticPool {
	using Task = std::function<void()>;
	// A queued task together with the promise fulfilled once it has run.
	using TaskInfo = std::tuple<Task, std::promise<void>>;

	// Per-thread bookkeeping: the thread, its queue and synchronization.
	struct ThreadInfo {
		std::atomic_bool running; // false signals this worker to exit
		std::thread thread;
		std::list<TaskInfo> tasks; // guarded by mutex
		std::mutex mutex;
		std::condition_variable cv;

		ThreadInfo() {}
		// mutex/cv/atomic are not movable: this "move" builds a fresh,
		// empty instance; only needed so std::vector<ThreadInfo> compiles
		ThreadInfo(ThreadInfo&&) {}
	};

private:
	std::vector<ThreadInfo> _threads;
	// maps a pool thread's std::thread::id to its ThreadInfo, used by wait()
	std::unordered_map<std::thread::id, std::reference_wrapper<ThreadInfo>> _threadFromId;

public:
	StaticPool() {}

	// Copy/move reproduce the concurrency level only, never queued tasks.
	StaticPool(StaticPool const& o) {
		config(o._threads.size());
	}

	StaticPool(StaticPool&& o) {
		config(o._threads.size());
	}

	~StaticPool() {
		terminate();
	}

	StaticPool const& operator=(StaticPool const& o) {
		if(this == &o) return *this;
		config(o._threads.size());
		return *this;
	}

	StaticPool const& operator=(StaticPool&& o) {
		if(this == &o) return *this;
		config(o._threads.size());
		return *this;
	}

	/**
	 * @brief (Re)starts the pool with `cores` worker threads (one queue
	 *        each); 0 leaves the pool empty.
	 */
	void config(unsigned int cores) {
		terminate();

		if(cores == 0) return;

		_threads.resize(cores);
		for(unsigned int i = 0; i < cores; ++i) {
			ThreadInfo& threadInfo = _threads[i];
			threadInfo.running = true;
			// the worker must reference its slot, not a loop copy
			threadInfo.thread = std::thread{[&,&threadInfo=threadInfo] { worker(threadInfo); }};
			_threadFromId.emplace(threadInfo.thread.get_id(), threadInfo);
		}
	}

	/**
	 * @brief Enqueues a void() task on thread i's private queue.
	 * @param i index of the target pool thread (must be < configured cores)
	 * @return future fulfilled once the task has been executed
	 */
	template<typename F, typename R = tmp::invoke_result_t<F>, std::enable_if_t<std::is_same<R, void>{}>* = nullptr>
	std::future<void> run(std::size_t i, F&& task) {
		ThreadInfo& threadInfo = _threads[i];
		std::future<void> future;
		{
			std::lock_guard<std::mutex> lg{threadInfo.mutex};
			threadInfo.tasks.emplace_back(std::forward<F>(task), std::promise<void>{});
			future = std::get<1>(threadInfo.tasks.back()).get_future();
		}
		threadInfo.cv.notify_one();

		return future;
	}

	/**
	 * @brief Blocks until all given futures are ready; when called from a
	 *        pool thread, that thread drains its own queue first so its
	 *        pending subtasks can make progress. Clears the futures.
	 */
	template<typename Futures>
	void wait(Futures& futures) {
		auto const& id = std::this_thread::get_id();
		if(_threadFromId.count(id)) {
			auto& threadInfo = _threadFromId.at(id);
			while(tryProcessOne(threadInfo));
		}
		for(auto& future: futures) future.wait();
		futures.clear();
	}

protected:
	/// Stops and joins every worker; pending unstarted tasks are discarded.
	void terminate() {
		for(auto& threadInfo: _threads) {
			{
				// take the lock so the worker blocked in cv.wait observes it
				std::lock_guard<std::mutex> lg{threadInfo.mutex};
				threadInfo.running = false;
			}
			threadInfo.cv.notify_all();
			threadInfo.thread.join();
		}
		_threads.clear();
		_threadFromId.clear();
	}

	/// Worker loop: pop and run tasks from this thread's own queue until
	/// termination is requested.
	void worker(ThreadInfo& threadInfo) {
		auto test = [&]{ return !threadInfo.running || threadInfo.tasks.size(); };

		for(;;) {
			TaskInfo taskInfo;
			{
				std::unique_lock<std::mutex> lk{threadInfo.mutex};

				if(!test()) threadInfo.cv.wait(lk, test);
				if(!threadInfo.running) return;

				taskInfo = std::move(threadInfo.tasks.front());
				threadInfo.tasks.pop_front();
			}

			process(taskInfo);
		}
	}

	/// Runs one pending task from the given thread's queue, if any.
	/// @return true if a task was processed
	bool tryProcessOne(ThreadInfo& threadInfo) {
		TaskInfo taskInfo;
		{
			std::unique_lock<std::mutex> lk{threadInfo.mutex};
			if(threadInfo.tasks.empty()) return false;
			taskInfo = std::move(threadInfo.tasks.front());
			threadInfo.tasks.pop_front();
		}

		process(taskInfo);
		return true;
	}

	/// Executes a task and fulfills its completion promise.
	void process(TaskInfo& taskInfo) {
		std::get<0>(taskInfo)();
		std::get<1>(taskInfo).set_value();
	}
};
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user