pfor/src/pfor/strategies/stdthread.h

61 lines
1.6 KiB
C++

#ifndef PFOR_PFOR_STRATEGIES_STDTHREAD_H
#define PFOR_PFOR_STRATEGIES_STDTHREAD_H
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>
#include "../range.h"
#include "parameters.h"
namespace pfor {
// Loop strategy selected by its first (bool) parameter; only the two
// specializations are defined.
template<bool, typename, typename> struct ForLoopThread;

// Sequential specialization: walks the range on the calling thread.
template<typename E, typename Range>
struct ForLoopThread<false, E, Range> {
    // Evaluates e[i] for every index i, starting at range.begin() and
    // advancing by range.step(), for as long as i stays strictly before
    // range.end() in the direction of travel.
    // A positive step counts upwards (i < end); any other step takes the
    // downward branch (i > end).
    static void eval(Range const& range, E e) {
        auto index = +range.begin();
        if(range.step() > 0) {
            while(index < range.end()) {
                e[index];
                index += range.step();
            }
        } else {
            while(index > range.end()) {
                e[index];
                index += range.step();
            }
        }
    }
};
// Parallel specialization: splits the range into contiguous chunks, runs all
// but the last chunk on freshly created std::thread objects and the last
// chunk on the calling thread, then joins every thread before returning.
template<typename E, typename Range>
struct ForLoopThread<true, E, Range> {
    using Index = typename Range::ValueType;

    // Evaluates e[i] for every index of `range`, distributing the iterations
    // over at most ParallelForParameters::nThreads threads (each thread gets
    // its own copy of `e`).
    //
    // Precondition: range.step() is non-zero (the iteration count below
    // divides by it).
    static void eval(Range const& range, E e) {
        using SeqRange = decltype(makeRange(+range.begin(), +range.end(), +range.step()));
        auto const sequence = &ForLoopThread<false, E, SeqRange>::eval;

        // Number of iterations, rounded up so a partial last step still counts.
        Index const count = (range.end() - range.begin() + (range.step()-(range.step() > 0? +1 : -1)))/range.step();

        // Empty range, or a step pointing away from end(): nothing to run.
        // Without this guard the worker count below would be 0, which made
        // the thread vector size wrap around and the chunk maths divide by 0.
        if(count <= 0) return;

        std::size_t const total = static_cast<std::size_t>(count);
        // Never more workers than iterations, and always at least one (the
        // calling thread) even if the configured thread count is 0.
        std::size_t const workers = std::max<std::size_t>(
            1, std::min<std::size_t>(ParallelForParameters::nThreads, total));

        Index const step = static_cast<Index>(range.step());
        // Offset from range.begin() to the first index of chunk `chunk`.
        // The split is done on the iteration number and only then scaled by
        // the (signed) step, so every boundary stays on the step grid and
        // negative steps are not pushed through unsigned arithmetic.
        auto const offsetOf = [&](std::size_t chunk) -> Index {
            return static_cast<Index>(static_cast<Index>(chunk*total/workers)*step);
        };

        std::vector<std::thread> threads(workers-1);
        // Joins whatever has been started when the scope is left, including
        // by an exception; destroying a joinable std::thread would otherwise
        // terminate the program.
        struct JoinAll {
            std::vector<std::thread>& pool;
            ~JoinAll() {
                for(auto& worker: pool)
                    if(worker.joinable()) worker.join();
            }
        } joinAll{threads};

        for(std::size_t k = 0; k+1 < workers; ++k) {
            auto lRange = makeRange(
                range.begin() + offsetOf(k),
                range.begin() + offsetOf(k+1),
                +range.step()
            );
            threads[k] = std::thread{sequence, lRange, e};
        }

        // The calling thread handles the final chunk, which runs up to the
        // real end of the range.
        auto lastRange = makeRange(
            range.begin() + offsetOf(workers-1),
            range.end(),
            +range.step()
        );
        sequence(lastRange, e);
    }
};
}
#endif