pfor/src/pfor/parallel_for.h

149 lines
4.3 KiB
C++

#ifndef PFOR_PFOR_PARALLEL_FOR_H
#define PFOR_PFOR_PARALLEL_FOR_H
#include <cstdint>
#include <utility>
#include "algorithm.h"
#include "clusters.h"
#include "expression/subexpression.h"
#include "strategies/loopunrolling.h"
#include "strategies/openmp.h"
#include "strategies/stdthread.h"
namespace pfor {
/**
 * @brief compile-time strict "less than" on two UIntToType wrappers
 *
 * Only the specialization for a pair of UIntToType instantiations is
 * defined; instantiating the primary template with any other types is
 * an error because it is declared but never defined.
 *
 * `value` is true iff the unsigned integer carried by the first type is
 * strictly smaller than the one carried by the second type.
 */
template<typename Left, typename Right>
struct CompUInt;

template<std::size_t left, std::size_t right>
struct CompUInt<UIntToType<left>, UIntToType<right>> {
    static constexpr bool value = (left < right);
};
/**
 * @brief loop strategy used by parallelFor when none is given explicitly
 *
 * Plain alias of ForLoopOMP: the three template arguments are forwarded
 * unchanged.
 *
 * @tparam parallel flag handed to the strategy (the parallelFor overloads
 *                  of this file pass true for the parallel cluster and
 *                  false for the sequential one)
 * @tparam Expr     type of the expression (view) evaluated by the loop
 * @tparam Rng      type of the index range
 */
template<bool parallel, typename Expr, typename Rng>
using ForLoopDefault = ForLoopOMP<parallel, Expr, Rng>;
/**
 * @brief runs one cluster of an expression through a loop strategy
 *
 * @tparam par     flag forwarded as first template argument of ForLoop
 * @tparam ForLoop loop strategy; must expose a static eval(range, view)
 * @tparam Cluster cluster passed to expr::expressionView
 */
template<bool par, template<bool, typename, typename> class ForLoop, typename Cluster>
struct ParallelForImpl {
    /**
     * @param[in] range the index range handed to the loop strategy
     * @param[in] e     the full expression; a view on it is built with
     *                  expr::expressionView<Cluster> (presumably selecting
     *                  the instructions of Cluster - confirm in
     *                  expression/subexpression.h)
     */
    template<typename Range, typename E>
    static void eval(Range const& range, E& e) {
        // the view is a named local because ForLoop::eval receives it as an lvalue
        auto view = expr::expressionView<Cluster>(e);
        ForLoop<par, decltype(view), Range>::eval(range, view);
    }
};
/**
 * @brief specialization for the cluster Pack<>
 *
 * With no template argument in the pack there is nothing to evaluate:
 * eval ignores both arguments and returns immediately, so neither an
 * expression view nor a loop is instantiated.
 */
template<bool par, template<bool, typename, typename> class ForLoop>
struct ParallelForImpl<par, ForLoop, Pack<>> {
    template<typename Range, typename E>
    static void eval(Range const&, E&) {
        // intentionally empty
    }
};
/**
* @brief possibly parallel for-loop
*
* @param[in] range the index range
* @param[in] e the expression to evaluate
*
* The expression is split in two clusters:
* - parallelizable instructions
* - sequential instructions
*
* Each cluster is then run accordingly
*/
template<
template<bool, typename, typename> class ForLoop = ForLoopDefault,
typename Range, typename E,
std::enable_if_t<expr::isExpression<E>>* = nullptr
>
void parallelFor(Range const& range, E e) {
using Clusters = typename ClustersGen<E>::type;
using Sequential = SequentialCluster<E, Clusters>;
using Parallel = ParallelCluster<E, Clusters>;
ParallelForImpl<true, ForLoop, Parallel>::eval(range, e);
ParallelForImpl<false, ForLoop, Sequential>::eval(range, e);
}
/**
 * @brief for-loop over an expression, parallelized where possible, for a
 *        range whose begin and step are compile-time constants
 *
 * @tparam ForLoop loop strategy (defaults to ForLoopDefault)
 *
 * @param[in] range the index range with compile-time known information
 * @param[in] e     the expression to evaluate (taken by value)
 *
 * Before clustering, the expression type is rewritten according to the
 * range: it is split on the comma operator (expr::SplitComma), each
 * element goes through GenSubstituteVariableInExpression<step, begin>,
 * and the result is merged back (expr::MergeComma). The parallel and
 * sequential groups are derived from that rewritten type.
 *
 * The rewritten type is only used to decide the clustering; the object
 * that is evaluated is still the original e.
 *
 * The parallel group is evaluated first, then the sequential group.
 */
template<
    template<bool, typename, typename> class ForLoop = ForLoopDefault,
    typename E,
    typename RT,
    index::Value begin,
    index::Value step,
    std::enable_if_t<expr::isExpression<E>>* = nullptr
>
void parallelFor(TRangeCT<RT, begin, step> const& range, E e) {
    // rewrite the expression type with the compile-time (step, begin) of the range
    using Instructions = expr::SplitComma<E>;
    using Substituted = PackForEach<Instructions, GenSubstituteVariableInExpression<step, begin>::template type>;
    using EStep = expr::MergeComma<Substituted>;

    using ClusterList = typename ClustersGen<EStep>::type;

    // order matters: parallel instructions run before sequential ones
    ParallelForImpl<true, ForLoop, ParallelCluster<EStep, ClusterList>>::eval(range, e);
    ParallelForImpl<false, ForLoop, SequentialCluster<EStep, ClusterList>>::eval(range, e);
}
/**
 * @brief for-loop over several expressions, parallelized where possible
 *
 * @tparam ForLoop loop strategy (defaults to ForLoopDefault)
 *
 * @param[in] range the index range
 * @param[in] es    a pack of at least two expressions to evaluate
 *
 * The expressions are combined into a single one with commaMerger and
 * the call is forwarded to the single-expression parallelFor, which
 * takes care of the parallel/sequential split.
 *
 * Only participates in overload resolution when more than one argument
 * is given and all of them are expressions.
 *
 * Note: C++17 version is parallelFor(range, (es, ...));
 */
template<
    template<bool, typename, typename> class ForLoop = ForLoopDefault,
    typename Range,
    typename... Es,
    std::enable_if_t<(sizeof...(Es) > 1) && expr::allExpression<Es...>>* = nullptr
>
void parallelFor(Range const& range, Es... es) {
    parallelFor<ForLoop>(range, commaMerger(es...));
}
/**
 * @brief for-loop over an expression produced by a callable,
 *        parallelized where possible
 *
 * @tparam ForLoop loop strategy (defaults to ForLoopDefault)
 * @tparam E       decayed type returned by calling f with an Index;
 *                 deduced, used only to constrain the overload
 *
 * @param[in] range the index range
 * @param[in] f     a callable that, given an Index, returns the
 *                  expression to evaluate
 *
 * f is invoked exactly once, with a default-constructed Index, and the
 * returned expression is forwarded to the expression-taking parallelFor.
 *
 * Only participates in overload resolution when f is callable with an
 * Index and the decayed result is an expression.
 */
template<
    template<bool, typename, typename> class ForLoop = ForLoopDefault,
    typename Range,
    typename F,
    typename E = std::decay_t<decltype(std::declval<F>()(std::declval<Index>()))>,
    std::enable_if_t<expr::isExpression<E>>* = nullptr
>
void parallelFor(Range const& range, F&& f) {
    parallelFor<ForLoop>(range, std::forward<F>(f)(Index{}));
}
}
#endif