149 lines
4.3 KiB
C
149 lines
4.3 KiB
C
|
#ifndef PFOR_PFOR_PARALLEL_FOR_H
|
||
|
#define PFOR_PFOR_PARALLEL_FOR_H
|
||
|
|
||
|
#include <cstdint>
|
||
|
#include <utility>
|
||
|
|
||
|
#include "algorithm.h"
|
||
|
#include "clusters.h"
|
||
|
#include "expression/subexpression.h"
|
||
|
|
||
|
#include "strategies/loopunrolling.h"
|
||
|
#include "strategies/openmp.h"
|
||
|
#include "strategies/stdthread.h"
|
||
|
|
||
|
namespace pfor {
|
||
|
|
||
|
template<typename, typename> struct CompUInt;
|
||
|
template<std::size_t lhs, std::size_t rhs>
|
||
|
struct CompUInt<UIntToType<lhs>, UIntToType<rhs>> {
|
||
|
static constexpr bool value = lhs < rhs;
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
*/
|
||
|
template<bool par, typename E, typename Range>
|
||
|
using ForLoopDefault = ForLoopOMP<par, E, Range>;
|
||
|
|
||
|
template<bool par, template<bool, typename, typename> class ForLoop, typename Cluster>
|
||
|
struct ParallelForImpl {
|
||
|
template<typename Range, typename E>
|
||
|
static void eval(Range const& range, E& e) {
|
||
|
auto exprView = expr::expressionView<Cluster>(e);
|
||
|
using ExprView = decltype(exprView);
|
||
|
|
||
|
ForLoop<par, ExprView, Range>::eval(range, exprView);
|
||
|
}
|
||
|
};
|
||
|
|
||
|
template<bool par, template<bool, typename, typename> class ForLoop>
|
||
|
struct ParallelForImpl<par, ForLoop, Pack<>> {
|
||
|
template<typename Range, typename E>
|
||
|
static void eval(Range const&, E&) {}
|
||
|
};
|
||
|
|
||
|
/**
|
||
|
* @brief possibly parallel for-loop
|
||
|
*
|
||
|
* @param[in] range the index range
|
||
|
* @param[in] e the expression to evaluate
|
||
|
*
|
||
|
* The expression is split in two clusters:
|
||
|
* - parallelizable instructions
|
||
|
* - sequential instructions
|
||
|
*
|
||
|
* Each cluster is then run accordingly
|
||
|
*/
|
||
|
template<
|
||
|
template<bool, typename, typename> class ForLoop = ForLoopDefault,
|
||
|
typename Range, typename E,
|
||
|
std::enable_if_t<expr::isExpression<E>>* = nullptr
|
||
|
>
|
||
|
void parallelFor(Range const& range, E e) {
|
||
|
using Clusters = typename ClustersGen<E>::type;
|
||
|
using Sequential = SequentialCluster<E, Clusters>;
|
||
|
using Parallel = ParallelCluster<E, Clusters>;
|
||
|
|
||
|
ParallelForImpl<true, ForLoop, Parallel>::eval(range, e);
|
||
|
ParallelForImpl<false, ForLoop, Sequential>::eval(range, e);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief possibly parallel for-loop
|
||
|
*
|
||
|
* @param[in] range the index range with compile-time known information
|
||
|
* @param[in] e the expression to evaluate
|
||
|
*
|
||
|
* The expression is split in two clusters:
|
||
|
* - parallelizable instructions
|
||
|
* - sequential instructions
|
||
|
*
|
||
|
* To determine if an instruction can be run in parallel, it is first
|
||
|
* modified depending on the range (specifically (begin, step))
|
||
|
*
|
||
|
* Each cluster is then run accordingly
|
||
|
*/
|
||
|
template<
|
||
|
template<bool, typename, typename> class ForLoop = ForLoopDefault,
|
||
|
typename E, typename RT, index::Value begin, index::Value step,
|
||
|
std::enable_if_t<expr::isExpression<E>>* = nullptr
|
||
|
>
|
||
|
void parallelFor(TRangeCT<RT, begin, step> const& range, E e) {
|
||
|
using EStep = expr::MergeComma<PackForEach<expr::SplitComma<E>, GenSubstituteVariableInExpression<step, begin>::template type>>;
|
||
|
using Clusters = typename ClustersGen<EStep>::type;
|
||
|
using Sequential = SequentialCluster<EStep, Clusters>;
|
||
|
using Parallel = ParallelCluster<EStep, Clusters>;
|
||
|
|
||
|
ParallelForImpl<true, ForLoop, Parallel>::eval(range, e);
|
||
|
ParallelForImpl<false, ForLoop, Sequential>::eval(range, e);
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief possibly parallel for-loop
|
||
|
*
|
||
|
* @param[in] range the index range
|
||
|
* @param[in] es a pack of expressions to evaluate
|
||
|
*
|
||
|
* The expression is split in two clusters:
|
||
|
* - parallelizable instructions
|
||
|
* - sequential instructions
|
||
|
*
|
||
|
* Each cluster is then run accordingly
|
||
|
*
|
||
|
* Note: C++17 version is parallelFor(range, (es, ...));
|
||
|
*/
|
||
|
template<
|
||
|
template<bool, typename, typename> class ForLoop = ForLoopDefault,
|
||
|
typename Range, typename... Es,
|
||
|
std::enable_if_t<(sizeof...(Es) > 1) and expr::allExpression<Es...>>* = nullptr
|
||
|
>
|
||
|
void parallelFor(Range const& range, Es... es) {
|
||
|
parallelFor<ForLoop>(range, commaMerger(es...));
|
||
|
}
|
||
|
|
||
|
/**
|
||
|
* @brief possibly parallel for-loop
|
||
|
*
|
||
|
* @param[in] range the index range
|
||
|
* @param[in] f a function returning the expression to evaluate
|
||
|
*
|
||
|
* The expression is split in two clusters:
|
||
|
* - parallelizable instructions
|
||
|
* - sequential instructions
|
||
|
*
|
||
|
* Each cluster is then run accordingly
|
||
|
*/
|
||
|
template<
|
||
|
template<bool, typename, typename> class ForLoop = ForLoopDefault,
|
||
|
typename Range, typename F,
|
||
|
typename E = std::decay_t<decltype(std::declval<F>()(std::declval<Index>()))>,
|
||
|
std::enable_if_t<expr::isExpression<E>>* = nullptr
|
||
|
>
|
||
|
void parallelFor(Range const& range, F&& f) {
|
||
|
parallelFor<ForLoop>(range, std::forward<F>(f)(Index{}));
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
#endif
|