#ifndef PFOR_PFOR_PARALLEL_FOR_H
#define PFOR_PFOR_PARALLEL_FOR_H

/**
 * @file
 * @brief Entry points (the parallelFor overloads) that evaluate an expression
 *        over an index range through a pluggable ForLoop strategy.
 *
 * NOTE(review): in this copy of the file the contents of most angle-bracket
 * lists appear to be missing (system header names, template parameter lists,
 * template argument lists, enable_if conditions). The comments below only
 * describe what is still visible; restore the stripped text from the upstream
 * source before relying on any declaration here.
 */

/* NOTE(review): the names of the two headers below are not visible here. */
#include
#include

#include "algorithm.h"
#include "clusters.h"
#include "expression/subexpression.h"
#include "strategies/loopunrolling.h"
#include "strategies/openmp.h"
#include "strategies/stdthread.h"

namespace pfor {

/**
 * @brief compile-time "less than" comparison
 *
 * The primary template is only declared; the specialization below exposes
 * `value`, which is `lhs < rhs`.
 *
 * NOTE(review): the specialization is written in terms of UIntToType, but its
 * template parameter list (where lhs and rhs are declared) is not visible here.
 */
template struct CompUInt;

template struct CompUInt, UIntToType> {
    static constexpr bool value = lhs < rhs;
};

/**
 * @brief loop strategy used when a caller does not name one
 *
 * Every parallelFor overload in this file defaults its ForLoop template
 * parameter to this alias, which currently forwards to ForLoopOMP
 * (presumably declared in strategies/openmp.h -- TODO confirm).
 */
template using ForLoopDefault = ForLoopOMP;

/**
 * @brief evaluates one cluster of an expression with a given loop strategy
 *
 * eval() builds a view over the expression with expr::expressionView and
 * passes that view, together with the range, to ForLoop::eval.
 *
 * @param[in] range the index range, forwarded untouched to ForLoop::eval
 * @param[in] e     the expression; taken by non-const reference and only used
 *                  to build the view
 */
template class ForLoop, typename Cluster> struct ParallelForImpl {
    template static void eval(Range const& range, E& e) {
        auto exprView = expr::expressionView(e);
        /* NOTE(review): ExprView is presumably consumed by the ForLoop
         * template arguments that are not visible on the next line. */
        using ExprView = decltype(exprView);
        ForLoop::eval(range, exprView);
    }
};

/**
 * @brief specialization whose eval() does nothing
 *
 * Both arguments are ignored and no loop is run.
 *
 * NOTE(review): the specialized argument list is not visible here; presumably
 * this matches an empty cluster -- confirm against the upstream source.
 */
template class ForLoop> struct ParallelForImpl> {
    template static void eval(Range const&, E&) {}
};

/**
 * @brief possibly parallel for-loop
 *
 * @param[in] range the index range
 * @param[in] e the expression to evaluate (taken by value; the local copy is
 *              what both evaluations below receive)
 *
 * The expression is split in two clusters:
 * - parallelizable instructions
 * - sequential instructions
 *
 * Each cluster is then run accordingly: ParallelForImpl::eval is called twice
 * in a row with the same range and expression.
 *
 * NOTE(review): the enable_if condition and the template arguments that tell
 * the two ParallelForImpl calls apart are not visible in this copy, so the
 * order in which the clusters run cannot be read from here.
 */
template<
    template class ForLoop = ForLoopDefault,
    typename Range, typename E,
    std::enable_if_t>* = nullptr
>
void parallelFor(Range const& range, E e) {
    using Clusters = typename ClustersGen::type;
    using Sequential = SequentialCluster;
    using Parallel = ParallelCluster;

    ParallelForImpl::eval(range, e);
    ParallelForImpl::eval(range, e);
}

/**
 * @brief possibly parallel for-loop
 *
 * @param[in] range the index range with compile-time known information
 * @param[in] e the expression to evaluate (taken by value)
 *
 * The expression is split in two clusters:
 * - parallelizable instructions
 * - sequential instructions
 *
 * To determine if an instruction can be run in parallel, it is first
 * modified depending on the range (specifically (begin, step)).
 * That modified form is the EStep type alias, built from expr::MergeComma and
 * GenSubstituteVariableInExpression; the object handed to
 * ParallelForImpl::eval is still the unmodified `e`.
 *
 * Each cluster is then run accordingly
 *
 * NOTE(review): the template arguments of TRangeCT, EStep, ClustersGen and
 * the enable_if condition are not visible in this copy.
 */
template<
    template class ForLoop = ForLoopDefault,
    typename E, typename RT, index::Value begin, index::Value step,
    std::enable_if_t>* = nullptr
>
void
parallelFor(TRangeCT const& range, E e) {
    using EStep = expr::MergeComma, GenSubstituteVariableInExpression::template type>>;
    using Clusters = typename ClustersGen::type;
    using Sequential = SequentialCluster;
    using Parallel = ParallelCluster;

    ParallelForImpl::eval(range, e);
    ParallelForImpl::eval(range, e);
}

/**
 * @brief possibly parallel for-loop
 *
 * @param[in] range the index range
 * @param[in] es a pack of expressions to evaluate (each taken by value)
 *
 * Only considered when more than one expression is given
 * (sizeof...(Es) > 1) and the expr::allExpression check holds.
 * The pack is merged into a single expression with commaMerger and the
 * result is forwarded to parallelFor(range, merged).
 *
 * The expression is split in two clusters:
 * - parallelizable instructions
 * - sequential instructions
 *
 * Each cluster is then run accordingly
 *
 * Note: C++17 version is parallelFor(range, (es, ...));
 */
template<
    template class ForLoop = ForLoopDefault,
    typename Range, typename... Es,
    std::enable_if_t<(sizeof...(Es) > 1) and expr::allExpression>* = nullptr
>
void parallelFor(Range const& range, Es... es) {
    parallelFor(range, commaMerger(es...));
}

/**
 * @brief possibly parallel for-loop
 *
 * @param[in] range the index range
 * @param[in] f a function returning the expression to evaluate
 *
 * f is called with `Index{}` and the value it returns is forwarded to
 * parallelFor(range, expression). E defaults to the decayed type of such a
 * call's result.
 *
 * The expression is split in two clusters:
 * - parallelizable instructions
 * - sequential instructions
 *
 * Each cluster is then run accordingly
 *
 * NOTE(review): the arguments of std::declval / std::forward and the
 * enable_if condition are not visible in this copy.
 */
template<
    template class ForLoop = ForLoopDefault,
    typename Range, typename F,
    typename E = std::decay_t()(std::declval()))>,
    std::enable_if_t>* = nullptr
>
void parallelFor(Range const& range, F&& f) {
    parallelFor(range, std::forward(f)(Index{}));
}

} /* namespace pfor */

#endif /* PFOR_PFOR_PARALLEL_FOR_H */