thesis version
This commit is contained in:
32
celero/bone/common.cpp
Normal file
32
celero/bone/common.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
#include <numeric>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
namespace bench {
|
||||
|
||||
// Produces a vector of `size` integers. Element k (0-based) holds
// (k+1) % (max-min+1) + min, i.e. the values cycle through the range
// starting at `min`. The expression divides by (max-min+1), so callers
// must pass a range for which that quantity is non-zero.
Data Task::operator()(int min, int max) const {
	Data values(size);

	int counter = 0;
	for(auto& value: values)
		value = (++counter) % (max - min + 1) + min;

	return values;
}
|
||||
|
||||
// Builds a vector two elements longer than `data`:
//   [0]   sum of all elements of `data`
//   [1]   that sum squared when it is odd, the sum itself otherwise
//   [2..] a copy of `data`
Data taskD(Data const& data) {
	auto const first = std::begin(data);
	auto const last = std::end(data);
	Data::value_type const total = std::accumulate(first, last, Data::value_type{});

	Data result(data.size() + 2);
	result[0] = total;
	if(total & 1)
		result[1] = total * total;
	else
		result[1] = total;
	std::copy(first, last, std::begin(result) + 2);

	return result;
}
|
||||
|
||||
// Returns a reference to whichever argument has the strictly smaller element
// sum; on a tie `b` is returned. The result aliases one of the arguments, so
// it must not outlive them.
Data const& select(Data const& a, Data const& b) {
	auto const sumOf = [](Data const& values) {
		return std::accumulate(std::begin(values), std::end(values), Data::value_type{});
	};

	Data::value_type const totalA = sumOf(a);
	Data::value_type const totalB = sumOf(b);

	if(totalA < totalB)
		return a;
	return b;
}
|
||||
|
||||
// Folds `a` into a single value: `init` plus the sum of every element.
Data::value_type project(Data const& a, Data::value_type const& init) {
	Data::value_type total = init;
	for(auto const& value: a)
		total = total + value;
	return total;
}
|
||||
|
||||
}
|
57
celero/bone/common.h
Normal file
57
celero/bone/common.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef ALSK_CELERO_BONE_COMMON_H
|
||||
#define ALSK_CELERO_BONE_COMMON_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
namespace bench {
|
||||
|
||||
using Data = std::vector<int>;
|
||||
using Value = Data::value_type;
|
||||
|
||||
struct Task {
|
||||
std::size_t size;
|
||||
Data operator()(int min, int max) const;
|
||||
|
||||
// TODO inline version: improve benchmarking for skeleton?
|
||||
// Data operator()(int min, int max) const {
|
||||
// Data v(size);
|
||||
// std::generate_n(std::begin(v), size, [&, i=0]() mutable { return (++i)%(max-min+1) + min; });
|
||||
// return v;
|
||||
// };
|
||||
};
|
||||
constexpr auto eTask = alsk::edsl::makeOperand<Data(int, int), Task>();
|
||||
constexpr auto eTaskStdFun = alsk::edsl::makeOperand<Data(int, int), std::function<Data(int, int)>>();
|
||||
|
||||
template<std::size_t count>
|
||||
void taskV() {
|
||||
std::vector<int> v(count);
|
||||
std::generate_n(std::begin(v), count, [i=0]() mutable { return i++; });
|
||||
for(std::size_t i = 0; i < count; ++i)
|
||||
celero::DoNotOptimizeAway(std::accumulate(begin(v), end(v), i));
|
||||
}
|
||||
template<std::size_t count>
|
||||
constexpr auto eTaskV = alsk::edsl::makeOperand<void(), FN(taskV<count>)>();
|
||||
template<std::size_t count>
|
||||
constexpr auto eTaskVStdFun = alsk::edsl::makeOperand<void(), std::function<void()>>();
|
||||
|
||||
Data taskD(Data const&);
|
||||
constexpr auto eTaskD = alsk::edsl::makeOperand<Data(Data const&), FN(taskD)>();
|
||||
constexpr auto eTaskDStdFun = alsk::edsl::makeOperand<Data(Data const&), std::function<Data(Data const&)>>();
|
||||
|
||||
Data const& select(Data const&, Data const&);
|
||||
constexpr auto eSelect = alsk::edsl::makeOperand<Data(Data const&, Data const&), FN(select)>();
|
||||
constexpr auto eSelectStdFun = alsk::edsl::makeOperand<Data(Data const&, Data const&), std::function<Data(Data const&, Data const&)>>();
|
||||
|
||||
Value project(Data const&, Value const&);
|
||||
constexpr auto eProject = alsk::edsl::makeOperand<Value(Data const&, Value const&), FN(project)>();
|
||||
constexpr auto eProjectStdFun = alsk::edsl::makeOperand<Value(Data const&, Value const&), std::function<Value(Data const&, Value const&)>>();
|
||||
|
||||
}
|
||||
|
||||
#endif
|
34
celero/bone/farm.cpp
Normal file
34
celero/bone/farm.cpp
Normal file
@ -0,0 +1,34 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
|
||||
constexpr unsigned samples = 30, iterations = 10, cores = 4;
|
||||
|
||||
constexpr unsigned n = 64;
|
||||
constexpr std::size_t vecSize = 1'000;
|
||||
|
||||
constexpr auto eFarm = n*eTaskV<vecSize>;
|
||||
|
||||
// Baseline: plain loop calling the task n times, without any skeleton.
BASELINE(Farm, Handwritten, samples, iterations) {
	unsigned done = 0;
	while(done < n) {
		taskV<vecSize>();
		++done;
	}
}
|
||||
|
||||
// Same farm expressed as a skeleton (eFarm) and run with the Sequential executor.
BENCHMARK(Farm, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eFarm);
	instance();
}
|
||||
|
||||
// Baseline for the parallel farm: OpenMP parallel loop over the n tasks using
// `cores` threads. The experiment name is spelled "Handwritten" so that it
// matches the other hand-written baselines in the reports.
BASELINE(FarmPar, Handwritten, samples, iterations) {
	#pragma omp parallel for num_threads(cores)
	for(unsigned i = 0; i < n; ++i) taskV<vecSize>();
}
|
||||
|
||||
// Skeleton farm run with the StaticThread executor, with executor.cores set to `cores`.
BENCHMARK(FarmPar, Parallel, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(eFarm);
	instance.executor.cores = cores;

	instance();
}
|
111
celero/bone/farmsel.cpp
Normal file
111
celero/bone/farmsel.cpp
Normal file
@ -0,0 +1,111 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
using namespace alsk::edsl;
|
||||
using namespace alsk::arg;
|
||||
|
||||
constexpr unsigned samples = 10, iterations = 100, cores = 4;
|
||||
|
||||
constexpr std::size_t vecSize = 10'000;
|
||||
constexpr unsigned n = 128;
|
||||
constexpr int minValue = -250, maxValue = +250;
|
||||
|
||||
decltype(auto) hwFarmSel(int min, int max) {
|
||||
Task task{vecSize};
|
||||
Data best{};
|
||||
|
||||
if(n)
|
||||
best = task(min, max);
|
||||
for(std::size_t i = 1; i < n; ++i) {
|
||||
Data current = task(min, max);
|
||||
best = select(current, best);
|
||||
}
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
decltype(auto) hwFarmSelSk(int min, int max) {
|
||||
Task task{vecSize};
|
||||
Data best{};
|
||||
|
||||
std::vector<Data> bests(n);
|
||||
|
||||
for(std::size_t i = 0; i < n; ++i)
|
||||
bests[i] = task(min, max);
|
||||
|
||||
best = std::move(bests[0]);
|
||||
for(std::size_t i = 1; i < n; ++i)
|
||||
best = select(std::move(bests[i-1]), std::move(best));
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
decltype(auto) hwFarmSelPar(int min, int max) {
|
||||
Task task{vecSize};
|
||||
Data best{};
|
||||
|
||||
std::vector<Data> bests(n);
|
||||
|
||||
#pragma omp parallel for num_threads(cores)
|
||||
for(std::size_t i = 0; i < n; ++i)
|
||||
bests[i] = task(min, max);
|
||||
|
||||
best = std::move(bests[0]);
|
||||
for(std::size_t i = 1; i < n; ++i)
|
||||
best = select(std::move(bests[i-1]), std::move(best));
|
||||
|
||||
return best;
|
||||
}
|
||||
|
||||
constexpr auto eFarmSel = link<R<1>(int, int)>(n * link<Data(P<0>, P<1>)>(eTask)) ->* eSelect;
|
||||
constexpr auto eFarmSelStdFun = link<R<1>(int, int)>(n * link<Data(P<0>, P<1>)>(eTaskStdFun)) ->* eSelectStdFun;
|
||||
|
||||
// Baseline: interleaved hand-written farm + selection.
BASELINE(FarmSel, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSel(minValue, maxValue));
}
|
||||
|
||||
// Hand-written variant that generates all vectors first and selects afterwards.
BENCHMARK(FarmSel, HandwrittenSk, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSelSk(minValue, maxValue));
}
|
||||
|
||||
// eFarmSel skeleton run with the Sequential executor; the task's vector size
// is configured before the call.
BENCHMARK(FarmSel, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eFarmSel);
	instance.skeleton.task.size = vecSize;

	celero::DoNotOptimizeAway(instance(minValue, maxValue));
}
|
||||
|
||||
// Same skeleton with std::function operands: both callables are assigned at
// run time before the call.
BENCHMARK(FarmSel, SkeletonStdFunction, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eFarmSelStdFun);
	instance.skeleton.task = Task{vecSize};
	instance.skeleton.select = bench::select;

	celero::DoNotOptimizeAway(instance(minValue, maxValue));
}
|
||||
|
||||
// Baseline for the parallel group: OpenMP hand-written farm + selection.
BASELINE(FarmSelPar, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSelPar(minValue, maxValue));
}
|
||||
|
||||
// eFarmSel skeleton run with the StaticThread executor; executor.cores and the
// task's vector size are configured before the call.
BENCHMARK(FarmSelPar, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(eFarmSel);
	instance.skeleton.task.size = vecSize;
	instance.executor.cores = cores;

	celero::DoNotOptimizeAway(instance(minValue, maxValue));
}
|
50
celero/bone/itersel.cpp
Normal file
50
celero/bone/itersel.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
using namespace alsk::edsl;
|
||||
using namespace alsk::arg;
|
||||
|
||||
constexpr unsigned samples = 50, iterations = 100;
|
||||
constexpr unsigned n = 8192; // if too small => bad results
|
||||
constexpr auto initVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
|
||||
|
||||
// Hand-written iterative selection: starting from a copy of `init`, applies
// taskD to the current best n times, keeping after each round whichever of
// {new vector, current best} select() prefers. Returns the result by value.
decltype(auto) hwIterSel(Data const& init) {
	Data winner = init;

	std::size_t round = 0;
	while(round < n) {
		Data candidate = taskD(winner);
		winner = select(std::move(candidate), std::move(winner));
		++round;
	}

	return winner;
}
|
||||
|
||||
constexpr auto eIterSel = &link<Data(Data const&)>(n * eTaskD) ->* eSelect;
|
||||
constexpr auto eIterSelStdFun = &link<Data(Data const&)>(n * eTaskDStdFun) ->* eSelectStdFun;
|
||||
|
||||
// Baseline: hand-written iterative selection seeded with initVector.
BASELINE(IterSel, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwIterSel(initVector));
}
|
||||
|
||||
// eIterSel skeleton run with the Sequential executor.
BENCHMARK(IterSel, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eIterSel);

	celero::DoNotOptimizeAway(instance(initVector));
}
|
||||
|
||||
// Same skeleton with std::function operands, bound at run time to taskD and
// bench::select before the call.
BENCHMARK(IterSel, SkeletonStdFunction, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eIterSelStdFun);
	instance.skeleton.task = taskD;
	instance.skeleton.select = bench::select;

	celero::DoNotOptimizeAway(instance(initVector));
}
|
32
celero/bone/loop.cpp
Normal file
32
celero/bone/loop.cpp
Normal file
@ -0,0 +1,32 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
using namespace alsk::arg;
|
||||
|
||||
constexpr unsigned samples = 50, iterations = 100;
|
||||
constexpr unsigned n = 100, vecSize = 100;
|
||||
|
||||
void hwLoop() {
|
||||
for(std::size_t i = 0; i < n; ++i) taskV<vecSize>();
|
||||
}
|
||||
|
||||
constexpr auto eLoop = seq(n * eTaskV<vecSize>);
|
||||
constexpr auto eLoopStdFun = seq(n * eTaskVStdFun<vecSize>);
|
||||
|
||||
// Baseline: the hand-written loop.
BASELINE(Loop, Handwritten, samples, iterations) {
	hwLoop();
}
|
||||
|
||||
// eLoop skeleton run with the Sequential executor.
BENCHMARK(Loop, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eLoop);
	instance();
}
|
||||
|
||||
// Same skeleton with a std::function operand, bound at run time to taskV<vecSize>.
BENCHMARK(Loop, SkeletonStdFunction, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eLoopStdFun);
	instance.skeleton.task = taskV<vecSize>;
	instance();
}
|
70
celero/bone/serial.cpp
Normal file
70
celero/bone/serial.cpp
Normal file
@ -0,0 +1,70 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
using namespace alsk::arg;
|
||||
using namespace alsk::edsl;
|
||||
|
||||
constexpr unsigned samples = 50, iterations = 100;
|
||||
constexpr std::size_t vecSize = 100'000;
|
||||
constexpr int minValue = -250, maxValue = +250;
|
||||
|
||||
decltype(auto) hwSerial(int min, int max) {
|
||||
Task task0{vecSize}, task1{vecSize};
|
||||
Data v0 = task0(min, max), v1 = task1(min, max);
|
||||
|
||||
Data const& v = select(v0, v1);
|
||||
return project(v, rand());
|
||||
}
|
||||
|
||||
decltype(auto) hwSerialBad(int min, int max) {
|
||||
Task task0{vecSize}, task1{vecSize};
|
||||
Data v2 = select(task0(min, max), task1(min, max));
|
||||
return project(v2, rand());
|
||||
}
|
||||
|
||||
constexpr auto eRand = makeOperand<int(), FN(rand)>();
|
||||
constexpr auto lTask = link<Data(P<0>, P<1>)>(eTask);
|
||||
constexpr auto eSerial = link<R<4>(int, int)>(lTask & lTask & link<Data(R<0>, R<1>)>(eSelect) & eRand & link<Value(R<2>, R<3>)>(eProject));
|
||||
|
||||
constexpr auto eRandStdFun = makeOperand<int(), std::function<int()>>();
|
||||
constexpr auto lTaskStdFun = link<Data(P<0>, P<1>)>(eTaskStdFun);
|
||||
constexpr auto eSerialStdFun = link<R<4>(int, int)>(
|
||||
lTaskStdFun & lTaskStdFun & link<Data(R<0>, R<1>)>(eSelectStdFun) &
|
||||
eRandStdFun & link<Value(R<2>, R<3>)>(eProjectStdFun));
|
||||
|
||||
// Baseline: hand-written serial pipeline.
BASELINE(Serial, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwSerial(minValue, maxValue));
}
|
||||
|
||||
// Hand-written pipeline that copies the selected vector.
BENCHMARK(Serial, HandwrittenBad, samples, iterations) {
	celero::DoNotOptimizeAway(hwSerialBad(minValue, maxValue));
}
|
||||
|
||||
// eSerial skeleton run with the Sequential executor; the vector size of both
// generating tasks (indices 0 and 1) is configured before the call.
BENCHMARK(Serial, Skeleton, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eSerial);
	instance.skeleton.task<0>().size = vecSize;
	instance.skeleton.task<1>().size = vecSize;

	celero::DoNotOptimizeAway(instance(minValue, maxValue));
}
|
||||
|
||||
// Same skeleton with std::function operands: the five stages are bound at run
// time, in pipeline order, before the call.
BENCHMARK(Serial, SkeletonStdFunction, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(eSerialStdFun);

	instance.skeleton.task<0>() = Task{vecSize};
	instance.skeleton.task<1>() = Task{vecSize};
	instance.skeleton.task<2>() = bench::select;
	instance.skeleton.task<3>() = rand;
	instance.skeleton.task<4>() = project;

	celero::DoNotOptimizeAway(instance(minValue, maxValue));
}
|
21
celero/bone/while.cpp
Normal file
21
celero/bone/while.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
using namespace bench;
|
||||
using namespace alsk::arg;
|
||||
|
||||
constexpr unsigned samples = 50, iterations = 100;
|
||||
constexpr unsigned n = 100, vecSize = 100;
|
||||
|
||||
// Loop condition for the hand-written while benchmark: decrements `c` and
// reports whether iterations remain. Comparing against zero (instead of
// converting the counter to bool) makes a non-positive start value stop the
// loop immediately rather than counting down until the counter overflows.
bool test(int& c) { return --c > 0; }
|
||||
|
||||
void hwLoop(int& c) {
|
||||
while(test(c)) taskV<vecSize>();
|
||||
}
|
||||
|
||||
// Baseline: hand-written while loop driven by a counter initialised to n.
BASELINE(While, Handwritten, samples, iterations) {
	int remaining = n;
	hwLoop(remaining);
}
|
57
celero/executor/common.h
Normal file
57
celero/executor/common.h
Normal file
@ -0,0 +1,57 @@
|
||||
#ifndef ALSK_CELERO_EXECUTOR_COMMON_H
|
||||
#define ALSK_CELERO_EXECUTOR_COMMON_H
|
||||
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
#include "../bone/common.h"
|
||||
|
||||
namespace bench {
|
||||
|
||||
constexpr auto buildExprFarm() {
|
||||
using namespace alsk::arg;
|
||||
using namespace alsk::edsl;
|
||||
return 20 * eTaskV<1000>;
|
||||
}
|
||||
|
||||
constexpr auto exprFarm = buildExprFarm();
|
||||
|
||||
constexpr auto buildExprFarmSel() {
|
||||
using namespace alsk::arg;
|
||||
using namespace alsk::edsl;
|
||||
return link<void(int, int)>(link<Data(P<0>, P<1>)>(eTask) & link<Data(R<0>)>((50 * link<Data(P<0>)>(eTaskD)) ->* eSelect));
|
||||
}
|
||||
|
||||
constexpr auto exprFarmSel = buildExprFarmSel();
|
||||
|
||||
constexpr auto buildExprTwo() {
|
||||
using namespace alsk::arg;
|
||||
using namespace alsk::edsl;
|
||||
|
||||
constexpr auto farmsel = link<Data(R<0>)>(1000 * link<Data(P<0>)>(eTaskD)) ->* eSelect;
|
||||
constexpr auto serial = link<R<1>(P<0>, P<1>)>(link<Data(P<0>, P<1>)>(eTask) & farmsel);
|
||||
return link<void(int, int)>(2 * serial);
|
||||
}
|
||||
|
||||
constexpr auto exprTwo = buildExprTwo();
|
||||
|
||||
constexpr auto buildExprTwoS() {
|
||||
using namespace alsk::arg;
|
||||
using namespace alsk::edsl;
|
||||
|
||||
constexpr auto farmsel = link<Data(Data const&)>(1000 * link<Data(P<0>)>(eTaskD)) ->* eSelect;
|
||||
constexpr auto itersel = &link<Data(R<0>)>(2 * farmsel) ->* eSelect;
|
||||
constexpr auto serial = link<R<1>(P<0>, P<1>)>(link<Data(P<0>, P<1>)>(eTask) & itersel);
|
||||
constexpr auto loop = &link<void(P<0>, P<1>)>(2 * serial);
|
||||
return link<void(int, int)>(2 * loop);
|
||||
}
|
||||
|
||||
constexpr auto exprTwoS = buildExprTwoS();
|
||||
|
||||
}
|
||||
|
||||
#endif
|
52
celero/executor/farm.cpp
Normal file
52
celero/executor/farm.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
constexpr unsigned samples = 12, iterations = 10, cores = 4;
|
||||
|
||||
// Baseline: bench::exprFarm run with the Sequential executor.
BASELINE(ExecFarm, Sequential, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(bench::exprFarm);
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the FirstLevelEqui executor, executor.cores = cores.
BENCHMARK(ExecFarm, FirstLevelEqui, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelEqui>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the FirstLevelGreedy executor, executor.cores = cores.
BENCHMARK(ExecFarm, FirstLevelGreedy, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelGreedy>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the FirstLevelNoOpti executor, executor.cores = cores.
BENCHMARK(ExecFarm, FirstLevelNoOpti, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelNoOpti>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the DynamicPool executor, executor.cores = cores.
BENCHMARK(ExecFarm, DynamicPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::DynamicPool>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the StaticPool executor, executor.cores = cores.
BENCHMARK(ExecFarm, StaticPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPool>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the StaticPoolId executor, executor.cores = cores.
BENCHMARK(ExecFarm, StaticPoolId, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPoolId>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
||||
|
||||
// bench::exprFarm run with the StaticThread executor, executor.cores = cores.
BENCHMARK(ExecFarm, StaticThread, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(bench::exprFarm);
	instance.executor.cores = cores;
	instance();
}
|
62
celero/executor/farmsel.cpp
Normal file
62
celero/executor/farmsel.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
constexpr unsigned samples = 12, iterations = 10, cores = 4;
|
||||
constexpr std::size_t vecSize = 100'000;
|
||||
constexpr int minValue = -250, maxValue = +250;
|
||||
|
||||
// Baseline: bench::exprFarmSel run with the Sequential executor; the first
// task's vector size is set before the call.
BASELINE(ExecFarmSel, Sequential, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the FirstLevelEqui executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, FirstLevelEqui, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelEqui>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the FirstLevelGreedy executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, FirstLevelGreedy, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelGreedy>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the FirstLevelNoOpti executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, FirstLevelNoOpti, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelNoOpti>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the DynamicPool executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, DynamicPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::DynamicPool>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the StaticPool executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, StaticPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPool>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the StaticPoolId executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, StaticPoolId, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPoolId>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprFarmSel run with the StaticThread executor; the first task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecFarmSel, StaticThread, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(bench::exprFarmSel);
	instance.skeleton.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
3
celero/executor/sequential.cpp
Normal file
3
celero/executor/sequential.cpp
Normal file
@ -0,0 +1,3 @@
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include "common.h"
|
62
celero/executor/twolevels.cpp
Normal file
62
celero/executor/twolevels.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
constexpr unsigned samples = 12, iterations = 10, cores = 4;
|
||||
constexpr std::size_t vecSize = 1000;
|
||||
constexpr int minValue = -250, maxValue = +250;
|
||||
|
||||
// Baseline: bench::exprTwo run with the Sequential executor; the generating
// task's vector size is set before the call.
BASELINE(ExecTwoLevels, Sequential, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the FirstLevelEqui executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, FirstLevelEqui, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelEqui>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the FirstLevelGreedy executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, FirstLevelGreedy, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelGreedy>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the FirstLevelNoOpti executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, FirstLevelNoOpti, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelNoOpti>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the DynamicPool executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, DynamicPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::DynamicPool>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the StaticPool executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, StaticPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPool>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the StaticPoolId executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, StaticPoolId, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPoolId>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwo run with the StaticThread executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevels, StaticThread, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(bench::exprTwo);
	instance.skeleton.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
62
celero/executor/twolevelshard.cpp
Normal file
62
celero/executor/twolevelshard.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include <celero/Celero.h>
|
||||
|
||||
#include "common.h"
|
||||
|
||||
constexpr unsigned samples = 12, iterations = 10, cores = 4;
|
||||
constexpr std::size_t vecSize = 1'000;
|
||||
constexpr int minValue = -250, maxValue = +250;
|
||||
|
||||
// Baseline: bench::exprTwoS run with the Sequential executor; the generating
// task's vector size is set before the call.
BASELINE(ExecTwoLevelsHard, Sequential, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::Sequential>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the FirstLevelEqui executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, FirstLevelEqui, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelEqui>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the FirstLevelGreedy executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, FirstLevelGreedy, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelGreedy>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the FirstLevelNoOpti executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, FirstLevelNoOpti, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::FirstLevelNoOpti>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the DynamicPool executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, DynamicPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::DynamicPool>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the StaticPool executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, StaticPool, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPool>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the StaticPoolId executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, StaticPoolId, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticPoolId>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
||||
|
||||
// bench::exprTwoS run with the StaticThread executor; the generating task's
// vector size and executor.cores are set before the call.
BENCHMARK(ExecTwoLevelsHard, StaticThread, samples, iterations) {
	auto instance = alsk::edsl::implement<alsk::exec::StaticThread>(bench::exprTwoS);
	instance.skeleton.task.task.task<0>().size = vecSize;
	instance.executor.cores = cores;
	instance(minValue, maxValue);
}
|
40
celero/inc/udm.h
Normal file
40
celero/inc/udm.h
Normal file
@ -0,0 +1,40 @@
|
||||
#ifndef BENCH_INC_UDM_H
|
||||
#define BENCH_INC_UDM_H
|
||||
|
||||
#include <celero/Celero.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
// Celero user-defined measurement storing std::size_t samples and reported
// under the name "time". Every statistic overridden below is switched off;
// reportMean is not overridden (its override is commented out), so the base
// class decides whether the mean is reported.
class GetRusageUDM: public celero::UserDefinedMeasurementTemplate<std::size_t> {
	// Label of the measurement.
	std::string getName() const override {
		return "time";
	}

	bool reportSize() const override { return false; }
	// bool reportMean() const override { return false; }
	bool reportVariance() const override { return false; }
	bool reportStandardDeviation() const override { return false; }
	bool reportSkewness() const override { return false; }
	bool reportKurtosis() const override { return false; }
	bool reportZScore() const override { return false; }
	bool reportMin() const override { return false; }
	bool reportMax() const override { return false; }
};
|
||||
|
||||
// Small stopwatch built on getrusage(): start() and stop() take two resource
// usage snapshots and get() returns the user CPU time (ru_utime) elapsed
// between them, in microseconds, divided by the iteration count given to
// start().
//
// All members are initialised, so calling get() before start()/stop() yields 0
// instead of reading indeterminate values.
class GetRusage {
	int _who;
	struct rusage _begin{};
	struct rusage _end{};
	int _iterations{1};

public:
	// `who` is forwarded as first argument of getrusage() (RUSAGE_SELF by default).
	explicit GetRusage(int who = RUSAGE_SELF): _who{who} {}

	// Records the iteration count and takes the starting snapshot.
	void start(int iterations) { _iterations = iterations; getrusage(_who, &_begin); }

	// Takes the final snapshot.
	void stop() { getrusage(_who, &_end); }

	// Average user time per iteration in microseconds, truncated.
	// Returns 0 when the iteration count is not positive (avoids a division by
	// zero) or when the measured interval is not positive (avoids converting a
	// negative value to an unsigned type).
	std::size_t get() const {
		if(_iterations <= 0)
			return 0;

		auto const& begin = _begin.ru_utime;
		auto const& end = _end.ru_utime;

		// Integer arithmetic: seconds scaled to microseconds plus the
		// microsecond remainder (which may be negative on its own).
		long long const totalUs =
			static_cast<long long>(end.tv_sec - begin.tv_sec) * 1'000'000LL
			+ static_cast<long long>(end.tv_usec - begin.tv_usec);
		if(totalUs <= 0)
			return 0;

		return static_cast<std::size_t>(totalUs / _iterations);
	}
};
|
||||
|
||||
|
||||
#endif
|
2
celero/main.cpp
Normal file
2
celero/main.cpp
Normal file
@ -0,0 +1,2 @@
|
||||
#include <celero/Celero.h>
|
||||
CELERO_MAIN
|
36
celero/thread.cpp
Normal file
36
celero/thread.cpp
Normal file
@ -0,0 +1,36 @@
|
||||
#include <celero/Celero.h>
|
||||
#include <alsk/alsk.h>
|
||||
|
||||
constexpr unsigned samples = 20;
|
||||
constexpr unsigned iterations = 500;
|
||||
|
||||
constexpr unsigned count = 1'000'000;
|
||||
|
||||
namespace {
|
||||
|
||||
unsigned r;
|
||||
void *f(void * = nullptr) {
|
||||
r = 0;
|
||||
for(unsigned volatile i = 0; i < count; ++i) r += r;
|
||||
return &r;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Baseline: the workload is called directly on the benchmark thread.
BASELINE(Thread, None, samples, iterations) {
	celero::DoNotOptimizeAway(f());
}
|
||||
|
||||
// Runs the workload on a freshly created POSIX thread and waits for it.
// The thread is joined only when its creation succeeded: joining an
// indeterminate pthread_t after a failed pthread_create is undefined.
// The local result pointer has its own name so that it does not shadow the
// file-level `r`, and starts as nullptr so it is never read uninitialised.
BENCHMARK(Thread, cthread, samples, iterations) {
	void *result = nullptr;
	pthread_t thread;
	if(pthread_create(&thread, nullptr, f, nullptr) == 0)
		pthread_join(thread, &result);
	celero::DoNotOptimizeAway(result);
}
|
||||
|
||||
// Runs the workload on a freshly created std::thread and waits for it.
BENCHMARK(Thread, stdthread, samples, iterations) {
	std::thread worker{f, nullptr};
	worker.join();
	celero::DoNotOptimizeAway(worker);
}
|
Reference in New Issue
Block a user