thesis version

This commit is contained in:
2021-05-10 18:14:13 +02:00
commit b688da651b
191 changed files with 35833 additions and 0 deletions

32
celero/bone/common.cpp Normal file
View File

@ -0,0 +1,32 @@
#include <numeric>
#include "common.h"
namespace bench {
// Builds a vector of `size` integers: element k (0-based) is ((k+1) % (max-min+1)) + min.
// Precondition visible from the arithmetic: max-min+1 must not be zero.
Data Task::operator()(int min, int max) const {
	Data values(size);
	int counter = 0;
	for(auto& value : values) {
		++counter;
		value = counter % (max - min + 1) + min;
	}
	return values;
}
// Returns a vector two elements longer than `data`:
//   [0]  sum of all input elements,
//   [1]  that sum squared when it is odd, the sum itself otherwise,
//   [2…] a copy of the input.
Data taskD(Data const& data) {
	Data out(data.size() + 2);
	Data::value_type const total = std::accumulate(data.begin(), data.end(), Data::value_type{});
	out[0] = total;
	if(total & 1)
		out[1] = total * total;
	else
		out[1] = total;
	std::copy(data.begin(), data.end(), out.begin() + 2);
	return out;
}
// Returns a reference to whichever argument has the strictly smaller element sum;
// on a tie `b` is returned. The result aliases an argument, so it must not outlive it.
Data const& select(Data const& a, Data const& b) {
	auto const total = [](Data const& d) {
		return std::accumulate(std::begin(d), std::end(d), Data::value_type{});
	};
	if(total(a) < total(b))
		return a;
	return b;
}
// Folds `a` into a single value: init plus every element, added in order.
Data::value_type project(Data const& a, Data::value_type const& init) {
	Data::value_type total = init;
	for(Data::value_type const element : a)
		total = total + element;
	return total;
}
}

57
celero/bone/common.h Normal file
View File

@ -0,0 +1,57 @@
#ifndef ALSK_CELERO_BONE_COMMON_H
#define ALSK_CELERO_BONE_COMMON_H
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <numeric>
#include <vector>
#include <celero/Celero.h>
#include <alsk/alsk.h>
namespace bench {
// Payload type produced and consumed by the benchmark tasks.
using Data = std::vector<int>;
// Element type of Data (int).
using Value = Data::value_type;
// Callable that generates a Data vector of `size` elements (definition in common.cpp).
struct Task {
// Number of elements to generate. There is no default initializer: callers set it
// (e.g. `Task{vecSize}` or `skeleton.task.size = vecSize`) before invoking the task.
std::size_t size;
// Returns `size` integers computed from a running counter and the min/max arguments.
Data operator()(int min, int max) const;
// TODO inline version: improve benchmarking for skeleton?
// Data operator()(int min, int max) const {
// Data v(size);
// std::generate_n(std::begin(v), size, [&, i=0]() mutable { return (++i)%(max-min+1) + min; });
// return v;
// };
};
// eDSL operands with signature Data(int, int).
// eTask is typed with Task itself; eTaskStdFun is typed with std::function, and the
// benchmarks assign the callable at run time (e.g. `skeleton.task = Task{vecSize}`).
constexpr auto eTask = alsk::edsl::makeOperand<Data(int, int), Task>();
constexpr auto eTaskStdFun = alsk::edsl::makeOperand<Data(int, int), std::function<Data(int, int)>>();
// Synthetic workload without inputs or outputs.
// Fills a vector with 0, 1, …, count-1, then sums it `count` times, each time starting
// from a different initial value i (so the accumulation is done in std::size_t).
// Every sum is passed to celero::DoNotOptimizeAway.
template<std::size_t count>
void taskV() {
	std::vector<int> v(count);
	// std::iota writes the same 0, 1, 2, … sequence the previous counter lambda produced.
	std::iota(std::begin(v), std::end(v), 0);
	for(std::size_t i = 0; i < count; ++i)
		celero::DoNotOptimizeAway(std::accumulate(std::begin(v), std::end(v), i));
}
// eDSL operand of signature void() bound to taskV<count> through the FN macro.
template<std::size_t count>
constexpr auto eTaskV = alsk::edsl::makeOperand<void(), FN(taskV<count>)>();
// std::function flavour of the operand above. `count` does not appear in the initializer;
// the benchmarks assign the actual callable at run time (e.g. `skeleton.task = taskV<vecSize>`).
template<std::size_t count>
constexpr auto eTaskVStdFun = alsk::edsl::makeOperand<void(), std::function<void()>>();
// Free functions defined in common.cpp, each followed by two eDSL operands:
// one bound to the function through FN, one typed with std::function and left for
// the benchmark to fill in at run time.

// taskD: returns the input prefixed with two values derived from its element sum.
Data taskD(Data const&);
constexpr auto eTaskD = alsk::edsl::makeOperand<Data(Data const&), FN(taskD)>();
constexpr auto eTaskDStdFun = alsk::edsl::makeOperand<Data(Data const&), std::function<Data(Data const&)>>();
// select: returns a reference to the argument with the smaller element sum.
// NOTE(review): the operand signatures below return Data by value while select returns
// Data const& — presumably the skeleton copies the selected vector; confirm this is intended.
Data const& select(Data const&, Data const&);
constexpr auto eSelect = alsk::edsl::makeOperand<Data(Data const&, Data const&), FN(select)>();
constexpr auto eSelectStdFun = alsk::edsl::makeOperand<Data(Data const&, Data const&), std::function<Data(Data const&, Data const&)>>();
// project: sums the elements of a Data onto an initial value.
Value project(Data const&, Value const&);
constexpr auto eProject = alsk::edsl::makeOperand<Value(Data const&, Value const&), FN(project)>();
constexpr auto eProjectStdFun = alsk::edsl::makeOperand<Value(Data const&, Value const&), std::function<Value(Data const&, Value const&)>>();
}
#endif

34
celero/bone/farm.cpp Normal file
View File

@ -0,0 +1,34 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
// Sample and iteration counts handed to the Celero BASELINE/BENCHMARK macros below.
constexpr unsigned samples = 30;
constexpr unsigned iterations = 10;
// Thread count used by both parallel variants.
constexpr unsigned cores = 4;
// Number of tasks per run.
constexpr unsigned n = 64;
// Template argument of taskV (vector length and number of sums).
constexpr std::size_t vecSize = 1'000;
// Skeleton expression shared by the Skeleton and Parallel benchmarks: `n * eTaskV<vecSize>`.
// NOTE(review): in the alsk eDSL `n * operand` presumably denotes n repetitions of the task — confirm.
constexpr auto eFarm = n*eTaskV<vecSize>;
// Baseline of the Farm group: a plain loop calling taskV<vecSize> n times.
BASELINE(Farm, Handwritten, samples, iterations) {
	for(unsigned remaining = n; remaining != 0; --remaining) {
		taskV<vecSize>();
	}
}
// Farm group: eFarm implemented with alsk::exec::Sequential and invoked once.
BENCHMARK(Farm, Skeleton, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eFarm);
	skeleton();
}
// Baseline of the FarmPar group: the handwritten loop, split across `cores` threads with OpenMP.
// The benchmark name was misspelled "Handwritter"; it now matches the "Handwritten"
// baselines of every other group, so reports list it under a consistent name.
BASELINE(FarmPar, Handwritten, samples, iterations) {
	#pragma omp parallel for num_threads(cores)
	for(unsigned i = 0; i < n; ++i) taskV<vecSize>();
}
// FarmPar group: eFarm implemented with alsk::exec::StaticThread, executor set to `cores` cores.
BENCHMARK(FarmPar, Parallel, samples, iterations) {
	using Executor = alsk::exec::StaticThread;
	auto skeleton = alsk::edsl::implement<Executor>(eFarm);
	skeleton.executor.cores = cores;
	skeleton();
}

111
celero/bone/farmsel.cpp Normal file
View File

@ -0,0 +1,111 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
using namespace alsk::edsl;
using namespace alsk::arg;
// Sample and iteration counts handed to the Celero BASELINE/BENCHMARK macros below.
constexpr unsigned samples = 10;
constexpr unsigned iterations = 100;
// Thread count used by the parallel variants.
constexpr unsigned cores = 4;
// Length of each generated vector.
constexpr std::size_t vecSize = 10'000;
// Number of candidate vectors generated per run.
constexpr unsigned n = 128;
// Bounds passed to Task::operator().
constexpr int minValue = -250;
constexpr int maxValue = +250;
decltype(auto) hwFarmSel(int min, int max) {
Task task{vecSize};
Data best{};
if(n)
best = task(min, max);
for(std::size_t i = 1; i < n; ++i) {
Data current = task(min, max);
best = select(current, best);
}
return best;
}
// Handwritten two-phase variant: first generate all n vectors, then reduce them with `select`.
// Returns the selected vector by value.
//
// Fix: the reduction used to read bests[i-1], so it compared against the already
// moved-from bests[0] and never looked at bests[n-1]. It now visits bests[1..n-1].
// The std::move calls on select's arguments were dropped: select takes const references,
// so they had no effect.
decltype(auto) hwFarmSelSk(int min, int max) {
	static_assert(n > 0, "hwFarmSelSk reads bests[0] and needs at least one task");
	Task task{vecSize};
	Data best{};
	std::vector<Data> bests(n);
	for(std::size_t i = 0; i < n; ++i)
		bests[i] = task(min, max);
	// Seed the reduction with the first result; bests[0] is not read again afterwards.
	best = std::move(bests[0]);
	for(std::size_t i = 1; i < n; ++i)
		best = select(bests[i], best);
	return best;
}
// Handwritten parallel variant: the n vectors are generated inside an OpenMP parallel-for
// using `cores` threads, then reduced sequentially with `select`.
// Returns the selected vector by value.
//
// Fix: the reduction used to read bests[i-1], so it compared against the already
// moved-from bests[0] and never looked at bests[n-1]. It now visits bests[1..n-1].
// The std::move calls on select's arguments were dropped: select takes const references,
// so they had no effect.
decltype(auto) hwFarmSelPar(int min, int max) {
	static_assert(n > 0, "hwFarmSelPar reads bests[0] and needs at least one task");
	Task task{vecSize};
	Data best{};
	std::vector<Data> bests(n);
	// Each iteration writes only its own slot of `bests`.
	#pragma omp parallel for num_threads(cores)
	for(std::size_t i = 0; i < n; ++i)
		bests[i] = task(min, max);
	// Seed the reduction with the first result; bests[0] is not read again afterwards.
	best = std::move(bests[0]);
	for(std::size_t i = 1; i < n; ++i)
		best = select(bests[i], best);
	return best;
}
// Skeleton expressions for the FarmSel benchmarks: n linked instances of the task operand,
// combined with the select operand through `->*`. The outer link takes (int, int).
// NOTE(review): P<k>/R<k> presumably refer to the k-th parameter / k-th sub-result of the
// enclosing link — confirm against the alsk eDSL documentation.
constexpr auto eFarmSel = link<R<1>(int, int)>(n * link<Data(P<0>, P<1>)>(eTask)) ->* eSelect;
// Same structure built from the std::function operands; callables are assigned in the benchmark.
constexpr auto eFarmSelStdFun = link<R<1>(int, int)>(n * link<Data(P<0>, P<1>)>(eTaskStdFun)) ->* eSelectStdFun;
// Baseline of the FarmSel group: the interleaved handwritten version.
BASELINE(FarmSel, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSel(minValue, maxValue));
}
// FarmSel group: the handwritten generate-all-then-reduce version.
BENCHMARK(FarmSel, HandwrittenSk, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSelSk(minValue, maxValue));
}
// FarmSel group: eFarmSel implemented with alsk::exec::Sequential.
BENCHMARK(FarmSel, Skeleton, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eFarmSel);
	// The Task held by the skeleton needs its size before the call.
	skeleton.skeleton.task.size = vecSize;
	celero::DoNotOptimizeAway(skeleton(minValue, maxValue));
}
// FarmSel group: the std::function-based expression, with both callables assigned at run time.
BENCHMARK(FarmSel, SkeletonStdFunction, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eFarmSelStdFun);
	skeleton.skeleton.task = Task{vecSize};
	skeleton.skeleton.select = bench::select;
	celero::DoNotOptimizeAway(skeleton(minValue, maxValue));
}
// Baseline of the FarmSelPar group: the handwritten OpenMP version.
BASELINE(FarmSelPar, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwFarmSelPar(minValue, maxValue));
}
// FarmSelPar group: eFarmSel implemented with alsk::exec::StaticThread, executor set to `cores` cores.
BENCHMARK(FarmSelPar, Skeleton, samples, iterations) {
	using Executor = alsk::exec::StaticThread;
	auto skeleton = alsk::edsl::implement<Executor>(eFarmSel);
	skeleton.executor.cores = cores;
	skeleton.skeleton.task.size = vecSize;
	celero::DoNotOptimizeAway(skeleton(minValue, maxValue));
}

50
celero/bone/itersel.cpp Normal file
View File

@ -0,0 +1,50 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
using namespace alsk::edsl;
using namespace alsk::arg;
// Sample and iteration counts handed to the Celero BASELINE/BENCHMARK macros below.
constexpr unsigned samples = 50;
constexpr unsigned iterations = 100;
// Number of task/select rounds per run.
constexpr unsigned n = 8192; // if too small => bad results
// Initial data; deduced as std::initializer_list<int> and converted to Data at each call.
constexpr auto initVector = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
// Handwritten iterate-and-select: starting from a copy of `init`, runs n rounds in which
// taskD is applied to the current best and `select` decides which of the two is kept.
// Returns the final best by value.
decltype(auto) hwIterSel(Data const& init) {
	Data best = init;
	for(unsigned round = 0; round != n; ++round) {
		Data const candidate = taskD(best);
		// select takes and returns const references, so the assignment is a copy.
		best = select(candidate, best);
	}
	return best;
}
// Skeleton expressions for the IterSel benchmarks: n linked taskD operands combined with
// the select operand through `->*`, with unary `&` applied to the linked part.
// NOTE(review): the unary `&` presumably marks the part as sequential/iterative in the
// alsk eDSL (as opposed to the farm form used in farmsel.cpp) — confirm.
constexpr auto eIterSel = &link<Data(Data const&)>(n * eTaskD) ->* eSelect;
// Same structure built from the std::function operands; callables are assigned in the benchmark.
constexpr auto eIterSelStdFun = &link<Data(Data const&)>(n * eTaskDStdFun) ->* eSelectStdFun;
// Baseline of the IterSel group: the handwritten loop.
BASELINE(IterSel, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwIterSel(initVector));
}
// IterSel group: eIterSel implemented with alsk::exec::Sequential.
BENCHMARK(IterSel, Skeleton, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eIterSel);
	celero::DoNotOptimizeAway(skeleton(initVector));
}
// IterSel group: the std::function-based expression, with both callables assigned at run time.
BENCHMARK(IterSel, SkeletonStdFunction, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eIterSelStdFun);
	skeleton.skeleton.task = taskD;
	skeleton.skeleton.select = bench::select;
	celero::DoNotOptimizeAway(skeleton(initVector));
}

32
celero/bone/loop.cpp Normal file
View File

@ -0,0 +1,32 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
using namespace alsk::arg;
// Sample and iteration counts handed to the Celero BASELINE/BENCHMARK macros below.
constexpr unsigned samples = 50;
constexpr unsigned iterations = 100;
// Number of loop iterations per run.
constexpr unsigned n = 100;
// Template argument of taskV.
constexpr unsigned vecSize = 100;
// Handwritten reference: calls taskV<vecSize> exactly n times.
void hwLoop() {
	std::size_t remaining = n;
	while(remaining-- > 0) {
		taskV<vecSize>();
	}
}
// Skeleton expressions for the Loop benchmarks: `seq` applied to n repetitions of the task operand.
// NOTE(review): `seq` presumably requests sequential repetition in the alsk eDSL — confirm.
constexpr auto eLoop = seq(n * eTaskV<vecSize>);
// Same structure built from the std::function operand; the callable is assigned in the benchmark.
constexpr auto eLoopStdFun = seq(n * eTaskVStdFun<vecSize>);
// Baseline of the Loop group: the handwritten loop.
BASELINE(Loop, Handwritten, samples, iterations) {
	hwLoop();
}
// Loop group: eLoop implemented with alsk::exec::Sequential and invoked once.
BENCHMARK(Loop, Skeleton, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eLoop);
	skeleton();
}
// Loop group: the std::function-based expression, with taskV<vecSize> assigned at run time.
BENCHMARK(Loop, SkeletonStdFunction, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto skeleton = alsk::edsl::implement<Executor>(eLoopStdFun);
	skeleton.skeleton.task = taskV<vecSize>;
	skeleton();
}

70
celero/bone/serial.cpp Normal file
View File

@ -0,0 +1,70 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
using namespace alsk::arg;
using namespace alsk::edsl;
// Sample and iteration counts handed to the Celero BASELINE/BENCHMARK macros below.
constexpr unsigned samples = 50;
constexpr unsigned iterations = 100;
// Length of each generated vector.
constexpr std::size_t vecSize = 100'000;
// Bounds passed to Task::operator().
constexpr int minValue = -250;
constexpr int maxValue = +250;
decltype(auto) hwSerial(int min, int max) {
Task task0{vecSize}, task1{vecSize};
Data v0 = task0(min, max), v1 = task1(min, max);
Data const& v = select(v0, v1);
return project(v, rand());
}
decltype(auto) hwSerialBad(int min, int max) {
Task task0{vecSize}, task1{vecSize};
Data v2 = select(task0(min, max), task1(min, max));
return project(v2, rand());
}
// Skeleton expression mirroring hwSerial: five operands chained with `&`
// (task, task, select, rand, project), with the outer link taking (int, int).
// NOTE(review): P<k> presumably names the k-th parameter of the enclosing link and R<k>
// the result of the k-th chained operand — confirm against the alsk eDSL documentation.
constexpr auto eRand = makeOperand<int(), FN(rand)>();
constexpr auto lTask = link<Data(P<0>, P<1>)>(eTask);
constexpr auto eSerial = link<R<4>(int, int)>(lTask & lTask & link<Data(R<0>, R<1>)>(eSelect) & eRand & link<Value(R<2>, R<3>)>(eProject));
// Same chain built from std::function operands; the five callables are assigned in the benchmark.
constexpr auto eRandStdFun = makeOperand<int(), std::function<int()>>();
constexpr auto lTaskStdFun = link<Data(P<0>, P<1>)>(eTaskStdFun);
constexpr auto eSerialStdFun = link<R<4>(int, int)>(
lTaskStdFun & lTaskStdFun & link<Data(R<0>, R<1>)>(eSelectStdFun) &
eRandStdFun & link<Value(R<2>, R<3>)>(eProjectStdFun));
// Baseline of the Serial group: the handwritten pipeline without the extra copy.
BASELINE(Serial, Handwritten, samples, iterations) {
	celero::DoNotOptimizeAway(hwSerial(minValue, maxValue));
}
// Serial group: the handwritten pipeline that copies the selected vector.
BENCHMARK(Serial, HandwrittenBad, samples, iterations) {
	celero::DoNotOptimizeAway(hwSerialBad(minValue, maxValue));
}
// Serial group: eSerial implemented with alsk::exec::Sequential.
BENCHMARK(Serial, Skeleton, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto pipeline = alsk::edsl::implement<Executor>(eSerial);
	// Both Task instances held by the skeleton need their size before the call.
	pipeline.skeleton.task<0>().size = vecSize;
	pipeline.skeleton.task<1>().size = vecSize;
	celero::DoNotOptimizeAway(pipeline(minValue, maxValue));
}
// Serial group: the std::function-based chain, with all five callables assigned at run time
// in the order they appear in eSerialStdFun (task, task, select, rand, project).
BENCHMARK(Serial, SkeletonStdFunction, samples, iterations) {
	using Executor = alsk::exec::Sequential;
	auto pipeline = alsk::edsl::implement<Executor>(eSerialStdFun);
	pipeline.skeleton.task<0>() = Task{vecSize};
	pipeline.skeleton.task<1>() = Task{vecSize};
	pipeline.skeleton.task<2>() = bench::select;
	pipeline.skeleton.task<3>() = rand;
	pipeline.skeleton.task<4>() = project;
	celero::DoNotOptimizeAway(pipeline(minValue, maxValue));
}

21
celero/bone/while.cpp Normal file
View File

@ -0,0 +1,21 @@
#include <celero/Celero.h>
#include <alsk/alsk.h>
#include "common.h"
using namespace bench;
using namespace alsk::arg;
// Sample and iteration counts handed to the Celero BASELINE macro below.
constexpr unsigned samples = 50;
constexpr unsigned iterations = 100;
// Initial value of the loop counter.
constexpr unsigned n = 100;
// Template argument of taskV.
constexpr unsigned vecSize = 100;
// Loop condition: decrements the counter in place and reports whether it is still non-zero.
bool test(int& c) {
	c -= 1;
	return c != 0;
}
// Handwritten while loop: runs taskV<vecSize> for as long as test(c) holds.
// test decrements before checking, so a counter starting at k > 0 yields k-1 task calls
// and is left at 0.
void hwLoop(int& c) {
	for(;;) {
		if(!test(c))
			break;
		taskV<vecSize>();
	}
}
// Baseline of the While group: the handwritten loop driven by a counter starting at n.
BASELINE(While, Handwritten, samples, iterations) {
	int remaining = n;
	hwLoop(remaining);
}