pfor/benchmarks/imgpro.cpp

102 lines
2.5 KiB
C++

#include <algorithm>
#include <iostream>
#include <iterator>
#include <sstream>
#include <cmath>
#include "common.h"
// Argument passed to the BENCH macro around every benchmark variant in main
// (presumably a repetition count -- confirm against the macro in common.h).
constexpr std::size_t K = 1;
// Image dimensions: main allocates W*H ints and uses W as the row stride.
// W is odd, so an index and its +/-W neighbours always differ in parity.
// NOTE(review): W*H is 10'000'100'000, which does not fit a 32-bit long.
constexpr long W = 100'001;
constexpr long H = 100'000;
// Definition of the static member pfor::ParallelForParameters::nThreads with
// an initial value of 1; processCLA overwrites it when argv[2] is supplied.
std::size_t pfor::ParallelForParameters::nThreads{1};
// Options extracted from the command line by processCLA.
struct Arguments {
    // Benchmark variant taken from argv[1]; main compares it against
    // "seq", "omp", "gen_omp" and "gen_thread".
    std::string method;
};
// Parses the command line: `<prog> method [nThreads]`.
//
//  argv[1]  benchmark variant; must be one of "seq", "omp", "gen_omp",
//           "gen_thread". Returned in Arguments::method.
//  argv[2]  optional thread count; when present it is stored in
//           pfor::ParallelForParameters::nThreads, otherwise that variable
//           keeps its current value.
//
// On a missing method, an unknown method, or a thread count that is not a
// positive integer, a diagnostic is written to std::cerr and the process
// terminates with exit status 1.
Arguments processCLA(int argc, char** argv) {
    if(argc < 2) {
        std::cerr << "Usage: " << *argv << " method [nThreads]" << std::endl;
        std::cerr << " method: {seq, omp, gen_omp, gen_thread}" << std::endl;
        std::cerr << " nThreads: defaults to 1" << std::endl;
        std::exit(1);
    }

    Arguments args;
    args.method = std::string{argv[1]};
    if(args.method != "seq" && args.method != "omp" && args.method != "gen_omp" && args.method != "gen_thread") {
        std::cerr << "method out of bounds" << std::endl;
        std::exit(1);
    }

    if(argc >= 3) {
        // Extract into a signed type first: extracting straight into the
        // unsigned nThreads would store 0 on a parse failure and would wrap
        // a negative input to a huge value, both of which are unusable as a
        // thread count.
        std::istringstream iss{argv[2]};
        long long requested = 0;
        if(!(iss >> requested) || requested < 1) {
            std::cerr << "nThreads must be a positive integer" << std::endl;
            std::exit(1);
        }
        pfor::ParallelForParameters::nThreads = static_cast<std::size_t>(requested);
    }

    return args;
}
// Returns bits 24..31 of c narrowed to char (the "r" byte of a packed value).
char r(int c) {
    const int shifted = c >> 24;
    return static_cast<char>(shifted & 0xff);
}
// Returns bits 16..23 of c narrowed to char (the "g" byte of a packed value).
char g(int c) {
    const int shifted = c >> 16;
    return static_cast<char>(shifted & 0xff);
}
// Returns bits 8..15 of c narrowed to char (the "b" byte of a packed value).
char b(int c) {
    const int shifted = c >> 8;
    return static_cast<char>(shifted & 0xff);
}
// Returns bits 0..7 of c narrowed to char (the "a" byte of a packed value).
char a(int c) {
    const int lowByte = c & 0xff;
    return static_cast<char>(lowByte);
}
// Benchmark driver: runs one of four implementations of the same stencil
// update over a W*H int array, selected by the method name on the command
// line (validated by processCLA, which also sets the thread count).
//
// Every variant visits indices W+1, W+3, ... below (H-1)*W and replaces
// img_[i] with the integer average of img_[i-W], img_[i-1], img_[i+W] and
// img_[i+1]. W is odd, so all written indices are even and all read indices
// are odd: no iteration reads an element that another iteration writes.
int main(int argc, char** argv) {
Arguments args = processCLA(argc, argv);
// NOTE(review): the array is default-initialised, i.e. its elements are never
// given a value before the loops below read them.
auto img_ = new int[W*H]; // 4 channels (rgba); W*H image
// Wraps the raw pointer for use in pfor expressions; `class Img` is a tag type
// declared in place (presumably identifies this operand to pfor -- confirm).
auto img = pfor::Operand<decltype(img_), class Img>{img_};
// Per-element kernel: bumps a volatile counter ten times (volatile accesses
// cannot be optimised away, so this presumably models extra per-element work)
// and returns the truncated integer mean of the four neighbour values.
auto calc_ = [](int n, int w, int s, int e) {
auto volatile c = 0;
for(int i = 0; i < 10; ++i) ++c;
return (n+w+s+e)/4;
};
// Same kernel adapted for pfor expressions (semantics defined by pfor).
auto calc = pfor::makeOperator(calc_);
if(args.method == "seq") {
// Plain sequential loop. BENCH/END_BENCH are macros not defined in this file
// (presumably timing helpers from common.h).
BENCH(K)
for(long i = W+1; i < (H-1)*W; i += 2) {
img_[i] = calc_(img_[i-W], img_[i-1], img_[i+W], img_[i+1]);
}
END_BENCH();
} else if(args.method == "omp") {
// Same loop, parallelised by hand with OpenMP using the configured thread count.
BENCH(K)
#pragma omp parallel for num_threads(pfor::ParallelForParameters::nThreads)
for(long i = W+1; i < (H-1)*W; i += 2) {
img_[i] = calc_(img_[i-W], img_[i-1], img_[i+W], img_[i+1]);
}
END_BENCH();
} else if(args.method == "gen_omp") {
// Same update written as a pfor expression and run through pfor::ForLoopOMP.
// RangeCT<W+1, 2>{(H-1)*W} presumably means start W+1, step 2, end (H-1)*W,
// matching the hand-written loops above -- confirm against pfor.
BENCH(K)
pfor::Index i;
pfor::parallelFor<pfor::ForLoopOMP>(pfor::RangeCT<W+1, 2>{(H-1)*W},
// pfor::parallelFor<pfor::ForLoopOMP>(pfor::Range{W+1, (H-1)*W, 1},
img[i] = calc(img[i-_<W>], img[i-_<1>], img[i+_<W>], img[i+_<1>])
);
END_BENCH();
} else if(args.method == "gen_thread") {
// Same pfor expression, run through pfor::ForLoopThread instead.
BENCH(K)
pfor::Index i;
pfor::parallelFor<pfor::ForLoopThread>(pfor::RangeCT<W+1, 2>{(H-1)*W},
img[i] = calc(img[i-_<W>], img[i-_<1>], img[i+_<W>], img[i+_<1>])
);
END_BENCH();
}
// Dumps the whole array only when two consecutive rand() results coincide,
// which practically never happens; presumably this exists so the compiler
// must treat the computed values as used -- confirm intent.
if(rand()==rand()) {
for(long i = 0; i<W*H; ++i)
std::printf("%d,", img_[i]);
std::puts("");
}
delete[] img_;
}