#ifndef H_CELERO_UTILITIES_H #define H_CELERO_UTILITIES_H /// /// \author John Farrier /// /// \copyright Copyright 2015, 2016, 2017, 2018. 2019 John Farrier /// /// Licensed under the Apache License, Version 2.0 (the "License"); /// you may not use this file except in compliance with the License. /// You may obtain a copy of the License at /// /// http://www.apache.org/licenses/LICENSE-2.0 /// /// Unless required by applicable law or agreed to in writing, software /// distributed under the License is distributed on an "AS IS" BASIS, /// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. /// See the License for the specific language governing permissions and /// limitations under the License. /// #ifndef WIN32 #include #endif #ifdef __FreeBSD__ #include #endif #include #include #include #include #include #include namespace celero { /// /// \func DoNotOptimizeAway /// /// Used to prevent compiler optimization of a variable /// that performs no real purpose other than to participate /// in a benchmark /// /// Consider the following trivial benchmark: /// /// \code /// BASELINE(...) /// { /// int x = 0; /// /// for(int i = 0; i < 64; i++) /// { /// x += i; /// } /// } /// \endcode /// /// Using Ubuntu clang v3.0, the resultant assembly is highly optimized /// as one might expect, but not terribly useful for baselining: /// /// \verbatim /// movl $2016, %eax # imm = 0x7E0 /// ret /// \endverbatim /// /// Now, replace the inner loop with a call to DoNotOptimizeAway: /// /// \code /// DoNotOptimizeAway(x += i); /// \endcode /// /// The result is now a loop which is meaningful for establishing a /// baseline. /// /// \verbatim /// xorl %ecx, %ecx /// xorl %eax, %eax /// .LBB0_1: # =>This Inner Loop Header: Depth=1 /// addl %ecx, %eax /// incl %ecx /// cmpl $64, %ecx /// jne .LBB0_1 /// ret /// \endverbatim /// /// GCC 4.8 gives similar results. /// /// gcc.godbolt.org permalink: http://goo.gl/lsngwX /// /// Folly uses a simple bit of inline assembly: /// > template /// > void doNotOptimizeAway(T&& datum) { /// > asm volatile("" : "+r" (datum)); /// >} /// /// It would be great if that were portable with respect to both compilers and 32/64-bit targets. /// template void DoNotOptimizeAway(T&& x) { static auto ttid = std::this_thread::get_id(); if(ttid == std::thread::id()) { // This forces the value to never be optimized away // by taking a reference then using it. const auto* p = &x; putchar(*reinterpret_cast(p)); // If we do get here, kick out because something has gone wrong. std::abort(); } } /// Specialization for std::function objects which return a value. template void DoNotOptimizeAway(std::function&& x) { volatile auto foo = x(); static auto ttid = std::this_thread::get_id(); if(ttid == std::thread::id()) { // This forces the value to never be optimized away // by taking a reference then using it. const auto* p = &foo + &x; putchar(*reinterpret_cast(p)); // If we do get here, kick out because something has gone wrong. std::abort(); } } /// Specialization for std::function objects which return void. template <> CELERO_EXPORT void DoNotOptimizeAway(std::function&& x); /// /// Quick definition of the number of microseconds per second. /// constexpr uint64_t UsPerSec(1000000); /// /// Conversion from Microseconds to Seconds. /// constexpr double UsToSec{1.0e-6}; /// /// Drop-in replacement for std::rand(); /// CELERO_EXPORT int Random(); } // namespace celero #endif