|
| 1 | +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t |
| 2 | +// RUN: NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t 2>&1 | FileCheck %s |
| 3 | + |
| 4 | +// RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=NaiveSum -DFLT=float %s -o %t |
| 5 | +// RUN: NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 not %run %t 2>&1 | FileCheck %s |
| 6 | + |
| 7 | +// RUN: %clangxx_nsan -O0 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t |
| 8 | +// RUN: NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t |
| 9 | + |
| 10 | +// RUN: %clangxx_nsan -O3 -mllvm -nsan-shadow-type-mapping=dqq -g -DSUM=KahanSum -DFLT=float %s -o %t |
| 11 | +// RUN: NSAN_OPTIONS=halt_on_error=1,log2_max_relative_error=19 %run %t |
| 12 | + |
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <random>
#include <vector>
| 17 | + |
// Straightforward left-to-right summation into a single accumulator of type T.
// No error compensation is applied, which is what makes this sum numerically
// unstable and is why the sanitizer is expected to flag its return value.
template <typename T>
__attribute__((noinline)) // Kept out of line so it appears in the reported call stack.
T NaiveSum(const std::vector<T>& values) {
  T total = 0;
  for (auto it = values.begin(); it != values.end(); ++it) {
    total += *it;
  }
  return total;
  // CHECK: WARNING: NumericalStabilitySanitizer: inconsistent shadow results while checking return
  // CHECK: float{{ *}}precision (native):
  // CHECK: double{{ *}}precision (shadow):
  // CHECK: {{#0 .*in .* NaiveSum}}
}
| 32 | + |
// Compensated (Kahan) summation: a numerically stable way to add up `values`.
// Alongside the running total it carries a correction term holding the
// rounding error of the previous addition, and feeds it back into the next one.
// https://en.wikipedia.org/wiki/Kahan_summation_algorithm
template <typename T>
__attribute__((noinline)) T KahanSum(const std::vector<T> &values) {
  T total = 0;
  T compensation = 0;
  for (const T &value : values) {
    // Apply the error carried over from the previous iteration.
    const T corrected = value - compensation;
    const T next = total + corrected;
    // (next - total) is what was actually added; removing `corrected` from it
    // leaves the rounding error of this addition, to be undone next time.
    compensation = (next - total) - corrected;
    total = next;
  }
  return total;
}
| 47 | + |
| 48 | +int main() { |
| 49 | + std::vector<FLT> values; |
| 50 | + constexpr int kNumValues = 1000000; |
| 51 | + values.reserve(kNumValues); |
| 52 | + // Using a seed to avoid flakiness. |
| 53 | + constexpr uint32_t kSeed = 0x123456; |
| 54 | + std::mt19937 gen(kSeed); |
| 55 | + std::uniform_real_distribution<FLT> dis(0.0f, 1000.0f); |
| 56 | + for (int i = 0; i < kNumValues; ++i) { |
| 57 | + values.push_back(dis(gen)); |
| 58 | + } |
| 59 | + |
| 60 | + const auto t1 = std::chrono::high_resolution_clock::now(); |
| 61 | + const auto sum = SUM(values); |
| 62 | + const auto t2 = std::chrono::high_resolution_clock::now(); |
| 63 | + printf("sum: %.8f\n", sum); |
| 64 | + std::cout << "runtime: " |
| 65 | + << std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1) |
| 66 | + .count() / |
| 67 | + 1000.0 |
| 68 | + << "ms\n"; |
| 69 | + return 0; |
| 70 | +} |
0 commit comments