GPUOpen-Drivers
diff --git a/‎compiler-rt/lib/nsan/nsan.cpp
Lines changed: 30 additions & 4 deletions b/‎compiler-rt/lib/nsan/nsan.cpp
Lines changed: 30 additions & 4 deletions
diff --git a/‎compiler-rt/lib/nsan/nsan_flags.inc
Lines changed: 2 additions & 0 deletions b/‎compiler-rt/lib/nsan/nsan_flags.inc
Lines changed: 2 additions & 0 deletions
diff --git a/‎compiler-rt/test/nsan/nan.cpp
Lines changed: 25 additions & 0 deletions b/‎compiler-rt/test/nsan/nan.cpp
Lines changed: 25 additions & 0 deletions
diff --git a/‎compiler-rt/test/nsan/softmax.cpp
Lines changed: 54 additions & 0 deletions b/‎compiler-rt/test/nsan/softmax.cpp
Lines changed: 54 additions & 0 deletions
diff --git a/‎compiler-rt/test/nsan/vec_sqrt.cpp
Lines changed: 34 additions & 0 deletions b/‎compiler-rt/test/nsan/vec_sqrt.cpp
Lines changed: 34 additions & 0 deletions
diff --git a/‎compiler-rt/test/nsan/vec_sqrt_ext.cpp
Lines changed: 25 additions & 0 deletions b/‎compiler-rt/test/nsan/vec_sqrt_ext.cpp
Lines changed: 25 additions & 0 deletions
diff --git a/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion b/‎llvm/include/llvm/Config/llvm-config.h.cmake
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h
Lines changed: 8 additions & 8 deletions b/‎mlir/include/mlir/Analysis/DataFlow/DeadCodeAnalysis.h
Lines changed: 8 additions & 8 deletions
diff --git a/‎mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h
Lines changed: 19 additions & 18 deletions b/‎mlir/include/mlir/Analysis/DataFlow/DenseAnalysis.h
Lines changed: 19 additions & 18 deletions
diff --git a/‎mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h
Lines changed: 1 addition & 1 deletion b/‎mlir/include/mlir/Analysis/DataFlow/IntegerRangeAnalysis.h
Lines changed: 1 addition & 1 deletion
diff --git a/‎mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h
Lines changed: 4 additions & 4 deletions b/‎mlir/include/mlir/Analysis/DataFlow/SparseAnalysis.h
Lines changed: 4 additions & 4 deletions
@@ -409,21 +409,21 @@ __nsan_dump_shadow_mem(const u8 *addr, size_t size_bytes, size_t bytes_per_line,
   }
 }
 
-alignas(16) SANITIZER_INTERFACE_ATTRIBUTE
+alignas(64) SANITIZER_INTERFACE_ATTRIBUTE
     thread_local uptr __nsan_shadow_ret_tag = 0;
 
-alignas(16) SANITIZER_INTERFACE_ATTRIBUTE
+alignas(64) SANITIZER_INTERFACE_ATTRIBUTE
     thread_local char __nsan_shadow_ret_ptr[kMaxVectorWidth *
                                             sizeof(__float128)];
 
-alignas(16) SANITIZER_INTERFACE_ATTRIBUTE
+alignas(64) SANITIZER_INTERFACE_ATTRIBUTE
     thread_local uptr __nsan_shadow_args_tag = 0;
 
 // Maximum number of args. This should be enough for anyone (tm). An alternate
 // scheme is to have the generated code create an alloca and make
 // __nsan_shadow_args_ptr point to the alloca.
 constexpr const int kMaxNumArgs = 128;
-alignas(16) SANITIZER_INTERFACE_ATTRIBUTE
+alignas(64) SANITIZER_INTERFACE_ATTRIBUTE
     thread_local char __nsan_shadow_args_ptr[kMaxVectorWidth * kMaxNumArgs *
                                              sizeof(__float128)];
 
@@ -445,6 +445,32 @@ int32_t checkFT(const FT value, ShadowFT Shadow, CheckTypeT CheckType,
   const InternalFT check_value = value;
   const InternalFT check_shadow = Shadow;
 
+  // We only check for NaNs in the value, not the shadow.
+  if (flags().check_nan && isnan(check_value)) {
+    GET_CALLER_PC_BP;
+    BufferedStackTrace stack;
+    stack.Unwind(pc, bp, nullptr, false);
+    if (GetSuppressionForStack(&stack, CheckKind::Consistency)) {
+      // FIXME: optionally print.
+      return flags().resume_after_suppression ? kResumeFromValue
+                                              : kContinueWithShadow;
+    }
+    Decorator D;
+    Printf("%s", D.Warning());
+    Printf("WARNING: NumericalStabilitySanitizer: NaN detected\n");
+    Printf("%s", D.Default());
+    stack.Print();
+    if (flags().halt_on_error) {
+      if (common_flags()->abort_on_error)
+        Printf("ABORTING\n");
+      else
+        Printf("Exiting\n");
+      Die();
+    }
+    // Further checks are meaningless once the value is known to be NaN.
+    return kResumeFromValue;
+  }
+
   // See this article for an interesting discussion of how to compare floats:
   // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
   static constexpr const FT Eps = FTInfo<FT>::kEpsilon;
 
@@ -48,3 +48,5 @@ NSAN_FLAG(bool, enable_loadtracking_stats, false,
           "due to invalid or unknown types.")
 NSAN_FLAG(bool, poison_in_free, true, "")
 NSAN_FLAG(bool, print_stats_on_exit, false, "If true, print stats on exit.")
+NSAN_FLAG(bool, check_nan, false,
+          "If true, check the floating-point number is nan")
@@ -0,0 +1,25 @@
+// RUN: %clangxx_nsan -O0 -g %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_nsan -O3 -g %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_nsan -O0 -g %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=1 not %run %t
+
+#include <cmath>
+#include <cstdio>
+
+// This function returns a NaN value for triggering the NaN detection.
+__attribute__((noinline)) float ReturnNaN(float p, float q) {
+  float ret = p / q;
+  return ret;
+  // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+}
+
+int main() {
+  float val = ReturnNaN(0., 0.);
+  printf("%f\n", val);
+  // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+  return 0;
+}
@@ -0,0 +1,54 @@
+// RUN: %clangxx_nsan -O0 -g -DSOFTMAX=softmax %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0,log2_max_relative_error=19 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_nsan -O3 -g -DSOFTMAX=softmax %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0,log2_max_relative_error=19 %run %t 2>&1 | FileCheck %s
+
+// RUN: %clangxx_nsan -O0 -g -DSOFTMAX=stable_softmax %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=1,log2_max_relative_error=19 %run %t 
+
+// RUN: %clangxx_nsan -O3 -g -DSOFTMAX=stable_softmax %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=1,log2_max_relative_error=19 %run %t
+
+#include<iostream>
+#include<vector>
+#include<algorithm>
+#include<cmath>
+
+// unstable softmax
+template <typename T>
+__attribute__((noinline)) void softmax(std::vector<T> &values) {
+    T sum_exp = 0.0;
+    for (auto &i: values) {
+      i = std::exp(i);
+      sum_exp += i;
+    }
+    for (auto &i: values) {
+      i /= sum_exp;
+    }
+}
+
+// use max value to avoid overflow
+// \sigma_i = exp(x_i) / \sum_j exp(x_j) = exp(x_i - max(x)) / \sum_j exp(x_j - max(x))
+template <typename T>
+__attribute__((noinline)) void stable_softmax(std::vector<T> &values) {
+  T sum_exp = 0.0;
+  T max_values = *std::max_element(values.begin(), values.end());
+  for (auto &i: values) {
+    i = std::exp(i - max_values);
+    sum_exp += i;
+  }
+  for (auto &i:values) {
+    i /= sum_exp;
+  }
+}
+
+int main() {
+  std::vector<double> data = {1000, 1001, 1002};
+  SOFTMAX(data);
+  for (auto i: data) {
+    printf("%f", i);
+    // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+  }
+  return 0;
+}
@@ -0,0 +1,34 @@
+// RUN: %clangxx_nsan -O0 -g -mavx %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_nsan -O3 -g -mavx %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+
+#include <cmath>
+#include <immintrin.h>
+#include <iostream>
+
+void simd_sqrt(const float *input, float *output, size_t size) {
+  size_t i = 0;
+  for (; i + 7 < size; i += 8) {
+    __m256 vec = _mm256_loadu_ps(&input[i]);
+    __m256 result = _mm256_sqrt_ps(vec);
+    _mm256_storeu_ps(&output[i], result);
+  }
+  for (; i < size; ++i) {
+    output[i] = std::sqrt(input[i]);
+    // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+  }
+}
+
+int main() {
+  float input[] = {1.0,  2.0,   -3.0,  4.0,   5.0,   6.0,  7.0,
+                   8.0,  9.0,   -10.0, 11.0,  12.0,  13.0, 14.0,
+                   15.0, -16.0, 17.0,  -18.0, -19.0, -20.0};
+  float output[20];
+  simd_sqrt(input, output, 20);
+  for (int i = 0; i < 20; ++i) {
+    std::cout << output[i] << std::endl;
+    // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+  }
+  return 0;
+}
@@ -0,0 +1,25 @@
+// RUN: %clangxx_nsan -O0 -g -mavx %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+// RUN: %clangxx_nsan -O3 -g -mavx %s -o %t
+// RUN: NSAN_OPTIONS=check_nan=true,halt_on_error=0 %run %t 2>&1 | FileCheck %s
+#include <iostream>
+#include <cmath>
+
+typedef float v8sf __attribute__ ((vector_size(32)));
+
+v8sf simd_sqrt(v8sf a) {
+  return __builtin_elementwise_sqrt(a);
+  // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+}
+
+int main() {
+  v8sf a = {-1.0, -2.0, -3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
+  a = simd_sqrt(a);
+
+  // This prevents DCE.
+  for (size_t i = 0; i < 8; ++i) {
+    std::cout << a[i] << std::endl;
+    // CHECK: WARNING: NumericalStabilitySanitizer: NaN detected
+  }
+  return 0;
+}
@@ -16,7 +16,7 @@
 
 /* Indicate that this is LLVM compiled from the amd-gfx branch. */
 #define LLVM_HAVE_BRANCH_AMD_GFX
-#define LLVM_MAIN_REVISION 509555
+#define LLVM_MAIN_REVISION 509557
 
 /* Define if LLVM_ENABLE_DUMP is enabled */
 #cmakedefine LLVM_ENABLE_DUMP
 
@@ -35,21 +35,21 @@ namespace dataflow {
 //===----------------------------------------------------------------------===//
 
 /// This is a simple analysis state that represents whether the associated
-/// program point (either a block or a control-flow edge) is live.
+/// lattice anchor (either a block or a control-flow edge) is live.
 class Executable : public AnalysisState {
 public:
   using AnalysisState::AnalysisState;
 
-  /// Set the state of the program point to live.
+  /// Set the state of the lattice anchor to live.
   ChangeResult setToLive();
 
-  /// Get whether the program point is live.
+  /// Get whether the lattice anchor is live.
   bool isLive() const { return live; }
 
   /// Print the liveness.
   void print(raw_ostream &os) const override;
 
-  /// When the state of the program point is changed to live, re-invoke
+  /// When the state of the lattice anchor is changed to live, re-invoke
   /// subscribed analyses on the operations in the block and on the block
   /// itself.
   void onUpdate(DataFlowSolver *solver) const override;
@@ -60,8 +60,8 @@ class Executable : public AnalysisState {
   }
 
 private:
-  /// Whether the program point is live. Optimistically assume that the program
-  /// point is dead.
+  /// Whether the lattice anchor is live. Optimistically assume that the lattice
+  /// anchor is dead.
   bool live = false;
 
   /// A set of analyses that should be updated when this state changes.
@@ -140,10 +140,10 @@ class PredecessorState : public AnalysisState {
 // CFGEdge
 //===----------------------------------------------------------------------===//
 
-/// This program point represents a control-flow edge between a block and one
+/// This lattice anchor represents a control-flow edge between a block and one
 /// of its successors.
 class CFGEdge
-    : public GenericProgramPointBase<CFGEdge, std::pair<Block *, Block *>> {
+    : public GenericLatticeAnchorBase<CFGEdge, std::pair<Block *, Block *>> {
 public:
   using Base::Base;
 
 
@@ -91,15 +91,16 @@ class AbstractDenseForwardDataFlowAnalysis : public DataFlowAnalysis {
                                            const AbstractDenseLattice &before,
                                            AbstractDenseLattice *after) = 0;
 
-  /// Get the dense lattice after the execution of the given program point.
-  virtual AbstractDenseLattice *getLattice(ProgramPoint point) = 0;
+  /// Get the dense lattice after the execution of the given lattice anchor.
+  virtual AbstractDenseLattice *getLattice(LatticeAnchor anchor) = 0;
 
   /// Get the dense lattice after the execution of the given program point and
-  /// add it as a dependency to a program point. That is, every time the lattice
-  /// after point is updated, the dependent program point must be visited, and
-  /// the newly triggered visit might update the lattice after dependent.
+  /// add it as a dependency to a lattice anchor. That is, every time the
+  /// lattice after anchor is updated, the dependent program point must be
+  /// visited, and the newly triggered visit might update the lattice after
+  /// dependent.
   const AbstractDenseLattice *getLatticeFor(ProgramPoint dependent,
-                                            ProgramPoint point);
+                                            LatticeAnchor anchor);
 
   /// Set the dense lattice at control flow entry point and propagate an update
   /// if it changed.
@@ -249,9 +250,9 @@ class DenseForwardDataFlowAnalysis
   }
 
 protected:
-  /// Get the dense lattice after this program point.
-  LatticeT *getLattice(ProgramPoint point) override {
-    return getOrCreate<LatticeT>(point);
+  /// Get the dense lattice on this lattice anchor.
+  LatticeT *getLattice(LatticeAnchor anchor) override {
+    return getOrCreate<LatticeT>(anchor);
   }
 
   /// Set the dense lattice at control flow entry point and propagate an update
@@ -331,16 +332,16 @@ class AbstractDenseBackwardDataFlowAnalysis : public DataFlowAnalysis {
                                            const AbstractDenseLattice &after,
                                            AbstractDenseLattice *before) = 0;
 
-  /// Get the dense lattice before the execution of the program point. That is,
+  /// Get the dense lattice before the execution of the lattice anchor. That is,
   /// before the execution of the given operation or after the execution of the
   /// block.
-  virtual AbstractDenseLattice *getLattice(ProgramPoint point) = 0;
+  virtual AbstractDenseLattice *getLattice(LatticeAnchor anchor) = 0;
 
-  /// Get the dense lattice before the execution of the program point `point`
-  /// and declare that the `dependent` program point must be updated every time
-  /// `point` is.
+  /// Get the dense lattice before the execution of the program point in
+  /// `anchor` and declare that the `dependent` program point must be updated
+  /// every time `anchor` is.
   const AbstractDenseLattice *getLatticeFor(ProgramPoint dependent,
-                                            ProgramPoint point);
+                                            LatticeAnchor anchor);
 
   /// Set the dense lattice before at the control flow exit point and propagate
   /// the update if it changed.
@@ -500,9 +501,9 @@ class DenseBackwardDataFlowAnalysis
   }
 
 protected:
-  /// Get the dense lattice at the given program point.
-  LatticeT *getLattice(ProgramPoint point) override {
-    return getOrCreate<LatticeT>(point);
+  /// Get the dense lattice at the given lattice anchor.
+  LatticeT *getLattice(LatticeAnchor anchor) override {
+    return getOrCreate<LatticeT>(anchor);
   }
 
   /// Set the dense lattice at control flow exit point (after the terminator)
 
@@ -50,7 +50,7 @@ class IntegerRangeAnalysis
   /// At an entry point, we cannot reason about integer value ranges.
   void setToEntryState(IntegerValueRangeLattice *lattice) override {
     propagateIfChanged(lattice, lattice->join(IntegerValueRange::getMaxRange(
-                                    lattice->getPoint())));
+                                    lattice->getAnchor())));
   }
 
   /// Visit an operation. Invoke the transfer function on each operation that
 
@@ -36,8 +36,8 @@ class AbstractSparseLattice : public AnalysisState {
   /// Lattices can only be created for values.
   AbstractSparseLattice(Value value) : AnalysisState(value) {}
 
-  /// Return the program point this lattice is located at.
-  Value getPoint() const { return AnalysisState::getPoint().get<Value>(); }
+  /// Return the value this lattice is located at.
+  Value getAnchor() const { return AnalysisState::getAnchor().get<Value>(); }
 
   /// Join the information contained in 'rhs' into this lattice. Returns
   /// if the value of the lattice changed.
@@ -86,8 +86,8 @@ class Lattice : public AbstractSparseLattice {
 public:
   using AbstractSparseLattice::AbstractSparseLattice;
 
-  /// Return the program point this lattice is located at.
-  Value getPoint() const { return point.get<Value>(); }
+  /// Return the value this lattice is located at.
+  Value getAnchor() const { return anchor.get<Value>(); }
 
   /// Return the value held by this lattice. This requires that the value is
   /// initialized.
Original file line number	Diff line number	Diff line change
`@@ -50,7 +50,7 @@ class IntegerRangeAnalysis`
`50`	`50`	`/// At an entry point, we cannot reason about integer value ranges.`
`51`	`51`	`void setToEntryState(IntegerValueRangeLattice *lattice) override {`
`52`	`52`	`propagateIfChanged(lattice, lattice->join(IntegerValueRange::getMaxRange(`
`53`		`- lattice->getPoint())));`
	`53`	`+ lattice->getAnchor())));`
`54`	`54`	`}`
`55`	`55`
`56`	`56`	`/// Visit an operation. Invoke the transfer function on each operation that`