llvm · michele-scandale · Mar 21, 2024 · Mar 20, 2024 · goldsteinn · Mar 17, 2024
@@ -2135,7 +2135,8 @@ static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
   return new ShuffleVectorInst(X, Y, NewMask);
 }
 
-static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
+static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
+                                                const SimplifyQuery &SQ) {
   assert(Shuf.isSelect() && "Must have select-equivalent shuffle");
 
   // Are we shuffling together some value and that same value after it has been
@@ -2159,6 +2160,19 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
   if (!IdC)
     return nullptr;
 
+  Value *X = Op0IsBinop ? Op1 : Op0;
+
+  // Prevent folding in the case the non-binop operand might have NaN values.
+  // If X can have NaN elements then we have that the floating point math
+  // operation in the transformed code may not preserve the exact NaN
+  // bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
+  // This makes the transformation incorrect since the original program would
+  // have preserved the exact NaN bit-pattern.
+  // Avoid the folding if X can have NaN elements.
+  if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
+      !isKnownNeverNaN(X, 0, SQ))
+    return nullptr;
+
   // Shuffle identity constants into the lanes that return the original value.
   // Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
   // Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
@@ -2175,7 +2189,6 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
 
   // shuf (bop X, C), X, M --> bop X, C'
   // shuf X, (bop X, C), M --> bop X, C'
-  Value *X = Op0IsBinop ? Op1 : Op0;
   Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
   NewBO->copyIRFlags(BO);
 
@@ -2241,7 +2254,8 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
   if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
     return I;
 
-  if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
+  if (Instruction *I = foldSelectShuffleWith1Binop(
+          Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
     return I;
 
   BinaryOperator *B0, *B1;

diff --git a/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
@@ -336,7 +336,18 @@ define <4 x i32> @srem(<4 x i32> %v) {
 
 ; Try FP ops/types.
 
-define <4 x float> @fadd(<4 x float> %v) {
+define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
+; CHECK-LABEL: @fadd_maybe_nan(
+; CHECK-NEXT:    [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[S]]
+;
+  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
+  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %s
+}
+
+define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fadd(
 ; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[S]]
@@ -359,7 +370,7 @@ define <4 x double> @fsub(<4 x double> %v) {
 
 ; Propagate any FMF.
 
-define <4 x float> @fmul(<4 x float> %v) {
+define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fmul(
 ; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[S]]
@@ -380,7 +391,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
   ret <4 x double> %s
 }
 
-define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
+define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fdiv_constant_op1(
 ; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
 ; CHECK-NEXT:    ret <4 x double> [[S]]

diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll
@@ -336,7 +336,18 @@ define <4 x i32> @srem(<4 x i32> %v) {
 
 ; Try FP ops/types.
 
-define <4 x float> @fadd(<4 x float> %v) {
+define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
+; CHECK-LABEL: @fadd_maybe_nan(
+; CHECK-NEXT:    [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
+; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+; CHECK-NEXT:    ret <4 x float> [[S]]
+;
+  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
+  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+  ret <4 x float> %s
+}
+
+define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fadd(
 ; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[S]]
@@ -359,7 +370,7 @@ define <4 x double> @fsub(<4 x double> %v) {
 
 ; Propagate any FMF.
 
-define <4 x float> @fmul(<4 x float> %v) {
+define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fmul(
 ; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
 ; CHECK-NEXT:    ret <4 x float> [[S]]
@@ -380,7 +391,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
   ret <4 x double> %s
 }
 
-define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
+define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
 ; CHECK-LABEL: @fdiv_constant_op1(
 ; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
 ; CHECK-NEXT:    ret <4 x double> [[S]]