Skip to content

[InstCombine] Fix for folding select-like shufflevector into floating point binary operators. #85452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2135,7 +2135,8 @@ static Instruction *foldSelectShuffleOfSelectShuffle(ShuffleVectorInst &Shuf) {
return new ShuffleVectorInst(X, Y, NewMask);
}

static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf,
const SimplifyQuery &SQ) {
assert(Shuf.isSelect() && "Must have select-equivalent shuffle");

// Are we shuffling together some value and that same value after it has been
Expand All @@ -2159,6 +2160,19 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
if (!IdC)
return nullptr;

Value *X = Op0IsBinop ? Op1 : Op0;

// Prevent folding in the case the non-binop operand might have NaN values.
// If X can have NaN elements then we have that the floating point math
// operation in the transformed code may not preserve the exact NaN
// bit-pattern -- e.g. `fadd sNaN, 0.0 -> qNaN`.
// This makes the transformation incorrect since the original program would
// have preserved the exact NaN bit-pattern.
// Avoid the folding if X can have NaN elements.
if (Shuf.getType()->getElementType()->isFloatingPointTy() &&
!isKnownNeverNaN(X, 0, SQ))
return nullptr;

// Shuffle identity constants into the lanes that return the original value.
// Example: shuf (mul X, {-1,-2,-3,-4}), X, {0,5,6,3} --> mul X, {-1,1,1,-4}
// Example: shuf X, (add X, {-1,-2,-3,-4}), {0,1,6,7} --> add X, {0,0,-3,-4}
Expand All @@ -2175,7 +2189,6 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {

// shuf (bop X, C), X, M --> bop X, C'
// shuf X, (bop X, C), M --> bop X, C'
Value *X = Op0IsBinop ? Op1 : Op0;
Instruction *NewBO = BinaryOperator::Create(BOpcode, X, NewC);
NewBO->copyIRFlags(BO);

Expand Down Expand Up @@ -2241,7 +2254,8 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
if (Instruction *I = foldSelectShuffleOfSelectShuffle(Shuf))
return I;

if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
if (Instruction *I = foldSelectShuffleWith1Binop(
Shuf, getSimplifyQuery().getWithInstruction(&Shuf)))
return I;

BinaryOperator *B0, *B1;
Expand Down
17 changes: 14 additions & 3 deletions llvm/test/Transforms/InstCombine/shuffle_select-inseltpoison.ll
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,18 @@ define <4 x i32> @srem(<4 x i32> %v) {

; Try FP ops/types.

define <4 x float> @fadd(<4 x float> %v) {
define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
; CHECK-LABEL: @fadd_maybe_nan(
; CHECK-NEXT: [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
%s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x float> %s
}

define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
Expand All @@ -359,7 +370,7 @@ define <4 x double> @fsub(<4 x double> %v) {

; Propagate any FMF.

define <4 x float> @fmul(<4 x float> %v) {
define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
Expand All @@ -380,7 +391,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
ret <4 x double> %s
}

define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
; CHECK-NEXT: ret <4 x double> [[S]]
Expand Down
17 changes: 14 additions & 3 deletions llvm/test/Transforms/InstCombine/shuffle_select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,18 @@ define <4 x i32> @srem(<4 x i32> %v) {

; Try FP ops/types.

define <4 x float> @fadd(<4 x float> %v) {
define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
; CHECK-LABEL: @fadd_maybe_nan(
; CHECK-NEXT: [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x float> [[S]]
;
%b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
%s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
ret <4 x float> %s
}

define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
Expand All @@ -359,7 +370,7 @@ define <4 x double> @fsub(<4 x double> %v) {

; Propagate any FMF.

define <4 x float> @fmul(<4 x float> %v) {
define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: ret <4 x float> [[S]]
Expand All @@ -380,7 +391,7 @@ define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
ret <4 x double> %s
}

define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
; CHECK-NEXT: ret <4 x double> [[S]]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please keep the old tests so we can see the fix actually take affect.
You can add new tests with nofpclass(nan).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added the test case fadd_maybe_nan to check that the transformation doesn't occur if the value might be a NaN. The other tests cases where I added nofpclass(nan) seems to cover the propagation of the fast-math flags in the transformation. Not sure if it is worth duplicating those test cases.

Expand Down