Skip to content

Commit 4c47e2b

Browse files
committed
[InstCombine] Fold shuffles through all trivially vectorizable intrinsics
This addresses a TODO in foldShuffledIntrinsicOperands to use isTriviallyVectorizable instead of a hardcoded list of intrinsics, which in turn allows more intrinsics to be scalarized by VectorCombine. From what I can tell, every intrinsic here should be speculatable, so an assertion was added. Because this enables intrinsics like abs, which have a scalar operand, we also need to check isVectorIntrinsicWithScalarOpAtArg.
1 parent 46d23a7 commit 4c47e2b

File tree

4 files changed

+31
-30
lines changed

4 files changed

+31
-30
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,26 +1401,25 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
14011401
/// try to shuffle after the intrinsic.
14021402
Instruction *
14031403
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1404-
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
1405-
// etc. Use llvm::isTriviallyVectorizable() and related to determine
1406-
// which intrinsics are safe to shuffle?
1407-
switch (II->getIntrinsicID()) {
1408-
case Intrinsic::smax:
1409-
case Intrinsic::smin:
1410-
case Intrinsic::umax:
1411-
case Intrinsic::umin:
1412-
case Intrinsic::fma:
1413-
case Intrinsic::fshl:
1414-
case Intrinsic::fshr:
1415-
break;
1416-
default:
1404+
if (!isTriviallyVectorizable(II->getIntrinsicID()))
1405+
return nullptr;
1406+
1407+
assert(isSafeToSpeculativelyExecute(II) &&
1408+
"Trivially vectorizable but not safe to speculatively execute?");
1409+
1410+
// fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
1411+
// avoid undoing it.
1412+
if (match(II, m_FAbs(m_Value())))
14171413
return nullptr;
1418-
}
14191414

14201415
Value *X;
14211416
Constant *C;
14221417
ArrayRef<int> Mask;
1423-
auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
1418+
auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1419+
return isa<Constant>(Arg.get()) ||
1420+
isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1421+
Arg.getOperandNo(), nullptr);
1422+
});
14241423
if (!NonConstArg ||
14251424
!match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
14261425
return nullptr;
@@ -1432,11 +1431,15 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
14321431
// See if all arguments are shuffled with the same mask.
14331432
SmallVector<Value *, 4> NewArgs;
14341433
Type *SrcTy = X->getType();
1435-
for (Value *Arg : II->args()) {
1436-
if (match(Arg, m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1437-
X->getType() == SrcTy)
1434+
for (Use &Arg : II->args()) {
1435+
if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1436+
Arg.getOperandNo(), nullptr))
1437+
NewArgs.push_back(Arg);
1438+
else if (match(&Arg,
1439+
m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1440+
X->getType() == SrcTy)
14381441
NewArgs.push_back(X);
1439-
else if (match(Arg, m_ImmConstant(C))) {
1442+
else if (match(&Arg, m_ImmConstant(C))) {
14401443
// If it's a constant, try find the constant that would be shuffled to C.
14411444
if (Constant *ShuffledC =
14421445
unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))

llvm/test/Transforms/InstCombine/abs-1.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -981,9 +981,9 @@ define i32 @abs_diff_signed_slt_no_nsw_swap(i32 %a, i32 %b) {
981981

982982
define <2 x i32> @abs_unary_shuffle_ops(<2 x i32> %x) {
983983
; CHECK-LABEL: @abs_unary_shuffle_ops(
984-
; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x i32> [[R:%.*]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
985-
; CHECK-NEXT: [[R2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[R1]], i1 false)
986-
; CHECK-NEXT: ret <2 x i32> [[R2]]
984+
; CHECK-NEXT: [[R2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[R1:%.*]], i1 false)
985+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[R2]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
986+
; CHECK-NEXT: ret <2 x i32> [[R]]
987987
;
988988
%a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
989989
%r = call <2 x i32> @llvm.abs(<2 x i32> %a, i1 false)

llvm/test/Transforms/InstCombine/fma.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -974,11 +974,9 @@ define <2 x half> @fma_negone_vec_partial_undef(<2 x half> %x, <2 x half> %y) {
974974

975975
define <2 x float> @fmuladd_unary_shuffle_ops(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
976976
; CHECK-LABEL: @fmuladd_unary_shuffle_ops(
977-
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
978-
; CHECK-NEXT: [[B:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
979-
; CHECK-NEXT: [[C:%.*]] = shufflevector <2 x float> [[Z:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
980-
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[A]], <2 x float> [[B]], <2 x float> [[C]])
981-
; CHECK-NEXT: ret <2 x float> [[R]]
977+
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <2 x float> [[C:%.*]])
978+
; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
979+
; CHECK-NEXT: ret <2 x float> [[R1]]
982980
;
983981
%a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
984982
%b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>

llvm/test/Transforms/InstCombine/sqrt.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -203,9 +203,9 @@ define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
203203

204204
define <2 x float> @sqrt_unary_shuffle_ops(<2 x float> %x) {
205205
; CHECK-LABEL: @sqrt_unary_shuffle_ops(
206-
; CHECK-NEXT: [[A:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
207-
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[A]])
208-
; CHECK-NEXT: ret <2 x float> [[R]]
206+
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[A:%.*]])
207+
; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
208+
; CHECK-NEXT: ret <2 x float> [[R1]]
209209
;
210210
%a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
211211
%r = call <2 x float> @llvm.sqrt(<2 x float> %a)

0 commit comments

Comments
 (0)