Skip to content

Commit b81d5e0

Browse files
authored
[InstCombine] Fold shuffles through all trivially vectorizable intrinsics (#141979)
This addresses a TODO in foldShuffledIntrinsicOperands to use isTriviallyVectorizable instead of a hardcoded list of intrinsics, which in turn allows more intrinsics to be scalarized by VectorCombine. From what I can tell, every intrinsic here should be speculatable, so an isSpeculatable() check was added as a guard (the fold bails out if it fails). Because this enables intrinsics like abs, which have a scalar operand, we also need to check isVectorIntrinsicWithScalarOpAtArg.
1 parent 1072196 commit b81d5e0

File tree

11 files changed

+175
-98
lines changed

11 files changed

+175
-98
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,42 +1400,46 @@ static Instruction *factorizeMinMaxTree(IntrinsicInst *II) {
14001400
/// try to shuffle after the intrinsic.
14011401
Instruction *
14021402
InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
1403-
// TODO: This should be extended to handle other intrinsics like fshl, ctpop,
1404-
// etc. Use llvm::isTriviallyVectorizable() and related to determine
1405-
// which intrinsics are safe to shuffle?
1406-
switch (II->getIntrinsicID()) {
1407-
case Intrinsic::smax:
1408-
case Intrinsic::smin:
1409-
case Intrinsic::umax:
1410-
case Intrinsic::umin:
1411-
case Intrinsic::fma:
1412-
case Intrinsic::fshl:
1413-
case Intrinsic::fshr:
1414-
break;
1415-
default:
1403+
if (!isTriviallyVectorizable(II->getIntrinsicID()) ||
1404+
!II->getCalledFunction()->isSpeculatable())
1405+
return nullptr;
1406+
1407+
// fabs is canonicalized to fabs (shuffle ...) in foldShuffleOfUnaryOps, so
1408+
// avoid undoing it.
1409+
if (match(II, m_FAbs(m_Value())))
14161410
return nullptr;
1417-
}
14181411

14191412
Value *X;
14201413
Constant *C;
14211414
ArrayRef<int> Mask;
1422-
auto *NonConstArg = find_if_not(II->args(), IsaPred<Constant>);
1415+
auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1416+
return isa<Constant>(Arg.get()) ||
1417+
isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1418+
Arg.getOperandNo(), nullptr);
1419+
});
14231420
if (!NonConstArg ||
14241421
!match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
14251422
return nullptr;
14261423

1427-
// At least 1 operand must have 1 use because we are creating 2 instructions.
1428-
if (none_of(II->args(), [](Value *V) { return V->hasOneUse(); }))
1424+
// At least 1 operand must be a shuffle with 1 use because we are creating 2
1425+
// instructions.
1426+
if (none_of(II->args(), [](Value *V) {
1427+
return isa<ShuffleVectorInst>(V) && V->hasOneUse();
1428+
}))
14291429
return nullptr;
14301430

14311431
// See if all arguments are shuffled with the same mask.
14321432
SmallVector<Value *, 4> NewArgs;
14331433
Type *SrcTy = X->getType();
1434-
for (Value *Arg : II->args()) {
1435-
if (match(Arg, m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1436-
X->getType() == SrcTy)
1434+
for (Use &Arg : II->args()) {
1435+
if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1436+
Arg.getOperandNo(), nullptr))
1437+
NewArgs.push_back(Arg);
1438+
else if (match(&Arg,
1439+
m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1440+
X->getType() == SrcTy)
14371441
NewArgs.push_back(X);
1438-
else if (match(Arg, m_ImmConstant(C))) {
1442+
else if (match(&Arg, m_ImmConstant(C))) {
14391443
// If it's a constant, try find the constant that would be shuffled to C.
14401444
if (Constant *ShuffledC =
14411445
unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
@@ -1448,8 +1452,12 @@ InstCombinerImpl::foldShuffledIntrinsicOperands(IntrinsicInst *II) {
14481452

14491453
// intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
14501454
Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1455+
// Result type might be a different vector width.
1456+
// TODO: Check that the result type isn't widened?
1457+
VectorType *ResTy =
1458+
VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
14511459
Value *NewIntrinsic =
1452-
Builder.CreateIntrinsic(II->getIntrinsicID(), SrcTy, NewArgs, FPI);
1460+
Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
14531461
return new ShuffleVectorInst(NewIntrinsic, Mask);
14541462
}
14551463

llvm/test/Transforms/InstCombine/abs-1.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,3 +978,14 @@ define i32 @abs_diff_signed_slt_no_nsw_swap(i32 %a, i32 %b) {
978978
%cond = select i1 %cmp, i32 %sub_ba, i32 %sub_ab
979979
ret i32 %cond
980980
}
981+
982+
define <2 x i32> @abs_unary_shuffle_ops(<2 x i32> %x) {
983+
; CHECK-LABEL: @abs_unary_shuffle_ops(
984+
; CHECK-NEXT: [[R2:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[R1:%.*]], i1 false)
985+
; CHECK-NEXT: [[R:%.*]] = shufflevector <2 x i32> [[R2]], <2 x i32> poison, <2 x i32> <i32 1, i32 0>
986+
; CHECK-NEXT: ret <2 x i32> [[R]]
987+
;
988+
%a = shufflevector <2 x i32> %x, <2 x i32> poison, <2 x i32> <i32 1, i32 0>
989+
%r = call <2 x i32> @llvm.abs(<2 x i32> %a, i1 false)
990+
ret <2 x i32> %r
991+
}

llvm/test/Transforms/InstCombine/fma.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,19 @@ define <2 x half> @fma_negone_vec_partial_undef(<2 x half> %x, <2 x half> %y) {
972972
ret <2 x half> %sub
973973
}
974974

975+
define <2 x float> @fmuladd_unary_shuffle_ops(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
976+
; CHECK-LABEL: @fmuladd_unary_shuffle_ops(
977+
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[A:%.*]], <2 x float> [[B:%.*]], <2 x float> [[C:%.*]])
978+
; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
979+
; CHECK-NEXT: ret <2 x float> [[R1]]
980+
;
981+
%a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
982+
%b = shufflevector <2 x float> %y, <2 x float> poison, <2 x i32> <i32 1, i32 0>
983+
%c = shufflevector <2 x float> %z, <2 x float> poison, <2 x i32> <i32 1, i32 0>
984+
%r = call <2 x float> @llvm.fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c)
985+
ret <2 x float> %r
986+
}
987+
975988
; negative tests
976989

977990
define half @fma_non_negone(half %x, half %y) {

llvm/test/Transforms/InstCombine/minmax-intrinsics.ll

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2511,6 +2511,21 @@ define <3 x i8> @smin_unary_shuffle_ops_uses(<3 x i8> %x, <3 x i8> %y) {
25112511
ret <3 x i8> %r
25122512
}
25132513

2514+
; negative test - too many uses
2515+
2516+
define <3 x i8> @smin_unary_shuffle_ops_uses_const(<3 x i8> %x, <3 x i8> %y) {
2517+
; CHECK-LABEL: @smin_unary_shuffle_ops_uses_const(
2518+
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
2519+
; CHECK-NEXT: call void @use_vec(<3 x i8> [[SX]])
2520+
; CHECK-NEXT: [[R:%.*]] = call <3 x i8> @llvm.smin.v3i8(<3 x i8> [[SX]], <3 x i8> <i8 1, i8 2, i8 3>)
2521+
; CHECK-NEXT: ret <3 x i8> [[R]]
2522+
;
2523+
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
2524+
call void @use_vec(<3 x i8> %sx)
2525+
%r = call <3 x i8> @llvm.smin.v3i8(<3 x i8> %sx, <3 x i8> <i8 1, i8 2, i8 3>)
2526+
ret <3 x i8> %r
2527+
}
2528+
25142529
; This would assert/crash because we tried to zext to i1.
25152530

25162531
@g = external dso_local global i32, align 4

llvm/test/Transforms/InstCombine/powi.ll

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -564,3 +564,29 @@ define double @powi_fmul_powi_x_overflow(double noundef %x) {
564564
%mul = fmul reassoc double %p1, %x
565565
ret double %mul
566566
}
567+
568+
define <3 x float> @powi_unary_shuffle_ops(<3 x float> %x, i32 %power) {
569+
; CHECK-LABEL: @powi_unary_shuffle_ops(
570+
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x float> @llvm.powi.v3f32.i32(<3 x float> [[X:%.*]], i32 [[POWER:%.*]])
571+
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> <i32 1, i32 0, i32 2>
572+
; CHECK-NEXT: ret <3 x float> [[R]]
573+
;
574+
%sx = shufflevector <3 x float> %x, <3 x float> poison, <3 x i32> <i32 1, i32 0, i32 2>
575+
%r = call <3 x float> @llvm.powi(<3 x float> %sx, i32 %power)
576+
ret <3 x float> %r
577+
}
578+
579+
; Negative test - multiple uses
580+
581+
define <3 x float> @powi_unary_shuffle_ops_use(<3 x float> %x, i32 %power, ptr %p) {
582+
; CHECK-LABEL: @powi_unary_shuffle_ops_use(
583+
; CHECK-NEXT: [[SX:%.*]] = shufflevector <3 x float> [[X:%.*]], <3 x float> poison, <3 x i32> <i32 1, i32 0, i32 2>
584+
; CHECK-NEXT: store <3 x float> [[SX]], ptr [[P:%.*]], align 16
585+
; CHECK-NEXT: [[R:%.*]] = call <3 x float> @llvm.powi.v3f32.i32(<3 x float> [[SX]], i32 [[POWER:%.*]])
586+
; CHECK-NEXT: ret <3 x float> [[R]]
587+
;
588+
%sx = shufflevector <3 x float> %x, <3 x float> poison, <3 x i32> <i32 1, i32 0, i32 2>
589+
store <3 x float> %sx, ptr %p
590+
%r = call <3 x float> @llvm.powi(<3 x float> %sx, i32 %power)
591+
ret <3 x float> %r
592+
}

llvm/test/Transforms/InstCombine/scmp.ll

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,19 @@ define i8 @scmp_from_select_eq_and_gt_commuted3(i32 %x, i32 %y) {
423423
ret i8 %r
424424
}
425425

426+
define <3 x i2> @scmp_unary_shuffle_ops(<3 x i8> %x, <3 x i8> %y) {
427+
; CHECK-LABEL: define <3 x i2> @scmp_unary_shuffle_ops(
428+
; CHECK-SAME: <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]]) {
429+
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i2> @llvm.scmp.v3i2.v3i8(<3 x i8> [[X]], <3 x i8> [[Y]])
430+
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i2> [[TMP1]], <3 x i2> poison, <3 x i32> <i32 1, i32 0, i32 2>
431+
; CHECK-NEXT: ret <3 x i2> [[R]]
432+
;
433+
%sx = shufflevector <3 x i8> %x, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
434+
%sy = shufflevector <3 x i8> %y, <3 x i8> poison, <3 x i32> <i32 1, i32 0, i32 2>
435+
%r = call <3 x i2> @llvm.scmp(<3 x i8> %sx, <3 x i8> %sy)
436+
ret <3 x i2> %r
437+
}
438+
426439
; Negative test: true value of outer select is not zero
427440
define i8 @scmp_from_select_eq_and_gt_neg1(i32 %x, i32 %y) {
428441
; CHECK-LABEL: define i8 @scmp_from_select_eq_and_gt_neg1(

llvm/test/Transforms/InstCombine/sqrt.ll

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,17 @@ define <2 x float> @sqrt_exp_vec(<2 x float> %x) {
201201
ret <2 x float> %res
202202
}
203203

204+
define <2 x float> @sqrt_unary_shuffle_ops(<2 x float> %x) {
205+
; CHECK-LABEL: @sqrt_unary_shuffle_ops(
206+
; CHECK-NEXT: [[R:%.*]] = call <2 x float> @llvm.sqrt.v2f32(<2 x float> [[A:%.*]])
207+
; CHECK-NEXT: [[R1:%.*]] = shufflevector <2 x float> [[R]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
208+
; CHECK-NEXT: ret <2 x float> [[R1]]
209+
;
210+
%a = shufflevector <2 x float> %x, <2 x float> poison, <2 x i32> <i32 1, i32 0>
211+
%r = call <2 x float> @llvm.sqrt(<2 x float> %a)
212+
ret <2 x float> %r
213+
}
214+
204215
declare i32 @foo(double)
205216
declare double @sqrt(double) readnone
206217
declare float @sqrtf(float)

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -264,23 +264,17 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
264264
; GFX8-NEXT: bb:
265265
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
266266
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
267-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
268-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
269-
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
267+
; GFX8-NEXT: [[TMP3:%.*]] = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
270268
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
271-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
272269
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
273270
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
274271
;
275272
; GFX9-LABEL: @uadd_sat_v3i16(
276273
; GFX9-NEXT: bb:
277274
; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
278275
; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
279-
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
280-
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
281-
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
276+
; GFX9-NEXT: [[TMP3:%.*]] = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
282277
; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
283-
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
284278
; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
285279
; GFX9-NEXT: ret <3 x i16> [[INS_2]]
286280
;
@@ -323,24 +317,20 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
323317
;
324318
; GFX8-LABEL: @uadd_sat_v4i16(
325319
; GFX8-NEXT: bb:
326-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
327-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
328-
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
329-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
320+
; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG2:%.*]])
321+
; GFX8-NEXT: [[ARG1:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG2]])
330322
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
331-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
332-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
323+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
324+
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
333325
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
334326
;
335327
; GFX9-LABEL: @uadd_sat_v4i16(
336328
; GFX9-NEXT: bb:
337-
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
338-
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
339-
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
340-
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
329+
; GFX9-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG2:%.*]])
330+
; GFX9-NEXT: [[ARG1:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG2]])
341331
; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
342-
; GFX9-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
343-
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
332+
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
333+
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
344334
; GFX9-NEXT: ret <4 x i16> [[INS_31]]
345335
;
346336
bb:

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -264,23 +264,17 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
264264
; GFX8-NEXT: bb:
265265
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
266266
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
267-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
268-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
269-
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
267+
; GFX8-NEXT: [[TMP3:%.*]] = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
270268
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
271-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
272269
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
273270
; GFX8-NEXT: ret <3 x i16> [[INS_2]]
274271
;
275272
; GFX9-LABEL: @uadd_sat_v3i16(
276273
; GFX9-NEXT: bb:
277274
; GFX9-NEXT: [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
278275
; GFX9-NEXT: [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
279-
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
280-
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> poison, <2 x i32> <i32 0, i32 1>
281-
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
276+
; GFX9-NEXT: [[TMP3:%.*]] = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> [[ARG0]], <3 x i16> [[ARG1]])
282277
; GFX9-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
283-
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 poison>
284278
; GFX9-NEXT: [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
285279
; GFX9-NEXT: ret <3 x i16> [[INS_2]]
286280
;
@@ -323,24 +317,20 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
323317
;
324318
; GFX8-LABEL: @uadd_sat_v4i16(
325319
; GFX8-NEXT: bb:
326-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
327-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
328-
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
329-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
320+
; GFX8-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG2:%.*]])
321+
; GFX8-NEXT: [[ARG1:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG2]])
330322
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
331-
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
332-
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
323+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
324+
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
333325
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
334326
;
335327
; GFX9-LABEL: @uadd_sat_v4i16(
336328
; GFX9-NEXT: bb:
337-
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
338-
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
339-
; GFX9-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
340-
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
329+
; GFX9-NEXT: [[TMP0:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0:%.*]], <4 x i16> [[ARG2:%.*]])
330+
; GFX9-NEXT: [[ARG1:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[ARG0]], <4 x i16> [[ARG2]])
341331
; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
342-
; GFX9-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
343-
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
332+
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP4]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
333+
; GFX9-NEXT: [[INS_31:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
344334
; GFX9-NEXT: ret <4 x i16> [[INS_31]]
345335
;
346336
bb:

0 commit comments

Comments
 (0)