Skip to content

Commit 129ae51

Browse files
committed
[INSTCOMBINE] Transform reduction(shuffle V, poison, unique_mask) to reduction(V).
After SLP + LTO we may have reduction(shuffle V, poison, mask). This can be simplified to just reduction(V) if the mask selects from a single source vector and merely permutes all of its elements, i.e. without reusing any element or replacing elements with undefs and/or other values. Differential Revision: https://reviews.llvm.org/D105053
1 parent 5d933c0 commit 129ae51

File tree

2 files changed

+53
-23
lines changed

2 files changed

+53
-23
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/None.h"
2020
#include "llvm/ADT/Optional.h"
2121
#include "llvm/ADT/STLExtras.h"
22+
#include "llvm/ADT/SmallBitVector.h"
2223
#include "llvm/ADT/SmallVector.h"
2324
#include "llvm/ADT/Statistic.h"
2425
#include "llvm/ADT/Twine.h"
@@ -1983,6 +1984,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
19831984
replaceInstUsesWith(CI, Res);
19841985
return eraseInstFromFunction(CI);
19851986
}
1987+
LLVM_FALLTHROUGH;
1988+
}
1989+
case Intrinsic::vector_reduce_add:
1990+
case Intrinsic::vector_reduce_mul:
1991+
case Intrinsic::vector_reduce_xor:
1992+
case Intrinsic::vector_reduce_umax:
1993+
case Intrinsic::vector_reduce_umin:
1994+
case Intrinsic::vector_reduce_smax:
1995+
case Intrinsic::vector_reduce_smin:
1996+
case Intrinsic::vector_reduce_fmax:
1997+
case Intrinsic::vector_reduce_fmin:
1998+
case Intrinsic::vector_reduce_fadd:
1999+
case Intrinsic::vector_reduce_fmul: {
2000+
bool CanBeReassociated = (IID != Intrinsic::vector_reduce_fadd &&
2001+
IID != Intrinsic::vector_reduce_fmul) ||
2002+
II->hasAllowReassoc();
2003+
const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
2004+
IID == Intrinsic::vector_reduce_fmul)
2005+
? 1
2006+
: 0;
2007+
Value *Arg = II->getArgOperand(ArgIdx);
2008+
Value *V;
2009+
ArrayRef<int> Mask;
2010+
if (!isa<FixedVectorType>(Arg->getType()) || !CanBeReassociated ||
2011+
!match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
2012+
!cast<ShuffleVectorInst>(Arg)->isSingleSource())
2013+
break;
2014+
int Sz = Mask.size();
2015+
SmallBitVector UsedIndices(Sz);
2016+
for (int Idx : Mask) {
2017+
if (Idx == UndefMaskElem || UsedIndices.test(Idx))
2018+
break;
2019+
UsedIndices.set(Idx);
2020+
}
2021+
// Can remove shuffle iff just shuffled elements, no repeats, undefs, or
2022+
// other changes.
2023+
if (UsedIndices.all()) {
2024+
replaceUse(II->getOperandUse(ArgIdx), V);
2025+
return nullptr;
2026+
}
19862027
break;
19872028
}
19882029
default: {

llvm/test/Transforms/InstCombine/reduction-shufflevector.ll

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,7 @@ define i32 @reduce_add(<4 x i32> %x) {
1313

1414
define i32 @reduce_or(<4 x i32> %x) {
1515
; CHECK-LABEL: @reduce_or(
16-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
17-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[SHUF]])
16+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[X:%.*]])
1817
; CHECK-NEXT: ret i32 [[RES]]
1918
;
2019
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 7, i32 6, i32 5, i32 4>
@@ -24,8 +23,7 @@ define i32 @reduce_or(<4 x i32> %x) {
2423

2524
define i32 @reduce_and(<4 x i32> %x) {
2625
; CHECK-LABEL: @reduce_and(
27-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
28-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[SHUF]])
26+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[X:%.*]])
2927
; CHECK-NEXT: ret i32 [[RES]]
3028
;
3129
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
@@ -35,8 +33,7 @@ define i32 @reduce_and(<4 x i32> %x) {
3533

3634
define i32 @reduce_xor(<4 x i32> %x) {
3735
; CHECK-LABEL: @reduce_xor(
38-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
39-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[SHUF]])
36+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> [[X:%.*]])
4037
; CHECK-NEXT: ret i32 [[RES]]
4138
;
4239
%shuf = shufflevector <4 x i32> poison, <4 x i32> %x, <4 x i32> <i32 5, i32 6, i32 7, i32 4>
@@ -46,8 +43,7 @@ define i32 @reduce_xor(<4 x i32> %x) {
4643

4744
define i32 @reduce_umax(<4 x i32> %x) {
4845
; CHECK-LABEL: @reduce_umax(
49-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
50-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[SHUF]])
46+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[X:%.*]])
5147
; CHECK-NEXT: ret i32 [[RES]]
5248
;
5349
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 3, i32 0>
@@ -57,8 +53,7 @@ define i32 @reduce_umax(<4 x i32> %x) {
5753

5854
define i32 @reduce_umin(<4 x i32> %x) {
5955
; CHECK-LABEL: @reduce_umin(
60-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
61-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[SHUF]])
56+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[X:%.*]])
6257
; CHECK-NEXT: ret i32 [[RES]]
6358
;
6459
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
@@ -68,8 +63,7 @@ define i32 @reduce_umin(<4 x i32> %x) {
6863

6964
define i32 @reduce_smax(<4 x i32> %x) {
7065
; CHECK-LABEL: @reduce_smax(
71-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
72-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[SHUF]])
66+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[X:%.*]])
7367
; CHECK-NEXT: ret i32 [[RES]]
7468
;
7569
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
@@ -79,8 +73,7 @@ define i32 @reduce_smax(<4 x i32> %x) {
7973

8074
define i32 @reduce_smin(<4 x i32> %x) {
8175
; CHECK-LABEL: @reduce_smin(
82-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
83-
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[SHUF]])
76+
; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[X:%.*]])
8477
; CHECK-NEXT: ret i32 [[RES]]
8578
;
8679
%shuf = shufflevector <4 x i32> %x, <4 x i32> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -90,19 +83,17 @@ define i32 @reduce_smin(<4 x i32> %x) {
9083

9184
define float @reduce_fmax(<4 x float> %x) {
9285
; CHECK-LABEL: @reduce_fmax(
93-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
94-
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[SHUF]])
86+
; CHECK-NEXT: [[RES:%.*]] = call nnan nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[X:%.*]])
9587
; CHECK-NEXT: ret float [[RES]]
9688
;
9789
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
98-
%res = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
90+
%res = call nsz nnan float @llvm.vector.reduce.fmax.v4f32(<4 x float> %shuf)
9991
ret float %res
10092
}
10193

10294
define float @reduce_fmin(<4 x float> %x) {
10395
; CHECK-LABEL: @reduce_fmin(
104-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
105-
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[SHUF]])
96+
; CHECK-NEXT: [[RES:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[X:%.*]])
10697
; CHECK-NEXT: ret float [[RES]]
10798
;
10899
%shuf = shufflevector <4 x float> %x, <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -112,8 +103,7 @@ define float @reduce_fmin(<4 x float> %x) {
112103

113104
define float @reduce_fadd(float %a, <4 x float> %x) {
114105
; CHECK-LABEL: @reduce_fadd(
115-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
116-
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
106+
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fadd.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
117107
; CHECK-NEXT: ret float [[RES]]
118108
;
119109
%shuf = shufflevector <4 x float> %x, <4 x float> %x, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
@@ -123,8 +113,7 @@ define float @reduce_fadd(float %a, <4 x float> %x) {
123113

124114
define float @reduce_fmul(float %a, <4 x float> %x) {
125115
; CHECK-LABEL: @reduce_fmul(
126-
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 3, i32 1, i32 2>
127-
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[SHUF]])
116+
; CHECK-NEXT: [[RES:%.*]] = call reassoc float @llvm.vector.reduce.fmul.v4f32(float [[A:%.*]], <4 x float> [[X:%.*]])
128117
; CHECK-NEXT: ret float [[RES]]
129118
;
130119
%shuf = shufflevector <4 x float> %x, <4 x float> zeroinitializer, <4 x i32> <i32 0, i32 3, i32 1, i32 2>

0 commit comments

Comments
 (0)