Skip to content

Commit 57e96ef

Browse files
alexey-bataevronlieb
authored andcommitted
[SLP] Match poison as instruction with the same opcode
Patch allows to vector scalar instruction + poison values as if poisons are instructions with the same opcode. It allows better vectorization of the repeated values, reduces number of insertelement instructions and serves as a base ground for copyable elements vectorization AVX512, -O3 + LTO JM/ldecod - better vector code Applications/oggenc - better vectorization CINT2017speed/625.x264_s CINT2017rate/525.x264_r - better vector code CFP2017rate/526.blender_r - better vector code CFP2006/447.dealII - small variations Benchmarks/Bullet - extra vector code CFP2017rate/510.parest_r - better vectorization CINT2017rate/502.gcc_r CINT2017speed/602.gcc_s - extra vector code Benchmarks/tramp3d-v4 - small variations CFP2006/453.povray - extra vector code JM/lencod - better vector code CFP2017rate/511.povray_r - extra vector code MemFunctions/MemFunctions - extra vector code LoopVectorization/LoopVectorizationBenchmarks - extra vector code XRay/FDRMode - extra vector code XRay/ReturnReference - extra vector code LCALS/SubsetCLambdaLoops - extra vector code LCALS/SubsetCRawLoops - extra vector code LCALS/SubsetARawLoops - extra vector code LCALS/SubsetALambdaLoops - extra vector code DOE-ProxyApps-C++/miniFE - extra vector code LoopVectorization/LoopInterleavingBenchmarks - extra vector code LCALS/SubsetBLambdaLoops - extra vector code MicroBenchmarks/harris - extra vector code ImageProcessing/Dither - extra vector code MicroBenchmarks/SLPVectorization - extra vector code ImageProcessing/Blur - extra vector code ImageProcessing/Dilate - extra vector code Builtins/Int128 - extra vector code ImageProcessing/Interpolation - extra vector code ImageProcessing/BilateralFiltering - extra vector code ImageProcessing/AnisotropicDiffusion - extra vector code MicroBenchmarks/LoopInterchange - extra code vectorized LCALS/SubsetBRawLoops - extra code vectorized CINT2006/464.h264ref - extra vectorization with wider vectors CFP2017rate/508.namd_r - small variations, extra phis vectorized CFP2006/444.namd - 2 2 x phi replaced by 4 x phi DOE-ProxyApps-C/SimpleMOC - extra code vectorized CINT2017rate/541.leela_r CINT2017speed/641.leela_s - the function better vectorized and inlined Benchmarks/Misc/oourafft - 2 4 x bit reductions replaced by 2 x vector code FreeBench/fourinarow - better vectorization Reviewers: RKSimon Reviewed By: RKSimon Pull Request: llvm#115946 Change-Id: Ie96d4cd3ff69dd1efe59ff56a9a7393fed43d3aa
1 parent d9ab1a0 commit 57e96ef

File tree

8 files changed

+250
-119
lines changed

8 files changed

+250
-119
lines changed

llvm/include/llvm/IR/Instruction.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,7 @@ class Instruction : public User,
278278
bool isUnaryOp() const { return isUnaryOp(getOpcode()); }
279279
bool isBinaryOp() const { return isBinaryOp(getOpcode()); }
280280
bool isIntDivRem() const { return isIntDivRem(getOpcode()); }
281+
bool isFPDivRem() const { return isFPDivRem(getOpcode()); }
281282
bool isShift() const { return isShift(getOpcode()); }
282283
bool isCast() const { return isCast(getOpcode()); }
283284
bool isFuncletPad() const { return isFuncletPad(getOpcode()); }
@@ -304,6 +305,10 @@ class Instruction : public User,
304305
return Opcode == UDiv || Opcode == SDiv || Opcode == URem || Opcode == SRem;
305306
}
306307

308+
static inline bool isFPDivRem(unsigned Opcode) {
309+
return Opcode == FDiv || Opcode == FRem;
310+
}
311+
307312
/// Determine if the Opcode is one of the shift instructions.
308313
static inline bool isShift(unsigned Opcode) {
309314
return Opcode >= Shl && Opcode <= AShr;

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 206 additions & 70 deletions
Large diffs are not rendered by default.

llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ define void @test() {
1212
; CHECK: [[BB63]]:
1313
; CHECK-NEXT: br label %[[BB64]]
1414
; CHECK: [[BB64]]:
15-
; CHECK-NEXT: [[TMP0:%.*]] = phi <2 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
15+
; CHECK-NEXT: [[TMP25:%.*]] = phi <16 x float> [ poison, %[[BB61]] ], [ poison, %[[BB63]] ], [ poison, %[[BB62]] ]
1616
; CHECK-NEXT: [[I66:%.*]] = load float, ptr poison, align 16
1717
; CHECK-NEXT: [[I67:%.*]] = load float, ptr poison, align 4
1818
; CHECK-NEXT: [[I68:%.*]] = load float, ptr poison, align 8
@@ -37,28 +37,30 @@ define void @test() {
3737
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
3838
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
3939
; CHECK: [[BB77]]:
40-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
41-
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
42-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
40+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 14, i32 15, i32 poison, i32 poison>
41+
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <8 x float> [[TMP12]], float [[I70]], i32 0
42+
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x float> poison, float [[I68]], i32 0
43+
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x float> [[TMP30]], float [[I66]], i32 1
44+
; CHECK-NEXT: [[TMP39:%.*]] = shufflevector <16 x float> [[TMP25]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 3, i32 2, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
45+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <16 x float> [[TMP39]], <16 x float> [[TMP25]], <16 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 18, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 19, i32 poison, i32 poison>
4346
; CHECK-NEXT: br label %[[BB78:.*]]
4447
; CHECK: [[BB78]]:
45-
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
46-
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
47-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
48-
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
49-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 6, i32 7, i32 7>
48+
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP17]], %[[BB77]] ], [ [[TMP36:%.*]], %[[BB78]] ]
49+
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ [[TMP31]], %[[BB77]] ], [ [[TMP37:%.*]], %[[BB78]] ]
50+
; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 1, i32 3, i32 5, i32 3, i32 1, i32 0, i32 4, i32 5, i32 5>
51+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 2, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 5, i32 4, i32 4>
5052
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
5153
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
5254
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53-
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
55+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 18, i32 6, i32 7, i32 8, i32 20, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
5456
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
55-
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
56-
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
57+
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP13]]
58+
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP38]], [[TMP25]]
5759
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
5860
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
5961
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
60-
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
61-
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
62+
; CHECK-NEXT: [[TMP36]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 5, i32 11, i32 12, i32 10, i32 14, i32 15, i32 poison, i32 poison>
63+
; CHECK-NEXT: [[TMP37]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 6, i32 7>
6264
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
6365
; CHECK: [[BB167]]:
6466
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]

llvm/test/Transforms/SLPVectorizer/AArch64/vectorize-free-extracts-inserts.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -307,14 +307,14 @@ define void @noop_extracts_9_lanes(ptr %ptr.1, ptr %ptr.2) {
307307
; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
308308
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
309309
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
310-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 1>
311310
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1>
311+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 1, i32 0, i32 2, i32 1>
312+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 0, i32 1>
312313
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
313314
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
314315
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
315316
; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
316317
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 6, i32 7, i32 8, i32 0, i32 1, i32 2, i32 3, i32 4>
317-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 1, i32 0, i32 2, i32 1>
318318
; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
319319
; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_5]], [[V2_LANE_0]]
320320
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
@@ -395,10 +395,10 @@ define void @first_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
395395
; CHECK-NEXT: [[V1_LANE_5:%.*]] = extractelement <9 x double> [[V_1]], i32 5
396396
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
397397
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
398-
; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
399-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 4, i32 3, i32 6, i32 5, i32 8, i32 7, i32 1, i32 0>
400398
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0, i32 2>
399+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 4, i32 3, i32 6, i32 5, i32 8, i32 7, i32 1, i32 0>
401400
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
401+
; CHECK-NEXT: [[V2_LANE_1:%.*]] = extractelement <4 x double> [[V_2]], i32 1
402402
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_1]]
403403
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
404404
; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
@@ -483,15 +483,15 @@ define void @first_and_second_mul_chain_jumbled(ptr %ptr.1, ptr %ptr.2) {
483483
; CHECK-NEXT: [[V1_LANE_4:%.*]] = extractelement <9 x double> [[V_1]], i32 4
484484
; CHECK-NEXT: [[V_2:%.*]] = load <4 x double>, ptr [[PTR_2:%.*]], align 16
485485
; CHECK-NEXT: [[V2_LANE_0:%.*]] = extractelement <4 x double> [[V_2]], i32 0
486+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 2, i32 1, i32 0, i32 2, i32 1>
487+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0>
486488
; CHECK-NEXT: [[V2_LANE_2:%.*]] = extractelement <4 x double> [[V_2]], i32 2
487489
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 4, i32 3, i32 5, i32 6, i32 8, i32 7, i32 1, i32 0>
488-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 0, i32 2, i32 1, i32 2, i32 1, i32 0, i32 2, i32 1>
489490
; CHECK-NEXT: [[TMP2:%.*]] = fmul <8 x double> [[TMP0]], [[TMP1]]
490491
; CHECK-NEXT: [[A_LANE_8:%.*]] = fmul double [[V1_LANE_2]], [[V2_LANE_0]]
491492
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
492493
; CHECK-NEXT: [[A_INS_8:%.*]] = insertelement <9 x double> [[TMP3]], double [[A_LANE_8]], i32 8
493494
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <9 x double> [[V_1]], <9 x double> poison, <8 x i32> <i32 7, i32 6, i32 8, i32 1, i32 0, i32 3, i32 2, i32 5>
494-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[V_2]], <4 x double> poison, <8 x i32> <i32 2, i32 1, i32 0, i32 2, i32 0, i32 2, i32 1, i32 0>
495495
; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x double> [[TMP4]], [[TMP5]]
496496
; CHECK-NEXT: [[B_LANE_8:%.*]] = fmul double [[V1_LANE_4]], [[V2_LANE_2]]
497497
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x double> [[TMP6]], <8 x double> poison, <9 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>

llvm/test/Transforms/SLPVectorizer/X86/landing_pad.ll

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,15 @@ define void @foo() personality ptr @bar {
3333
; CHECK-NEXT: br i1 poison, label [[BB7]], label [[BB6]]
3434
; CHECK: bb9:
3535
; CHECK-NEXT: [[INDVARS_IV528799:%.*]] = phi i64 [ poison, [[BB10]] ], [ poison, [[BB12]] ]
36-
; CHECK-NEXT: [[TMP6:%.*]] = phi <2 x i32> [ [[TMP8:%.*]], [[BB10]] ], [ [[TMP9:%.*]], [[BB12]] ]
37-
; CHECK-NEXT: [[TMP7]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
36+
; CHECK-NEXT: [[TMP7]] = phi <4 x i32> [ [[TMP9:%.*]], [[BB10]] ], [ [[TMP11:%.*]], [[BB12]] ]
3837
; CHECK-NEXT: br label [[BB2]]
3938
; CHECK: bb10:
4039
; CHECK-NEXT: [[LOCAL_10_38123_LCSSA:%.*]] = phi i32 [ [[TMP10]], [[BB3]] ]
4140
; CHECK-NEXT: [[LOCAL_5_33118_LCSSA:%.*]] = phi i32 [ [[TMP4]], [[BB3]] ]
4241
; CHECK-NEXT: [[LANDING_PAD68:%.*]] = landingpad { ptr, i32 }
4342
; CHECK-NEXT: cleanup
44-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 0
45-
; CHECK-NEXT: [[TMP8]] = insertelement <2 x i32> [[TMP12]], i32 [[LOCAL_5_33118_LCSSA]], i32 1
43+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[LOCAL_10_38123_LCSSA]], i32 2
44+
; CHECK-NEXT: [[TMP9]] = insertelement <4 x i32> [[TMP8]], i32 [[LOCAL_5_33118_LCSSA]], i32 3
4645
; CHECK-NEXT: br label [[BB9]]
4746
; CHECK: bb11:
4847
; CHECK-NEXT: ret void
@@ -51,8 +50,8 @@ define void @foo() personality ptr @bar {
5150
; CHECK-NEXT: [[LOCAL_5_84111_LCSSA:%.*]] = phi i32 [ [[LOCAL_5_84111]], [[BB7]] ]
5251
; CHECK-NEXT: [[LANDING_PAD149:%.*]] = landingpad { ptr, i32 }
5352
; CHECK-NEXT: cleanup
54-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 0
55-
; CHECK-NEXT: [[TMP9]] = insertelement <2 x i32> [[TMP11]], i32 [[LOCAL_5_84111_LCSSA]], i32 1
53+
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[LOCAL_10_89113_LCSSA]], i32 2
54+
; CHECK-NEXT: [[TMP11]] = insertelement <4 x i32> [[TMP12]], i32 [[LOCAL_5_84111_LCSSA]], i32 3
5655
; CHECK-NEXT: br label [[BB9]]
5756
;
5857
bb1:

llvm/test/Transforms/SLPVectorizer/X86/non-power-2-num-elems-reused.ll

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,11 @@ define i64 @test() {
55
; CHECK-LABEL: define i64 @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[OR54_I_I_6:%.*]] = or i32 0, 0
8-
; CHECK-NEXT: [[CONV193_1_I_6:%.*]] = zext i32 [[OR54_I_I_6]] to i64
9-
; CHECK-NEXT: [[CONV193_I_7:%.*]] = zext i32 0 to i64
10-
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vector.reduce.or.v8i64(<8 x i64> zeroinitializer)
11-
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.or.v4i64(<4 x i64> zeroinitializer)
12-
; CHECK-NEXT: [[OP_RDX:%.*]] = or i64 [[TMP0]], [[TMP1]]
13-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i64> poison, i64 [[OP_RDX]], i32 0
14-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[CONV193_I_7]], i32 1
15-
; CHECK-NEXT: [[TMP7:%.*]] = or <2 x i64> [[TMP3]], zeroinitializer
16-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP7]], i32 0
17-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP7]], i32 1
18-
; CHECK-NEXT: [[OP_RDX3:%.*]] = or i64 [[TMP5]], [[TMP6]]
19-
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[OP_RDX3]], [[CONV193_1_I_6]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[OR54_I_I_6]], i32 8
9+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.vector.insert.v16i32.v8i32(<16 x i32> [[TMP0]], <8 x i32> zeroinitializer, i64 0)
10+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 7, i32 7, i32 8>
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i32> [[TMP2]] to <16 x i64>
12+
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP3]])
2013
; CHECK-NEXT: ret i64 [[TMP4]]
2114
;
2215
entry:

llvm/test/Transforms/SLPVectorizer/X86/non-power-of-2-subvectors-insert.ll

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@
44
define void @test() {
55
; CHECK-LABEL: define void @test() {
66
; CHECK-NEXT: [[XOR108_I_I_I:%.*]] = xor i64 0, 1
7-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[XOR108_I_I_I]], i32 1
8-
; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], zeroinitializer
9-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 1>
10-
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i64> [[TMP3]], <16 x i64> <i64 0, i64 undef, i64 undef, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 0, i64 undef, i64 undef, i64 undef, i64 0, i64 undef, i64 undef, i64 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 3, i32 4, i32 5, i32 20, i32 7, i32 8, i32 9, i32 24, i32 11, i32 12, i32 13, i32 28, i32 15>
11-
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v2i64(<16 x i64> [[TMP4]], <2 x i64> zeroinitializer, i64 0)
12-
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v2i64(<16 x i64> [[TMP5]], <2 x i64> zeroinitializer, i64 4)
13-
; CHECK-NEXT: [[TMP13:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v2i64(<16 x i64> [[TMP6]], <2 x i64> zeroinitializer, i64 8)
14-
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v2i64(<16 x i64> [[TMP13]], <2 x i64> [[TMP2]], i64 12)
7+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> <i64 0, i64 0, i64 poison, i64 0>, i64 [[XOR108_I_I_I]], i32 2
8+
; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i64> [[TMP1]], zeroinitializer
9+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i64> poison, i64 [[XOR108_I_I_I]], i32 3
10+
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v8i64(<16 x i64> poison, <8 x i64> zeroinitializer, i64 0)
11+
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i64> @llvm.vector.insert.v16i64.v4i64(<16 x i64> [[TMP4]], <4 x i64> [[TMP2]], i64 8)
12+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <16 x i64> [[TMP5]], <16 x i64> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison>
13+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <16 x i64> [[TMP6]], <16 x i64> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 3, i32 7, i32 8, i32 9, i32 3, i32 10, i32 11, i32 12, i32 3>
1514
; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i64> [[TMP7]] to <16 x i1>
1615
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i1> [[TMP8]], zeroinitializer
1716
; CHECK-NEXT: [[TMP10:%.*]] = freeze <16 x i1> [[TMP9]]

revert_patches.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,6 @@ breaks build of hipRuntime
1717
51ad2901ca54 [Clang] Improve diagnostic on `[[nodiscard]]` attribute (#112521)
1818
Ron, Maneesh
1919
---
20-
Revert: breaks build of rocRAND: assert SLP
21-
b8703369daf7 [SLP] Match poison as instruction with the same opcode
22-
---
2320
Revert: breaks build of rocFFT
2421
854d7301f989 [Clang/AMDGPU] Zero sized arrays not allowed in HIP device code. (#113470)
2522
Ron and Vigneshwar

0 commit comments

Comments
 (0)