Skip to content

Commit af3295b

Browse files
[SLP]Enable splat ordering for loads
Enables splat support for loads with lanes > 2 or number of operands > 2. Allows better detection of splats of loads and reduces the number of shuffles in some cases. X86, AVX512, -O3+LTO. Metric: size..text — results, results0, diff: test-suite :: External/SPEC/CFP2006/433.milc/433.milc.test: 154867.00, 156723.00, 1.2%; test-suite :: External/SPEC/CFP2017rate/526.blender_r/526.blender_r.test: 12467735.00, 12468023.00, 0.0%. Better vectorization quality. Reviewers: RKSimon. Reviewed By: RKSimon. Pull Request: #115173
1 parent 43570a2 commit af3295b

File tree

2 files changed

+17
-20
lines changed

2 files changed

+17
-20
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2386,6 +2386,9 @@ class BoUpSLP {
23862386
/// the whole vector (it is mixed with constants or loop invariant values).
23872387
/// Note: This modifies the 'IsUsed' flag, so a cleanUsed() must follow.
23882388
bool shouldBroadcast(Value *Op, unsigned OpIdx, unsigned Lane) {
2389+
// Small number of loads - try load matching.
2390+
if (isa<LoadInst>(Op) && getNumLanes() == 2 && getNumOperands() == 2)
2391+
return false;
23892392
bool OpAPO = getData(OpIdx, Lane).APO;
23902393
bool IsInvariant = L && L->isLoopInvariant(Op);
23912394
unsigned Cnt = 0;
@@ -2511,23 +2514,23 @@ class BoUpSLP {
25112514
Value *OpLane0 = getValue(OpIdx, FirstLane);
25122515
// Keep track if we have instructions with all the same opcode on one
25132516
// side.
2514-
if (isa<LoadInst>(OpLane0))
2515-
ReorderingModes[OpIdx] = ReorderingMode::Load;
2516-
else if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
2517+
if (auto *OpILane0 = dyn_cast<Instruction>(OpLane0)) {
25172518
// Check if OpLane0 should be broadcast.
25182519
if (shouldBroadcast(OpLane0, OpIdx, FirstLane) ||
25192520
!canBeVectorized(OpILane0, OpIdx, FirstLane))
25202521
ReorderingModes[OpIdx] = ReorderingMode::Splat;
2522+
else if (isa<LoadInst>(OpILane0))
2523+
ReorderingModes[OpIdx] = ReorderingMode::Load;
25212524
else
25222525
ReorderingModes[OpIdx] = ReorderingMode::Opcode;
2523-
} else if (isa<Constant>(OpLane0))
2526+
} else if (isa<Constant>(OpLane0)) {
25242527
ReorderingModes[OpIdx] = ReorderingMode::Constant;
2525-
else if (isa<Argument>(OpLane0))
2528+
} else if (isa<Argument>(OpLane0)) {
25262529
// Our best hope is a Splat. It may save some cost in some cases.
25272530
ReorderingModes[OpIdx] = ReorderingMode::Splat;
2528-
else
2529-
// NOTE: This should be unreachable.
2530-
ReorderingModes[OpIdx] = ReorderingMode::Failed;
2531+
} else {
2532+
llvm_unreachable("Unexpected value kind.");
2533+
}
25312534
}
25322535

25332536
// Check that we don't have same operands. No need to reorder if operands

llvm/test/Transforms/SLPVectorizer/RISCV/loads-ordering.ll

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,13 @@ define fastcc void @rephase(ptr %phases_in, ptr %157, i64 %158) {
88
; CHECK-NEXT: [[IND_END11:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP1]]
99
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 8
1010
; CHECK-NEXT: [[IMAG_247:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 408
11-
; CHECK-NEXT: [[MUL35_248:%.*]] = fmul double [[TMP2]], 0.000000e+00
12-
; CHECK-NEXT: store double [[MUL35_248]], ptr [[IMAG_247]], align 8
13-
; CHECK-NEXT: [[ARRAYIDX23_1_249:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 416
14-
; CHECK-NEXT: [[MUL_1_250:%.*]] = fmul double [[TMP2]], 0.000000e+00
15-
; CHECK-NEXT: store double [[MUL_1_250]], ptr [[ARRAYIDX23_1_249]], align 8
1611
; CHECK-NEXT: [[IMAG_1_251:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 424
17-
; CHECK-NEXT: [[TMP3:%.*]] = load double, ptr [[IMAG_1_251]], align 8
18-
; CHECK-NEXT: [[MUL35_1_252:%.*]] = fmul double [[TMP2]], [[TMP3]]
19-
; CHECK-NEXT: store double [[MUL35_1_252]], ptr [[IMAG_1_251]], align 8
20-
; CHECK-NEXT: [[ARRAYIDX23_2_253:%.*]] = getelementptr i8, ptr [[IND_END11]], i64 432
21-
; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[ARRAYIDX23_2_253]], align 8
22-
; CHECK-NEXT: [[MUL_2_254:%.*]] = fmul double [[TMP2]], [[TMP4]]
23-
; CHECK-NEXT: store double [[MUL_2_254]], ptr [[ARRAYIDX23_2_253]], align 8
12+
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x double>, ptr [[IMAG_1_251]], align 8
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x double> poison, double [[TMP2]], i32 0
14+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <4 x i32> zeroinitializer
15+
; CHECK-NEXT: [[TMP6:%.*]] = call <4 x double> @llvm.vector.insert.v4f64.v2f64(<4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <2 x double> [[TMP3]], i64 2)
16+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x double> [[TMP5]], [[TMP6]]
17+
; CHECK-NEXT: store <4 x double> [[TMP7]], ptr [[IMAG_247]], align 8
2418
; CHECK-NEXT: store double [[TMP2]], ptr [[PHASES_IN]], align 8
2519
; CHECK-NEXT: ret void
2620
;

0 commit comments

Comments
 (0)