Skip to content

Commit 7e9dd7b

Browse files
committed
[SLP] Make code execute like the comment.
If multiple similar operands is in OpsVec, the hash value will be the same. In this situation, the first insertion lane will be kept (the highest lane will be since the loop starts from the last lane). Even if the following loop starts from reverse does not work because they share the same hash value. The code is not executed like the comment.
1 parent 5ca082c commit 7e9dd7b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+262
-323
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2213,6 +2213,7 @@ class BoUpSLP {
22132213
unsigned getBestLaneToStartReordering() const {
22142214
unsigned Min = UINT_MAX;
22152215
unsigned SameOpNumber = 0;
2216+
unsigned NumLanes = getNumLanes();
22162217
// std::pair<unsigned, unsigned> is used to implement a simple voting
22172218
// algorithm and choose the lane with the least number of operands that
22182219
// can freely move about or less profitable because it already has the
@@ -2223,8 +2224,7 @@ class BoUpSLP {
22232224
// Try to be closer to the original results, if we have multiple lanes
22242225
// with same cost. If 2 lanes have the same cost, use the one with the
22252226
// lowest index.
2226-
for (int I = getNumLanes(); I > 0; --I) {
2227-
unsigned Lane = I - 1;
2227+
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
22282228
OperandsOrderData NumFreeOpsHash =
22292229
getMaxNumOperandsThatCanBeReordered(Lane);
22302230
// Compare the number of operands that can move and choose the one with
@@ -2251,7 +2251,7 @@ class BoUpSLP {
22512251
// Select the lane with the minimum counter.
22522252
unsigned BestLane = 0;
22532253
unsigned CntMin = UINT_MAX;
2254-
for (const auto &Data : reverse(HashMap)) {
2254+
for (const auto &Data : HashMap) {
22552255
if (Data.second.first < CntMin) {
22562256
CntMin = Data.second.first;
22572257
BestLane = Data.second.second;

llvm/test/Transforms/SLPVectorizer/AArch64/loadorder.ll

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -421,27 +421,32 @@ define i32 @reduce_blockstrided4x4(ptr nocapture noundef readonly %p1, i32 nound
421421
; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR]], i64 4
422422
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[ADD_PTR64]], i64 4
423423
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P1]], align 1
424-
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[P2]], align 1
424+
; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i32>
425425
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3]], align 1
426+
; CHECK-NEXT: [[TMP7:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i32>
427+
; CHECK-NEXT: [[TMP10:%.*]] = mul nuw nsw <4 x i32> [[TMP1]], [[TMP7]]
428+
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[P2]], align 1
429+
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[TMP5]] to <4 x i32>
426430
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5]], align 1
431+
; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[TMP3]] to <4 x i32>
432+
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <4 x i32> [[TMP6]], [[TMP8]]
427433
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[ADD_PTR]], align 1
428-
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
429-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i8> [[TMP0]], <4 x i8> [[TMP4]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
430-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
431-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
432-
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
433-
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i8> [[TMP8]], <16 x i8> [[TMP9]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
434-
; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i32>
434+
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
435435
; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, ptr [[ARRAYIDX3_1]], align 1
436+
; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32>
437+
; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP11]], [[TMP17]]
438+
; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, ptr [[ADD_PTR64]], align 1
439+
; CHECK-NEXT: [[TMP16:%.*]] = zext <4 x i8> [[TMP15]] to <4 x i32>
436440
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, ptr [[ARRAYIDX5_1]], align 1
437-
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> [[TMP12]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
438-
; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
439-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 poison, i32 poison, i32 poison, i32 poison>
440-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
441-
; CHECK-NEXT: [[TMP18:%.*]] = shufflevector <16 x i8> [[TMP16]], <16 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
442-
; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[TMP18]] to <16 x i32>
443-
; CHECK-NEXT: [[TMP20:%.*]] = mul nuw nsw <16 x i32> [[TMP11]], [[TMP19]]
444-
; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP20]])
441+
; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[TMP13]] to <4 x i32>
442+
; CHECK-NEXT: [[TMP19:%.*]] = mul nuw nsw <4 x i32> [[TMP16]], [[TMP18]]
443+
; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
444+
; CHECK-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP9]])
445+
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[TMP20]], [[TMP24]]
446+
; CHECK-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
447+
; CHECK-NEXT: [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[TMP22]]
448+
; CHECK-NEXT: [[TMP23:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP19]])
449+
; CHECK-NEXT: [[TMP21:%.*]] = add i32 [[OP_RDX1]], [[TMP23]]
445450
; CHECK-NEXT: ret i32 [[TMP21]]
446451
;
447452
entry:

llvm/test/Transforms/SLPVectorizer/AArch64/reused-scalar-repeated-in-node.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -37,28 +37,28 @@ define void @test() {
3737
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x float> [[TMP10]], float [[I69]], i32 15
3838
; CHECK-NEXT: br i1 poison, label %[[BB167:.*]], label %[[BB77:.*]]
3939
; CHECK: [[BB77]]:
40-
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
40+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <16 x float> [[TMP11]], <16 x float> poison, <8 x i32> <i32 poison, i32 7, i32 poison, i32 14, i32 poison, i32 6, i32 6, i32 6>
41+
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> poison, float [[I70]], i32 0
42+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[I69]], i32 1
4143
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 1, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison>
4244
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 poison, i32 poison>
4345
; CHECK-NEXT: br label %[[BB78:.*]]
4446
; CHECK: [[BB78]]:
45-
; CHECK-NEXT: [[TMP15:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
46-
; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x float> [ poison, %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
47-
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 3, i32 1, i32 2, i32 3, i32 0, i32 2, i32 3, i32 2, i32 6, i32 2, i32 3, i32 0, i32 7, i32 6, i32 6>
47+
; CHECK-NEXT: [[TMP19:%.*]] = phi <8 x float> [ [[TMP12]], %[[BB77]] ], [ [[TMP30:%.*]], %[[BB78]] ]
48+
; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x float> [ [[TMP16]], %[[BB77]] ], [ [[TMP31:%.*]], %[[BB78]] ]
49+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <8 x float> [[TMP19]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 2, i32 4, i32 0, i32 5, i32 1, i32 5, i32 3, i32 5, i32 1, i32 0, i32 3, i32 3, i32 3>
4850
; CHECK-NEXT: [[TMP18:%.*]] = fmul fast <16 x float> [[TMP17]], [[TMP13]]
49-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 1, i32 poison, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 6, i32 7, i32 7>
50-
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> poison, <16 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
51-
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x float> [[TMP19]], <16 x float> [[TMP20]], <16 x i32> <i32 0, i32 17, i32 2, i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 12, i32 13, i32 14, i32 15>
52-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53-
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 17, i32 6, i32 7, i32 8, i32 23, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
54-
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 1, i32 5, i32 3, i32 1, i32 3, i32 9, i32 3, i32 1, i32 12, i32 13, i32 14, i32 15>
51+
; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <8 x float> [[TMP19]], <8 x float> poison, <16 x i32> <i32 poison, i32 4, i32 poison, i32 5, i32 1, i32 poison, i32 2, i32 4, i32 2, i32 poison, i32 2, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
52+
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <2 x float> [[TMP20]], <2 x float> poison, <16 x i32> <i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
53+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x float> [[TMP21]], <16 x float> [[TMP22]], <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 25, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
54+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x float> [[TMP23]], <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 3, i32 4, i32 0, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 0, i32 9, i32 9, i32 9>
5555
; CHECK-NEXT: [[TMP25:%.*]] = call <16 x float> @llvm.vector.insert.v16f32.v2f32(<16 x float> [[TMP14]], <2 x float> [[TMP0]], i64 2)
5656
; CHECK-NEXT: [[TMP26:%.*]] = fmul fast <16 x float> [[TMP24]], [[TMP25]]
5757
; CHECK-NEXT: [[TMP27:%.*]] = fadd fast <16 x float> [[TMP26]], [[TMP18]]
5858
; CHECK-NEXT: [[TMP28:%.*]] = fadd fast <16 x float> [[TMP27]], poison
5959
; CHECK-NEXT: [[TMP29:%.*]] = fadd fast <16 x float> [[TMP28]], poison
60-
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 5, i32 6, i32 7, i32 14, i32 14, i32 14, i32 15>
61-
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 10, i32 11>
60+
; CHECK-NEXT: [[TMP30]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <8 x i32> <i32 12, i32 7, i32 10, i32 14, i32 11, i32 6, i32 6, i32 6>
61+
; CHECK-NEXT: [[TMP31]] = shufflevector <16 x float> [[TMP29]], <16 x float> poison, <2 x i32> <i32 5, i32 15>
6262
; CHECK-NEXT: br i1 poison, label %[[BB78]], label %[[BB167]]
6363
; CHECK: [[BB167]]:
6464
; CHECK-NEXT: [[TMP32:%.*]] = phi <16 x float> [ [[TMP11]], %[[BB64]] ], [ [[TMP29]], %[[BB78]] ]

llvm/test/Transforms/SLPVectorizer/AArch64/slp-fma-loss.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ define void @slp_not_profitable_with_fast_fmf(ptr %A, ptr %B) {
1313
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
1414
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
1515
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
16-
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]
16+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
1717
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
1818
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
1919
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -54,7 +54,7 @@ define void @slp_not_profitable_with_reassoc_fmf(ptr %A, ptr %B) {
5454
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
5555
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
5656
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
57-
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]
57+
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
5858
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
5959
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
6060
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -96,7 +96,7 @@ define void @slp_profitable_missing_fmf_on_fadd_fsub(ptr %A, ptr %B) {
9696
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
9797
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
9898
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
99-
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP3]], [[TMP1]]
99+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP3]]
100100
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
101101
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
102102
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -138,7 +138,7 @@ define void @slp_profitable_missing_fmf_on_fmul_fadd_fsub(ptr %A, ptr %B) {
138138
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
139139
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
140140
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
141-
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP3]], [[TMP1]]
141+
; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x float> [[TMP1]], [[TMP3]]
142142
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
143143
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
144144
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer
@@ -180,7 +180,7 @@ define void @slp_profitable_missing_fmf_nnans_only(ptr %A, ptr %B) {
180180
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_B_1]], align 4
181181
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[B_0]], i32 0
182182
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer
183-
; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP3]], [[TMP1]]
183+
; CHECK-NEXT: [[TMP4:%.*]] = fmul nnan <2 x float> [[TMP1]], [[TMP3]]
184184
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> poison, <2 x i32> <i32 1, i32 0>
185185
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> poison, float [[A_0]], i32 0
186186
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> poison, <2 x i32> zeroinitializer

0 commit comments

Comments
 (0)