Skip to content

Commit e4f663b

Browse files
committed
[SLP] Sort PHIs by ExtractElements when relevant
Change-Id: I62668ebc355c7746ec5ef69249573fe87e1343cc
1 parent 9b066f0 commit e4f663b

File tree

4 files changed

+187
-12
lines changed

4 files changed

+187
-12
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22687,8 +22687,41 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
2268722687
if (NodeI1 != NodeI2)
2268822688
return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
2268922689
InstructionsState S = getSameOpcode({I1, I2}, *TLI);
22690-
if (S && !S.isAltShuffle())
22690+
if (S && !S.isAltShuffle()) {
22691+
if (!isa<ExtractElementInst>(I1) || !isa<ExtractElementInst>(I2))
22692+
continue;
22693+
22694+
auto E1 = cast<ExtractElementInst>(I1);
22695+
auto E2 = cast<ExtractElementInst>(I2);
22696+
// Sort on ExtractElementInsts primarily by vector operands. Prefer
22697+
// program order of the vector operands
22698+
if (E1->getVectorOperand() != E2->getVectorOperand()) {
22699+
Instruction *V1 = dyn_cast<Instruction>(E1->getVectorOperand());
22700+
Instruction *V2 = dyn_cast<Instruction>(E2->getVectorOperand());
22701+
if (!V1 || !V2)
22702+
continue;
22703+
if (V1->getParent() != V2->getParent())
22704+
continue;
22705+
return V1->comesBefore(V2);
22706+
}
22707+
// If we have the same vector operand, try to sort by constant index
22708+
auto Id1 = E1->getIndexOperand();
22709+
auto Id2 = E2->getIndexOperand();
22710+
// Bring constants to the top
22711+
if (isa<ConstantInt>(Id1) && !isa<ConstantInt>(Id2))
22712+
return true;
22713+
if (!isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2))
22714+
return false;
22715+
if (isa<ConstantInt>(Id1) && isa<ConstantInt>(Id2)) {
22716+
auto C1 = cast<ConstantInt>(Id1);
22717+
auto C2 = cast<ConstantInt>(Id2);
22718+
// First elements first
22719+
return C1->getValue().getZExtValue() <
22720+
C2->getValue().getZExtValue();
22721+
}
22722+
2269122723
continue;
22724+
}
2269222725
return I1->getOpcode() < I2->getOpcode();
2269322726
}
2269422727
if (I1)
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=GFX9
3+
4+
define protected amdgpu_kernel void @myfun(i32 %in, ptr addrspace(1) %aptr1, ptr addrspace(1) %bptr1, ptr addrspace(1) %aptr2, ptr addrspace(1) %bptr2) {
5+
; GFX9-LABEL: define protected amdgpu_kernel void @myfun(
6+
; GFX9-SAME: i32 [[IN:%.*]], ptr addrspace(1) [[APTR1:%.*]], ptr addrspace(1) [[BPTR1:%.*]], ptr addrspace(1) [[APTR2:%.*]], ptr addrspace(1) [[BPTR2:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; GFX9-NEXT: [[ENTRY:.*]]:
8+
; GFX9-NEXT: [[VEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR1]], align 16
9+
; GFX9-NEXT: [[BVEC1:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR1]], align 16
10+
; GFX9-NEXT: [[TMP0:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
11+
; GFX9-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
12+
; GFX9-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
13+
; GFX9-NEXT: [[TMP3:%.*]] = shufflevector <8 x i16> [[VEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
14+
; GFX9-NEXT: [[TMP4:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
15+
; GFX9-NEXT: [[TMP5:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
16+
; GFX9-NEXT: [[TMP6:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
17+
; GFX9-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[BVEC1]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
18+
; GFX9-NEXT: br label %[[DO_BODY:.*]]
19+
; GFX9: [[DO_BODY]]:
20+
; GFX9-NEXT: [[ADD:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[NEWADD:%.*]], %[[DO_BODY]] ]
21+
; GFX9-NEXT: [[TMP8:%.*]] = phi <2 x i16> [ [[TMP0]], %[[ENTRY]] ], [ [[TMP30:%.*]], %[[DO_BODY]] ]
22+
; GFX9-NEXT: [[TMP9:%.*]] = phi <2 x i16> [ [[TMP1]], %[[ENTRY]] ], [ [[TMP31:%.*]], %[[DO_BODY]] ]
23+
; GFX9-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP2]], %[[ENTRY]] ], [ [[TMP32:%.*]], %[[DO_BODY]] ]
24+
; GFX9-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[TMP3]], %[[ENTRY]] ], [ [[TMP33:%.*]], %[[DO_BODY]] ]
25+
; GFX9-NEXT: [[TMP12:%.*]] = phi <2 x i16> [ [[TMP4]], %[[ENTRY]] ], [ [[TMP34:%.*]], %[[DO_BODY]] ]
26+
; GFX9-NEXT: [[TMP13:%.*]] = phi <2 x i16> [ [[TMP5]], %[[ENTRY]] ], [ [[TMP35:%.*]], %[[DO_BODY]] ]
27+
; GFX9-NEXT: [[TMP14:%.*]] = phi <2 x i16> [ [[TMP6]], %[[ENTRY]] ], [ [[TMP36:%.*]], %[[DO_BODY]] ]
28+
; GFX9-NEXT: [[TMP15:%.*]] = phi <2 x i16> [ [[TMP7]], %[[ENTRY]] ], [ [[TMP37:%.*]], %[[DO_BODY]] ]
29+
; GFX9-NEXT: [[TMP16:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
30+
; GFX9-NEXT: [[TMP17:%.*]] = shufflevector <2 x i16> [[TMP9]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
31+
; GFX9-NEXT: [[TMP18:%.*]] = shufflevector <2 x i16> [[TMP8]], <2 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
32+
; GFX9-NEXT: [[TMP19:%.*]] = shufflevector <2 x i16> [[TMP10]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
33+
; GFX9-NEXT: [[TMP20:%.*]] = shufflevector <8 x i16> [[TMP18]], <8 x i16> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
34+
; GFX9-NEXT: [[TMP21:%.*]] = shufflevector <2 x i16> [[TMP11]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
35+
; GFX9-NEXT: [[TMP22:%.*]] = shufflevector <8 x i16> [[TMP20]], <8 x i16> [[TMP21]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
36+
; GFX9-NEXT: [[TMP23:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
37+
; GFX9-NEXT: [[TMP24:%.*]] = shufflevector <2 x i16> [[TMP13]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
38+
; GFX9-NEXT: [[TMP25:%.*]] = shufflevector <2 x i16> [[TMP12]], <2 x i16> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
39+
; GFX9-NEXT: [[TMP26:%.*]] = shufflevector <2 x i16> [[TMP14]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
40+
; GFX9-NEXT: [[TMP27:%.*]] = shufflevector <8 x i16> [[TMP25]], <8 x i16> [[TMP26]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 poison, i32 poison>
41+
; GFX9-NEXT: [[TMP28:%.*]] = shufflevector <2 x i16> [[TMP15]], <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
42+
; GFX9-NEXT: [[TMP29:%.*]] = shufflevector <8 x i16> [[TMP27]], <8 x i16> [[TMP28]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
43+
; GFX9-NEXT: [[RES:%.*]] = add <8 x i16> [[TMP22]], [[TMP29]]
44+
; GFX9-NEXT: [[VEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[APTR2]], align 16
45+
; GFX9-NEXT: [[BVEC2:%.*]] = load <8 x i16>, ptr addrspace(1) [[BPTR2]], align 16
46+
; GFX9-NEXT: [[NEWADD]] = add i32 [[ADD]], 1
47+
; GFX9-NEXT: [[COND:%.*]] = icmp sgt i32 [[NEWADD]], [[IN]]
48+
; GFX9-NEXT: [[TMP30]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
49+
; GFX9-NEXT: [[TMP31]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
50+
; GFX9-NEXT: [[TMP32]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
51+
; GFX9-NEXT: [[TMP33]] = shufflevector <8 x i16> [[VEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
52+
; GFX9-NEXT: [[TMP34]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 0, i32 1>
53+
; GFX9-NEXT: [[TMP35]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 2, i32 3>
54+
; GFX9-NEXT: [[TMP36]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 4, i32 5>
55+
; GFX9-NEXT: [[TMP37]] = shufflevector <8 x i16> [[BVEC2]], <8 x i16> poison, <2 x i32> <i32 6, i32 7>
56+
; GFX9-NEXT: br i1 [[COND]], label %[[DO_BODY]], label %[[END:.*]]
57+
; GFX9: [[END]]:
58+
; GFX9-NEXT: ret void
59+
;
60+
entry:
61+
%vec1 = load <8 x i16>, ptr addrspace(1) %aptr1
62+
%el0 = extractelement <8 x i16> %vec1, i64 0
63+
%el1 = extractelement <8 x i16> %vec1, i64 1
64+
%el2 = extractelement <8 x i16> %vec1, i64 2
65+
%el3 = extractelement <8 x i16> %vec1, i64 3
66+
%el4 = extractelement <8 x i16> %vec1, i64 4
67+
%el5 = extractelement <8 x i16> %vec1, i64 5
68+
%el6 = extractelement <8 x i16> %vec1, i64 6
69+
%el7 = extractelement <8 x i16> %vec1, i64 7
70+
%bvec1 = load <8 x i16>, ptr addrspace(1) %bptr1
71+
%bel0 = extractelement <8 x i16> %bvec1, i64 0
72+
%bel1 = extractelement <8 x i16> %bvec1, i64 1
73+
%bel2 = extractelement <8 x i16> %bvec1, i64 2
74+
%bel3 = extractelement <8 x i16> %bvec1, i64 3
75+
%bel4 = extractelement <8 x i16> %bvec1, i64 4
76+
%bel5 = extractelement <8 x i16> %bvec1, i64 5
77+
%bel6 = extractelement <8 x i16> %bvec1, i64 6
78+
%bel7 = extractelement <8 x i16> %bvec1, i64 7
79+
br label %do.body
80+
81+
do.body:
82+
%a_thread_buf1 = phi i16 [%el1, %entry], [%newel1, %do.body]
83+
%a_thread_buf2 = phi i16 [%el2, %entry], [%newel2, %do.body]
84+
%a_thread_buf3 = phi i16 [%el3, %entry], [%newel3, %do.body]
85+
%a_thread_buf4 = phi i16 [%el4, %entry], [%newel4, %do.body]
86+
%a_thread_buf5 = phi i16 [%el5, %entry], [%newel5, %do.body]
87+
%a_thread_buf6 = phi i16 [%el6, %entry], [%newel6, %do.body]
88+
%a_thread_buf7 = phi i16 [%el7, %entry], [%newel7, %do.body]
89+
%b_thread_buf1 = phi i16 [%bel1, %entry], [%bnewel1, %do.body]
90+
%b_thread_buf2 = phi i16 [%bel2, %entry], [%bnewel2, %do.body]
91+
%b_thread_buf3 = phi i16 [%bel3, %entry], [%bnewel3, %do.body]
92+
%b_thread_buf4 = phi i16 [%bel4, %entry], [%bnewel4, %do.body]
93+
%b_thread_buf5 = phi i16 [%bel5, %entry], [%bnewel5, %do.body]
94+
%b_thread_buf6 = phi i16 [%bel6, %entry], [%bnewel6, %do.body]
95+
%b_thread_buf7 = phi i16 [%bel7, %entry], [%bnewel7, %do.body]
96+
%add = phi i32 [0, %entry], [%newadd, %do.body]
97+
%a_thread_buf0 = phi i16 [%el0, %entry], [%newel0, %do.body]
98+
%b_thread_buf0 = phi i16 [%bel0, %entry], [%bnewel0, %do.body]
99+
%a_thread_vec0 = insertelement <8 x i16> poison, i16 %a_thread_buf0, i64 0
100+
%a_thread_vec1 = insertelement <8 x i16> %a_thread_vec0, i16 %a_thread_buf1, i64 1
101+
%a_thread_vec2 = insertelement <8 x i16> %a_thread_vec1, i16 %a_thread_buf2, i64 2
102+
%a_thread_vec3 = insertelement <8 x i16> %a_thread_vec2, i16 %a_thread_buf3, i64 3
103+
%a_thread_vec4 = insertelement <8 x i16> %a_thread_vec3, i16 %a_thread_buf4, i64 4
104+
%a_thread_vec5 = insertelement <8 x i16> %a_thread_vec4, i16 %a_thread_buf5, i64 5
105+
%a_thread_vec6 = insertelement <8 x i16> %a_thread_vec5, i16 %a_thread_buf6, i64 6
106+
%a_thread_vec7 = insertelement <8 x i16> %a_thread_vec6, i16 %a_thread_buf7, i64 7
107+
%b_thread_vec0 = insertelement <8 x i16> poison, i16 %b_thread_buf0, i64 0
108+
%b_thread_vec1 = insertelement <8 x i16> %b_thread_vec0, i16 %b_thread_buf1, i64 1
109+
%b_thread_vec2 = insertelement <8 x i16> %b_thread_vec1, i16 %b_thread_buf2, i64 2
110+
%b_thread_vec3 = insertelement <8 x i16> %b_thread_vec2, i16 %b_thread_buf3, i64 3
111+
%b_thread_vec4 = insertelement <8 x i16> %b_thread_vec3, i16 %b_thread_buf4, i64 4
112+
%b_thread_vec5 = insertelement <8 x i16> %b_thread_vec4, i16 %b_thread_buf5, i64 5
113+
%b_thread_vec6 = insertelement <8 x i16> %b_thread_vec5, i16 %b_thread_buf6, i64 6
114+
%b_thread_vec7 = insertelement <8 x i16> %b_thread_vec6, i16 %b_thread_buf7, i64 7
115+
%res = add <8 x i16> %a_thread_vec7, %b_thread_vec7
116+
%vec2 = load <8 x i16>, ptr addrspace(1) %aptr2
117+
%newel0 = extractelement <8 x i16> %vec2, i64 0
118+
%newel1 = extractelement <8 x i16> %vec2, i64 1
119+
%newel2 = extractelement <8 x i16> %vec2, i64 2
120+
%newel3 = extractelement <8 x i16> %vec2, i64 3
121+
%newel4 = extractelement <8 x i16> %vec2, i64 4
122+
%newel5 = extractelement <8 x i16> %vec2, i64 5
123+
%newel6 = extractelement <8 x i16> %vec2, i64 6
124+
%newel7 = extractelement <8 x i16> %vec2, i64 7
125+
%bvec2 = load <8 x i16>, ptr addrspace(1) %bptr2
126+
%bnewel0 = extractelement <8 x i16> %bvec2, i64 0
127+
%bnewel1 = extractelement <8 x i16> %bvec2, i64 1
128+
%bnewel2 = extractelement <8 x i16> %bvec2, i64 2
129+
%bnewel3 = extractelement <8 x i16> %bvec2, i64 3
130+
%bnewel4 = extractelement <8 x i16> %bvec2, i64 4
131+
%bnewel5 = extractelement <8 x i16> %bvec2, i64 5
132+
%bnewel6 = extractelement <8 x i16> %bvec2, i64 6
133+
%bnewel7 = extractelement <8 x i16> %bvec2, i64 7
134+
%newadd = add i32 %add, 1
135+
%cond = icmp sgt i32 %newadd, %in
136+
br i1 %cond, label %do.body, label %end
137+
138+
end:
139+
ret void
140+
}
141+
142+

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -49,19 +49,19 @@ bb1:
4949
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
5050
; CHECK-LABEL: @phis_reverse(
5151
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
53-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
52+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
53+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
5454
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
5555
; CHECK: bb0:
56-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
57-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
56+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
57+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
5858
; CHECK-NEXT: br label [[BB1]]
5959
; CHECK: bb1:
60-
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
61-
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
60+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3]], [[BB0]] ]
61+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP2]], [[ENTRY]] ], [ [[TMP9]], [[BB0]] ]
6262
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
6363
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
64+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6565
; CHECK-NEXT: ret <4 x half> [[TMP8]]
6666
;
6767
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/revec.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ define ptr @test4() {
141141
; POWEROF2-NEXT: [[TMP1:%.*]] = fadd <8 x float> zeroinitializer, zeroinitializer
142142
; POWEROF2-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 1, i32 2>
143143
; POWEROF2-NEXT: [[TMP3:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 5, i32 6>
144-
; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 4, i32 0>
144+
; POWEROF2-NEXT: [[TMP4:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <2 x i32> <i32 0, i32 4>
145145
; POWEROF2-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> poison, <2 x float> [[TMP2]], i64 0)
146146
; POWEROF2-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.vector.insert.v4f32.v2f32(<4 x float> [[TMP5]], <2 x float> [[TMP3]], i64 2)
147147
; POWEROF2-NEXT: br label [[TMP8:%.*]]
@@ -156,10 +156,10 @@ define ptr @test4() {
156156
; POWEROF2-NEXT: [[TMP13:%.*]] = fmul <2 x float> [[TMP12]], zeroinitializer
157157
; POWEROF2-NEXT: [[TMP14:%.*]] = call <2 x float> @llvm.vector.extract.v2f32.v4f32(<4 x float> [[TMP10]], i64 2)
158158
; POWEROF2-NEXT: [[TMP15:%.*]] = fmul <2 x float> zeroinitializer, [[TMP14]]
159-
; POWEROF2-NEXT: [[TMP16:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
160-
; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP16]]
161159
; POWEROF2-NEXT: [[TMP18:%.*]] = extractelement <2 x float> [[TMP9]], i32 0
162-
; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP18]], 0.000000e+00
160+
; POWEROF2-NEXT: [[TMP17:%.*]] = fmul float 0.000000e+00, [[TMP18]]
161+
; POWEROF2-NEXT: [[TMP30:%.*]] = extractelement <2 x float> [[TMP9]], i32 1
162+
; POWEROF2-NEXT: [[TMP19:%.*]] = fmul float [[TMP30]], 0.000000e+00
163163
; POWEROF2-NEXT: [[TMP20:%.*]] = extractelement <2 x float> [[TMP13]], i32 0
164164
; POWEROF2-NEXT: [[TMP21:%.*]] = fadd reassoc nsz float [[TMP20]], [[TMP17]]
165165
; POWEROF2-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[TMP15]], i32 0

0 commit comments

Comments
 (0)