@@ -123,35 +123,36 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v0, v16, a2
+; CHECK-NEXT: vsext.vf8 v16, v8
+; CHECK-NEXT: vsaddu.vx v8, v16, a1
+; CHECK-NEXT: vmsltu.vx v16, v8, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v0, v16, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
-; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v9
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v9, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 2
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vsext.vf8 v16, v8
+; CHECK-NEXT: vsaddu.vx v8, v16, a1
+; CHECK-NEXT: vmsltu.vx v16, v8, a2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v0, v16, 4
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v8, 2
+; CHECK-NEXT: vsaddu.vx v8, v16, a1
+; CHECK-NEXT: vmsltu.vx v16, v8, a2
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v9, 4
+; CHECK-NEXT: vslideup.vi v0, v16, 6
 ; CHECK-NEXT: ret
   %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
   ret <64 x i1> %mask
@@ -169,60 +170,62 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vsext.vf8 v16, v8
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vmsltu.vx v10, v16, a2
 ; CHECK-NEXT: vsext.vf8 v16, v9
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v9, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsext.vf8 v16, v9
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v10
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsext.vf8 v16, v9
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v10, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v10, v8, 2
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v10, v9, 4
+; CHECK-NEXT: vslideup.vi v8, v9, 6
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
 ; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vsext.vf8 v16, v8
+; CHECK-NEXT: vid.v v16
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vmsltu.vx v0, v16, a2
 ; CHECK-NEXT: vsext.vf8 v16, v9
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
 ; CHECK-NEXT: vmsltu.vx v9, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v0, v9, 2
+; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsext.vf8 v16, v9
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
+; CHECK-NEXT: vslideup.vi v0, v9, 4
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
-; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v8
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+; CHECK-NEXT: vle8.v v9, (a0)
+; CHECK-NEXT: vsext.vf8 v16, v9
 ; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v8, v16, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v8, 2
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v9, 4
+; CHECK-NEXT: vslideup.vi v0, v9, 6
 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v10, 8
+; CHECK-NEXT: vslideup.vi v0, v8, 8
 ; CHECK-NEXT: ret
   %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
   ret <128 x i1> %mask