@@ -123,36 +123,35 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) {
 define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0)
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 4
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsext.vf8 v16, v9
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI9_2)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v0, v16, a2
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v0, v8, 2
 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 6
+; CHECK-NEXT: vslideup.vi v0, v9, 4
 ; CHECK-NEXT: ret
   %mask = call <64 x i1> @llvm.get.active.lane.mask.v64i1.i64(i64 %index, i64 %tc)
   ret <64 x i1> %mask
@@ -161,72 +160,69 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-LABEL: fv128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0)
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsaddu.vx v16, v16, a1
-; CHECK-NEXT: vmsltu.vx v0, v16, a2
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_1)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 6, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 4
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsext.vf8 v16, v9
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 2
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_2)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_2)
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 6
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_3)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_3)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v10, (a0)
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 10, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 8
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsext.vf8 v16, v10
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v10, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v10, v8, 2
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v10, v9, 4
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_4)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_4)
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 12, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 10
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_5)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_5)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vle8.v v9, (a0)
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetivli zero, 14, e8, m1, tu, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 12
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsext.vf8 v16, v9
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v9, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: lui a0, %hi(.LCPI10_6)
 ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_6)
-; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: vid.v v16
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v0, v16, a2
 ; CHECK-NEXT: vsext.vf8 v16, v8
-; CHECK-NEXT: vsaddu.vx v8, v16, a1
-; CHECK-NEXT: vmsltu.vx v16, v8, a2
-; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
-; CHECK-NEXT: vslideup.vi v0, v16, 14
+; CHECK-NEXT: vsaddu.vx v16, v16, a1
+; CHECK-NEXT: vmsltu.vx v8, v16, a2
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslideup.vi v0, v8, 2
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslideup.vi v0, v9, 4
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v0, v10, 8
 ; CHECK-NEXT: ret
   %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
   ret <128 x i1> %mask