@@ -58,6 +58,182 @@ define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 ze
  ret <4 x i1> %v
}

+ define <8 x i1> @vpmerge_vv_v8i1(<8 x i1> %va, <8 x i1> %vb, <8 x i1> %m, i32 zeroext %evl) {
+ ; RV32-LABEL: vpmerge_vv_v8i1:
+ ; RV32: # %bb.0:
+ ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+ ; RV32-NEXT: vid.v v10
+ ; RV32-NEXT: vmsltu.vx v12, v10, a0
+ ; RV32-NEXT: vmand.mm v9, v9, v12
+ ; RV32-NEXT: vmandn.mm v8, v8, v9
+ ; RV32-NEXT: vmand.mm v9, v0, v9
+ ; RV32-NEXT: vmor.mm v0, v9, v8
+ ; RV32-NEXT: ret
+ ;
+ ; RV64-LABEL: vpmerge_vv_v8i1:
+ ; RV64: # %bb.0:
+ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+ ; RV64-NEXT: vid.v v12
+ ; RV64-NEXT: vmsltu.vx v10, v12, a0
+ ; RV64-NEXT: vmand.mm v9, v9, v10
+ ; RV64-NEXT: vmandn.mm v8, v8, v9
+ ; RV64-NEXT: vmand.mm v9, v0, v9
+ ; RV64-NEXT: vmor.mm v0, v9, v8
+ ; RV64-NEXT: ret
+ ;
+ ; RV32ZVFHMIN-LABEL: vpmerge_vv_v8i1:
+ ; RV32ZVFHMIN: # %bb.0:
+ ; RV32ZVFHMIN-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+ ; RV32ZVFHMIN-NEXT: vid.v v10
+ ; RV32ZVFHMIN-NEXT: vmsltu.vx v12, v10, a0
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v12
+ ; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+ ; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+ ; RV32ZVFHMIN-NEXT: ret
+ ;
+ ; RV64ZVFHMIN-LABEL: vpmerge_vv_v8i1:
+ ; RV64ZVFHMIN: # %bb.0:
+ ; RV64ZVFHMIN-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+ ; RV64ZVFHMIN-NEXT: vid.v v12
+ ; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
+ ; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+ ; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+ ; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+ ; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+ ; RV64ZVFHMIN-NEXT: ret
+ %v = call <8 x i1> @llvm.vp.merge.v8i1(<8 x i1> %m, <8 x i1> %va, <8 x i1> %vb, i32 %evl)
+ ret <8 x i1> %v
+ }
+
+ define <16 x i1> @vpmerge_vv_v16i1(<16 x i1> %va, <16 x i1> %vb, <16 x i1> %m, i32 zeroext %evl) {
+ ; RV32-LABEL: vpmerge_vv_v16i1:
+ ; RV32: # %bb.0:
+ ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+ ; RV32-NEXT: vid.v v12
+ ; RV32-NEXT: vmsltu.vx v10, v12, a0
+ ; RV32-NEXT: vmand.mm v9, v9, v10
+ ; RV32-NEXT: vmandn.mm v8, v8, v9
+ ; RV32-NEXT: vmand.mm v9, v0, v9
+ ; RV32-NEXT: vmor.mm v0, v9, v8
+ ; RV32-NEXT: ret
+ ;
+ ; RV64-LABEL: vpmerge_vv_v16i1:
+ ; RV64: # %bb.0:
+ ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+ ; RV64-NEXT: vid.v v16
+ ; RV64-NEXT: vmsltu.vx v10, v16, a0
+ ; RV64-NEXT: vmand.mm v9, v9, v10
+ ; RV64-NEXT: vmandn.mm v8, v8, v9
+ ; RV64-NEXT: vmand.mm v9, v0, v9
+ ; RV64-NEXT: vmor.mm v0, v9, v8
+ ; RV64-NEXT: ret
+ ;
+ ; RV32ZVFHMIN-LABEL: vpmerge_vv_v16i1:
+ ; RV32ZVFHMIN: # %bb.0:
+ ; RV32ZVFHMIN-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+ ; RV32ZVFHMIN-NEXT: vid.v v12
+ ; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v12, a0
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+ ; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+ ; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+ ; RV32ZVFHMIN-NEXT: ret
+ ;
+ ; RV64ZVFHMIN-LABEL: vpmerge_vv_v16i1:
+ ; RV64ZVFHMIN: # %bb.0:
+ ; RV64ZVFHMIN-NEXT: vsetivli zero, 16, e64, m8, ta, ma
+ ; RV64ZVFHMIN-NEXT: vid.v v16
+ ; RV64ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
+ ; RV64ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+ ; RV64ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+ ; RV64ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+ ; RV64ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+ ; RV64ZVFHMIN-NEXT: ret
+ %v = call <16 x i1> @llvm.vp.merge.v16i1(<16 x i1> %m, <16 x i1> %va, <16 x i1> %vb, i32 %evl)
+ ret <16 x i1> %v
+ }
+
+ define <32 x i1> @vpmerge_vv_v32i1(<32 x i1> %va, <32 x i1> %vb, <32 x i1> %m, i32 zeroext %evl) {
+ ; RV32-LABEL: vpmerge_vv_v32i1:
+ ; RV32: # %bb.0:
+ ; RV32-NEXT: li a1, 32
+ ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+ ; RV32-NEXT: vid.v v16
+ ; RV32-NEXT: vmsltu.vx v10, v16, a0
+ ; RV32-NEXT: vmand.mm v9, v9, v10
+ ; RV32-NEXT: vmandn.mm v8, v8, v9
+ ; RV32-NEXT: vmand.mm v9, v0, v9
+ ; RV32-NEXT: vmor.mm v0, v9, v8
+ ; RV32-NEXT: ret
+ ;
+ ; RV64-LABEL: vpmerge_vv_v32i1:
+ ; RV64: # %bb.0:
+ ; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+ ; RV64-NEXT: vmv.v.i v10, 0
+ ; RV64-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+ ; RV64-NEXT: vmerge.vim v12, v10, 1, v0
+ ; RV64-NEXT: vmv1r.v v0, v8
+ ; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+ ; RV64-NEXT: vmerge.vim v10, v10, 1, v0
+ ; RV64-NEXT: vmv1r.v v0, v9
+ ; RV64-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+ ; RV64-NEXT: vmerge.vvm v10, v10, v12, v0
+ ; RV64-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+ ; RV64-NEXT: vmsne.vi v0, v10, 0
+ ; RV64-NEXT: ret
+ ;
+ ; RV32ZVFHMIN-LABEL: vpmerge_vv_v32i1:
+ ; RV32ZVFHMIN: # %bb.0:
+ ; RV32ZVFHMIN-NEXT: li a1, 32
+ ; RV32ZVFHMIN-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+ ; RV32ZVFHMIN-NEXT: vid.v v16
+ ; RV32ZVFHMIN-NEXT: vmsltu.vx v10, v16, a0
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v9, v10
+ ; RV32ZVFHMIN-NEXT: vmandn.mm v8, v8, v9
+ ; RV32ZVFHMIN-NEXT: vmand.mm v9, v0, v9
+ ; RV32ZVFHMIN-NEXT: vmor.mm v0, v9, v8
+ ; RV32ZVFHMIN-NEXT: ret
+ ;
+ ; RV64ZVFHMIN-LABEL: vpmerge_vv_v32i1:
+ ; RV64ZVFHMIN: # %bb.0:
+ ; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+ ; RV64ZVFHMIN-NEXT: vmv.v.i v10, 0
+ ; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, ta, ma
+ ; RV64ZVFHMIN-NEXT: vmerge.vim v12, v10, 1, v0
+ ; RV64ZVFHMIN-NEXT: vmv1r.v v0, v8
+ ; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+ ; RV64ZVFHMIN-NEXT: vmerge.vim v10, v10, 1, v0
+ ; RV64ZVFHMIN-NEXT: vmv1r.v v0, v9
+ ; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e8, m2, tu, ma
+ ; RV64ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0
+ ; RV64ZVFHMIN-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+ ; RV64ZVFHMIN-NEXT: vmsne.vi v0, v10, 0
+ ; RV64ZVFHMIN-NEXT: ret
+ %v = call <32 x i1> @llvm.vp.merge.v32i1(<32 x i1> %m, <32 x i1> %va, <32 x i1> %vb, i32 %evl)
+ ret <32 x i1> %v
+ }
+
+ define <64 x i1> @vpmerge_vv_v64i1(<64 x i1> %va, <64 x i1> %vb, <64 x i1> %m, i32 zeroext %evl) {
+ ; CHECK-LABEL: vpmerge_vv_v64i1:
+ ; CHECK: # %bb.0:
+ ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+ ; CHECK-NEXT: vmv.v.i v12, 0
+ ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+ ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0
+ ; CHECK-NEXT: vmv1r.v v0, v8
+ ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma
+ ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0
+ ; CHECK-NEXT: vmv1r.v v0, v9
+ ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma
+ ; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0
+ ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+ ; CHECK-NEXT: vmsne.vi v0, v12, 0
+ ; CHECK-NEXT: ret
+ %v = call <64 x i1> @llvm.vp.merge.v64i1(<64 x i1> %m, <64 x i1> %va, <64 x i1> %vb, i32 %evl)
+ ret <64 x i1> %v
+ }
+
declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)

define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
@@ -1188,10 +1364,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
- ; CHECK-NEXT: bltu a2, a1, .LBB79_2
+ ; CHECK-NEXT: bltu a2, a1, .LBB83_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
- ; CHECK-NEXT: .LBB79_2:
+ ; CHECK-NEXT: .LBB83_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0
; CHECK-NEXT: addi a0, a2, -16
@@ -1221,10 +1397,10 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
- ; CHECK-NEXT: bltu a0, a2, .LBB80_2
+ ; CHECK-NEXT: bltu a0, a2, .LBB84_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
- ; CHECK-NEXT: .LBB80_2:
+ ; CHECK-NEXT: .LBB84_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma
; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
; CHECK-NEXT: addi a1, a0, -16