@@ -85,11 +85,11 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) {
 ;
 ; ZIP-LABEL: vector_deinterleave_v2i64_v4i64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
 ; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
-; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
+; ZIP-NEXT: ri.vunzip2b.vv v11, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: vmv.v.v v9, v11
 ; ZIP-NEXT: ret
   %retval = call {<2 x i64>, <2 x i64>} @llvm.vector.deinterleave2.v4i64(<4 x i64> %vec)
   ret {<2 x i64>, <2 x i64>} %retval
@@ -129,62 +129,51 @@ define {<4 x i64>, <4 x i64>} @vector_deinterleave_v4i64_v8i64(<8 x i64> %vec) {
 ; ZIP-LABEL: vector_deinterleave_v4i64_v8i64:
 ; ZIP: # %bb.0:
 ; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v12, v8, 1
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 2
-; ZIP-NEXT: vmv.v.i v14, 12
-; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; ZIP-NEXT: vslidedown.vi v16, v8, 4
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslidedown.vi v12, v8, 2, v0.t
-; ZIP-NEXT: ri.vzip2a.vv v18, v8, v10
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v8, v16, 2
-; ZIP-NEXT: vmv1r.v v0, v14
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: ri.vzip2a.vv v12, v16, v8, v0.t
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslideup.vi v8, v16, 2
-; ZIP-NEXT: vslideup.vi v8, v16, 1, v0.t
-; ZIP-NEXT: vmv1r.v v0, v14
-; ZIP-NEXT: vmerge.vvm v8, v18, v8, v0
-; ZIP-NEXT: vmv2r.v v10, v12
+; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT: ri.vunzip2b.vv v14, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: vmv.v.v v10, v14
 ; ZIP-NEXT: ret
   %retval = call {<4 x i64>, <4 x i64>} @llvm.vector.deinterleave2.v8i64(<8 x i64> %vec)
   ret {<4 x i64>, <4 x i64>} %retval
 }
 
 define {<8 x i64>, <8 x i64>} @vector_deinterleave_v8i64_v16i64(<16 x i64> %vec) {
-; CHECK-LABEL: vector_deinterleave_v8i64_v16i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 85
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.i v0, -16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vsetivli zero, 8, e64, m8, ta, ma
-; CHECK-NEXT: vslidedown.vi v24, v8, 8
-; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: li a0, 170
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vv v20, v16, v16
-; CHECK-NEXT: vmv.s.x v21, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vcompress.vm v16, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vi v22, v20, -8
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vcompress.vm v12, v8, v21
-; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
-; CHECK-NEXT: vadd.vi v8, v20, -7
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; CHECK-NEXT: vrgatherei16.vv v16, v24, v22, v0.t
-; CHECK-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; V-LABEL: vector_deinterleave_v8i64_v16i64:
+; V: # %bb.0:
+; V-NEXT: li a0, 85
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vmv.v.i v0, -16
+; V-NEXT: vid.v v16
+; V-NEXT: vsetivli zero, 8, e64, m8, ta, ma
+; V-NEXT: vslidedown.vi v24, v8, 8
+; V-NEXT: vmv.s.x v12, a0
+; V-NEXT: li a0, 170
+; V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; V-NEXT: vadd.vv v20, v16, v16
+; V-NEXT: vmv.s.x v21, a0
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; V-NEXT: vcompress.vm v16, v8, v12
+; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; V-NEXT: vadd.vi v22, v20, -8
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; V-NEXT: vcompress.vm v12, v8, v21
+; V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; V-NEXT: vadd.vi v8, v20, -7
+; V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; V-NEXT: vrgatherei16.vv v16, v24, v22, v0.t
+; V-NEXT: vrgatherei16.vv v12, v24, v8, v0.t
+; V-NEXT: vmv.v.v v8, v16
+; V-NEXT: ret
+;
+; ZIP-LABEL: vector_deinterleave_v8i64_v16i64:
+; ZIP: # %bb.0:
+; ZIP-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v12
+; ZIP-NEXT: ri.vunzip2b.vv v20, v8, v12
+; ZIP-NEXT: vmv.v.v v8, v16
+; ZIP-NEXT: vmv.v.v v12, v20
+; ZIP-NEXT: ret
   %retval = call {<8 x i64>, <8 x i64>} @llvm.vector.deinterleave2.v16i64(<16 x i64> %vec)
   ret {<8 x i64>, <8 x i64>} %retval
 }
@@ -498,11 +487,11 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double
 ;
 ; ZIP-LABEL: vector_deinterleave_v2f64_v4f64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v10, v8, 2
 ; ZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; ZIP-NEXT: ri.vzipodd.vv v9, v8, v10
-; ZIP-NEXT: vslideup.vi v8, v10, 1
+; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
+; ZIP-NEXT: ri.vunzip2b.vv v12, v8, v9
+; ZIP-NEXT: vmv.v.v v8, v10
+; ZIP-NEXT: vmv.v.v v9, v12
 ; ZIP-NEXT: ret
   %retval = call {<2 x double>, <2 x double>} @llvm.vector.deinterleave2.v4f64(<4 x double> %vec)
   ret {<2 x double>, <2 x double>} %retval
@@ -541,31 +530,11 @@ define {<4 x double>, <4 x double>} @vector_deinterleave_v4f64_v8f64(<8 x double
 ;
 ; ZIP-LABEL: vector_deinterleave_v4f64_v8f64:
 ; ZIP: # %bb.0:
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 8
-; ZIP-NEXT: vsetivli zero, 4, e64, m4, ta, ma
-; ZIP-NEXT: vslidedown.vi v16, v8, 4
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v12, v8, 2
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v10, 12
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: vslideup.vi v14, v16, 2
-; ZIP-NEXT: vslideup.vi v14, v16, 1, v0.t
-; ZIP-NEXT: ri.vzip2a.vv v18, v8, v12
-; ZIP-NEXT: vmv1r.v v0, v10
-; ZIP-NEXT: vmerge.vvm v12, v18, v14, v0
-; ZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
-; ZIP-NEXT: vslidedown.vi v14, v16, 2
-; ZIP-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; ZIP-NEXT: vmv.v.i v0, 2
-; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, mu
-; ZIP-NEXT: ri.vzip2a.vv v18, v16, v14
-; ZIP-NEXT: vslidedown.vi v14, v8, 1
-; ZIP-NEXT: vslidedown.vi v14, v8, 2, v0.t
-; ZIP-NEXT: vmv1r.v v0, v10
-; ZIP-NEXT: vmerge.vvm v10, v14, v18, v0
-; ZIP-NEXT: vmv2r.v v8, v12
+; ZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; ZIP-NEXT: ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT: ri.vunzip2b.vv v16, v8, v10
+; ZIP-NEXT: vmv.v.v v8, v12
+; ZIP-NEXT: vmv.v.v v10, v16
 ; ZIP-NEXT: ret
   %retval = call {<4 x double>, <4 x double>} @llvm.vector.deinterleave2.v8f64(<8 x double> %vec)
   ret {<4 x double>, <4 x double>} %retval