Skip to content

Commit c6ca13d

Browse files
authored
[RISCV] Lower interleave + deinterleave for zvfhmin and zvfbfmin (#108404)
Fortunately f16 and bf16 are always < EEW, so we can always lower via widening or narrowing. This means we don't need to add patterns for vrgather_vv_vl just yet.
1 parent fc1c481 commit c6ca13d

File tree

3 files changed

+171
-8
lines changed

3 files changed

+171
-8
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1078,7 +1078,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
10781078
ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP},
10791079
VT, Custom);
10801080
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1081-
ISD::EXTRACT_SUBVECTOR},
1081+
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
1082+
ISD::VECTOR_DEINTERLEAVE},
10821083
VT, Custom);
10831084
if (Subtarget.hasStdExtZfhmin())
10841085
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@@ -1117,7 +1118,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
11171118
Custom);
11181119
setOperationAction(ISD::SELECT_CC, VT, Expand);
11191120
setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
1120-
ISD::EXTRACT_SUBVECTOR},
1121+
ISD::EXTRACT_SUBVECTOR, ISD::VECTOR_INTERLEAVE,
1122+
ISD::VECTOR_DEINTERLEAVE},
11211123
VT, Custom);
11221124
if (Subtarget.hasStdExtZfbfmin())
11231125
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
3-
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
4+
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
5+
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
46

57
; Integers
68

@@ -255,6 +257,18 @@ declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.vector.deinterleave2.nxv1
255257

256258
; Floats
257259

260+
define {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @vector_deinterleave_nxv2bf16_nxv4bf16(<vscale x 4 x bfloat> %vec) {
261+
; CHECK-LABEL: vector_deinterleave_nxv2bf16_nxv4bf16:
262+
; CHECK: # %bb.0:
263+
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
264+
; CHECK-NEXT: vnsrl.wi v10, v8, 0
265+
; CHECK-NEXT: vnsrl.wi v9, v8, 16
266+
; CHECK-NEXT: vmv1r.v v8, v10
267+
; CHECK-NEXT: ret
268+
%retval = call {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} @llvm.vector.deinterleave2.nxv4bf16(<vscale x 4 x bfloat> %vec)
269+
ret {<vscale x 2 x bfloat>, <vscale x 2 x bfloat>} %retval
270+
}
271+
258272
define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
259273
; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
260274
; CHECK: # %bb.0:
@@ -267,6 +281,19 @@ define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_n
267281
ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
268282
}
269283

284+
define {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @vector_deinterleave_nxv4bf16_nxv8bf16(<vscale x 8 x bfloat> %vec) {
285+
; CHECK-LABEL: vector_deinterleave_nxv4bf16_nxv8bf16:
286+
; CHECK: # %bb.0:
287+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
288+
; CHECK-NEXT: vnsrl.wi v10, v8, 0
289+
; CHECK-NEXT: vnsrl.wi v11, v8, 16
290+
; CHECK-NEXT: vmv.v.v v8, v10
291+
; CHECK-NEXT: vmv.v.v v9, v11
292+
; CHECK-NEXT: ret
293+
%retval = call {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} @llvm.vector.deinterleave2.nxv8bf16(<vscale x 8 x bfloat> %vec)
294+
ret {<vscale x 4 x bfloat>, <vscale x 4 x bfloat>} %retval
295+
}
296+
270297
define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
271298
; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
272299
; CHECK: # %bb.0:
@@ -294,6 +321,19 @@ define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32
294321
ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
295322
}
296323

324+
define {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @vector_deinterleave_nxv8bf16_nxv16bf16(<vscale x 16 x bfloat> %vec) {
325+
; CHECK-LABEL: vector_deinterleave_nxv8bf16_nxv16bf16:
326+
; CHECK: # %bb.0:
327+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
328+
; CHECK-NEXT: vnsrl.wi v12, v8, 0
329+
; CHECK-NEXT: vnsrl.wi v14, v8, 16
330+
; CHECK-NEXT: vmv.v.v v8, v12
331+
; CHECK-NEXT: vmv.v.v v10, v14
332+
; CHECK-NEXT: ret
333+
%retval = call {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} @llvm.vector.deinterleave2.nxv16bf16(<vscale x 16 x bfloat> %vec)
334+
ret {<vscale x 8 x bfloat>, <vscale x 8 x bfloat>} %retval
335+
}
336+
297337
define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
298338
; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
299339
; CHECK: # %bb.0:
@@ -344,6 +384,21 @@ declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.vector.deinterleave2.nx
344384
declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
345385
declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
346386

387+
define {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @vector_deinterleave_nxv32bf16_nxv64bf16(<vscale x 64 x bfloat> %vec) {
388+
; CHECK-LABEL: vector_deinterleave_nxv32bf16_nxv64bf16:
389+
; CHECK: # %bb.0:
390+
; CHECK-NEXT: vmv8r.v v24, v8
391+
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
392+
; CHECK-NEXT: vnsrl.wi v8, v24, 0
393+
; CHECK-NEXT: vnsrl.wi v12, v16, 0
394+
; CHECK-NEXT: vnsrl.wi v0, v24, 16
395+
; CHECK-NEXT: vnsrl.wi v4, v16, 16
396+
; CHECK-NEXT: vmv8r.v v16, v0
397+
; CHECK-NEXT: ret
398+
%retval = call {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} @llvm.vector.deinterleave2.nxv64bf16(<vscale x 64 x bfloat> %vec)
399+
ret {<vscale x 32 x bfloat>, <vscale x 32 x bfloat>} %retval
400+
}
401+
347402
define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
348403
; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
349404
; CHECK: # %bb.0:

llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll

Lines changed: 110 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
3-
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
4-
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
5-
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh | FileCheck %s --check-prefix=ZVBB
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+zvfbfmin | FileCheck %s
4+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
5+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfhmin,+zvfbfmin | FileCheck %s
6+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
7+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvbb,+zfh,+zvfh,+zvfbfmin | FileCheck %s --check-prefix=ZVBB
68

79
; Integers
810

@@ -364,6 +366,62 @@ declare <vscale x 16 x i64> @llvm.vector.interleave2.nxv16i64(<vscale x 8 x i64>
364366

365367
; Floats
366368

369+
define <vscale x 4 x bfloat> @vector_interleave_nxv4bf16_nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b) {
370+
; CHECK-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
371+
; CHECK: # %bb.0:
372+
; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
373+
; CHECK-NEXT: vwaddu.vv v10, v8, v9
374+
; CHECK-NEXT: li a0, -1
375+
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
376+
; CHECK-NEXT: csrr a0, vlenb
377+
; CHECK-NEXT: srli a0, a0, 2
378+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
379+
; CHECK-NEXT: vslidedown.vx v8, v10, a0
380+
; CHECK-NEXT: add a1, a0, a0
381+
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
382+
; CHECK-NEXT: vslideup.vx v10, v8, a0
383+
; CHECK-NEXT: vmv.v.v v8, v10
384+
; CHECK-NEXT: ret
385+
;
386+
; ZVBB-LABEL: vector_interleave_nxv4bf16_nxv2bf16:
387+
; ZVBB: # %bb.0:
388+
; ZVBB-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
389+
; ZVBB-NEXT: vwsll.vi v10, v9, 16
390+
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
391+
; ZVBB-NEXT: csrr a0, vlenb
392+
; ZVBB-NEXT: srli a0, a0, 2
393+
; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, ma
394+
; ZVBB-NEXT: vslidedown.vx v8, v10, a0
395+
; ZVBB-NEXT: add a1, a0, a0
396+
; ZVBB-NEXT: vsetvli zero, a1, e16, m1, ta, ma
397+
; ZVBB-NEXT: vslideup.vx v10, v8, a0
398+
; ZVBB-NEXT: vmv.v.v v8, v10
399+
; ZVBB-NEXT: ret
400+
%res = call <vscale x 4 x bfloat> @llvm.vector.interleave2.nxv4bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x bfloat> %b)
401+
ret <vscale x 4 x bfloat> %res
402+
}
403+
404+
define <vscale x 8 x bfloat> @vector_interleave_nxv8bf16_nxv4bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b) {
405+
; CHECK-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
406+
; CHECK: # %bb.0:
407+
; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
408+
; CHECK-NEXT: vwaddu.vv v10, v8, v9
409+
; CHECK-NEXT: li a0, -1
410+
; CHECK-NEXT: vwmaccu.vx v10, a0, v9
411+
; CHECK-NEXT: vmv2r.v v8, v10
412+
; CHECK-NEXT: ret
413+
;
414+
; ZVBB-LABEL: vector_interleave_nxv8bf16_nxv4bf16:
415+
; ZVBB: # %bb.0:
416+
; ZVBB-NEXT: vsetvli a0, zero, e16, m1, ta, ma
417+
; ZVBB-NEXT: vwsll.vi v10, v9, 16
418+
; ZVBB-NEXT: vwaddu.wv v10, v10, v8
419+
; ZVBB-NEXT: vmv2r.v v8, v10
420+
; ZVBB-NEXT: ret
421+
%res = call <vscale x 8 x bfloat> @llvm.vector.interleave2.nxv8bf16(<vscale x 4 x bfloat> %a, <vscale x 4 x bfloat> %b)
422+
ret <vscale x 8 x bfloat> %res
423+
}
424+
367425
define <vscale x 4 x half> @vector_interleave_nxv4f16_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b) {
368426
; CHECK-LABEL: vector_interleave_nxv4f16_nxv2f16:
369427
; CHECK: # %bb.0:
@@ -442,6 +500,27 @@ define <vscale x 4 x float> @vector_interleave_nxv4f32_nxv2f32(<vscale x 2 x flo
442500
ret <vscale x 4 x float> %res
443501
}
444502

503+
define <vscale x 16 x bfloat> @vector_interleave_nxv16bf16_nxv8bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
504+
; CHECK-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
505+
; CHECK: # %bb.0:
506+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
507+
; CHECK-NEXT: vwaddu.vv v12, v8, v10
508+
; CHECK-NEXT: li a0, -1
509+
; CHECK-NEXT: vwmaccu.vx v12, a0, v10
510+
; CHECK-NEXT: vmv4r.v v8, v12
511+
; CHECK-NEXT: ret
512+
;
513+
; ZVBB-LABEL: vector_interleave_nxv16bf16_nxv8bf16:
514+
; ZVBB: # %bb.0:
515+
; ZVBB-NEXT: vsetvli a0, zero, e16, m2, ta, ma
516+
; ZVBB-NEXT: vwsll.vi v12, v10, 16
517+
; ZVBB-NEXT: vwaddu.wv v12, v12, v8
518+
; ZVBB-NEXT: vmv4r.v v8, v12
519+
; ZVBB-NEXT: ret
520+
%res = call <vscale x 16 x bfloat> @llvm.vector.interleave2.nxv16bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b)
521+
ret <vscale x 16 x bfloat> %res
522+
}
523+
445524
define <vscale x 16 x half> @vector_interleave_nxv16f16_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
446525
; CHECK-LABEL: vector_interleave_nxv16f16_nxv8f16:
447526
; CHECK: # %bb.0:
@@ -527,6 +606,33 @@ declare <vscale x 16 x half> @llvm.vector.interleave2.nxv16f16(<vscale x 8 x hal
527606
declare <vscale x 8 x float> @llvm.vector.interleave2.nxv8f32(<vscale x 4 x float>, <vscale x 4 x float>)
528607
declare <vscale x 4 x double> @llvm.vector.interleave2.nxv4f64(<vscale x 2 x double>, <vscale x 2 x double>)
529608

609+
define <vscale x 64 x bfloat> @vector_interleave_nxv64bf16_nxv32bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b) {
610+
; CHECK-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
611+
; CHECK: # %bb.0:
612+
; CHECK-NEXT: vmv8r.v v24, v8
613+
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
614+
; CHECK-NEXT: vwaddu.vv v8, v24, v16
615+
; CHECK-NEXT: li a0, -1
616+
; CHECK-NEXT: vwmaccu.vx v8, a0, v16
617+
; CHECK-NEXT: vwaddu.vv v0, v28, v20
618+
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
619+
; CHECK-NEXT: vmv8r.v v16, v0
620+
; CHECK-NEXT: ret
621+
;
622+
; ZVBB-LABEL: vector_interleave_nxv64bf16_nxv32bf16:
623+
; ZVBB: # %bb.0:
624+
; ZVBB-NEXT: vmv8r.v v24, v8
625+
; ZVBB-NEXT: vsetvli a0, zero, e16, m4, ta, ma
626+
; ZVBB-NEXT: vwsll.vi v8, v16, 16
627+
; ZVBB-NEXT: vwaddu.wv v8, v8, v24
628+
; ZVBB-NEXT: vwsll.vi v0, v20, 16
629+
; ZVBB-NEXT: vwaddu.wv v0, v0, v28
630+
; ZVBB-NEXT: vmv8r.v v16, v0
631+
; ZVBB-NEXT: ret
632+
%res = call <vscale x 64 x bfloat> @llvm.vector.interleave2.nxv64bf16(<vscale x 32 x bfloat> %a, <vscale x 32 x bfloat> %b)
633+
ret <vscale x 64 x bfloat> %res
634+
}
635+
530636
define <vscale x 64 x half> @vector_interleave_nxv64f16_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
531637
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
532638
; CHECK: # %bb.0:

0 commit comments

Comments (0)