Commit 7d35368

[RISCV] Lower vector_shuffle for bf16 (#114731)
This is much the same as with f16. Currently we scalarize if there's no zvfbfmin, and crash if there is zvfbfmin because it will try to create a bf16 build_vector, which we also can't lower.
1 parent a15bf88 commit 7d35368
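
To make the fix concrete, here is a minimal reproducer sketch. It is not part of the commit itself (the function name and llc invocation below are illustrative), and it mirrors the shuffle_v4bf16 test added in this patch: before this change, a fixed-length bf16 shuffle like this was scalarized without zvfbfmin and crashed with zvfbfmin; it now goes through the custom VECTOR_SHUFFLE lowering.

; Illustrative standalone example, e.g. run with:
;   llc -mtriple=riscv64 -mattr=+d,+v,+zvfbfmin < reproducer.ll
define <4 x bfloat> @bf16_shuffle_example(<4 x bfloat> %x, <4 x bfloat> %y) {
  ; selects {x[0], x[1], y[2], x[3]}; with zvfbfmin this lowers to a masked vmerge
  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
  ret <4 x bfloat> %s
}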

2 files changed: 107 additions, 18 deletions

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 2 deletions
@@ -1381,6 +1381,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       if (VT.getVectorElementType() == MVT::bf16) {
         setOperationAction(ISD::BITCAST, VT, Custom);
         setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
         if (Subtarget.hasStdExtZfbfmin()) {
           setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
         } else {
@@ -5197,8 +5198,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
 
       MVT SplatVT = ContainerVT;
 
-      // If we don't have Zfh, we need to use an integer scalar load.
-      if (SVT == MVT::f16 && !Subtarget.hasStdExtZfh()) {
+      // f16 with zvfhmin and bf16 need to use an integer scalar load.
+      if (SVT == MVT::bf16 ||
+          (SVT == MVT::f16 && !Subtarget.hasStdExtZfh())) {
         SVT = MVT::i16;
         SplatVT = ContainerVT.changeVectorElementType(SVT);
       }
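
To illustrate the second hunk: when lowerVECTOR_SHUFFLE rewrites a splat of a loaded element as a scalar load plus vector splat, bf16 (like f16 under zvfhmin) is handled as an i16 rather than as an FP scalar, so the element is reloaded with an integer load and splatted via vmv.v.x. A sketch of the expected pattern follows; the function name is illustrative, and the assembly matches the vrgather_shuffle_vx_v4bf16_load test added below.

; Illustrative only; expected output taken from the new test in this patch.
define <4 x bfloat> @splat_of_loaded_element(ptr %p) {
  %v = load <4 x bfloat>, ptr %p
  ; splat element 1 of the loaded vector
  %s = shufflevector <4 x bfloat> %v, <4 x bfloat> poison, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
  ret <4 x bfloat> %s
}
; with zvfbfmin this lowers to an integer halfword load and an integer splat:
;   lh       a0, 2(a0)
;   vsetivli zero, 4, e16, mf2, ta, ma
;   vmv.v.x  v8, a0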

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll

Lines changed: 103 additions & 16 deletions
@@ -1,8 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -verify-machineinstrs < %s | FileCheck %s
+
+define <4 x bfloat> @shuffle_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
+; CHECK-LABEL: shuffle_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v0, 11
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+  ret <4 x bfloat> %s
+}
 
 define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: shuffle_v4f16:
@@ -30,8 +41,8 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) {
 define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: shuffle_fv_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI2_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v0, 9
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
@@ -44,8 +55,8 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) {
 define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: shuffle_vf_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI3_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI4_0)(a0)
 ; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v0, 6
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
@@ -92,8 +103,8 @@ define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vle16.v v14, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 8
@@ -109,8 +120,8 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y)
 define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) {
 ; CHECK-LABEL: vrgather_shuffle_xv_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI7_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI8_0)(a0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vid.v v10
 ; CHECK-NEXT:    vrsub.vi v12, v10, 4
@@ -129,8 +140,8 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; CHECK-NEXT:    vid.v v10
-; CHECK-NEXT:    lui a0, %hi(.LCPI8_0)
-; CHECK-NEXT:    fld fa5, %lo(.LCPI8_0)(a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
+; CHECK-NEXT:    fld fa5, %lo(.LCPI9_0)(a0)
 ; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    vmul.vx v12, v10, a0
 ; CHECK-NEXT:    vmv.v.i v0, 3
@@ -143,6 +154,28 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
   ret <4 x double> %s
 }
 
+define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_1(<8 x bfloat> %x) {
+; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    ret
+entry:
+  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x bfloat> %s
+}
+
+define <4 x bfloat> @shuffle_v8bf16_to_vslidedown_3(<8 x bfloat> %x) {
+; CHECK-LABEL: shuffle_v8bf16_to_vslidedown_3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 3
+; CHECK-NEXT:    ret
+entry:
+  %s = shufflevector <8 x bfloat> %x, <8 x bfloat> poison, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+  ret <4 x bfloat> %s
+}
+
 define <4 x half> @shuffle_v8f16_to_vslidedown_1(<8 x half> %x) {
 ; CHECK-LABEL: shuffle_v8f16_to_vslidedown_1:
 ; CHECK:       # %bb.0: # %entry
@@ -176,6 +209,16 @@ entry:
   ret <2 x float> %s
 }
 
+define <4 x bfloat> @slidedown_v4bf16(<4 x bfloat> %x) {
+; CHECK-LABEL: slidedown_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vslidedown.vi v8, v8, 1
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
+  ret <4 x bfloat> %s
+}
+
 define <4 x half> @slidedown_v4f16(<4 x half> %x) {
 ; CHECK-LABEL: slidedown_v4f16:
 ; CHECK:       # %bb.0:
@@ -265,6 +308,50 @@ define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
   ret <8 x double> %s
 }
 
+define <4 x bfloat> @vrgather_permute_shuffle_vu_v4bf16(<4 x bfloat> %x) {
+; CHECK-LABEL: vrgather_permute_shuffle_vu_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, 4096
+; CHECK-NEXT:    addi a0, a0, 513
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vsext.vf2 v10, v9
+; CHECK-NEXT:    vrgather.vv v9, v8, v10
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
+  ret <4 x bfloat> %s
+}
+
+define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y) {
+; CHECK-LABEL: vrgather_shuffle_vv_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lui a0, %hi(.LCPI25_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI25_0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT:    vle16.v v11, (a0)
+; CHECK-NEXT:    vmv.v.i v0, 8
+; CHECK-NEXT:    vrgather.vv v10, v8, v11
+; CHECK-NEXT:    vrgather.vi v10, v9, 1, v0.t
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %s = shufflevector <4 x bfloat> %x, <4 x bfloat> %y, <4 x i32> <i32 1, i32 2, i32 0, i32 5>
+  ret <4 x bfloat> %s
+}
+
+define <4 x bfloat> @vrgather_shuffle_vx_v4bf16_load(ptr %p) {
+; CHECK-LABEL: vrgather_shuffle_vx_v4bf16_load:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lh a0, 2(a0)
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    ret
+  %v = load <4 x bfloat>, ptr %p
+  %s = shufflevector <4 x bfloat> %v, <4 x bfloat> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x bfloat> %s
+}
+
 define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
 ; CHECK-LABEL: vrgather_permute_shuffle_vu_v4f16:
 ; CHECK:       # %bb.0:
@@ -284,8 +371,8 @@ define <4 x half> @vrgather_permute_shuffle_vu_v4f16(<4 x half> %x) {
 define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: vrgather_shuffle_vv_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI21_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI21_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI28_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI28_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
 ; CHECK-NEXT:    vle16.v v11, (a0)
 ; CHECK-NEXT:    vmv.v.i v0, 8
