Skip to content

Commit cb021f5

Browse files
committed
[RISCV] Don't use SEW=16 .vf instructions to move scalar bf16 into a vector.
The instructions are only defined to operate on f16 data. If the scalar FPR isn't properly NaN-boxed, these instructions will create an fp16 NaN, not a bf16 NaN, in the vector register.
1 parent 50ead2e commit cb021f5

File tree

4 files changed

+139
-337
lines changed

4 files changed

+139
-337
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1102,12 +1102,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
11021102
ISD::EXTRACT_SUBVECTOR},
11031103
VT, Custom);
11041104
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1105-
if (Subtarget.hasStdExtZfbfmin()) {
1106-
if (Subtarget.hasVInstructionsF16())
1107-
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1108-
else
1109-
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1110-
}
1105+
if (Subtarget.hasStdExtZfbfmin())
1106+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
11111107
setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
11121108
Custom);
11131109
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1340,12 +1336,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
13401336
ISD::EXTRACT_SUBVECTOR},
13411337
VT, Custom);
13421338
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1343-
if (Subtarget.hasStdExtZfbfmin()) {
1344-
if (Subtarget.hasVInstructionsF16())
1345-
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
1346-
else
1347-
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1348-
}
1339+
if (Subtarget.hasStdExtZfbfmin())
1340+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
13491341
setOperationAction(
13501342
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
13511343
Custom);
@@ -6738,8 +6730,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
67386730
Subtarget.hasStdExtZfhminOrZhinxmin() &&
67396731
!Subtarget.hasVInstructionsF16())) ||
67406732
(Op.getValueType().getScalarType() == MVT::bf16 &&
6741-
(Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin() &&
6742-
!Subtarget.hasVInstructionsF16()))) {
6733+
(Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) {
67436734
if (Op.getValueType() == MVT::nxv32f16 ||
67446735
Op.getValueType() == MVT::nxv32bf16)
67456736
return SplitVectorOp(Op, DAG);

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,20 +1255,14 @@ define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x
12551255
}
12561256

12571257
define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
1258-
; ZVFH-LABEL: vpmerge_vf_v2bf16:
1259-
; ZVFH: # %bb.0:
1260-
; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma
1261-
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
1262-
; ZVFH-NEXT: ret
1263-
;
1264-
; ZVFHMIN-LABEL: vpmerge_vf_v2bf16:
1265-
; ZVFHMIN: # %bb.0:
1266-
; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
1267-
; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
1268-
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
1269-
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
1270-
; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
1271-
; ZVFHMIN-NEXT: ret
1258+
; CHECK-LABEL: vpmerge_vf_v2bf16:
1259+
; CHECK: # %bb.0:
1260+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
1261+
; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
1262+
; CHECK-NEXT: vfmv.v.f v9, fa5
1263+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu
1264+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
1265+
; CHECK-NEXT: ret
12721266
%elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0
12731267
%va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer
12741268
%v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl)
@@ -1289,20 +1283,14 @@ define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x
12891283
}
12901284

12911285
define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
1292-
; ZVFH-LABEL: vpmerge_vf_v4bf16:
1293-
; ZVFH: # %bb.0:
1294-
; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma
1295-
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
1296-
; ZVFH-NEXT: ret
1297-
;
1298-
; ZVFHMIN-LABEL: vpmerge_vf_v4bf16:
1299-
; ZVFHMIN: # %bb.0:
1300-
; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
1301-
; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1302-
; ZVFHMIN-NEXT: vfmv.v.f v9, fa5
1303-
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
1304-
; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
1305-
; ZVFHMIN-NEXT: ret
1286+
; CHECK-LABEL: vpmerge_vf_v4bf16:
1287+
; CHECK: # %bb.0:
1288+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
1289+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
1290+
; CHECK-NEXT: vfmv.v.f v9, fa5
1291+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu
1292+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
1293+
; CHECK-NEXT: ret
13061294
%elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0
13071295
%va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer
13081296
%v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl)
@@ -1323,20 +1311,14 @@ define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x
13231311
}
13241312

13251313
define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
1326-
; ZVFH-LABEL: vpmerge_vf_v8bf16:
1327-
; ZVFH: # %bb.0:
1328-
; ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1329-
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
1330-
; ZVFH-NEXT: ret
1331-
;
1332-
; ZVFHMIN-LABEL: vpmerge_vf_v8bf16:
1333-
; ZVFHMIN: # %bb.0:
1334-
; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
1335-
; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1336-
; ZVFHMIN-NEXT: vfmv.v.f v10, fa5
1337-
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu
1338-
; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
1339-
; ZVFHMIN-NEXT: ret
1314+
; CHECK-LABEL: vpmerge_vf_v8bf16:
1315+
; CHECK: # %bb.0:
1316+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
1317+
; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
1318+
; CHECK-NEXT: vfmv.v.f v10, fa5
1319+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu
1320+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
1321+
; CHECK-NEXT: ret
13401322
%elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0
13411323
%va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer
13421324
%v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl)
@@ -1357,20 +1339,14 @@ define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <
13571339
}
13581340

13591341
define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
1360-
; ZVFH-LABEL: vpmerge_vf_v16bf16:
1361-
; ZVFH: # %bb.0:
1362-
; ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma
1363-
; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0
1364-
; ZVFH-NEXT: ret
1365-
;
1366-
; ZVFHMIN-LABEL: vpmerge_vf_v16bf16:
1367-
; ZVFHMIN: # %bb.0:
1368-
; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0
1369-
; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1370-
; ZVFHMIN-NEXT: vfmv.v.f v12, fa5
1371-
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu
1372-
; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
1373-
; ZVFHMIN-NEXT: ret
1342+
; CHECK-LABEL: vpmerge_vf_v16bf16:
1343+
; CHECK: # %bb.0:
1344+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
1345+
; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
1346+
; CHECK-NEXT: vfmv.v.f v12, fa5
1347+
; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu
1348+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
1349+
; CHECK-NEXT: ret
13741350
%elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0
13751351
%va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer
13761352
%v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl)

0 commit comments

Comments
 (0)