Skip to content

Commit 6aa03c8

Browse files
[RISCV][VLOPT] Add support for integer multiply-add instructions
1 parent 2f09c72 commit 6aa03c8

File tree

2 files changed

+184
-1
lines changed

2 files changed

+184
-1
lines changed

llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -565,7 +565,14 @@ static bool isSupportedInstr(const MachineInstr &MI) {
   // Vector Widening Integer Multiply Instructions
   // FIXME: Add support
   // Vector Single-Width Integer Multiply-Add Instructions
-  // FIXME: Add support
+  case RISCV::VMACC_VV:
+  case RISCV::VMACC_VX:
+  case RISCV::VNMSAC_VV:
+  case RISCV::VNMSAC_VX:
+  case RISCV::VMADD_VV:
+  case RISCV::VMADD_VX:
+  case RISCV::VNMSUB_VV:
+  case RISCV::VNMSUB_VX:
   // Vector Widening Integer Multiply-Add Instructions
   // FIXME: Add support
   case RISCV::VWMACC_VX:

llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,182 @@ define <vscale x 4 x i32> @vrem_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i32> %2
 }
 
+define <vscale x 4 x i32> @vmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmacc_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmacc_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmacc_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmacc.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vmadd_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vmadd_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vmadd.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vmadd_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vmadd.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vmadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsac_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsac_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsac.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsac_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsac.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsac.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vv(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vv v8, v8, v10
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v8, v10
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vv v8, v8, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v8, v10
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %b, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vnmsub_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
+; NOVLOPT-LABEL: vnmsub_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v10, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, tu, ma
+; NOVLOPT-NEXT: vnmsub.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vmul.vv v8, v10, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vnmsub_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v10, v8
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; VLOPT-NEXT: vnmsub.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vmul.vv v8, v10, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vnmsub.nxv4i32.nxv4i32(<vscale x 4 x i32> %a, i32 %b, <vscale x 4 x i32> %a, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vmul.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %a, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
 define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16 %b, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmacc_vx:
 ; NOVLOPT: # %bb.0:

0 commit comments

Comments
 (0)