You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
[RISCV][VLOPT] Add support for widening integer mul-add instructions (#112219)
This adds support for these instructions and also tests getOperandInfo
for them. I think the VL on the add instruction that uses the result
can be optimized further once we add support for optimizing
non-VLMAX.
@@ -1248,44 +1248,149 @@ define <vscale x 4 x i64> @vwmulu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
1248
1248
ret <vscale x 4 x i64> %2
1249
1249
}
1250
1250
1251
-
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16%b, iXLen %vl) {
1251
+
; Calls llvm.riscv.vwmacc (vector-vector form) with a VL operand of -1 and
; feeds the result, together with %d, into a vadd whose VL operand is %vl.
; Expected code differs per check prefix: under NOVLOPT the vwmacc.vv follows
; "vsetvli a1, zero, ..." and the vadd.vv follows "vsetvli zero, a0, ...";
; under VLOPT the vwmacc.vv follows "vsetvli zero, a0, ..." and the vadd.vv
; follows "vsetvli zero, zero, ...".
; NOTE(review): a0 presumably carries %vl here, and the RUN lines that define
; the two prefixes are not visible in this excerpt -- confirm.
define <vscale x 4 x i32> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) {
1252
+
; NOVLOPT-LABEL: vwmacc_vv:
1253
+
; NOVLOPT: # %bb.0:
1254
+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1255
+
; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
1256
+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1257
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v12
1258
+
; NOVLOPT-NEXT: ret
1259
+
;
1260
+
; VLOPT-LABEL: vwmacc_vv:
1261
+
; VLOPT: # %bb.0:
1262
+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1263
+
; VLOPT-NEXT: vwmacc.vv v8, v10, v11
1264
+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1265
+
; VLOPT-NEXT: vadd.vv v8, v8, v12
1266
+
; VLOPT-NEXT: ret
1267
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1268
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
1269
+
ret <vscale x 4 x i32> %2
1270
+
}
1271
+
1272
+
define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
1252
1273
; NOVLOPT-LABEL: vwmacc_vx:
1253
1274
; NOVLOPT: # %bb.0:
1254
-
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
1255
-
; NOVLOPT-NEXT: vwmacc.vx v10, a0, v8
1275
+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1276
+
; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
1256
1277
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1257
-
; NOVLOPT-NEXT: vadd.vv v8, v10, v10
1278
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1258
1279
; NOVLOPT-NEXT: ret
1259
1280
;
1260
1281
; VLOPT-LABEL: vwmacc_vx:
1261
1282
; VLOPT: # %bb.0:
1262
-
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1263
-
; VLOPT-NEXT: vwmacc.vx v10, a0, v8
1283
+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1284
+
; VLOPT-NEXT: vwmacc.vx v8, a0, v10
1264
1285
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1265
-
; VLOPT-NEXT: vadd.vv v8, v10, v10
1286
+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1266
1287
; VLOPT-NEXT: ret
1267
-
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16%b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
1288
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1268
1289
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1269
1290
ret <vscale x 4 x i32> %2
1270
1291
}
1271
1292
1272
-
define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16%b, iXLen %vl) {
1293
+
; Calls llvm.riscv.vwmaccu (vector-vector form) with a VL operand of -1 and
; feeds the result, together with %d, into a vadd whose VL operand is %vl.
; Expected code differs per check prefix: under NOVLOPT the vwmaccu.vv follows
; "vsetvli a1, zero, ..." and the vadd.vv follows "vsetvli zero, a0, ...";
; under VLOPT the vwmaccu.vv follows "vsetvli zero, a0, ..." and the vadd.vv
; follows "vsetvli zero, zero, ...".
; NOTE(review): a0 presumably carries %vl here, and the RUN lines that define
; the two prefixes are not visible in this excerpt -- confirm.
define <vscale x 4 x i32> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, iXLen %vl) {
1294
+
; NOVLOPT-LABEL: vwmaccu_vv:
1295
+
; NOVLOPT: # %bb.0:
1296
+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1297
+
; NOVLOPT-NEXT: vwmaccu.vv v8, v10, v11
1298
+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1299
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v12
1300
+
; NOVLOPT-NEXT: ret
1301
+
;
1302
+
; VLOPT-LABEL: vwmaccu_vv:
1303
+
; VLOPT: # %bb.0:
1304
+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1305
+
; VLOPT-NEXT: vwmaccu.vv v8, v10, v11
1306
+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1307
+
; VLOPT-NEXT: vadd.vv v8, v8, v12
1308
+
; VLOPT-NEXT: ret
1309
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1310
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
1311
+
ret <vscale x 4 x i32> %2
1312
+
}
1313
+
1314
+
define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, <vscale x 4 x i32> %d, i32%e, iXLen %vl) {
1273
1315
; NOVLOPT-LABEL: vwmaccu_vx:
1274
1316
; NOVLOPT: # %bb.0:
1275
-
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
1276
-
; NOVLOPT-NEXT: vwmaccu.vx v10, a0, v8
1277
-
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1278
-
; NOVLOPT-NEXT: vadd.vv v8, v10, v10
1317
+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1318
+
; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
1319
+
; NOVLOPT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
1320
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v12
1279
1321
; NOVLOPT-NEXT: ret
1280
1322
;
1281
1323
; VLOPT-LABEL: vwmaccu_vx:
1282
1324
; VLOPT: # %bb.0:
1283
-
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
1284
-
; VLOPT-NEXT: vwmaccu.vx v10, a0, v8
1325
+
; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
1326
+
; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
1327
+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1328
+
; VLOPT-NEXT: vadd.vv v8, v8, v12
1329
+
; VLOPT-NEXT: ret
1330
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1331
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %d, iXLen %vl)
1332
+
ret <vscale x 4 x i32> %2
1333
+
}
1334
+
1335
+
define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
1336
+
; NOVLOPT-LABEL: vwmaccsu_vv:
1337
+
; NOVLOPT: # %bb.0:
1338
+
; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
1339
+
; NOVLOPT-NEXT: vwmaccsu.vv v8, v10, v11
1340
+
; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
1341
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1342
+
; NOVLOPT-NEXT: ret
1343
+
;
1344
+
; VLOPT-LABEL: vwmaccsu_vv:
1345
+
; VLOPT: # %bb.0:
1346
+
; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
1347
+
; VLOPT-NEXT: vwmaccsu.vv v8, v10, v11
1285
1348
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1286
-
; VLOPT-NEXT: vadd.vv v8, v10, v10
1349
+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1350
+
; VLOPT-NEXT: ret
1351
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1352
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1353
+
ret <vscale x 4 x i32> %2
1354
+
}
1355
+
1356
+
; Calls llvm.riscv.vwmaccsu (vector-scalar form, scalar operand i16 %b) with a
; VL operand of -1; the result is used as both source operands of a vadd whose
; VL operand is %vl.
; Expected code differs per check prefix: under NOVLOPT the vwmaccsu.vx follows
; "vsetvli a2, zero, ..." and the vadd.vv follows "vsetvli zero, a1, ...";
; under VLOPT the vwmaccsu.vx follows "vsetvli zero, a1, ..." and the vadd.vv
; follows "vsetvli zero, zero, ...".
; NOTE(review): a1 presumably carries %vl here, and the RUN lines that define
; the two prefixes are not visible in this excerpt -- confirm.
define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
1357
+
; NOVLOPT-LABEL: vwmaccsu_vx:
1358
+
; NOVLOPT: # %bb.0:
1359
+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1360
+
; NOVLOPT-NEXT: vwmaccsu.vx v8, a0, v10
1361
+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1362
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1363
+
; NOVLOPT-NEXT: ret
1364
+
;
1365
+
; VLOPT-LABEL: vwmaccsu_vx:
1366
+
; VLOPT: # %bb.0:
1367
+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1368
+
; VLOPT-NEXT: vwmaccsu.vx v8, a0, v10
1369
+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1370
+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1371
+
; VLOPT-NEXT: ret
1372
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1373
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
1374
+
ret <vscale x 4 x i32> %2
1375
+
}
1376
+
1377
+
define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
1378
+
; NOVLOPT-LABEL: vwmaccus_vx:
1379
+
; NOVLOPT: # %bb.0:
1380
+
; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
1381
+
; NOVLOPT-NEXT: vwmaccus.vx v8, a0, v10
1382
+
; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
1383
+
; NOVLOPT-NEXT: vadd.vv v8, v8, v8
1384
+
; NOVLOPT-NEXT: ret
1385
+
;
1386
+
; VLOPT-LABEL: vwmaccus_vx:
1387
+
; VLOPT: # %bb.0:
1388
+
; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
1389
+
; VLOPT-NEXT: vwmaccus.vx v8, a0, v10
1390
+
; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
1391
+
; VLOPT-NEXT: vadd.vv v8, v8, v8
1287
1392
; VLOPT-NEXT: ret
1288
-
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16%b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
1393
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
1289
1394
%2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
Copy file name to clipboardExpand all lines: llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+14Lines changed: 14 additions & 0 deletions
Original file line number
Diff line number
Diff line change
@@ -136,3 +136,17 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
136
136
%w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
137
137
ret <vscale x 4 x i32> %w
138
138
}
139
+
140
+
define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
141
+
; CHECK-LABEL: dont_optimize_tied_def:
142
+
; CHECK: # %bb.0:
143
+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
144
+
; CHECK-NEXT: vwmacc.vv v8, v10, v11
145
+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
146
+
; CHECK-NEXT: vwmacc.vv v8, v10, v11
147
+
; CHECK-NEXT: ret
148
+
%1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
149
+
%2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)
0 commit comments