@@ -1248,44 +1248,157 @@ define <vscale x 4 x i64> @vwmulu_vx(<vscale x 4 x i32> %a, i32 %b, iXLen %vl) {
   ret <vscale x 4 x i64> %2
 }
 
-define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i16> %a, i16%b, iXLen %vl) {
+define <vscale x 4 x i64> @vwmacc_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmacc_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmacc.vv v12, v8, v8
+; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmacc_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmacc.vv v12, v8, v8
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i64> %d, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i32> @vwmacc_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmacc_vx:
 ; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwmacc.vx v10, a0, v8
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmacc.vx v8, a0, v10
 ; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
 ; NOVLOPT-NEXT: ret
 ;
 ; VLOPT-LABEL: vwmacc_vx:
 ; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vwmacc.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmacc.vx v8, a0, v10
 ; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v8
 ; VLOPT-NEXT: ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> poison, i16%b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
   ret <vscale x 4 x i32> %2
 }
 
-define <vscale x 4 x i32> @vwmaccu_vx(<vscale x 4 x i16> %a, i16%b, iXLen %vl) {
+define <vscale x 4 x i64> @vwmaccu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vmv2r.v v16, v8
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vv v12, v8, v16
+; NOVLOPT-NEXT: vmv4r.v v8, v12
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vmv2r.v v16, v8
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v16, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmaccu.vv v12, v8, v16
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.nxv4i32(<vscale x 4 x i64> %d, <vscale x 4 x i32> %a, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i64> @vwmaccu_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, <vscale x 4 x i64> %d, i32%e, iXLen %vl) {
 ; NOVLOPT-LABEL: vwmaccu_vx:
 ; NOVLOPT: # %bb.0:
-; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, ta, ma
-; NOVLOPT-NEXT: vwmaccu.vx v10, a0, v8
-; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; NOVLOPT-NEXT: vadd.vv v8, v10, v10
+; NOVLOPT-NEXT: vsetvli a3, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a2, e32, m2, tu, ma
+; NOVLOPT-NEXT: vwmaccu.vx v12, a1, v8
+; NOVLOPT-NEXT: vmv4r.v v8, v12
 ; NOVLOPT-NEXT: ret
 ;
 ; VLOPT-LABEL: vwmaccu_vx:
 ; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; VLOPT-NEXT: vwmaccu.vx v10, a0, v8
+; VLOPT-NEXT: vsetvli zero, a2, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, tu, ma
+; VLOPT-NEXT: vwmaccu.vx v12, a1, v8
+; VLOPT-NEXT: vmv4r.v v8, v12
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i64> @llvm.riscv.vwmaccu.nxv4i64.i32(<vscale x 4 x i64> %d, i32%e, <vscale x 4 x i32> %1, iXLen %vl, iXLen 0)
+  ret <vscale x 4 x i64> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vv(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vv:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccsu.vv v8, v10, v11
+; NOVLOPT-NEXT: vsetvli zero, a0, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccsu_vv:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccsu.vv v8, v10, v11
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccsu_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccsu_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccsu.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccsu_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccsu.vx v8, a0, v10
 ; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; VLOPT-NEXT: vadd.vv v8, v10, v10
+; VLOPT-NEXT: vadd.vv v8, v8, v8
+; VLOPT-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccsu.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
+  ret <vscale x 4 x i32> %2
+}
+
+define <vscale x 4 x i32> @vwmaccus_vx(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen %vl) {
+; NOVLOPT-LABEL: vwmaccus_vx:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e16, m1, tu, ma
+; NOVLOPT-NEXT: vwmaccus.vx v8, a0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vadd.vv v8, v8, v8
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: vwmaccus_vx:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli zero, a1, e16, m1, tu, ma
+; VLOPT-NEXT: vwmaccus.vx v8, a0, v10
+; VLOPT-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vadd.vv v8, v8, v8
 ; VLOPT-NEXT: ret
-  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccu.nxv4i32.i16(<vscale x 4 x i32> poison, i16%b, <vscale x 4 x i16> %a, iXLen -1, iXLen 0)
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmaccus.nxv4i32.i16(<vscale x 4 x i32> %a, i16%b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
   %2 = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> %1, <vscale x 4 x i32> %1, iXLen %vl)
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll (14 additions, 0 deletions)
@@ -136,3 +136,17 @@ define <vscale x 4 x i32> @different_imm_vl_with_tu(<vscale x 4 x i32> %passthru
   %w = call <vscale x 4 x i32> @llvm.riscv.vadd.nxv4i32.nxv4i32(<vscale x 4 x i32> %passthru, <vscale x 4 x i32> %v, <vscale x 4 x i32> %a,iXLen 4)
   ret <vscale x 4 x i32> %w
 }
+
+define <vscale x 4 x i32> @dont_optimize_tied_def(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl) {
+; CHECK-LABEL: dont_optimize_tied_def:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, tu, ma
+; CHECK-NEXT: vwmacc.vv v8, v10, v11
+; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT: vwmacc.vv v8, v10, v11
+; CHECK-NEXT: ret
+  %1 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen -1, iXLen 0)
+  %2 = call <vscale x 4 x i32> @llvm.riscv.vwmacc.nxv4i32.nxv4i16(<vscale x 4 x i32> %1, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c, iXLen %vl, iXLen 0)