File tree Expand file tree Collapse file tree 2 files changed +38
-13
lines changed Expand file tree Collapse file tree 2 files changed +38
-13
lines changed Original file line number Diff line number Diff line change @@ -2298,6 +2298,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
2298
2298
const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9 ;
2299
2299
const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17 ;
2300
2300
const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9 ;
2301
+ const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17 ;
2301
2302
const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4 ;
2302
2303
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5 ;
2303
2304
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11 ;
@@ -2355,7 +2356,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
2355
2356
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
2356
2357
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
2357
2358
if (!isXDL (ST, *MI))
2358
- NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2359
+ NeedWaitStates =
2360
+ ST.hasGFX950Insts ()
2361
+ ? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates
2362
+ : DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2359
2363
break ;
2360
2364
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
2361
2365
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:
Original file line number Diff line number Diff line change @@ -298,8 +298,12 @@ body: |
298
298
...
299
299
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
300
300
# GCN: V_MFMA
301
- # GCN-NEXT: S_NOP 7
302
- # GCN-NEXT: S_NOP 0
301
+ # GFX940-NEXT: S_NOP 7
302
+ # GFX940-NEXT: S_NOP 0
303
+
304
+ # GFX950-NEXT: S_NOP 7
305
+ # GFX950-NEXT: S_NOP 7
306
+ # GFX950-NEXT: S_NOP 0
303
307
# GCN-NEXT: V_MFMA
304
308
name : dgemm16x16_mfma_write_vgpr_mfma_read_overlap
305
309
body : |
@@ -319,8 +323,12 @@ body: |
319
323
...
320
324
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
321
325
# GCN: V_MFMA
322
- # GCN-NEXT: S_NOP 7
323
- # GCN-NEXT: S_NOP 0
326
+ # GFX940-NEXT: S_NOP 7
327
+ # GFX940-NEXT: S_NOP 0
328
+
329
+ # GFX950-NEXT: S_NOP 7
330
+ # GFX950-NEXT: S_NOP 7
331
+ # GFX950-NEXT: S_NOP 0
324
332
# GCN-NEXT: V_MFMA
325
333
name : dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
326
334
body : |
@@ -549,8 +557,12 @@ body: |
549
557
...
550
558
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
551
559
# GCN: V_MFMA
552
- # GCN-NEXT: S_NOP 7
553
- # GCN-NEXT: S_NOP 2
560
+ # GFX940-NEXT: S_NOP 7
561
+ # GFX940-NEXT: S_NOP 2
562
+
563
+ # GFX950-NEXT: S_NOP 7
564
+ # GFX950-NEXT: S_NOP 7
565
+ # GFX950-NEXT: S_NOP 0
554
566
# GCN-NEXT: V_MFMA
555
567
name : dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
556
568
body : |
@@ -1333,8 +1345,12 @@ body: |
1333
1345
...
1334
1346
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
1335
1347
# GCN: V_MFMA
1336
- # GCN-NEXT: S_NOP 7
1337
- # GCN-NEXT: S_NOP 0
1348
+ # GFX940-NEXT: S_NOP 7
1349
+ # GFX940-NEXT: S_NOP 0
1350
+
1351
+ # GFX950-NEXT: S_NOP 7
1352
+ # GFX950-NEXT: S_NOP 7
1353
+ # GFX950-NEXT: S_NOP 0
1338
1354
# GCN-NEXT: V_MFMA
1339
1355
name : dgemm16x16_mfma_write_agpr_mfma_read_overlap
1340
1356
body : |
@@ -1354,8 +1370,13 @@ body: |
1354
1370
...
1355
1371
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
1356
1372
# GCN: V_MFMA
1357
- # GCN-NEXT: S_NOP 7
1358
- # GCN-NEXT: S_NOP 0
1373
+ # GFX940-NEXT: S_NOP 7
1374
+ # GFX940-NEXT: S_NOP 0
1375
+
1376
+ # GFX950-NEXT: S_NOP 7
1377
+ # GFX950-NEXT: S_NOP 7
1378
+ # GFX950-NEXT: S_NOP 0
1379
+
1359
1380
# GCN-NEXT: V_MFMA
1360
1381
name : dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
1361
1382
body : |
@@ -2502,8 +2523,8 @@ body: |
2502
2523
...
2503
2524
# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
2504
2525
# GCN: V_MFMA
2505
- # GFX940: S_NOP 4
2506
- # GFX950: S_NOP 5
2526
+ # GFX940-NEXT : S_NOP 4
2527
+ # GFX950-NEXT : S_NOP 5
2507
2528
# GCN-NEXT: V_SMFMAC_
2508
2529
name : xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
2509
2530
body : |
You can’t perform that action at this time.
0 commit comments