@@ -352,6 +352,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
     return 1;
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
   case AMDGPU::S_LOAD_DWORDX2_IMM:
   case AMDGPU::S_LOAD_DWORDX2_IMM_ec:
   case AMDGPU::GLOBAL_LOAD_DWORDX2:
@@ -363,6 +365,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
     return 2;
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
   case AMDGPU::S_LOAD_DWORDX3_IMM:
   case AMDGPU::S_LOAD_DWORDX3_IMM_ec:
   case AMDGPU::GLOBAL_LOAD_DWORDX3:
@@ -374,6 +378,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
     return 3;
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
   case AMDGPU::S_LOAD_DWORDX4_IMM:
   case AMDGPU::S_LOAD_DWORDX4_IMM_ec:
   case AMDGPU::GLOBAL_LOAD_DWORDX4:
@@ -385,6 +391,8 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
     return 4;
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
   case AMDGPU::S_LOAD_DWORDX8_IMM:
   case AMDGPU::S_LOAD_DWORDX8_IMM_ec:
     return 8;
@@ -499,12 +507,20 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
     return S_BUFFER_LOAD_IMM;
   case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
     return S_BUFFER_LOAD_SGPR_IMM;
   case AMDGPU::S_LOAD_DWORD_IMM:
   case AMDGPU::S_LOAD_DWORDX2_IMM:
@@ -587,12 +603,20 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
     return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
   case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
     return AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM;
   case AMDGPU::S_LOAD_DWORD_IMM:
   case AMDGPU::S_LOAD_DWORDX2_IMM:
@@ -703,13 +727,21 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec:
     Result.SOffset = true;
     [[fallthrough]];
   case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
   case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec:
+  case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec:
   case AMDGPU::S_LOAD_DWORD_IMM:
   case AMDGPU::S_LOAD_DWORDX2_IMM:
   case AMDGPU::S_LOAD_DWORDX3_IMM:
@@ -1679,6 +1711,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeFlatStorePair(
   return New;
 }
 
+static bool needsConstraintedOpcode(const GCNSubtarget &STM,
+                                    const MachineMemOperand *MMO,
+                                    unsigned Width) {
+  return STM.isXNACKEnabled() && MMO->getAlign().value() < Width * 4;
+}
+
 unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
                                             const CombineInfo &Paired) {
   const unsigned Width = CI.Width + Paired.Width;
@@ -1696,38 +1734,51 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
 
   case UNKNOWN:
     llvm_unreachable("Unknown instruction class");
-  case S_BUFFER_LOAD_IMM:
+  case S_BUFFER_LOAD_IMM: {
+    const MachineMemOperand *MMO = *CI.I->memoperands_begin();
+    bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width);
     switch (Width) {
     default:
       return 0;
     case 2:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM;
     case 3:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX3_IMM;
     case 4:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM;
     case 8:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM;
     }
-  case S_BUFFER_LOAD_SGPR_IMM:
+  }
+  case S_BUFFER_LOAD_SGPR_IMM: {
+    const MachineMemOperand *MMO = *CI.I->memoperands_begin();
+    bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width);
     switch (Width) {
     default:
       return 0;
     case 2:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
     case 3:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX3_SGPR_IMM;
     case 4:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
     case 8:
-      return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
+      return NeedsConstrainedOpc ? AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM_ec
+                                 : AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
     }
+  }
   case S_LOAD_IMM: {
     // If XNACK is enabled, use the constrained opcodes when the first load is
     // under-aligned.
     const MachineMemOperand *MMO = *CI.I->memoperands_begin();
-    bool NeedsConstrainedOpc =
-        STM->isXNACKEnabled() && MMO->getAlign().value() < Width * 4;
+    bool NeedsConstrainedOpc = needsConstraintedOpcode(*STM, MMO, Width);
     switch (Width) {
     default:
       return 0;
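
Note (not part of the patch): the constrained-opcode selection above reduces to a single alignment predicate. The minimal C++ sketch below restates that predicate with plain arguments so it can be checked in isolation; needsConstrainedOpcodeSketch and the bool/integer parameters are illustrative stand-ins for the patch's helper and for GCNSubtarget::isXNACKEnabled() / MachineMemOperand::getAlign().value(), not LLVM API.

// Standalone illustration: on an XNACK-enabled subtarget, a merged load of
// Width dwords (Width * 4 bytes) must use the constrained (_ec) opcode
// whenever the first load's known alignment is smaller than the merged size.
#include <cassert>
#include <cstdint>

static bool needsConstrainedOpcodeSketch(bool XNACKEnabled,
                                         uint64_t AlignInBytes,
                                         unsigned WidthInDwords) {
  return XNACKEnabled && AlignInBytes < WidthInDwords * 4;
}

int main() {
  // Two 4-byte-aligned dword loads merged into a DWORDX2 (8 bytes): the
  // alignment is below the merged size, so the _ec form is selected.
  assert(needsConstrainedOpcodeSketch(true, 4, 2));
  // A 16-byte-aligned DWORDX4 merge keeps the regular opcode.
  assert(!needsConstrainedOpcodeSketch(true, 16, 4));
  // With XNACK disabled the unconstrained opcode is always acceptable.
  assert(!needsConstrainedOpcodeSketch(false, 4, 8));
  return 0;
}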