@@ -98,22 +98,22 @@ class SIMemOpInfo final {
98
98
bool IsCrossAddressSpaceOrdering = false ;
99
99
bool IsVolatile = false ;
100
100
bool IsNonTemporal = false ;
101
-
102
- SIMemOpInfo (AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
103
- SIAtomicScope Scope = SIAtomicScope::SYSTEM,
104
- SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC ,
105
- SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL ,
106
- bool IsCrossAddressSpaceOrdering = true ,
107
- AtomicOrdering FailureOrdering =
108
- AtomicOrdering::SequentiallyConsistent ,
109
- bool IsVolatile = false ,
110
- bool IsNonTemporal = false )
111
- : Ordering(Ordering), FailureOrdering(FailureOrdering),
112
- Scope (Scope ), OrderingAddrSpace(OrderingAddrSpace ),
113
- InstrAddrSpace(InstrAddrSpace),
114
- IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115
- IsVolatile(IsVolatile),
116
- IsNonTemporal(IsNonTemporal ) {
101
+ bool IsLastUse = false ;
102
+
103
+ SIMemOpInfo (
104
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent ,
105
+ SIAtomicScope Scope = SIAtomicScope::SYSTEM ,
106
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC ,
107
+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
108
+ bool IsCrossAddressSpaceOrdering = true ,
109
+ AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent ,
110
+ bool IsVolatile = false , bool IsNonTemporal = false ,
111
+ bool IsLastUse = false )
112
+ : Ordering(Ordering ), FailureOrdering(FailureOrdering), Scope(Scope ),
113
+ OrderingAddrSpace (OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
114
+ IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
115
+ IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal ),
116
+ IsLastUse(IsLastUse ) {
117
117
118
118
if (Ordering == AtomicOrdering::NotAtomic) {
119
119
assert (Scope == SIAtomicScope::NONE &&
@@ -201,6 +201,10 @@ class SIMemOpInfo final {
201
201
return IsNonTemporal;
202
202
}
203
203
204
+ /// \returns True if the memory access of the machine instruction used to
205
+ /// create this SIMemOpInfo is a last use, false otherwise.
206
+ bool isLastUse () const { return IsLastUse; }
207
+
204
208
/// \returns True if ordering constraint of the machine instruction used to
205
209
/// create this SIMemOpInfo is unordered or higher, false otherwise.
206
210
bool isAtomic () const {
@@ -305,12 +309,13 @@ class SICacheControl {
305
309
SIAtomicAddrSpace AddrSpace) const = 0;
306
310
307
311
/// Update \p MI memory instruction of kind \p Op associated with address
308
- /// spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
309
- /// true iff the instruction was modified.
312
+ /// spaces \p AddrSpace to indicate it is volatile and/or
313
+ /// nontemporal/last-use. Return true iff the instruction was modified.
310
314
virtual bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
311
315
SIAtomicAddrSpace AddrSpace,
312
316
SIMemOp Op, bool IsVolatile,
313
- bool IsNonTemporal) const = 0;
317
+ bool IsNonTemporal,
318
+ bool IsLastUse = false ) const = 0;
314
319
315
320
virtual bool expandSystemScopeStore (MachineBasicBlock::iterator &MI) const {
316
321
return false ;
@@ -394,8 +399,8 @@ class SIGfx6CacheControl : public SICacheControl {
394
399
395
400
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
396
401
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
397
- bool IsVolatile,
398
- bool IsNonTemporal ) const override ;
402
+ bool IsVolatile, bool IsNonTemporal,
403
+ bool IsLastUse ) const override ;
399
404
400
405
bool insertWait (MachineBasicBlock::iterator &MI,
401
406
SIAtomicScope Scope,
@@ -447,8 +452,8 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl {
447
452
448
453
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
449
454
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
450
- bool IsVolatile,
451
- bool IsNonTemporal ) const override ;
455
+ bool IsVolatile, bool IsNonTemporal,
456
+ bool IsLastUse ) const override ;
452
457
453
458
bool insertWait (MachineBasicBlock::iterator &MI,
454
459
SIAtomicScope Scope,
@@ -508,8 +513,8 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
508
513
509
514
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
510
515
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
511
- bool IsVolatile,
512
- bool IsNonTemporal ) const override ;
516
+ bool IsVolatile, bool IsNonTemporal,
517
+ bool IsLastUse ) const override ;
513
518
514
519
bool insertAcquire (MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
515
520
SIAtomicAddrSpace AddrSpace, Position Pos) const override ;
@@ -552,8 +557,8 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
552
557
553
558
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
554
559
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
555
- bool IsVolatile,
556
- bool IsNonTemporal ) const override ;
560
+ bool IsVolatile, bool IsNonTemporal,
561
+ bool IsLastUse ) const override ;
557
562
558
563
bool insertWait (MachineBasicBlock::iterator &MI,
559
564
SIAtomicScope Scope,
@@ -578,8 +583,8 @@ class SIGfx11CacheControl : public SIGfx10CacheControl {
578
583
579
584
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
580
585
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
581
- bool IsVolatile,
582
- bool IsNonTemporal ) const override ;
586
+ bool IsVolatile, bool IsNonTemporal,
587
+ bool IsLastUse ) const override ;
583
588
};
584
589
585
590
class SIGfx12CacheControl : public SIGfx11CacheControl {
@@ -614,8 +619,8 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
614
619
615
620
bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
616
621
SIAtomicAddrSpace AddrSpace, SIMemOp Op,
617
- bool IsVolatile,
618
- bool IsNonTemporal ) const override ;
622
+ bool IsVolatile, bool IsNonTemporal,
623
+ bool IsLastUse ) const override ;
619
624
620
625
bool expandSystemScopeStore (MachineBasicBlock::iterator &MI) const override ;
621
626
};
@@ -745,12 +750,14 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
745
750
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
746
751
bool IsNonTemporal = true ;
747
752
bool IsVolatile = false ;
753
+ bool IsLastUse = false ;
748
754
749
755
// Validator should check whether or not MMOs cover the entire set of
750
756
// locations accessed by the memory instruction.
751
757
for (const auto &MMO : MI->memoperands ()) {
752
758
IsNonTemporal &= MMO->isNonTemporal ();
753
759
IsVolatile |= MMO->isVolatile ();
760
+ IsLastUse |= MMO->getFlags () & MOLastUse;
754
761
InstrAddrSpace |=
755
762
toSIAtomicAddrSpace (MMO->getPointerInfo ().getAddrSpace ());
756
763
AtomicOrdering OpOrdering = MMO->getSuccessOrdering ();
@@ -792,7 +799,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
792
799
}
793
800
return SIMemOpInfo (Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
794
801
IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
795
- IsNonTemporal);
802
+ IsNonTemporal, IsLastUse );
796
803
}
797
804
798
805
std::optional<SIMemOpInfo>
@@ -969,7 +976,7 @@ bool SIGfx6CacheControl::enableRMWCacheBypass(
969
976
970
977
bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal (
971
978
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
972
- bool IsVolatile, bool IsNonTemporal) const {
979
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
973
980
// Only handle load and store, not atomic read-modify-write instructions. The
974
981
// latter use glc to indicate if the atomic returns a result and so must not
975
982
// be used for cache control.
@@ -1322,7 +1329,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
1322
1329
1323
1330
bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal (
1324
1331
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1325
- bool IsVolatile, bool IsNonTemporal) const {
1332
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
1326
1333
// Only handle load and store, not atomic read-modify-write instructions. The
1327
1334
// latter use glc to indicate if the atomic returns a result and so must not
1328
1335
// be used for cache control.
@@ -1624,7 +1631,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
1624
1631
1625
1632
bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal (
1626
1633
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1627
- bool IsVolatile, bool IsNonTemporal) const {
1634
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
1628
1635
// Only handle load and store, not atomic read-modify-write instructions. The
1629
1636
// latter use glc to indicate if the atomic returns a result and so must not
1630
1637
// be used for cache control.
@@ -1856,7 +1863,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
1856
1863
1857
1864
bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal (
1858
1865
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1859
- bool IsVolatile, bool IsNonTemporal) const {
1866
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
1860
1867
1861
1868
// Only handle load and store, not atomic read-modify-write instructions. The
1862
1869
// latter use glc to indicate if the atomic returns a result and so must not
@@ -2127,7 +2134,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
2127
2134
2128
2135
bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal (
2129
2136
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2130
- bool IsVolatile, bool IsNonTemporal) const {
2137
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
2131
2138
2132
2139
// Only handle load and store, not atomic read-modify-write instructions. The
2133
2140
// latter use glc to indicate if the atomic returns a result and so must not
@@ -2379,7 +2386,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
2379
2386
2380
2387
bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal (
2381
2388
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
2382
- bool IsVolatile, bool IsNonTemporal) const {
2389
+ bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false ) const {
2383
2390
2384
2391
// Only handle load and store, not atomic read-modify-write instructions.
2385
2392
assert (MI->mayLoad () ^ MI->mayStore ());
@@ -2392,7 +2399,10 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
2392
2399
2393
2400
bool Changed = false ;
2394
2401
2395
- if (IsNonTemporal) {
2402
+ if (IsLastUse) {
2403
+ // Set last-use hint.
2404
+ Changed |= setTH (MI, AMDGPU::CPol::TH_LU);
2405
+ } else if (IsNonTemporal) {
2396
2406
// Set non-temporal hint for all cache levels.
2397
2407
Changed |= setTH (MI, AMDGPU::CPol::TH_NT);
2398
2408
}
@@ -2472,11 +2482,12 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
2472
2482
}
2473
2483
2474
2484
// Atomic instructions already bypass caches to the scope specified by the
2475
- // SyncScope operand. Only non-atomic volatile and nontemporal instructions
2476
- // need additional treatment.
2477
- Changed |= CC->enableVolatileAndOrNonTemporal (MI, MOI.getInstrAddrSpace (),
2478
- SIMemOp::LOAD, MOI.isVolatile (),
2479
- MOI.isNonTemporal ());
2485
+ // SyncScope operand. Only non-atomic volatile and nontemporal/last-use
2486
+ // instructions need additional treatment.
2487
+ Changed |= CC->enableVolatileAndOrNonTemporal (
2488
+ MI, MOI.getInstrAddrSpace (), SIMemOp::LOAD, MOI.isVolatile (),
2489
+ MOI.isNonTemporal (), MOI.isLastUse ());
2490
+
2480
2491
return Changed;
2481
2492
}
2482
2493
0 commit comments