@@ -110,6 +110,7 @@ class SIMemOpInfo final {
110
110
SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::NONE;
111
111
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
112
112
bool IsCrossAddressSpaceOrdering = false ;
113
+ bool IsVolatile = false ;
113
114
bool IsNonTemporal = false ;
114
115
115
116
SIMemOpInfo (AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -119,11 +120,13 @@ class SIMemOpInfo final {
119
120
bool IsCrossAddressSpaceOrdering = true ,
120
121
AtomicOrdering FailureOrdering =
121
122
AtomicOrdering::SequentiallyConsistent,
123
+ bool IsVolatile = false ,
122
124
bool IsNonTemporal = false )
123
125
: Ordering(Ordering), FailureOrdering(FailureOrdering),
124
126
Scope (Scope), OrderingAddrSpace(OrderingAddrSpace),
125
127
InstrAddrSpace(InstrAddrSpace),
126
128
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
129
+ IsVolatile(IsVolatile),
127
130
IsNonTemporal(IsNonTemporal) {
128
131
// There is also no cross address space ordering if the ordering
129
132
// address space is the same as the instruction address space and
@@ -171,7 +174,13 @@ class SIMemOpInfo final {
171
174
}
172
175
173
176
// / \returns True if memory access of the machine instruction used to
174
- // / create this SIMemOpInfo is non-temporal, false otherwise.
177
+ // / create this SIMemOpInfo is volatile, false otherwise.
178
+ bool isVolatile () const {
179
+ return IsVolatile;
180
+ }
181
+
182
+ // / \returns True if memory access of the machine instruction used to
183
+ // / create this SIMemOpInfo is nontemporal, false otherwise.
175
184
bool isNonTemporal () const {
176
185
return IsNonTemporal;
177
186
}
@@ -259,10 +268,13 @@ class SICacheControl {
259
268
SIAtomicScope Scope,
260
269
SIAtomicAddrSpace AddrSpace) const = 0;
261
270
262
- // / Update \p MI memory instruction to indicate it is
263
- // / nontemporal. Return true iff the instruction was modified.
264
- virtual bool enableNonTemporal (const MachineBasicBlock::iterator &MI)
265
- const = 0;
271
+ // / Update \p MI memory instruction of kind \p Op associated with address
272
+ // / spaces \p AddrSpace to indicate it is volatile and/or nontemporal. Return
273
+ // / true iff the instruction was modified.
274
+ virtual bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
275
+ SIAtomicAddrSpace AddrSpace,
276
+ SIMemOp Op, bool IsVolatile,
277
+ bool IsNonTemporal) const = 0;
266
278
267
279
// / Inserts any necessary instructions at position \p Pos relative
268
280
// / to instruction \p MI to ensure memory instructions before \p Pos of kind
@@ -328,7 +340,10 @@ class SIGfx6CacheControl : public SICacheControl {
328
340
SIAtomicScope Scope,
329
341
SIAtomicAddrSpace AddrSpace) const override ;
330
342
331
- bool enableNonTemporal (const MachineBasicBlock::iterator &MI) const override ;
343
+ bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
344
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
345
+ bool IsVolatile,
346
+ bool IsNonTemporal) const override ;
332
347
333
348
bool insertWait (MachineBasicBlock::iterator &MI,
334
349
SIAtomicScope Scope,
@@ -378,7 +393,10 @@ class SIGfx10CacheControl : public SIGfx7CacheControl {
378
393
SIAtomicScope Scope,
379
394
SIAtomicAddrSpace AddrSpace) const override ;
380
395
381
- bool enableNonTemporal (const MachineBasicBlock::iterator &MI) const override ;
396
+ bool enableVolatileAndOrNonTemporal (MachineBasicBlock::iterator &MI,
397
+ SIAtomicAddrSpace AddrSpace, SIMemOp Op,
398
+ bool IsVolatile,
399
+ bool IsNonTemporal) const override ;
382
400
383
401
bool insertWait (MachineBasicBlock::iterator &MI,
384
402
SIAtomicScope Scope,
@@ -529,11 +547,13 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
529
547
AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic;
530
548
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
531
549
bool IsNonTemporal = true ;
550
+ bool IsVolatile = false ;
532
551
533
552
// Validator should check whether or not MMOs cover the entire set of
534
553
// locations accessed by the memory instruction.
535
554
for (const auto &MMO : MI->memoperands ()) {
536
555
IsNonTemporal &= MMO->isNonTemporal ();
556
+ IsVolatile |= MMO->isVolatile ();
537
557
InstrAddrSpace |=
538
558
toSIAtomicAddrSpace (MMO->getPointerInfo ().getAddrSpace ());
539
559
AtomicOrdering OpOrdering = MMO->getOrdering ();
@@ -576,7 +596,8 @@ Optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
576
596
}
577
597
}
578
598
return SIMemOpInfo (Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
579
- IsCrossAddressSpaceOrdering, FailureOrdering, IsNonTemporal);
599
+ IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
600
+ IsNonTemporal);
580
601
}
581
602
582
603
Optional<SIMemOpInfo> SIMemOpAccess::getLoadInfo (
@@ -703,14 +724,43 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
703
724
return Changed;
704
725
}
705
726
706
- bool SIGfx6CacheControl::enableNonTemporal (
707
- const MachineBasicBlock::iterator &MI) const {
727
+ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal (
728
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
729
+ bool IsVolatile, bool IsNonTemporal) const {
730
+ // Only handle load and store, not atomic read-modify-write instructions. The
731
+ // latter use glc to indicate if the atomic returns a result and so must not
732
+ // be used for cache control.
708
733
assert (MI->mayLoad () ^ MI->mayStore ());
734
+
735
+ // Only update load and store, not LLVM IR atomic read-modify-write
736
+ // instructions. The latter are always marked as volatile, so honoring the
737
+ // volatile flag for them would pessimize all atomics. They also do not
738
+ // support the nontemporal attribute.
739
+ assert ( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
740
+
709
741
bool Changed = false ;
710
742
711
- // / TODO: Do not enableGLCBit if rmw atomic.
712
- Changed |= enableGLCBit (MI);
713
- Changed |= enableSLCBit (MI);
743
+ if (IsVolatile) {
744
+ if (Op == SIMemOp::LOAD)
745
+ Changed |= enableGLCBit (MI);
746
+
747
+ // Ensure operation has completed at system scope to cause all volatile
748
+ // operations to be visible outside the program in a global order. Do not
749
+ // request cross address space as only the global address space can be
750
+ // observable outside the program, so no need to cause a waitcnt for LDS
751
+ // address space operations.
752
+ Changed |= insertWait (MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false ,
753
+ Position::AFTER);
754
+
755
+ return Changed;
756
+ }
757
+
758
+ if (IsNonTemporal) {
759
+ // Request L1 MISS_EVICT and L2 STREAM for load and store instructions.
760
+ Changed |= enableGLCBit (MI);
761
+ Changed |= enableSLCBit (MI);
762
+ return Changed;
763
+ }
714
764
715
765
return Changed;
716
766
}
@@ -732,7 +782,8 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI,
732
782
bool VMCnt = false ;
733
783
bool LGKMCnt = false ;
734
784
735
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
785
+ if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
786
+ SIAtomicAddrSpace::NONE) {
736
787
switch (Scope) {
737
788
case SIAtomicScope::SYSTEM:
738
789
case SIAtomicScope::AGENT:
@@ -959,13 +1010,45 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
959
1010
return Changed;
960
1011
}
961
1012
962
- bool SIGfx10CacheControl::enableNonTemporal (
963
- const MachineBasicBlock::iterator &MI) const {
1013
+ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal (
1014
+ MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
1015
+ bool IsVolatile, bool IsNonTemporal) const {
1016
+
1017
+ // Only handle load and store, not atomic read-modify-write instructions. The
1018
+ // latter use glc to indicate if the atomic returns a result and so must not
1019
+ // be used for cache control.
964
1020
assert (MI->mayLoad () ^ MI->mayStore ());
1021
+
1022
+ // Only update load and store, not LLVM IR atomic read-modify-write
1023
+ // instructions. The latter are always marked as volatile, so honoring the
1024
+ // volatile flag for them would pessimize all atomics. They also do not
1025
+ // support the nontemporal attribute.
1026
+ assert ( Op == SIMemOp::LOAD || Op == SIMemOp::STORE);
1027
+
965
1028
bool Changed = false ;
966
1029
967
- Changed |= enableSLCBit (MI);
968
- // / TODO for store (non-rmw atomic) instructions also enableGLCBit(MI)
1030
+ if (IsVolatile) {
1031
+
1032
+ if (Op == SIMemOp::LOAD) {
1033
+ Changed |= enableGLCBit (MI);
1034
+ Changed |= enableDLCBit (MI);
1035
+ }
1036
+
1037
+ // Ensure operation has completed at system scope to cause all volatile
1038
+ // operations to be visible outside the program in a global order. Do not
1039
+ // request cross address space as only the global address space can be
1040
+ // observable outside the program, so no need to cause a waitcnt for LDS
1041
+ // address space operations.
1042
+ Changed |= insertWait (MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false ,
1043
+ Position::AFTER);
1044
+ return Changed;
1045
+ }
1046
+
1047
+ if (IsNonTemporal) {
1048
+ // Request L0/L1 HIT_EVICT and L2 STREAM for load and store instructions.
1049
+ Changed |= enableSLCBit (MI);
1050
+ return Changed;
1051
+ }
969
1052
970
1053
return Changed;
971
1054
}
@@ -988,7 +1071,8 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI,
988
1071
bool VSCnt = false ;
989
1072
bool LGKMCnt = false ;
990
1073
991
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
1074
+ if ((AddrSpace & (SIAtomicAddrSpace::GLOBAL | SIAtomicAddrSpace::SCRATCH)) !=
1075
+ SIAtomicAddrSpace::NONE) {
992
1076
switch (Scope) {
993
1077
case SIAtomicScope::SYSTEM:
994
1078
case SIAtomicScope::AGENT:
@@ -1191,12 +1275,12 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
1191
1275
return Changed;
1192
1276
}
1193
1277
1194
- // Atomic instructions do not have the nontemporal attribute.
1195
- if (MOI. isNonTemporal ()) {
1196
- Changed |= CC-> enableNonTemporal (MI);
1197
- return Changed;
1198
- }
1199
-
1278
+ // Atomic instructions already bypass caches to the scope specified by the
1279
+ // SyncScope operand. Only non-atomic volatile and nontemporal instructions
1280
+ // need additional treatment.
1281
+ Changed |= CC-> enableVolatileAndOrNonTemporal (MI, MOI. getInstrAddrSpace (),
1282
+ SIMemOp::LOAD, MOI. isVolatile (),
1283
+ MOI. isNonTemporal ());
1200
1284
return Changed;
1201
1285
}
1202
1286
@@ -1217,12 +1301,12 @@ bool SIMemoryLegalizer::expandStore(const SIMemOpInfo &MOI,
1217
1301
return Changed;
1218
1302
}
1219
1303
1220
- // Atomic instructions do not have the nontemporal attribute.
1221
- if (MOI. isNonTemporal ()) {
1222
- Changed |= CC-> enableNonTemporal (MI);
1223
- return Changed;
1224
- }
1225
-
1304
+ // Atomic instructions already bypass caches to the scope specified by the
1305
+ // SyncScope operand. Only non-atomic volatile and nontemporal instructions
1306
+ // need additional treatment.
1307
+ Changed |= CC-> enableVolatileAndOrNonTemporal (
1308
+ MI, MOI. getInstrAddrSpace (), SIMemOp::STORE, MOI. isVolatile (),
1309
+ MOI. isNonTemporal ());
1226
1310
return Changed;
1227
1311
}
1228
1312
0 commit comments