@@ -715,12 +715,12 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
 }

 struct OperationOrderings {
-  NVPTX::OrderingUnderlyingType instr_ordering;
-  NVPTX::OrderingUnderlyingType fence_ordering;
+  NVPTX::OrderingUnderlyingType InstrOrdering;
+  NVPTX::OrderingUnderlyingType FenceOrdering;
   OperationOrderings(NVPTX::Ordering o = NVPTX::Ordering::NotAtomic,
                      NVPTX::Ordering f = NVPTX::Ordering::NotAtomic)
-      : instr_ordering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
-        fence_ordering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
+      : InstrOrdering(static_cast<NVPTX::OrderingUnderlyingType>(o)),
+        FenceOrdering(static_cast<NVPTX::OrderingUnderlyingType>(f)) {}
 };

 static OperationOrderings
@@ -759,12 +759,19 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   // Lustig et al, A Formal Analysis of the NVIDIA PTX Memory Consistency Model, ASPLOS’19.
   // https://dl.acm.org/doi/pdf/10.1145/3297858.3304043
   //
-  // | CUDA C++ Atomic Operation or Atomic Fence                                   | PTX Atomic Operation or Fence            |
-  // |------------------------------------------------------------------------------|------------------------------------------|
-  // | cuda::atomic_thread_fence(memory_order_seq_cst, cuda::thread_scope_<scope>) | fence.sc.<scope>;                        |
-  // | cuda::atomic_load(memory_order_seq_cst, cuda::thread_scope_<scope>)         | fence.sc.<scope>; ld.acquire.<scope>;    |
-  // | cuda::atomic_store(memory_order_seq_cst, cuda::thread_scope_<scope>)        | fence.sc.<scope>; st.release.<scope>;    |
-  // | cuda::atomic_fetch_<op>(memory_order_seq_cst, cuda::thread_scope_<scope>)   | fence.sc.<scope>; atom.acq_rel.<scope>;  |
+  // | CUDA C++ Atomic Operation or Atomic Fence           | PTX Atomic Operation or Fence |
+  // |-----------------------------------------------------|-------------------------------|
+  // | cuda::atomic_thread_fence                           | fence.sc.<scope>;             |
+  // | (memory_order_seq_cst, cuda::thread_scope_<scope>)  |                               |
+  // |-----------------------------------------------------|-------------------------------|
+  // | cuda::atomic_load                                   | fence.sc.<scope>;             |
+  // | (memory_order_seq_cst, cuda::thread_scope_<scope>)  | ld.acquire.<scope>;           |
+  // |-----------------------------------------------------|-------------------------------|
+  // | cuda::atomic_store                                  | fence.sc.<scope>;             |
+  // | (memory_order_seq_cst, cuda::thread_scope_<scope>)  | st.release.<scope>;           |
+  // |-----------------------------------------------------|-------------------------------|
+  // | cuda::atomic_fetch_<op>                             | fence.sc.<scope>;             |
+  // | (memory_order_seq_cst, cuda::thread_scope_<scope>)  | atom.acq_rel.<scope>;         |

   // clang-format on

@@ -897,11 +904,11 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
   //
   // This sets the ordering of the fence to SequentiallyConsistent, and
   // sets the corresponding ordering for the instruction.
-  NVPTX::Ordering ord;
+  NVPTX::Ordering InstrOrder;
   if (N->readMem()) {
-    ord = NVPTX::Ordering::Acquire;
+    InstrOrder = NVPTX::Ordering::Acquire;
   } else if (N->writeMem()) {
-    ord = NVPTX::Ordering::Release;
+    InstrOrder = NVPTX::Ordering::Release;
   } else {
     SmallString<256> Msg;
     raw_svector_ostream OS(Msg);
@@ -912,7 +919,7 @@ getOperationOrderings(MemSDNode *N, const NVPTXSubtarget *Subtarget) {
     report_fatal_error(OS.str());
   }
   return AddrGenericOrGlobalOrShared
-             ? OperationOrderings(ord,
+             ? OperationOrderings(InstrOrder,
                                   NVPTX::Ordering::SequentiallyConsistent)
              : OperationOrderings(NVPTX::Ordering::NotAtomic);
 }
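
As a side note on the seq_cst table in the second hunk above: below is a minimal CUDA C++ sketch of what those rows describe at the source level, assuming libcu++'s <cuda/atomic> header is available. The kernel names and the Flag/Out parameters are illustrative only and are not part of this patch. Per the table, with cuda::thread_scope_device (PTX scope .gpu), the load is expected to lower to fence.sc.gpu followed by ld.acquire.gpu, and the store to fence.sc.gpu followed by st.release.gpu.

#include <cuda/atomic>

// Sketch only: device-scoped seq_cst atomics, matching the
// cuda::atomic_load / cuda::atomic_store rows of the table above.
using DeviceAtomicInt = cuda::atomic<int, cuda::thread_scope_device>;

__global__ void Reader(DeviceAtomicInt *Flag, int *Out) {
  // Table row: fence.sc.<scope>; ld.acquire.<scope>;
  *Out = Flag->load(cuda::std::memory_order_seq_cst);
}

__global__ void Writer(DeviceAtomicInt *Flag, int Value) {
  // Table row: fence.sc.<scope>; st.release.<scope>;
  Flag->store(Value, cuda::std::memory_order_seq_cst);
}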