Skip to content

Commit 7c11476

Browse files
committed
[NVPTX] Improve comments
1 parent ef39e54 commit 7c11476

File tree

1 file changed

+54
-24
lines changed

1 file changed

+54
-24
lines changed

llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -730,31 +730,63 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
730730

731731
// Lowering for non-SequentiallyConsistent Operations
732732
//
733-
// | Atomic | Volatile | Statespace | Lowering sm_60- | Lowering sm_70+ |
734-
// |---------|----------|-------------------------------|-----------------|------------------------------------------------------|
735-
// | No | No | All | plain | .weak |
736-
// | No | Yes | Generic / Shared / Global [0] | .volatile | .volatile |
737-
// | No | Yes | Local / Const / Param | plain [1] | .weak [1] |
738-
// | Relaxed | No | Generic / Shared / Global [0] | .volatile | <atomic sem> |
739-
// | Other | No | Generic / Shared / Global [0] | Error [2] | <atomic sem> |
740-
// | Yes | No | Local / Const / Param | plain [1] | .weak [1] |
741-
// | Relaxed | Yes | Generic / Shared [0] | .volatile | .volatile |
742-
// | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) or .volatile (PTX 8.1-) |
743-
// | Relaxed | Yes | Local / Const / Param | plain [1] | .weak [1] |
744-
// | Other | Yes | Generic / Shared / Global [0] | Error [2] | <atomic sem> [3] |
733+
// | Atomic | Volatile | Statespace | PTX sm_60- | PTX sm_70+ |
734+
// |---------|----------|--------------------|------------|------------------------------|
735+
// | No | No | All | plain | .weak |
736+
// | No | Yes | Generic,Shared, | .volatile | .volatile |
737+
// | | | Global [0] | | |
738+
// | No | Yes | Local,Const,Param | plain [1] | .weak [1] |
739+
// | Relaxed | No | Generic,Shared, | | |
740+
// | | | Global [0] | .volatile | <atomic sem> |
741+
// | Other | No | Generic,Shared, | Error [2] | <atomic sem> |
742+
// | | | Global [0] | | |
743+
// | Yes | No | Local,Const,Param | plain [1] | .weak [1] |
744+
// | Relaxed | Yes | Generic,Shared [0] | .volatile | .volatile |
745+
// | Relaxed | Yes | Global [0] | .volatile | .mmio.relaxed.sys (PTX 8.2+) |
746+
// | | | | | or .volatile (PTX 8.1-) |
747+
// | Relaxed | Yes | Local,Const,Param | plain [1] | .weak [1] |
748+
// | Other | Yes | Generic, Shared, | Error [2] | <atomic sem> [3] |
749+
// | | | / Global [0] | | |
745750

746751
// clang-format on
747752

748-
// [0]: volatile and atomics are only supported on generic addressing to
749-
// shared or global, or shared, or global.
750-
// MMIO requires generic addressing to global or global, but
751-
// (TODO) we only implement it for global.
753+
// [0]: volatile and atomics are only supported on global or shared
754+
// memory locations, accessed via generic/shared/global pointers.
755+
// MMIO is only supported on global memory locations,
756+
// accessed via generic/global pointers.
757+
// TODO: Implement MMIO access via generic pointer to global.
758+
// Currently implemented for global pointers only.
752759

753-
// [1]: TODO: this implementation exhibits PTX Undefined Behavior; it
754-
// fails to preserve the side-effects of atomics and volatile
755-
// accesses in LLVM IR to local / const / param, causing
756-
// well-formed LLVM-IR & CUDA C++ programs to be miscompiled
757-
// in sm_70+.
760+
// [1]: Lowering volatile/atomic operations to non-volatile/non-atomic
761+
// PTX instructions fails to preserve their C++ side-effects.
762+
//
763+
// Example (https://github.com/llvm/llvm-project/issues/62057):
764+
//
765+
// void example() {
766+
// std::atomic<bool> True = true;
767+
// while (True.load(std::memory_order_relaxed));
768+
// }
769+
//
770+
// A C++ program that calls "example" is well-defined: the infinite loop
771+
// performs an atomic operation. By lowering volatile/atomics to
772+
// "weak" memory operations, we are transforming the above into:
773+
//
774+
// void undefined_behavior() {
775+
// bool True = true;
776+
// while (True);
777+
// }
778+
//
779+
// which exhibits undefined behavior in both C++ and PTX.
780+
//
781+
// Calling "example" in CUDA C++ compiled for sm_60- exhibits undefined
782+
// behavior due to lack of Independent Forward Progress. Lowering these
783+
// to weak memory operations in sm_60- is therefore fine.
784+
//
785+
// TODO: lower atomic and volatile operatios to memory locations
786+
// in local, const, and param to two PTX operations in sm_70+:
787+
// - the "weak" memory operation we are currently lowering to, and
788+
// - some other memory operation that preserves the side-effect, e.g.,
789+
// a dummy volatile load.
758790

759791
if (CodeAddrSpace == NVPTX::PTXLdStInstCode::LOCAL ||
760792
CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT ||
@@ -835,17 +867,15 @@ static unsigned int getCodeMemorySemantic(MemSDNode *N,
835867
}
836868
case AtomicOrdering::SequentiallyConsistent:
837869
case AtomicOrdering::Unordered:
838-
default: {
839870
// TODO: support AcquireRelease and SequentiallyConsistent
840871
SmallString<256> Msg;
841872
raw_svector_ostream OS(Msg);
842873
OS << "NVPTX backend does not support AtomicOrdering \""
843874
<< toIRString(Ordering) << "\" yet.";
844875
report_fatal_error(OS.str());
845876
}
846-
}
847877

848-
report_fatal_error("unreachable");
878+
llvm_unreachable("unexpected unhandled case");
849879
}
850880

851881
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,

0 commit comments

Comments
 (0)