-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[TargetLowering] Add a new function getNullPtrValue
#126665
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-selectiondag @llvm/pr-subscribers-backend-amdgpu Author: Shilei Tian (shiltian) ChangesIn most cases, This PR introduces Fixes #115083. Full diff: https://github.com/llvm/llvm-project/pull/126665.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bbecc7a6ddaee79..6f7b64e663e84c3 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5659,6 +5659,13 @@ class TargetLowering : public TargetLoweringBase {
LoadSDNode *OriginalLoad,
SelectionDAG &DAG) const;
+ /// Return the value of nullptr. In most cases, nullptr is a zero-value
+ /// constant with the corresponding pointer type. However, this is not always
+ /// the case. For certain address spaces on some targets, it could be a value
+ /// like ~0U.
+ virtual SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const;
+
private:
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7178f6398bede50..adc1d531826e73e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1807,8 +1807,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (isa<ConstantPointerNull>(C)) {
unsigned AS = V->getType()->getPointerAddressSpace();
- return DAG.getConstant(0, getCurSDLoc(),
- TLI.getPointerTy(DAG.getDataLayout(), AS));
+ return TLI.getNullPtrValue(AS, getCurSDLoc(), DAG);
}
if (match(C, m_VScale()))
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index adfb96041c5c06b..13220a8e9cf1294 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12188,3 +12188,8 @@ SDValue TargetLowering::scalarizeExtractedVectorLoad(EVT ResultVT,
return Load;
}
+
+SDValue TargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ return DAG.getConstant(0, DL, getPointerTy(DAG.getDataLayout(), AS));
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 792e17eeedab141..021f602a56ed78a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -6050,3 +6050,12 @@ bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
Register N0, Register N1) const {
return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
}
+
+SDValue AMDGPUTargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::LOCAL_ADDRESS) {
+ return DAG.getConstant(0xffffffff, DL,
+ getPointerTy(DAG.getDataLayout(), AS));
+ }
+ return TargetLowering::getNullPtrValue(AS, DL, DAG);
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c74dc7942f52c07..9e6b2eecb5c28b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -387,6 +387,9 @@ class AMDGPUTargetLowering : public TargetLowering {
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
}
+
+ SDValue getNullPtrValue(unsigned AS, const SDLoc &DL,
+ SelectionDAG &DAG) const override;
};
namespace AMDGPUISD {
diff --git a/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll b/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll
new file mode 100644
index 000000000000000..691cfd62708690e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/nullptr-lowering.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+define i32 @nullptr_p0(ptr %p) {
+; CHECK-LABEL: nullptr_p0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p1(ptr addrspace(1) %p) {
+; CHECK-LABEL: nullptr_p1:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(1) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p3(ptr addrspace(3) %p) {
+; CHECK-LABEL: nullptr_p3:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(3) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p4(ptr addrspace(4) %p) {
+; CHECK-LABEL: nullptr_p4:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(4) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
+
+define i32 @nullptr_p5(ptr addrspace(5) %p) {
+; CHECK-LABEL: nullptr_p5:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
+; CHECK-NEXT: v_mov_b32_e32 v0, 0
+; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v0, 1
+; CHECK-NEXT: ; %bb.2: ; %UnifiedReturnBlock
+; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq ptr addrspace(5) %p, null
+ br i1 %cmp, label %bb.0, label %bb.1
+bb.0:
+ ret i32 0
+bb.1:
+ ret i32 1
+}
|
I have not updated all test cases yet. Just to make sure this is the right thing to do, or I didn't miss any historic reason here. The GISel will be a follow-up (if necessary). |
In most cases, `nullptr` is a zero-value constant with the corresponding pointer type. However, this is not always the case. For example, AMDGPU uses `0xffffffff` as nullptr for AS3 and AS5, leading to lowering issues. Currently, to ensure correct lowering, `ptr addrspace(5) null` must be written as `addrspacecast (ptr null to ptr addrspace(5))`. This PR introduces `TargetLowering::getNullPtrValue` to determine the correct value of `nullptr`. This helps with proper lowering of `ConstantPointerNull`, which already has the correct address space. Fixes #115083.
9aa7353
to
7576732
Compare
@@ -1807,8 +1807,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { | |||
|
|||
if (isa<ConstantPointerNull>(C)) { | |||
unsigned AS = V->getType()->getPointerAddressSpace(); | |||
return DAG.getConstant(0, getCurSDLoc(), | |||
TLI.getPointerTy(DAG.getDataLayout(), AS)); | |||
return TLI.getNullPtrValue(AS, getCurSDLoc(), DAG); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ConstantPointerNull currently means a bitpattern 0. It does not mean a known invalid or sentinel pointer.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't believe that is the case. For example,
%6 = icmp eq ptr addrspace(5) %5, null
If you check operand(1)
, it is ptr addrspace(5) null
, which is recognized as ConstantPointerNull
. Unless you’re suggesting that ptr addrspace(5) null
might not be a valid or sentinel pointer (or should not be recognized as ConstantPointerNull
), while addrspacecast (ptr null to ptr addrspace(5))
is, then InstCombine is handling this incorrectly by treating them as equivalent.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In addition, the document suggests that it is "a constant pointer value that points to null". My reading is, it is a nullptr
. I don't see where to solely treat it as 0.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure what your godbolt link is trying to show, it doesn't have an addrspace cast?
Here you can see that InstCombine does not fold addrspacecast of null: https://godbolt.org/z/jrq8a8vez There is no assumption that a zero value in one address space maps to a zero value in another.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah I think I made a mistake here.
|
||
SDValue AMDGPUTargetLowering::getNullPtrValue(unsigned AS, const SDLoc &DL, | ||
SelectionDAG &DAG) const { | ||
if (AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::LOCAL_ADDRESS) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe we already have one of these in TargetMachine
Related to #91769 we will eventually want to add a variant of this for a DL specified null value, but that will probably need to coexist with ConstantPointerNull. The current state is it always means 0, it's just 0 in non 0 address spaces is considered potentially dereferenceable |
In most cases,
nullptr
is a zero-value constant with the corresponding pointertype. However, this is not always the case. For example, AMDGPU uses
0xffffffff
asnullptr
for AS3 and AS5, leading to lowering issues. Currently,to ensure correct lowering,
ptr addrspace(5) null
must be written asaddrspacecast (ptr null to ptr addrspace(5))
.This PR introduces
TargetLowering::getNullPtrValue
to determine the correctvalue of
nullptr
. This helps with proper lowering ofConstantPointerNull
,which already has the correct address space.
Fixes #115083.