[DAGCombiner] Add some very basic folds for ADDRSPACECAST #127733
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-backend-nvptx @llvm/pr-subscribers-llvm-selectiondag

Author: Alex MacLean (AlexMaclean)

Changes: Fold an ADDRSPACECAST of another ADDRSPACECAST into a single cast, or remove both completely and forward the inner ASC operand.

Full diff: https://github.com/llvm/llvm-project/pull/127733.diff

2 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c6fd72b6b76f4..b0af55722f7ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -488,6 +488,7 @@ namespace {
SDValue visitTRUNCATE(SDNode *N);
SDValue visitTRUNCATE_USAT_U(SDNode *N);
SDValue visitBITCAST(SDNode *N);
+ SDValue visitADDRSPACECAST(SDNode *N);
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
@@ -1920,6 +1921,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::TRUNCATE: return visitTRUNCATE(N);
case ISD::TRUNCATE_USAT_U: return visitTRUNCATE_USAT_U(N);
case ISD::BITCAST: return visitBITCAST(N);
+ case ISD::ADDRSPACECAST: return visitADDRSPACECAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
@@ -16054,6 +16056,25 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitADDRSPACECAST(SDNode *N) {
+ auto *ASCN1 = cast<AddrSpaceCastSDNode>(N);
+
+ if (auto *ASCN2 = dyn_cast<AddrSpaceCastSDNode>(ASCN1->getOperand(0))) {
+ assert(ASCN2->getDestAddressSpace() == ASCN1->getSrcAddressSpace());
+
+ // Fold asc[B -> A](asc[A -> B](x)) -> x
+ if (ASCN1->getDestAddressSpace() == ASCN2->getSrcAddressSpace())
+ return ASCN2->getOperand(0);
+
+ // Fold asc[B -> C](asc[A -> B](x)) -> asc[A -> C](x)
+ return DAG.getAddrSpaceCast(
+ SDLoc(N), N->getValueType(0), ASCN2->getOperand(0),
+ ASCN2->getSrcAddressSpace(), ASCN1->getDestAddressSpace());
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
EVT VT = N->getValueType(0);
return CombineConsecutiveLoads(N, VT);
diff --git a/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
new file mode 100644
index 0000000000000..11c2b6782e0d3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/addrspacecast-folding.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_20 -O0 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -mcpu=sm_20 -O0 | %ptxas-verify %}
+
+target triple = "nvptx64-unknown-unknown"
+
+define ptr @test1(ptr %p) {
+; CHECK-LABEL: test1(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test1_param_0];
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd1;
+; CHECK-NEXT: ret;
+ %a = addrspacecast ptr %p to ptr addrspace(5)
+ %b = addrspacecast ptr addrspace(5) %a to ptr
+ ret ptr %b
+}
+
+define ptr addrspace(1) @test2(ptr addrspace(5) %p) {
+; CHECK-LABEL: test2(
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.u64 %rd1, [test2_param_0];
+; CHECK-NEXT: ret;
+ %a = addrspacecast ptr addrspace(5) %p to ptr
+ %b = addrspacecast ptr %a to ptr addrspace(1)
+ ret ptr addrspace(1) %b
+}
Force-pushed from 5a3eaf5 to 6dc9383
    if (ASCN1->getDestAddressSpace() == ASCN2->getSrcAddressSpace())
      return ASCN2->getOperand(0);

    // Fold asc[B -> C](asc[A -> B](x)) -> asc[A -> C](x)
Just curious, is AS cast always transitive?
It's treated as such by instcombine (https://godbolt.org/z/dhfTd18xs), so doing the same thing here seems safe.
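For reference, a minimal sketch of the kind of chain instcombine collapses there (the address-space numbers are placeholders, not taken from the PR):

; instcombine rewrites the pair into a single addrspacecast:
;   %b = addrspacecast ptr addrspace(1) %p to ptr addrspace(3)
define ptr addrspace(3) @chain(ptr addrspace(1) %p) {
  %a = addrspacecast ptr addrspace(1) %p to ptr
  %b = addrspacecast ptr %a to ptr addrspace(3)
  ret ptr addrspace(3) %b
}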
@nikic ^^ Would you happen to know if that's a safe assumption?
I'm concerned that it would depend on the specifics of the AS arrangement known only to the target. It may be too optimistic to assume that ASCs between all address spaces on all targets are transitive.
A thought experiment that could conceivably apply to existing hardware (even X86): it's not uncommon for RAM to have multiple mappings in the address space, each with specific properties (e.g. cacheability). Say we have cached/write-through (C), uncached (U), and write-back (W) cacheable windows. C/U/W addresses have a 1:1 mapping to each other. We also have a virtual AS (V), which contains all the windows for C/U/W. ASC(C->U->V) and ASC(C->W->V) will be distinctly different from ASC(C->V), as each will point at a different window in V.
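A sketch of that scenario in IR, with made-up address-space numbers (2 = C, 3 = U, 4 = W, 0 = V) on this purely hypothetical target:

; Both chains start from the same C pointer but land in different windows of V.
define void @path_dependent(ptr addrspace(2) %p) {
  %u  = addrspacecast ptr addrspace(2) %p to ptr addrspace(3)
  %v1 = addrspacecast ptr addrspace(3) %u to ptr   ; U's window of V
  %w  = addrspacecast ptr addrspace(2) %p to ptr addrspace(4)
  %v2 = addrspacecast ptr addrspace(4) %w to ptr   ; W's window of V
  ; %v1 and %v2 differ as bit patterns, so folding either chain to a
  ; direct C -> V cast would pick one window arbitrarily.
  ret void
}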
For integral address spaces, if the pointer is dereferenceable, the cast is assumed reversible (though we should have an instruction flag to indicate when we know the object is valid).
> if the pointer is dereferenceable, the cast is assumed reversible
In my example above, each individual cast is reversible, but that does not guarantee transitivity.
@nikic Could you please help clarify the semantics here? In practice, the fact that we're treating ASC as transitive in IC makes me think no target has defined address spaces as Artem is describing, meaning it should be safe here too.
My example is hypothetical. I don't think we have actual uses that match that pattern.
The point is that we can't assume AS transitivity implicitly. If LLVM relies on it, it should be documented explicitly, IMO.
Right now I can't tell whether the assumption of AS transitivity by existing code in LLVM is intentional or not. If it's by design, then your patch is fine. If not, then we may need to figure out how to deal with that. Probably we'll need to make transitivity target-dependent. I believe we did discuss describing additional target AS properties in the past.
Right now LLVM does not say much about AS assumptions. The closest I can find is this: https://llvm.org/docs/LangRef.html#pointer-type

> The semantics of non-zero address spaces are target-specific. ...

This suggests that whether ASC is transitive is target-specific.

> If an object can be proven accessible through a pointer with a different address space, the access may be modified to use that address space.

This phrase can be interpreted to mean that we're allowed to assume transitivity in principle, as the data in my example is accessible, even if the ASC results in a different pointer value.
However, even in that case it requires the ASCs to be "proven accessible", and that in general requires target-specific knowledge. The way I read it, collapsing local->shared->generic into just local->generic is not allowed, as a local AS source pointer can't be accessed via the shared AS. We could collapse such an invalid transition into a poison value, though.
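As an illustration of that last point, the chain in question would look like this in IR, using NVPTX address-space numbers (5 = local, 3 = shared, 0 = generic):

; The intermediate local -> shared step is not a valid access path, so
; folding the pair to a direct local -> generic cast changes the result;
; arguably the chain should fold to poison instead.
define ptr @local_shared_generic(ptr addrspace(5) %p) {
  %s = addrspacecast ptr addrspace(5) %p to ptr addrspace(3)
  %g = addrspacecast ptr addrspace(3) %s to ptr
  ret ptr %g
}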
> For integral address spaces, if the pointer is dereferenceable, the cast is assumed reversible (though we should have an instruction flag to indicate when we know the object is valid)

Looking at a different aspect: shouldn't there be a check that the address spaces are integral? Non-integral address spaces would certainly make the first fold invalid. And reading https://llvm.org/docs/LangRef.html#nointptrtype, it seems to me that this also makes the transitive fold invalid.
> ASC(C->U->V) and ASC(C->W->V) will be distinctly different from ASC(C->V), as each will be pointing at a different window in V.

For the numeric pointer value, you may end up with a different bit pattern. At the ultimate access, through either path, there needs to be a single dereferenceable object at a single address in V. It's possible we need the usage context to imply UB on access in order to assume transitivity.
@@ -332,9 +332,7 @@ define signext i32 @setlength() {
; CHECK: lgr [[MALLOC:[0-9]+]],3
; CHECK: basr 7,6
; CHECK: lgr [[LENGTH:[0-9]+]],3
; CHECK: la [[ADDR:[0-9]+]],4([[MALLOC]])
; CHECK: llgtr [[ADDR]],[[ADDR]]
Removing the llgtr is definitely a semantic change here; it removes the zeroing of the high bits. @redstar can you verify whether the new code is still correct on z/OS?
Prior to my change the DAG looked like this:
t40: i64 = add nuw t23, Constant:i64<4>
t41: i32 = addrspacecast[0 -> 1] t40
t51: i64 = addrspacecast[1 -> 0] t41
t52: ch = store<(store (s64) into %ir.1, align 4, addrspace 1)> t34, t37, t51, undef:i64
Which was lowered to this:
t40: i64 = add nuw t23, Constant:i64<4>
t56: i32 = truncate t40
t57: i32 = and t56, Constant:i32<2147483647>
t54: i32 = and t57, Constant:i32<2147483647>
t55: i64 = zero_extend t54
t52: ch = store<(store (s64) into %ir.1, align 4, addrspace 1)> t34, t37, t55, undef:i64
And then simplified to this:
t40: i64 = add nuw t23, Constant:i64<4>
t59: i64 = and t40, Constant:i64<2147483647>
t52: ch = store<(store (s64) into %ir.1, align 4, addrspace 1)> t34, t37, t59, undef:i64
With my change, the addrspacecasts are eliminated and as a result the 'and' is no longer present. This seems like an improvement, and it should also be correct, as the LangRef states:

> the cast is assumed to be reversible (i.e. casting the result back to the original address space should yield the original bit pattern).
> the cast is assumed to be reversible (i.e. casting the result back to the original address space should yield the original bit pattern).

Ok, that is a problem. zext(x & 0x7FFFFFFF) is certainly different from x. The high bit indicates whether the address is code or data, but that information is lost when the pointer is converted to 64 bit. When converting the pointer back to 31 bit, we assume the high bit is zero, because that is what is usually needed. (A set high bit is only required in very rare cases, e.g. changing the return address of a function stored in a stack frame.)
Any suggestion how to get around this issue?
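A sketch of the round trip the fold removes, written as the integer operations from the DAG dumps above (the masking stands in for the 64-bit -> 31-bit cast that llgtr used to enforce):

; zext(trunc(x) & 0x7FFFFFFF) == x only when bit 31 of the truncated
; value is clear, i.e. when the code/data discriminator bit is zero.
define i64 @round_trip_31bit(i64 %x) {
  %t = trunc i64 %x to i32
  %m = and i32 %t, 2147483647   ; 0x7FFFFFFF: drop the high bit
  %z = zext i32 %m to i64
  ret i64 %z                    ; != %x whenever bit 31 of %t is set
}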
I think that the X86 backend has a similar issue. They also use addrspacecast to cast between 64-bit and 32-bit pointers. However, with a brief look it seems they have no test case with two casts in the order 64-bit -> 32-bit -> 64-bit.
> both source and destination are integral pointers, and the result pointer is dereferenceable, the cast is assumed to be reversible
Doesn't this imply that the pointers must have the same size?
Okay, based on both of these discussions, I've reached the conclusion that addrspacecast is not well defined enough to safely do these transformations across all targets. I've opened #129157 for NVPTX.
Should we update isEliminableCastPair (https://github.com/llvm/llvm-project/blob/2639dea7d83cfd5c6bbca84b24d7c5bd599b2e8e/llvm/lib/IR/Instructions.cpp#L2862C43-L2862C45) to avoid doing this in IR as well?
We need to do something, but this depends on https://discourse.llvm.org/t/rfc-finer-grained-non-integral-pointer-properties/83176/3 (RFC: finer-grained non-integral pointer properties). I thought we already stopped doing these folds for non-integral address spaces, but I don't see that restriction.