-
Notifications
You must be signed in to change notification settings - Fork 14.3k
DAG: Preserve more flags when expanding gep #110815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-debuginfo @llvm/pr-subscribers-llvm-selectiondag Author: Matt Arsenault (arsenm) ChangesThis allows selecting the addressing mode for stack instructions Full diff: https://github.com/llvm/llvm-project/pull/110815.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 25213f587116d5..6838c0b530a363 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4386,6 +4386,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
+ SDNodeFlags ScaleFlags;
+ // The multiplication of an index by the type size does not wrap the
+ // pointer index type in a signed sense (mul nsw).
+ if (NW.hasNoUnsignedSignedWrap())
+ ScaleFlags.setNoSignedWrap(true);
+
+ // The multiplication of an index by the type size does not wrap the
+ // pointer index type in an unsigned sense (mul nuw).
+ if (NW.hasNoUnsignedWrap())
+ ScaleFlags.setNoUnsignedWrap(true);
+
if (ElementScalable) {
EVT VScaleTy = N.getValueType().getScalarType();
SDValue VScale = DAG.getNode(
@@ -4393,27 +4404,41 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
if (IsVectorGEP)
VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
- IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale,
+ ScaleFlags);
} else {
// If this is a multiply by a power of two, turn it into a shl
// immediately. This is a very common case.
if (ElementMul != 1) {
if (ElementMul.isPowerOf2()) {
unsigned Amt = ElementMul.logBase2();
- IdxN = DAG.getNode(ISD::SHL, dl,
- N.getValueType(), IdxN,
- DAG.getConstant(Amt, dl, IdxN.getValueType()));
+ IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+ DAG.getConstant(Amt, dl, IdxN.getValueType()),
+ ScaleFlags);
} else {
SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
IdxN.getValueType());
- IdxN = DAG.getNode(ISD::MUL, dl,
- N.getValueType(), IdxN, Scale);
+ IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+ ScaleFlags);
}
}
}
- N = DAG.getNode(ISD::ADD, dl,
- N.getValueType(), N, IdxN);
+ SDNodeFlags AddFlags;
+
+ // The successive addition of each offset (without adding the base
+ // address) does not wrap the pointer index type in a signed sense (add
+ // nsw).
+ if (NW.hasNoUnsignedSignedWrap())
+ AddFlags.setNoSignedWrap(true);
+
+ // The successive addition of each offset (without adding the base
+ // address) does not wrap the pointer index type in an unsigned sense (add
+ // nuw).
+ if (NW.hasNoUnsignedWrap())
+ AddFlags.setNoUnsignedWrap(true);
+
+ N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
index 782894976c711c..a39afa6f609c7e 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -118,8 +118,7 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
@@ -145,8 +144,7 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: v_lshrrev_b32_e64 v2, 6, s32
; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT: v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
diff --git a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
index 63d7391bd7d4f5..5fe5a90a973174 100644
--- a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
+++ b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
@@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 {
; CHECK-NEXT: [[COPY:%[0-9]+]]:i64regs = COPY $i1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:i64regs = COPY $i0
; CHECK-NEXT: [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0
- ; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2
+ ; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2
; CHECK-NEXT: DBG_VALUE_LIST !13, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
; CHECK-NEXT: DBG_VALUE_LIST !14, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_plus_uconst, 3, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
; CHECK-NEXT: DBG_VALUE_LIST !15, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[COPY1]], [[SLLXri]], [[SLLXri]], debug-location !16
|
// address) does not wrap the pointer index type in a signed sense (add | ||
// nsw). | ||
if (NW.hasNoUnsignedSignedWrap()) | ||
AddFlags.setNoSignedWrap(true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This one looks incorrect. The add below is adding to the pointer (N), not just accumulating offsets, so you can't use nsw here. (The nuw below is correct.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Loses the test benefit though
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adjust tests to have explicit nuw flag rather than only inbounds?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's already tested here:
%gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx |
But it's still not enough. computeKnownBits still can't prove the sign bit is zero during selection with all flags on both GEPs:
define void @gep_all_flags(i32 %idx, i32 %val) {
%alloca = alloca [32 x i32], align 4, addrspace(5)
%gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx
%gep1 = getelementptr inbounds nuw i8, ptr addrspace(5) %gep0, i32 16
store volatile i32 %val, ptr addrspace(5) %gep1, align 4
ret void
}
Optimized legalized selection DAG: %bb.0 'gep_all_flags:'
SelectionDAG has 14 nodes:
t0: ch,glue = EntryToken
t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %8
t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %7
t7: i32 = shl nuw nsw # D:1 t2, Constant:i32<2>
t8: i32 = add nuw # D:1 FrameIndex:i32<0>, t7
t10: i32 = add nuw # D:1 t8, Constant:i32<16>
t13: ch = store<(volatile store (s32) into %ir.gep1, addrspace 5)> # D:1 t0, t4, t10, undef:i32
t14: ch = RET_GLUE t13
// The multiplication of an index by the type size does not wrap the | ||
// pointer index type in an unsigned sense (mul nuw). | ||
if (NW.hasNoUnsignedWrap()) | ||
ScaleFlags.setNoUnsignedWrap(true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ScaleFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap())
;
Likewise above.
95b7c37
to
f3c253f
Compare
a20f4c0
to
449ff2d
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
f3c253f
to
c17153f
Compare
This allows selecting the addressing mode for stack instructions in cases where we need to prove the sign bit is zero.
This loses the benefit in the test
449ff2d
to
418bd54
Compare
@@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 { | |||
; CHECK-NEXT: [[COPY:%[0-9]+]]:i64regs = COPY $i1 | |||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:i64regs = COPY $i0 | |||
; CHECK-NEXT: [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0 | |||
; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2 | |||
; CHECK-NEXT: [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@arsenm - is the change in this PR supposed to be specific to this target?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. It just introduces the flags, which may or may not survive to the end of the target's selection
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could you please come up with a regression test for another Target, say for example AArch64?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test change is incidental, this test was not the point. The real tests are supposed to be from 671cbcf (but in the final version of the patch, the codegen changes no longer manifest)
This allows selecting the addressing mode for stack instructions
in cases where we need to prove the sign bit is zero.