DAG: Preserve more flags when expanding gep #110815

arsenm · 2024-10-02T10:01:32Z

This allows selecting the addressing mode for stack instructions
in cases where we need to prove the sign bit is zero.

arsenm · 2024-10-02T10:01:41Z

DAG: Preserve more flags when expanding gep #110815 👈
AMDGPU: Add baseline tests for gep flag handling #110814
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

Join @arsenm and the rest of your teammates on Graphite

llvmbot · 2024-10-02T10:02:59Z

@llvm/pr-subscribers-debuginfo
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-selectiondag

Author: Matt Arsenault (arsenm)

Changes

This allows selecting the addressing mode for stack instructions
in cases where we need to prove the sign bit is zero.

Full diff: https://github.com/llvm/llvm-project/pull/110815.diff

3 Files Affected:

(modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+33-8)
(modified) llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll (+2-4)
(modified) llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll (+1-1)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 25213f587116d5..6838c0b530a363 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4386,6 +4386,17 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       // it.
       IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
 
+      SDNodeFlags ScaleFlags;
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in a signed sense (mul nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        ScaleFlags.setNoSignedWrap(true);
+
+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in an unsigned sense (mul nuw).
+      if (NW.hasNoUnsignedWrap())
+        ScaleFlags.setNoUnsignedWrap(true);
+
       if (ElementScalable) {
         EVT VScaleTy = N.getValueType().getScalarType();
         SDValue VScale = DAG.getNode(
@@ -4393,27 +4404,41 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
             DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy));
         if (IsVectorGEP)
           VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
-        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale,
+                           ScaleFlags);
       } else {
         // If this is a multiply by a power of two, turn it into a shl
         // immediately.  This is a very common case.
         if (ElementMul != 1) {
           if (ElementMul.isPowerOf2()) {
             unsigned Amt = ElementMul.logBase2();
-            IdxN = DAG.getNode(ISD::SHL, dl,
-                               N.getValueType(), IdxN,
-                               DAG.getConstant(Amt, dl, IdxN.getValueType()));
+            IdxN = DAG.getNode(ISD::SHL, dl, N.getValueType(), IdxN,
+                               DAG.getConstant(Amt, dl, IdxN.getValueType()),
+                               ScaleFlags);
           } else {
             SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                             IdxN.getValueType());
-            IdxN = DAG.getNode(ISD::MUL, dl,
-                               N.getValueType(), IdxN, Scale);
+            IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, Scale,
+                               ScaleFlags);
           }
         }
       }
 
-      N = DAG.getNode(ISD::ADD, dl,
-                      N.getValueType(), N, IdxN);
+      SDNodeFlags AddFlags;
+
+      // The successive addition of each offset (without adding the base
+      // address) does not wrap the pointer index type in a signed sense (add
+      // nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        AddFlags.setNoSignedWrap(true);
+
+      // The successive addition of each offset (without adding the base
+      // address) does not wrap the pointer index type in an unsigned sense (add
+      // nuw).
+      if (NW.hasNoUnsignedWrap())
+        AddFlags.setNoUnsignedWrap(true);
+
+      N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags);
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
index 782894976c711c..a39afa6f609c7e 100644
--- a/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
+++ b/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll
@@ -118,8 +118,7 @@ define void @gep_inbounds_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -145,8 +144,7 @@ define void @gep_nusw_nuw_alloca(i32 %idx, i32 %val) #0 {
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX8-NEXT:    v_lshrrev_b32_e64 v2, 6, s32
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v2, v0
-; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 16, v0
-; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen
+; GFX8-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
diff --git a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
index 63d7391bd7d4f5..5fe5a90a973174 100644
--- a/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
+++ b/llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll
@@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 {
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:i64regs = COPY $i1
   ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:i64regs = COPY $i0
   ; CHECK-NEXT:   [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0
-  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2
+  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2
   ; CHECK-NEXT:   DBG_VALUE_LIST !13, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
   ; CHECK-NEXT:   DBG_VALUE_LIST !14, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_plus_uconst, 3, DW_OP_stack_value), [[COPY1]], [[SLLXri]], debug-location !16
   ; CHECK-NEXT:   DBG_VALUE_LIST !15, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value), [[COPY1]], [[COPY1]], [[SLLXri]], [[SLLXri]], debug-location !16

nikic · 2024-10-02T10:11:07Z

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

+      // address) does not wrap the pointer index type in a signed sense (add
+      // nsw).
+      if (NW.hasNoUnsignedSignedWrap())
+        AddFlags.setNoSignedWrap(true);


This one looks incorrect. The add below is adding to the pointer (N), not just accumulating offsets, so you can't use nsw here. (The nuw below is correct.)

Loses the test benefit though

Adjust tests to have explicit nuw flag rather than only inbounds?

That's already tested here:

llvm-project/llvm/test/CodeGen/AMDGPU/gep-flags-stack-offsets.ll

Line 134 in 56474da

%gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx

But it's still not enough. computeKnownBits still can't prove the sign bit is zero during selection with all flags on both GEPs:

define void @gep_all_flags(i32 %idx, i32 %val) { %alloca = alloca [32 x i32], align 4, addrspace(5) %gep0 = getelementptr inbounds nuw [32 x i32], ptr addrspace(5) %alloca, i32 0, i32 %idx %gep1 = getelementptr inbounds nuw i8, ptr addrspace(5) %gep0, i32 16 store volatile i32 %val, ptr addrspace(5) %gep1, align 4 ret void }

Optimized legalized selection DAG: %bb.0 'gep_all_flags:' SelectionDAG has 14 nodes: t0: ch,glue = EntryToken t4: i32,ch = CopyFromReg # D:1 t0, Register:i32 %8 t2: i32,ch = CopyFromReg # D:1 t0, Register:i32 %7 t7: i32 = shl nuw nsw # D:1 t2, Constant:i32<2> t8: i32 = add nuw # D:1 FrameIndex:i32<0>, t7 t10: i32 = add nuw # D:1 t8, Constant:i32<16> t13: ch = store<(volatile store (s32) into %ir.gep1, addrspace 5)> # D:1 t0, t4, t10, undef:i32 t14: ch = RET_GLUE t13

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

goldsteinn · 2024-10-02T14:26:54Z

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

+      // The multiplication of an index by the type size does not wrap the
+      // pointer index type in an unsigned sense (mul nuw).
+      if (NW.hasNoUnsignedWrap())
+        ScaleFlags.setNoUnsignedWrap(true);


ScaleFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap());

Likewise above.

nikic

LGTM

arsenm · 2024-10-09T09:42:49Z

Merge activity

Oct 9, 5:42 AM EDT: @arsenm started a stack merge that includes this pull request via Graphite.
Oct 9, 5:49 AM EDT: Graphite rebased this pull request as part of a merge.
Oct 9, 5:51 AM EDT: @arsenm merged this pull request with Graphite.

This allows selecting the addressing mode for stack instructions in cases where we need to prove the sign bit is zero.

This loses the benefit in the test

fpetrogalli · 2024-10-14T05:30:45Z

llvm/test/DebugInfo/Sparc/pointer-add-unknown-offset-debug-info.ll

@@ -12,7 +12,7 @@ define void @pointer_add_unknown_offset(ptr %base, i32 %offset) !dbg !7 {
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:i64regs = COPY $i1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:i64regs = COPY $i0
  ; CHECK-NEXT:   [[SRAri:%[0-9]+]]:i64regs = SRAri [[COPY]], 0
-  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = SLLXri killed [[SRAri]], 2
+  ; CHECK-NEXT:   [[SLLXri:%[0-9]+]]:i64regs = nsw SLLXri killed [[SRAri]], 2


@arsenm - is the change in this PR supposed to be specific to this target?

No. It just introduces the flags, which may or may not survive to the end of the target's selection

Could you please come up with a regression test for another Target, say for example AArch64?

This test change is incidental, this test was not the point. The real tests are supposed to be from 671cbcf (but in the final version of the patch, the codegen changes no longer manifest)

arsenm mentioned this pull request Oct 2, 2024

AMDGPU: Add baseline tests for gep flag handling #110814

Merged

arsenm added backend:AMDGPU llvm:SelectionDAG SelectionDAGISel as well labels Oct 2, 2024 — with Graphite App

arsenm requested review from aeubanks, efriedma-quic, goldsteinn, jayfoad, nikic and fhahn October 2, 2024 10:02

arsenm marked this pull request as ready for review October 2, 2024 10:04

llvmbot added the debuginfo label Oct 2, 2024

nikic reviewed Oct 2, 2024

View reviewed changes

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Oct 2, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/amdgpu-add-baseline-gep-flags-tests branch from 95b7c37 to f3c253f Compare October 9, 2024 05:52

arsenm force-pushed the users/arsenm/dag-builder-set-more-flags-from-gep branch from a20f4c0 to 449ff2d Compare October 9, 2024 05:53

nikic approved these changes Oct 9, 2024

View reviewed changes

arsenm force-pushed the users/arsenm/amdgpu-add-baseline-gep-flags-tests branch from f3c253f to c17153f Compare October 9, 2024 09:44

Base automatically changed from users/arsenm/amdgpu-add-baseline-gep-flags-tests to main October 9, 2024 09:48

arsenm added 3 commits October 9, 2024 09:48

DAG: Preserve more flags when expanding gep

495d772

This allows selecting the addressing mode for stack instructions in cases where we need to prove the sign bit is zero.

Address comments

cd141bb

This loses the benefit in the test

Address comments

418bd54

arsenm force-pushed the users/arsenm/dag-builder-set-more-flags-from-gep branch from 449ff2d to 418bd54 Compare October 9, 2024 09:48

arsenm merged commit ced15cd into main Oct 9, 2024
5 of 7 checks passed

arsenm deleted the users/arsenm/dag-builder-set-more-flags-from-gep branch October 9, 2024 09:51

fpetrogalli reviewed Oct 14, 2024

View reviewed changes

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

DAG: Preserve more flags when expanding gep #110815

DAG: Preserve more flags when expanding gep #110815

Uh oh!

arsenm commented Oct 2, 2024

Uh oh!

arsenm commented Oct 2, 2024 •

edited

Loading

Uh oh!

llvmbot commented Oct 2, 2024 •

edited

Loading

Uh oh!

nikic Oct 2, 2024

Uh oh!

arsenm Oct 2, 2024

Uh oh!

nikic Oct 2, 2024

Uh oh!

arsenm Oct 2, 2024

Uh oh!

Uh oh!

goldsteinn Oct 2, 2024

Uh oh!

nikic left a comment

Uh oh!

arsenm commented Oct 9, 2024 •

edited

Loading

Uh oh!

Uh oh!

fpetrogalli Oct 14, 2024

Uh oh!

arsenm Oct 14, 2024

Uh oh!

fpetrogalli Oct 14, 2024 •

edited

Loading

Uh oh!

arsenm Oct 15, 2024

Uh oh!

Uh oh!

DAG: Preserve more flags when expanding gep #110815

DAG: Preserve more flags when expanding gep #110815

Uh oh!

Conversation

arsenm commented Oct 2, 2024

Uh oh!

arsenm commented Oct 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Oct 2, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

arsenm commented Oct 9, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Merge activity

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

fpetrogalli Oct 14, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

arsenm commented Oct 2, 2024 •

edited

Loading

llvmbot commented Oct 2, 2024 •

edited

Loading

arsenm commented Oct 9, 2024 •

edited

Loading

fpetrogalli Oct 14, 2024 •

edited

Loading