Skip to content

[GlobalISel] Combine redundant sext_inreg #131624

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,10 @@ class CombinerHelper {
// overflow sub
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;

/// Match (sext_inreg (sext_inreg x, K0), K1), where \p Other is the inner
/// G_SEXT_INREG (width K0) and \p Root is the outer one (width K1) that
/// consumes its result.
/// If K1 >= K0 the outer instruction is redundant and its result is replaced
/// by the inner one's; otherwise the outer instruction is rebuilt as
/// (sext_inreg x, K1). On success \p MatchInfo holds the rewrite.
bool matchRedundantSextInReg(MachineInstr &Root, MachineInstr &Other,
BuildFnTy &MatchInfo) const;

private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
Expand Down
10 changes: 9 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,13 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;

// Fold a G_SEXT_INREG ($root, width $b) whose input is produced by another
// G_SEXT_INREG ($other, width $a). The C++ matcher decides how to rewrite the
// pair and records the rewrite in $matchinfo, which the apply step then runs.
def sext_inreg_of_sext_inreg : GICombineRule<
(defs root:$dst, build_fn_matchinfo:$matchinfo),
(match (G_SEXT_INREG $x, $src, $a):$other,
(G_SEXT_INREG $dst, $x, $b):$root,
[{ return Helper.matchRedundantSextInReg(*${root}, *${other}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;

// Push cast through build vector.
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
Expand Down Expand Up @@ -1914,7 +1921,8 @@ def cast_of_cast_combines: GICombineGroup<[
sext_of_anyext,
anyext_of_anyext,
anyext_of_zext,
anyext_of_sext
anyext_of_sext,
sext_inreg_of_sext_inreg,
]>;

def cast_combines: GICombineGroup<[
Expand Down
35 changes: 35 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,38 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}

/// Match a G_SEXT_INREG (\p Root) whose input is the result of another
/// G_SEXT_INREG (\p Other) and record the simplification in \p MatchInfo.
///
/// \returns true if a rewrite was recorded, false if the registers involved
/// cannot be substituted for one another.
bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
                                             MachineInstr &Other,
                                             BuildFnTy &MatchInfo) const {
  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
         Other.getOpcode() == TargetOpcode::G_SEXT_INREG);

  Register OuterDst = Root.getOperand(0).getReg();
  unsigned OuterBits = Root.getOperand(2).getImm();
  unsigned InnerBits = Other.getOperand(2).getImm();

  if (OuterBits < InnerBits) {
    // The outer extension is the narrower one, so it alone determines the
    // result: re-emit it reading straight from the inner instruction's source.
    Register InnerSrc = Other.getOperand(1).getReg();
    MatchInfo = [=](MachineIRBuilder &B) {
      B.buildSExtInReg(OuterDst, InnerSrc, OuterBits);
    };
    return true;
  }

  // The outer extension is at least as wide as the inner one, so it does not
  // change the value: forward the inner result to every user of the outer one.
  Register InnerDst = Other.getOperand(0).getReg();
  if (!canReplaceReg(OuterDst, InnerDst, MRI))
    return false;

  MatchInfo = [=](MachineIRBuilder &B) {
    Observer.changingAllUsesOfReg(MRI, OuterDst);
    MRI.replaceRegWith(OuterDst, InnerDst);
    Observer.finishedChangingAllUsesOfReg();
  };
  return true;
}
164 changes: 164 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s

---
name: inreg8_inreg16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: inreg8_inreg16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 8
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
$vgpr0 = COPY %inreg1
...

---
name: inreg16_inreg16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: inreg16_inreg16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 16
%inreg1:_(s32) = G_SEXT_INREG %inreg, 16
$vgpr0 = COPY %inreg1
...

---
name: inreg16_inreg8
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: inreg16_inreg8
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 16
%inreg1:_(s32) = G_SEXT_INREG %inreg, 8
$vgpr0 = COPY %inreg1
...

---
name: inreg16_inreg32_64bit
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: inreg16_inreg32_64bit
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
%copy:_(s64) = COPY $vgpr0_vgpr1
%inreg:_(s64) = G_SEXT_INREG %copy, 16
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
$vgpr0_vgpr1 = COPY %inreg1
...

---
name: inreg32_inreg32_64bit
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: inreg32_inreg32_64bit
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
%copy:_(s64) = COPY $vgpr0_vgpr1
%inreg:_(s64) = G_SEXT_INREG %copy, 32
%inreg1:_(s64) = G_SEXT_INREG %inreg, 32
$vgpr0_vgpr1 = COPY %inreg1
...

---
name: inreg32_inreg16_64bit
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: inreg32_inreg16_64bit
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
%copy:_(s64) = COPY $vgpr0_vgpr1
%inreg:_(s64) = G_SEXT_INREG %copy, 32
%inreg1:_(s64) = G_SEXT_INREG %inreg, 16
$vgpr0_vgpr1 = COPY %inreg1
...

---
name: vector_inreg8_inreg16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: vector_inreg8_inreg16
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
...

---
name: vector_inreg16_inreg16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: vector_inreg16_inreg16
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
...

---
name: vector_inreg16_inreg8
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-LABEL: name: vector_inreg16_inreg8
; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
%copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
%inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
...
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s

# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
# CGP widening of uniform i16 ops is disabled.
# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).

---
name: trunc_s16_inreg_8
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: trunc_s16_inreg_8
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 8
%trunc:_(s16) = G_TRUNC %inreg
%sext:_(s32) = G_SEXT %trunc
$vgpr0 = COPY %sext
...

---
name: trunc_s16_inreg_16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: trunc_s16_inreg_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 16
%trunc:_(s16) = G_TRUNC %inreg
%sext:_(s32) = G_SEXT %trunc
$vgpr0 = COPY %sext
...

---
name: trunc_s8_inreg_16
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: trunc_s8_inreg_16
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 16
%trunc:_(s8) = G_TRUNC %inreg
%sext:_(s32) = G_SEXT %trunc
$vgpr0 = COPY %sext
...

# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
---
name: mismatching_types
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0
; CHECK-LABEL: name: mismatching_types
; CHECK: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
%copy:_(s32) = COPY $vgpr0
%inreg:_(s32) = G_SEXT_INREG %copy, 8
%trunc:_(s8) = G_TRUNC %inreg
%sext:_(s16) = G_SEXT %trunc
%anyext:_(s32) = G_ANYEXT %sext
$vgpr0 = COPY %anyext
...
5 changes: 0 additions & 5 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_sext_i32_i16 s0, s0
; GFX10-NEXT: s_sext_i32_i16 s1, s1
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: ; return to shader part epilog
Expand Down Expand Up @@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_sext_i32_i8 s2, s2
; GFX10-NEXT: s_sext_i32_i16 s0, s0
; GFX10-NEXT: s_sext_i32_i16 s1, s1
; GFX10-NEXT: s_sext_i32_i16 s2, s2
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: s_abs_i32 s2, s2
Expand Down
Loading