-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalISel] Combine redundant sext_inreg #131624
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-amdgpu Author: Pierre van Houtryve (Pierre-vh) Changes: Full diff: https://github.com/llvm/llvm-project/pull/131624.diff 6 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b78342c8fc39..5778377d125a8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -994,6 +994,9 @@ class CombinerHelper {
// overflow sub
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
+ // (sext_inreg (sext_inreg x, K0), K1) -> a single sext_inreg.
+ // \p Root is the outer G_SEXT_INREG (width K1), \p Other the inner one
+ // (width K0). If K1 >= K0 the uses of Root's result are redirected to
+ // Other's result; otherwise Root is rebuilt as (sext_inreg x, K1).
+ // Root is erased in both cases.
+ void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 660b03080f92e..6a0ff683a4647 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
+// Fold (G_SEXT_INREG (G_SEXT_INREG $src, $a), $b) into a single G_SEXT_INREG.
+// $other is the inner instruction (defines $x from $src with width $a) and
+// $root is the outer one (consumes $x with width $b). There is no separate
+// match predicate: the C++ apply callback compares the two widths and decides
+// how $root is replaced.
+def sext_inreg_of_sext_inreg : GICombineRule<
+ (defs root:$dst),
+ (match (G_SEXT_INREG $x, $src, $a):$other,
+ (G_SEXT_INREG $dst, $x, $b):$root),
+ (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
+
// Push cast through build vector.
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[
sext_of_anyext,
anyext_of_anyext,
anyext_of_zext,
- anyext_of_sext
+ anyext_of_sext,
+ sext_inreg_of_sext_inreg,
]>;
def cast_combines: GICombineGroup<[
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 182484754d091..ffc2384fc14fd 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -372,3 +372,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}
+
+/// Apply callback for the sext_inreg_of_sext_inreg combine, which matches
+///   (G_SEXT_INREG (G_SEXT_INREG x, K0), K1)
+/// \p Root is the outer G_SEXT_INREG (width K1) and \p Other is the inner one
+/// (width K0) whose result \p Root consumes.
+///
+/// - K1 >= K0: the inner instruction already sign-extends from a bit at or
+///   below K1, so \p Root changes nothing and every use of its result is
+///   redirected to the inner result.
+/// - K1 <  K0: only the low K1 bits of the inner result are observed, and
+///   those are the low K1 bits of x, so \p Root is rebuilt to read x directly.
+///
+/// \p Root is erased in both cases; \p Other is left in place.
+void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
+                                             MachineInstr &Other) const {
+  assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
+         Other.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+  // Operand 2 of G_SEXT_INREG is the immediate bit width extended from.
+  unsigned RootWidth = Root.getOperand(2).getImm();
+  unsigned OtherWidth = Other.getOperand(2).getImm();
+
+  Register Dst = Root.getOperand(0).getReg();
+  Register OtherDst = Other.getOperand(0).getReg();
+  Register Src = Other.getOperand(1).getReg();
+
+  if (RootWidth >= OtherWidth) {
+    // The root sext_inreg is entirely redundant because the other one is
+    // narrower (or equally wide).
+    //
+    // Use CombinerHelper::replaceRegWith instead of calling
+    // MRI.replaceRegWith directly: besides notifying the observer, it first
+    // tries to reconcile the register attributes (type / class / bank) of the
+    // two vregs and emits a COPY when they are incompatible. A raw replace
+    // could silently merge registers with conflicting banks when this runs
+    // after register bank selection.
+    replaceRegWith(MRI, Dst, OtherDst);
+  } else {
+    // RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the
+    // other G_SEXT_INREG.
+    // NOTE(review): relies on Builder already being positioned at Root by the
+    // combiner driver before the apply callback runs - confirm.
+    Builder.buildSExtInReg(Dst, Src, RootWidth);
+  }
+
+  Root.eraseFromParent();
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
new file mode 100644
index 0000000000000..566ee8e6c338d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
@@ -0,0 +1,164 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: inreg8_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg8_inreg16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg16_inreg16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg16_inreg8
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 8
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg32_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg16_inreg32_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg32_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg32_inreg32_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg16_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg32_inreg16_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: vector_inreg8_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg8_inreg16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg16_inreg16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg16_inreg8
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
new file mode 100644
index 0000000000000..c60c137b17f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
+# CGP widening of uniform i16 ops is disabled.
+# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).
+
+---
+name: trunc_s16_inreg_8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_8
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s16_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s8_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s8_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
+---
+name: mismatching_types
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: mismatching_types
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
+ ; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
+ ; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
+ ; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s16) = G_SEXT %trunc
+ %anyext:_(s32) = G_ANYEXT %sext
+ $vgpr0 = COPY %anyext
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 41e915a4c1011..18a222e56fd0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: ; return to shader part epilog
@@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_sext_i32_i8 s2, s2
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s2, s2
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: s_abs_i32 s2, s2
|
e36f665
to
c949f2c
Compare
3f2cbbd
to
4feac2f
Compare
4feac2f
to
f95a7c0
Compare
725c9a6
to
c34a0ea
Compare
f95a7c0
to
e717745
Compare
c34a0ea
to
59a0610
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Do we need the SelectionDAG version as well?
@@ -0,0 +1,164 @@ | |||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | |||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drop the -verify-machineinstrs if they aren't significant here.
@@ -0,0 +1,87 @@ | |||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | |||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ditto.
Merge activity
|
ca9b7c3
to
2f2f6b7
Compare
5a8230e
to
afc49f1
Compare
No description provided.