Skip to content

[GlobalIsel] Push cast through select. #100539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 25, 2024

Conversation

tschuett
Copy link

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Jul 25, 2024

@llvm/pr-subscribers-backend-aarch64

@llvm/pr-subscribers-backend-amdgpu

Author: Thorsten Schütt (tschuett)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/100539.diff

12 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+11)
  • (modified) llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h (+16)
  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+16-1)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+10)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp (+48)
  • (added) llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir (+131)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll (+18-20)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll (+2-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll (+4-4)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz.ll (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+5-4)
  • (modified) llvm/test/CodeGen/AMDGPU/cttz.ll (+1-1)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 47365c3be3b93..05d7e882f5135 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,12 @@ class CombinerHelper {
 
   const TargetLowering &getTargetLowering() const;
 
+  const MachineFunction &getMachineFunction() const;
+
+  const DataLayout &getDataLayout() const;
+
+  LLVMContext &getContext() const;
+
   /// \returns true if the combiner is running pre-legalization.
   bool isPreLegalize() const;
 
@@ -884,6 +890,9 @@ class CombinerHelper {
   bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI,
                           BuildFnTy &MatchInfo);
 
+  bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI,
+                         BuildFnTy &MatchInfo);
+
 private:
   /// Checks for legality of an indexed variant of \p LdSt.
   bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -996,6 +1005,8 @@ class CombinerHelper {
 
   // Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
   bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
+
+  bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
 };
 } // namespace llvm
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 8b7e8c0fbf1f5..ef1171d9f1f64 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -934,6 +934,22 @@ class GExtOp : public GCastOp {
   };
 };
 
+/// Represents an integer-like extending or truncating operation.
+class GExtOrTruncOp : public GCastOp {
+public:
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_TRUNC:
+      return true;
+    default:
+      return false;
+    }
+  };
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2362e77b54be2..2246e20ecc1dc 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1771,10 +1771,25 @@ def truncate_of_zext : truncate_of_opcode<G_ZEXT>;
 def truncate_of_sext : truncate_of_opcode<G_SEXT>;
 def truncate_of_anyext : truncate_of_opcode<G_ANYEXT>;
 
+// Push cast through select.
+class select_of_opcode<Instruction castOpcode> : GICombineRule <
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_SELECT $select, $cond, $true, $false):$Select,
+         (castOpcode $root, $select):$Cast,
+         [{ return Helper.matchCastOfSelect(*${Cast}, *${Select}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
+
+def select_of_zext : select_of_opcode<G_ZEXT>;
+def select_of_anyext : select_of_opcode<G_ANYEXT>;
+def select_of_truncate : select_of_opcode<G_TRUNC>;
+
 def cast_combines: GICombineGroup<[
   truncate_of_zext,
   truncate_of_sext,
-  truncate_of_anyext
+  truncate_of_anyext,
+  select_of_zext,
+  select_of_anyext,
+  select_of_truncate
 ]>;
 
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 8c05931812af5..d930ab2984629 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -68,6 +68,16 @@ const TargetLowering &CombinerHelper::getTargetLowering() const {
   return *Builder.getMF().getSubtarget().getTargetLowering();
 }
 
+const MachineFunction &CombinerHelper::getMachineFunction() const {
+  return Builder.getMF();
+}
+
+const DataLayout &CombinerHelper::getDataLayout() const {
+  return getMachineFunction().getDataLayout();
+}
+
+LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
+
 /// \returns The little endian in-memory byte position of byte \p I in a
 /// \p ByteWidth bytes wide type.
 ///
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index d36685bf28313..59295f7a65835 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -161,3 +161,51 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root,
 
   return false;
 }
+
+bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const {
+  const TargetLowering &TLI = getTargetLowering();
+  const DataLayout &DL = getDataLayout();
+  LLVMContext &Ctx = getContext();
+
+  switch (Opcode) {
+  case TargetOpcode::G_ANYEXT:
+  case TargetOpcode::G_ZEXT:
+    return TLI.isZExtFree(FromTy, ToTy, DL, Ctx);
+  case TargetOpcode::G_TRUNC:
+    return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx);
+  default:
+    return false;
+  }
+}
+
+bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI,
+                                       const MachineInstr &SelectMI,
+                                       BuildFnTy &MatchInfo) {
+  const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+  const GSelect *Select = cast<GSelect>(&SelectMI);
+
+  if (!MRI.hasOneNonDBGUse(Select->getReg(0)))
+    return false;
+
+  Register Dst = Cast->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+  LLT CondTy = MRI.getType(Select->getCondReg());
+  Register TrueReg = Select->getTrueReg();
+  Register FalseReg = Select->getFalseReg();
+  LLT SrcTy = MRI.getType(TrueReg);
+  Register Cond = Select->getCondReg();
+
+  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {DstTy, CondTy}}))
+    return false;
+
+  if (!isCastFree(Cast->getOpcode(), DstTy, SrcTy))
+    return false;
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto True = B.buildInstr(Cast->getOpcode(), {DstTy}, {TrueReg});
+    auto False = B.buildInstr(Cast->getOpcode(), {DstTy}, {FalseReg});
+    B.buildSelect(Dst, Cond, True, False);
+  };
+
+  return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
new file mode 100644
index 0000000000000..0f436127ea2eb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs  %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs  %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST
+
+---
+name:            test_combine_trunc_select
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_trunc_select
+    ; CHECK-PRE: %cond:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
+    ; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
+    ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+    ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
+    ; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
+    ; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_trunc_select
+    ; CHECK-POST: %cond:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
+    ; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
+    ; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+    ; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
+    ; CHECK-POST-NEXT: $w0 = COPY %small(s32)
+    %cond:_(s32) = COPY $w0
+    %lhs:_(s64) = COPY $x0
+    %rhs:_(s64) = COPY $x0
+    %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+    %small:_(s32) = G_TRUNC %res(s64)
+    $w0 = COPY %small(s32)
+...
+---
+name:            test_combine_zext_select
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_zext_select
+    ; CHECK-PRE: %cond:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
+    ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
+    ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
+    ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_zext_select
+    ; CHECK-POST: %cond:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    ; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
+    ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+    %cond:_(s32) = COPY $w0
+    %lhs:_(s32) = COPY $w0
+    %rhs:_(s32) = COPY $w0
+    %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    %big:_(s64) = G_ZEXT %res(s32)
+    $x0 = COPY %big(s64)
+...
+---
+name:            test_combine_anyzext_select
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-PRE-LABEL: name: test_combine_anyzext_select
+    ; CHECK-PRE: %cond:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+    ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
+    ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
+    ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
+    ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+    ;
+    ; CHECK-POST-LABEL: name: test_combine_anyzext_select
+    ; CHECK-POST: %cond:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+    ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    ; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+    ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+    %cond:_(s32) = COPY $w0
+    %lhs:_(s32) = COPY $w0
+    %rhs:_(s32) = COPY $w0
+    %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    %big:_(s64) = G_ANYEXT %res(s32)
+    $x0 = COPY %big(s64)
+...
+---
+name:            test_combine_anyzext_select_multi_use
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
+    ; CHECK: %cond:_(s32) = COPY $w0
+    ; CHECK-NEXT: %lhs:_(s32) = COPY $w0
+    ; CHECK-NEXT: %rhs:_(s32) = COPY $w0
+    ; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    ; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+    ; CHECK-NEXT: $x0 = COPY %big(s64)
+    ; CHECK-NEXT: $w0 = COPY %res(s32)
+    %cond:_(s32) = COPY $w0
+    %lhs:_(s32) = COPY $w0
+    %rhs:_(s32) = COPY $w0
+    %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+    %big:_(s64) = G_ANYEXT %res(s32)
+    $x0 = COPY %big(s64)
+    $w0 = COPY %res(s32)
+...
+---
+name:            test_combine_trunc_select_vector_out_of_budget
+legalized: true
+body:             |
+  bb.1:
+    ; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
+    ; CHECK: %cond:_(<2 x s32>) = COPY $x0
+    ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+    ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    ; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+    ; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+    ; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+    ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
+    %cond:_(<2 x s32>) = COPY $x0
+    %arg1:_(s64) = COPY $x0
+    %arg2:_(s64) = COPY $x0
+    %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+    %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+    %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+    %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+    $x0 = COPY %small(<2 x s32>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index ec832ed0f7f3a..63f5464371cc6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
 ; GCN-NEXT:    s_lshr_b64 s[2:3], s[0:1], s3
 ; GCN-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
 ; GCN-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
-; GCN-NEXT:    s_ashr_i32 s8, s5, 31
+; GCN-NEXT:    s_ashr_i32 s7, s5, 31
 ; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
 ; GCN-NEXT:    s_cmp_lg_u32 s11, 0
 ; GCN-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
 ; GCN-NEXT:    s_cmp_lg_u32 s12, 0
-; GCN-NEXT:    s_mov_b32 s9, s8
 ; GCN-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-NEXT:    s_cmp_lg_u32 s11, 0
-; GCN-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[8:9]
+; GCN-NEXT:    s_cselect_b32 s2, s6, s7
 ; GCN-NEXT:    ; return to shader part epilog
 ;
 ; GFX10PLUS-LABEL: s_ashr_i65:
 ; GFX10PLUS:       ; %bb.0:
 ; GFX10PLUS-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
-; GFX10PLUS-NEXT:    s_sub_i32 s12, s3, 64
-; GFX10PLUS-NEXT:    s_sub_i32 s8, 64, s3
+; GFX10PLUS-NEXT:    s_sub_i32 s10, s3, 64
+; GFX10PLUS-NEXT:    s_sub_i32 s2, 64, s3
 ; GFX10PLUS-NEXT:    s_cmp_lt_u32 s3, 64
-; GFX10PLUS-NEXT:    s_cselect_b32 s13, 1, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s11, 1, 0
 ; GFX10PLUS-NEXT:    s_cmp_eq_u32 s3, 0
-; GFX10PLUS-NEXT:    s_cselect_b32 s14, 1, 0
-; GFX10PLUS-NEXT:    s_ashr_i64 s[6:7], s[4:5], s3
-; GFX10PLUS-NEXT:    s_lshr_b64 s[2:3], s[0:1], s3
-; GFX10PLUS-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
-; GFX10PLUS-NEXT:    s_ashr_i32 s10, s5, 31
-; GFX10PLUS-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
-; GFX10PLUS-NEXT:    s_ashr_i64 s[4:5], s[4:5], s12
-; GFX10PLUS-NEXT:    s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT:    s_mov_b32 s11, s10
-; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
-; GFX10PLUS-NEXT:    s_cmp_lg_u32 s14, 0
-; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
-; GFX10PLUS-NEXT:    s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[6:7], s[10:11]
+; GFX10PLUS-NEXT:    s_cselect_b32 s12, 1, 0
+; GFX10PLUS-NEXT:    s_lshr_b64 s[6:7], s[0:1], s3
+; GFX10PLUS-NEXT:    s_lshl_b64 s[8:9], s[4:5], s2
+; GFX10PLUS-NEXT:    s_ashr_i64 s[2:3], s[4:5], s3
+; GFX10PLUS-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
+; GFX10PLUS-NEXT:    s_ashr_i32 s3, s5, 31
+; GFX10PLUS-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
+; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT:    s_cselect_b64 s[4:5], s[6:7], s[4:5]
+; GFX10PLUS-NEXT:    s_cmp_lg_u32 s12, 0
+; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, s2, s3
 ; GFX10PLUS-NEXT:    ; return to shader part epilog
   %result = ashr i65 %value, %amount
   ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 980ba3da4bac7..5dd4fa0809131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
 ; GCN-NEXT:    s_cmp_lg_u32 s12, 0
 ; GCN-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
 ; GCN-NEXT:    s_cmp_lg_u32 s11, 0
-; GCN-NEXT:    s_cselect_b64 s[2:3], s[6:7], 0
+; GCN-NEXT:    s_cselect_b32 s2, s6, 0
 ; GCN-NEXT:    ; return to shader part epilog
 ;
 ; GFX10PLUS-LABEL: s_lshr_i65:
@@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[4:5]
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
-; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, s2, 0
 ; GFX10PLUS-NEXT:    ; return to shader part epilog
   %result = lshr i65 %value, %amount
   ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index c2f911cc44587..4cf1c92539c36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
 ; GCN-NEXT:    s_lshl_b64 s[8:9], s[0:1], s10
 ; GCN-NEXT:    s_cmp_lg_u32 s11, 0
 ; GCN-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
-; GCN-NEXT:    s_cselect_b64 s[4:5], s[6:7], s[8:9]
+; GCN-NEXT:    s_cselect_b32 s3, s6, s8
 ; GCN-NEXT:    s_cmp_lg_u32 s12, 0
-; GCN-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GCN-NEXT:    s_cselect_b32 s2, s2, s3
 ; GCN-NEXT:    ; return to shader part epilog
 ;
 ; GFX10PLUS-LABEL: s_shl_i65:
@@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
 ; GFX10PLUS-NEXT:    s_lshl_b64 s[6:7], s[0:1], s10
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
 ; GFX10PLUS-NEXT:    s_cselect_b64 s[0:1], s[8:9], 0
-; GFX10PLUS-NEXT:    s_cselect_b64 s[4:5], s[4:5], s[6:7]
+; GFX10PLUS-NEXT:    s_cselect_b32 s3, s4, s6
 ; GFX10PLUS-NEXT:    s_cmp_lg_u32 s12, 0
-; GFX10PLUS-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GFX10PLUS-NEXT:    s_cselect_b32 s2, s2, s3
 ; GFX10PLUS-NEXT:    ; return to shader part epilog
   %result = shl i65 %value, %amount
   ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ba0a1e75e29b7..a0b549711f339 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10-GISEL-NEXT:    v_min_u32_e32 v1, 32, v1
 ; GFX10-GISEL-NEXT:    v_subrev_nc_u32_e32 v1, 24, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
 ; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-GISEL-NEXT:    global_store_byte v1, v0, s[4:5]
 ; GFX10-GISEL-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index a55c8cdc9b6e8..2168e7fe1dd28 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1706,11 +1706,12 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
 ; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
 ; GFX9-GISEL-NEXT:    global_load_ubyte v0, v[0:1], off
 ; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v2, 0xffff
 ; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 24, v0
-; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v2, v2
-; GFX9-GISEL-NEXT:    v_cmp_eq_u32_sdwa s[0:1], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT:    v_cndmask_b32_e64 v0, v2, -1, s[0:1]
+; GFX9-GISEL-NEXT:    v_lshlrev_b32_e32 v3, 24, v0
+; GFX9-GISEL-NEXT:    v_ffbh_u32_e32 v3, v3
+; GFX9-GISEL-NEXT:    v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v2, vcc
 ; GFX9-GISEL-NEXT:    global_store_byte v1, v0, s[4:5]
 ; GFX9-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 57fe6cd4e1e45..14e6c4bcf6d8f 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
 ; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, 0x100, v0
 ; GFX10-GISEL-NEXT:    v_cmp_eq_u32_sdwa s0, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
 ; GFX10-GISEL-NEXT:    v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, -1, s0
+; GFX10-GISEL-NEXT:    v_cndmask_b32_e64 v0, v1, 0xffff, s0
 ; GFX10-GISEL-NEXT:    global_store_byte v2, v0, s[4:5]
 ; GFX10-GISEL-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()

@tschuett tschuett force-pushed the gisel-push-through-select branch from 5216e21 to 184635b Compare July 25, 2024 11:05
@tschuett tschuett merged commit db8c84f into llvm:main Jul 25, 2024
7 checks passed
@tschuett tschuett deleted the gisel-push-through-select branch July 25, 2024 17:21
yuxuanchen1997 pushed a commit that referenced this pull request Jul 25, 2024
Summary: 

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60250562
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants