-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[GlobalIsel] Push cast through select. #100539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-amdgpu
Author: Thorsten Schütt (tschuett)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/100539.diff
12 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 47365c3be3b93..05d7e882f5135 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,12 @@ class CombinerHelper {
const TargetLowering &getTargetLowering() const;
+ const MachineFunction &getMachineFunction() const;
+
+ const DataLayout &getDataLayout() const;
+
+ LLVMContext &getContext() const;
+
/// \returns true if the combiner is running pre-legalization.
bool isPreLegalize() const;
@@ -884,6 +890,9 @@ class CombinerHelper {
bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI,
BuildFnTy &MatchInfo);
+ bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI,
+ BuildFnTy &MatchInfo);
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
@@ -996,6 +1005,8 @@ class CombinerHelper {
// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);
+
+ bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 8b7e8c0fbf1f5..ef1171d9f1f64 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -934,6 +934,22 @@ class GExtOp : public GCastOp {
};
};
+/// Represents an integer-like extending or truncating operation.
+class GExtOrTruncOp : public GCastOp {
+public:
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_TRUNC:
+ return true;
+ default:
+ return false;
+ }
+ };
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2362e77b54be2..2246e20ecc1dc 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1771,10 +1771,25 @@ def truncate_of_zext : truncate_of_opcode<G_ZEXT>;
def truncate_of_sext : truncate_of_opcode<G_SEXT>;
def truncate_of_anyext : truncate_of_opcode<G_ANYEXT>;
+// Push cast through select.
+class select_of_opcode<Instruction castOpcode> : GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_SELECT $select, $cond, $true, $false):$Select,
+ (castOpcode $root, $select):$Cast,
+ [{ return Helper.matchCastOfSelect(*${Cast}, *${Select}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;
+
+def select_of_zext : select_of_opcode<G_ZEXT>;
+def select_of_anyext : select_of_opcode<G_ANYEXT>;
+def select_of_truncate : select_of_opcode<G_TRUNC>;
+
def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
- truncate_of_anyext
+ truncate_of_anyext,
+ select_of_zext,
+ select_of_anyext,
+ select_of_truncate
]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 8c05931812af5..d930ab2984629 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -68,6 +68,16 @@ const TargetLowering &CombinerHelper::getTargetLowering() const {
return *Builder.getMF().getSubtarget().getTargetLowering();
}
+const MachineFunction &CombinerHelper::getMachineFunction() const {
+ return Builder.getMF();
+}
+
+const DataLayout &CombinerHelper::getDataLayout() const {
+ return getMachineFunction().getDataLayout();
+}
+
+LLVMContext &CombinerHelper::getContext() const { return Builder.getContext(); }
+
/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index d36685bf28313..59295f7a65835 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -161,3 +161,51 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root,
return false;
}
+
+bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const {
+ const TargetLowering &TLI = getTargetLowering();
+ const DataLayout &DL = getDataLayout();
+ LLVMContext &Ctx = getContext();
+
+ switch (Opcode) {
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ZEXT:
+ return TLI.isZExtFree(FromTy, ToTy, DL, Ctx);
+ case TargetOpcode::G_TRUNC:
+ return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx);
+ default:
+ return false;
+ }
+}
+
+bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI,
+ const MachineInstr &SelectMI,
+ BuildFnTy &MatchInfo) {
+ const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
+ const GSelect *Select = cast<GSelect>(&SelectMI);
+
+ if (!MRI.hasOneNonDBGUse(Select->getReg(0)))
+ return false;
+
+ Register Dst = Cast->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+ LLT CondTy = MRI.getType(Select->getCondReg());
+ Register TrueReg = Select->getTrueReg();
+ Register FalseReg = Select->getFalseReg();
+ LLT SrcTy = MRI.getType(TrueReg);
+ Register Cond = Select->getCondReg();
+
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {DstTy, CondTy}}))
+ return false;
+
+ if (!isCastFree(Cast->getOpcode(), DstTy, SrcTy))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto True = B.buildInstr(Cast->getOpcode(), {DstTy}, {TrueReg});
+ auto False = B.buildInstr(Cast->getOpcode(), {DstTy}, {FalseReg});
+ B.buildSelect(Dst, Cond, True, False);
+ };
+
+ return true;
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
new file mode 100644
index 0000000000000..0f436127ea2eb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
@@ -0,0 +1,131 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
+# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST
+
+---
+name: test_combine_trunc_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_trunc_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
+ ; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
+ ; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
+ ; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_trunc_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
+ ; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
+ ; CHECK-POST-NEXT: $w0 = COPY %small(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s64) = COPY $x0
+ %rhs:_(s64) = COPY $x0
+ %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
+ %small:_(s32) = G_TRUNC %res(s64)
+ $w0 = COPY %small(s32)
+...
+---
+name: test_combine_zext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_zext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_zext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ZEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-PRE-LABEL: name: test_combine_anyzext_select
+ ; CHECK-PRE: %cond:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
+ ; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
+ ; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
+ ; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
+ ;
+ ; CHECK-POST-LABEL: name: test_combine_anyzext_select
+ ; CHECK-POST: %cond:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-POST-NEXT: $x0 = COPY %big(s64)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+...
+---
+name: test_combine_anyzext_select_multi_use
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
+ ; CHECK: %cond:_(s32) = COPY $w0
+ ; CHECK-NEXT: %lhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %rhs:_(s32) = COPY $w0
+ ; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ ; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
+ ; CHECK-NEXT: $x0 = COPY %big(s64)
+ ; CHECK-NEXT: $w0 = COPY %res(s32)
+ %cond:_(s32) = COPY $w0
+ %lhs:_(s32) = COPY $w0
+ %rhs:_(s32) = COPY $w0
+ %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
+ %big:_(s64) = G_ANYEXT %res(s32)
+ $x0 = COPY %big(s64)
+ $w0 = COPY %res(s32)
+...
+---
+name: test_combine_trunc_select_vector_out_of_budget
+legalized: true
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
+ ; CHECK: %cond:_(<2 x s32>) = COPY $x0
+ ; CHECK-NEXT: %arg1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %arg2:_(s64) = COPY $x0
+ ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ ; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ ; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ ; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ ; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
+ %cond:_(<2 x s32>) = COPY $x0
+ %arg1:_(s64) = COPY $x0
+ %arg2:_(s64) = COPY $x0
+ %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
+ %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
+ %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
+ %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
+ $x0 = COPY %small(<2 x s32>)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
index ec832ed0f7f3a..63f5464371cc6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
@@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GCN-NEXT: s_ashr_i32 s8, s5, 31
+; GCN-NEXT: s_ashr_i32 s7, s5, 31
; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s2, s6, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i65:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
-; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
-; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
+; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
+; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
-; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
-; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
-; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
-; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
-; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
-; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
-; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
-; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_mov_b32 s11, s10
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
-; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
+; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
+; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
+; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
+; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3
+; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
+; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31
+; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
+; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
+; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = ashr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
index 980ba3da4bac7..5dd4fa0809131 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
@@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
+; GCN-NEXT: s_cselect_b32 s2, s6, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_lshr_i65:
@@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = lshr i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
index c2f911cc44587..4cf1c92539c36 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
@@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
-; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
+; GCN-NEXT: s_cselect_b32 s3, s6, s8
; GCN-NEXT: s_cmp_lg_u32 s12, 0
-; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GCN-NEXT: s_cselect_b32 s2, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65:
@@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
+; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
-; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
+; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i65 %value, %amount
ret i65 %result
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll
index ba0a1e75e29b7..a0b549711f339 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll
@@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index a55c8cdc9b6e8..2168e7fe1dd28 100644
--- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -1706,11 +1706,12 @@ define amdgpu_kernel void @v_ctlz_zero_undef_i8_sel_eq_neg1(ptr addrspace(1) noa
; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v3, vcc
; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0xffff
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
-; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v0
-; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v2, v2
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa s[0:1], v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, -1, s[0:1]
+; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v0
+; GFX9-GISEL-NEXT: v_ffbh_u32_e32 v3, v3
+; GFX9-GISEL-NEXT: v_cmp_eq_u32_sdwa vcc, v0, v1 src0_sel:BYTE_0 src1_sel:DWORD
+; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc
; GFX9-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX9-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/cttz.ll b/llvm/test/CodeGen/AMDGPU/cttz.ll
index 57fe6cd4e1e45..14e6c4bcf6d8f 100644
--- a/llvm/test/CodeGen/AMDGPU/cttz.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz.ll
@@ -1359,7 +1359,7 @@ define amdgpu_kernel void @v_cttz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_or_b32_e32 v1, 0x100, v0
; GFX10-GISEL-NEXT: v_cmp_eq_u32_sdwa s0, v0, v2 src0_sel:BYTE_0 src1_sel:DWORD
; GFX10-GISEL-NEXT: v_ffbl_b32_e32 v1, v1
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, s0
+; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, s0
; GFX10-GISEL-NEXT: global_store_byte v2, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
5216e21
to
184635b
Compare
arsenm
approved these changes
Jul 25, 2024
yuxuanchen1997
pushed a commit
that referenced
this pull request
Jul 25, 2024
Summary:
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Differential Revision: https://phabricator.intern.facebook.com/D60250562
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.