Skip to content

[GlobalIsel] Combine trunc of binop #107721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,12 @@ class CombinerHelper {
/// Combine ors.
bool matchOr(MachineInstr &MI, BuildFnTy &MatchInfo);

/// trunc (binop X, C) --> binop (trunc X, trunc C).
bool matchNarrowBinop(const MachineInstr &TruncMI,
const MachineInstr &BinopMI, BuildFnTy &MatchInfo);

/// Fold an integer cast of a G_CONSTANT: cast (G_CONSTANT C) --> G_CONSTANT C'.
bool matchCastOfInteger(const MachineInstr &CastMI, APInt &MatchInfo);

/// Combine addos.
bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo);

Expand Down
36 changes: 35 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1867,6 +1867,33 @@ class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <

def buildvector_of_truncate : buildvector_of_opcode<G_TRUNC>;

// Narrow binop.
// trunc (binop X, C) --> binop (trunc X, trunc C)
// Sinks a truncate through a two-operand operation whose RHS is a
// G_CONSTANT so the operation runs at the narrower type. Single-use and
// legality requirements are enforced in CombinerHelper::matchNarrowBinop.
class narrow_binop_opcode<Instruction binopOpcode> : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_CONSTANT $const, $imm),
(binopOpcode $binop, $x, $const):$Binop,
(G_TRUNC $root, $binop):$Trunc,
[{ return Helper.matchNarrowBinop(*${Trunc}, *${Binop}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${Trunc}, ${matchinfo}); }])>;

// Instantiations for binary operations that commute with truncation.
def narrow_binop_add : narrow_binop_opcode<G_ADD>;
def narrow_binop_sub : narrow_binop_opcode<G_SUB>;
def narrow_binop_mul : narrow_binop_opcode<G_MUL>;
def narrow_binop_and : narrow_binop_opcode<G_AND>;
def narrow_binop_or : narrow_binop_opcode<G_OR>;
def narrow_binop_xor : narrow_binop_opcode<G_XOR>;

// Cast of integer.
// cast (G_CONSTANT C) --> G_CONSTANT C', where C' is C converted to the
// destination type. The match C++ hook computes the converted APInt; the
// apply replaces the whole cast with a constant of the result type.
class integer_of_opcode<Instruction castOpcode> : GICombineRule <
(defs root:$root, apint_matchinfo:$matchinfo),
(match (G_CONSTANT $int, $imm),
(castOpcode $root, $int):$Cast,
[{ return Helper.matchCastOfInteger(*${Cast}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${Cast}, ${matchinfo}); }])>;

def integer_of_truncate : integer_of_opcode<G_TRUNC>;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this really not already exist as a fold?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried it twice, but it was never merged.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, this exists in the artifact combiner: https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h#L270-L281. I was a bit confused because I had run into this case before and it was getting folded.

If you're adding it to CombinerHelper, could it be removed from the artifact combiner? Not sure if that will lead to an infinite loop though. I think this can then also be introduced for anyext, sext, zext in that case.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The artifact combiner should not be touched: if this combine runs after the legalizer, the trunc(constant) pattern would no longer be matched there. There are no extensions involved in this combine; this PR only folds trunc(constant).


def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
Expand All @@ -1881,7 +1908,14 @@ def cast_combines: GICombineGroup<[
anyext_of_anyext,
anyext_of_zext,
anyext_of_sext,
buildvector_of_truncate
buildvector_of_truncate,
narrow_binop_add,
narrow_binop_sub,
narrow_binop_mul,
narrow_binop_and,
narrow_binop_or,
narrow_binop_xor,
integer_of_truncate
]>;


Expand Down
46 changes: 46 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -313,3 +313,49 @@ bool CombinerHelper::matchCastOfBuildVector(const MachineInstr &CastMI,

return true;
}

bool CombinerHelper::matchNarrowBinop(const MachineInstr &TruncMI,
const MachineInstr &BinopMI,
BuildFnTy &MatchInfo) {
const GTrunc *Trunc = cast<GTrunc>(&TruncMI);
const GBinOp *BinOp = cast<GBinOp>(&BinopMI);

if (!MRI.hasOneNonDBGUse(BinOp->getReg(0)))
return false;

Register Dst = Trunc->getReg(0);
LLT DstTy = MRI.getType(Dst);

// Is narrow binop legal?
if (!isLegalOrBeforeLegalizer({BinOp->getOpcode(), {DstTy}}))
return false;

MatchInfo = [=](MachineIRBuilder &B) {
auto LHS = B.buildTrunc(DstTy, BinOp->getLHSReg());
auto RHS = B.buildTrunc(DstTy, BinOp->getRHSReg());
B.buildInstr(BinOp->getOpcode(), {Dst}, {LHS, RHS});
};

return true;
}

bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
APInt &MatchInfo) {
const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);

APInt Input = getIConstantFromReg(Cast->getSrcReg(), MRI);

LLT DstTy = MRI.getType(Cast->getReg(0));

if (!isConstantLegalOrBeforeLegalizer(DstTy))
return false;

switch (Cast->getOpcode()) {
case TargetOpcode::G_TRUNC: {
MatchInfo = Input.trunc(DstTy.getScalarSizeInBits());
return true;
}
default:
return false;
}
}
136 changes: 136 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-narrow-binop.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK

---
# trunc (xor X, 5) at s64 is rewritten to xor (trunc X), 5 at s32.
name: test_combine_trunc_xor_i64
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_xor_i64
; CHECK: %lhs:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_XOR [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = G_CONSTANT i64 5
%res:_(s64) = G_XOR %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
# trunc (add X, 5) at s64 is rewritten to add (trunc X), 5 at s32.
name: test_combine_trunc_add_i64
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_add_i64
; CHECK: %lhs:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_ADD [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = G_CONSTANT i64 5
%res:_(s64) = G_ADD %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
# trunc (mul X, 5) at s64 is rewritten to mul (trunc X), 5 at s32.
name: test_combine_trunc_mul_i64
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_mul_i64
; CHECK: %lhs:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_MUL [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = G_CONSTANT i64 5
%res:_(s64) = G_MUL %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
# trunc (and X, 5) at s64 is rewritten to and (trunc X), 5 at s32.
name: test_combine_trunc_and_i64
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_and_i64
; CHECK: %lhs:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_AND [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = G_CONSTANT i64 5
%res:_(s64) = G_AND %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
# trunc (or X, 5) at s64 is rewritten to or (trunc X), 5 at s32.
name: test_combine_trunc_or_i64
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_or_i64
; CHECK: %lhs:_(s64) = COPY $x0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_OR [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = G_CONSTANT i64 5
%res:_(s64) = G_OR %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
# The combine also narrows from a wide s128 source down to s32.
name: test_combine_trunc_sub_i128
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s128)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
; CHECK-NEXT: %small:_(s32) = G_SUB [[TRUNC]], [[C]]
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
%res:_(s128) = G_SUB %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s128)
$w0 = COPY %small(s32)
...
---
# Negative test: the wide result has a second use, so the combine must not
# fire (matchNarrowBinop requires a single non-debug use).
name: test_combine_trunc_sub_i128_multi_use
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_sub_i128_multi_use
; CHECK: %lhs:_(s128) = COPY $q0
; CHECK-NEXT: %rhs:_(s128) = G_CONSTANT i128 5
; CHECK-NEXT: %res:_(s128) = G_SUB %lhs, %rhs
; CHECK-NEXT: %small:_(s32) = G_TRUNC %res(s128)
; CHECK-NEXT: $q0 = COPY %res(s128)
; CHECK-NEXT: $w0 = COPY %small(s32)
%lhs:_(s128) = COPY $q0
%rhs:_(s128) = G_CONSTANT i128 5
%res:_(s128) = G_SUB %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s128)
$q0 = COPY %res(s128)
$w0 = COPY %small(s32)
...
---
# Negative test: the RHS is built by G_BUILD_VECTOR, not G_CONSTANT, so the
# TableGen pattern does not match and the vector xor stays wide.
name: test_combine_trunc_xor_vector_pattern_did_not_match
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_xor_vector_pattern_did_not_match
; CHECK: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: %arg2:_(s64) = COPY $x0
; CHECK-NEXT: %lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK-NEXT: %rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK-NEXT: %res:_(<2 x s64>) = G_XOR %lhs, %rhs
; CHECK-NEXT: %small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
; CHECK-NEXT: $w0 = COPY %small(<2 x s16>)
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x0
%lhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%rhs:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%res:_(<2 x s64>) = G_XOR %lhs, %rhs
%small:_(<2 x s16>) = G_TRUNC %res(<2 x s64>)
$w0 = COPY %small(<2 x s16>)
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/GlobalISel/inline-memset.mir
Original file line number Diff line number Diff line change
Expand Up @@ -224,10 +224,10 @@ body: |
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64)
; CHECK-NEXT: G_STORE [[C]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.dst + 8, align 1)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C]](s64)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64)
; CHECK-NEXT: G_STORE [[TRUNC]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 16448
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
; CHECK-NEXT: G_STORE [[C2]](s16), [[PTR_ADD1]](p0) :: (store (s16) into %ir.dst + 16, align 1)
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
%1:_(s8) = G_CONSTANT i8 64
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@ tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: test
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
; CHECK-NEXT: $w0 = COPY [[TRUNC]](s32)
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s16) = G_CONSTANT i16 0
%2:_(s1) = G_CONSTANT i1 true
Expand Down Expand Up @@ -41,9 +40,7 @@ body: |
bb.1:
; CHECK-LABEL: name: test_inverted_div_rem
; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[C]](s32)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC]](s8)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
%0:_(s16) = G_CONSTANT i16 0
%2:_(s1) = G_CONSTANT i1 true
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-itofp.mir
Original file line number Diff line number Diff line change
Expand Up @@ -193,10 +193,10 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[TRUNC]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CONSTANT i64 255
Expand All @@ -216,10 +216,10 @@ body: |
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[AND]](s64)
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[TRUNC]]
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C]]
; CHECK-NEXT: [[AMDGPU_CVT_F32_UBYTE0_:%[0-9]+]]:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 [[AND]]
; CHECK-NEXT: $vgpr0 = COPY [[AMDGPU_CVT_F32_UBYTE0_]](s32)
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = G_CONSTANT i64 255
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-zext-trunc.mir
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %c3FFF
; GCN-NEXT: $vgpr0 = COPY %low_bits(s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
%var:_(s32) = COPY $vgpr0
%c3FFF:_(s32) = G_CONSTANT i32 16383
%low_bits:_(s32) = G_AND %var, %c3FFF
Expand All @@ -34,10 +36,8 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
; GCN-NEXT: %cFFFFF:_(s32) = G_CONSTANT i32 1048575
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %cFFFFF
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s32)
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
; GCN-NEXT: %zext:_(s32) = G_ZEXT [[TRUNC]](s16)
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
%var:_(s32) = COPY $vgpr0
%cFFFFF:_(s32) = G_CONSTANT i32 1048575
Expand All @@ -58,9 +58,9 @@ body: |
; GCN: liveins: $vgpr0_vgpr1
; GCN-NEXT: {{ $}}
; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1
; GCN-NEXT: %c3FFF:_(s64) = G_CONSTANT i64 16383
; GCN-NEXT: %low_bits:_(s64) = G_AND %var, %c3FFF
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s64)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s64)
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
; GCN-NEXT: %zext:_(s32) = G_ZEXT %trunc(s16)
; GCN-NEXT: $vgpr0 = COPY %zext(s32)
%var:_(s64) = COPY $vgpr0_vgpr1
Expand All @@ -82,9 +82,9 @@ body: |
; GCN: liveins: $vgpr0
; GCN-NEXT: {{ $}}
; GCN-NEXT: %var:_(s32) = COPY $vgpr0
; GCN-NEXT: %c3FFF:_(s32) = G_CONSTANT i32 16383
; GCN-NEXT: %low_bits:_(s32) = G_AND %var, %c3FFF
; GCN-NEXT: %trunc:_(s16) = G_TRUNC %low_bits(s32)
; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC %var(s32)
; GCN-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16383
; GCN-NEXT: %trunc:_(s16) = G_AND [[TRUNC]], [[C]]
; GCN-NEXT: %zext:_(s64) = G_ZEXT %trunc(s16)
; GCN-NEXT: $vgpr0_vgpr1 = COPY %zext(s64)
%var:_(s32) = COPY $vgpr0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,13 +238,12 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[12:13], 0x0
; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
; GFX10-NEXT: v_xor_b32_e32 v3, 1, v3
; GFX10-NEXT: v_xor_b32_e32 v3, 1, v1
; GFX10-NEXT: v_and_b32_e32 v3, 1, v3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen
; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT: ; implicit-def: $vgpr3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
; GFX10-NEXT: s_cbranch_vccnz .LBB4_4
Expand Down
Loading
Loading