Skip to content

[GlobalIsel] Push cast through select. #100539

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,12 @@ class CombinerHelper {

const TargetLowering &getTargetLowering() const;

/// \returns the MachineFunction of the MachineIRBuilder used by this helper.
const MachineFunction &getMachineFunction() const;

/// \returns the DataLayout of the machine function being combined.
const DataLayout &getDataLayout() const;

/// \returns the LLVMContext obtained from the MachineIRBuilder.
LLVMContext &getContext() const;

/// \returns true if the combiner is running pre-legalization.
bool isPreLegalize() const;

Expand Down Expand Up @@ -884,6 +890,9 @@ class CombinerHelper {
bool matchTruncateOfExt(const MachineInstr &Root, const MachineInstr &ExtMI,
BuildFnTy &MatchInfo);

/// Match a zext/anyext/trunc \p Cast whose source is the result of the select
/// \p SelectMI. On success, \p MatchInfo is set to a build function that
/// applies the cast to both select operands and selects between the results.
bool matchCastOfSelect(const MachineInstr &Cast, const MachineInstr &SelectMI,
BuildFnTy &MatchInfo);

private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
Expand Down Expand Up @@ -996,6 +1005,8 @@ class CombinerHelper {

// Simplify (cmp cc0 x, y) (&& or ||) (cmp cc1 x, y) -> cmp cc2 x, y.
bool tryFoldLogicOfFCmps(GLogicalBinOp *Logic, BuildFnTy &MatchInfo);

/// \returns true if the target lowering reports the cast \p Opcode from
/// \p FromTy to \p ToTy as free. Only G_ANYEXT, G_ZEXT and G_TRUNC are
/// queried; every other opcode yields false.
bool isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const;
};
} // namespace llvm

Expand Down
16 changes: 16 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,22 @@ class GExtOp : public GCastOp {
};
};

/// An integer-like cast that either widens its operand (sign-, zero- or
/// any-extend) or narrows it (truncate).
class GExtOrTruncOp : public GCastOp {
public:
  /// \returns true when the opcode of \p MI is one of G_SEXT, G_ZEXT,
  /// G_ANYEXT or G_TRUNC.
  static bool classof(const MachineInstr *MI) {
    const unsigned Opc = MI->getOpcode();
    return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_ZEXT ||
           Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_TRUNC;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
17 changes: 16 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -1771,10 +1771,25 @@ def truncate_of_zext : truncate_of_opcode<G_ZEXT>;
def truncate_of_sext : truncate_of_opcode<G_SEXT>;
def truncate_of_anyext : truncate_of_opcode<G_ANYEXT>;

// Push cast through select:
//   castOpcode(G_SELECT(cond, true, false))
//     -> G_SELECT(cond, castOpcode(true), castOpcode(false))
// The pattern only recognises the shape; CombinerHelper::matchCastOfSelect
// decides whether the rewrite applies and fills $matchinfo with the build
// function that the apply step runs on the cast instruction.
class select_of_opcode<Instruction castOpcode> : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (G_SELECT $select, $cond, $true, $false):$Select,
(castOpcode $root, $select):$Cast,
[{ return Helper.matchCastOfSelect(*${Cast}, *${Select}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${Cast}, ${matchinfo}); }])>;

// Instantiations of the rule for zero-extend, any-extend and truncate.
def select_of_zext : select_of_opcode<G_ZEXT>;
def select_of_anyext : select_of_opcode<G_ANYEXT>;
def select_of_truncate : select_of_opcode<G_TRUNC>;

def cast_combines: GICombineGroup<[
truncate_of_zext,
truncate_of_sext,
truncate_of_anyext
truncate_of_anyext,
select_of_zext,
select_of_anyext,
select_of_truncate
]>;


Expand Down
10 changes: 10 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,16 @@ const TargetLowering &CombinerHelper::getTargetLowering() const {
return *Builder.getMF().getSubtarget().getTargetLowering();
}

/// \returns the machine function the helper's builder is attached to.
const MachineFunction &CombinerHelper::getMachineFunction() const {
  const MachineFunction &MF = Builder.getMF();
  return MF;
}

/// \returns the data layout of the machine function the builder is attached to.
const DataLayout &CombinerHelper::getDataLayout() const {
  const MachineFunction &MF = Builder.getMF();
  return MF.getDataLayout();
}

/// \returns the LLVMContext provided by the helper's builder.
LLVMContext &CombinerHelper::getContext() const {
  LLVMContext &Ctx = Builder.getContext();
  return Ctx;
}

/// \returns The little endian in-memory byte position of byte \p I in a
/// \p ByteWidth bytes wide type.
///
Expand Down
48 changes: 48 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,51 @@ bool CombinerHelper::matchTruncateOfExt(const MachineInstr &Root,

return false;
}

bool CombinerHelper::isCastFree(unsigned Opcode, LLT ToTy, LLT FromTy) const {
const TargetLowering &TLI = getTargetLowering();
const DataLayout &DL = getDataLayout();
LLVMContext &Ctx = getContext();

switch (Opcode) {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_ZEXT:
return TLI.isZExtFree(FromTy, ToTy, DL, Ctx);
case TargetOpcode::G_TRUNC:
return TLI.isTruncateFree(FromTy, ToTy, DL, Ctx);
default:
return false;
}
}

bool CombinerHelper::matchCastOfSelect(const MachineInstr &CastMI,
const MachineInstr &SelectMI,
BuildFnTy &MatchInfo) {
const GExtOrTruncOp *Cast = cast<GExtOrTruncOp>(&CastMI);
const GSelect *Select = cast<GSelect>(&SelectMI);

if (!MRI.hasOneNonDBGUse(Select->getReg(0)))
return false;

Register Dst = Cast->getReg(0);
LLT DstTy = MRI.getType(Dst);
LLT CondTy = MRI.getType(Select->getCondReg());
Register TrueReg = Select->getTrueReg();
Register FalseReg = Select->getFalseReg();
LLT SrcTy = MRI.getType(TrueReg);
Register Cond = Select->getCondReg();

if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SELECT, {DstTy, CondTy}}))
return false;

if (!isCastFree(Cast->getOpcode(), DstTy, SrcTy))
return false;

MatchInfo = [=](MachineIRBuilder &B) {
auto True = B.buildInstr(Cast->getOpcode(), {DstTy}, {TrueReg});
auto False = B.buildInstr(Cast->getOpcode(), {DstTy}, {FalseReg});
B.buildSelect(Dst, Cond, True, False);
};

return true;
}
131 changes: 131 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-cast.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRE
# RUN: llc -o - -mtriple=aarch64-unknown-unknown -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s | FileCheck %s --check-prefixes=CHECK,CHECK-POST

---
name: test_combine_trunc_select
legalized: true
body: |
bb.1:
; CHECK-PRE-LABEL: name: test_combine_trunc_select
; CHECK-PRE: %cond:_(s32) = COPY $w0
; CHECK-PRE-NEXT: %lhs:_(s64) = COPY $x0
; CHECK-PRE-NEXT: %rhs:_(s64) = COPY $x0
; CHECK-PRE-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC %lhs(s64)
; CHECK-PRE-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC %rhs(s64)
; CHECK-PRE-NEXT: %small:_(s32) = G_SELECT %cond(s32), [[TRUNC]], [[TRUNC1]]
; CHECK-PRE-NEXT: $w0 = COPY %small(s32)
;
; CHECK-POST-LABEL: name: test_combine_trunc_select
; CHECK-POST: %cond:_(s32) = COPY $w0
; CHECK-POST-NEXT: %lhs:_(s64) = COPY $x0
; CHECK-POST-NEXT: %rhs:_(s64) = COPY $x0
; CHECK-POST-NEXT: %res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
; CHECK-POST-NEXT: %small:_(s32) = G_TRUNC %res(s64)
; CHECK-POST-NEXT: $w0 = COPY %small(s32)
%cond:_(s32) = COPY $w0
%lhs:_(s64) = COPY $x0
%rhs:_(s64) = COPY $x0
%res:_(s64) = G_SELECT %cond(s32), %lhs, %rhs
%small:_(s32) = G_TRUNC %res(s64)
$w0 = COPY %small(s32)
...
---
name: test_combine_zext_select
legalized: true
body: |
bb.1:
; CHECK-PRE-LABEL: name: test_combine_zext_select
; CHECK-PRE: %cond:_(s32) = COPY $w0
; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
; CHECK-PRE-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %lhs(s32)
; CHECK-PRE-NEXT: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %rhs(s32)
; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ZEXT]], [[ZEXT1]]
; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
;
; CHECK-POST-LABEL: name: test_combine_zext_select
; CHECK-POST: %cond:_(s32) = COPY $w0
; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
; CHECK-POST-NEXT: %big:_(s64) = G_ZEXT %res(s32)
; CHECK-POST-NEXT: $x0 = COPY %big(s64)
%cond:_(s32) = COPY $w0
%lhs:_(s32) = COPY $w0
%rhs:_(s32) = COPY $w0
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
%big:_(s64) = G_ZEXT %res(s32)
$x0 = COPY %big(s64)
...
---
name: test_combine_anyzext_select
legalized: true
body: |
bb.1:
; CHECK-PRE-LABEL: name: test_combine_anyzext_select
; CHECK-PRE: %cond:_(s32) = COPY $w0
; CHECK-PRE-NEXT: %lhs:_(s32) = COPY $w0
; CHECK-PRE-NEXT: %rhs:_(s32) = COPY $w0
; CHECK-PRE-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %lhs(s32)
; CHECK-PRE-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %rhs(s32)
; CHECK-PRE-NEXT: %big:_(s64) = G_SELECT %cond(s32), [[ANYEXT]], [[ANYEXT1]]
; CHECK-PRE-NEXT: $x0 = COPY %big(s64)
;
; CHECK-POST-LABEL: name: test_combine_anyzext_select
; CHECK-POST: %cond:_(s32) = COPY $w0
; CHECK-POST-NEXT: %lhs:_(s32) = COPY $w0
; CHECK-POST-NEXT: %rhs:_(s32) = COPY $w0
; CHECK-POST-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
; CHECK-POST-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
; CHECK-POST-NEXT: $x0 = COPY %big(s64)
%cond:_(s32) = COPY $w0
%lhs:_(s32) = COPY $w0
%rhs:_(s32) = COPY $w0
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
%big:_(s64) = G_ANYEXT %res(s32)
$x0 = COPY %big(s64)
...
---
name: test_combine_anyzext_select_multi_use
legalized: true
body: |
bb.1:
; CHECK-LABEL: name: test_combine_anyzext_select_multi_use
; CHECK: %cond:_(s32) = COPY $w0
; CHECK-NEXT: %lhs:_(s32) = COPY $w0
; CHECK-NEXT: %rhs:_(s32) = COPY $w0
; CHECK-NEXT: %res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
; CHECK-NEXT: %big:_(s64) = G_ANYEXT %res(s32)
; CHECK-NEXT: $x0 = COPY %big(s64)
; CHECK-NEXT: $w0 = COPY %res(s32)
%cond:_(s32) = COPY $w0
%lhs:_(s32) = COPY $w0
%rhs:_(s32) = COPY $w0
%res:_(s32) = G_SELECT %cond(s32), %lhs, %rhs
%big:_(s64) = G_ANYEXT %res(s32)
$x0 = COPY %big(s64)
$w0 = COPY %res(s32)
...
---
name: test_combine_trunc_select_vector_out_of_budget
legalized: true
body: |
bb.1:
; CHECK-LABEL: name: test_combine_trunc_select_vector_out_of_budget
; CHECK: %cond:_(<2 x s32>) = COPY $x0
; CHECK-NEXT: %arg1:_(s64) = COPY $x0
; CHECK-NEXT: %arg2:_(s64) = COPY $x0
; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
; CHECK-NEXT: %bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
; CHECK-NEXT: %res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
; CHECK-NEXT: %small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
; CHECK-NEXT: $x0 = COPY %small(<2 x s32>)
%cond:_(<2 x s32>) = COPY $x0
%arg1:_(s64) = COPY $x0
%arg2:_(s64) = COPY $x0
%bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64)
%bv2:_(<2 x s64>) = G_BUILD_VECTOR %arg2(s64), %arg1(s64)
%res:_(<2 x s64>) = G_SELECT %cond(<2 x s32>), %bv, %bv2
%small:_(<2 x s32>) = G_TRUNC %res(<2 x s64>)
$x0 = COPY %small(<2 x s32>)
38 changes: 18 additions & 20 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/ashr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1845,39 +1845,37 @@ define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
; GCN-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
; GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
; GCN-NEXT: s_ashr_i32 s8, s5, 31
; GCN-NEXT: s_ashr_i32 s7, s5, 31
; GCN-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_mov_b32 s9, s8
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], s[8:9]
; GCN-NEXT: s_cselect_b32 s2, s6, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i65:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX10PLUS-NEXT: s_sub_i32 s12, s3, 64
; GFX10PLUS-NEXT: s_sub_i32 s8, 64, s3
; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
; GFX10PLUS-NEXT: s_sub_i32 s2, 64, s3
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
; GFX10PLUS-NEXT: s_cselect_b32 s13, 1, 0
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
; GFX10PLUS-NEXT: s_cselect_b32 s14, 1, 0
; GFX10PLUS-NEXT: s_ashr_i64 s[6:7], s[4:5], s3
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], s3
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s8
; GFX10PLUS-NEXT: s_ashr_i32 s10, s5, 31
; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9]
; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s12
; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
; GFX10PLUS-NEXT: s_mov_b32 s11, s10
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s14, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s13, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[6:7], s[10:11]
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
; GFX10PLUS-NEXT: s_lshr_b64 s[6:7], s[0:1], s3
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[4:5], s2
; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[4:5], s3
; GFX10PLUS-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; GFX10PLUS-NEXT: s_ashr_i32 s3, s5, 31
; GFX10PLUS-NEXT: s_ashr_i64 s[4:5], s[4:5], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = ashr i65 %value, %amount
ret i65 %result
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/lshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1766,7 +1766,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[6:7], 0
; GCN-NEXT: s_cselect_b32 s2, s6, 0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_lshr_i65:
Expand All @@ -1788,7 +1788,7 @@ define amdgpu_ps i65 @s_lshr_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5]
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = lshr i65 %value, %amount
ret i65 %result
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/shl.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1733,9 +1733,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GCN-NEXT: s_cselect_b64 s[4:5], s[6:7], s[8:9]
; GCN-NEXT: s_cselect_b32 s3, s6, s8
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT: s_cselect_b32 s2, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65:
Expand All @@ -1753,9 +1753,9 @@ define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
; GFX10PLUS-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7]
; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
%result = shl i65 %value, %amount
ret i65 %result
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,7 @@ define amdgpu_kernel void @v_ctlz_i32_sel_ne_bitwidth(ptr addrspace(1) noalias %
; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX10-GISEL-NEXT: v_min_u32_e32 v1, 32, v1
; GFX10-GISEL-NEXT: v_subrev_nc_u32_e32 v1, 24, v1
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, -1, vcc_lo
; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, v1, 0xffff, vcc_lo
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10-GISEL-NEXT: global_store_byte v1, v0, s[4:5]
; GFX10-GISEL-NEXT: s_endpgm
Expand Down
Loading
Loading