[GISel] Fold shifts to constant result. #123510


Merged: 9 commits, Jan 21, 2025
Changes from all commits
6 changes: 4 additions & 2 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -840,8 +840,10 @@ class CombinerHelper {
bool matchRedundantBinOpInEquality(MachineInstr &MI,
BuildFnTy &MatchInfo) const;

/// Match shifts greater than or equal to the bitwidth of the operation.
bool matchShiftsTooBig(MachineInstr &MI) const;
/// Match shifts greater than or equal to the range (the bitwidth of the
/// result type, or the effective bitwidth of the source value).
bool matchShiftsTooBig(MachineInstr &MI,
std::optional<int64_t> &MatchInfo) const;

/// Match constant LHS ops that should be commuted.
bool matchCommuteConstantToRHS(MachineInstr &MI) const;
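
To make the new "effective bitwidth" wording concrete, here is a minimal standalone sketch (plain C++, not LLVM code) using the same constants as the MIR tests added below:

#include <cassert>
#include <cstdint>

int main() {
  // Effective-bitwidth case: i8 -2 is 0b1111'1110, with 7 known leading
  // ones and a single significant bit, so ashr by >= 1 leaves only sign
  // copies, i.e. the constant -1 (see combine_ashr2 below).
  int8_t V = -2;
  assert(static_cast<int8_t>(V >> 1) == -1);

  // Likewise 0xFFFF0000 has 16 trailing zeros, so shl by >= 16 produces
  // the constant 0 (see combine_shl below).
  uint32_t U = 0xFFFF0000u;
  assert((U << 16) == 0u);

  // Result-bitwidth case: an amount >= the type's bitwidth still folds to
  // undef, exactly as before this change.
  return 0;
}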
20 changes: 16 additions & 4 deletions llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -306,11 +306,23 @@ def ptr_add_immed_chain : GICombineRule<
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;

def shift_const_op : GICombinePatFrag<
(outs root:$dst), (ins),
!foreach(op,
[G_SHL, G_ASHR, G_LSHR],
(pattern (op $dst, $shifted, $amt)))>;
def shift_result_matchdata : GIDefMatchData<"std::optional<int64_t>">;
def shifts_too_big : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
[{ return Helper.matchShiftsTooBig(*${root}); }]),
(apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
(defs root:$root, shift_result_matchdata:$matchinfo),
(match (shift_const_op $root):$mi,
[{ return Helper.matchShiftsTooBig(*${mi}, ${matchinfo}); }]),
(apply [{
if (${matchinfo}) {
Helper.replaceInstWithConstant(*${mi}, *${matchinfo});
} else {
Helper.replaceInstWithUndef(*${mi});
}
}])>;
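
For readers less familiar with the TableGen side: the shift_const_op GICombinePatFrag uses !foreach to expand into three alternative patterns, one per shift opcode, replacing the old wip_match_opcode list and binding the matched instruction as $mi. The apply step then keys off the matchdata: a populated std::optional<int64_t> means the whole result is a known constant (0 or -1), while std::nullopt preserves the existing replace-with-undef behavior for amounts that merely reach the bitwidth.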

// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
49 changes: 47 additions & 2 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6590,12 +6590,57 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}

bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) const {
/// Return the minimum useless shift amount that results in complete loss of
/// the source value. Return std::nullopt when no such amount can be
/// determined.
static std::optional<unsigned>
getMinUselessShift(KnownBits ValueKB, unsigned Opcode,
std::optional<int64_t> &Result) {
assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR ||
Opcode == TargetOpcode::G_ASHR) &&
"Expect G_SHL, G_LSHR or G_ASHR.");
unsigned RedundantBits = 0;
switch (Opcode) {
case TargetOpcode::G_SHL:
RedundantBits = ValueKB.countMinTrailingZeros();
Result = 0;
break;
case TargetOpcode::G_LSHR:
Result = 0;
RedundantBits = ValueKB.countMinLeadingZeros();
break;
case TargetOpcode::G_ASHR:
if (ValueKB.isNonNegative()) {
RedundantBits = ValueKB.countMinLeadingZeros();
Result = 0;
} else if (ValueKB.isNegative()) {
RedundantBits = ValueKB.countMinLeadingOnes();
Result = -1;
} else {
// Cannot determine the shift result.
return std::nullopt;
}
break;
Comment on lines +6610 to +6621 (Contributor): This should be using computeKnownSignBits instead of computeKnownBits, it can be slightly smarter.
default:
break;
}
return ValueKB.getBitWidth() - RedundantBits;
}
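
A hedged sketch of the reviewer's suggestion above (an assumption about intent, not code from this PR; the API presumably meant is GISelKnownBits::computeNumSignBits): with N known sign bits, any G_ASHR amount of at least BitWidth - N leaves only copies of the sign bit, and N is never smaller than the countMinLeadingZeros/Ones figures used above, so the G_ASHR case could fire on more shifts if the sign-bit count were passed in as well:

// Hypothetical replacement for the G_ASHR case only, assuming a
// NumSignBits parameter is added alongside ValueKB.
case TargetOpcode::G_ASHR:
  if (ValueKB.isNonNegative())
    Result = 0;   // sign bit known zero: the shift fills with zeros
  else if (ValueKB.isNegative())
    Result = -1;  // sign bit known one: the shift fills with ones
  else
    return std::nullopt; // fill bit unknown: no single constant result
  RedundantBits = NumSignBits; // always >= countMinLeading{Zeros,Ones}
  break;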

bool CombinerHelper::matchShiftsTooBig(
MachineInstr &MI, std::optional<int64_t> &MatchInfo) const {
Register ShiftVal = MI.getOperand(1).getReg();
Register ShiftReg = MI.getOperand(2).getReg();
LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
auto IsShiftTooBig = [&](const Constant *C) {
auto *CI = dyn_cast<ConstantInt>(C);
return CI && CI->uge(ResTy.getScalarSizeInBits());
if (!CI)
return false;
if (CI->uge(ResTy.getScalarSizeInBits())) {
MatchInfo = std::nullopt;
return true;
}
auto MinUselessShift = getMinUselessShift(KB->getKnownBits(ShiftVal),
MI.getOpcode(), MatchInfo);
return MinUselessShift && CI->uge(*MinUselessShift);
};
return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
}
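
One detail worth noting: matchUnaryPredicate (from GlobalISel/Utils.h) runs IsShiftTooBig on a lone G_CONSTANT shift amount or on every constant element of a G_BUILD_VECTOR, which is what lets the vector tests below fold with no extra handling here. The KnownBits of the shifted value are common to all lanes, so MatchInfo comes out the same for every element.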
208 changes: 208 additions & 0 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shifts.mir
@@ -0,0 +1,208 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck %s

---
name: combine_ashr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_ashr
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
; CHECK-NEXT: SI_RETURN
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = G_CONSTANT i32 10
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
%5:_(s32) = G_ASHR %4, %3(s32)
G_STORE %5(s32), %2(p0) :: (store (s32))
SI_RETURN

...
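This test and combine_lshr below rely on target knowledge: on amdgcn the default maximum flat workgroup size of 1024 bounds llvm.amdgcn.workitem.id.x to 10 bits, so KnownBits reports at least 22 leading zeros and a right shift by 10 is already known to produce 0.
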
---
name: combine_lshr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_lshr
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
; CHECK-NEXT: SI_RETURN
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = G_CONSTANT i32 10
%4:_(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.workitem.id.x)
%5:_(s32) = G_LSHR %4, %3(s32)
G_STORE %5(s32), %2(p0) :: (store (s32))
SI_RETURN

...
---
name: combine_shl
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_shl
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: G_STORE [[C]](s32), [[MV]](p0) :: (store (s32))
; CHECK-NEXT: SI_RETURN
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = G_CONSTANT i32 16
%4:_(s32) = G_CONSTANT i32 4294901760
%5:_(s32) = G_SHL %4, %3(s32)
G_STORE %5(s32), %2(p0) :: (store (s32))
SI_RETURN

...
---
name: combine_ashr2
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_ashr2
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 -1
; CHECK-NEXT: G_STORE [[C]](s8), [[MV]](p0) :: (store (s8))
; CHECK-NEXT: SI_RETURN
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(p0) = G_MERGE_VALUES %0(s32), %1(s32)
%3:_(s32) = G_CONSTANT i32 1
%4:_(s8) = G_CONSTANT i8 -2
%5:_(s8) = G_ASHR %4, %3(s32)
G_STORE %5(s8), %2(p0) :: (store (s8))
SI_RETURN

...
Comment (Contributor): Test vector cases

Comment (Member, author): test cases added

---
name: combine_vector_lshr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_vector_lshr
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 511
%2:_(s32) = G_CONSTANT i32 0
%3:_(s32) = G_CONSTANT i32 1
%4:_(s32) = G_CONSTANT i32 9
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
%8:_(<2 x s32>) = G_LSHR %7, %5(<2 x s32>)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
$vgpr0 = COPY %9(s32)
$vgpr1 = COPY %10(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1

...
---
name: combine_vector_shl
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_vector_shl
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 4294901760
%2:_(s32) = G_CONSTANT i32 0
%3:_(s32) = G_CONSTANT i32 1
%4:_(s32) = G_CONSTANT i32 16
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
%8:_(<2 x s32>) = G_SHL %7, %5(<2 x s32>)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
$vgpr0 = COPY %9(s32)
$vgpr1 = COPY %10(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1

...
---
name: combine_vector_ashr
tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr31

liveins: $vgpr0, $vgpr1

; CHECK-LABEL: name: combine_vector_ashr
; CHECK: liveins: $vgpr0, $vgpr1, $vgpr31, $vgpr0, $vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
; CHECK-NEXT: $vgpr0 = COPY [[C]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[C]](s32)
; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
%0:_(<2 x s32>) = G_IMPLICIT_DEF
%1:_(s32) = G_CONSTANT i32 -1
%2:_(s32) = G_CONSTANT i32 0
%3:_(s32) = G_CONSTANT i32 1
%4:_(s32) = G_CONSTANT i32 1
%5:_(<2 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32)
%6:_(<2 x s32>) = G_INSERT_VECTOR_ELT %0, %1(s32), %2(s32)
%7:_(<2 x s32>) = G_INSERT_VECTOR_ELT %6, %1(s32), %3(s32)
%8:_(<2 x s32>) = G_ASHR %7, %5(<2 x s32>)
%9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(<2 x s32>)
$vgpr0 = COPY %9(s32)
$vgpr1 = COPY %10(s32)
SI_RETURN implicit $vgpr0, implicit $vgpr1

...
@@ -374,23 +374,15 @@ body: |
; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX6: liveins: $vgpr0
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
;
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX9: liveins: $vgpr0
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
%zero:_(s16) = G_CONSTANT i16 0
%zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
@@ -246,23 +246,15 @@ body: |
; GFX6-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX6: liveins: $vgpr0, $vgpr1
; GFX6-NEXT: {{ $}}
; GFX6-NEXT: %zero:_(s16) = G_CONSTANT i16 0
; GFX6-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
; GFX6-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
; GFX6-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
; GFX6-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX6-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX6-NEXT: %6:_(s32) = G_CONSTANT i32 0
; GFX6-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
; GFX6-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
;
; GFX9-LABEL: name: do_not_shl_v2s32_zero_by_16_from_zext_v2s16
; GFX9: liveins: $vgpr0, $vgpr1
; GFX9-NEXT: {{ $}}
; GFX9-NEXT: %zero:_(s16) = G_CONSTANT i16 0
; GFX9-NEXT: %zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero(s16), %zero(s16)
; GFX9-NEXT: %shiftamt:_(s16) = G_CONSTANT i16 16
; GFX9-NEXT: %shiftamtvector:_(<2 x s16>) = G_BUILD_VECTOR %shiftamt(s16), %shiftamt(s16)
; GFX9-NEXT: %extend:_(<2 x s32>) = G_ZEXT %zerovector(<2 x s16>)
; GFX9-NEXT: %shl:_(<2 x s32>) = G_SHL %extend, %shiftamtvector(<2 x s16>)
; GFX9-NEXT: %6:_(s32) = G_CONSTANT i32 0
; GFX9-NEXT: %shl:_(<2 x s32>) = G_BUILD_VECTOR %6(s32), %6(s32)
; GFX9-NEXT: $vgpr0_vgpr1 = COPY %shl(<2 x s32>)
%zero:_(s16) = G_CONSTANT i16 0
%zerovector:_(<2 x s16>) = G_BUILD_VECTOR %zero, %zero:_(s16)
16 changes: 6 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1434,13 +1434,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-LABEL: v_test_sitofp_i64_byte_to_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_ffbh_i32_e32 v2, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, -1, v2
; SI-NEXT: v_and_b32_e32 v0, 0xff, v0
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_ffbh_i32_e32 v3, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, 32, v2
; SI-NEXT: v_add_i32_e32 v3, vcc, -1, v3
; SI-NEXT: v_mov_b32_e32 v1, 0
; SI-NEXT: v_min_u32_e32 v2, v3, v2
; SI-NEXT: v_min_u32_e32 v2, 32, v2
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2
; SI-NEXT: v_min_u32_e32 v0, 1, v0
; SI-NEXT: v_or_b32_e32 v0, v1, v0
@@ -1452,13 +1450,11 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-LABEL: v_test_sitofp_i64_byte_to_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_ffbh_i32_e32 v2, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, -1, v2
; VI-NEXT: v_and_b32_e32 v0, 0xff, v0
; VI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; VI-NEXT: v_ffbh_i32_e32 v3, 0
; VI-NEXT: v_add_u32_e32 v2, vcc, 32, v2
; VI-NEXT: v_add_u32_e32 v3, vcc, -1, v3
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: v_min_u32_e32 v2, v3, v2
; VI-NEXT: v_min_u32_e32 v2, 32, v2
; VI-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1]
; VI-NEXT: v_min_u32_e32 v0, 1, v0
; VI-NEXT: v_or_b32_e32 v0, v1, v0