
AMDGPU: Materialize bitwise not of inline immediates #95960

Merged
58 changes: 41 additions & 17 deletions llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -45,7 +45,6 @@ class SIShrinkInstructions : public MachineFunctionPass {
bool isKImmOperand(const MachineOperand &Src) const;
bool isKUImmOperand(const MachineOperand &Src) const;
bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const;
void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
void shrinkScalarCompare(MachineInstr &MI) const;
void shrinkMIMG(MachineInstr &MI) const;
@@ -183,15 +182,36 @@ bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src,
int32_t &ReverseImm) const {
if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
return false;
/// \returns the opcode of an instruction that a move-immediate of the constant
/// \p Src can be replaced with if the constant is replaced with \p
/// ModifiedImm, i.e.:
///
/// If the bitreverse of the constant is an inline immediate, store the
/// reversed bits in \p ModifiedImm and return the bitreverse opcode.
///
/// If the bitwise negation of the constant is an inline immediate, store the
/// inverted bits in \p ModifiedImm and return the bitwise not opcode.
///
/// Otherwise return 0.
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII,
const MachineOperand &Src,
int32_t &ModifiedImm, bool Scalar) {
if (TII->isInlineConstant(Src))
return 0;
int32_t SrcImm = static_cast<int32_t>(Src.getImm());

if (!Scalar) {
// We could also handle the scalar case here, but we would need to check
// that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
// it, as the reasonable values are already covered by s_movk_i32.
ModifiedImm = ~SrcImm;
if (TII->isInlineConstant(APInt(32, ModifiedImm)))
return AMDGPU::V_NOT_B32_e32;
}

ModifiedImm = reverseBits<int32_t>(SrcImm);
if (TII->isInlineConstant(APInt(32, ModifiedImm)))
return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;

ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
return ReverseImm >= -16 && ReverseImm <= 64;
return 0;
}

/// Copy implicit register operands from specified instruction to this
@@ -801,10 +821,12 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// XXX - not exactly a check for post-regalloc run.
MachineOperand &Src = MI.getOperand(1);
if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
int32_t ReverseImm;
if (isReverseInlineImm(Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
Src.setImm(ReverseImm);
int32_t ModImm;
unsigned ModOpcode =
canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
if (ModOpcode != 0) {
MI.setDesc(TII->get(ModOpcode));
Src.setImm(static_cast<int64_t>(ModImm));
continue;
}
}
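
Taken together, the helper and the VALU rewrite above amount to two checks on a 32-bit literal: does its bitwise not, or its bit reverse, land in the inline-immediate range? Below is a minimal standalone sketch of that decision (not the LLVM code itself), assuming only the integer inline-immediate range -16..64 and ignoring the floating-point encodings that isInlineConstant also accepts; the helper names are hypothetical.

#include <cstdint>

enum class ImmRewrite { None, BitwiseNot, BitReverse };

// Hypothetical stand-in for TII->isInlineConstant, restricted to the integer
// inline-immediate range; the real check also accepts a handful of
// floating-point encodings (0.5, 1.0, 2.0, 4.0, ... and their negations).
static bool isIntInlineImm(int32_t Imm) { return Imm >= -16 && Imm <= 64; }

static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int I = 0; I < 32; ++I)
    R |= ((V >> I) & 1u) << (31 - I);
  return R;
}

// Mirrors the shape of canModifyToInlineImmOp32 without the MachineOperand
// and opcode plumbing: try bitwise not (VALU only), then bit reverse.
static ImmRewrite chooseRewrite(int32_t SrcImm, int32_t &ModifiedImm,
                                bool Scalar) {
  if (isIntInlineImm(SrcImm))
    return ImmRewrite::None; // already inline, nothing to gain
  if (!Scalar) {
    ModifiedImm = ~SrcImm;
    if (isIntInlineImm(ModifiedImm))
      return ImmRewrite::BitwiseNot; // e.g. 0xffffffc0 -> v_not_b32 63
  }
  ModifiedImm =
      static_cast<int32_t>(reverseBits32(static_cast<uint32_t>(SrcImm)));
  if (isIntInlineImm(ModifiedImm))
    return ImmRewrite::BitReverse; // e.g. 0x80000000 -> v_bfrev_b32 of 1
  return ImmRewrite::None;
}

Under those assumptions, chooseRewrite(0xffffffc0, M, /*Scalar=*/false) selects the bitwise-not form with M == 63, which matches the v_not_b32_e32 updates in the tests below.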
@@ -863,13 +885,15 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &Src = MI.getOperand(1);

if (Src.isImm() && Dst.getReg().isPhysical()) {
int32_t ReverseImm;
unsigned ModOpc;
int32_t ModImm;
if (isKImmOperand(Src)) {
MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
Src.setImm(SignExtend64(Src.getImm(), 32));
} else if (isReverseInlineImm(Src, ReverseImm)) {
MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
Src.setImm(ReverseImm);
} else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
/*Scalar=*/true))) {
MI.setDesc(TII->get(ModOpc));
Src.setImm(static_cast<int64_t>(ModImm));
}
}

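For the scalar path just above, the order of the checks matters: s_movk_i32 already covers anything that sign-extends from 16 bits, so s_brev_b32 only fires for wider values whose bit reverse happens to be inline, and s_not_b32 is skipped entirely because it would clobber SCC. A rough sketch of that priority under the same -16..64 assumption (names are hypothetical, not the pass's own):

#include <cstdint>

enum class ScalarRewrite { KeepLiteral, Movk, Brev };

// Hypothetical sketch of the scalar priority for a post-RA S_MOV_B32 of a
// non-inline immediate: prefer s_movk_i32 (any value that sign-extends from
// 16 bits), otherwise try s_brev_b32 of an inline immediate. s_not_b32 is
// not considered because it clobbers SCC.
static ScalarRewrite chooseScalarRewrite(int32_t Imm, int32_t &NewImm) {
  if (Imm >= INT16_MIN && Imm <= INT16_MAX) {
    NewImm = Imm;
    return ScalarRewrite::Movk;
  }
  uint32_t Rev = 0;
  for (int I = 0; I < 32; ++I)
    Rev |= ((static_cast<uint32_t>(Imm) >> I) & 1u) << (31 - I);
  NewImm = static_cast<int32_t>(Rev);
  if (NewImm >= -16 && NewImm <= 64) // ignoring float inline immediates
    return ScalarRewrite::Brev;      // e.g. 0x20000000 -> s_brev_b32 of 4
  return ScalarRewrite::KeepLiteral;
}
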
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -178,7 +178,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_splat:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
; GFX8-NEXT: v_not_b32_e32 v1, 63
; GFX8-NEXT: v_add_u16_e32 v2, 0xffc0, v0
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -244,7 +244,7 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX8-LABEL: v_add_v2i16_neg_inline_imm_hi:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
; GFX8-NEXT: v_not_b32_e32 v1, 63
; GFX8-NEXT: v_add_u16_e32 v2, 4, v0
; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
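
The test updates in this file and in the fshl/fshr and sub.v2i16 files below follow directly from the identity the pass exploits: 0xffffffc0 is the bitwise not of 63 and 0xffffffe8 is the bitwise not of 23, both inline immediates, so the 32-bit literal moves become v_not_b32 of small constants. A quick compile-time check of those two identities:

#include <cstdint>

// ~63 == 0xffffffc0 and ~23 == 0xffffffe8, so the literal moves in the checks
// above and below can be materialized as v_not_b32 of an inline immediate.
static_assert(~UINT32_C(63) == UINT32_C(0xffffffc0), "not of 63");
static_assert(~UINT32_C(23) == UINT32_C(0xffffffe8), "not of 23");
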
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll
@@ -1486,7 +1486,7 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6: ; %bb.0:
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v1, 23
; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1516,7 +1516,7 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8: ; %bb.0:
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1546,7 +1546,7 @@ define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9: ; %bb.0:
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v1, 23
; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX9-NEXT: s_bfe_u32 s1, s1, 0x170001
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1646,7 +1646,7 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v4, 23
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1676,7 +1676,7 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v4, 23
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1706,7 +1706,7 @@ define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v4, 23
; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX9-NEXT: v_bfe_u32 v1, v1, 1, 23
; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1822,7 +1822,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: s_lshl_b32 s0, s0, 16
; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v3, 23
; GFX6-NEXT: s_or_b32 s6, s8, s6
; GFX6-NEXT: v_or_b32_e32 v0, s0, v0
; GFX6-NEXT: s_lshr_b32 s0, s2, 16
@@ -1959,7 +1959,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_or_b32 s2, s2, s6
; GFX8-NEXT: s_lshl_b32 s3, s3, 8
; GFX8-NEXT: s_and_b32 s6, s9, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s8, s3
; GFX8-NEXT: s_and_b32 s6, 0xffff, s6
; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
@@ -2079,7 +2079,7 @@ define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s2, s2, s6
; GFX9-NEXT: s_lshl_b32 s3, s3, 8
; GFX9-NEXT: s_and_b32 s6, s9, 0xff
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v1, 23
; GFX9-NEXT: s_or_b32 s3, s8, s3
; GFX9-NEXT: s_and_b32 s6, 0xffff, s6
; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
@@ -2414,7 +2414,7 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v7, 23
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -2461,7 +2461,7 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v7, 23
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -2508,7 +2508,7 @@ define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v7, 23
; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll
@@ -1487,7 +1487,7 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX6: ; %bb.0:
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v1, 23
; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX6-NEXT: s_lshl_b32 s0, s0, 1
; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1518,7 +1518,7 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX8: ; %bb.0:
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX8-NEXT: s_lshl_b32 s0, s0, 1
; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1549,7 +1549,7 @@ define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt)
; GFX9: ; %bb.0:
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v1, 23
; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff
; GFX9-NEXT: s_and_b32 s1, s1, 0xffffff
; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -1652,7 +1652,7 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v4, 23
; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1683,7 +1683,7 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v4, 23
; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1714,7 +1714,7 @@ define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3
; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v4, 23
; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2
; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
@@ -1820,7 +1820,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX6-NEXT: v_mov_b32_e32 v0, s0
; GFX6-NEXT: v_mov_b32_e32 v3, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v3, 23
; GFX6-NEXT: s_lshr_b32 s6, s0, 16
; GFX6-NEXT: s_and_b32 s8, s0, 0xff
; GFX6-NEXT: s_lshl_b32 s9, s9, 8
@@ -1962,7 +1962,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX8-NEXT: s_or_b32 s2, s2, s8
; GFX8-NEXT: s_lshl_b32 s3, s3, 8
; GFX8-NEXT: s_and_b32 s8, s11, 0xff
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v1, 23
; GFX8-NEXT: s_or_b32 s3, s10, s3
; GFX8-NEXT: s_and_b32 s8, 0xffff, s8
; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
@@ -2082,7 +2082,7 @@ define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 i
; GFX9-NEXT: s_or_b32 s2, s2, s8
; GFX9-NEXT: s_lshl_b32 s3, s3, 8
; GFX9-NEXT: s_and_b32 s8, s11, 0xff
; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v1, 23
; GFX9-NEXT: s_or_b32 s3, s10, s3
; GFX9-NEXT: s_and_b32 s8, 0xffff, s8
; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
@@ -2424,7 +2424,7 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX6-NEXT: v_not_b32_e32 v7, 23
; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -2473,7 +2473,7 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX8-NEXT: v_not_b32_e32 v7, 23
; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -2522,7 +2522,7 @@ define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) {
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6
; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8
; GFX9-NEXT: v_not_b32_e32 v7, 23
; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4
; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5
; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
@@ -865,7 +865,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: flat_load_dword v2, v[0:1]
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_bfrev_b32_e32 v1, 4.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[0:3]
; GFX1030-NEXT: s_waitcnt vmcnt(0)
@@ -894,7 +894,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(ptr %p_ray, <4
; GFX1013-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1013-NEXT: flat_load_dword v2, v[0:1]
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c7
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: v_bfrev_b32_e32 v1, 4.0
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:11], s[4:7]
; GFX1013-NEXT: s_waitcnt vmcnt(0)
@@ -973,7 +973,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1030-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1030-NEXT: flat_load_dword v2, v[0:1]
; GFX1030-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1030-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1030-NEXT: v_bfrev_b32_e32 v1, 4.0
; GFX1030-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1030-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[0:3] a16
; GFX1030-NEXT: s_waitcnt vmcnt(0)
@@ -999,7 +999,7 @@ define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(ptr %p_ray
; GFX1013-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX1013-NEXT: flat_load_dword v2, v[0:1]
; GFX1013-NEXT: v_mov_b32_e32 v0, 0xb36211c6
; GFX1013-NEXT: v_mov_b32_e32 v1, 0x102
; GFX1013-NEXT: v_bfrev_b32_e32 v1, 4.0
; GFX1013-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX1013-NEXT: image_bvh64_intersect_ray v[0:3], v[0:8], s[4:7] a16
; GFX1013-NEXT: s_waitcnt vmcnt(0)
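
The change in the intersect-ray tests above is the bit-reverse case rather than the bitwise-not case: 0x102 read backwards over 32 bits is 0x40800000, the IEEE-754 encoding of 4.0, which is an inline immediate. A small standalone check of that arithmetic (illustrative only):

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Reverse the 32 bits of 0x102: bits 1 and 8 move to bits 30 and 23,
  // giving 0x40800000, which is the encoding of 4.0f -- hence
  // "v_bfrev_b32_e32 v1, 4.0" instead of "v_mov_b32_e32 v1, 0x102".
  uint32_t Rev = 0;
  for (int I = 0; I < 32; ++I)
    Rev |= ((UINT32_C(0x102) >> I) & 1u) << (31 - I);
  float F;
  std::memcpy(&F, &Rev, sizeof(F));
  std::printf("0x%08x -> %g\n", Rev, F); // prints: 0x40800000 -> 4
}
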
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sub.v2i16.ll
@@ -147,7 +147,7 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
; GFX8-NEXT: v_not_b32_e32 v1, 63
; GFX8-NEXT: v_subrev_u16_e32 v2, 0xffc0, v0
; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
@@ -211,7 +211,7 @@ define <2 x i16> @v_sub_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffc0
; GFX8-NEXT: v_not_b32_e32 v1, 63
; GFX8-NEXT: v_subrev_u16_e32 v2, 4, v0
; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0