Skip to content

AMDGPU/GlobalISel: Do not try to form v_bitop3_b32 for SGPR results #117940

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3764,10 +3764,15 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
if (!Subtarget->hasBitOp3Insts())
return false;

Register DstReg = MI.getOperand(0).getReg();
const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;
if (!IsVALU)
return false;

SmallVector<Register, 3> Src;
uint8_t TTbl;
unsigned NumOpcodes;
Register DstReg = MI.getOperand(0).getReg();

std::tie(NumOpcodes, TTbl) = BitOp3_Op(DstReg, Src, *MRI);

Expand All @@ -3776,13 +3781,10 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
if (NumOpcodes < 2 || Src.empty())
return false;

const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
const bool IsVALU = DstRB->getID() == AMDGPU::VGPRRegBankID;

// For a uniform case the threshold should be higher to account for moves
// between VGPRs and SGPRs. It needs one operand in a VGPR, the other two can
// be in SGPRs, and a readfirstlane after.
if (NumOpcodes < 4 && !IsVALU)
if (NumOpcodes < 4)
return false;

bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
Expand Down
160 changes: 114 additions & 46 deletions llvm/test/CodeGen/AMDGPU/bitop3.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=gfx950 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950,GFX950-GISEL %s

; ========= Single bit functions =========

Expand Down Expand Up @@ -45,10 +45,17 @@ define amdgpu_ps float @not_and_and_not_and(i32 %a, i32 %b, i32 %c) {
}

define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: not_and_and_and:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: not_and_and_and:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:8
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: not_and_and_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%and1 = and i32 %nota, %c
%and2 = and i32 %and1, %b
Expand All @@ -70,10 +77,17 @@ define amdgpu_ps float @and_not_and_not_and(i32 %a, i32 %b, i32 %c) {
}

define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: and_not_and_and:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: and_not_and_and:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x20
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: and_not_and_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%notb = xor i32 %b, -1
%and1 = and i32 %a, %c
%and2 = and i32 %and1, %notb
Expand All @@ -82,10 +96,17 @@ define amdgpu_ps float @and_not_and_and(i32 %a, i32 %b, i32 %c) {
}

define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: and_and_not_and:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: and_and_not_and:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x40
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: and_and_not_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%notc = xor i32 %c, -1
%and1 = and i32 %a, %notc
%and2 = and i32 %and1, %b
Expand All @@ -94,10 +115,16 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
}

define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: and_and_and:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: and_and_and:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: and_and_and:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%and1 = and i32 %a, %c
%and2 = and i32 %and1, %b
%ret_cast = bitcast i32 %and2 to float
Expand All @@ -107,21 +134,34 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
; ========= Multi bit functions =========

define amdgpu_ps float @test_12(i32 %a, i32 %b) {
; GCN-LABEL: test_12:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: test_12:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: test_12:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%and1 = and i32 %nota, %b
%ret_cast = bitcast i32 %and1 to float
ret float %ret_cast
}

define amdgpu_ps float @test_63(i32 %a, i32 %b) {
; GCN-LABEL: test_63:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: test_63:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0x3f
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: test_63:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v1
; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%notb = xor i32 %b, -1
%or = or i32 %nota, %notb
Expand All @@ -143,10 +183,17 @@ define amdgpu_ps float @test_59(i32 %a, i32 %b, i32 %c) {
}

define amdgpu_ps float @test_126(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: test_126:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: test_126:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v0, v2, v1 bitop3:0x7e
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: test_126:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_xor_b32_e32 v1, v0, v1
; GFX950-GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
; GFX950-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
; GFX950-GISEL-NEXT: ; return to shader part epilog
%xor1 = xor i32 %a, %b
%xor2 = xor i32 %a, %c
%or = or i32 %xor1, %xor2
Expand All @@ -167,9 +214,9 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: test_12_src_overflow:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v0, v2, v0 bitop3:0xc
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:3
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v3, v1, v0 bitop3:0xc8
; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%notc = xor i32 %c, -1
Expand All @@ -185,13 +232,27 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
; This could be a single LOP3 operation with tbl = 100, but the Src vector is exhausted during the search.

define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: test_100_src_overflow:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10
; GCN-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40
; GCN-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20
; GCN-NEXT: v_or3_b32 v0, v3, v4, v0
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: test_100_src_overflow:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_bitop3_b32 v3, v1, v2, v0 bitop3:0x10
; GFX950-SDAG-NEXT: v_bitop3_b32 v4, v0, v2, v1 bitop3:0x40
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, v1, v2, v0 bitop3:0x20
; GFX950-SDAG-NEXT: v_or3_b32 v0, v3, v4, v0
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: test_100_src_overflow:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3
; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1
; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: v_or3_b32 v0, v3, v4, v0
; GFX950-GISEL-NEXT: ; return to shader part epilog
%or1 = or i32 %c, %a
%not1 = xor i32 %or1, -1
%and1 = and i32 %b, %not1
Expand Down Expand Up @@ -260,12 +321,19 @@ define amdgpu_ps float @uniform_3_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
}

define amdgpu_ps float @uniform_4_op(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
; GCN-LABEL: uniform_4_op:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v0, s1
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2
; GCN-NEXT: ; return to shader part epilog
; GFX950-SDAG-LABEL: uniform_4_op:
; GFX950-SDAG: ; %bb.0:
; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s1
; GFX950-SDAG-NEXT: v_mov_b32_e32 v1, s2
; GFX950-SDAG-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:2
; GFX950-SDAG-NEXT: ; return to shader part epilog
;
; GFX950-GISEL-LABEL: uniform_4_op:
; GFX950-GISEL: ; %bb.0:
; GFX950-GISEL-NEXT: s_andn2_b32 s0, s2, s0
; GFX950-GISEL-NEXT: s_andn2_b32 s0, s0, s1
; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%notb = xor i32 %b, -1
%and1 = and i32 %nota, %c
Expand Down