-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[AMDGPU] Allow selection of BITOP3 for some 2 opcodes and B32 cases #122267
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu Author: Jakub Chlanda (jchlanda) ChangesThis came up in downstream static analysis - as a dead code. Admittedly, it depends on what the intention was when checking for If that's incorrect, that whole if statement can be removed, as it is after a check for: Full diff: https://github.com/llvm/llvm-project/pull/122267.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 041b9b4d66f63f..72d43ac9fbc245 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3782,13 +3782,7 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
if (NumOpcodes < 2 || Src.empty())
return false;
- // For a uniform case threshold should be higher to account for moves between
- // VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be in SGPRs
- // and a readtfirstlane after.
- if (NumOpcodes < 4)
- return false;
-
- bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
+ const bool IsB32 = MRI->getType(DstReg) == LLT::scalar(32);
if (NumOpcodes == 2 && IsB32) {
// Avoid using BITOP3 for OR3, XOR3, AND_OR. This is not faster but makes
// asm more readable. This cannot be modeled with AddedComplexity because
@@ -3797,7 +3791,11 @@ bool AMDGPUInstructionSelector::selectBITOP3(MachineInstr &MI) const {
mi_match(MI, *MRI, m_GOr(m_GOr(m_Reg(), m_Reg()), m_Reg())) ||
mi_match(MI, *MRI, m_GOr(m_GAnd(m_Reg(), m_Reg()), m_Reg())))
return false;
- }
+ } else if (NumOpcodes < 4)
+ // For a uniform case threshold should be higher to account for moves
+ // between VGPRs and SGPRs. It needs one operand in a VGPR, rest two can be
+ // in SGPRs and a readtfirstlane after.
+ return false;
unsigned Opc = IsB32 ? AMDGPU::V_BITOP3_B32_e64 : AMDGPU::V_BITOP3_B16_e64;
unsigned CBL = STI.getConstantBusLimit(Opc);
diff --git a/llvm/test/CodeGen/AMDGPU/bitop3.ll b/llvm/test/CodeGen/AMDGPU/bitop3.ll
index b08ab5a2dc4223..eb149a93ee3288 100644
--- a/llvm/test/CodeGen/AMDGPU/bitop3.ll
+++ b/llvm/test/CodeGen/AMDGPU/bitop3.ll
@@ -52,8 +52,7 @@ define amdgpu_ps float @not_and_and_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: not_and_and_and:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
@@ -103,8 +102,7 @@ define amdgpu_ps float @and_and_not_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: and_and_not_and:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_not_b32_e32 v2, v2
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0x30
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
; GFX950-GISEL-NEXT: ; return to shader part epilog
%notc = xor i32 %c, -1
@@ -122,8 +120,7 @@ define amdgpu_ps float @and_and_and(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: and_and_and:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x80
; GFX950-GISEL-NEXT: ; return to shader part epilog
%and1 = and i32 %a, %c
%and2 = and i32 %and1, %b
@@ -141,8 +138,7 @@ define amdgpu_ps float @test_12(i32 %a, i32 %b) {
;
; GFX950-GISEL-LABEL: test_12:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v0 bitop3:0xc
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%and1 = and i32 %nota, %b
@@ -214,9 +210,11 @@ define amdgpu_ps float @test_12_src_overflow(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: test_12_src_overflow:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_not_b32_e32 v0, v0
-; GFX950-GISEL-NEXT: v_bfi_b32 v0, v2, v0, v0
-; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v0, v1
+; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v0
+; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v2
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v2, v0 bitop3:0xc
+; GFX950-GISEL-NEXT: v_and_b32_e32 v2, v3, v4
+; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xc8
; GFX950-GISEL-NEXT: ; return to shader part epilog
%nota = xor i32 %a, -1
%notc = xor i32 %c, -1
@@ -242,11 +240,9 @@ define amdgpu_ps float @test_100_src_overflow(i32 %a, i32 %b, i32 %c) {
;
; GFX950-GISEL-LABEL: test_100_src_overflow:
; GFX950-GISEL: ; %bb.0:
-; GFX950-GISEL-NEXT: v_or_b32_e32 v3, v2, v0
-; GFX950-GISEL-NEXT: v_not_b32_e32 v3, v3
-; GFX950-GISEL-NEXT: v_not_b32_e32 v4, v1
+; GFX950-GISEL-NEXT: v_bitop3_b32 v3, v2, v0, v2 bitop3:3
; GFX950-GISEL-NEXT: v_and_b32_e32 v3, v1, v3
-; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v0, v4
+; GFX950-GISEL-NEXT: v_bitop3_b32 v4, v0, v1, v0 bitop3:0x30
; GFX950-GISEL-NEXT: v_and_b32_e32 v0, v1, v0
; GFX950-GISEL-NEXT: v_not_b32_e32 v1, v2
; GFX950-GISEL-NEXT: v_and_b32_e32 v4, v4, v2
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/11751 Here is the relevant piece of the build log for the reference
|
The failure, unless I can't spot something obvious, does not look related to this PR. |
…lvm#122267) This came up in downstream static analysis - as a dead code. Admittedly, it depends on what the intention was when checking for [`if (NumOpcodes == 2 && IsB32)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3792C3-L3792C32) and I took a guess that for certain cases the selection should take place. If that's incorrect, that whole if statement can be removed, as it is after a check for: [`if (NumOpcodes < 4)`](https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp#L3788)
This came up in downstream static analysis - as a dead code.
Admittedly, it depends on what the intention was when checking for
if (NumOpcodes == 2 && IsB32)
and I took a guess that for certain cases the selection should take place.If that's incorrect, that whole if statement can be removed, as it is after a check for:
if (NumOpcodes < 4)