-
Notifications
You must be signed in to change notification settings - Fork 14.3k
AMDGPU: Change bitop3 intrinsic operand to i32 #118647
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes
Full diff: https://github.com/llvm/llvm-project/pull/118647.diff
4 Files Affected:
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 112c26d20db14e..92418b9104ad14 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
def int_amdgcn_bitop3 :
DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
} // TargetPrefix = "amdgcn"
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index d8eb9d155315a6..fc8c12a674e466 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1271,7 +1271,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}
-def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
+def BitOp3 : CustomOperand<i32, 1, "BitOp3">;
def bitop3_0 : DefaultOperand<BitOp3, 0>;
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index ff9376e635af96..a00785bf29c77a 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -1291,28 +1291,28 @@ let SubtargetPredicate = isGFX12Plus in {
let SubtargetPredicate = HasBitOp3Insts in {
let isReMaterializable = 1 in {
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
- VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
+ VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i32]>>,
VOP3_OPSEL>>;
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
- VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
+ VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
}
def : GCNPat<
- (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
+ (i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;
def : GCNPat<
- (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
+ (i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
def : GCNPat<
- (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
+ (i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;
def : GCNPat<
- (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
+ (i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
} // End SubtargetPredicate = HasBitOp3Insts
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
index ff2f4db0d7a5f9..b6232cbc384967 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
@@ -2,15 +2,15 @@
; RUN: llc -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s
-declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i8)
-declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i8)
+declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32)
+declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32)
define amdgpu_ps float @bitop3_b32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: bitop3_b32_vvv:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xf
; GCN-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 15)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 15)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -20,7 +20,7 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x10
; GCN-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 16)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 16)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -31,7 +31,7 @@ define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11
; GCN-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 17)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -43,7 +43,7 @@ define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c)
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12
; GCN-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 18)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -60,7 +60,7 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, i32 %b) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i8 19)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -79,7 +79,7 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i8 20)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -102,7 +102,7 @@ define amdgpu_ps float @bitop3_b32_iii() {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i8 21)
+ %ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
@@ -112,7 +112,7 @@ define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf
; GCN-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 15)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -122,7 +122,7 @@ define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10
; GCN-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 16)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -133,7 +133,7 @@ define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) {
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11
; GCN-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 17)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -145,7 +145,7 @@ define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c)
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12
; GCN-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 18)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -162,7 +162,7 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i8 19)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -181,7 +181,7 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i8 20)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
@@ -203,7 +203,7 @@ define amdgpu_ps half @bitop3_b16_iii() {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15
; GFX950-GISEL-NEXT: ; return to shader part epilog
- %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i8 21)
+ %ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
|
Why is this so? |
This showed up in a merge commit after the original patch was committed. This is what's in the branch. I'm guessing this was to avoid work to fix up the type after 335bc3c. |
3a94843
to
be4f050
Compare
No description provided.