Skip to content

AMDGPU: Change bitop3 intrinsic operand to i32 #118647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/BuiltinsAMDGPU.def
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUc", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUc", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUi", "nc", "bitop3-insts")
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")

TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,7 @@ void test_cvt_scalef32_sr_fp8_f32(global unsigned *out, float src, uint seed, fl
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i8 1)
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
// CHECK-NEXT: ret void
Expand All @@ -1696,7 +1696,7 @@ void test_bitop3_b32(global uint* out, uint a, uint b, uint c)
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(5) [[A_ADDR]], align 2
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr addrspace(5) [[B_ADDR]], align 2
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[C_ADDR]], align 2
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i8 1)
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i32 1)
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[TMP4]], align 2
// CHECK-NEXT: ret void
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<

def int_amdgcn_bitop3 :
DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;

} // TargetPrefix = "amdgcn"
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
let Validator = "isUInt<2>";
}

def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
def BitOp3 : CustomOperand<i32, 1, "BitOp3">;
def bitop3_0 : DefaultOperand<BitOp3, 0>;

class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/VOP3Instructions.td
Original file line number Diff line number Diff line change
Expand Up @@ -1291,28 +1291,28 @@ let SubtargetPredicate = isGFX12Plus in {
let SubtargetPredicate = HasBitOp3Insts in {
let isReMaterializable = 1 in {
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i32]>>,
VOP3_OPSEL>>;
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
}
def : GCNPat<
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;

def : GCNPat<
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;

def : GCNPat<
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
>;

def : GCNPat<
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
>;
} // End SubtargetPredicate = HasBitOp3Insts
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
; RUN: llc -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s

declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i8)
declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i8)
declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32)
declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32)

define amdgpu_ps float @bitop3_b32_vvv(i32 %a, i32 %b, i32 %c) {
; GCN-LABEL: bitop3_b32_vvv:
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xf
; GCN-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 15)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 15)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -20,7 +20,7 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x10
; GCN-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 16)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 16)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -31,7 +31,7 @@ define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11
; GCN-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 17)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -43,7 +43,7 @@ define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c)
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12
; GCN-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 18)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -60,7 +60,7 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, i32 %b) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i8 19)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -79,7 +79,7 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i8 20)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -102,7 +102,7 @@ define amdgpu_ps float @bitop3_b32_iii() {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i8 21)
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21)
%ret_cast = bitcast i32 %ret to float
ret float %ret_cast
}
Expand All @@ -112,7 +112,7 @@ define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf
; GCN-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 15)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -122,7 +122,7 @@ define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) {
; GCN: ; %bb.0:
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10
; GCN-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 16)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -133,7 +133,7 @@ define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) {
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11
; GCN-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 17)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -145,7 +145,7 @@ define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c)
; GCN-NEXT: v_mov_b32_e32 v1, s2
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12
; GCN-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 18)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -162,7 +162,7 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i8 19)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -181,7 +181,7 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i8 20)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Expand All @@ -203,7 +203,7 @@ define amdgpu_ps half @bitop3_b16_iii() {
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15
; GFX950-GISEL-NEXT: ; return to shader part epilog
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i8 21)
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21)
%ret_cast = bitcast i16 %ret to half
ret half %ret_cast
}
Loading