Skip to content

Commit e0f5253

Browse files
authored
AMDGPU: Change bitop3 intrinsic operand to i32 (#118647)
1 parent: e84c918 · commit: e0f5253

File tree

6 files changed

+28
-28
lines changed

6 files changed

+28
-28
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -616,8 +616,8 @@ TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_bf6_f32, "V6UiV32fUif", "nc
616616
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_bf16, "V6UiV32yUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
617617
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f16, "V6UiV32hUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
618618
TARGET_BUILTIN(__builtin_amdgcn_cvt_scalef32_sr_pk32_fp6_f32, "V6UiV32fUif", "nc", "f16bf16-to-fp6bf6-cvt-scale-insts")
619-
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUc", "nc", "bitop3-insts")
620-
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUc", "nc", "bitop3-insts")
619+
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b32, "iiiiIUi", "nc", "bitop3-insts")
620+
TARGET_BUILTIN(__builtin_amdgcn_bitop3_b16, "ssssIUi", "nc", "bitop3-insts")
621621

622622
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_bf16_f32, "V2yV2yfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")
623623
TARGET_BUILTIN(__builtin_amdgcn_cvt_sr_f16_f32, "V2hV2hfUiIb", "nc", "f32-to-f16bf16-cvt-sr-insts")

clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1673,7 +1673,7 @@ void test_cvt_scalef32_sr_fp8_f32(global unsigned *out, float src, uint seed, fl
16731673
// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(5) [[A_ADDR]], align 4
16741674
// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(5) [[B_ADDR]], align 4
16751675
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[C_ADDR]], align 4
1676-
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i8 1)
1676+
// CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.bitop3.i32(i32 [[TMP0]], i32 [[TMP1]], i32 [[TMP2]], i32 1)
16771677
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
16781678
// CHECK-NEXT: store i32 [[TMP3]], ptr addrspace(1) [[TMP4]], align 4
16791679
// CHECK-NEXT: ret void
@@ -1696,7 +1696,7 @@ void test_bitop3_b32(global uint* out, uint a, uint b, uint c)
16961696
// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(5) [[A_ADDR]], align 2
16971697
// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr addrspace(5) [[B_ADDR]], align 2
16981698
// CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(5) [[C_ADDR]], align 2
1699-
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i8 1)
1699+
// CHECK-NEXT: [[TMP3:%.*]] = call i16 @llvm.amdgcn.bitop3.i16(i16 [[TMP0]], i16 [[TMP1]], i16 [[TMP2]], i32 1)
17001700
// CHECK-NEXT: [[TMP4:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[OUT_ADDR]], align 8
17011701
// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(1) [[TMP4]], align 2
17021702
// CHECK-NEXT: ret void

llvm/include/llvm/IR/IntrinsicsAMDGPU.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -774,7 +774,7 @@ def int_amdgcn_prng_b32 : DefaultAttrsIntrinsic<
774774

775775
def int_amdgcn_bitop3 :
776776
DefaultAttrsIntrinsic<[llvm_anyint_ty],
777-
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
777+
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
778778
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<3>>]>;
779779

780780
} // TargetPrefix = "amdgcn"

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1271,7 +1271,7 @@ def ByteSel : NamedIntOperand<"byte_sel"> {
12711271
let Validator = "isUInt<2>";
12721272
}
12731273

1274-
def BitOp3 : CustomOperand<i8, 1, "BitOp3">;
1274+
def BitOp3 : CustomOperand<i32, 1, "BitOp3">;
12751275
def bitop3_0 : DefaultOperand<BitOp3, 0>;
12761276

12771277
class KImmFPOperand<ValueType vt> : ImmOperand<vt> {

llvm/lib/Target/AMDGPU/VOP3Instructions.td

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1291,28 +1291,28 @@ let SubtargetPredicate = isGFX12Plus in {
12911291
let SubtargetPredicate = HasBitOp3Insts in {
12921292
let isReMaterializable = 1 in {
12931293
defm V_BITOP3_B16 : VOP3Inst <"v_bitop3_b16",
1294-
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i8]>>,
1294+
VOP3_BITOP3_Profile<VOPProfile_True16<VOPProfile <[i16, i16, i16, i16, i32]>>,
12951295
VOP3_OPSEL>>;
12961296
defm V_BITOP3_B32 : VOP3Inst <"v_bitop3_b32",
1297-
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i8]>, VOP3_REGULAR>>;
1297+
VOP3_BITOP3_Profile<VOPProfile <[i32, i32, i32, i32, i32]>, VOP3_REGULAR>>;
12981298
}
12991299
def : GCNPat<
1300-
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
1300+
(i32 (int_amdgcn_bitop3 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
13011301
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
13021302
>;
13031303

13041304
def : GCNPat<
1305-
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
1305+
(i16 (int_amdgcn_bitop3 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
13061306
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
13071307
>;
13081308

13091309
def : GCNPat<
1310-
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i8:$bitop3)),
1310+
(i32 (BITOP3_32 i32:$src0, i32:$src1, i32:$src2, i32:$bitop3)),
13111311
(i32 (V_BITOP3_B32_e64 VSrc_b32:$src0, VSrc_b32:$src1, VSrc_b32:$src2, timm:$bitop3))
13121312
>;
13131313

13141314
def : GCNPat<
1315-
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i8:$bitop3)),
1315+
(i16 (BITOP3_16 i16:$src0, i16:$src1, i16:$src2, i32:$bitop3)),
13161316
(i16 (V_BITOP3_B16_e64 0, VSrc_b16:$src0, 0, VSrc_b16:$src1, 0, VSrc_b16:$src2, timm:$bitop3, 0))
13171317
>;
13181318
} // End SubtargetPredicate = HasBitOp3Insts

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bitop3.ll

Lines changed: 16 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2,15 +2,15 @@
22
; RUN: llc -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-SDAG %s
33
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX950-GISEL %s
44

5-
declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i8)
6-
declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i8)
5+
declare i32 @llvm.amdgcn.bitop3.i32(i32, i32, i32, i32)
6+
declare i16 @llvm.amdgcn.bitop3.i16(i16, i16, i16, i32)
77

88
define amdgpu_ps float @bitop3_b32_vvv(i32 %a, i32 %b, i32 %c) {
99
; GCN-LABEL: bitop3_b32_vvv:
1010
; GCN: ; %bb.0:
1111
; GCN-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0xf
1212
; GCN-NEXT: ; return to shader part epilog
13-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 15)
13+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 15)
1414
%ret_cast = bitcast i32 %ret to float
1515
ret float %ret_cast
1616
}
@@ -20,7 +20,7 @@ define amdgpu_ps float @bitop3_b32_svv(i32 inreg %a, i32 %b, i32 %c) {
2020
; GCN: ; %bb.0:
2121
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x10
2222
; GCN-NEXT: ; return to shader part epilog
23-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 16)
23+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 16)
2424
%ret_cast = bitcast i32 %ret to float
2525
ret float %ret_cast
2626
}
@@ -31,7 +31,7 @@ define amdgpu_ps float @bitop3_b32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
3131
; GCN-NEXT: v_mov_b32_e32 v1, s1
3232
; GCN-NEXT: v_bitop3_b32 v0, s0, v1, v0 bitop3:0x11
3333
; GCN-NEXT: ; return to shader part epilog
34-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 17)
34+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 17)
3535
%ret_cast = bitcast i32 %ret to float
3636
ret float %ret_cast
3737
}
@@ -43,7 +43,7 @@ define amdgpu_ps float @bitop3_b32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c)
4343
; GCN-NEXT: v_mov_b32_e32 v1, s2
4444
; GCN-NEXT: v_bitop3_b32 v0, s0, v0, v1 bitop3:0x12
4545
; GCN-NEXT: ; return to shader part epilog
46-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i8 18)
46+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 %c, i32 18)
4747
%ret_cast = bitcast i32 %ret to float
4848
ret float %ret_cast
4949
}
@@ -60,7 +60,7 @@ define amdgpu_ps float @bitop3_b32_vvi(i32 %a, i32 %b) {
6060
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
6161
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x13
6262
; GFX950-GISEL-NEXT: ; return to shader part epilog
63-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i8 19)
63+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 %b, i32 1000, i32 19)
6464
%ret_cast = bitcast i32 %ret to float
6565
ret float %ret_cast
6666
}
@@ -79,7 +79,7 @@ define amdgpu_ps float @bitop3_b32_vii(i32 %a) {
7979
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
8080
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x14
8181
; GFX950-GISEL-NEXT: ; return to shader part epilog
82-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i8 20)
82+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 %a, i32 2000, i32 1000, i32 20)
8383
%ret_cast = bitcast i32 %ret to float
8484
ret float %ret_cast
8585
}
@@ -102,7 +102,7 @@ define amdgpu_ps float @bitop3_b32_iii() {
102102
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
103103
; GFX950-GISEL-NEXT: v_bitop3_b32 v0, v0, v1, v2 bitop3:0x15
104104
; GFX950-GISEL-NEXT: ; return to shader part epilog
105-
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i8 21)
105+
%ret = call i32 @llvm.amdgcn.bitop3.i32(i32 3000, i32 2000, i32 1000, i32 21)
106106
%ret_cast = bitcast i32 %ret to float
107107
ret float %ret_cast
108108
}
@@ -112,7 +112,7 @@ define amdgpu_ps half @bitop3_b16_vvv(i16 %a, i16 %b, i16 %c) {
112112
; GCN: ; %bb.0:
113113
; GCN-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0xf
114114
; GCN-NEXT: ; return to shader part epilog
115-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 15)
115+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 15)
116116
%ret_cast = bitcast i16 %ret to half
117117
ret half %ret_cast
118118
}
@@ -122,7 +122,7 @@ define amdgpu_ps half @bitop3_b16_svv(i16 inreg %a, i16 %b, i16 %c) {
122122
; GCN: ; %bb.0:
123123
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x10
124124
; GCN-NEXT: ; return to shader part epilog
125-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 16)
125+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 16)
126126
%ret_cast = bitcast i16 %ret to half
127127
ret half %ret_cast
128128
}
@@ -133,7 +133,7 @@ define amdgpu_ps half @bitop3_b16_ssv(i16 inreg %a, i16 inreg %b, i16 %c) {
133133
; GCN-NEXT: v_mov_b32_e32 v1, s1
134134
; GCN-NEXT: v_bitop3_b16 v0, s0, v1, v0 bitop3:0x11
135135
; GCN-NEXT: ; return to shader part epilog
136-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 17)
136+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 17)
137137
%ret_cast = bitcast i16 %ret to half
138138
ret half %ret_cast
139139
}
@@ -145,7 +145,7 @@ define amdgpu_ps half @bitop3_b16_sss(i16 inreg %a, i16 inreg %b, i16 inreg %c)
145145
; GCN-NEXT: v_mov_b32_e32 v1, s2
146146
; GCN-NEXT: v_bitop3_b16 v0, s0, v0, v1 bitop3:0x12
147147
; GCN-NEXT: ; return to shader part epilog
148-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i8 18)
148+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 %c, i32 18)
149149
%ret_cast = bitcast i16 %ret to half
150150
ret half %ret_cast
151151
}
@@ -162,7 +162,7 @@ define amdgpu_ps half @bitop3_b16_vvi(i16 %a, i16 %b) {
162162
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
163163
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x13
164164
; GFX950-GISEL-NEXT: ; return to shader part epilog
165-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i8 19)
165+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 %b, i16 1000, i32 19)
166166
%ret_cast = bitcast i16 %ret to half
167167
ret half %ret_cast
168168
}
@@ -181,7 +181,7 @@ define amdgpu_ps half @bitop3_b16_vii(i16 %a) {
181181
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
182182
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x14
183183
; GFX950-GISEL-NEXT: ; return to shader part epilog
184-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i8 20)
184+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 %a, i16 2000, i16 1000, i32 20)
185185
%ret_cast = bitcast i16 %ret to half
186186
ret half %ret_cast
187187
}
@@ -203,7 +203,7 @@ define amdgpu_ps half @bitop3_b16_iii() {
203203
; GFX950-GISEL-NEXT: v_mov_b32_e32 v2, 0x3e8
204204
; GFX950-GISEL-NEXT: v_bitop3_b16 v0, v0, v1, v2 bitop3:0x15
205205
; GFX950-GISEL-NEXT: ; return to shader part epilog
206-
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i8 21)
206+
%ret = call i16 @llvm.amdgcn.bitop3.i16(i16 3000, i16 2000, i16 1000, i32 21)
207207
%ret_cast = bitcast i16 %ret to half
208208
ret half %ret_cast
209209
}

0 commit comments

Comments
 (0)