Skip to content

Commit 892c58c

Browse files
authored
[Clang][AMDGPU] Add builtins for intrinsic llvm.amdgcn.raw.ptr.buffer.load (#99258)
1 parent 82cca0c commit 892c58c

File tree

4 files changed

+244
-0
lines changed

4 files changed

+244
-0
lines changed

clang/include/clang/Basic/BuiltinsAMDGPU.def

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n")
155155
BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n")
156156
BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n")
157157
BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n")
158+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "UcQbiiIi", "n")
159+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "UsQbiiIi", "n")
160+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "UiQbiiIi", "n")
161+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
162+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
163+
BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
158164

159165
//===----------------------------------------------------------------------===//
160166
// Ballot builtins.

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19185,6 +19185,39 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
1918519185
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
1918619186
return emitBuiltinWithOneOverloadedType<5>(
1918719187
*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
19188+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19189+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19190+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19191+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19192+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19193+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
19194+
llvm::Type *RetTy = nullptr;
19195+
switch (BuiltinID) {
19196+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
19197+
RetTy = Int8Ty;
19198+
break;
19199+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
19200+
RetTy = Int16Ty;
19201+
break;
19202+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
19203+
RetTy = Int32Ty;
19204+
break;
19205+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
19206+
RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
19207+
break;
19208+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
19209+
RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
19210+
break;
19211+
case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
19212+
RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
19213+
break;
19214+
}
19215+
Function *F =
19216+
CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
19217+
return Builder.CreateCall(
19218+
F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
19219+
EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
19220+
}
1918819221
default:
1918919222
return nullptr;
1919019223
}
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// REQUIRES: amdgpu-registered-target
3+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
4+
5+
typedef unsigned char u8;
6+
typedef unsigned short u16;
7+
typedef unsigned int u32;
8+
typedef unsigned int v2u32 __attribute__((ext_vector_type(2)));
9+
typedef unsigned int v3u32 __attribute__((ext_vector_type(3)));
10+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
11+
12+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
13+
// CHECK-NEXT: entry:
14+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
15+
// CHECK-NEXT: ret i8 [[TMP0]]
16+
//
17+
u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
18+
return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
19+
}
20+
21+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
22+
// CHECK-NEXT: entry:
23+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
24+
// CHECK-NEXT: ret i16 [[TMP0]]
25+
//
26+
u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
27+
return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
28+
}
29+
30+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
31+
// CHECK-NEXT: entry:
32+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
33+
// CHECK-NEXT: ret i32 [[TMP0]]
34+
//
35+
u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
36+
return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
37+
}
38+
39+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
40+
// CHECK-NEXT: entry:
41+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
42+
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
43+
//
44+
v2u32 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
45+
return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
46+
}
47+
48+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
49+
// CHECK-NEXT: entry:
50+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
51+
// CHECK-NEXT: ret <3 x i32> [[TMP0]]
52+
//
53+
v3u32 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
54+
return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
55+
}
56+
57+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
58+
// CHECK-NEXT: entry:
59+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
60+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
61+
//
62+
v4u32 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
63+
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
64+
}
65+
66+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
67+
// CHECK-NEXT: entry:
68+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
69+
// CHECK-NEXT: ret i8 [[TMP0]]
70+
//
71+
u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
72+
return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0);
73+
}
74+
75+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
76+
// CHECK-NEXT: entry:
77+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
78+
// CHECK-NEXT: ret i16 [[TMP0]]
79+
//
80+
u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
81+
return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0);
82+
}
83+
84+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
85+
// CHECK-NEXT: entry:
86+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
87+
// CHECK-NEXT: ret i32 [[TMP0]]
88+
//
89+
u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
90+
return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0);
91+
}
92+
93+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
94+
// CHECK-NEXT: entry:
95+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
96+
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
97+
//
98+
v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
99+
return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0);
100+
}
101+
102+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
103+
// CHECK-NEXT: entry:
104+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
105+
// CHECK-NEXT: ret <3 x i32> [[TMP0]]
106+
//
107+
v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
108+
return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0);
109+
}
110+
111+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
112+
// CHECK-NEXT: entry:
113+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
114+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
115+
//
116+
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
117+
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0);
118+
}
119+
120+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
121+
// CHECK-NEXT: entry:
122+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
123+
// CHECK-NEXT: ret i8 [[TMP0]]
124+
//
125+
u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
126+
return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0);
127+
}
128+
129+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
130+
// CHECK-NEXT: entry:
131+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
132+
// CHECK-NEXT: ret i16 [[TMP0]]
133+
//
134+
u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
135+
return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0);
136+
}
137+
138+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
139+
// CHECK-NEXT: entry:
140+
// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
141+
// CHECK-NEXT: ret i32 [[TMP0]]
142+
//
143+
u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
144+
return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0);
145+
}
146+
147+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
148+
// CHECK-NEXT: entry:
149+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
150+
// CHECK-NEXT: ret <2 x i32> [[TMP0]]
151+
//
152+
v2u32 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
153+
return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0);
154+
}
155+
156+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
157+
// CHECK-NEXT: entry:
158+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
159+
// CHECK-NEXT: ret <3 x i32> [[TMP0]]
160+
//
161+
v3u32 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
162+
return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0);
163+
}
164+
165+
// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
166+
// CHECK-NEXT: entry:
167+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
168+
// CHECK-NEXT: ret <4 x i32> [[TMP0]]
169+
//
170+
v4u32 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
171+
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
172+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
2+
// REQUIRES: amdgpu-registered-target
3+
4+
typedef unsigned char u8;
5+
typedef unsigned short u16;
6+
typedef unsigned int u32;
7+
typedef unsigned int v2u32 __attribute__((ext_vector_type(2)));
8+
typedef unsigned int v3u32 __attribute__((ext_vector_type(3)));
9+
typedef unsigned int v4u32 __attribute__((ext_vector_type(4)));
10+
11+
u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
12+
return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}}
13+
}
14+
15+
u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
16+
return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}}
17+
}
18+
19+
u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
20+
return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}}
21+
}
22+
23+
v2u32 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
24+
return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}}
25+
}
26+
27+
v3u32 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
28+
return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}}
29+
}
30+
31+
v4u32 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
32+
return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}}
33+
}

0 commit comments

Comments
 (0)