Skip to content

Commit 30a60bf

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents dc8257b + d7b059e commit 30a60bf

File tree

20 files changed

+791
-52
lines changed

20 files changed

+791
-52
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1852,6 +1852,23 @@ def IntelFPGABankBits : Attr {
18521852
}];
18531853
}
18541854

1855+
def IntelFPGAForcePow2Depth : Attr {
1856+
let Spellings = [CXX11<"intelfpga","force_pow2_depth">];
1857+
let Args = [ExprArgument<"Value">];
1858+
let Subjects = SubjectList<[IntelFPGAConstVar, IntelFPGALocalStaticSlaveMemVar,
1859+
Field], ErrorDiag>;
1860+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1861+
let Documentation = [IntelFPGAForcePow2DepthAttrDocs];
1862+
let AdditionalMembers = [{
1863+
static unsigned getMinValue() {
1864+
return 0;
1865+
}
1866+
static unsigned getMaxValue() {
1867+
return 1;
1868+
}
1869+
}];
1870+
}
1871+
18551872
def Naked : InheritableAttr {
18561873
let Spellings = [GCC<"naked">, Declspec<"naked">];
18571874
let Subjects = SubjectList<[Function]>;

clang/include/clang/Basic/AttrDocs.td

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1935,6 +1935,26 @@ pointer address bits to bank on.
19351935
}];
19361936
}
19371937

1938+
def IntelFPGAForcePow2DepthAttrDocs : Documentation {
1939+
let Category = DocCatVariable;
1940+
let Heading = "force_pow2_depth (IntelFPGA)";
1941+
let Content = [{
1942+
This attribute may be attached to a variable or struct member declaration and
1943+
provides explicit control over the geometry of memory blocks used in a given
1944+
memory system.
1945+
1946+
In the presence of this attribute, the compiler:
1947+
1948+
1. Will automatically size the memory depth to the next largest power of 2 if
1949+
force_pow2_depth is set to 1, and will prefer width-stitching of RAM blocks
1950+
over depth-stitching.
1951+
1952+
2. Will not size the memory to the next largest power of 2 if force_pow2_depth
1953+
is set to 0, and will prefer depth-stitching over width-stitching if RAM usage
1954+
can be lowered.
1955+
}];
1956+
}
1957+
19381958
def SYCLIntelKernelArgsRestrictDocs : Documentation {
19391959
let Category = DocCatVariable;
19401960
let Heading = "kernel_args_restrict";

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4072,6 +4072,11 @@ void CodeGenModule::generateIntelFPGAAnnotation(
40724072
}
40734073
if (D->hasAttr<IntelFPGASimpleDualPortAttr>())
40744074
Out << "{simple_dual_port:1}";
4075+
if (const auto *FP2D = D->getAttr<IntelFPGAForcePow2DepthAttr>()) {
4076+
llvm::APSInt FP2DInt =
4077+
FP2D->getValue()->EvaluateKnownConstInt(getContext());
4078+
Out << '{' << FP2D->getSpelling() << ':' << FP2DInt << '}';
4079+
}
40754080
}
40764081

40774082
void CodeGenModule::addGlobalIntelFPGAAnnotation(const VarDecl *VD,

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5221,6 +5221,8 @@ static bool checkIntelFPGARegisterAttrCompatibility(Sema &S, Decl *D,
52215221
InCompat = true;
52225222
if (checkAttrMutualExclusion<IntelFPGABankBitsAttr>(S, D, Attr))
52235223
InCompat = true;
5224+
if (checkAttrMutualExclusion<IntelFPGAForcePow2DepthAttr>(S, D, Attr))
5225+
InCompat = true;
52245226

52255227
return InCompat;
52265228
}
@@ -5425,6 +5427,24 @@ static void handleIntelFPGAPrivateCopiesAttr(Sema &S, Decl *D,
54255427
D, Attr, Attr.getArgAsExpr(0));
54265428
}
54275429

5430+
static void handleIntelFPGAForcePow2DepthAttr(Sema &S, Decl *D,
5431+
const ParsedAttr &Attr) {
5432+
if (S.LangOpts.SYCLIsHost)
5433+
return;
5434+
5435+
checkForDuplicateAttribute<IntelFPGAForcePow2DepthAttr>(S, D, Attr);
5436+
5437+
if (checkAttrMutualExclusion<IntelFPGARegisterAttr>(S, D, Attr))
5438+
return;
5439+
5440+
if (!D->hasAttr<IntelFPGAMemoryAttr>())
5441+
D->addAttr(IntelFPGAMemoryAttr::CreateImplicit(
5442+
S.Context, IntelFPGAMemoryAttr::Default));
5443+
5444+
S.AddOneConstantValueAttr<IntelFPGAForcePow2DepthAttr>(D, Attr,
5445+
Attr.getArgAsExpr(0));
5446+
}
5447+
54285448
static void handleXRayLogArgsAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
54295449
ParamIdx ArgCount;
54305450

@@ -8059,6 +8079,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
80598079
case ParsedAttr::AT_IntelFPGABankBits:
80608080
handleIntelFPGABankBitsAttr(S, D, AL);
80618081
break;
8082+
case ParsedAttr::AT_IntelFPGAForcePow2Depth:
8083+
handleIntelFPGAForcePow2DepthAttr(S, D, AL);
8084+
break;
80628085
case ParsedAttr::AT_SYCLIntelPipeIO:
80638086
handleSYCLIntelPipeIOAttr(S, D, AL);
80648087
break;

clang/lib/Sema/SemaStmtAsm.cpp

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -256,16 +256,6 @@ StmtResult Sema::ActOnGCCAsmStmt(SourceLocation AsmLoc, bool IsSimple,
256256
// The parser verifies that there is a string literal here.
257257
assert(AsmString->isAscii());
258258

259-
// Skip all the checks if we are compiling SYCL device code, but the function
260-
// is not marked to be used on device, this code won't be codegen'ed anyway.
261-
if (getLangOpts().SYCLIsDevice) {
262-
SYCLDiagIfDeviceCode(AsmLoc, diag::err_sycl_restrict) << KernelUseAssembly;
263-
return new (Context)
264-
GCCAsmStmt(Context, AsmLoc, IsSimple, IsVolatile, NumOutputs, NumInputs,
265-
Names, Constraints, Exprs.data(), AsmString, NumClobbers,
266-
Clobbers, NumLabels, RParenLoc);
267-
}
268-
269259
FunctionDecl *FD = dyn_cast<FunctionDecl>(getCurLexicalContext());
270260
llvm::StringMap<bool> FeatureMap;
271261
Context.getFunctionFeatureMap(FeatureMap, FD);
@@ -902,9 +892,6 @@ StmtResult Sema::ActOnMSAsmStmt(SourceLocation AsmLoc, SourceLocation LBraceLoc,
902892
SourceLocation EndLoc) {
903893
bool IsSimple = (NumOutputs != 0 || NumInputs != 0);
904894
setFunctionHasBranchProtectedScope();
905-
if (getLangOpts().SYCLIsDevice)
906-
SYCLDiagIfDeviceCode(AsmLoc, diag::err_sycl_restrict)
907-
<< KernelUseAssembly;
908895
MSAsmStmt *NS =
909896
new (Context) MSAsmStmt(Context, AsmLoc, LBraceLoc, IsSimple,
910897
/*IsVolatile*/ true, AsmToks, NumOutputs, NumInputs,

clang/lib/Sema/SemaTemplateInstantiateDecl.cpp

Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -556,6 +556,17 @@ static void instantiateIntelFPGABankBitsAttr(
556556
S.AddIntelFPGABankBitsAttr(New, *Attr, Args.data(), Args.size());
557557
}
558558

559+
static void instantiateIntelFPGAForcePow2DepthAttr(
560+
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
561+
const IntelFPGAForcePow2DepthAttr *Attr, Decl *New) {
562+
EnterExpressionEvaluationContext Unevaluated(
563+
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
564+
ExprResult Result = S.SubstExpr(Attr->getValue(), TemplateArgs);
565+
if (!Result.isInvalid())
566+
S.AddOneConstantValueAttr<IntelFPGAForcePow2DepthAttr>(
567+
New, *Attr, Result.getAs<Expr>());
568+
}
569+
559570
static void instantiateSYCLIntelPipeIOAttr(
560571
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
561572
const SYCLIntelPipeIOAttr *Attr, Decl *New) {
@@ -700,6 +711,11 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
700711
instantiateIntelFPGABankBitsAttr(*this, TemplateArgs, IntelFPGABankBits,
701712
New);
702713
}
714+
if (const auto *IntelFPGAForcePow2Depth =
715+
dyn_cast<IntelFPGAForcePow2DepthAttr>(TmplAttr)) {
716+
instantiateIntelFPGAForcePow2DepthAttr(*this, TemplateArgs,
717+
IntelFPGAForcePow2Depth, New);
718+
}
703719
if (const auto *SYCLIntelPipeIO = dyn_cast<SYCLIntelPipeIOAttr>(TmplAttr)) {
704720
instantiateSYCLIntelPipeIOAttr(*this, TemplateArgs, SYCLIntelPipeIO, New);
705721
continue;

clang/test/CodeGenSYCL/inline_asm.cpp

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,27 @@
1+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -emit-llvm -x c++ %s -o - | FileCheck %s
2+
3+
class kernel;
4+
5+
template <typename name, typename Func>
6+
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
7+
// CHECK: %[[ARRAY_A:[0-9a-z]+]] = alloca [100 x i32], align 4
8+
// CHECK: %[[IDX:.*]] = getelementptr inbounds [100 x i32], [100 x i32]* %[[ARRAY_A]], i64 0, i64 0
9+
int a[100], i = 0;
10+
// CHECK-NEXT: call void asm sideeffect
11+
// CHECK: ".decl V52 v_type=G type=d num_elts=16 align=GRF
12+
// CHECK: svm_gather.4.1 (M1, 16) $0.0 V52.0
13+
// CHECK: add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1
14+
// CHECK: svm_scatter.4.1 (M1, 16) $0.0 V52.0",
15+
// CHECK: "rw"(i32* nonnull %[[IDX]])
16+
asm volatile(".decl V52 v_type=G type=d num_elts=16 align=GRF\n"
17+
"svm_gather.4.1 (M1, 16) %0.0 V52.0\n"
18+
"add(M1, 16) V52(0, 0)<1> V52(0, 0)<1; 1, 0> 0x1\n"
19+
"svm_scatter.4.1 (M1, 16) %0.0 V52.0"
20+
:
21+
: "rw"(&a[i]));
22+
}
23+
24+
int main() {
25+
kernel_single_task<class kernel>([]() {});
26+
return 0;
27+
}

clang/test/CodeGenSYCL/intel-fpga-local.cpp

Lines changed: 25 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
// CHECK-DEVICE: [[ANN_numbanks_4:@.str]] = {{.*}}{memory:DEFAULT}{sizeinfo:4}{numbanks:4}
66
// CHECK-BOTH: @_ZZ15attrs_on_staticvE15static_annotate = internal{{.*}}constant i32 30, align 4
77
// CHECK-BOTH: [[ANN_annotate:@.str[.0-9]*]] = {{.*}}foobar
8+
// CHECK-BOTH: @_ZZ15attrs_on_staticvE16static_force_p2d = internal{{.*}}constant i32 40, align 4
9+
// CHECK-DEVICE: [[ANN_force_pow2_depth_0:@.str[.0-9]*]] = {{.*}}{memory:DEFAULT}{sizeinfo:4}{force_pow2_depth:0}
810
// CHECK-DEVICE: [[ANN_register:@.str.[0-9]*]] = {{.*}}{register:1}
911
// CHECK-DEVICE: [[ANN_memory_default:@.str.[0-9]*]] = {{.*}}{memory:DEFAULT}{sizeinfo:4}
1012
// CHECK-DEVICE: [[ANN_mlab_sizeinfo_500:@.str.[0-9]*]] = {{.*}}{memory:MLAB}{sizeinfo:4,500}
@@ -22,22 +24,25 @@
2224
// CHECK-DEVICE: [[ANN_bankbits_bankwidth:@.str.[0-9]*]] = {{.*}}{memory:DEFAULT}{sizeinfo:4,10,2}{bankwidth:16}{numbanks:2}{bank_bits:0}
2325
// CHECK-DEVICE: [[ANN_memory_blockram:@.str.[0-9]*]] = {{.*}}{memory:BLOCK_RAM}{sizeinfo:4}
2426
// CHECK-DEVICE: [[ANN_memory_mlab:@.str.[0-9]*]] = {{.*}}{memory:MLAB}{sizeinfo:4}
27+
// CHECK-DEVICE: [[ANN_force_pow2_depth_1:@.str.[0-9]*]] = {{.*}}{memory:DEFAULT}{sizeinfo:4}{force_pow2_depth:1}
2528
// CHECK-DEVICE: [[ANN_private_copies_4:@.str.[0-9]*]] = {{.*}}{memory:DEFAULT}{sizeinfo:4}{private_copies:4}
2629
// CHECK-DEVICE: [[ANN_max_replicates_4:@.str.[0-9]*]] = {{.*}}{max_replicates:4}
2730

2831
// CHECK-BOTH: @llvm.global.annotations
2932
// CHECK-DEVICE-SAME: { i8* addrspacecast (i8 addrspace(1)* bitcast (i32 addrspace(1)* @_ZZ15attrs_on_staticvE15static_numbanks to i8 addrspace(1)*) to i8*)
30-
// CHECK-DEVICE-SAME: [[ANN_numbanks_4]]{{.*}} i32 39
33+
// CHECK-DEVICE-SAME: [[ANN_numbanks_4]]{{.*}} i32 43
3134
// CHECK-DEVICE-SAME: { i8* addrspacecast (i8 addrspace(1)* bitcast (i32 addrspace(1)* @_ZZ15attrs_on_staticvE15static_annotate to i8 addrspace(1)*) to i8*)
3235
// CHECK-HOST-SAME: { i8* bitcast (i32* @_ZZ15attrs_on_staticvE15static_annotate to i8*)
33-
// CHECK-BOTH-SAME: [[ANN_annotate]]{{.*}} i32 40
34-
36+
// CHECK-BOTH-SAME: [[ANN_annotate]]{{.*}} i32 44
37+
// CHECK-DEVICE-SAME: { i8* addrspacecast (i8 addrspace(1)* bitcast (i32 addrspace(1)* @_ZZ15attrs_on_staticvE16static_force_p2d to i8 addrspace(1)*) to i8*)
38+
// CHECK-DEVICE-SAME: [[ANN_force_pow2_depth_0]]{{.*}} i32 45
3539
// CHECK-HOST-NOT: llvm.var.annotation
3640
// CHECK-HOST-NOT: llvm.ptr.annotation
3741

3842
void attrs_on_static() {
3943
const static int static_numbanks [[intelfpga::numbanks(4)]] = 20;
4044
const static int static_annotate [[clang::annotate("foobar")]] = 30;
45+
const static int static_force_p2d [[intelfpga::force_pow2_depth(0)]] = 40;
4146
}
4247

4348
void attrs_on_var() {
@@ -105,6 +110,10 @@ void attrs_on_var() {
105110
// CHECK-DEVICE: %[[VAR_BANK_BITS_WIDTH1:bank_bits_width[0-9]+]] = bitcast{{.*}}%bank_bits_width
106111
// CHECK-DEVICE: @llvm.var.annotation{{.*}}%[[VAR_BANK_BITS_WIDTH1]],{{.*}}[[ANN_bankbits_bankwidth]]
107112
[[intelfpga::bank_bits(0), intelfpga::bankwidth(16)]] int bank_bits_width[10][2];
113+
// CHECK-DEVICE: %[[VAR_FP2D:[0-9]+]] = bitcast{{.*}}%force_p2d
114+
// CHECK-DEVICE: %[[VAR_FP2D1:force_p2d[0-9]+]] = bitcast{{.*}}%force_p2d
115+
// CHECK-DEVICE: llvm.var.annotation{{.*}}%[[VAR_FP2D1]],{{.*}}[[ANN_force_pow2_depth_0]]
116+
int force_p2d [[intelfpga::force_pow2_depth(0)]];
108117
}
109118

110119
void attrs_on_struct() {
@@ -123,6 +132,7 @@ void attrs_on_struct() {
123132
int maxreplicates [[intelfpga::max_replicates(2)]];
124133
int dualport [[intelfpga::simple_dual_port]];
125134
int bankbits [[intelfpga::bank_bits(4, 5)]];
135+
int force_p2d [[intelfpga::force_pow2_depth(1)]];
126136
} s;
127137

128138
// CHECK-DEVICE: %[[FIELD_NUMBANKS:.*]] = getelementptr inbounds %struct.{{.*}}.attrs_on_struct{{.*}}
@@ -167,12 +177,15 @@ void attrs_on_struct() {
167177
// CHECK-DEVICE: %[[FIELD_BANKBITS:.*]] = getelementptr inbounds %struct.{{.*}}.attrs_on_struct{{.*}}
168178
// CHECK-DEVICE: call i32* @llvm.ptr.annotation.p0i32{{.*}}%[[FIELD_BANKBITS]]{{.*}}[[ANN_bankbits_4_5]]
169179
s.bankbits = 0;
180+
// CHECK-DEVICE: %[[FIELD_FP2D:.*]] = getelementptr inbounds %struct.{{.*}}.attrs_on_struct{{.*}}
181+
// CHECK-DEVICE: call i32* @llvm.ptr.annotation.p0i32{{.*}}%[[FIELD_FP2D]]{{.*}}[[ANN_force_pow2_depth_1]]
182+
s.force_p2d = 0;
170183
}
171184

172185
// CHECK-HOST-NOT: llvm.var.annotation
173186
// CHECK-HOST-NOT: llvm.ptr.annotation
174187

175-
template <int A, int B>
188+
template <int A, int B, int C>
176189
void attrs_with_template_param() {
177190
// CHECK-DEVICE: %[[TEMPL_NUMBANKS:numbanks[0-9]+]] = bitcast{{.*}}%numbanks
178191
// CHECK-DEVICE: @llvm.var.annotation{{.*}}%[[TEMPL_NUMBANKS]],{{.*}}[[ANN_numbanks_4]]
@@ -189,13 +202,17 @@ void attrs_with_template_param() {
189202
// CHECK-DEVICE: %[[TEMPL_BANKBITS:bankbits[0-9]+]] = bitcast{{.*}}%bankbits
190203
// CHECK-DEVICE: @llvm.var.annotation{{.*}}%[[TEMPL_BANKBITS]],{{.*}}[[ANN_bankbits_4_5]]
191204
int bankbits [[intelfpga::bank_bits(A, B)]];
205+
// CHECK-DEVICE: %[[TEMPL_FP2D:force_p2d[0-9]+]] = bitcast{{.*}}%force_p2d
206+
// CHECK-DEVICE: @llvm.var.annotation{{.*}}%[[TEMPL_FP2D]]{{.*}}[[ANN_force_pow2_depth_1]]
207+
int force_p2d [[intelfpga::force_pow2_depth(C)]];
192208

193209
struct templ_on_struct_fields {
194210
int numbanks [[intelfpga::numbanks(A)]] ;
195211
int bankwidth [[intelfpga::bankwidth(A)]];
196212
int privatecopies [[intelfpga::private_copies(A)]];
197213
int maxreplicates [[intelfpga::max_replicates(A)]];
198214
int bankbits [[intelfpga::bank_bits(A, B)]];
215+
int force_p2d [[intelfpga::force_pow2_depth(C)]];
199216
} s;
200217

201218
// CHECK-DEVICE: %[[FIELD_NUMBANKS:.*]] = getelementptr inbounds %struct.{{.*}}.templ_on_struct_fields{{.*}}
@@ -213,6 +230,9 @@ void attrs_with_template_param() {
213230
// CHECK-DEVICE: %[[FIELD_BANKBITS:.*]] = getelementptr inbounds %struct.{{.*}}.templ_on_struct_fields{{.*}}
214231
// CHECK-DEVICE: call i32* @llvm.ptr.annotation.p0i32{{.*}}%[[FIELD_BANKBITS]]{{.*}}[[ANN_bankbits_4_5]]
215232
s.bankbits = 0;
233+
// CHECK-DEVICE: %[[FIELD_FP2D:.*]] = getelementptr inbounds %struct.{{.*}}.templ_on_struct_fields{{.*}}
234+
// CHECK-DEVICE: call i32* @llvm.ptr.annotation.p0i32{{.*}}%[[FIELD_FP2D]]{{.*}}[[ANN_force_pow2_depth_1]]
235+
s.force_p2d = 0;
216236
}
217237

218238
void field_addrspace_cast() {
@@ -246,7 +266,7 @@ int main() {
246266
attrs_on_var();
247267
attrs_on_struct();
248268
field_addrspace_cast();
249-
attrs_with_template_param<4,5>();
269+
attrs_with_template_param<4, 5, 1>();
250270
});
251271
return 0;
252272
}

clang/test/SemaSYCL/inline-asm.cpp

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify %s -DLINUX_ASM
22
// RUN: %clang_cc1 -fsycl -fsycl-is-device -fsyntax-only -verify -triple x86_64-windows -fasm-blocks %s
33

4+
// expected-no-diagnostics
5+
46
void foo() {
57
int a;
68
#ifdef LINUX_ASM
@@ -13,26 +15,24 @@ void foo() {
1315
void bar() {
1416
int a;
1517
#ifdef LINUX_ASM
16-
__asm__("int3"); // expected-error {{SYCL kernel cannot use inline assembly}}
18+
__asm__("int3");
1719
#else
18-
__asm int 3 // expected-error {{SYCL kernel cannot use inline assembly}}
20+
__asm int 3
1921
#endif // LINUX_ASM
2022
}
2123

2224
template <typename Name, typename Func>
2325
__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) {
24-
// expected-note@+1 {{called by 'kernel_single_task<fake_kernel, (lambda}}
2526
kernelFunc();
2627
#ifdef LINUX_ASM
27-
__asm__("int3"); // expected-error {{SYCL kernel cannot use inline assembly}}
28+
__asm__("int3");
2829
#else
29-
__asm int 3 // expected-error {{SYCL kernel cannot use inline assembly}}
30+
__asm int 3
3031
#endif // LINUX_ASM
3132
}
3233

3334
int main() {
3435
foo();
35-
// expected-note@+1 {{called by 'operator()'}}
3636
kernel_single_task<class fake_kernel>([]() { bar(); });
3737
return 0;
3838
}

0 commit comments

Comments
 (0)