Skip to content

Commit 68ab67a

Browse files
authored
[SYCL] Add support for new FPGA loop attribute nofusion (#2715)
This patch adds support for a new loop attribute for FPGA, intel::nofusion. This attribute should be passed to the FPGA backend, and ignored by the emulator. The attribute indicates that the annotated loop should not be fused with any adjacent loop. Note: this does not include a corresponding [[intel::fusion]] attribute, because a different mechanism (loop_fuse) will be built for FPGA. Syntax: [[intel::nofusion]] The LLVM IR representation should be similar to the representation used for #pragma nofusion. The llvm.loop metadata should specify llvm.loop.fusion.disable. Signed-off-by: Soumi Manna <[email protected]>
1 parent 764dea9 commit 68ab67a

File tree

9 files changed

+187
-4
lines changed

9 files changed

+187
-4
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1849,6 +1849,18 @@ def SYCLIntelFPGASpeculatedIterations : Attr {
18491849
let Documentation = [SYCLIntelFPGASpeculatedIterationsAttrDocs];
18501850
}
18511851

1852+
def SYCLIntelFPGANofusion : Attr {
1853+
let Spellings = [CXX11<"intel","nofusion">];
1854+
let LangOpts = [SYCLIsDevice, SYCLIsHost];
1855+
let HasCustomTypeTransform = 1;
1856+
let AdditionalMembers = [{
1857+
static const char *getName() {
1858+
return "nofusion";
1859+
}
1860+
}];
1861+
let Documentation = [SYCLIntelFPGANofusionAttrDocs];
1862+
}
1863+
18521864
def IntelFPGALocalNonConstVar : SubsetSubject<Var,
18531865
[{S->hasLocalStorage() &&
18541866
S->getKind() != Decl::ImplicitParam &&

clang/include/clang/Basic/AttrDocs.td

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2418,6 +2418,15 @@ used on the same loop in conjunction with disable_loop_pipelining.
24182418
}];
24192419
}
24202420

2421+
def SYCLIntelFPGANofusionAttrDocs : Documentation {
2422+
let Category = DocCatVariable;
2423+
let Heading = "intel::nofusion";
2424+
let Content = [{
2425+
This attribute applies to a loop. Indicates that the annotated
2426+
loop should not be fused with any adjacent loop.
2427+
}];
2428+
}
2429+
24212430
def SYCLDeviceIndirectlyCallableDocs : Documentation {
24222431
let Category = DocCatFunction;
24232432
let Heading = "intel::device_indirectly_callable";

clang/lib/CodeGen/CGLoopInfo.cpp

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,12 @@ MDNode *LoopInfo::createMetadata(
578578
LoopProperties.push_back(MDNode::get(Ctx, Vals));
579579
}
580580

581+
// nofusion attribute corresponds to 'llvm.loop.fusion.disable' metadata
582+
if (Attrs.SYCLNofusionEnable) {
583+
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.fusion.disable")};
584+
LoopProperties.push_back(MDNode::get(Ctx, Vals));
585+
}
586+
581587
if (Attrs.SYCLSpeculatedIterationsEnable) {
582588
Metadata *Vals[] = {
583589
MDString::get(Ctx, "llvm.loop.intel.speculated.iterations.count"),
@@ -604,7 +610,8 @@ LoopAttributes::LoopAttributes(bool IsParallel)
604610
SYCLSpeculatedIterationsEnable(false),
605611
SYCLSpeculatedIterationsNIterations(0), UnrollCount(0),
606612
UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified),
607-
PipelineDisabled(false), PipelineInitiationInterval(0) {}
613+
PipelineDisabled(false), PipelineInitiationInterval(0),
614+
SYCLNofusionEnable(false) {}
608615

609616
void LoopAttributes::clear() {
610617
IsParallel = false;
@@ -631,6 +638,7 @@ void LoopAttributes::clear() {
631638
DistributeEnable = LoopAttributes::Unspecified;
632639
PipelineDisabled = false;
633640
PipelineInitiationInterval = 0;
641+
SYCLNofusionEnable = false;
634642
}
635643

636644
LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
@@ -663,7 +671,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs,
663671
Attrs.UnrollEnable == LoopAttributes::Unspecified &&
664672
Attrs.UnrollAndJamEnable == LoopAttributes::Unspecified &&
665673
Attrs.DistributeEnable == LoopAttributes::Unspecified && !StartLoc &&
666-
!EndLoc)
674+
Attrs.SYCLNofusionEnable == false && !EndLoc)
667675
return;
668676

669677
TempLoopID = MDNode::getTemporary(Header->getContext(), None);
@@ -970,6 +978,8 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
970978
// For attribute speculated_iterations:
971979
// n - 'llvm.loop.intel.speculated.iterations.count, i32 n' metadata will be
972980
// emitted
981+
// For attribute nofusion:
982+
// 'llvm.loop.fusion.disable' metadata will be emitted
973983
for (const auto *Attr : Attrs) {
974984
const SYCLIntelFPGAIVDepAttr *IntelFPGAIVDep =
975985
dyn_cast<SYCLIntelFPGAIVDepAttr>(Attr);
@@ -986,10 +996,13 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
986996
dyn_cast<SYCLIntelFPGAMaxInterleavingAttr>(Attr);
987997
const SYCLIntelFPGASpeculatedIterationsAttr *IntelFPGASpeculatedIterations =
988998
dyn_cast<SYCLIntelFPGASpeculatedIterationsAttr>(Attr);
999+
const SYCLIntelFPGANofusionAttr *IntelFPGANofusion =
1000+
dyn_cast<SYCLIntelFPGANofusionAttr>(Attr);
9891001

9901002
if (!IntelFPGAIVDep && !IntelFPGAII && !IntelFPGAMaxConcurrency &&
9911003
!IntelFPGALoopCoalesce && !IntelFPGADisableLoopPipelining &&
992-
!IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations)
1004+
!IntelFPGAMaxInterleaving && !IntelFPGASpeculatedIterations &&
1005+
!IntelFPGANofusion)
9931006
continue;
9941007

9951008
if (IntelFPGAIVDep)
@@ -1034,6 +1047,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx,
10341047
->getIntegerConstantExpr(Ctx)
10351048
->getSExtValue());
10361049
}
1050+
1051+
if (IntelFPGANofusion)
1052+
setSYCLNofusionEnable();
10371053
}
10381054

10391055
if (CGOpts.OptimizationLevel > 0)

clang/lib/CodeGen/CGLoopInfo.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,9 @@ struct LoopAttributes {
149149

150150
/// Value for llvm.loop.pipeline.iicount metadata.
151151
unsigned PipelineInitiationInterval;
152+
153+
/// Flag for llvm.loop.fusion.disable metadata.
154+
bool SYCLNofusionEnable;
152155
};
153156

154157
/// Information used when generating a structured loop.
@@ -405,6 +408,9 @@ class LoopInfoStack {
405408
StagedAttrs.PipelineInitiationInterval = C;
406409
}
407410

411+
/// Set flag of nofusion for the next loop pushed.
412+
void setSYCLNofusionEnable() { StagedAttrs.SYCLNofusionEnable = true; }
413+
408414
private:
409415
/// Returns true if there is LoopInfo on the stack.
410416
bool hasInfo() const { return !Active.empty(); }

clang/lib/Parse/ParseStmt.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2582,7 +2582,8 @@ bool Parser::ParseSYCLLoopAttributes(ParsedAttributes &Attrs) {
25822582
Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGAMaxInterleaving &&
25832583
Attrs.begin()->getKind() !=
25842584
ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations &&
2585-
Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint)
2585+
Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint &&
2586+
Attrs.begin()->getKind() != ParsedAttr::AT_SYCLIntelFPGANofusion)
25862587
return true;
25872588

25882589
bool IsIntelFPGAAttribute = (Attrs.begin()->getKind() != ParsedAttr::AT_LoopUnrollHint);

clang/lib/Sema/SemaStmtAttr.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,19 @@ static Attr *handleIntelFPGAIVDepAttr(Sema &S, const ParsedAttr &A) {
317317
NumArgs == 2 ? A.getArgAsExpr(1) : nullptr);
318318
}
319319

320+
static Attr *handleIntelFPGANofusionAttr(Sema &S, const ParsedAttr &A) {
321+
if (S.LangOpts.SYCLIsHost)
322+
return nullptr;
323+
324+
unsigned NumArgs = A.getNumArgs();
325+
if (NumArgs > 0) {
326+
S.Diag(A.getLoc(), diag::warn_attribute_too_many_arguments) << A << 0;
327+
return nullptr;
328+
}
329+
330+
return new (S.Context) SYCLIntelFPGANofusionAttr(S.Context, A);
331+
}
332+
320333
static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A,
321334
SourceRange) {
322335
IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0);
@@ -675,6 +688,8 @@ static void CheckForIncompatibleSYCLLoopAttributes(
675688
S, Attrs, Range);
676689

677690
CheckRedundantSYCLIntelFPGAIVDepAttrs(S, Attrs);
691+
CheckForDuplicationSYCLLoopAttribute<SYCLIntelFPGANofusionAttr>(S, Attrs,
692+
Range);
678693
}
679694

680695
void CheckForIncompatibleUnrollHintAttributes(
@@ -803,6 +818,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
803818
return handleLikely(S, St, A, Range);
804819
case ParsedAttr::AT_Unlikely:
805820
return handleUnlikely(S, St, A, Range);
821+
case ParsedAttr::AT_SYCLIntelFPGANofusion:
822+
return handleIntelFPGANofusionAttr(S, A);
806823
default:
807824
// if we're here, then we parsed a known attribute, but didn't recognize
808825
// it as a statement attribute => it is declaration attribute
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
// RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -fsycl -fsycl-is-device -internal-isystem %S/Inputs -emit-llvm %s -o - | FileCheck %s
2+
3+
#include "sycl.hpp"
4+
5+
using namespace cl::sycl;
6+
queue q;
7+
8+
void nofusion() {
9+
int a[10];
10+
11+
int i = 0;
12+
[[intel::nofusion]] while (i < 10) {
13+
// CHECK: br label {{.*}}, !llvm.loop ![[MD_NF_1:.*]]
14+
a[i] += 2;
15+
}
16+
17+
[[intel::nofusion]] do {
18+
// CHECK: br i1 %{{.*}}, !llvm.loop ![[MD_NF_2:.*]]
19+
a[i] += 3;
20+
}
21+
while (i < 10)
22+
;
23+
24+
[[intel::nofusion]] for (int i = 0; i < 10; ++i) {
25+
// CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_3:.*]]
26+
for (int j = 0; j < 10; ++j) {
27+
// CHECK-NOT: br label %{{.*}}, !llvm.loop !{{.*}}
28+
a[i] += a[j];
29+
}
30+
}
31+
32+
int k;
33+
[[intel::nofusion]] for (auto k : a) {
34+
// CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_5:.*]]
35+
k += 4;
36+
}
37+
38+
[[intel::nofusion]] for (int i = 0; i < 10; ++i) {
39+
// CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_6:.*]]
40+
a[i] += 5;
41+
}
42+
43+
for (int i = 0; i < 10; ++i) {
44+
// CHECK-NOT: br label %{{.*}}, !llvm.loop !{{.*}}
45+
[[intel::nofusion]] for (int j = 0; j < 10; ++j) {
46+
// CHECK: br label %{{.*}}, !llvm.loop ![[MD_NF_8:.*]]
47+
a[i] += a[j];
48+
}
49+
}
50+
}
51+
52+
int main() {
53+
q.submit([&](handler &h) {
54+
h.single_task<class kernel_function>([]() { nofusion(); });
55+
});
56+
return 0;
57+
}
58+
59+
// CHECK: ![[MD_NF_1]] = distinct !{![[MD_NF_1]], ![[MD_Nofusion:[0-9]+]]}
60+
// CHECK: ![[MD_Nofusion]] = !{!"llvm.loop.fusion.disable"}
61+
// CHECK: ![[MD_NF_2]] = distinct !{![[MD_NF_2]], ![[MD_Nofusion]]}
62+
// CHECK: ![[MD_NF_3]] = distinct !{![[MD_NF_3]], ![[MD_Nofusion]]}
63+
// CHECK: ![[MD_NF_5]] = distinct !{![[MD_NF_5]], ![[MD_Nofusion]]}
64+
// CHECK: ![[MD_NF_6]] = distinct !{![[MD_NF_6]], ![[MD_Nofusion]]}
65+
// CHECK: ![[MD_NF_8]] = distinct !{![[MD_NF_8]], ![[MD_Nofusion]]}

clang/test/SemaSYCL/intel-fpga-loops.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ void foo() {
2525
[[intel::max_interleaving(4)]] int i[10];
2626
// expected-error@+1 {{intelfpga loop attributes must be applied to for, while, or do statements}}
2727
[[intel::speculated_iterations(6)]] int j[10];
28+
// expected-error@+1 {{intelfpga loop attributes must be applied to for, while, or do statements}}
29+
[[intel::nofusion]] int k[10];
2830
}
2931

3032
// Test for deprecated spelling of Intel FPGA loop attributes
@@ -114,6 +116,9 @@ void boo() {
114116
// expected-warning@+1 {{'speculated_iterations' attribute takes no more than 1 argument - attribute ignored}}
115117
[[intel::speculated_iterations(1, 2)]] for (int i = 0; i != 10; ++i)
116118
a[i] = 0;
119+
// expected-warning@+1 {{'nofusion' attribute takes no more than 0 arguments - attribute ignored}}
120+
[[intel::nofusion(0)]] for (int i = 0; i != 10; ++i)
121+
a[i] = 0;
117122
}
118123

119124
// Test for incorrect argument value for Intel FPGA loop attributes
@@ -187,6 +192,10 @@ void goo() {
187192
// no diagnostics are expected
188193
[[intel::ivdep(2, s.ptr)]] for (int i = 0; i != 10; ++i)
189194
s.ptr[i] = 0;
195+
196+
// no diagnostics are expected
197+
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
198+
a[i] = 0;
190199
}
191200

192201
// Test for Intel FPGA loop attributes duplication
@@ -290,6 +299,11 @@ void zoo() {
290299
// expected-note@+1 {{previous attribute is here}}
291300
[[intel::ivdep(a, 3)]] for (int i = 0; i != 10; ++i)
292301
a[i] = 0;
302+
303+
[[intel::nofusion]]
304+
// expected-error@-1 {{duplicate Intel FPGA loop attribute 'nofusion'}}
305+
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
306+
a[i] = 0;
293307
}
294308

295309
// Test for Intel FPGA loop attributes compatibility
@@ -319,6 +333,10 @@ void loop_attrs_compatibility() {
319333
[[intel::disable_loop_pipelining]]
320334
[[intel::ivdep]] for (int i = 0; i != 10; ++i)
321335
a[i] = 0;
336+
// no diagnostics are expected
337+
[[intel::disable_loop_pipelining]]
338+
[[intel::nofusion]] for (int i = 0; i != 10; ++i)
339+
a[i] = 0;
322340
}
323341

324342
template<int A, int B, int C>
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -fsyntax-only -ast-dump -Wno-sycl-2017-compat -verify %s | FileCheck %s
2+
// expected-no-diagnostics
3+
4+
#include "sycl.hpp"
5+
6+
using namespace cl::sycl;
7+
queue q;
8+
9+
void nofusion() {
10+
int a1[10], a2[10];
11+
12+
// CHECK: AttributedStmt
13+
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
14+
[[intel::nofusion]] for (int p = 0; p < 10; ++p) {
15+
a1[p] = a2[p] = 0;
16+
}
17+
18+
// CHECK: AttributedStmt
19+
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
20+
int i = 0;
21+
[[intel::nofusion]] while (i < 10) {
22+
a1[i] += 3;
23+
}
24+
25+
// CHECK: AttributedStmt
26+
// CHECK-NEXT: SYCLIntelFPGANofusionAttr {{.*}}
27+
for (int i = 0; i < 10; ++i) {
28+
[[intel::nofusion]] for (int j = 0; j < 10; ++j) {
29+
a1[i] += a1[j];
30+
}
31+
}
32+
}
33+
34+
int main() {
35+
q.submit([&](handler &h) {
36+
h.single_task<class kernel_function>([]() { nofusion(); });
37+
});
38+
return 0;
39+
}

0 commit comments

Comments
 (0)