Skip to content

Commit 6cc97d2

Browse files
authored
[SYCL] dot_product support. (#2609)
This change adds a new Intel-specific header that supports dot_product APIs. The current implementation relies on the device compiler recognizing dot-product patterns and converting them to instructions, if available. A future update will provide the same functionality through the SPIR-V dot_product extension. Signed-off-by: rdeodhar <[email protected]>
1 parent b9bf9f5 commit 6cc97d2

File tree

4 files changed

+732
-0
lines changed

4 files changed

+732
-0
lines changed
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
= SYCL_INTEL_dot_accumulate
2+
:source-highlighter: coderay
3+
:coderay-linenums-mode: table
4+
:doctype: book
5+
:encoding: utf-8
6+
:lang: en
7+
8+
:blank: pass:[ +]
9+
10+
// Set the default source code type in this document to C,
11+
// for syntax highlighting purposes.
12+
:language: c
13+
14+
// This is what is needed for C++, since docbook uses c++
15+
// and everything else uses cpp. This doesn't work when
16+
// source blocks are in table cells, though, so don't use
17+
// C++ unless it is required.
18+
//:language: {basebackend@docbook:c++:cpp}
19+
20+
== Introduction
21+
22+
IMPORTANT: This specification is a draft.
23+
24+
NOTE: Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by permission by Khronos.
25+
26+
NOTE: This document is better viewed when rendered as html with asciidoctor. GitHub does not render image icons.
27+
28+
== Name Strings
29+
30+
`SYCL_ONEAPI_dot_accumulate`
31+
32+
This is a placeholder name.
33+
34+
== Notice
35+
36+
Copyright (c) 2020 Intel Corporation. All rights reserved.
37+
38+
== Status
39+
40+
Working Draft
41+
42+
This is a preview extension specification, intended to provide early access to a feature for review and community feedback. When the feature matures, this specification may be released as a formal extension.
43+
44+
Because the interfaces defined by this specification are not final and are subject to change they are not intended to be used by shipping software products.
45+
46+
== Version
47+
48+
Built On: {docdate} +
49+
Revision: B
50+
51+
== Contact
52+
53+
Ben Ashbaugh, Intel (ben 'dot' ashbaugh 'at' intel 'dot' com)
54+
55+
== Dependencies
56+
57+
This extension is written against the SYCL 1.2.1 specification, Revision v1.2.1-6.
58+
59+
== Overview
60+
61+
This extension adds new SYCL built-in functions that may simplify development and provide access specialized hardware instructions when a SYCL kernel needs to perform a dot product of two vectors followed by a scalar accumulation.
62+
63+
== Enabling the extension
64+
65+
The extension is always enabled. The dot product functionality may be emulated in software or executed using hardware when suitable instructions are available.
66+
67+
== Modifications of SYCL 1.2.1 specification
68+
69+
=== Add to Section 4.13.6 - Geometric Functions
70+
71+
Additionally, the following additional functions are available in the namespace `cl::sycl::intel` on the host and device.
72+
73+
[cols="4a,4",options="header"]
74+
|====
75+
| *Function*
76+
| *Description*
77+
78+
|[source,c]
79+
----
80+
int32_t dot_acc(vec<int8_t,4> a,
81+
vec<int8_t,4> b,
82+
int32_t c)
83+
int32_t dot_acc(vec<int8_t,4> a,
84+
vec<uint8_t,4> b,
85+
int32_t c)
86+
int32_t dot_acc(vec<uint8_t,4> a,
87+
vec<int8_t,4> b,
88+
int32_t c)
89+
int32_t dot_acc(vec<uint8_t,4> a,
90+
vec<uint8_t,4> b,
91+
int32_t c)
92+
----
93+
94+
|Performs a four-component integer dot product accumulate operation. +
95+
{blank}
96+
The value that is returned is equivalent to +
97+
{blank}
98+
*dot*(_a_, _b_) + _c_
99+
100+
|====
101+
102+
== Sample Header
103+
104+
[source,c++]
105+
----
106+
namespace cl {
107+
namespace sycl {
108+
namespace ONEAPI {
109+
110+
int32_t dot_acc(vec<int8_t,4> a, vec<int8_t,4> b, int32_t c);
111+
int32_t dot_acc(vec<int8_t,4> a, vec<uint8_t,4> b, int32_t c);
112+
int32_t dot_acc(vec<uint8_t,4> a, vec<int8_t,4> b, int32_t c);
113+
int32_t dot_acc(vec<uint8_t,4> a, vec<uint8_t,4> b, int32_t c);
114+
115+
int32_t dot_acc(int32_t a, int32_t b, int32_t c);
116+
int32_t dot_acc(int32_t a, uint32_t b, int32_t c);
117+
int32_t dot_acc(uint32_t a, int32_t b, int32_t c);
118+
int32_t dot_acc(uint32_t a, uint32_t b, int32_t c);
119+
120+
} // ONEAPI
121+
} // sycl
122+
} // cl
123+
----
124+
125+
== Issues
126+
127+
None.
128+
129+
== Revision History
130+
131+
[cols="5,15,15,70"]
132+
[grid="rows"]
133+
[options="header"]
134+
|========================================
135+
|Rev|Date|Author|Changes
136+
|A|2019-12-13|Ben Ashbaugh|*Initial draft*
137+
|B|2019-12-18|Ben Ashbaugh|Switched to standard C++ fixed width types.
138+
|C|2020-10-26|Rajiv Deodhar|Added int32 types.
139+
|========================================
140+
141+
//************************************************************************
142+
//Other formatting suggestions:
143+
//
144+
//* Use *bold* text for host APIs, or [source] syntax highlighting.
145+
//* Use `mono` text for device APIs, or [source] syntax highlighting.
146+
//* Use `mono` text for extension names, types, or enum values.
147+
//* Use _italics_ for parameters.
148+
//************************************************************************
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
//==----------- dot_product.hpp ------- SYCL dot-product -------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// DP4A extension
10+
11+
#pragma once
12+
13+
__SYCL_INLINE_NAMESPACE(cl) {
14+
namespace sycl {
15+
namespace ONEAPI {
16+
17+
union Us {
18+
char s[4];
19+
int32_t i;
20+
};
21+
union Uu {
22+
unsigned char s[4];
23+
uint32_t i;
24+
};
25+
26+
int32_t dot_acc(int32_t pa, int32_t pb, int32_t c) {
27+
Us a = *(reinterpret_cast<Us *>(&pa));
28+
Us b = *(reinterpret_cast<Us *>(&pb));
29+
return a.s[0] * b.s[0] + a.s[1] * b.s[1] + a.s[2] * b.s[2] + a.s[3] * b.s[3] +
30+
c;
31+
}
32+
33+
int32_t dot_acc(uint32_t pa, uint32_t pb, int32_t c) {
34+
Uu a = *(reinterpret_cast<Uu *>(&pa));
35+
Uu b = *(reinterpret_cast<Uu *>(&pb));
36+
return a.s[0] * b.s[0] + a.s[1] * b.s[1] + a.s[2] * b.s[2] + a.s[3] * b.s[3] +
37+
c;
38+
}
39+
40+
int32_t dot_acc(int32_t pa, uint32_t pb, int32_t c) {
41+
Us a = *(reinterpret_cast<Us *>(&pa));
42+
Uu b = *(reinterpret_cast<Uu *>(&pb));
43+
return a.s[0] * b.s[0] + a.s[1] * b.s[1] + a.s[2] * b.s[2] + a.s[3] * b.s[3] +
44+
c;
45+
}
46+
47+
int32_t dot_acc(uint32_t pa, int32_t pb, int32_t c) {
48+
Uu a = *(reinterpret_cast<Uu *>(&pa));
49+
Us b = *(reinterpret_cast<Us *>(&pb));
50+
return a.s[0] * b.s[0] + a.s[1] * b.s[1] + a.s[2] * b.s[2] + a.s[3] * b.s[3] +
51+
c;
52+
}
53+
54+
int32_t dot_acc(vec<int8_t, 4> a, vec<int8_t, 4> b, int32_t c) {
55+
return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() +
56+
c;
57+
}
58+
59+
int32_t dot_acc(vec<uint8_t, 4> a, vec<uint8_t, 4> b, int32_t c) {
60+
return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() +
61+
c;
62+
}
63+
64+
int32_t dot_acc(vec<uint8_t, 4> a, vec<int8_t, 4> b, int32_t c) {
65+
return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() +
66+
c;
67+
}
68+
69+
int32_t dot_acc(vec<int8_t, 4> a, vec<uint8_t, 4> b, int32_t c) {
70+
return a.s0() * b.s0() + a.s1() * b.s1() + a.s2() * b.s2() + a.s3() * b.s3() +
71+
c;
72+
}
73+
74+
} // namespace ONEAPI
75+
} // namespace sycl
76+
} // __SYCL_INLINE_NAMESPACE(cl)

0 commit comments

Comments
 (0)