Skip to content

[SYCL] Add fma_relu extension #5749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
025cf7e
Added bfloat16 support for cuda backend.
JackAKirk Jan 25, 2022
66b4e33
deleted intel namespace bfloat16.
JackAKirk Jan 25, 2022
2d04406
Format.
JackAKirk Jan 25, 2022
9418f74
Changed extension macro name.
JackAKirk Jan 25, 2022
65fddfa
Merge branch 'sycl' into bf16-cvt-ext
JackAKirk Feb 17, 2022
4d99f3f
fixed test.
JackAKirk Feb 17, 2022
3982001
Used neg ptx7.0 builtin for unary minus
JackAKirk Mar 4, 2022
450e1b5
Adding fma_relu extension
Mar 7, 2022
8d2d11f
Replaced SYCL_EXT_INTEL_BF16_CONVERSION.asciidoc with SYCL_EXT_ONEAPI…
JackAKirk Mar 7, 2022
a514505
Remove redundant include
Mar 7, 2022
d8bc53f
Merge branch 'sycl' into bf16-cvt-ext
JackAKirk Mar 8, 2022
37a18d7
Adding symbols to linux dump
Mar 11, 2022
a7b2fdc
Merge main into branch
Mar 11, 2022
7b40302
Responding to comments
hdelan Mar 14, 2022
2f9b7d7
Merge branch 'sycl' into bf16-cvt-ext
JackAKirk Mar 15, 2022
8a29c44
Renamed extension to cover all bfloat16 funct.
JackAKirk Mar 15, 2022
f53577f
Merge remote-tracking branch 'Jack/bf16-cvt-ext' into add_relu
Apr 4, 2022
49aca06
Making fma_relu accept the bfloat16 class
Apr 4, 2022
02cbc5b
Merge branch 'add_relu' of https://github.com/hdelan/llvm into add_relu
Apr 4, 2022
9fb55df
Update doc
Apr 4, 2022
358c943
Update sycl/doc/extensions/experimental/sycl_ext_oneapi_fma_relu.asci…
hdelan Apr 4, 2022
7c6d728
Update sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
hdelan Apr 4, 2022
390ae97
Update sycl/include/sycl/ext/oneapi/experimental/builtins.hpp
hdelan Apr 4, 2022
f08791a
Using bits instead of reinterpret cast
Apr 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions libclc/generic/include/spirv/spirv_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -14146,6 +14146,22 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__spirv_ocl_fma(__clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_fp16_t);
#endif

// Declarations of the __clc_fma_relu overloads for the fp16 scalar type and
// its 2-, 3-, 4-, 8- and 16-element vector types. Every overload takes three
// operands of one type and returns that same type. The declarations are only
// visible when cl_khr_fp16 is defined.
// NOTE(review): judging by the name this is presumably a fused multiply-add
// followed by a ReLU clamp -- confirm against the extension specification.
#ifdef cl_khr_fp16
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_fma_relu(__clc_fp16_t,
__clc_fp16_t,
__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
__clc_fma_relu(__clc_vec2_fp16_t, __clc_vec2_fp16_t, __clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
__clc_fma_relu(__clc_vec3_fp16_t, __clc_vec3_fp16_t, __clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
__clc_fma_relu(__clc_vec4_fp16_t, __clc_vec4_fp16_t, __clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
__clc_fma_relu(__clc_vec8_fp16_t, __clc_vec8_fp16_t, __clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__clc_fma_relu(__clc_vec16_fp16_t, __clc_vec16_fp16_t, __clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_fp32_t
__spirv_ocl_fmax(__clc_fp32_t, __clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down
36 changes: 36 additions & 0 deletions libclc/generic/libspirv/float16.cl
Original file line number Diff line number Diff line change
Expand Up @@ -4570,6 +4570,42 @@ __spirv_ocl_fma(__clc_vec16_float16_t args_0, __clc_vec16_float16_t args_1,
as_half16(args_2));
}

// Scalar __clc_float16_t overload of __clc_fma_relu. It passes each of the
// three operands through as_half and forwards to whichever __clc_fma_relu
// overload accepts the resulting type; the result is returned as
// __clc_fp16_t.
// NOTE(review): this relies on as_half yielding a type distinct from
// __clc_float16_t -- otherwise the call would resolve back to this function.
// Confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t __clc_fma_relu(
__clc_float16_t args_0, __clc_float16_t args_1, __clc_float16_t args_2) {
return __clc_fma_relu(as_half(args_0), as_half(args_1), as_half(args_2));
}

// 2-element vector __clc_vec2_float16_t overload of __clc_fma_relu. Each
// operand is passed through as_half2 and the call is forwarded to the
// __clc_fma_relu overload accepting that type; the result is returned as
// __clc_vec2_fp16_t.
// NOTE(review): relies on as_half2 yielding a type distinct from
// __clc_vec2_float16_t so the call does not resolve to this function --
// confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
__clc_fma_relu(__clc_vec2_float16_t args_0, __clc_vec2_float16_t args_1,
__clc_vec2_float16_t args_2) {
return __clc_fma_relu(as_half2(args_0), as_half2(args_1), as_half2(args_2));
}

// 3-element vector __clc_vec3_float16_t overload of __clc_fma_relu. Each
// operand is passed through as_half3 and the call is forwarded to the
// __clc_fma_relu overload accepting that type; the result is returned as
// __clc_vec3_fp16_t.
// NOTE(review): relies on as_half3 yielding a type distinct from
// __clc_vec3_float16_t so the call does not resolve to this function --
// confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
__clc_fma_relu(__clc_vec3_float16_t args_0, __clc_vec3_float16_t args_1,
__clc_vec3_float16_t args_2) {
return __clc_fma_relu(as_half3(args_0), as_half3(args_1), as_half3(args_2));
}

// 4-element vector __clc_vec4_float16_t overload of __clc_fma_relu. Each
// operand is passed through as_half4 and the call is forwarded to the
// __clc_fma_relu overload accepting that type; the result is returned as
// __clc_vec4_fp16_t.
// NOTE(review): relies on as_half4 yielding a type distinct from
// __clc_vec4_float16_t so the call does not resolve to this function --
// confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
__clc_fma_relu(__clc_vec4_float16_t args_0, __clc_vec4_float16_t args_1,
__clc_vec4_float16_t args_2) {
return __clc_fma_relu(as_half4(args_0), as_half4(args_1), as_half4(args_2));
}

// 8-element vector __clc_vec8_float16_t overload of __clc_fma_relu. Each
// operand is passed through as_half8 and the call is forwarded to the
// __clc_fma_relu overload accepting that type; the result is returned as
// __clc_vec8_fp16_t.
// NOTE(review): relies on as_half8 yielding a type distinct from
// __clc_vec8_float16_t so the call does not resolve to this function --
// confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
__clc_fma_relu(__clc_vec8_float16_t args_0, __clc_vec8_float16_t args_1,
__clc_vec8_float16_t args_2) {
return __clc_fma_relu(as_half8(args_0), as_half8(args_1), as_half8(args_2));
}

// 16-element vector __clc_vec16_float16_t overload of __clc_fma_relu. Each
// operand is passed through as_half16 and the call is forwarded to the
// __clc_fma_relu overload accepting that type; the result is returned as
// __clc_vec16_fp16_t.
// NOTE(review): relies on as_half16 yielding a type distinct from
// __clc_vec16_float16_t so the call does not resolve to this function --
// confirm against the type definitions.
_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
__clc_fma_relu(__clc_vec16_float16_t args_0, __clc_vec16_float16_t args_1,
__clc_vec16_float16_t args_2) {
return __clc_fma_relu(as_half16(args_0), as_half16(args_1),
as_half16(args_2));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__spirv_ocl_fmax(__clc_float16_t args_0, __clc_float16_t args_1) {
return __spirv_ocl_fmax(as_half(args_0), as_half(args_1));
Expand Down
Loading