Skip to content

Commit c0291b2

Browse files
authored
[SYCL][Matrix] Add GNR matrix combinations to spec and runtime query (#11867)
As a minor addition, I also added tf32 combination to PVC as part of this PR
1 parent 096676e commit c0291b2

File tree

4 files changed

+52
-12
lines changed

4 files changed

+52
-12
lines changed

sycl/doc/extensions/experimental/sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -855,24 +855,32 @@ XMX hardware. Note that these can be returned using
855855

856856
==== Intel AMX Supported Combinations
857857
This is currently available in devices with the architecture
858-
`architecture::intel_cpu_spr`. In this architecture's implementation,
859-
the type of the C matrix must be the same as the type of the D
860-
matrix. Therefore, that common type is shown in a single column in the
861-
table below.
858+
`architecture::intel_cpu_spr` or `architecture::intel_cpu_gnr`. In
859+
these architectures' implementations, the type of the C matrix must be
860+
the same as the type of the D matrix. Therefore, that common type is
861+
shown in a single column in the table below.
862862

863863
[frame="none",options="header"]
864864
|======================
865-
| A type | B type | C and D type | M | N | K
865+
| A type | B type | C and D type | M | N | K | device
866866
| `matrix_type::uint8` | `matrix_type::uint8` |
867867
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
868+
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
868869
| `matrix_type::uint8` | `matrix_type::sint8` |
869870
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
871+
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
870872
| `matrix_type::sint8` | `matrix_type::uint8` |
871873
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
874+
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
872875
| `matrix_type::sint8` | `matrix_type::sint8` |
873876
`matrix_type::sint32` | +<=+ 16 | +<=+ 16 | +<=+ 64
877+
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
874878
| `matrix_type::bf16` | `matrix_type::bf16` |
875879
`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32
880+
|`architecture::intel_cpu_spr`, `architecture::intel_cpu_gnr`
881+
| `matrix_type::fp16` | `matrix_type::fp16` |
882+
`matrix_type::fp32` | +<=+ 16 | +<=+ 16 | +<=+ 32
883+
|`architecture::intel_cpu_gnr`
876884
|======================
877885

878886
==== Intel XMX Supported Combinations
@@ -911,6 +919,9 @@ architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`
911919
`matrix_type::fp32` .2+| +<=+ 8 | 16 .2+| 16 |
912920
`architecture::intel_gpu_pvc` |8| `architecture::intel_gpu_dg2_g10,
913921
architecture::intel_gpu_dg2_g11, architecture::intel_gpu_dg2_g12`
922+
| `matrix_type::tf32` | `matrix_type::tf32` |
923+
`matrix_type::fp32` | +<=+ 8 | 16 | 8 |
924+
`architecture::intel_gpu_pvc`
914925
|======================
915926

916927
==== Nvidia Tensor Cores Supported Combinations
@@ -1011,7 +1022,7 @@ be used. An attempt to run the compiled code on an unsupported architecture will
10111022
operations support
10121023
|4 |2022-08-25 |Dounia Khaldi |Update the matrix spec by adding the
10131024
new matrix use parameter and remove reference to the AOT AMX initial
1014-
implementation
1025+
implementation
10151026
|5 |2022-11-07 |Dounia Khaldi |Update the matrix spec by making it
10161027
portable across Intel AMX, Intel XMX and Nvidia Tensor Cores, and move
10171028
the Intel-specifics to a separate extension document
@@ -1020,4 +1031,6 @@ type, runtime query, and supported combinations appendix for Intel AMX
10201031
and Intel XMX
10211032
|7 |2023-04-11 |Jack Kirk |Add Nvidia Tensor Cores supported combinations
10221033
|8 |2023-10-05 |Mahmoud Moadeli |Add AMD Matrix Core supported combinations
1034+
|9 |2023-11-13 |Dounia Khaldi |Add Granite Rapids Intel AMX
1035+
supported combinations
10231036
|======================

sycl/doc/extensions/experimental/sycl_ext_oneapi_device_architecture.asciidoc

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ namespace sycl::ext::oneapi::experimental {
100100
enum class architecture : /* unspecified */ {
101101
x86_64,
102102
intel_cpu_spr,
103+
intel_cpu_gnr,
103104
intel_gpu_bdw,
104105
intel_gpu_skl,
105106
intel_gpu_kbl,
@@ -202,6 +203,12 @@ of these enumerators, and it provides a brief description of their meanings.
202203
enumeration is currently limited. See the section "Limitations with
203204
the experimental version" for details.
204205

206+
|`intel_cpu_gnr`
207+
|-
208+
|Intel Xeon processor codenamed Granite Rapids. The utility of this
209+
enumeration is currently limited. See the section "Limitations with
210+
the experimental version" for details.
211+
205212
|`intel_gpu_bdw`
206213
|-
207214
|Broadwell Intel graphics architecture.
@@ -596,11 +603,11 @@ feature, the application must be compiled in ahead-of-time (AOT) mode using
596603
description of the `-fsycl-targets` option. These are the target names of the
597604
form "intel_gpu_*", "nvidia_gpu_*", or "amd_gpu_*".
598605

599-
The architecture enumeration `intel_cpu_spr` does not currently work
600-
with any of the APIs described in this extension. It cannot be used
601-
with the `if_architecture_is` function, the
602-
`device::ext_oneapi_architecture_is` function, or the
603-
`info::device::architecture` query descriptor. It currently exists
606+
The architecture enumerations `intel_cpu_spr` and `intel_cpu_gnr` do
607+
not currently work with any of the APIs described in this
608+
extension. They cannot be used with the `if_architecture_is` function,
609+
the `device::ext_oneapi_architecture_is` function, or the
610+
`info::device::architecture` query descriptor. They currently exist
604611
only for use with the
605612
link:sycl_ext_matrix/sycl_ext_oneapi_matrix.asciidoc[sycl_ext_oneapi_matrix]
606613
extension.

sycl/include/sycl/ext/oneapi/experimental/device_architecture.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ namespace ext::oneapi::experimental {
1515
enum class architecture {
1616
x86_64,
1717
intel_cpu_spr,
18+
intel_cpu_gnr,
1819
intel_gpu_bdw,
1920
intel_gpu_skl,
2021
intel_gpu_kbl,

sycl/source/detail/device_info.hpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,9 @@ struct get_device_info_impl<range<Dimensions>,
622622
// This macro is only for Intel CPU architectures
623623
// TODO: extend the macro with other CPU architectures when they are added
624624
// to ext_oneapi_device_architecture
625-
#define INTEL_CPU_ARCHES(X) X(8, oneapi_exp_arch::intel_cpu_spr)
625+
#define INTEL_CPU_ARCHES(X) \
626+
X(8, oneapi_exp_arch::intel_cpu_spr) \
627+
X(9, oneapi_exp_arch::intel_cpu_gnr)
626628

627629
#define CMP_NVIDIA_AMD(s, i) \
628630
if (strcmp(s, arch) == 0) \
@@ -732,6 +734,21 @@ struct get_device_info_impl<
732734
{16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
733735
matrix_type::fp32, matrix_type::fp32},
734736
};
737+
else if (architecture::intel_cpu_gnr == DeviceArch)
738+
return {
739+
{16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::uint8,
740+
matrix_type::sint32, matrix_type::sint32},
741+
{16, 16, 64, 0, 0, 0, matrix_type::uint8, matrix_type::sint8,
742+
matrix_type::sint32, matrix_type::sint32},
743+
{16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::uint8,
744+
matrix_type::sint32, matrix_type::sint32},
745+
{16, 16, 64, 0, 0, 0, matrix_type::sint8, matrix_type::sint8,
746+
matrix_type::sint32, matrix_type::sint32},
747+
{16, 16, 32, 0, 0, 0, matrix_type::bf16, matrix_type::bf16,
748+
matrix_type::fp32, matrix_type::fp32},
749+
{16, 16, 32, 0, 0, 0, matrix_type::fp16, matrix_type::fp16,
750+
matrix_type::fp32, matrix_type::fp32},
751+
};
735752
else if (architecture::intel_gpu_pvc == DeviceArch)
736753
return {
737754
{8, 0, 0, 0, 16, 32, matrix_type::uint8, matrix_type::uint8,
@@ -746,6 +763,8 @@ struct get_device_info_impl<
746763
matrix_type::fp32, matrix_type::fp32},
747764
{8, 0, 0, 0, 16, 16, matrix_type::bf16, matrix_type::bf16,
748765
matrix_type::fp32, matrix_type::fp32},
766+
{8, 0, 0, 0, 16, 8, matrix_type::tf32, matrix_type::tf32,
767+
matrix_type::fp32, matrix_type::fp32},
749768
};
750769
else if ((architecture::intel_gpu_dg2_g10 == DeviceArch) ||
751770
(architecture::intel_gpu_dg2_g11 == DeviceArch) ||

0 commit comments

Comments
 (0)