Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit 90738b1

Browse files
authored
[ESIMD] Add a LIT test verifying DPAS with 2 tfloat32 arguments (#1180)
* [ESIMD] Add a LIT test verifying DPAS with 2 tfloat32 arguments Signed-off-by: Vyacheslav N Klochkov <[email protected]>
1 parent c2d6146 commit 90738b1

File tree

1 file changed

+89
-0
lines changed

1 file changed

+89
-0
lines changed

SYCL/ESIMD/dpas/dpas_tf32.cpp

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
//==---------------- dpas_tf32.cpp - DPC++ ESIMD on-device test ----------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// REQUIRES: gpu-intel-pvc || esimd_emulator
9+
// UNSUPPORTED: cuda || hip
10+
// RUN: %clangxx -fsycl -DESIMD_XE_HPC %s -o %t.out
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
12+
// XFAIL: esimd_emulator
13+
14+
// The test verifies the low-level API for DPAS with 'tfloat32' types.
15+
// It checks the versions of DPAS with and without the accumulator operand.
16+
17+
#include "../esimd_test_utils.hpp"
18+
19+
#include <sycl/ext/intel/esimd.hpp>
20+
#include <sycl/sycl.hpp>
21+
22+
using namespace sycl;
23+
using namespace sycl::ext::intel::esimd;
24+
using namespace sycl::ext::intel::experimental::esimd;
25+
26+
int main() {
27+
queue Q(esimd_test::ESIMDSelector{}, esimd_test::createExceptionHandler());
28+
29+
constexpr int REPEAT_COUNT = 8;
30+
constexpr int SYSTOLIC_DEPTH = 8;
31+
constexpr int EXECUTION_SIZE = 16;
32+
33+
constexpr int M = REPEAT_COUNT;
34+
constexpr int N = EXECUTION_SIZE;
35+
constexpr int K = SYSTOLIC_DEPTH; // SYSTOLIC_DEPTH * OPS_PER_CHANNEL
36+
float *A = malloc_shared<float>(M * K, Q);
37+
float *B = malloc_shared<float>(K * N, Q);
38+
float *C = malloc_shared<float>(M * N, Q);
39+
float *D = malloc_shared<float>(M * N, Q);
40+
for (int I = 0; I < M * K; ++I)
41+
A[I] = I;
42+
for (int I = 0; I < K * N; ++I)
43+
B[I] = I;
44+
45+
Q.single_task([=]() SYCL_ESIMD_KERNEL {
46+
simd<float, M * K> AVec(A);
47+
simd<float, K * N> BVec(B);
48+
auto AView = AVec.template bit_cast_view<uint>();
49+
auto BView = BVec.template bit_cast_view<uint>();
50+
// C(MxN) = A(MxK) * B(KxN)
51+
simd<float, M *N> CVec =
52+
dpas<argument_type::TF32, argument_type::TF32, SYSTOLIC_DEPTH,
53+
REPEAT_COUNT, float, uint, uint, M * N, K * N, M * K>(
54+
BView.read(), AView.read());
55+
CVec.copy_to(C);
56+
57+
// D(MxN) = D(MxN) + A(MxK) * B(KxN);
58+
simd<float, M *N> DVec = 1.0;
59+
DVec = dpas<argument_type::TF32, argument_type::TF32, SYSTOLIC_DEPTH,
60+
REPEAT_COUNT, float, uint, uint, M * N, K * N, M * K>(
61+
DVec, BView.read(), AView.read());
62+
DVec.copy_to(D);
63+
}).wait();
64+
65+
unsigned ErrCnt = 0;
66+
for (unsigned I = 0; (I < M * N) && (ErrCnt < 10); ++I) {
67+
int m = I / N;
68+
int n = I % N;
69+
float RefResC = 0.0f;
70+
for (int k = 0; k < K; ++k)
71+
RefResC += float((m * K + k) * (k * N + n));
72+
if (std::abs(RefResC - C[I]) > 0.001) {
73+
std::cerr << "C[i] vs ref: " << C[I] << " : " << RefResC << std::endl;
74+
ErrCnt++;
75+
}
76+
float RefResD = RefResC + 1.0;
77+
if (std::abs(RefResD - D[I]) > 0.001) {
78+
std::cerr << "D[i] vs ref: " << D[I] << " : " << RefResD << std::endl;
79+
ErrCnt++;
80+
}
81+
}
82+
free(A, Q);
83+
free(B, Q);
84+
free(C, Q);
85+
free(D, Q);
86+
87+
std::cout << (ErrCnt > 0 ? "FAILED\n" : "Passed\n");
88+
return ErrCnt > 0 ? 1 : 0;
89+
}

0 commit comments

Comments
 (0)