Skip to content

Commit 5dfd68a

Browse files
Add Ip sampling metrics calculate values Support
Related-To: LOCI-2757 Signed-off-by: davidoli <[email protected]>
1 parent 0791898 commit 5dfd68a

File tree

4 files changed

+486
-9
lines changed

4 files changed

+486
-9
lines changed

level_zero/tools/source/metrics/metric_ip_sampling_source.cpp

Lines changed: 184 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "level_zero/tools/source/metrics/os_metric_ip_sampling.h"
1212
#include <level_zero/zet_api.h>
1313

14+
#include <cstring>
15+
1416
namespace L0 {
1517
constexpr uint32_t ipSamplinMetricCount = 10u;
1618
constexpr uint32_t ipSamplinDomainId = 100u;
@@ -149,14 +151,194 @@ ze_result_t IpSamplingMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_han
149151
// Entry point for metric value calculation.
// A caller-supplied *pMetricValueCount of zero is treated as a size query: only
// the count is computed and written back. Any other count requests the
// calculated values themselves, written to pMetricValues.
ze_result_t IpSamplingMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                            const uint8_t *pRawData, uint32_t *pMetricValueCount,
                                                            zet_typed_value_t *pMetricValues) {
    if (*pMetricValueCount != 0) {
        return getCalculatedMetricValues(type, rawDataSize, pRawData, *pMetricValueCount, pMetricValues);
    }

    // Size query: neither pRawData nor pMetricValues is passed on.
    return getCalculatedMetricCount(rawDataSize, *pMetricValueCount);
}
154161

155162
// Multi-set variant of calculateMetricValues.
// The work is delegated to calculateMetricValues and the result is reported as
// a single set: on success *pSetCount becomes 1 and, when values (not just the
// count) were requested, pMetricCounts[0] receives the total value count.
// On failure the set bookkeeping is zeroed and the error is returned unchanged.
ze_result_t IpSamplingMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
                                                               const uint8_t *pRawData, uint32_t *pSetCount,
                                                               uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts,
                                                               zet_typed_value_t *pMetricValues) {
    // Sampled before delegating because the callee overwrites *pTotalMetricValueCount.
    const bool countQuery = (*pTotalMetricValueCount == 0);
    const ze_result_t status = this->calculateMetricValues(type, rawDataSize, pRawData, pTotalMetricValueCount, pMetricValues);

    if (status != ZE_RESULT_SUCCESS) {
        if (countQuery) {
            *pSetCount = 0;
            *pTotalMetricValueCount = 0;
        } else {
            pMetricCounts[0] = 0;
        }
        return status;
    }

    *pSetCount = 1;
    if (!countQuery) {
        // pMetricCounts is only written when values were requested.
        pMetricCounts[0] = *pTotalMetricValueCount;
    }
    return status;
}
184+
185+
// Computes how many typed values the caller must provide room for when
// calculating rawDataSize bytes of raw data: one value per metric of the group
// for every 64-byte raw report.
//
// rawDataSize      - size of the raw data in bytes; must be a whole number of reports.
// metricValueCount - receives the value count on success; left untouched on failure.
//
// Returns ZE_RESULT_ERROR_INVALID_SIZE when rawDataSize is not a multiple of the
// report size or when the resulting count does not fit in 32 bits, otherwise
// ZE_RESULT_SUCCESS.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const size_t rawDataSize,
                                                               uint32_t &metricValueCount) {

    constexpr size_t rawReportSize = 64;
    // Largest count representable in the uint32_t out-parameter.
    constexpr uint64_t maxValueCount = 0xffffffffull;

    if ((rawDataSize % rawReportSize) != 0) {
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }

    // Divide first, narrow afterwards: narrowing rawDataSize before the division
    // would silently drop the upper bits of sizes of 4 GiB and above.
    const uint64_t rawReportCount = rawDataSize / rawReportSize;
    const uint32_t metricCount = properties.metricCount;

    // Reject counts that would wrap around instead of reporting a too-small buffer size.
    if ((metricCount != 0) && (rawReportCount > (maxValueCount / metricCount))) {
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }

    metricValueCount = static_cast<uint32_t>(rawReportCount) * metricCount;
    return ZE_RESULT_SUCCESS;
}
198+
199+
// Decodes the raw stall sampling reports in pRawData, sums the counters of all
// reports that share an IP, and writes the per-IP results to pCalculatedData as
// typed values.
//
// type             - only ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES is supported.
// rawDataSize      - size of pRawData in bytes; must be a whole number of 64-byte reports.
// pRawData         - raw reports to decode.
// metricValueCount - in: capacity of pCalculatedData in values;
//                    out: number of values actually written (never more than the capacity).
// pCalculatedData  - destination for the calculated values.
//
// Returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE for other calculation types,
// ZE_RESULT_ERROR_INVALID_SIZE for a size that is not a multiple of the report
// size, otherwise ZE_RESULT_SUCCESS.
ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pRawData,
                                                                uint32_t &metricValueCount,
                                                                zet_typed_value_t *pCalculatedData) {
    // MAX_METRIC_VALUES is not supported yet.
    if (type != ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES) {
        return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
    }

    DEBUG_BREAK_IF(pCalculatedData == nullptr);

    constexpr size_t rawReportSize = 64;

    if ((rawDataSize % rawReportSize) != 0) {
        return ZE_RESULT_ERROR_INVALID_SIZE;
    }

    // rawDataSize is a multiple of the report size here, so the end pointer can be
    // formed from the full size_t value; no narrowing to 32 bits is involved.
    StallSumIpDataMap_t stallSumIpDataMap;
    const uint8_t *const pRawDataEnd = pRawData + rawDataSize;
    for (const uint8_t *pRawIpData = pRawData; pRawIpData < pRawDataEnd; pRawIpData += rawReportSize) {
        stallIpDataMapUpdate(stallSumIpDataMap, pRawIpData);
    }

    // Clamp in 64 bits so a large number of distinct IPs cannot wrap the product.
    const uint64_t availableValueCount = static_cast<uint64_t>(stallSumIpDataMap.size()) * properties.metricCount;
    metricValueCount = static_cast<uint32_t>(std::min<uint64_t>(metricValueCount, availableValueCount));

    std::vector<zet_typed_value_t> ipDataValues;
    uint32_t writtenCount = 0;
    for (auto &ipEntry : stallSumIpDataMap) {
        if (writtenCount >= metricValueCount) {
            // Output buffer is full; converting further IPs would be wasted work.
            break;
        }

        ipDataValues.clear();
        stallSumIpDataToTypedValues(ipEntry.first, ipEntry.second, ipDataValues);

        for (const auto &typedValue : ipDataValues) {
            if (writtenCount >= metricValueCount) {
                break;
            }
            pCalculatedData[writtenCount++] = typedValue;
        }
    }

    return ZE_RESULT_SUCCESS;
}
236+
237+
/*
238+
* stall sample data item format:
239+
*
240+
* Bits Field
241+
* 0 to 28 IP (addr)
242+
* 29 to 36 active count
243+
* 37 to 44 other count
244+
* 45 to 52 control count
245+
* 53 to 60 pipestall count
246+
* 61 to 68 send count
247+
* 69 to 76 dist_acc count
248+
* 77 to 84 sbid count
249+
* 85 to 92 sync count
250+
* 93 to 100 inst_fetch count
251+
*
252+
* bytes 49 and 50, subSlice
253+
* bytes 51 and 52, flags
254+
*
255+
* total size 64 bytes
256+
*/
257+
void IpSamplingMetricGroupImp::stallIpDataMapUpdate(StallSumIpDataMap_t &stallSumIpDataMap, const uint8_t *pRawIpData) {
258+
259+
const uint8_t *tempAddr = pRawIpData;
260+
uint64_t ip = 0ULL;
261+
memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip));
262+
ip &= 0x1fffffff;
263+
StallSumIpData_t &stallSumData = stallSumIpDataMap[ip];
264+
tempAddr += 3;
265+
266+
auto getCount = [&tempAddr]() {
267+
uint16_t tempCount = 0;
268+
memcpy_s(reinterpret_cast<uint8_t *>(&tempCount), sizeof(tempCount), tempAddr, sizeof(tempCount));
269+
tempCount = (tempCount >> 5) & 0xff;
270+
tempAddr += 1;
271+
return static_cast<uint8_t>(tempCount);
272+
};
273+
274+
stallSumData.activeCount += getCount();
275+
stallSumData.otherCount += getCount();
276+
stallSumData.controlCount += getCount();
277+
stallSumData.pipeStallCount += getCount();
278+
stallSumData.sendCount += getCount();
279+
stallSumData.distAccCount += getCount();
280+
stallSumData.sbidCount += getCount();
281+
stallSumData.syncCount += getCount();
282+
stallSumData.instFetchCount += getCount();
283+
284+
struct stallCntrInfo {
285+
uint16_t subslice;
286+
uint16_t flags;
287+
} stallCntrInfo = {};
288+
289+
tempAddr = pRawIpData + 48;
290+
memcpy_s(reinterpret_cast<uint8_t *>(&stallCntrInfo), sizeof(stallCntrInfo), tempAddr, sizeof(stallCntrInfo));
291+
292+
constexpr int overflowDropFlag = (1 << 8);
293+
if (stallCntrInfo.flags & overflowDropFlag) {
294+
PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Stall Sampling Data Lost %s\n", " ");
295+
}
296+
}
297+
298+
// The order of push_back calls must match the order of metricPropertiesList.
299+
void IpSamplingMetricGroupImp::stallSumIpDataToTypedValues(uint64_t ip,
300+
StallSumIpData_t &sumIpData,
301+
std::vector<zet_typed_value_t> &ipDataValues) {
302+
zet_typed_value_t tmpValueData;
303+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
304+
tmpValueData.value.ui64 = ip;
305+
ipDataValues.push_back(tmpValueData);
306+
307+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
308+
tmpValueData.value.ui64 = sumIpData.activeCount;
309+
ipDataValues.push_back(tmpValueData);
310+
311+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
312+
tmpValueData.value.ui64 = sumIpData.controlCount;
313+
ipDataValues.push_back(tmpValueData);
314+
315+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
316+
tmpValueData.value.ui64 = sumIpData.pipeStallCount;
317+
ipDataValues.push_back(tmpValueData);
318+
319+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
320+
tmpValueData.value.ui64 = sumIpData.sendCount;
321+
ipDataValues.push_back(tmpValueData);
322+
323+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
324+
tmpValueData.value.ui64 = sumIpData.distAccCount;
325+
ipDataValues.push_back(tmpValueData);
326+
327+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
328+
tmpValueData.value.ui64 = sumIpData.sbidCount;
329+
ipDataValues.push_back(tmpValueData);
330+
331+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
332+
tmpValueData.value.ui64 = sumIpData.syncCount;
333+
ipDataValues.push_back(tmpValueData);
334+
335+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
336+
tmpValueData.value.ui64 = sumIpData.instFetchCount;
337+
ipDataValues.push_back(tmpValueData);
338+
339+
tmpValueData.type = ZET_VALUE_TYPE_UINT64;
340+
tmpValueData.value.ui64 = sumIpData.otherCount;
341+
ipDataValues.push_back(tmpValueData);
160342
}
161343

162344
bool IpSamplingMetricGroupImp::activate() {

level_zero/tools/source/metrics/metric_ip_sampling_source.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,20 @@ class IpSamplingMetricSourceImp : public MetricSource {
3939
std::unique_ptr<IpSamplingMetricGroupImp> cachedMetricGroup = nullptr;
4040
};
4141

42+
// Running sums of the stall counters of all raw samples that share one IP.
// Members carry no default initializers; the accumulation code obtains entries
// through std::map::operator[], which value-initializes them.
struct StallSumIpData {
    uint64_t activeCount;
    uint64_t otherCount;
    uint64_t controlCount;
    uint64_t pipeStallCount;
    uint64_t sendCount;
    uint64_t distAccCount;
    uint64_t sbidCount;
    uint64_t syncCount;
    uint64_t instFetchCount;
};
using StallSumIpData_t = StallSumIpData;

// Per-IP accumulators keyed by IP value.
using StallSumIpDataMap_t = std::map<uint64_t, StallSumIpData_t>;
55+
4256
struct IpSamplingMetricGroupImp : public MetricGroup {
4357
IpSamplingMetricGroupImp(std::vector<IpSamplingMetricImp> &metrics);
4458
virtual ~IpSamplingMetricGroupImp() = default;
@@ -71,6 +85,12 @@ struct IpSamplingMetricGroupImp : public MetricGroup {
7185
private:
7286
std::vector<std::unique_ptr<IpSamplingMetricImp>> metrics = {};
7387
zet_metric_group_properties_t properties = {};
88+
ze_result_t getCalculatedMetricCount(const size_t rawDataSize, uint32_t &metricValueCount);
89+
ze_result_t getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pRawData,
90+
uint32_t &metricValueCount,
91+
zet_typed_value_t *pCalculatedData);
92+
void stallIpDataMapUpdate(StallSumIpDataMap_t &, const uint8_t *pRawIpData);
93+
void stallSumIpDataToTypedValues(uint64_t ip, StallSumIpData_t &sumIpData, std::vector<zet_typed_value_t> &ipDataValues);
7494
};
7595

7696
struct IpSamplingMetricImp : public Metric {

level_zero/tools/test/unit_tests/sources/metrics/metric_ip_sampling_fixture.h

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,79 @@
1212
namespace L0 {
1313
namespace ult {
1414
class MockMetricIpSamplingOsInterface;
15+
16+
// Builds one 64-byte raw stall sample for tests.
// rawData[0] and rawData[1] hold the 29-bit IP followed by nine 8-bit counters
// packed back to back (sendCount straddles the two words: its low 3 bits end
// rawData[0], its high 5 bits start rawData[1]). rawData[6] holds subSlice in
// its low 16 bits and flags in the next 16 bits. All other words are zero.
class MockStallRawIpData {
  public:
    static constexpr uint32_t ipShift = 29;
    static constexpr uint32_t ipMask = 0x1fffffff;

    static constexpr uint32_t byteShift = 8;
    static constexpr uint32_t byteMask = 0xff;

    static constexpr uint32_t wordShift = 16;
    static constexpr uint32_t wordMask = 0xffff;

    uint64_t rawData[8];
    MockStallRawIpData(uint64_t ip, uint64_t activeCount, uint64_t otherCount, uint64_t controlCount,
                       uint64_t pipeStallCount, uint64_t sendCount, uint64_t distAccCount,
                       uint64_t sbidCount, uint64_t syncCount, uint64_t instFetchCount, uint64_t subSlice,
                       uint64_t flags) {

        for (uint64_t &word : rawData) {
            word = 0;
        }

        // Places the low 8 bits of count at the given bit offset inside a 64-bit word.
        const auto counterAt = [](uint64_t count, uint32_t bitOffset) -> uint64_t {
            return (count & byteMask) << bitOffset;
        };

        uint64_t firstWord = ip & ipMask;
        firstWord |= counterAt(activeCount, ipShift);
        firstWord |= counterAt(otherCount, ipShift + byteShift);
        firstWord |= counterAt(controlCount, ipShift + 2 * byteShift);
        firstWord |= counterAt(pipeStallCount, ipShift + 3 * byteShift);
        firstWord |= (sendCount & 0x7) << (ipShift + 4 * byteShift);
        rawData[0] = firstWord;

        uint64_t secondWord = (sendCount & 0xf8) >> 3;
        secondWord |= counterAt(distAccCount, 5);
        secondWord |= counterAt(sbidCount, 5 + byteShift);
        secondWord |= counterAt(syncCount, 5 + 2 * byteShift);
        secondWord |= counterAt(instFetchCount, 5 + 3 * byteShift);
        rawData[1] = secondWord;

        rawData[6] = (subSlice & wordMask) | ((flags & wordMask) << wordShift);
    }
};
1554
class MetricIpSamplingFixture : public MultiDeviceFixture,
1655
public ::testing::Test {
1756
public:
1857
void SetUp() override;
1958
void TearDown() override;
2059

60+
std::vector<MockStallRawIpData> rawDataVector = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1000, 0x01},
61+
{1, 9, 8, 7, 6, 5, 4, 3, 2, 1, 1000, 0x02},
62+
{10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1001, 0x100}, // set the overflow bit in flags
63+
{10, 90, 80, 70, 60, 50, 40, 30, 20, 10, 1000, 0x3}};
64+
65+
size_t rawDataVectorSize = sizeof(rawDataVector[0]) * rawDataVector.size();
66+
std::vector<zet_typed_value_t> expectedMetricValues = {
67+
{ZET_VALUE_TYPE_UINT64, {1}},
68+
{ZET_VALUE_TYPE_UINT64, {11}},
69+
{ZET_VALUE_TYPE_UINT64, {11}},
70+
{ZET_VALUE_TYPE_UINT64, {11}},
71+
{ZET_VALUE_TYPE_UINT64, {11}},
72+
{ZET_VALUE_TYPE_UINT64, {11}},
73+
{ZET_VALUE_TYPE_UINT64, {11}},
74+
{ZET_VALUE_TYPE_UINT64, {11}},
75+
{ZET_VALUE_TYPE_UINT64, {11}},
76+
{ZET_VALUE_TYPE_UINT64, {11}},
77+
{ZET_VALUE_TYPE_UINT64, {10}},
78+
{ZET_VALUE_TYPE_UINT64, {110}},
79+
{ZET_VALUE_TYPE_UINT64, {110}},
80+
{ZET_VALUE_TYPE_UINT64, {110}},
81+
{ZET_VALUE_TYPE_UINT64, {110}},
82+
{ZET_VALUE_TYPE_UINT64, {110}},
83+
{ZET_VALUE_TYPE_UINT64, {110}},
84+
{ZET_VALUE_TYPE_UINT64, {110}},
85+
{ZET_VALUE_TYPE_UINT64, {110}},
86+
{ZET_VALUE_TYPE_UINT64, {110}}};
87+
2188
std::vector<MockMetricIpSamplingOsInterface *> osInterfaceVector = {};
2289
std::vector<L0::Device *> testDevices = {};
2390
};

0 commit comments

Comments
 (0)