|
11 | 11 | #include "level_zero/tools/source/metrics/os_metric_ip_sampling.h"
|
12 | 12 | #include <level_zero/zet_api.h>
|
13 | 13 |
|
| 14 | +#include <cstring> |
| 15 | + |
14 | 16 | namespace L0 {
|
15 | 17 | constexpr uint32_t ipSamplinMetricCount = 10u;
|
16 | 18 | constexpr uint32_t ipSamplinDomainId = 100u;
|
@@ -149,14 +151,194 @@ ze_result_t IpSamplingMetricGroupImp::metricGet(uint32_t *pCount, zet_metric_han
|
149 | 151 | ze_result_t IpSamplingMetricGroupImp::calculateMetricValues(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
|
150 | 152 | const uint8_t *pRawData, uint32_t *pMetricValueCount,
|
151 | 153 | zet_typed_value_t *pMetricValues) {
|
152 |
| - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; |
| 154 | + const bool calculateCountOnly = *pMetricValueCount == 0; |
| 155 | + if (calculateCountOnly) { |
| 156 | + return getCalculatedMetricCount(rawDataSize, *pMetricValueCount); |
| 157 | + } else { |
| 158 | + return getCalculatedMetricValues(type, rawDataSize, pRawData, *pMetricValueCount, pMetricValues); |
| 159 | + } |
153 | 160 | }
|
154 | 161 |
|
155 | 162 | ze_result_t IpSamplingMetricGroupImp::calculateMetricValuesExp(const zet_metric_group_calculation_type_t type, size_t rawDataSize,
|
156 | 163 | const uint8_t *pRawData, uint32_t *pSetCount,
|
157 | 164 | uint32_t *pTotalMetricValueCount, uint32_t *pMetricCounts,
|
158 | 165 | zet_typed_value_t *pMetricValues) {
|
159 |
| - return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; |
| 166 | + const bool calculationCountOnly = *pTotalMetricValueCount == 0; |
| 167 | + ze_result_t result = this->calculateMetricValues(type, rawDataSize, pRawData, pTotalMetricValueCount, pMetricValues); |
| 168 | + |
| 169 | + if (result == ZE_RESULT_SUCCESS) { |
| 170 | + *pSetCount = 1; |
| 171 | + if (!calculationCountOnly) { |
| 172 | + pMetricCounts[0] = *pTotalMetricValueCount; |
| 173 | + } |
| 174 | + } else { |
| 175 | + if (calculationCountOnly) { |
| 176 | + *pSetCount = 0; |
| 177 | + *pTotalMetricValueCount = 0; |
| 178 | + } else { |
| 179 | + pMetricCounts[0] = 0; |
| 180 | + } |
| 181 | + } |
| 182 | + return result; |
| 183 | +} |
| 184 | + |
| 185 | +ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricCount(const size_t rawDataSize, |
| 186 | + uint32_t &metricValueCount) { |
| 187 | + |
| 188 | + uint32_t rawReportSize = 64; |
| 189 | + |
| 190 | + if ((rawDataSize % rawReportSize) != 0) { |
| 191 | + return ZE_RESULT_ERROR_INVALID_SIZE; |
| 192 | + } |
| 193 | + |
| 194 | + const uint32_t rawReportCount = static_cast<uint32_t>(rawDataSize) / rawReportSize; |
| 195 | + metricValueCount = rawReportCount * properties.metricCount; |
| 196 | + return ZE_RESULT_SUCCESS; |
| 197 | +} |
| 198 | + |
| 199 | +ze_result_t IpSamplingMetricGroupImp::getCalculatedMetricValues(const zet_metric_group_calculation_type_t type, const size_t rawDataSize, const uint8_t *pRawData, |
| 200 | + uint32_t &metricValueCount, |
| 201 | + zet_typed_value_t *pCalculatedData) { |
| 202 | + StallSumIpDataMap_t stallSumIpDataMap; |
| 203 | + |
| 204 | + // MAX_METRIC_VALUES is not supported yet. |
| 205 | + if (type != ZET_METRIC_GROUP_CALCULATION_TYPE_METRIC_VALUES) { |
| 206 | + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; |
| 207 | + } |
| 208 | + |
| 209 | + DEBUG_BREAK_IF(pCalculatedData == nullptr); |
| 210 | + |
| 211 | + uint32_t rawReportSize = 64; |
| 212 | + |
| 213 | + if ((rawDataSize % rawReportSize) != 0) { |
| 214 | + return ZE_RESULT_ERROR_INVALID_SIZE; |
| 215 | + } |
| 216 | + |
| 217 | + const uint32_t rawReportCount = static_cast<uint32_t>(rawDataSize) / rawReportSize; |
| 218 | + |
| 219 | + for (const uint8_t *pRawIpData = pRawData; pRawIpData < pRawData + (rawReportCount * rawReportSize); pRawIpData += rawReportSize) { |
| 220 | + stallIpDataMapUpdate(stallSumIpDataMap, pRawIpData); |
| 221 | + } |
| 222 | + |
| 223 | + metricValueCount = std::min<uint32_t>(metricValueCount, static_cast<uint32_t>(stallSumIpDataMap.size()) * properties.metricCount); |
| 224 | + std::vector<zet_typed_value_t> ipDataValues; |
| 225 | + uint32_t i = 0; |
| 226 | + for (auto it = stallSumIpDataMap.begin(); it != stallSumIpDataMap.end(); ++it) { |
| 227 | + stallSumIpDataToTypedValues(it->first, it->second, ipDataValues); |
| 228 | + for (auto jt = ipDataValues.begin(); (jt != ipDataValues.end()) && (i < metricValueCount); jt++, i++) { |
| 229 | + *(pCalculatedData + i) = *jt; |
| 230 | + } |
| 231 | + ipDataValues.clear(); |
| 232 | + } |
| 233 | + |
| 234 | + return ZE_RESULT_SUCCESS; |
| 235 | +} |
| 236 | + |
| 237 | +/* |
| 238 | + * stall sample data item format: |
| 239 | + * |
| 240 | + * Bits Field |
| 241 | + * 0 to 28 IP (addr) |
| 242 | + * 29 to 36 active count |
| 243 | + * 37 to 44 other count |
| 244 | + * 45 to 52 control count |
| 245 | + * 53 to 60 pipestall count |
| 246 | + * 61 to 68 send count |
| 247 | + * 69 to 76 dist_acc count |
| 248 | + * 77 to 84 sbid count |
| 249 | + * 85 to 92 sync count |
| 250 | + * 93 to 100 inst_fetch count |
| 251 | + * |
| 252 | + * bytes 49 and 50, subSlice |
| 253 | + * bytes 51 and 52, flags |
| 254 | + * |
| 255 | + * total size 64 bytes |
| 256 | + */ |
| 257 | +void IpSamplingMetricGroupImp::stallIpDataMapUpdate(StallSumIpDataMap_t &stallSumIpDataMap, const uint8_t *pRawIpData) { |
| 258 | + |
| 259 | + const uint8_t *tempAddr = pRawIpData; |
| 260 | + uint64_t ip = 0ULL; |
| 261 | + memcpy_s(reinterpret_cast<uint8_t *>(&ip), sizeof(ip), tempAddr, sizeof(ip)); |
| 262 | + ip &= 0x1fffffff; |
| 263 | + StallSumIpData_t &stallSumData = stallSumIpDataMap[ip]; |
| 264 | + tempAddr += 3; |
| 265 | + |
| 266 | + auto getCount = [&tempAddr]() { |
| 267 | + uint16_t tempCount = 0; |
| 268 | + memcpy_s(reinterpret_cast<uint8_t *>(&tempCount), sizeof(tempCount), tempAddr, sizeof(tempCount)); |
| 269 | + tempCount = (tempCount >> 5) & 0xff; |
| 270 | + tempAddr += 1; |
| 271 | + return static_cast<uint8_t>(tempCount); |
| 272 | + }; |
| 273 | + |
| 274 | + stallSumData.activeCount += getCount(); |
| 275 | + stallSumData.otherCount += getCount(); |
| 276 | + stallSumData.controlCount += getCount(); |
| 277 | + stallSumData.pipeStallCount += getCount(); |
| 278 | + stallSumData.sendCount += getCount(); |
| 279 | + stallSumData.distAccCount += getCount(); |
| 280 | + stallSumData.sbidCount += getCount(); |
| 281 | + stallSumData.syncCount += getCount(); |
| 282 | + stallSumData.instFetchCount += getCount(); |
| 283 | + |
| 284 | + struct stallCntrInfo { |
| 285 | + uint16_t subslice; |
| 286 | + uint16_t flags; |
| 287 | + } stallCntrInfo = {}; |
| 288 | + |
| 289 | + tempAddr = pRawIpData + 48; |
| 290 | + memcpy_s(reinterpret_cast<uint8_t *>(&stallCntrInfo), sizeof(stallCntrInfo), tempAddr, sizeof(stallCntrInfo)); |
| 291 | + |
| 292 | + constexpr int overflowDropFlag = (1 << 8); |
| 293 | + if (stallCntrInfo.flags & overflowDropFlag) { |
| 294 | + PRINT_DEBUG_STRING(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr, "Stall Sampling Data Lost %s\n", " "); |
| 295 | + } |
| 296 | +} |
| 297 | + |
| 298 | +// The order of push_back calls must match the order of metricPropertiesList. |
| 299 | +void IpSamplingMetricGroupImp::stallSumIpDataToTypedValues(uint64_t ip, |
| 300 | + StallSumIpData_t &sumIpData, |
| 301 | + std::vector<zet_typed_value_t> &ipDataValues) { |
| 302 | + zet_typed_value_t tmpValueData; |
| 303 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 304 | + tmpValueData.value.ui64 = ip; |
| 305 | + ipDataValues.push_back(tmpValueData); |
| 306 | + |
| 307 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 308 | + tmpValueData.value.ui64 = sumIpData.activeCount; |
| 309 | + ipDataValues.push_back(tmpValueData); |
| 310 | + |
| 311 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 312 | + tmpValueData.value.ui64 = sumIpData.controlCount; |
| 313 | + ipDataValues.push_back(tmpValueData); |
| 314 | + |
| 315 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 316 | + tmpValueData.value.ui64 = sumIpData.pipeStallCount; |
| 317 | + ipDataValues.push_back(tmpValueData); |
| 318 | + |
| 319 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 320 | + tmpValueData.value.ui64 = sumIpData.sendCount; |
| 321 | + ipDataValues.push_back(tmpValueData); |
| 322 | + |
| 323 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 324 | + tmpValueData.value.ui64 = sumIpData.distAccCount; |
| 325 | + ipDataValues.push_back(tmpValueData); |
| 326 | + |
| 327 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 328 | + tmpValueData.value.ui64 = sumIpData.sbidCount; |
| 329 | + ipDataValues.push_back(tmpValueData); |
| 330 | + |
| 331 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 332 | + tmpValueData.value.ui64 = sumIpData.syncCount; |
| 333 | + ipDataValues.push_back(tmpValueData); |
| 334 | + |
| 335 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 336 | + tmpValueData.value.ui64 = sumIpData.instFetchCount; |
| 337 | + ipDataValues.push_back(tmpValueData); |
| 338 | + |
| 339 | + tmpValueData.type = ZET_VALUE_TYPE_UINT64; |
| 340 | + tmpValueData.value.ui64 = sumIpData.otherCount; |
| 341 | + ipDataValues.push_back(tmpValueData); |
160 | 342 | }
|
161 | 343 |
|
162 | 344 | bool IpSamplingMetricGroupImp::activate() {
|
|
0 commit comments