|
14 | 14 | from argparse import Action, ArgumentParser, Namespace
|
15 | 15 | from io import BytesIO
|
16 | 16 | from logging import info, warning
|
17 |
| -from typing import Any, List, Optional |
| 17 | +from typing import Any, Dict, List, Optional |
18 | 18 | from urllib import error, request
|
19 | 19 |
|
20 | 20 |
|
|
24 | 24 | BENCHMARK_RESULTS_FILENAME = "benchmark_results.json"
|
25 | 25 | ARTIFACTS_FILENAME_REGEX = re.compile(r"(android|ios)-artifacts-(?P<job_id>\d+).json")
|
26 | 26 |
|
# iOS-related regexes and variables

# Matches one measurement line from the xcresult test spec output, capturing the
# test class, test name, metric label, and its average value, e.g.
#   Test Case '-[Tests test_forward_llama2_pte_iOS_17_2_1_iPhone15_4]' measured [Clock Monotonic Time, s] average: 1.234, ...
IOS_TEST_SPEC_REGEX = re.compile(
    r"Test Case\s+'-\[(?P<test_class>\w+)\s+(?P<test_name>\w+)\]'\s+measured\s+\[(?P<metric>.+)\]\s+average:\s+(?P<value>[\d\.]+),"
)
# Breaks an iOS test name into method (forward/load/generate), model name, and
# the underscore-separated iOS / iPhone versions, e.g.
#   test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4
IOS_TEST_NAME_REGEX = re.compile(
    r"test_(?P<method>forward|load|generate)_(?P<model_name>\w+)_pte.*iOS_(?P<ios_ver>\w+)_iPhone(?P<iphone_ver>\w+)"
)
# Splits a composite model name such as resnet50_xnnpack_q8 into its
# model / backend / dtype (quantization) parts
IOS_MODEL_NAME_REGEX = re.compile(r"(?P<model>[^_]+)_(?P<backend>\w+)_(?P<dtype>\w+)")

27 | 36 |
|
28 | 37 | class ValidateArtifacts(Action):
|
29 | 38 | def __call__(
|
@@ -135,6 +144,130 @@ def extract_android_benchmark_results(
|
135 | 144 | return []
|
136 | 145 |
|
137 | 146 |
|
def initialize_ios_metadata(test_name: str) -> Dict[str, Any]:
    """
    Extract the benchmark metadata from the test name, for example:
        test_forward_llama2_pte_iOS_17_2_1_iPhone15_4
        test_load_resnet50_xnnpack_q8_pte_iOS_17_2_1_iPhone15_4

    Return an empty dict when the test name doesn't match IOS_TEST_NAME_REGEX.
    """
    m = IOS_TEST_NAME_REGEX.match(test_name)
    if not m:
        return {}

    method = m.group("method")
    model_name = m.group("model_name")
    # Version components are underscore-separated in the test name, e.g. 17_2_1
    ios_ver = m.group("ios_ver").replace("_", ".")
    iphone_ver = m.group("iphone_ver").replace("_", ".")

    # NB: This looks brittle, but unless we can return iOS benchmark results in JSON
    # format by the test, the mapping is needed to match with Android test
    if method == "load":
        metric = "model_load_time(ms)"
    elif method == "forward":
        metric = (
            "generate_time(ms)"
            if "llama" in model_name
            else "avg_inference_latency(ms)"
        )
    elif method == "generate":
        metric = "token_per_sec"
    else:
        # Unreachable while IOS_TEST_NAME_REGEX only matches the three methods
        # above, but guard against a future regex change leaving metric unbound
        return {}

    backend = ""
    quantization = "unknown"

    # Composite model names like resnet50_xnnpack_q8 also encode the backend
    # and the quantization dtype
    m = IOS_MODEL_NAME_REGEX.match(model_name)
    if m:
        backend = m.group("backend")
        quantization = m.group("dtype")
        model_name = m.group("model")

    return {
        "benchmarkModel": {
            "backend": backend,
            "quantization": quantization,
            "name": model_name,
        },
        "deviceInfo": {
            "arch": f"iPhone {iphone_ver}",
            "device": f"iPhone {iphone_ver}",
            "os": f"iOS {ios_ver}",
            "availMem": 0,
            "totalMem": 0,
        },
        "metric": metric,
        # These fields will be populated later by extract_ios_metric
        "actualValue": 0,
        "targetValue": 0,
    }
| 203 | + |
def extract_ios_metric(
    benchmark_result: Dict[str, Any],
    test_name: str,
    metric_name: str,
    metric_value: float,
) -> Dict[str, Any]:
    """
    Map the metric name from iOS xcresult to the benchmark result
    """
    # Known xcresult metric labels and the factor needed to convert their
    # values into the unit the benchmark result expects
    scale_by_metric = {
        "Clock Monotonic Time, s": 1000,  # seconds -> milliseconds
        "Tokens Per Second, t/s": 1,  # already the right unit
    }

    scale = scale_by_metric.get(metric_name)
    if scale is not None:
        benchmark_result["actualValue"] = metric_value * scale

    return benchmark_result
| 221 | + |
def extract_ios_benchmark_results(
    job_name: str, artifact_type: str, artifact_s3_url: str
) -> List:
    """
    The benchmark results from iOS are currently from xcresult, which could either
    be parsed from CUSTOMER_ARTIFACT or get from the test spec output. The latter
    is probably easier to process

    Return the list of benchmark records in the same format used by Android, or
    an empty list when the artifact is not a test spec output or cannot be fetched
    """
    if artifact_type != "TESTSPEC_OUTPUT":
        return []

    try:
        benchmark_results = []

        with request.urlopen(artifact_s3_url) as data:
            current_test_name = ""
            current_record = {}

            for line in data.read().decode("utf8").splitlines():
                s = IOS_TEST_SPEC_REGEX.search(line)
                if not s:
                    continue

                test_name = s.group("test_name")
                metric_name = s.group("metric")
                metric_value = float(s.group("value"))

                if test_name != current_test_name:
                    if current_record:
                        # Save the benchmark result in the same format used by Android
                        benchmark_results.append(current_record.copy())

                    current_test_name = test_name
                    current_record = initialize_ios_metadata(current_test_name)

                current_record = extract_ios_metric(
                    current_record, test_name, metric_name, metric_value
                )

            # Flush the last record. Guard against an empty record, which
            # happens when no measurement lines matched at all -- the previous
            # unconditional append produced a bogus empty dict in that case
            if current_record:
                benchmark_results.append(current_record.copy())

        return benchmark_results

    except error.HTTPError:
        warning(f"Fail to {artifact_type} {artifact_s3_url}")
        return []
| 270 | + |
138 | 271 | def extract_job_id(artifacts_filename: str) -> int:
|
139 | 272 | """
|
140 | 273 | Extract the job id from the artifacts filename
|
@@ -222,23 +355,25 @@ def main() -> None:
|
222 | 355 | benchmark_results = extract_android_benchmark_results(
|
223 | 356 | job_name, artifact_type, artifact_s3_url
|
224 | 357 | )
|
225 |
| - if benchmark_results: |
226 |
| - benchmark_results = transform( |
227 |
| - app_type, |
228 |
| - benchmark_results, |
229 |
| - args.repo, |
230 |
| - args.head_branch, |
231 |
| - args.workflow_name, |
232 |
| - args.workflow_run_id, |
233 |
| - args.workflow_run_attempt, |
234 |
| - job_name, |
235 |
| - extract_job_id(args.artifacts), |
236 |
| - ) |
237 |
| - all_benchmark_results.extend(benchmark_results) |
238 | 358 |
|
239 | 359 | if app_type == "IOS_APP":
|
240 |
| - # TODO (huydhn): Implement the logic for iOS next |
241 |
| - pass |
| 360 | + benchmark_results = extract_ios_benchmark_results( |
| 361 | + job_name, artifact_type, artifact_s3_url |
| 362 | + ) |
| 363 | + |
| 364 | + if benchmark_results: |
| 365 | + benchmark_results = transform( |
| 366 | + app_type, |
| 367 | + benchmark_results, |
| 368 | + args.repo, |
| 369 | + args.head_branch, |
| 370 | + args.workflow_name, |
| 371 | + args.workflow_run_id, |
| 372 | + args.workflow_run_attempt, |
| 373 | + job_name, |
| 374 | + extract_job_id(args.artifacts), |
| 375 | + ) |
| 376 | + all_benchmark_results.extend(benchmark_results) |
242 | 377 |
|
243 | 378 | if all_benchmark_results:
|
244 | 379 | output_file = os.path.basename(args.artifacts)
|
|
0 commit comments