Skip to content

Commit 2ee3aa2

Browse files
BLOrange-AMD dnikolaev-amd
authored and committed
CONSOLIDATED COMMITS: Implementation of PyTorch ut parsing script - QA helper functions
======================================================================================= Implementation of PyTorch ut parsing script - QA helper function (#1386) * Initial implementation of PyTorch ut parsing script * Extracted path variables * Use nested dict to save results * Fixes typo * Cleanup * Fixes several issues * Minor name change * Update run_pytorch_unit_tests.py * Added file banners * Supported running from API * Added more help info * Consistent naming * Format help text --------- Co-authored-by: Jithun Nair <[email protected]> Co-authored-by: Jithun Nair <[email protected]> Print consolidated log file for pytorch unit test automation scripts (#1433) * Print consolidated log file for pytorch uts * Update run_entire_tests subprocess call as well * lint * Add ERROR string [SWDEV-466849] Enhancements for PyTorch UT helper scripts (#1491) * Check that >1 GPUs are visible when running TEST_CONFIG=distributed * Add EXECUTION_TIME to file-level and aggregate statistics PyTorch unit test helper scripts enhancements (#1517) * Fail earlier for distributed-on-1-GPU scenario * print cmd in consolidated log with prettier formatting * python->python3 Fixes https://ontrack-internal.amd.com/browse/SWDEV-477264 --------- Co-authored-by: blorange-amd <[email protected]> Several issues fix of QA helper script (#1564) Fixes SWDEV-475071: https://ontrack-internal.amd.com/browse/SWDEV-475071 Removed args inside function (#1595) Fixes SWDEV-475071 (cherry picked from commit 041aa1b47978154de63edc6b7ffcdea218a847a3) QA script - Added multi gpu check with priority_tests (#1604) Fixes SWDEV-487907. Verified throwing exception for distributed is working correctly on single gpu with command: python .automation_scripts/run_pytorch_unit_tests.py --priority_test (cherry picked from commit 57cc742271cbf4547f9213710e57f6444bbc983e) (cherry picked from commit 6d5c3dc)
1 parent 123a164 commit 2ee3aa2

File tree

2 files changed

+696
-0
lines changed

2 files changed

+696
-0
lines changed
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
""" The Python PyTorch testing script.
2+
##
3+
# Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
4+
#
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
#
12+
# The above copyright notice and this permission notice shall be included in
13+
# all copies or substantial portions of the Software.
14+
#
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21+
# THE SOFTWARE.
22+
"""
23+
24+
import xml.etree.ElementTree as ET
25+
from pathlib import Path
26+
from typing import Any, Dict, Tuple
27+
28+
# Distributed backend names. When one of these appears as a component of a
# report path, it is appended to the invoking file name recorded for the report.
BACKENDS_LIST = ["dist-gloo", "dist-nccl"]

# Substring looked for in a "skipped" message to recognise reports produced
# by the rerun-disabled-tests workflow.
TARGET_WORKFLOW = "--rerun-disabled-tests"
35+
36+
def get_job_id(report: Path) -> int:
    """Extract the numeric job id embedded in a report path.

    [Job id in artifacts]
    In our GHA workflows the job id is appended to the end of the report
    name, so ``report`` looks like::

        unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml

    and ``5596745227`` is the value returned.

    Args:
        report: Path to the XML report; only its first component is inspected.

    Returns:
        The integer following the last ``_`` of the first path component, or
        ``-1`` when the path has no components or that suffix is not an
        integer.
    """
    parts = report.parts
    if not parts:
        # An empty path has no first component to inspect; report "unknown"
        # the same way as for any other unparseable path instead of raising
        # IndexError.
        return -1
    try:
        return int(parts[0].rpartition("_")[2])
    except ValueError:
        return -1
46+
47+
def is_rerun_disabled_tests(root: ET.ElementTree) -> bool:
    """Tell whether a test report comes from the rerun_disabled_tests workflow.

    Only the first element returned by the ``.//*skipped`` search is
    inspected. The report is treated as a rerun-disabled-tests report when
    that element's ``message`` attribute contains ``TARGET_WORKFLOW`` or the
    substring ``num_red``.

    Args:
        root: Parsed XML report.

    Returns:
        ``True`` if the first matched ``skipped`` message carries one of the
        markers, ``False`` otherwise (including when nothing matches).
    """
    first_skipped = root.find(".//*skipped")
    # Compare against None explicitly: an Element's truth value is not a
    # reliable "was it found" test (a childless element can evaluate false),
    # so `if not first_skipped` does not work as expected.
    if first_skipped is not None:
        reason = first_skipped.get("message", "")
        return TARGET_WORKFLOW in reason or "num_red" in reason
    return False
58+
59+
def _invoking_file_name(report: Path) -> str:
    """Return the report's parent folder name, suffixed with the first backend found in its path."""
    name = report.parent.name
    for backend in BACKENDS_LIST:
        if backend in report.parts:
            return f"{name}_{backend}"
    return name


def parse_xml_report(
    tag: str,
    report: Path,
    workflow_id: int,
    workflow_run_attempt: int,
    work_flow_name: str
) -> Dict[Tuple[str, ...], Dict[str, Any]]:
    """Convert a test report xml file into a JSON-serializable dict of test cases.

    Args:
        tag: XML tag to collect. Only ``"testcase"`` and ``"testsuite"``
            elements are stored; any other tag yields an empty result.
        report: Path of the XML report to parse.
        workflow_id: Stored on every ``testcase`` entry.
        workflow_run_attempt: Stored on every ``testcase`` entry.
        work_flow_name: Stored on every entry and used as part of its key.

    Returns:
        A dict mapping a key tuple to the processed element:

        * ``testcase``: ``(invoking_file, classname, name, work_flow_name)``
        * ``testsuite``: ``(invoking_file, invoking_xml, work_flow_name)``

        The dict is empty when the report is recognised as coming from the
        rerun-disabled-tests workflow.

    Raises:
        KeyError: If a ``testcase`` element lacks ``classname``/``name`` or a
            ``testsuite`` element lacks ``time``.
    """
    print(f"Parsing {tag}s for test report: {report}")

    job_id = get_job_id(report)
    print(f"Found job id: {job_id}")

    test_cases: Dict[Tuple[str, ...], Dict[str, Any]] = {}

    root = ET.parse(report)
    # TODO: unlike unittest, pytest-flakefinder used by rerun disabled tests for test_ops
    # includes skipped messages multiple times (50 times by default). This slows down
    # this script too much (O(n)) because it tries to gather all the stats. This should
    # be fixed later in the way we use pytest-flakefinder. A zipped test report from rerun
    # disabled test is only few MB, but will balloon up to a much bigger XML file after
    # extracting from a dozen to few hundred MB
    if is_rerun_disabled_tests(root):
        return test_cases

    # [invoking file]
    # The name of the file that the test is located in is not necessarily
    # the same as the name of the file that invoked the test.
    # For example, `test_jit.py` calls into multiple other test files (e.g.
    # jit/test_dce.py). For sharding/test selection purposes, we want to
    # record the file that invoked the test.
    #
    # To do this, we leverage an implementation detail of how we write out
    # tests (https://bit.ly/3ajEV1M), which is that reports are created
    # under a folder with the same name as the invoking file.
    #
    # The value depends only on the report path, so compute it once rather
    # than once per element.
    invoking_file = _invoking_file_name(report)

    for element in root.iter(tag):
        case = process_xml_element(element)
        if tag == "testcase":
            case["workflow_id"] = workflow_id
            case["workflow_run_attempt"] = workflow_run_attempt
            case["job_id"] = job_id
            case["work_flow_name"] = work_flow_name
            case["invoking_file"] = invoking_file
            key = (invoking_file, case["classname"], case["name"], work_flow_name)
            test_cases[key] = case
        elif tag == "testsuite":
            case["work_flow_name"] = work_flow_name
            case["invoking_xml"] = report.name
            case["running_time_xml"] = case["time"]
            case["invoking_file"] = invoking_file
            key = (invoking_file, report.name, work_flow_name)
            test_cases[key] = case

    return test_cases
123+
124+
def process_xml_element(element: ET.Element) -> Dict[str, Any]:
    """Convert an XML element into a JSON-serializable dict.

    Args:
        element: The element to convert; its children are converted
            recursively.

    Returns:
        A dict holding the element's attributes (numeric strings converted to
        ``int`` or ``float``), optional ``"text"``/``"tail"`` entries for
        non-blank inner and trailing text, and one entry per child tag.
    """
    ret: Dict[str, Any] = {}

    # Convert attributes directly into dict elements.
    # e.g.
    #     <testcase name="test_foo" classname="test_bar"></testcase>
    # becomes:
    #     {"name": "test_foo", "classname": "test_bar"}
    #
    # The XML format encodes all values as strings. Convert to ints/floats if
    # possible to make aggregation possible in Rockset.
    for key, raw in element.attrib.items():
        try:
            ret[key] = int(raw)
        except ValueError:
            # Only fall back to float when the value is not an integer;
            # trying float unconditionally would turn every int into a float.
            try:
                ret[key] = float(raw)
            except ValueError:
                ret[key] = raw

    # Convert inner and outer text into special dict elements.
    # e.g.
    #     <testcase>my_inner_text</testcase> my_tail
    # becomes:
    #     {"text": "my_inner_text", "tail": " my_tail"}
    if element.text and element.text.strip():
        ret["text"] = element.text
    if element.tail and element.tail.strip():
        ret["tail"] = element.tail

    # Convert child elements recursively, placing them at a key:
    # e.g.
    #     <testcase>
    #       <foo>hello</foo>
    #       <foo>world</foo>
    #       <bar>another</bar>
    #     </testcase>
    # becomes
    #    {
    #       "foo": [{"text": "hello"}, {"text": "world"}],
    #       "bar": {"text": "another"}
    #    }
    for child in element:
        converted = process_xml_element(child)
        if child.tag not in ret:
            ret[child.tag] = converted
            continue
        # If there are multiple tags with the same name, they should be
        # coalesced into a list.
        if not isinstance(ret[child.tag], list):
            ret[child.tag] = [ret[child.tag]]
        ret[child.tag].append(converted)
    return ret

0 commit comments

Comments
 (0)