1
+ """ The Python PyTorch testing script.
2
+ ##
3
+ # Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in
13
+ # all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ # THE SOFTWARE.
22
+ """
23
+
24
+ import xml .etree .ElementTree as ET
25
+ from pathlib import Path
26
+ from typing import Any , Dict , Tuple
27
+
28
# Distributed backend directory names. When one of these appears as a
# component of a report path, it is appended to the invoking file name.
BACKENDS_LIST = ["dist-gloo", "dist-nccl"]

# Marker looked for in a <skipped> message to recognise reports produced by
# the rerun-disabled-tests workflow.
TARGET_WORKFLOW = "--rerun-disabled-tests"
35
+
36
def get_job_id(report: Path) -> int:
    """Extract the job id encoded in the first component of ``report``.

    [Job id in artifacts]
    In our GHA workflows the job id is appended to the end of the report
    name, so ``report`` looks like:
        unzipped-test-reports-foo_5596745227/test/test-reports/foo/TEST-foo.xml
    and we want to get ``5596745227`` out of it.

    Args:
        report: Path to a test report; only its first component is inspected.

    Returns:
        The integer after the last ``_`` in the first path component, or
        ``-1`` when that suffix is not an integer or the path has no
        components at all (e.g. ``Path("")``).
    """
    try:
        top_level_dir = report.parts[0]
    except IndexError:
        # An empty path has no components; treat it like any other report
        # whose job id cannot be determined.
        return -1

    # rpartition yields the whole string as the last item when there is no
    # "_", so a name without a suffix falls through to the ValueError branch.
    suffix = top_level_dir.rpartition("_")[2]
    try:
        return int(suffix)
    except ValueError:
        return -1
46
+
47
def is_rerun_disabled_tests(root: ET.ElementTree) -> bool:
    """Tell whether a parsed report comes from the rerun_disabled_tests workflow.

    Only the first ``skipped`` element matched by the search is examined: the
    report is treated as a rerun-disabled-tests report when that element's
    ``message`` attribute contains ``TARGET_WORKFLOW`` or ``"num_red"``.

    Args:
        root: The parsed test report.

    Returns:
        ``True`` if the first matched ``skipped`` message carries one of the
        markers, ``False`` otherwise (including when nothing is matched).
    """
    first_skipped = root.find(".//*skipped")

    # Compare against None explicitly: the truth value of an Element is not a
    # reliable "was it found" signal, so `if not first_skipped` would misfire.
    if first_skipped is not None:
        skip_message = first_skipped.attrib.get("message", "")
        return any(
            marker in skip_message for marker in (TARGET_WORKFLOW, "num_red")
        )

    return False
58
+
59
def _invoking_file_name(report: Path) -> str:
    """Return the ``invoking_file`` value for ``report``.

    [invoking file]
    The name of the file that the test is located in is not necessarily
    the same as the name of the file that invoked the test.
    For example, `test_jit.py` calls into multiple other test files (e.g.
    jit/test_dce.py). For sharding/test selection purposes, we want to
    record the file that invoked the test.

    To do this, we leverage an implementation detail of how we write out
    tests (https://bit.ly/3ajEV1M), which is that reports are created
    under a folder with the same name as the invoking file.

    If one of the names in ``BACKENDS_LIST`` is a component of the report
    path, the first such name is appended as ``_<backend>``.
    """
    name = report.parent.name
    backend = next((b for b in BACKENDS_LIST if b in report.parts), None)
    if backend is not None:
        name = name + "_" + backend
    return name


def parse_xml_report(
    tag: str,
    report: Path,
    workflow_id: int,
    workflow_run_attempt: int,
    work_flow_name: str,
) -> Dict[Tuple[str, ...], Dict[str, Any]]:
    """Convert a test report xml file into a dict of JSON-serializable records.

    Args:
        tag: Element tag to collect. Only ``"testcase"`` and ``"testsuite"``
            produce records; elements of any other tag are converted but not
            stored.
        report: Path to the XML report. Its first component supplies the job
            id, its parent folder name the invoking file, and its file name
            the invoking xml.
        workflow_id: Stored on every ``testcase`` record.
        workflow_run_attempt: Stored on every ``testcase`` record.
        work_flow_name: Stored on every record and part of every key.

    Returns:
        A dict mapping
        ``(invoking_file, classname, name, work_flow_name)`` for test cases, or
        ``(invoking_file, invoking_xml, work_flow_name)`` for test suites,
        to the converted element. Records sharing a key overwrite each other
        (the last one wins). The dict is empty when the report is detected as
        coming from the rerun-disabled-tests workflow.

    Raises:
        KeyError: If a ``testcase`` has no ``classname``/``name`` attribute or
            a ``testsuite`` has no ``time`` attribute.
    """
    print(f"Parsing {tag}s for test report: {report}")

    job_id = get_job_id(report)
    print(f"Found job id: {job_id}")

    test_cases: Dict[Tuple[str, ...], Dict[str, Any]] = {}

    root = ET.parse(report)
    # TODO: unlike unittest, pytest-flakefinder used by rerun disabled tests for
    # test_ops includes skipped messages multiple times (50 times by default).
    # Gathering all the stats from such a report slows this script down too
    # much: the zipped report is only a few MB but balloons to anywhere from a
    # dozen to a few hundred MB of XML once extracted. This should be fixed
    # later in the way we use pytest-flakefinder; until then such reports are
    # skipped entirely.
    if is_rerun_disabled_tests(root):
        return test_cases

    # The invoking file depends only on the report path, so compute it once
    # instead of once per element.
    invoking_file = _invoking_file_name(report)

    for element in root.iter(tag):
        case = process_xml_element(element)
        if tag == "testcase":
            case["workflow_id"] = workflow_id
            case["workflow_run_attempt"] = workflow_run_attempt
            case["job_id"] = job_id
            case["work_flow_name"] = work_flow_name
            case["invoking_file"] = invoking_file
            key: Tuple[str, ...] = (
                case["invoking_file"],
                case["classname"],
                case["name"],
                case["work_flow_name"],
            )
            test_cases[key] = case
        elif tag == "testsuite":
            case["work_flow_name"] = work_flow_name
            case["invoking_xml"] = report.name
            # Keep the suite's own reported time under a dedicated key.
            case["running_time_xml"] = case["time"]
            case["invoking_file"] = invoking_file
            key = (
                case["invoking_file"],
                case["invoking_xml"],
                case["work_flow_name"],
            )
            test_cases[key] = case

    return test_cases
123
+
124
def process_xml_element(element: ET.Element) -> Dict[str, Any]:
    """Convert an XML element (and its children, recursively) into a dict.

    Args:
        element: The element to convert.

    Returns:
        A dict holding the element's attributes (numeric-looking values
        converted to ``int`` or ``float``), its non-blank ``text`` and
        ``tail`` under those keys, and one entry per child tag: a dict for a
        single child, a list of dicts when the tag repeats.
    """
    # Convert attributes directly into dict elements.
    # e.g.
    #     <testcase name="test_foo" classname="test_bar"></testcase>
    # becomes:
    #     {"name": "test_foo", "classname": "test_bar"}
    ret: Dict[str, Any] = dict(element.attrib)

    # The XML format encodes all values as strings. Convert to ints/floats if
    # possible to make aggregation possible in Rockset.
    # Only existing keys are reassigned, so iterating while updating is safe.
    for key, raw in ret.items():
        try:
            ret[key] = int(raw)
        except ValueError:
            # Fall back to float only when the value is not an integer;
            # otherwise every integer attribute would be turned into a float.
            try:
                ret[key] = float(raw)
            except ValueError:
                pass

    # Convert inner and outer text into special dict elements.
    # e.g.
    #     <testcase>my_inner_text</testcase> my_tail
    # becomes:
    #     {"text": "my_inner_text", "tail": " my_tail"}
    if element.text and element.text.strip():
        ret["text"] = element.text
    if element.tail and element.tail.strip():
        ret["tail"] = element.tail

    # Convert child elements recursively, placing them at a key:
    # e.g.
    #     <testcase>
    #       <foo>hello</foo>
    #       <foo>world</foo>
    #       <bar>another</bar>
    #     </testcase>
    # becomes
    #    {
    #       "foo": [{"text": "hello"}, {"text": "world"}],
    #       "bar": {"text": "another"}
    #    }
    for child in element:
        converted = process_xml_element(child)
        if child.tag not in ret:
            ret[child.tag] = converted
            continue

        # If there are multiple tags with the same name, they should be
        # coalesced into a list.
        existing = ret[child.tag]
        if not isinstance(existing, list):
            existing = [existing]
            ret[child.tag] = existing
        existing.append(converted)

    return ret
0 commit comments