Skip to content

Commit d72859f

Browse files
Daniel Hwang authored and zeroomega committed
[scan-build-py] Update scan-build-py to allow outputting as SARIF
Clang static analysis reports can be generated in HTML, plist, or SARIF format. This updates scan-build-py so that SARIF can be specified as the desired output format; previously it only supported the plist and HTML formats. Differential Revision: https://reviews.llvm.org/D94251
1 parent d3e13b5 commit d72859f

File tree

5 files changed

+657
-11
lines changed

5 files changed

+657
-11
lines changed

clang/tools/scan-build-py/libscanbuild/analyze.py

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ def scan_build():
5252

5353
args = parse_args_for_scan_build()
5454
# will re-assign the report directory as new output
55-
with report_directory(args.output, args.keep_empty) as args.output:
55+
with report_directory(
56+
args.output, args.keep_empty, args.output_format) as args.output:
5657
# Run against a build command. there are cases, when analyzer run
5758
# is not required. But we need to set up everything for the
5859
# wrappers, because 'configure' needs to capture the CC/CXX values
@@ -79,7 +80,7 @@ def analyze_build():
7980

8081
args = parse_args_for_analyze_build()
8182
# will re-assign the report directory as new output
82-
with report_directory(args.output, args.keep_empty) as args.output:
83+
with report_directory(args.output, args.keep_empty, args.output_format) as args.output:
8384
# Run the analyzer against a compilation db.
8485
govern_analyzer_runs(args)
8586
# Cover report generation and bug counting.
@@ -336,7 +337,7 @@ def analyze_compiler_wrapper_impl(result, execution):
336337

337338

338339
@contextlib.contextmanager
339-
def report_directory(hint, keep):
340+
def report_directory(hint, keep, output_format):
340341
""" Responsible for the report directory.
341342
342343
hint -- could specify the parent directory of the output directory.
@@ -355,7 +356,11 @@ def report_directory(hint, keep):
355356
yield name
356357
finally:
357358
if os.listdir(name):
358-
msg = "Run 'scan-view %s' to examine bug reports."
359+
if output_format != 'sarif':
360+
# 'scan-view' currently does not support sarif format.
361+
msg = "Run 'scan-view %s' to examine bug reports."
362+
else:
363+
msg = "View result at %s/results-merged.sarif."
359364
keep = True
360365
else:
361366
if keep:
@@ -433,7 +438,7 @@ def wrapper(*args, **kwargs):
433438
'direct_args', # arguments from command line
434439
'force_debug', # kill non debug macros
435440
'output_dir', # where generated report files shall go
436-
'output_format', # it's 'plist', 'html', both or plist-multi-file
441+
'output_format', # it's 'plist', 'html', 'plist-html', 'plist-multi-file', or 'sarif'
437442
'output_failures', # generate crash reports or not
438443
'ctu']) # ctu control options
439444
def run(opts):
@@ -537,6 +542,12 @@ def target():
537542
dir=opts['output_dir'])
538543
os.close(handle)
539544
return name
545+
elif opts['output_format'] == 'sarif':
546+
(handle, name) = tempfile.mkstemp(prefix='result-',
547+
suffix='.sarif',
548+
dir=opts['output_dir'])
549+
os.close(handle)
550+
return name
540551
return opts['output_dir']
541552

542553
try:

clang/tools/scan-build-py/libscanbuild/arguments.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,14 @@ def create_analyze_parser(from_build_command):
244244
action='store_const',
245245
help="""Cause the results as a set of .plist files with extra
246246
information on related files.""")
247+
format_group.add_argument(
248+
'--sarif',
249+
'-sarif',
250+
dest='output_format',
251+
const='sarif',
252+
default='html',
253+
action='store_const',
254+
help="""Cause the results as a result.sarif file.""")
247255

248256
advanced = parser.add_argument_group('advanced options')
249257
advanced.add_argument(

clang/tools/scan-build-py/libscanbuild/report.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ def document(args):
2727
""" Generates cover report and returns the number of bugs/crashes. """
2828

2929
html_reports_available = args.output_format in {'html', 'plist-html'}
30+
sarif_reports_available = args.output_format in {'sarif'}
3031

3132
logging.debug('count crashes and bugs')
3233
crash_count = sum(1 for _ in read_crashes(args.output))
@@ -57,6 +58,11 @@ def document(args):
5758
finally:
5859
for fragment in fragments:
5960
os.remove(fragment)
61+
62+
if sarif_reports_available:
63+
logging.debug('merging sarif files')
64+
merge_sarif_files(args.output)
65+
6066
return result
6167

6268

@@ -277,6 +283,98 @@ def empty(file_name):
277283
if not duplicate(bug):
278284
yield bug
279285

286+
def merge_sarif_files(output_dir, sort_files=False):
    """ Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appear separate in the top level runs array. This requires
    modifying the run index of any embedded links in messages.

    output_dir -- directory holding the individual .sarif files; the merged
                  document is written there as 'results-merged.sarif'.
    sort_files -- process the input files in sorted name order. Exposed for
                  testing since the order of files returned by glob is not
                  guaranteed to be sorted.

    The merged output file itself is never used as an input, so calling
    this function again on the same directory yields the same result
    instead of duplicating runs.
    """

    merged_name = 'results-merged.sarif'
    # we only merge runs, so we only need to update the run index
    run_link = re.compile(r'sarif:/runs/(\d+)')

    def empty(file_name):
        return os.stat(file_name).st_size == 0

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains
        an embedded link and updates the run index if necessary.

        Anything that is not a dictionary with a string 'text' entry is
        returned untouched.
        """
        if not isinstance(message, dict):
            return message
        text = message.get('text')
        if not isinstance(text, str):
            return message

        def shift(match):
            # rewrite the whole link prefix so a growing number of digits
            # (9 -> 10) cannot disturb the positions of later matches
            return 'sarif:/runs/%d' % (
                runs_count_offset + int(match.group(1)))

        message['text'] = run_link.sub(shift, text)
        return message

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message'
        property. If it exists, updates the run index of embedded links in
        the message text.

        Recursively looks through nested dictionaries and lists (including
        lists nested directly inside other lists).
        """
        if isinstance(sarif_object, list):
            return [update_sarif_object(entry, runs_count_offset)
                    for entry in sarif_object]
        if not isinstance(sarif_object, dict):
            return sarif_object

        if 'message' in sarif_object:
            sarif_object['message'] = match_and_update_run(
                sarif_object['message'], runs_count_offset)

        # only values of existing keys are replaced, so the dictionary size
        # does not change while it is being iterated
        for key, value in sarif_object.items():
            if isinstance(value, (dict, list)):
                sarif_object[key] = update_sarif_object(
                    value, runs_count_offset)

        return sarif_object

    sarif_files = (
        name for name in glob.iglob(os.path.join(output_dir, '*.sarif'))
        # skip a merged file left behind by an earlier invocation
        if os.path.basename(name) != merged_name and not empty(name))
    if sort_files:
        sarif_files = sorted(sarif_files)

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
        if 'runs' not in sarif:
            continue

        if not merged:
            # start with the first file; its run indices are already right
            merged = sarif
        else:
            # extract the runs and append them to the merged output
            merged['runs'].extend(
                update_sarif_object(run, runs_count)
                for run in sarif['runs'])

        runs_count += len(sarif['runs'])

    with open(os.path.join(output_dir, merged_name), 'w') as out:
        json.dump(merged, out, indent=4, sort_keys=True)
377+
280378

281379
def parse_bug_plist(filename):
282380
""" Returns the generator of bugs from a single .plist file. """

clang/tools/scan-build-py/tests/unit/test_analyze.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def call(self, params):
128128
class RunAnalyzerTest(unittest.TestCase):
129129

130130
@staticmethod
131-
def run_analyzer(content, failures_report):
131+
def run_analyzer(content, failures_report, output_format='plist'):
132132
with libear.TemporaryDirectory() as tmpdir:
133133
filename = os.path.join(tmpdir, 'test.cpp')
134134
with open(filename, 'w') as handle:
@@ -141,31 +141,46 @@ def run_analyzer(content, failures_report):
141141
'direct_args': [],
142142
'file': filename,
143143
'output_dir': tmpdir,
144-
'output_format': 'plist',
144+
'output_format': output_format,
145145
'output_failures': failures_report
146146
}
147147
spy = Spy()
148148
result = sut.run_analyzer(opts, spy.call)
149-
return (result, spy.arg)
149+
output_files = []
150+
for entry in os.listdir(tmpdir):
151+
output_files.append(entry)
152+
return (result, spy.arg, output_files)
150153

151154
def test_run_analyzer(self):
152155
content = "int div(int n, int d) { return n / d; }"
153-
(result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
156+
(result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
154157
self.assertEqual(None, fwds)
155158
self.assertEqual(0, result['exit_code'])
156159

157160
def test_run_analyzer_crash(self):
158161
content = "int div(int n, int d) { return n / d }"
159-
(result, fwds) = RunAnalyzerTest.run_analyzer(content, False)
162+
(result, fwds, _) = RunAnalyzerTest.run_analyzer(content, False)
160163
self.assertEqual(None, fwds)
161164
self.assertEqual(1, result['exit_code'])
162165

163166
def test_run_analyzer_crash_and_forwarded(self):
164167
content = "int div(int n, int d) { return n / d }"
165-
(_, fwds) = RunAnalyzerTest.run_analyzer(content, True)
168+
(_, fwds, _) = RunAnalyzerTest.run_analyzer(content, True)
166169
self.assertEqual(1, fwds['exit_code'])
167170
self.assertTrue(len(fwds['error_output']) > 0)
168171

172+
def test_run_analyzer_with_sarif(self):
    """ Running the analyzer with the 'sarif' output format shall succeed
    and leave at least one 'result-*.sarif' file in the output directory.
    """
    content = "int div(int n, int d) { return n / d; }"
    (result, fwds, output_files) = RunAnalyzerTest.run_analyzer(
        content, False, output_format='sarif')
    self.assertEqual(None, fwds)
    self.assertEqual(0, result['exit_code'])

    # one matching file name is enough; the random part is not checked
    pattern = re.compile(r'^result-.+\.sarif$')
    found = any(re.match(pattern, name) for name in output_files)
    if not found:
        self.fail('no result sarif files found in output')
183+
169184

170185
class ReportFailureTest(unittest.TestCase):
171186

0 commit comments

Comments
 (0)