| # -*- coding: utf-8 -*- |
| # Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| # See https://llvm.org/LICENSE.txt for license information. |
| # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| """ This module is responsible to generate 'index.html' for the report. |
| |
| The input for this step is the output directory, where individual reports |
| could be found. It parses those reports and generates 'index.html'. """ |
| |
| import re |
| import os |
| import os.path |
| import sys |
| import shutil |
| import plistlib |
| import glob |
| import json |
| import logging |
| import datetime |
| from libscanbuild import duplicate_check |
| from libscanbuild.clang import get_version |
| |
| __all__ = ["document"] |
| |
| |
def document(args):
    """Generates cover report and returns the number of bugs/crashes."""

    html_ready = args.output_format in {"html", "plist-html", "sarif-html"}
    sarif_ready = args.output_format in {"sarif", "sarif-html"}

    logging.debug("count crashes and bugs")
    crash_count = sum(1 for _ in read_crashes(args.output))
    counter = create_counters()
    for entry in read_bugs(args.output, html_ready):
        counter(entry)
    total = crash_count + counter.total

    if html_ready and total:
        use_cdb = os.path.exists(args.cdb)

        logging.debug("generate index.html file")
        # common prefix for source files to have sorter path
        prefix = commonprefix_from(args.cdb) if use_cdb else os.getcwd()
        # assemble the cover from multiple fragments
        fragments = []
        try:
            if counter.total:
                fragments.append(bug_summary(args.output, counter))
                fragments.append(bug_report(args.output, prefix))
            if crash_count:
                fragments.append(crash_report(args.output, prefix))
            assemble_cover(args, prefix, fragments)
            # copy additional files to the report
            copy_resource_files(args.output)
            if use_cdb:
                shutil.copy(args.cdb, args.output)
        finally:
            # fragment files are temporary; always clean them up
            for piece in fragments:
                os.remove(piece)

    if sarif_ready:
        logging.debug("merging sarif files")
        merge_sarif_files(args.output)

    return total
| |
| |
def assemble_cover(args, prefix, fragments):
    """Put together the fragments into a final report.

    Writes 'index.html' into the output directory: a fixed header and
    footer with the given fragment files copied verbatim in between.

    :param args: parsed arguments; uses 'output' (report directory),
        'html_title' (may be None) and 'clang' (analyzer binary path).
    :param prefix: common source-path prefix, reported as working directory.
    :param fragments: file names of HTML fragments to splice into the body."""

    import getpass
    import socket

    # derive a default title from the project directory name
    if args.html_title is None:
        args.html_title = os.path.basename(prefix) + " - analyzer results"

    with open(os.path.join(args.output, "index.html"), "w") as handle:
        indent = 0
        # document head; the referenced css/js files are installed next to
        # 'index.html' by copy_resource_files
        handle.write(
            reindent(
                """
        |<!DOCTYPE html>
        |<html>
        |  <head>
        |    <title>{html_title}</title>
        |    <link type="text/css" rel="stylesheet" href="scanview.css"/>
        |    <script type='text/javascript' src="sorttable.js"></script>
        |    <script type='text/javascript' src='selectable.js'></script>
        |  </head>""",
                indent,
            ).format(html_title=args.html_title)
        )
        handle.write(comment("SUMMARYENDHEAD"))
        # metadata table: who ran the analysis, where and with what
        handle.write(
            reindent(
                """
        |  <body>
        |    <h1>{html_title}</h1>
        |    <table>
        |      <tr><th>User:</th><td>{user_name}@{host_name}</td></tr>
        |      <tr><th>Working Directory:</th><td>{current_dir}</td></tr>
        |      <tr><th>Command Line:</th><td>{cmd_args}</td></tr>
        |      <tr><th>Clang Version:</th><td>{clang_version}</td></tr>
        |      <tr><th>Date:</th><td>{date}</td></tr>
        |    </table>""",
                indent,
            ).format(
                html_title=args.html_title,
                user_name=getpass.getuser(),
                host_name=socket.gethostname(),
                current_dir=prefix,
                cmd_args=" ".join(sys.argv),
                clang_version=get_version(args.clang),
                date=datetime.datetime.today().strftime("%c"),
            )
        )
        for fragment in fragments:
            # copy the content of fragments
            with open(fragment, "r") as input_handle:
                shutil.copyfileobj(input_handle, handle)
        handle.write(
            reindent(
                """
        |  </body>
        |</html>""",
                indent,
            )
        )
| |
| |
def bug_summary(output_dir, bug_counter):
    """Bug summary is a HTML table to give a better overview of the bugs.

    Writes 'summary.html.fragment' into output_dir and returns its name.
    The checkboxes drive the javascript filtering in the final report.

    :param output_dir: report directory the fragment is written into.
    :param bug_counter: counter from create_counters, already fed with all
        bugs; provides 'total' and the 'categories' classification."""

    name = os.path.join(output_dir, "summary.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        # table head
        handle.write(
            reindent(
                """
        |<h2>Bug Summary</h2>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Bug Type</td>
        |      <td>Quantity</td>
        |      <td class="sorttable_nosort">Display?</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        # first row: grand total with the master 'display?' checkbox
        handle.write(
            reindent(
                """
        |    <tr style="font-weight:bold">
        |      <td class="SUMM_DESC">All Bugs</td>
        |      <td class="Q">{0}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox" id="AllBugsCheck"
        |                 onClick="CopyCheckedStateToCheckButtons(this);"/>
        |        </center>
        |      </td>
        |    </tr>""",
                indent,
            ).format(bug_counter.total)
        )
        # one header row per category, then one row per bug type in it
        for category, types in bug_counter.categories.items():
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <th>{0}</th><th colspan=2></th>
        |    </tr>""",
                    indent,
                ).format(category)
            )
            for bug_type in types.values():
                handle.write(
                    reindent(
                        """
        |    <tr>
        |      <td class="SUMM_DESC">{bug_type}</td>
        |      <td class="Q">{bug_count}</td>
        |      <td>
        |        <center>
        |          <input checked type="checkbox"
        |                 onClick="ToggleDisplay(this,'{bug_type_class}');"/>
        |        </center>
        |      </td>
        |    </tr>""",
                        indent,
                    ).format(**bug_type)
                )
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("SUMMARYBUGEND"))
    return name
| |
| |
def bug_report(output_dir, prefix):
    """Creates a fragment from the analyzer reports.

    Writes 'bugs.html.fragment' into output_dir and returns its name; the
    fragment is a sortable table with one row per de-duplicated bug.

    :param output_dir: directory containing the individual HTML reports.
    :param prefix: common source path prefix chopped from file names."""

    # 'pretty' escapes values and makes file paths relative for display
    pretty = prettify_bug(prefix, output_dir)
    bugs = (pretty(bug) for bug in read_bugs(output_dir, True))

    name = os.path.join(output_dir, "bugs.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Reports</h2>
        |<table class="sortable" style="table-layout:automatic">
        |  <thead>
        |    <tr>
        |      <td>Bug Group</td>
        |      <td class="sorttable_sorted">
        |        Bug Type
        |        <span id="sorttable_sortfwdind"> ▾</span>
        |      </td>
        |      <td>File</td>
        |      <td>Function/Method</td>
        |      <td class="Q">Line</td>
        |      <td class="Q">Path Length</td>
        |      <td class="sorttable_nosort"></td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGCOL"))
        # one row per bug, each followed by a metadata comment that other
        # tools (e.g. scan-view) parse back out of the page
        for current in bugs:
            handle.write(
                reindent(
                    """
        |    <tr class="{bug_type_class}">
        |      <td class="DESC">{bug_category}</td>
        |      <td class="DESC">{bug_type}</td>
        |      <td>{bug_file}</td>
        |      <td class="DESC">{bug_function}</td>
        |      <td class="Q">{bug_line}</td>
        |      <td class="Q">{bug_path_length}</td>
        |      <td><a href="{report_file}#EndPath">View Report</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTBUG", {"id": current["report_file"]}))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTBUGEND"))
    return name
| |
| |
def crash_report(output_dir, prefix):
    """Creates a fragment from the compiler crashes.

    Writes 'crashes.html.fragment' into output_dir and returns its name;
    one table row per analyzer failure, linking the preprocessed file and
    the captured standard error output.

    :param output_dir: directory whose 'failures' subdirectory is read.
    :param prefix: common source path prefix chopped from file names."""

    # 'pretty' escapes values and makes file paths relative for display
    pretty = prettify_crash(prefix, output_dir)
    crashes = (pretty(crash) for crash in read_crashes(output_dir))

    name = os.path.join(output_dir, "crashes.html.fragment")
    with open(name, "w") as handle:
        indent = 4
        handle.write(
            reindent(
                """
        |<h2>Analyzer Failures</h2>
        |<p>The analyzer had problems processing the following files:</p>
        |<table>
        |  <thead>
        |    <tr>
        |      <td>Problem</td>
        |      <td>Source File</td>
        |      <td>Preprocessed File</td>
        |      <td>STDERR Output</td>
        |    </tr>
        |  </thead>
        |  <tbody>""",
                indent,
            )
        )
        # one row per crash, each followed by a metadata comment that other
        # tools (e.g. scan-view) parse back out of the page
        for current in crashes:
            handle.write(
                reindent(
                    """
        |    <tr>
        |      <td>{problem}</td>
        |      <td>{source}</td>
        |      <td><a href="{file}">preprocessor output</a></td>
        |      <td><a href="{stderr}">analyzer std err</a></td>
        |    </tr>""",
                    indent,
                ).format(**current)
            )
            handle.write(comment("REPORTPROBLEM", current))
        handle.write(
            reindent(
                """
        |  </tbody>
        |</table>""",
                indent,
            )
        )
        handle.write(comment("REPORTCRASHES"))
    return name
| |
| |
def read_crashes(output_dir):
    """Generate a unique sequence of crashes from given output directory."""

    # every failure leaves an '<name>.info.txt' marker in 'failures'
    pattern = os.path.join(output_dir, "failures", "*.info.txt")
    for info_file in glob.iglob(pattern):
        yield parse_crash(info_file)
| |
| |
def read_bugs(output_dir, html):
    # type: (str, bool) -> Generator[Dict[str, Any], None, None]
    """Generate a unique sequence of bugs from given output directory.

    Duplicates can be in a project if the same module was compiled multiple
    times with different compiler options. These would be better to show in
    the final report (cover) only once."""

    def not_empty(file_name):
        """True when the report file has any content."""
        return os.stat(file_name).st_size != 0

    # duplicates are recognised by line, path length and file name
    is_duplicate = duplicate_check(
        lambda bug: "{bug_line}.{bug_path_length}:{bug_file}".format(**bug)
    )

    # pick the parser and the file extension matching the report format
    parse = parse_bug_html if html else parse_bug_plist
    pattern = os.path.join(output_dir, "*.html" if html else "*.plist")

    for report_file in filter(not_empty, glob.iglob(pattern)):
        for bug in parse(report_file):
            if not is_duplicate(bug):
                yield bug
| |
| |
def merge_sarif_files(output_dir, sort_files=False):
    """Reads and merges all .sarif files in the given output directory.

    Each sarif file in the output directory is understood as a single run
    and thus appear separate in the top level runs array. This requires
    modifying the run index of any embedded links in messages.

    :param output_dir: directory scanned for '*.sarif' files; the merged
        result is written there as 'results-merged.sarif'.
    :param sort_files: process the files in sorted order (exposed for
        testing, since glob does not guarantee any ordering)."""

    # we only merge runs, so embedded links only need their run index
    # updated; compiled once here instead of per-message (hot path)
    embedded_link = re.compile(r"sarif:/runs/(\d+)")

    def empty(file_name):
        """True when the file has no content at all."""
        return os.stat(file_name).st_size == 0

    def update_sarif_object(sarif_object, runs_count_offset):
        """
        Given a SARIF object, checks its dictionary entries for a 'message'
        property. If it exists, updates the run index of embedded links.

        Recursively looks through entries in the dictionary.
        """
        if not isinstance(sarif_object, dict):
            return sarif_object

        if "message" in sarif_object:
            sarif_object["message"] = match_and_update_run(
                sarif_object["message"], runs_count_offset
            )

        for key in sarif_object:
            if isinstance(sarif_object[key], list):
                # iterate through subobjects and update each entry
                sarif_object[key] = [
                    update_sarif_object(entry, runs_count_offset)
                    for entry in sarif_object[key]
                ]
            elif isinstance(sarif_object[key], dict):
                sarif_object[key] = update_sarif_object(
                    sarif_object[key], runs_count_offset
                )

        return sarif_object

    def match_and_update_run(message, runs_count_offset):
        """
        Given a SARIF message object, checks if the text property contains an
        embedded link and updates the run index if necessary.
        """
        if "text" not in message:
            return message

        text = message["text"]
        # update matches from right to left so earlier match positions stay
        # valid even when the replacement grows in length (e.g. 9 -> 10)
        for match in reversed(list(embedded_link.finditer(text))):
            new_run_count = str(runs_count_offset + int(match.group(1)))
            text = text[0 : match.start(1)] + new_run_count + text[match.end(1) :]

        message["text"] = text
        return message

    sarif_files = (
        file
        for file in glob.iglob(os.path.join(output_dir, "*.sarif"))
        if not empty(file)
    )
    # exposed for testing since the order of files returned by glob is not
    # guaranteed to be sorted
    if sort_files:
        sarif_files = sorted(sarif_files)

    runs_count = 0
    merged = {}
    for sarif_file in sarif_files:
        with open(sarif_file) as fp:
            sarif = json.load(fp)
        if "runs" not in sarif:
            continue

        # the first file provides the top-level skeleton of the output
        if not merged:
            merged = sarif
        else:
            # extract the runs and append them to the merged output,
            # rewriting any embedded run links by the current offset
            for run in sarif["runs"]:
                merged["runs"].append(update_sarif_object(run, runs_count))

        runs_count += len(sarif["runs"])

    with open(os.path.join(output_dir, "results-merged.sarif"), "w") as out:
        json.dump(merged, out, indent=4, sort_keys=True)
| |
| |
def parse_bug_plist(filename):
    """Returns the generator of bugs from a single .plist file."""

    with open(filename, "rb") as fp:
        content = plistlib.load(fp)
    file_list = content.get("files")
    for diagnostic in content.get("diagnostics", []):
        location = diagnostic["location"]
        # a file index outside the file table means a corrupt report
        if int(location["file"]) >= len(file_list):
            logging.warning('Parsing bug from "%s" failed', filename)
            continue

        yield {
            "result": filename,
            "bug_type": diagnostic["type"],
            "bug_category": diagnostic["category"],
            "bug_line": int(location["line"]),
            "bug_path_length": int(location["col"]),
            "bug_file": file_list[int(location["file"])],
        }
| |
| |
def parse_bug_html(filename):
    """Parse out the bug information from HTML output."""

    # metadata is emitted by clang as HTML comments, one attribute per line
    meta_patterns = [
        re.compile(r"<!-- BUGTYPE (?P<bug_type>.*) -->$"),
        re.compile(r"<!-- BUGFILE (?P<bug_file>.*) -->$"),
        re.compile(r"<!-- BUGPATHLENGTH (?P<bug_path_length>.*) -->$"),
        re.compile(r"<!-- BUGLINE (?P<bug_line>.*) -->$"),
        re.compile(r"<!-- BUGCATEGORY (?P<bug_category>.*) -->$"),
        re.compile(r"<!-- BUGDESC (?P<bug_description>.*) -->$"),
        re.compile(r"<!-- FUNCTIONNAME (?P<bug_function>.*) -->$"),
    ]
    end_of_meta = re.compile(r"<!-- BUGMETAEND -->")

    # defaults keep reports from older clang versions usable
    bug = {
        "report_file": filename,
        "bug_function": "n/a",  # compatibility with < clang-3.5
        "bug_category": "Other",
        "bug_line": 0,
        "bug_path_length": 1,
    }

    with open(filename, encoding="utf-8") as handler:
        for line in handler:
            # the metadata section is over; do not read the file further
            if end_of_meta.match(line):
                break
            # the first matching pattern wins for this line
            for regex in meta_patterns:
                found = regex.match(line.strip())
                if found:
                    bug.update(found.groupdict())
                    break

    encode_value(bug, "bug_line", int)
    encode_value(bug, "bug_path_length", int)

    yield bug
| |
| |
def parse_crash(filename):
    """Parse out the crash information from the report file."""

    found = re.match(r"(.*)\.info\.txt", filename)
    stem = found.group(1) if found else None
    with open(filename, mode="rb") as handler:
        # read as binary and strip manually: this avoids platform newline
        # translation, so '\r\n' endings written on windows are handled too
        lines = [raw.decode().rstrip() for raw in handler]
    return {
        "source": lines[0],
        "problem": lines[1],
        "file": stem,
        "info": stem + ".info.txt",
        "stderr": stem + ".stderr.txt",
    }
| |
| |
def category_type_name(bug):
    """Create a new bug attribute from bug by category and type.

    The result will be used as CSS class selector in the final report."""

    def smash(key):
        """Make value ready to be HTML attribute value."""

        return bug.get(key, "").lower().replace(" ", "_").replace("'", "")

    parts = ["bt", smash("bug_category"), smash("bug_type")]
    return escape("_".join(parts))
| |
| |
def create_counters():
    """Create counters for bug statistics.

    Two entries are maintained: 'total' is an integer, represents the
    number of bugs. The 'categories' is a two level categorisation of bug
    counters. The first level is 'bug category' the second is 'bug type'.
    Each entry in this classification is a dictionary of 'count', 'type'
    and 'label'."""

    def predicate(bug):
        """Count one bug, creating category/type entries on first sight."""
        category = bug["bug_category"]
        bug_type = bug["bug_type"]
        types = predicate.categories.setdefault(category, dict())
        entry = types.setdefault(
            bug_type,
            {
                "bug_type": bug_type,
                "bug_type_class": category_type_name(bug),
                "bug_count": 0,
            },
        )
        entry["bug_count"] += 1
        predicate.total += 1

    # counters live as attributes on the closure itself
    predicate.total = 0
    predicate.categories = dict()
    return predicate
| |
| |
def prettify_bug(prefix, output_dir):
    """Return a function that sanitizes one bug entry for HTML embedding."""

    def predicate(bug):
        """Make safe this values to embed into HTML."""

        bug["bug_type_class"] = category_type_name(bug)

        # source paths are shown relative to the project, report paths
        # relative to the output directory; everything is HTML-escaped
        relative_source = lambda x: escape(chop(prefix, x))
        relative_report = lambda x: escape(chop(output_dir, x))
        encode_value(bug, "bug_file", relative_source)
        encode_value(bug, "bug_category", escape)
        encode_value(bug, "bug_type", escape)
        encode_value(bug, "report_file", relative_report)
        return bug

    return predicate
| |
| |
def prettify_crash(prefix, output_dir):
    """Return a function that sanitizes one crash entry for HTML embedding."""

    def predicate(crash):
        """Make safe this values to embed into HTML."""

        # the source path is relative to the project, the generated files
        # relative to the output directory; everything is HTML-escaped
        encode_value(crash, "source", lambda x: escape(chop(prefix, x)))
        encode_value(crash, "problem", escape)
        for key in ("file", "info", "stderr"):
            encode_value(crash, key, lambda x: escape(chop(output_dir, x)))
        return crash

    return predicate
| |
| |
def copy_resource_files(output_dir):
    """Copy the javascript and css files to the report directory."""

    # the resources ship inside the package, next to this module
    resource_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "resources")
    for entry in os.listdir(resource_dir):
        shutil.copy(os.path.join(resource_dir, entry), output_dir)
| |
| |
def encode_value(container, key, encode):
    """Run 'encode' on 'container[key]' value and update it."""

    # missing keys are left untouched on purpose
    if key not in container:
        return
    container[key] = encode(container[key])
| |
| |
def chop(prefix, filename):
    """Create 'filename' from '/prefix/filename'"""

    # an empty prefix means there is nothing to make relative to
    if not prefix:
        return filename
    return os.path.relpath(filename, prefix)
| |
| |
def escape(text):
    """Paranoid HTML escape method. (Python version independent)"""

    # single-pass character translation of the HTML special characters
    translation = str.maketrans(
        {
            "&": "&amp;",
            '"': "&quot;",
            "'": "&#39;",
            ">": "&gt;",
            "<": "&lt;",
        }
    )
    return text.translate(translation)
| |
| |
def reindent(text, indent):
    """Utility function to format html output and keep indentation."""

    # each template line carries its payload after a '|' margin marker;
    # blank lines are dropped, payloads re-indented by 'indent' spaces
    margin = " " * indent
    return "".join(
        margin + line.split("|")[1] + os.linesep
        for line in text.splitlines()
        if line.strip()
    )
| |
| |
def comment(name, opts=None):
    """Utility function to format meta information as comment.

    :param name: the comment tag name (e.g. 'REPORTBUG').
    :param opts: optional mapping rendered as key="value" attributes.
    :return: a single HTML comment terminated by os.linesep.

    Note: the original signature used a mutable default ('opts=dict()');
    replaced with the None sentinel, behavior is unchanged."""

    attributes = "".join(
        ' {0}="{1}"'.format(key, value) for key, value in (opts or {}).items()
    )
    return "<!-- {0}{1} -->{2}".format(name, attributes, os.linesep)
| |
| |
def commonprefix_from(filename):
    """Create file prefix from a compilation database entries."""

    # the compilation database is a JSON array of {"file": ...} entries
    with open(filename, "r") as handle:
        entries = json.load(handle)
    return commonprefix(entry["file"] for entry in entries)
| |
| |
def commonprefix(files):
    """Fixed version of os.path.commonprefix.

    :param files: list of file names.
    :return: the longest path prefix that is a prefix of all files."""

    # fold the character-level common prefix over all entries
    result = None
    for entry in files:
        result = entry if result is None else os.path.commonprefix([result, entry])

    if result is None:
        # no input at all
        return ""
    if os.path.isdir(result):
        return os.path.abspath(result)
    # the prefix ends inside a path component; cut back to the directory
    return os.path.dirname(result)