| #!/usr/bin/env python |
| |
| # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| |
| from third_party import asan_symbolize |
| |
| import argparse |
| import base64 |
| import json |
| import os |
| import sys |
| |
class LineBuffered(object):
    """Stream proxy that flushes the wrapped stream after newline writes.

    Only ``write`` is intercepted; every other attribute lookup is forwarded
    to the wrapped stream.
    """

    def __init__(self, stream):
        # The underlying file-like object; must provide write() and flush().
        self.stream = stream

    def __getattr__(self, attr):
        # Invoked only for names not found on the proxy itself, so lookups
        # of ``stream`` and ``write`` never reach this point.
        return getattr(self.stream, attr)

    def write(self, data):
        """Write |data| to the stream, flushing if it contains a newline."""
        target = self.stream
        target.write(data)
        if '\n' not in data:
            return
        target.flush()
| |
| |
def disable_buffering():
    """Make stdout flush promptly in this process and in child processes.

    Does nothing when PYTHONUNBUFFERED is already set to a non-empty value.
    Otherwise sys.stdout is wrapped in LineBuffered and PYTHONUNBUFFERED is
    exported through the environment so that children inherit it.
    """
    if os.environ.get('PYTHONUNBUFFERED'):
        return
    # sys.stdout.write cannot be reassigned on the built-in stream object, so
    # the whole stream is swapped for a flushing proxy instead.
    sys.stdout = LineBuffered(sys.stdout)
    os.environ['PYTHONUNBUFFERED'] = 'x'
| |
| |
def set_symbolizer_path():
    """Point LLVM_SYMBOLIZER_PATH at the llvm-symbolizer in the source tree.

    An existing non-empty LLVM_SYMBOLIZER_PATH is left untouched. Otherwise
    the binary is located relative to this script; an assertion fails if no
    file exists at the expected location.
    """
    if os.environ.get('LLVM_SYMBOLIZER_PATH'):
        return
    here = os.path.dirname(os.path.abspath(__file__))
    # This script is assumed to reside three levels below src/
    # (i.e. in src/tools/valgrind/asan/), hence the three '..' components.
    relative_parts = ('..', '..', '..', 'third_party', 'llvm-build',
                      'Release+Asserts', 'bin', 'llvm-symbolizer')
    candidate = os.path.join(here, *relative_parts)
    assert os.path.isfile(candidate)
    os.environ['LLVM_SYMBOLIZER_PATH'] = os.path.abspath(candidate)
| |
| |
# Build the path to the .dSYM bundle that holds debug info for a binary.
# A binary in a Chromium build can live in one of three places:
#  1. A standalone executable or dynamic library in the product dir; its
#     debug info is in "binary.dSYM" in the product dir.
#  2. A standalone framework or .app bundle; its debug info is in
#     "Framework.framework.dSYM" or "App.app.dSYM" in the product dir.
#  3. A framework or .app bundle nested inside another .app bundle
#     (e.g. Outer.app/Contents/Versions/1.2.3.4/Inner.app); its debug info is
#     in Inner.app.dSYM in the product dir.
# llvm-symbolizer already handles case 1, so hints are only produced for .app
# bundles and frameworks.
# The binary path is assumed to contain at most two nested bundles. Only one
# of them may be a framework, and a framework cannot contain other bundles.
def chrome_dsym_hints(binary):
    """Return a list with the .dSYM path for |binary|, or [] for case 1.

    Raises AssertionError when the path has more than two bundle components,
    or when its single .app component follows its single .framework component.
    """
    components = binary.split(os.path.sep)
    apps = [pos for pos, name in enumerate(components)
            if name.endswith('.app')]
    frameworks = [pos for pos, name in enumerate(components)
                  if name.endswith('.framework')]
    bundles = sorted(apps + frameworks)
    assert len(bundles) <= 2, \
        "The path contains more than two nested bundles: %s" % binary
    if not bundles:
        # Case 1: this is a standalone executable or dylib.
        return []
    app_inside_framework = (len(apps) == 1 and
                            len(frameworks) == 1 and
                            apps[0] > frameworks[0])
    assert not app_inside_framework, \
        "The path contains an app bundle inside a framework: %s" % binary
    # Cases 2 and 3. The outermost bundle sits directly in the product dir;
    # in case 2 the innermost bundle is that same bundle.
    outermost, innermost = bundles[0], bundles[-1]
    dsym_components = components[:outermost] + [components[innermost]]
    return ['%s.dSYM' % os.path.sep.join(dsym_components)]
| |
| |
# We want our output to match base::EscapeJSONString(), which produces
# doubly-escaped strings. The first escaping pass is handled by this class. The
# second pass happens when JSON data is dumped to file.
class StringEncoder(json.JSONEncoder):
    """JSON-escapes a single string and strips the surrounding quotes."""

    def __init__(self):
        json.JSONEncoder.__init__(self)

    def encode(self, s):
        """Return |s| JSON-escaped, without the enclosing double quotes.

        In addition to the standard JSON escapes, every '<' is replaced by the
        six-character sequence \\u003C, mirroring base::EscapeJSONString().

        Raises AssertionError if |s| is not a string.
        """
        try:
            string_types = basestring  # Python 2: covers str and unicode.
        except NameError:
            string_types = str  # Python 3 has no basestring.
        assert isinstance(s, string_types)
        encoded = json.JSONEncoder.encode(self, s)
        assert len(encoded) >= 2
        assert encoded[0] == '"'
        assert encoded[-1] == '"'
        encoded = encoded[1:-1]
        # Special case from base::EscapeJSONString(). The backslash is
        # doubled so the replacement is the literal escape text on Python 3
        # as well; an unescaped '\u003C' there is just the character '<',
        # which would turn this replace into a no-op.
        encoded = encoded.replace('<', '\\u003C')
        return encoded
| |
| |
class JSONTestRunSymbolizer(object):
    """Symbolizes the output snippet stored in a test-run dictionary."""

    def __init__(self, symbolization_loop):
        """Keep |symbolization_loop|, whose process_line() is applied per line."""
        self.string_encoder = StringEncoder()
        self.symbolization_loop = symbolization_loop

    def symbolize_snippet(self, snippet):
        """Return |snippet| with every line run through process_line().

        process_line() is expected to return an iterable of output lines for
        each input line; all of them are joined back with newlines.
        """
        symbolized_lines = []
        for line in snippet.split('\n'):
            symbolized_lines.extend(self.symbolization_loop.process_line(line))
        return '\n'.join(symbolized_lines)

    def symbolize(self, test_run):
        """Symbolize |test_run| in place; leave it untouched if nothing changes.

        Reads 'output_snippet_base64'. When symbolization alters the snippet,
        the previous 'output_snippet' and 'output_snippet_base64' values are
        saved under 'original_*' keys and replaced with the symbolized text.
        """
        original_snippet = base64.b64decode(test_run['output_snippet_base64'])
        if not isinstance(original_snippet, str):
            # Python 3: b64decode() returns bytes, while the line splitting
            # below works on text. On Python 2 bytes is str, so this is
            # skipped and the value is used unchanged.
            original_snippet = original_snippet.decode('utf-8', 'replace')
        symbolized_snippet = self.symbolize_snippet(original_snippet)
        if symbolized_snippet == original_snippet:
            # No sanitizer reports in snippet.
            return

        test_run['original_output_snippet'] = test_run['output_snippet']
        test_run['original_output_snippet_base64'] = \
            test_run['output_snippet_base64']

        test_run['output_snippet'] = \
            self.string_encoder.encode(symbolized_snippet)
        payload = symbolized_snippet
        if not isinstance(payload, bytes):
            # b64encode() needs bytes on Python 3.
            payload = payload.encode('utf-8')
        encoded_payload = base64.b64encode(payload)
        if not isinstance(encoded_payload, str):
            # Keep the stored value a text string so json.dump() accepts it.
            encoded_payload = encoded_payload.decode('ascii')
        test_run['output_snippet_base64'] = encoded_payload
        test_run['snippet_processed_by'] = 'asan_symbolize.py'
        # Originally, "lossless" refers to "no Unicode data lost while encoding
        # the string". However, since we're applying another kind of
        # transformation (symbolization), it doesn't seem right to consider the
        # snippet lossless.
        test_run['losless_snippet'] = False
| |
| |
def symbolize_snippets_in_json(filename, symbolization_loop):
    """Symbolize every test-run snippet in the JSON file, rewriting it in place.

    Args:
        filename: path to a JSON file whose top-level 'per_iteration_data'
            entry is a list of mappings from a test name to a list of test
            runs.
        symbolization_loop: object handed to JSONTestRunSymbolizer to
            symbolize individual lines.
    """
    with open(filename, 'r') as f:
        json_data = json.load(f)

    test_run_symbolizer = JSONTestRunSymbolizer(symbolization_loop)
    for iteration_data in json_data['per_iteration_data']:
        # Only the runs are needed, not the test names. values() also exists
        # on both Python 2 and Python 3, unlike iteritems().
        for test_runs in iteration_data.values():
            for test_run in test_runs:
                test_run_symbolizer.symbolize(test_run)

    with open(filename, 'w') as f:
        json.dump(json_data, f, indent=3, sort_keys=True)
| |
| |
def main():
    """Symbolize sanitizer reports from stdin or from a test-summary JSON file.

    With --test-summary-json-file, the snippets inside that file are
    symbolized and the file is rewritten; otherwise standard input is
    processed through the symbolization loop.
    """
    parser = argparse.ArgumentParser(description='Symbolize sanitizer reports.')
    parser.add_argument('--test-summary-json-file',
        help='Path to a JSON file produced by the test launcher. The script will '
             'ignore standard input and instead symbolize the output snippets '
             'inside the JSON file. The result will be written back to the JSON '
             'file.')
    parser.add_argument('strip_path_prefix', nargs='*',
        help='When printing source file names, the longest prefix ending in one '
             'of these substrings will be stripped. E.g.: "Release/../../".')
    args = parser.parse_args()

    disable_buffering()
    set_symbolizer_path()
    # Configure the third-party symbolizer module through its module globals.
    asan_symbolize.demangle = True
    asan_symbolize.fix_filename_patterns = args.strip_path_prefix
    loop = asan_symbolize.SymbolizationLoop(dsym_hint_producer=chrome_dsym_hints)

    if args.test_summary_json_file:
        symbolize_snippets_in_json(args.test_summary_json_file, loop)
    else:
        # Process stdin.
        asan_symbolize.logfile = sys.stdin
        loop.process_logfile()


if __name__ == '__main__':
    main()