James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Wrapper script to help run clang tools across Chromium code. |
| 7 | |
| 8 | How to use this tool: |
| 9 | If you want to run the tool across all Chromium code: |
| 10 | run_tool.py <tool> <path/to/compiledb> |
| 11 | |
James Robinson | c4c1c59 | 2014-11-21 18:27:04 -0800 | [diff] [blame] | 12 | If you want to include all files mentioned in the compilation database: |
| 13 | run_tool.py <tool> <path/to/compiledb> --all |
| 14 | |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 15 | If you only want to run the tool across just chrome/browser and content/browser: |
| 16 | run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser |
| 17 | |
| 18 | Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more |
| 19 | information, which documents the entire automated refactoring flow in Chromium. |
| 20 | |
| 21 | Why use this tool: |
| 22 | The clang tool implementation doesn't take advantage of multiple cores, and if |
| 23 | it fails mysteriously in the middle, all the generated replacements will be |
| 24 | lost. |
| 25 | |
| 26 | Unfortunately, if the work is simply sharded across multiple cores by running |
| 27 | multiple RefactoringTools, problems arise when they attempt to rewrite a file at |
| 28 | the same time. To work around that, clang tools that are run using this tool |
| 29 | should output edits to stdout in the following format: |
| 30 | |
| 31 | ==== BEGIN EDITS ==== |
| 32 | r:::<file path>:::<offset>:::<length>:::<replacement text> |
| 33 | r:::<file path>:::<offset>:::<length>:::<replacement text> |
| 34 | ...etc... |
| 35 | ==== END EDITS ==== |
| 36 | |
| 37 | Any generated edits are applied once the clang tool has finished running |
| 38 | across Chromium, regardless of whether some instances failed or not. |
| 39 | """ |
| 40 | |
| 41 | import collections |
| 42 | import functools |
James Robinson | c4c1c59 | 2014-11-21 18:27:04 -0800 | [diff] [blame] | 43 | import json |
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 44 | import multiprocessing |
| 45 | import os.path |
| 46 | import pipes |
| 47 | import subprocess |
| 48 | import sys |
| 49 | |
| 50 | |
# A single edit reported by a clang tool for one file: the edit type tag, the
# byte offset and length of the range being replaced, and the replacement text.
Edit = collections.namedtuple(
    'Edit', 'edit_type offset length replacement')
| 53 | |
| 54 | |
def _GetFilesFromGit(paths=None):
  """Lists the files tracked by git in the current repository.

  Args:
    paths: Optional prefix filters handed to `git ls-files`. May contain
      multiple entries; when empty or None, every tracked file is listed.

  Returns:
    A list with the os.path.realpath() of every line printed by git.
  """
  # depot_tools ships git as a batch file on Windows.
  git_binary = 'git.bat' if sys.platform == 'win32' else 'git'
  command_line = [git_binary, 'ls-files']
  command_line.extend(paths or [])
  listing, _ = subprocess.Popen(
      command_line, stdout=subprocess.PIPE).communicate()
  return [os.path.realpath(entry) for entry in listing.splitlines()]
James Robinson | 646469d | 2014-10-03 15:33:28 -0700 | [diff] [blame] | 72 | |
| 73 | |
def _GetFilesFromCompileDB(build_directory):
  """Lists the files named in the compilation database.

  Args:
    build_directory: Directory that contains compile_commands.json.

  Returns:
    A list with one path per database entry, formed by joining the entry's
    "directory" and "file" values. Paths are not normalized any further.
  """
  database_path = os.path.join(build_directory, 'compile_commands.json')
  with open(database_path, 'rb') as database:
    entries = json.load(database)

  files = []
  for entry in entries:
    files.append(os.path.join(entry['directory'], entry['file']))
  return files
| 86 | |
| 87 | |
def _ExtractEditsFromStdout(build_directory, stdout):
  """Parses the edit list that a clang tool printed to stdout.

  Only the lines between the '==== BEGIN EDITS ====' and '==== END EDITS ===='
  markers are considered. Each of them is split on ':::' into an edit type, a
  file path, an offset, a length and the replacement text. NUL characters in
  the replacement text stand for newlines.

  Args:
    build_directory: Directory that contains the compile database. File paths
      emitted by the tool are resolved against it.
    stdout: The stdout from running the clang tool.

  Returns:
    A dictionary mapping normalized filenames to the list of associated Edit
    instances. Lines that cannot be parsed are reported and skipped.

  Raises:
    ValueError: if either marker line is absent from stdout.
  """
  output_lines = stdout.splitlines()
  first_edit = output_lines.index('==== BEGIN EDITS ====') + 1
  past_last_edit = output_lines.index('==== END EDITS ====')
  edits_by_file = collections.defaultdict(list)
  for raw_edit in output_lines[first_edit:past_last_edit]:
    try:
      kind, emitted_path, offset, length, text = raw_edit.split(':::', 4)
      text = text.replace('\0', '\n')
      # Normalize the file path emitted by the clang tool.
      target = os.path.realpath(os.path.join(build_directory, emitted_path))
      edits_by_file[target].append(Edit(kind, int(offset), int(length), text))
    except ValueError:
      print('Unable to parse edit: %s' % raw_edit)
  return edits_by_file
| 115 | |
| 116 | |
def _ExecuteTool(toolname, build_directory, filename):
  """Runs the clang tool over a single file and collects its edits.

  This is defined outside the class so it can be pickled for the
  multiprocessing module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that always contains the key "status" with a boolean value.

    If status is True, the generated edits are stored with the key "edits" in
    the dictionary.

    Otherwise, the filename and the failure output are associated with the
    keys "filename" and "stderr" respectively. This covers both a non-zero
    exit code and a tool that exits cleanly without printing the edit markers.
  """
  command = subprocess.Popen((toolname, '-p', build_directory, filename),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
  stdout, stderr = command.communicate()
  if command.returncode != 0:
    return {'status': False, 'filename': filename, 'stderr': stderr}

  try:
    edits = _ExtractEditsFromStdout(build_directory, stdout)
  except ValueError:
    # A missing BEGIN/END marker surfaces as ValueError. If it escaped this
    # worker it would be re-raised in the parent while iterating the pool
    # results, aborting the run and discarding every edit collected so far.
    # Report it as an ordinary per-file failure instead.
    return {'status': False,
            'filename': filename,
            'stderr': 'Tool output is missing the edit markers.\n%s' % stderr}
  return {'status': True, 'edits': edits}
| 147 | |
| 148 | |
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel.

  One tool invocation is scheduled per file. The edits reported by the
  successful invocations are merged per file and exposed through |edits|; the
  number of failed invocations is exposed through |failed_count|.
  """

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over. Must support len().
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected for them."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of tool invocations that reported a failure."""
    return self.__failed_count

  def Run(self):
    """Runs the tool over every file and gathers the results.

    Results are handled in completion order, and a progress line is rewritten
    on stdout after each one.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    finally:
      # On the normal path every result has been consumed, so the workers are
      # idle. On the error path nothing will read further results. Either way
      # the worker processes must not outlive this call.
      pool.terminate()
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for filename, file_edits in result['edits'].items():
        self.__edits[filename].extend(file_edits)
        self.__edit_count += len(file_edits)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing '\r' makes the next progress line overwrite this one.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, self.__edit_count,
        percentage))
    sys.stdout.flush()
| 211 | |
| 212 | |
def _ReformatChangedLines(path, clang_format_diff_path):
  """Pipes `git diff -U0 <path>` into clang-format-diff.py.

  The two processes are connected directly instead of through a shell command
  line, so neither path needs quoting.

  Args:
    path: The file whose uncommitted changes should be reformatted.
    clang_format_diff_path: Path to the clang-format-diff.py helper.

  Returns:
    True if the formatter could be run and exited with status 0.
  """
  try:
    git_diff = subprocess.Popen(('git', 'diff', '-U0', path),
                                stdout=subprocess.PIPE)
  except OSError:
    return False
  try:
    formatter = subprocess.Popen(
        ('python', clang_format_diff_path, '-i', '-p1', '-style=file'),
        stdin=git_diff.stdout)
  except OSError:
    formatter = None
  # Drop our copy of the pipe so git is not left blocked on a reader that
  # will never come if the formatter exits early or failed to start.
  git_diff.stdout.close()
  formatter_status = formatter.wait() if formatter else 1
  git_diff.wait()
  return formatter_status == 0


def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Each file is rewritten in place. Afterwards, if requested, the lines that
  differ from git's view of the file are reformatted with clang-format.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
      The per-file lists are sorted in place.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
      automatically reformat diffs to avoid style violations. Pass None if the
      clang-format step should be skipped.
  """
  edit_count = 0
  for path, file_edits in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied.
    file_edits.sort()
    last_edit = None
    with open(path, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(file_edits):
        if edit == last_edit:
          continue
        last_edit = edit
        contents[edit.offset:edit.offset + edit.length] = edit.replacement
        if not edit.replacement:
          # A pure deletion may leave a dangling list separator behind.
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
    # The file is closed at this point, so git sees the rewritten contents.
    if clang_format_diff_path:
      if not _ReformatChangedLines(path, clang_format_diff_path):
        print('clang-format failed for %s' % path)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
| 251 | |
| 252 | |
_WHITESPACE_BYTES = frozenset(ord(char) for char in '\t\n\r ')


def _ScanToFirstNonWhitespace(byte_values):
  """Consumes bytes up to and including the first non-whitespace one.

  Args:
    byte_values: An iterable of integer byte values.

  Returns:
    A (count, value) pair: how many bytes were consumed, and the first
    non-whitespace byte, or None if only whitespace was seen.
  """
  consumed = 0
  for value in byte_values:
    consumed += 1
    if value not in _WHITESPACE_BYTES:
      return consumed, value
  return consumed, None


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Widens a deletion so that removing a list element leaves a valid list.

  Refactoring tools can delete just the element itself and rely on this
  helper to take the neighbouring comma (or the ':' that introduces a list)
  with it. A tool often cannot do this reliably on its own: another edit may
  remove an adjacent element, or every element may end up being removed.

  The bytes around |offset| are inspected, ignoring whitespace:
    * Nothing happens unless the deletion is preceded by ',', ':', '(' or '{'.
    * If a ',' follows, everything from |offset| through that comma is removed.
    * Otherwise, if the preceding byte is ',' or ':', that byte and the
      whitespace after it are removed.

  Args:
    contents: A bytearray with the deletion already applied. Modified in
      place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  list_openers = frozenset(ord(char) for char in ',:({')
  removable_openers = frozenset(ord(char) for char in ',:')

  left_trim_count, byte_before = _ScanToFirstNonWhitespace(
      reversed(contents[:offset]))
  right_trim_count, byte_after = _ScanToFirstNonWhitespace(contents[offset:])

  if byte_before not in list_openers:
    return
  if byte_after == ord(','):
    del contents[offset:offset + right_trim_count]
  elif byte_before in removable_openers:
    del contents[offset - left_trim_count:offset]
| 296 | |
| 297 | |
def main(argv):
  """Runs a clang tool over the selected files and applies its edits.

  Args:
    argv: Command-line arguments without the program name: the clang tool, the
      directory containing the compile database, and then either '--all' or
      any number of path filters.

  Returns:
    The process exit code: 1 if too few arguments were given, 2 if the tool
    failed on at least one file, and 0 otherwise.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print(' <clang tool> is the clang tool that should be run.')
    print(' <compile db> is the directory that contains the compile database')
    print(' <path 1> <path2> ... can be used to filter what files are edited')
    print(' --all in place of the paths runs over every compile database file')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
  # on Windows.
  if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
    clang_format_diff_path = None

  if len(argv) == 3 and argv[2] == '--all':
    filenames = set(_GetFilesFromCompileDB(argv[1]))
    source_filenames = filenames
  else:
    filenames = set(_GetFilesFromGit(argv[2:]))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.m', '.mm'))
    source_filenames = [f for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
  dispatcher.Run()

  # Edit paths are compared in realpath form, so the candidate files must be
  # normalized the same way. Compile database entries are plain joins of
  # "directory" and "file" and may contain '..' segments or symlinks; without
  # this step such entries never match and their edits are silently dropped.
  editable_files = frozenset(os.path.realpath(f) for f in filenames)

  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in editable_files},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))