#!/usr/bin/env python
# Copyright (c) 2013 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Wrapper script to help run clang tools across Chromium code.

How to use this tool:
If you want to run the tool across all Chromium code:
run_tool.py <tool> <path/to/compiledb>

If you want to include all files mentioned in the compilation database:
run_tool.py <tool> <path/to/compiledb> --all

If you only want to run the tool across just chrome/browser and content/browser:
run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser

Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
information, which documents the entire automated refactoring flow in Chromium.

Why use this tool:
The clang tool implementation doesn't take advantage of multiple cores, and if
it fails mysteriously in the middle, all the generated replacements will be
lost.

Unfortunately, if the work is simply sharded across multiple cores by running
multiple RefactoringTools, problems arise when they attempt to rewrite a file at
the same time. To work around that, clang tools that are run using this tool
should output edits to stdout in the following format:

==== BEGIN EDITS ====
r:::<file path>:::<offset>:::<length>:::<replacement text>
r:::<file path>:::<offset>:::<length>:::<replacement text>
...etc...
==== END EDITS ====

Each edit occupies exactly one line: the fields are separated by three colons,
and any newline inside <replacement text> must be emitted as a NUL character,
which this script converts back into a newline.

Any generated edits are applied once the clang tool has finished running
across Chromium, regardless of whether some instances failed or not.
"""
40
import collections
import functools
import json
import multiprocessing
import os.path
import pipes
import subprocess
import sys
49
50
# A single replacement emitted by a clang tool for one file:
#   edit_type:   the tag in the first field of the edit line (e.g. 'r').
#   offset:      integer offset into the file where the replaced range starts.
#   length:      integer length of the replaced range.
#   replacement: the text to put in place of that range.
Edit = collections.namedtuple(
    'Edit', ('edit_type', 'offset', 'length', 'replacement'))
53
54
def _GetFilesFromGit(paths=None):
  """Gets the list of files in the git repository.

  Runs `git ls-files` (`git.bat` on Windows) in the current working directory.

  Args:
    paths: Prefix filter for the returned paths. May contain multiple entries.
        When empty or None, every file known to git is returned.

  Returns:
    A list with os.path.realpath() applied to every path printed by git.
  """
  git = 'git.bat' if sys.platform == 'win32' else 'git'
  args = [git, 'ls-files']
  if paths:
    args.extend(paths)
  # universal_newlines=True makes communicate() hand back text on Python 3 as
  # well, so the returned paths are native strings that compare equal to the
  # paths produced elsewhere in this script.
  command = subprocess.Popen(args, stdout=subprocess.PIPE,
                             universal_newlines=True)
  output, _ = command.communicate()
  return [os.path.realpath(p) for p in output.splitlines()]
James Robinson646469d2014-10-03 15:33:28 -070072
73
def _GetFilesFromCompileDB(build_directory):
  """Gets the list of files mentioned in the compilation database.

  Args:
    build_directory: Directory that contains the compile database
        (compile_commands.json).

  Returns:
    A list with one normalized absolute path per compile database entry.
  """
  compiledb_path = os.path.join(build_directory, 'compile_commands.json')
  with open(compiledb_path, 'rb') as compiledb_file:
    json_commands = json.load(compiledb_file)

  # Entries commonly name the file relative to their build directory (e.g.
  # '../../base/foo.cc'). Normalize with realpath so that these paths compare
  # equal to the realpath-normalized paths used as keys for the edits;
  # otherwise every edit is filtered out when running with --all.
  return [os.path.realpath(os.path.join(entry['directory'], entry['file']))
          for entry in json_commands]
86
87
def _ExtractEditsFromStdout(build_directory, stdout):
  """Extracts generated list of edits from the tool's stdout.

  Only lines between '==== BEGIN EDITS ====' and '==== END EDITS ====' are
  considered. Each such line is split on ':::' into five fields: edit type,
  file path, offset, length and replacement text. NUL characters in the
  replacement text stand for newlines.

  Args:
    build_directory: Directory that contains the compile database. Used to
        normalize the filenames.
    stdout: The stdout from running the clang tool (text or bytes).

  Returns:
    A dictionary mapping filenames to the associated edits. The dictionary is
    empty when the output contains no edits section. Lines that cannot be
    parsed are reported on stdout and skipped.
  """
  edits = collections.defaultdict(list)
  begin_marker = '==== BEGIN EDITS ===='
  end_marker = '==== END EDITS ===='
  # On Python 3 a subprocess pipe yields bytes. Split while still in bytes so
  # that only \n / \r end a line (as on Python 2), and decode line by line.
  is_binary = isinstance(stdout, bytes) and not isinstance(stdout, str)
  if is_binary:
    begin_marker = begin_marker.encode('ascii')
    end_marker = end_marker.encode('ascii')
  lines = stdout.splitlines()
  try:
    start_index = lines.index(begin_marker)
    end_index = lines.index(end_marker)
  except ValueError:
    # The tool did not print an edits section. Treat that as "no edits"
    # instead of letting the exception abort the whole run.
    return edits

  for line in lines[start_index + 1:end_index]:
    try:
      if is_binary:
        line = line.decode('utf-8')  # UnicodeDecodeError is a ValueError.
      edit_type, path, offset, length, replacement = line.split(':::', 4)
      replacement = replacement.replace('\0', '\n')
      # Build the edit before touching the dictionary, so a malformed offset
      # or length does not leave behind an empty entry for the file.
      edit = Edit(edit_type, int(offset), int(length), replacement)
    except ValueError:
      print('Unable to parse edit: %s' % line)
      continue
    # Normalize the file path emitted by the clang tool.
    path = os.path.realpath(os.path.join(build_directory, path))
    edits[path].append(edit)
  return edits
115
116
def _ExecuteTool(toolname, build_directory, filename):
  """Executes the tool.

  This is defined outside the class so it can be pickled for the multiprocessing
  module.

  Args:
    toolname: Path to the tool to execute.
    build_directory: Directory that contains the compile database.
    filename: The file to run the tool over.

  Returns:
    A dictionary that must contain the key "status" and a boolean value
    associated with it.

    If status is True, then the generated edits are stored with the key "edits"
    in the dictionary.

    Otherwise, the filename and the output from stderr are associated with the
    keys "filename" and "stderr" respectively. A tool that cannot be launched
    at all is reported the same way, with the launch error as "stderr".
  """
  try:
    command = subprocess.Popen((toolname, '-p', build_directory, filename),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
  except OSError as e:
    # Report a missing or non-executable tool as a failed file rather than
    # raising out of the worker process, which would abort the whole run and
    # lose the edits collected so far.
    return {'status': False,
            'filename': filename,
            'stderr': 'Unable to launch %s: %s\n' % (toolname, e)}

  stdout, stderr = command.communicate()
  if command.returncode != 0:
    # On Python 3 the pipe yields bytes; the caller writes this to
    # sys.stdout, which needs text.
    if isinstance(stderr, bytes) and not isinstance(stderr, str):
      stderr = stderr.decode('utf-8', 'replace')
    return {'status': False, 'filename': filename, 'stderr': stderr}
  return {'status': True,
          'edits': _ExtractEditsFromStdout(build_directory, stdout)}
147
148
class _CompilerDispatcher(object):
  """Multiprocessing controller for running clang tools in parallel."""

  def __init__(self, toolname, build_directory, filenames):
    """Initializer method.

    Args:
      toolname: Path to the tool to execute.
      build_directory: Directory that contains the compile database.
      filenames: The files to run the tool over.
    """
    self.__toolname = toolname
    self.__build_directory = build_directory
    self.__filenames = filenames
    self.__success_count = 0
    self.__failed_count = 0
    self.__edit_count = 0
    self.__edits = collections.defaultdict(list)

  @property
  def edits(self):
    """Dictionary mapping filenames to the edits collected so far."""
    return self.__edits

  @property
  def failed_count(self):
    """Number of files for which the tool reported a failure."""
    return self.__failed_count

  def Run(self):
    """Does the grunt work.

    Runs the tool over every file using a pool of worker processes and folds
    each result into this object as soon as it becomes available.
    """
    pool = multiprocessing.Pool()
    try:
      result_iterator = pool.imap_unordered(
          functools.partial(_ExecuteTool, self.__toolname,
                            self.__build_directory),
          self.__filenames)
      for result in result_iterator:
        self.__ProcessResult(result)
    except BaseException:
      # Includes KeyboardInterrupt: stop the workers right away instead of
      # waiting for the remaining files to be processed.
      pool.terminate()
      raise
    else:
      pool.close()
    finally:
      # Always reap the worker processes so none are leaked.
      pool.join()
    sys.stdout.write('\n')
    sys.stdout.flush()

  def __ProcessResult(self, result):
    """Handles result processing.

    Updates the counters and collected edits, then rewrites the progress line.

    Args:
      result: The result dictionary returned by _ExecuteTool.
    """
    if result['status']:
      self.__success_count += 1
      for k, v in result['edits'].items():
        self.__edits[k].extend(v)
        self.__edit_count += len(v)
    else:
      self.__failed_count += 1
      sys.stdout.write('\nFailed to process %s\n' % result['filename'])
      sys.stdout.write(result['stderr'])
      sys.stdout.write('\n')
    percentage = (
        float(self.__success_count + self.__failed_count) /
        len(self.__filenames)) * 100
    # The trailing \r keeps the progress report on a single terminal line.
    sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
        self.__success_count, self.__failed_count, self.__edit_count,
        percentage))
    sys.stdout.flush()
211
212
def _ApplyEdits(edits, clang_format_diff_path):
  """Apply the generated edits.

  Every file is rewritten in place. The list of edits of each file is sorted
  in place as a side effect.

  Args:
    edits: A dict mapping filenames to Edit instances that apply to that file.
    clang_format_diff_path: Path to the clang-format-diff.py helper to help
        automatically reformat diffs to avoid style violations. Pass None if the
        clang-format step should be skipped.
  """
  # shlex.quote is the public name since Python 3.3; pipes.quote is the only
  # spelling available on Python 2.7.
  try:
    from shlex import quote as shell_quote
  except ImportError:
    from pipes import quote as shell_quote

  edit_count = 0
  for k, v in edits.items():
    # Sort the edits and iterate through them in reverse order. Sorting allows
    # duplicate edits to be quickly skipped, while reversing means that
    # subsequent edits don't need to have their offsets updated with each edit
    # applied.
    v.sort()
    last_edit = None
    with open(k, 'rb+') as f:
      contents = bytearray(f.read())
      for edit in reversed(v):
        if edit == last_edit:
          continue
        last_edit = edit
        replacement = edit.replacement
        # A bytearray slice only accepts bytes on Python 3, while parsed
        # replacement text is a str there.
        if not isinstance(replacement, bytes):
          replacement = replacement.encode('utf-8')
        contents[edit.offset:edit.offset + edit.length] = replacement
        if not replacement:
          _ExtendDeletionIfElementIsInList(contents, edit.offset)
        edit_count += 1
      f.seek(0)
      f.truncate()
      f.write(contents)
    if clang_format_diff_path:
      # Both paths are quoted: the helper's location is derived from where
      # this script lives and may contain spaces just like the edited file.
      if subprocess.call('git diff -U0 %s | python %s -i -p1 -style=file ' % (
          shell_quote(k), shell_quote(clang_format_diff_path)),
                         shell=True) != 0:
        print('clang-format failed for %s' % k)
  print('Applied %d edits to %d files' % (edit_count, len(edits)))
251
252
# Byte values that are skipped when looking for the punctuation around a
# deleted range.
_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))


def _ExtendDeletionIfElementIsInList(contents, offset):
  """Extends the range of a deletion if the deleted element was part of a list.

  This rewriter helper makes it easy for refactoring tools to remove elements
  from a list. Even if a matcher callback knows that it is removing an element
  from a list, it may not have enough information to accurately remove the list
  element; for example, another matcher callback may end up removing an adjacent
  list element, or all the list elements may end up being removed.

  With this helper, refactoring tools can simply remove the list element and not
  worry about having to include the comma in the replacement.

  Only the first non-whitespace byte on each side of the deletion is examined.
  If the byte before is one of , : ( { and the byte after is a comma, the
  whitespace and comma after the deletion are removed. Otherwise, if the byte
  before is a comma or colon, that byte and the whitespace following it are
  removed. In every other case the contents are left untouched.

  Args:
    contents: A bytearray with the deletion already applied. Modified in place.
    offset: The offset in the bytearray where the deleted range used to be.
  """
  char_before = char_after = None

  # Walk backwards to the first non-whitespace byte before the deletion.
  left_trim_count = 0
  for byte in reversed(contents[:offset]):
    left_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte in (ord(','), ord(':'), ord('('), ord('{')):
      char_before = chr(byte)
    # Stop at the first non-whitespace byte, whatever it is: scanning further
    # back would pick up punctuation unrelated to the deleted element.
    break

  # Walk forwards to the first non-whitespace byte after the deletion.
  right_trim_count = 0
  for byte in contents[offset:]:
    right_trim_count += 1
    if byte in _WHITESPACE_BYTES:
      continue
    if byte == ord(','):
      char_after = chr(byte)
    break

  if char_before:
    if char_after:
      # An element follows: drop the separator that trailed the deleted one.
      del contents[offset:offset + right_trim_count]
    elif char_before in (',', ':'):
      # The deleted element was the last one: drop the separator before it.
      del contents[offset - left_trim_count:offset]
296
297
def main(argv):
  """Runs a clang tool over the selected files and applies its edits.

  Args:
    argv: Command line arguments without the program name: the clang tool, the
        directory containing the compile database, then either '--all' or any
        number of path filters.

  Returns:
    The process exit code: 1 if too few arguments were given, 2 if the tool
    failed on at least one file, 0 otherwise.
  """
  if len(argv) < 2:
    print('Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...')
    print(' <clang tool> is the clang tool that should be run.')
    print(' <compile db> is the directory that contains the compile database')
    print(' <path 1> <path2> ... can be used to filter what files are edited')
    return 1

  clang_format_diff_path = os.path.join(
      os.path.dirname(os.path.realpath(__file__)),
      '../../../third_party/llvm/tools/clang/tools/clang-format',
      'clang-format-diff.py')
  # TODO(dcheng): Allow this to be controlled with a flag as well.
  # TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
  # on Windows.
  if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
    clang_format_diff_path = None

  if len(argv) == 3 and argv[2] == '--all':
    source_filenames = set(_GetFilesFromCompileDB(argv[1]))
    # Edits are filtered below by comparing realpath-normalized names, so the
    # set of editable files has to be normalized the same way; compile
    # database entries are often spelled like 'out/Debug/../../foo.cc'.
    filenames = set(os.path.realpath(f) for f in source_filenames)
  else:
    filenames = set(_GetFilesFromGit(argv[2:]))
    # Filter out files that aren't C/C++/Obj-C/Obj-C++.
    extensions = frozenset(('.c', '.cc', '.m', '.mm'))
    source_filenames = [f for f in filenames
                        if os.path.splitext(f)[1] in extensions]
  dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
  dispatcher.Run()
  # Filter out edits to files that aren't in the git repository, since it's not
  # useful to modify files that aren't under source control--typically, these
  # are generated files or files in a git submodule that's not part of Chromium.
  _ApplyEdits({k: v for k, v in dispatcher.edits.items()
               if os.path.realpath(k) in filenames},
              clang_format_diff_path)
  if dispatcher.failed_count != 0:
    return 2
  return 0


if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))