Blame - tools/clang/scripts/run_tool.py - mojo

blob: 56cd5d24f8be2f400190136d9705972f9fb1e6c4 [file] [log] [blame]

James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright (c) 2013 The Chromium Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6	"""Wrapper script to help run clang tools across Chromium code.
				7
				8	How to use this tool:
				9	If you want to run the tool across all Chromium code:
				10	run_tool.py <tool> <path/to/compiledb>
				11
James Robinson	c4c1c59	2014-11-21 18:27:04 -0800	[diff] [blame]	12	If you want to include all files mentioned in the compilation database:
				13	run_tool.py <tool> <path/to/compiledb> --all
				14
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	15	If you only want to run the tool across just chrome/browser and content/browser:
				16	run_tool.py <tool> <path/to/compiledb> chrome/browser content/browser
				17
				18	Please see https://code.google.com/p/chromium/wiki/ClangToolRefactoring for more
				19	information, which documents the entire automated refactoring flow in Chromium.
				20
				21	Why use this tool:
				22	The clang tool implementation doesn't take advantage of multiple cores, and if
				23	it fails mysteriously in the middle, all the generated replacements will be
				24	lost.
				25
				26	Unfortunately, if the work is simply sharded across multiple cores by running
				27	multiple RefactoringTools, problems arise when they attempt to rewrite a file at
				28	the same time. To work around that, clang tools that are run using this tool
				29	should output edits to stdout in the following format:
				30
				31	==== BEGIN EDITS ====
				32	r:::<file path>:::<offset>:::<length>:::<replacement text>
				33	r:::<file path>:::<offset>:::<length>:::<replacement text>
				34	...etc...
				35	==== END EDITS ====
				36
				37	Any generated edits are applied once the clang tool has finished running
				38	across Chromium, regardless of whether some instances failed or not.
				39	"""
				40
				41	import collections
				42	import functools
James Robinson	c4c1c59	2014-11-21 18:27:04 -0800	[diff] [blame]	43	import json
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	44	import multiprocessing
				45	import os.path
				46	import pipes
				47	import subprocess
				48	import sys
				49
				50
				51	Edit = collections.namedtuple(
				52	'Edit', ('edit_type', 'offset', 'length', 'replacement'))
				53
				54
				55	def _GetFilesFromGit(paths = None):
				56	"""Gets the list of files in the git repository.
				57
				58	Args:
				59	paths: Prefix filter for the returned paths. May contain multiple entries.
				60	"""
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	61	args = []
				62	if sys.platform == 'win32':
				63	args.append('git.bat')
				64	else:
				65	args.append('git')
				66	args.append('ls-files')
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	67	if paths:
				68	args.extend(paths)
				69	command = subprocess.Popen(args, stdout=subprocess.PIPE)
				70	output, _ = command.communicate()
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	71	return [os.path.realpath(p) for p in output.splitlines()]
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	72
				73
def _GetFilesFromCompileDB(build_directory):
  """Lists every file mentioned in the compilation database.

  Args:
    build_directory: Directory that contains compile_commands.json.

  Returns:
    A list with one path per database entry, formed by joining the entry's
    'directory' and 'file' values. The paths are not normalized.
  """
  database_path = os.path.join(build_directory, 'compile_commands.json')
  with open(database_path, 'rb') as database:
    entries = json.load(database)

  paths = []
  for entry in entries:
    paths.append(os.path.join(entry['directory'], entry['file']))
  return paths
				86
				87
def _ExtractEditsFromStdout(build_directory, stdout):
  """Parses the edits a clang tool printed between the edit markers.

  Only the lines strictly between '==== BEGIN EDITS ====' and
  '==== END EDITS ====' are considered. Each one is split on ':::' into
  edit type, path, offset, length and replacement text; NUL characters in the
  replacement are turned into newlines. Lines that cannot be parsed are
  reported on stdout and skipped.

  Args:
    build_directory: Directory that contains the compile database. Paths
        emitted by the tool are joined onto it and then resolved with
        os.path.realpath().
    stdout: The stdout from running the clang tool.

  Returns:
    A defaultdict(list) mapping each normalized filename to its Edit list.

  Raises:
    ValueError: If either marker line is absent from |stdout|.
  """
  output_lines = stdout.splitlines()
  first = output_lines.index('==== BEGIN EDITS ====') + 1
  last = output_lines.index('==== END EDITS ====')
  edits_by_file = collections.defaultdict(list)
  for raw_edit in output_lines[first:last]:
    try:
      kind, tool_path, offset, length, text = raw_edit.split(':::', 4)
      text = text.replace("\0", "\n")
      # Normalize the file path emitted by the clang tool.
      resolved = os.path.realpath(os.path.join(build_directory, tool_path))
      edits_by_file[resolved].append(
          Edit(kind, int(offset), int(length), text))
    except ValueError:
      print('Unable to parse edit: %s' % raw_edit)
  return edits_by_file
				115
				116
				117	def _ExecuteTool(toolname, build_directory, filename):
				118	"""Executes the tool.
				119
				120	This is defined outside the class so it can be pickled for the multiprocessing
				121	module.
				122
				123	Args:
				124	toolname: Path to the tool to execute.
				125	build_directory: Directory that contains the compile database.
				126	filename: The file to run the tool over.
				127
				128	Returns:
				129	A dictionary that must contain the key "status" and a boolean value
				130	associated with it.
				131
				132	If status is True, then the generated edits are stored with the key "edits"
				133	in the dictionary.
				134
				135	Otherwise, the filename and the output from stderr are associated with the
				136	keys "filename" and "stderr" respectively.
				137	"""
				138	command = subprocess.Popen((toolname, '-p', build_directory, filename),
				139	stdout=subprocess.PIPE,
				140	stderr=subprocess.PIPE)
				141	stdout, stderr = command.communicate()
				142	if command.returncode != 0:
				143	return {'status': False, 'filename': filename, 'stderr': stderr}
				144	else:
				145	return {'status': True,
				146	'edits': _ExtractEditsFromStdout(build_directory, stdout)}
				147
				148
				149	class _CompilerDispatcher(object):
				150	"""Multiprocessing controller for running clang tools in parallel."""
				151
				152	def __init__(self, toolname, build_directory, filenames):
				153	"""Initializer method.
				154
				155	Args:
				156	toolname: Path to the tool to execute.
				157	build_directory: Directory that contains the compile database.
				158	filenames: The files to run the tool over.
				159	"""
				160	self.__toolname = toolname
				161	self.__build_directory = build_directory
				162	self.__filenames = filenames
				163	self.__success_count = 0
				164	self.__failed_count = 0
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	165	self.__edit_count = 0
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	166	self.__edits = collections.defaultdict(list)
				167
				168	@property
				169	def edits(self):
				170	return self.__edits
				171
				172	@property
				173	def failed_count(self):
				174	return self.__failed_count
				175
				176	def Run(self):
				177	"""Does the grunt work."""
				178	pool = multiprocessing.Pool()
				179	result_iterator = pool.imap_unordered(
				180	functools.partial(_ExecuteTool, self.__toolname,
				181	self.__build_directory),
				182	self.__filenames)
				183	for result in result_iterator:
				184	self.__ProcessResult(result)
				185	sys.stdout.write('\n')
				186	sys.stdout.flush()
				187
				188	def __ProcessResult(self, result):
				189	"""Handles result processing.
				190
				191	Args:
				192	result: The result dictionary returned by _ExecuteTool.
				193	"""
				194	if result['status']:
				195	self.__success_count += 1
				196	for k, v in result['edits'].iteritems():
				197	self.__edits[k].extend(v)
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	198	self.__edit_count += len(v)
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	199	else:
				200	self.__failed_count += 1
				201	sys.stdout.write('\nFailed to process %s\n' % result['filename'])
				202	sys.stdout.write(result['stderr'])
				203	sys.stdout.write('\n')
				204	percentage = (
				205	float(self.__success_count + self.__failed_count) /
				206	len(self.__filenames)) * 100
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	207	sys.stdout.write('Succeeded: %d, Failed: %d, Edits: %d [%.2f%%]\r' % (
				208	self.__success_count, self.__failed_count, self.__edit_count,
				209	percentage))
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	210	sys.stdout.flush()
				211
				212
				213	def _ApplyEdits(edits, clang_format_diff_path):
				214	"""Apply the generated edits.
				215
				216	Args:
				217	edits: A dict mapping filenames to Edit instances that apply to that file.
				218	clang_format_diff_path: Path to the clang-format-diff.py helper to help
				219	automatically reformat diffs to avoid style violations. Pass None if the
				220	clang-format step should be skipped.
				221	"""
				222	edit_count = 0
				223	for k, v in edits.iteritems():
				224	# Sort the edits and iterate through them in reverse order. Sorting allows
				225	# duplicate edits to be quickly skipped, while reversing means that
				226	# subsequent edits don't need to have their offsets updated with each edit
				227	# applied.
				228	v.sort()
				229	last_edit = None
				230	with open(k, 'rb+') as f:
				231	contents = bytearray(f.read())
				232	for edit in reversed(v):
				233	if edit == last_edit:
				234	continue
				235	last_edit = edit
				236	contents[edit.offset:edit.offset + edit.length] = edit.replacement
				237	if not edit.replacement:
				238	_ExtendDeletionIfElementIsInList(contents, edit.offset)
				239	edit_count += 1
				240	f.seek(0)
				241	f.truncate()
				242	f.write(contents)
				243	if clang_format_diff_path:
				244	# TODO(dcheng): python3.3 exposes this publicly as shlex.quote, but Chrome
				245	# uses python2.7. Use the deprecated interface until Chrome uses a newer
				246	# Python.
				247	if subprocess.call('git diff -U0 %s \| python %s -i -p1 -style=file ' % (
				248	pipes.quote(k), clang_format_diff_path), shell=True) != 0:
				249	print 'clang-format failed for %s' % k
				250	print 'Applied %d edits to %d files' % (edit_count, len(edits))
				251
				252
				253	_WHITESPACE_BYTES = frozenset((ord('\t'), ord('\n'), ord('\r'), ord(' ')))
				254
				255
				256	def _ExtendDeletionIfElementIsInList(contents, offset):
				257	"""Extends the range of a deletion if the deleted element was part of a list.
				258
				259	This rewriter helper makes it easy for refactoring tools to remove elements
				260	from a list. Even if a matcher callback knows that it is removing an element
				261	from a list, it may not have enough information to accurately remove the list
				262	element; for example, another matcher callback may end up removing an adjacent
				263	list element, or all the list elements may end up being removed.
				264
				265	With this helper, refactoring tools can simply remove the list element and not
				266	worry about having to include the comma in the replacement.
				267
				268	Args:
				269	contents: A bytearray with the deletion already applied.
				270	offset: The offset in the bytearray where the deleted range used to be.
				271	"""
				272	char_before = char_after = None
				273	left_trim_count = 0
				274	for byte in reversed(contents[:offset]):
				275	left_trim_count += 1
				276	if byte in _WHITESPACE_BYTES:
				277	continue
				278	if byte in (ord(','), ord(':'), ord('('), ord('{')):
				279	char_before = chr(byte)
				280	break
				281
				282	right_trim_count = 0
				283	for byte in contents[offset:]:
				284	right_trim_count += 1
				285	if byte in _WHITESPACE_BYTES:
				286	continue
				287	if byte == ord(','):
				288	char_after = chr(byte)
				289	break
				290
				291	if char_before:
				292	if char_after:
				293	del contents[offset:offset + right_trim_count]
				294	elif char_before in (',', ':'):
				295	del contents[offset - left_trim_count:offset]
				296
				297
				298	def main(argv):
				299	if len(argv) < 2:
				300	print 'Usage: run_tool.py <clang tool> <compile DB> <path 1> <path 2> ...'
				301	print ' <clang tool> is the clang tool that should be run.'
				302	print ' <compile db> is the directory that contains the compile database'
				303	print ' <path 1> <path2> ... can be used to filter what files are edited'
				304	return 1
				305
				306	clang_format_diff_path = os.path.join(
				307	os.path.dirname(os.path.realpath(__file__)),
				308	'../../../third_party/llvm/tools/clang/tools/clang-format',
				309	'clang-format-diff.py')
				310	# TODO(dcheng): Allow this to be controlled with a flag as well.
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	311	# TODO(dcheng): Shell escaping of args to git diff to clang-format is broken
				312	# on Windows.
				313	if not os.path.isfile(clang_format_diff_path) or sys.platform == 'win32':
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	314	clang_format_diff_path = None
				315
James Robinson	c4c1c59	2014-11-21 18:27:04 -0800	[diff] [blame]	316	if len(argv) == 3 and argv[2] == '--all':
James Robinson	6a64b81	2014-12-03 13:38:42 -0800	[diff] [blame]	317	filenames = set(_GetFilesFromCompileDB(argv[1]))
Viet-Trung Luu	235cf3d	2015-06-11 10:01:25 -0700	[diff] [blame]	318	source_filenames = filenames
James Robinson	c4c1c59	2014-11-21 18:27:04 -0800	[diff] [blame]	319	else:
James Robinson	6a64b81	2014-12-03 13:38:42 -0800	[diff] [blame]	320	filenames = set(_GetFilesFromGit(argv[2:]))
				321	# Filter out files that aren't C/C++/Obj-C/Obj-C++.
				322	extensions = frozenset(('.c', '.cc', '.m', '.mm'))
Viet-Trung Luu	235cf3d	2015-06-11 10:01:25 -0700	[diff] [blame]	323	source_filenames = [f for f in filenames
				324	if os.path.splitext(f)[1] in extensions]
				325	dispatcher = _CompilerDispatcher(argv[0], argv[1], source_filenames)
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	326	dispatcher.Run()
				327	# Filter out edits to files that aren't in the git repository, since it's not
				328	# useful to modify files that aren't under source control--typically, these
				329	# are generated files or files in a git submodule that's not part of Chromium.
				330	_ApplyEdits({k : v for k, v in dispatcher.edits.iteritems()
James Robinson	6e9a1c9	2014-11-13 17:05:42 -0800	[diff] [blame]	331	if os.path.realpath(k) in filenames},
James Robinson	646469d	2014-10-03 15:33:28 -0700	[diff] [blame]	332	clang_format_diff_path)
				333	if dispatcher.failed_count != 0:
				334	return 2
				335	return 0
				336
				337
				338	if __name__ == '__main__':
				339	sys.exit(main(sys.argv[1:]))