James Robinson | dffc411 | 2014-10-21 14:16:02 -0700 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Makes sure files have the right permissions. |
| 7 | |
| 8 | Some developers have broken SCM configurations that flip the executable |
| 9 | permission on for no good reason. Unix developers who run ls --color will then |
| 10 | see .cc files in green and get confused. |
| 11 | |
- For file extensions that must be executable, add them to
  EXECUTABLE_EXTENSIONS.
- For file extensions that must not be executable, add them to
  NON_EXECUTABLE_EXTENSIONS.
- To ignore all the files inside a directory, add it to IGNORED_PATHS.
- For a file base name whose state is ambiguous and that should not be checked
  for a shebang, add it to IGNORED_FILENAMES.
| 18 | |
Any file not matching the above will be opened and checked for a shebang or an
ELF header. If the result does not match the executable bit on the file, the
file will be flagged.
| 22 | |
| 23 | Note that all directory separators must be slashes (Unix-style) and not |
| 24 | backslashes. All directories should be relative to the source root and all |
| 25 | file paths should be only lowercase. |
| 26 | """ |
| 27 | |
| 28 | import json |
| 29 | import logging |
| 30 | import optparse |
| 31 | import os |
| 32 | import stat |
| 33 | import string |
| 34 | import subprocess |
| 35 | import sys |
| 36 | |
| 37 | #### USER EDITABLE SECTION STARTS HERE #### |
| 38 | |
# Files with these extensions must have the executable bit set.
#
# Entries are extensions without the leading dot. Case-sensitive: the
# extension is compared exactly as it is spelled in the file name.
EXECUTABLE_EXTENSIONS = (
  'bat',
  'dll',
  'dylib',
  'exe',
)
| 48 | |
# These files must have the executable bit set.
#
# Paths are relative to the source root. Case-insensitive: the candidate path
# is lowercased before the lookup, so entries must be lower-case only.
EXECUTABLE_PATHS = (
  'chrome/test/data/app_shim/app_shim_32_bit.app/contents/'
      'macos/app_mode_loader',
  'chrome/test/data/extensions/uitest/plugins/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
  'chrome/test/data/extensions/uitest/plugins_private/plugin.plugin/contents/'
      'macos/testnetscapeplugin',
)
| 60 | |
# Files with these extensions must not have the executable bit set. This is
# mainly a performance optimization, as these files are not checked for a
# shebang. The list was partially generated from:
# git ls-files | grep "\\." | sed 's/.*\.//' | sort | uniq -c | sort -b -g
#
# Entries are extensions without the leading dot. Case-sensitive: the
# extension is compared exactly as it is spelled in the file name.
NON_EXECUTABLE_EXTENSIONS = (
  '1',
  '3ds',
  'S',
  'am',
  'applescript',
  'asm',
  'c',
  'cc',
  'cfg',
  'chromium',
  'cpp',
  'crx',
  'cs',
  'css',
  'cur',
  'def',
  'der',
  'expected',
  'gif',
  'grd',
  'gyp',
  'gypi',
  'h',
  'hh',
  'htm',
  'html',
  'hyph',
  'ico',
  'idl',
  'java',
  'jpg',
  'js',
  'json',
  'm',
  'm4',
  'mm',
  'mms',
  'mock-http-headers',
  'nexe',
  'nmf',
  'onc',
  'pat',
  'patch',
  'pdf',
  'pem',
  'plist',
  'png',
  'proto',
  'rc',
  'rfx',
  'rgs',
  'rules',
  'spec',
  'sql',
  'srpc',
  'svg',
  'tcl',
  'test',
  'tga',
  'txt',
  'vcproj',
  'vsprops',
  'webm',
  'word',
  'xib',
  'xml',
  'xtb',
  'zip',
)
| 137 | |
# These files must not have the executable bit set.
#
# Paths are relative to the source root. Case-insensitive: the candidate path
# is lowercased before the lookup, so entries must be lower-case only.
NON_EXECUTABLE_PATHS = (
  'build/android/tests/symbolize/liba.so',
  'build/android/tests/symbolize/libb.so',
  'chrome/installer/mac/sign_app.sh.in',
  'chrome/installer/mac/sign_versioned_dir.sh.in',
  'chrome/test/data/extensions/uitest/plugins/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins/plugin64.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin32.so',
  'chrome/test/data/extensions/uitest/plugins_private/plugin64.so',
  'components/test/data/component_updater/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_1/a_changing_binary_file',
  'components/test/data/component_updater/ihfokbkgjpifnbbojhneepfflplebdkc/'
      'ihfokbkgjpifnbbojhneepfflplebdkc_2/a_changing_binary_file',
  'courgette/testdata/elf-32-1',
  'courgette/testdata/elf-32-2',
  'courgette/testdata/elf-64',
)
| 158 | |
# File base names that are never checked. (These are mostly autoconf spew.)
#
# NOTE: is_ignored() lowercases the whole path before extracting the base
# name, so matching is effectively case-insensitive and every entry here must
# be lower-case to ever match.
IGNORED_FILENAMES = (
  'config.guess',
  'config.sub',
  'configure',
  'depcomp',
  'install-sh',
  'missing',
  'mkinstalldirs',
  'naclsdk',
  'scons',
)
| 173 | |
# File paths starting with one of these will be ignored as well. Entries are
# prefixes: a trailing slash ignores a whole directory, a full file path
# ignores that single file.
# Please consider fixing your file permissions, rather than adding to this list.
#
# Case-insensitive: the candidate path is lowercased before the prefix test,
# so entries must be lower-case only.
IGNORED_PATHS = (
  'native_client_sdk/src/build_tools/sdk_tools/third_party/fancy_urllib/'
      '__init__.py',
  'out/',
  # TODO(maruel): Fix these.
  'third_party/android_testrunner/',
  'third_party/bintrees/',
  'third_party/closure_linter/',
  'third_party/devscripts/licensecheck.pl.vanilla',
  'third_party/hyphen/',
  'third_party/jemalloc/',
  'third_party/lcov-1.9/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov-1.9/contrib/galaxy/gen_makefile.sh',
  'third_party/lcov/contrib/galaxy/conglomerate_functions.pl',
  'third_party/lcov/contrib/galaxy/gen_makefile.sh',
  'third_party/libevent/autogen.sh',
  'third_party/libevent/test/test.sh',
  'third_party/libxml/linux/xml2-config',
  'third_party/libxml/src/ltmain.sh',
  'third_party/mesa/',
  'third_party/protobuf/',
  'third_party/python_gflags/gflags.py',
  'third_party/sqlite/',
  'third_party/talloc/script/mksyms.sh',
  'third_party/tcmalloc/',
  'third_party/tlslite/setup.py',
)
| 205 | |
| 206 | #### USER EDITABLE SECTION ENDS HERE #### |
| 207 | |
# Sanity checks on the user editable lists above.

# Nothing may be required to be both executable and non-executable.
assert not set(EXECUTABLE_EXTENSIONS).intersection(NON_EXECUTABLE_EXTENSIONS)
assert not set(EXECUTABLE_PATHS).intersection(NON_EXECUTABLE_PATHS)

# Configured paths may only contain lowercase letters, digits and '/-_.'.
VALID_CHARS = set(string.ascii_lowercase + string.digits + '/-_.')
for paths in (EXECUTABLE_PATHS, NON_EXECUTABLE_PATHS, IGNORED_PATHS):
  assert all(VALID_CHARS.issuperset(path) for path in paths)
| 214 | |
| 215 | |
def capture(cmd, cwd):
  """Runs a command and returns what it wrote to stdout.

  The exit code and anything written to stderr are ignored.

  Args:
    cmd: The command line to run, as a list of arguments.
    cwd: The directory to run the command in.

  Returns:
    The captured standard output, as text.

  Raises:
    OSError: if the command cannot be started.
  """
  logging.debug('%s; cwd=%s', ' '.join(cmd), cwd)
  # The child runs with LANGUAGE set to en_US.UTF-8 regardless of the caller's
  # own setting.
  env = os.environ.copy()
  env['LANGUAGE'] = 'en_US.UTF-8'
  # universal_newlines makes the output text (not bytes) on Python 3 as well,
  # so callers can mix it with ordinary path strings.
  p = subprocess.Popen(
      cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=cwd, env=env,
      universal_newlines=True)
  return p.communicate()[0]
| 227 | |
| 228 | |
def get_git_root(dir_path):
  """Returns the root of the git checkout containing dir_path, or None.

  Args:
    dir_path: The directory in which `git rev-parse --show-toplevel` is run.

  Returns:
    The checkout root reported by git, or None when git prints nothing or
    cannot be started at all.
  """
  try:
    root = capture(['git', 'rev-parse', '--show-toplevel'], dir_path).strip()
  except OSError:
    # git could not be launched. Report "no git root" so the caller can fall
    # back to its non-SCM code path instead of crashing.
    return None
  return root or None
| 234 | |
| 235 | |
def is_ignored(rel_path):
  """Tells whether rel_path is exempt from the permission checks.

  The path is lowercased first. It is exempt when its base name is listed in
  IGNORED_FILENAMES or when it begins with one of the IGNORED_PATHS prefixes.
  """
  lowered = rel_path.lower()
  if os.path.basename(lowered) in IGNORED_FILENAMES:
    return True
  return lowered.startswith(IGNORED_PATHS)
| 242 | |
| 243 | |
def must_be_executable(rel_path):
  """Tells whether the file name alone mandates the executable bit.

  True when the extension (without its dot) is in EXECUTABLE_EXTENSIONS or
  when the lowercased path is listed in EXECUTABLE_PATHS.
  """
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in EXECUTABLE_PATHS
| 250 | |
| 251 | |
def must_not_be_executable(rel_path):
  """Tells whether the file name alone forbids the executable bit.

  True when the extension (without its dot) is in NON_EXECUTABLE_EXTENSIONS or
  when the lowercased path is listed in NON_EXECUTABLE_PATHS.
  """
  extension = os.path.splitext(rel_path)[1][1:]
  if extension in NON_EXECUTABLE_EXTENSIONS:
    return True
  return rel_path.lower() in NON_EXECUTABLE_PATHS
| 258 | |
| 259 | |
def has_executable_bit(full_path):
  """Returns True when the owner, group or others may execute the file.

  Raises:
    OSError: if full_path cannot be stat'ed.
  """
  mode = os.stat(full_path).st_mode
  any_execute = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
  return (mode & any_execute) != 0
| 264 | |
| 265 | |
def has_shebang_or_is_elf(full_path):
  """Sniffs the first bytes of a file for a shebang or the ELF magic number.

  Args:
    full_path: The absolute path to the file.

  Returns:
    A (shebang, elf) tuple of booleans. shebang is True when the file starts
    with '#!/' or '#! /'; elf is True when it starts with '\\x7fELF'.
  """
  with open(full_path, 'rb') as f:
    data = f.read(4)
  # The file is read in binary mode, so compare against bytes literals: a str
  # literal never equals bytes on Python 3 and nothing would ever be detected.
  shebang = data[:3] == b'#!/' or data == b'#! /'
  elf = data == b'\x7fELF'
  return (shebang, elf)
| 274 | |
| 275 | |
def check_file(root_path, rel_path):
  """Verifies that one file's executable bit agrees with what it should be.

  The caller is expected to have filtered out paths matched by is_ignored().

  When the name is matched by must_be_executable() or
  must_not_be_executable(), only the executable bit is looked at. Otherwise
  the first bytes of the file are read and the presence of a shebang or ELF
  header is compared with the executable bit.

  Args:
    root_path: The directory rel_path is relative to.
    rel_path: The path of the file to check.

  Returns:
    None when the file is consistent (or cannot be stat'ed), otherwise a dict
    with the keys 'error', 'full_path' and 'rel_path'.
  """
  full_path = os.path.join(root_path, rel_path)

  def make_error(message):
    return {
      'error': message,
      'full_path': full_path,
      'rel_path': rel_path,
    }

  try:
    executable = has_executable_bit(full_path)
  except OSError:
    # Catching the exception is faster than calling os.path.islink() first.
    # The Chromium tree happens to contain invalid symlinks under
    # third_party/openssl/openssl/test/.
    return None

  # File types whose expected state is dictated by their name.
  if must_be_executable(rel_path):
    if executable:
      return None
    return make_error('Must have executable bit set')
  if must_not_be_executable(rel_path):
    if not executable:
      return None
    return make_error('Must not have executable bit set')

  # Everything else is judged by its header.
  shebang, elf = has_shebang_or_is_elf(full_path)
  if executable == (shebang or elf):
    return None
  if executable:
    return make_error('Has executable bit but not shebang or ELF header')
  if shebang:
    return make_error('Has shebang but not executable bit')
  return make_error('Has ELF header but not executable bit')
| 319 | |
| 320 | |
def check_files(root, files):
  """Checks an explicit list of files, skipping the ignored ones.

  Args:
    root: The directory the entries of files are relative to.
    files: An iterable of relative file paths.

  Returns:
    A list with the error dict of every inconsistent file; empty when all
    files are fine.
  """
  results = (check_file(root, f) for f in files if not is_ignored(f))
  # Build a real list: on Python 3 filter() is a lazy iterator, which is
  # always truthy and cannot be serialized by json.dump().
  return [error for error in results if error]
| 324 | |
| 325 | |
class ApiBase(object):
  """Permission checker that discovers files by listing directories on disk."""

  def __init__(self, root_dir, bare_output):
    self.root_dir = root_dir
    self.bare_output = bare_output
    # Number of files handed to check_file().
    self.count = 0
    # Number of those files whose name did not decide the expected state.
    self.count_read_header = 0

  def check_file(self, rel_path):
    """Checks a single file, updating the counters; returns its error or None."""
    logging.debug('check_file(%s)', rel_path)
    self.count += 1

    decided_by_name = (
        must_be_executable(rel_path) or must_not_be_executable(rel_path))
    if not decided_by_name:
      self.count_read_header += 1

    return check_file(self.root_dir, rel_path)

  def check_dir(self, rel_path):
    """Checks a sub-directory; returns the list of errors found in it."""
    return self.check(rel_path)

  def check(self, start_dir):
    """Checks the files in start_dir and, recursively, in its subdirectories.

    Returns:
      The list of error dicts of all inconsistent files.
    """
    items = self.list_dir(start_dir)
    logging.info('check(%s) -> %d', start_dir, len(items))
    # Number of leading characters to drop to make a path relative to root_dir.
    skip = len(self.root_dir) + 1
    errors = []
    for item in items:
      full_path = os.path.join(self.root_dir, start_dir, item)
      rel_path = full_path[skip:]
      if is_ignored(rel_path):
        continue
      if not os.path.isdir(full_path):
        error = self.check_file(rel_path)
        if error:
          errors.append(error)
        continue
      # Depth first.
      errors.extend(self.check_dir(rel_path))
    return errors

  def list_dir(self, start_dir):
    """Lists, sorted, the non-hidden files and directories inside start_dir."""
    entries = os.listdir(os.path.join(self.root_dir, start_dir))
    visible = [name for name in entries if not name.startswith('.')]
    visible.sort()
    return visible
| 371 | |
| 372 | |
class ApiAllFilesAtOnceBase(ApiBase):
  """Base for checkers that fetch the complete file list in a single query.

  Subclasses implement _get_all_files(); the result is sorted and cached on
  the first list_dir() call.
  """
  # Sorted list of every known file path; None until first needed.
  _files = None

  def list_dir(self, start_dir):
    """Lists every known file located under start_dir.

    Args:
      start_dir: Either self.root_dir itself, an absolute path below it, or a
          path relative to it ('' and '.' both mean the root).

    Returns:
      The paths, relative to start_dir, of all the known files below it.
    """
    if self._files is None:
      self._files = sorted(self._get_all_files())
      if not self.bare_output:
        print('Found %s files' % len(self._files))

    # Make start_dir relative to the root. Only strip the root when it really
    # is a prefix; a relative start_dir must be left untouched.
    rel_dir = start_dir
    root_dir = self.root_dir
    if rel_dir == root_dir:
      rel_dir = ''
    elif rel_dir.startswith((root_dir + '/', root_dir + os.sep)):
      rel_dir = rel_dir[len(root_dir) + 1:]
    rel_dir = rel_dir.rstrip('/')

    # The trailing slash keeps 'chrome' from matching 'chrome_elf/...' and
    # keeps the returned items from starting with a slash.
    prefix = '' if rel_dir in ('', '.') else rel_dir + '/'
    return [x[len(prefix):] for x in self._files if x.startswith(prefix)]

  def _get_all_files(self):
    """Returns every file path to consider; must be provided by subclasses."""
    raise NotImplementedError()
| 390 | |
| 391 | |
class ApiGit(ApiAllFilesAtOnceBase):
  """Checker that takes its file list from `git ls-files`."""

  def _get_all_files(self):
    """Returns the lines printed by `git ls-files` run in the root directory."""
    listing = capture(['git', 'ls-files'], cwd=self.root_dir)
    return listing.splitlines()
| 395 | |
| 396 | |
def get_scm(dir_path, bare):
  """Builds the checker suited to dir_path.

  Args:
    dir_path: The directory to check; the current directory when falsy.
    bare: When true, no informational message is printed.

  Returns:
    An ApiGit instance when a git checkout is found, a plain ApiBase
    otherwise.
  """
  cwd = os.getcwd()
  root = get_git_root(dir_path or cwd)
  if not root:
    # No SCM found: fall back to the checker that walks the filesystem.
    if not bare:
      print('Failed to determine the SCM for %s' % dir_path)
    return ApiBase(dir_path or cwd, bare)

  if not bare:
    print('Found git repository at %s' % root)
  return ApiGit(dir_path or root, bare)
| 410 | |
| 411 | |
def main():
  """Parses the command line, runs the permission checks and reports them.

  With --file, only the listed files are checked and the output is bare.
  Otherwise the directory given as argument (by default the whole root) is
  checked. Errors are optionally written as JSON to the --json path.

  Returns:
    The process exit code: 1 when at least one file is inconsistent, else 0.
  """
  usage = """Usage: python %prog [--root <root>] [tocheck]
  tocheck  Specifies the directory, relative to root, to check. This defaults
           to "." so it checks everything.

Examples:
  python %prog
  python %prog --root /path/to/source chrome"""

  parser = optparse.OptionParser(usage=usage)
  parser.add_option(
      '--root',
      help='Specifies the repository root. This defaults '
           'to the checkout repository root')
  parser.add_option(
      '-v', '--verbose', action='count', default=0, help='Print debug logging')
  parser.add_option(
      '--bare',
      action='store_true',
      default=False,
      help='Prints the bare filename triggering the checks')
  parser.add_option(
      '--file', action='append', dest='files',
      help='Specifics a list of files to check the permissions of. Only these '
           'files will be checked')
  parser.add_option('--json', help='Path to JSON output file')
  options, args = parser.parse_args()

  # Each -v raises the verbosity by one level, capped at DEBUG.
  levels = [logging.ERROR, logging.INFO, logging.DEBUG]
  logging.basicConfig(level=levels[min(len(levels) - 1, options.verbose)])

  if len(args) > 1:
    parser.error('Too many arguments used')

  if options.root:
    options.root = os.path.abspath(options.root)

  if options.files:
    # --file implies --bare (for PRESUBMIT.py).
    options.bare = True

    # Without --root the files are resolved against the checkout root (as
    # documented for --root), or the current directory outside a checkout,
    # instead of crashing on a None root.
    root = options.root or get_git_root(os.getcwd()) or os.getcwd()
    errors = list(check_files(root, options.files))
  else:
    api = get_scm(options.root, options.bare)
    start_dir = args[0] if args else api.root_dir
    errors = api.check(start_dir)

    if not options.bare:
      print('Processed %s files, %d files where tested for shebang/ELF '
            'header' % (api.count, api.count_read_header))

  if options.json:
    with open(options.json, 'w') as f:
      json.dump(errors, f)

  if errors:
    if options.bare:
      print('\n'.join(e['full_path'] for e in errors))
    else:
      print('\nFAILED\n')
      print('\n'.join('%s: %s' % (e['full_path'], e['error']) for e in errors))
    return 1
  if not options.bare:
    print('\nSUCCESS\n')
  return 0
| 477 | |
| 478 | |
# Script entry point: the process exit code is whatever main() returns.
if __name__ == '__main__':
  sys.exit(main())