
Initial conversion performed using '2to3 -f print .'. Imports added and duplicate parentheses removed manually. Manually converted files, comments and inline code that 2to3 missed. Presubmit disabled due to an unrelated error in find_runtime_symbols/find_runtime_symbols.py. Afterwards ran "git cl format --python" and cherry-picked the formatting changes. There are no intended behavioural changes. NOPRESUBMIT=true Bug: 941669 Change-Id: I3174ed0eb7005d493c6bc44751353b8fae18a1f1 Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/1818478 Commit-Queue: Raul Tambre <raul@tambre.ee> Reviewed-by: Nico Weber <thakis@chromium.org> Auto-Submit: Raul Tambre <raul@tambre.ee> Cr-Commit-Position: refs/heads/master@{#699202}
357 lines
12 KiB
Python
Executable File
357 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# Copyright 2018 The Chromium Authors. All rights reserved.
|
|
# Use of this source code is governed by a BSD-style license that can be
|
|
# found in the LICENSE file.
|
|
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
import subprocess
|
|
import pickle
|
|
import os
|
|
from os import path
|
|
from datetime import date, timedelta
|
|
from collections import namedtuple, defaultdict, Counter
|
|
|
|
# A single parsed `git log` entry. `dirs` maps a directory path to an
# (additions, deletions) tuple for the files that commit touched there.
Commit = namedtuple('Commit', ['hash', 'author', 'commit_date', 'dirs'])

# dict mapping each subdirectory to a dict of
# {author: (commit_count, additions, deletions)} covering that directory and
# all of its descendants.
DIRECTORY_AUTHORS = defaultdict(dict)

# cache for directory owners for memoisation purposes; maps a directory path
# to a set of (owner, level) tuples, where level is the OWNERS-file distance.
OWNERS_CACHE = {}

# filename for pickle cache
CACHE_FILENAME = 'suggest_owners.cache'
|
|
|
|
def _RunGitCommand(options, cmd_args):
  """Runs a git command against the target repo.

  Echoes the full command line to stdout and returns the command's stdout.
  Raises subprocess.CalledProcessError on a non-zero exit.
  """
  git_dir = path.join(options.repo_path, '.git')
  full_cmd = ['git', '--git-dir', git_dir]
  full_cmd.extend(cmd_args)
  print('>', ' '.join(full_cmd))
  return subprocess.check_output(full_cmd)
|
|
|
|
|
|
def _ValidAuthor(author):
|
|
return author.find('@chromium.org') > -1 and author.find('roller') == -1
|
|
|
|
|
|
# Returns additions/deletions by a commit to a directory (and its descendants).
def getEditsForDirectory(commit, directory):
  """Sums the commit's additions and deletions over `directory` and every
  directory below it. Returns an (additions, deletions) tuple.
  """
  total_added = 0
  total_deleted = 0
  for commit_dir, (added, deleted) in commit.dirs.items():
    # Only count entries that are `directory` itself or a descendant of it.
    if isSubDirectory(directory, commit_dir):
      total_added += added
      total_deleted += deleted
  return total_added, total_deleted
|
|
|
|
|
|
# This propagates a commit touching a directory to also be touching all
# ancestor directories.
def _PropagateCommit(options, commit):
  """Accumulates `commit` into DIRECTORY_AUTHORS for every directory it
  touched and all ancestors of those directories.

  Each entry is updated to (commit_count + 1, additions, deletions) for the
  commit's author.
  """
  touched_dirs = set()
  # first get all the touched dirs and their ancestors
  # NOTE: .keys() instead of the Python 2-only .iterkeys() so this also runs
  # under Python 3 (matching the script's print_function future import).
  for directory in commit.dirs.keys():
    while directory != '':
      touched_dirs.add(directory)
      # get the parent directory
      directory = path.dirname(directory)
  # loop over them and calculate the edits per directory
  for directory in touched_dirs:
    author_commits, author_additions, author_deletions = \
        DIRECTORY_AUTHORS[directory].get(commit.author, (0, 0, 0))
    directory_additions, directory_deletions = \
        getEditsForDirectory(commit, directory)
    DIRECTORY_AUTHORS[directory][commit.author] = (
        author_commits + 1, author_additions + directory_additions,
        author_deletions + directory_deletions)
|
|
|
|
|
|
# Checks if child_directory is same as or below parent_directory. For some
# reason the os.path module does not have this functionality.
def isSubDirectory(parent_directory, child_directory):
  """String-prefix containment test on '/'-terminated paths, so that 'a'
  contains 'a/b' but not 'ab'.
  """
  return (child_directory + '/').startswith(parent_directory + '/')
|
|
|
|
|
|
def _GetGitLogCmd(options):
|
|
# TODO(mheikal): git-log with --numstat vs --name-only takes 10x the time to
|
|
# complete. It takes >15 mins for git log --numstat to return the 1 year git
|
|
# history of the full repo. Should probably add a script flag to switch off
|
|
# keeping track of number of modifications per commit.
|
|
date_limit = date.today() - timedelta(days=options.days_ago)
|
|
format_string = "%h,%ae,%cI"
|
|
cmd_args = [
|
|
'log',
|
|
'--since', date_limit.isoformat(),
|
|
'--numstat',
|
|
'--pretty=format:%s'%format_string,
|
|
]
|
|
# has to be last arg
|
|
if options.subdirectory:
|
|
cmd_args += ['--', options.subdirectory]
|
|
return cmd_args
|
|
|
|
|
|
def _ParseCommitLine(line):
  """Parses one `--pretty=format:%h,%ae,%cI` line into a Commit with an empty
  dirs map. Raises ValueError if the line does not have exactly three fields.
  """
  commit_hash, author_email, iso_date = line.split(',')
  return Commit(
      hash=commit_hash, author=author_email, commit_date=iso_date, dirs={})
|
|
|
|
|
|
def _ParseFileStatsLine(current_commit, line):
|
|
try:
|
|
additions, deletions, filepath = line.split('\t')
|
|
except ValueError:
|
|
return False
|
|
if additions == '-':
|
|
additions = 0
|
|
else:
|
|
additions = int(additions)
|
|
if deletions == '-':
|
|
deletions = 0
|
|
else:
|
|
deletions = int(deletions)
|
|
dir_path = path.dirname(filepath)
|
|
commit_additions, commit_deletions = \
|
|
current_commit.dirs.get(dir_path, (0,0))
|
|
current_commit.dirs[dir_path] = (
|
|
additions + commit_additions, deletions + commit_deletions)
|
|
return True
|
|
|
|
|
|
def processAllCommits(options):
  """Runs `git log` over the history window and accumulates every commit by a
  valid author into DIRECTORY_AUTHORS.
  """
  if not options.subdirectory and options.days_ago > 100:
    print('git log for your query might take > 5 minutes, limit by a '
          'subdirectory or reduce the number of days of history to low double '
          'digits to make this faster. There is no progress indicator, it is '
          'all waiting for single git log to finish.')
  output = _RunGitCommand(options, _GetGitLogCmd(options))
  current_commit = None
  for line in output.splitlines():
    if current_commit is None:
      current_commit = _ParseCommitLine(line)
    else:
      if line == '':  # all commit details read
        if _ValidAuthor(current_commit.author):
          _PropagateCommit(options, current_commit)
        current_commit = None
      else:
        # Merge commits weird out git-log. If we fail to parse the line, then
        # the last commit was a merge and this line is actually another commit
        # description line.
        if not _ParseFileStatsLine(current_commit, line):
          current_commit = _ParseCommitLine(line)
  # Process the final commit. Guard against an empty log (no commits in the
  # requested window), which previously crashed with an AttributeError on
  # None.
  if current_commit is not None and _ValidAuthor(current_commit.author):
    _PropagateCommit(options, current_commit)
|
|
|
|
|
|
def _CountCommits(directory):
  """Returns the total number of commits recorded for `directory` across all
  authors.
  """
  # .values() instead of the Python 2-only .itervalues() so this also runs
  # under Python 3; a generator avoids the intermediate list either way.
  return sum(count for count, _a, _d in DIRECTORY_AUTHORS[directory].values())
|
|
|
|
|
|
def _GetOwnerLevel(options, author, directory):
  """Returns the OWNERS-file distance of `author` for `directory`, or -1 when
  the author is not an owner.
  """
  # `key=lambda pair: pair[1]` replaces the Python 2-only tuple-parameter
  # lambda `lambda (o, l): l`, which is a SyntaxError under Python 3
  # (PEP 3113). An owner can appear at several levels; sorting ascending by
  # level makes the first match the closest one.
  sorted_owners = sorted(_GetOwners(options, directory),
                         key=lambda pair: pair[1])
  for owner, level in sorted_owners:
    if author == owner:
      return level
  return -1
|
|
|
|
|
|
# Returns the owners for a repo subdirectory. This does not understand per-file
# directives.
# TODO(mheikal): use depot_tools owners.py for parsing owners files.
def _GetOwners(options, directory_path):
  """Collects (owner, level) pairs for `directory_path` by walking up its
  ancestor directories, stopping at a `set noparent`. `level` counts how many
  OWNERS files were found between the directory and the owner's file.

  Results are memoised in OWNERS_CACHE.
  """
  if directory_path in OWNERS_CACHE:
    return OWNERS_CACHE[directory_path]
  owners = set()
  parent_dir = directory_path
  owner_level = 0
  # NOTE(review): the repo root's own OWNERS file is never consulted because
  # the loop stops once parent_dir becomes '' — confirm whether that is
  # intended.
  while parent_dir != '':
    # Build the path inside the repo every iteration. The previous code only
    # prefixed options.repo_path on the first lookup, so ancestor OWNERS
    # files were resolved relative to the current working directory instead
    # of the repo.
    owners_path = path.join(options.repo_path, parent_dir, 'OWNERS')
    if path.isfile(owners_path):
      parsed_owners, noparent = _ParseOwnersFile(options, owners_path)
      owners.update([(owner, owner_level) for owner in parsed_owners])
      owner_level += 1
      if noparent:
        break
    parent_dir = path.dirname(parent_dir)
  OWNERS_CACHE[directory_path] = set(owners)
  return owners
|
|
|
|
|
|
# Parse an OWNERS file, returns set of owners and if the file sets noparent
|
|
def _ParseOwnersFile(options, filepath):
|
|
owners = set()
|
|
noparent = False
|
|
with open(filepath) as f:
|
|
for line in f.readlines():
|
|
line = line.strip()
|
|
# The script deals with directories so per-files are ignored.
|
|
if line == '' or line[0] == '#' or line.startswith('per-file'):
|
|
continue
|
|
if line.startswith('file://'):
|
|
relpath = line[7:]
|
|
abspath = path.join(options.repo_path, relpath)
|
|
parsed_owners, _ = _ParseOwnersFile(options, abspath)
|
|
owners.update(parsed_owners)
|
|
if line == 'set noparent':
|
|
noparent = True
|
|
index = line.find('@chromium.org')
|
|
if index > -1:
|
|
owners.add(line[:index + len('@chromium.org')])
|
|
return owners, noparent
|
|
|
|
|
|
# Trivial directories are ones that just contain a single child subdir and
|
|
# nothing else.
|
|
def _IsTrivialDirectory(options, repo_subdir):
|
|
try:
|
|
return len(os.listdir(path.join(options.repo_path, repo_subdir))) == 1
|
|
except OSError:
|
|
# directory no longer exists
|
|
return False
|
|
|
|
|
|
def computeSuggestions(options):
  """Builds a list of (directory, suggestions) pairs, where suggestions is a
  list of (author, (commit_count, additions, deletions)) entries sorted by
  descending commit count.

  Skips trivial directories, directories below the commit threshold, and
  (when --subdirectory is set) directories outside it.
  """
  directory_suggestions = []
  # `sorted(dict)` iterates keys in order; this replaces the Python 2-only
  # iteritems() plus tuple-parameter lambda (a SyntaxError on Python 3,
  # PEP 3113).
  for directory in sorted(DIRECTORY_AUTHORS):
    authors = DIRECTORY_AUTHORS[directory]
    if _IsTrivialDirectory(options, directory):
      continue
    if _CountCommits(directory) < options.dir_commit_limit:
      continue
    # skip suggestions for directories outside the passed in directory
    if (options.subdirectory
        and not isSubDirectory(options.subdirectory, directory)):
      continue
    # sort authors by descending number of commits
    sorted_authors = sorted(authors.items(), key=lambda item: -item[1][0])
    # keep only authors above the limit
    suggestions = [(author, details) for author, details in sorted_authors
                   if author not in options.ignore_authors
                   and details[0] >= options.author_cl_limit]
    directory_suggestions.append((directory, suggestions))
  return directory_suggestions
|
|
|
|
|
|
def _PrintSettings(options):
  """Prints the filter thresholds the report below was generated with."""
  dir_limit, days = options.dir_commit_limit, options.days_ago
  print('Showing directories with at least ({}) commits in the last ({}) '
        'days.'.format(dir_limit, days))
  max_authors, cl_limit = options.max_suggestions, options.author_cl_limit
  print('Showing top ({}) committers who have commited at least ({}) commits '
        'to the directory in the last ({}) days.'.format(
            max_authors, cl_limit, days))
  print('(owners+N) represents distance through OWNERS files for said owner\n')
|
|
|
|
|
|
def printSuggestions(options, directory_suggestions):
  """Pretty-prints per-directory committer suggestions.

  For each directory, authors are listed until max_suggestions non-owners
  have been shown; existing owners are annotated with their OWNERS distance.
  """
  print('\nCommit stats:')
  _PrintSettings(options)
  for directory, suggestions in directory_suggestions:
    print('{}: {} commits in the last {} days'.format(
        directory, _CountCommits(directory), options.days_ago))
    shown_non_owners = 0
    for author, (commit_count, additions, deletions) in suggestions:
      level = _GetOwnerLevel(options, author, directory)
      if level > -1:
        owner_suffix = ' (owner+{})'.format(level)
      else:
        shown_non_owners += 1
        owner_suffix = ''
      print('{}{}, commits: {}, additions:{}, deletions: {}'.format(
          author, owner_suffix, commit_count, additions, deletions))
      # Stop once enough fresh (non-owner) candidates have been shown.
      if shown_non_owners >= options.max_suggestions:
        break
    print()
|
|
|
|
|
|
def _GetHeadCommitHash(options):
  """Returns the commit hash of HEAD in the target repo."""
  head = _RunGitCommand(options, ['rev-parse', 'HEAD'])
  return head.strip()
|
|
|
|
|
|
def _GetCacheMetadata(options):
  """Returns the (head_hash, days_ago, subdirectory) tuple identifying what
  the cache contents are valid for.
  """
  head_hash = _GetHeadCommitHash(options)
  return head_hash, options.days_ago, options.subdirectory
|
|
|
|
|
|
def _IsCacheValid(options, metadata):
  """Returns True when the pickled cache can serve the current query: same
  HEAD, same history window, and a cached subdirectory that covers the
  requested one.
  """
  head_hash, days_ago, cached_subdirectory = metadata
  if head_hash != _GetHeadCommitHash(options):
    return False
  if days_ago != options.days_ago:
    return False
  # An empty cached subdirectory means the whole repo was processed, which
  # covers any narrower query. The previous `is not None` check was always
  # true (the flag defaults to ''), and isSubDirectory('', x) is False, so a
  # whole-repo cache was wrongly discarded for subdirectory queries.
  if (cached_subdirectory
      and not isSubDirectory(cached_subdirectory, options.subdirectory)):
    return False
  return True
|
|
|
|
|
|
def cacheProcessedCommits(options):
  """Pickles DIRECTORY_AUTHORS together with its validity metadata to
  CACHE_FILENAME.
  """
  metadata = _GetCacheMetadata(options)
  # Binary mode: pickle produces a byte stream. Text mode only happens to
  # work for protocol 0 on POSIX and breaks on Windows or newer protocols.
  with open(CACHE_FILENAME, 'wb') as f:
    pickle.dump((metadata, DIRECTORY_AUTHORS), f)
|
|
|
|
|
|
def maybeRestoreProcessedCommits(options):
  """Restores DIRECTORY_AUTHORS from the pickle cache when it is still valid.

  Returns True when the cache was used, False when it is missing or stale.
  """
  global DIRECTORY_AUTHORS
  if not path.exists(CACHE_FILENAME):
    return False
  # Binary mode to match the pickle writer; text-mode reads of pickle data
  # are unreliable outside protocol 0 on POSIX.
  with open(CACHE_FILENAME, 'rb') as f:
    stored_metadata, cached_directory_authors = pickle.load(f)
  if _IsCacheValid(options, stored_metadata):
    print('Loading from cache')
    DIRECTORY_AUTHORS = cached_directory_authors
    return True
  else:
    print('Cache is stale or invalid, must rerun `git log`')
    return False
|
|
|
|
def do(options):
  """Top-level driver: obtain commit stats (from cache when allowed and
  valid, otherwise via git log, re-caching the result) and print suggestions.
  """
  restored = not options.skip_cache and maybeRestoreProcessedCommits(options)
  if not restored:
    processAllCommits(options)
    cacheProcessedCommits(options)
  printSuggestions(options, computeSuggestions(options))
|
|
|
|
|
|
def main():
  """Parses command-line flags and runs the suggestion report."""
  arg_parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  arg_parser.add_argument('repo_path')
  arg_parser.add_argument('--days-ago', type=int,
                          help='Number of days of history to search through.',
                          default=365, metavar='DAYS_AGO')
  arg_parser.add_argument('--subdirectory',
                          help='Limit suggestions to this subdirectory',
                          default='')
  arg_parser.add_argument('--ignore-authors',
                          help='Ignore this comma separated list of authors')
  arg_parser.add_argument('--max-suggestions', type=int,
                          help='Maximum number of '
                          'suggested authors per directory.', default=5)
  arg_parser.add_argument('--author-cl-limit', type=int,
                          help='Do not suggest '
                          'authors who have commited less than this to the '
                          'directory in the last DAYS_AGO days.', default=10)
  arg_parser.add_argument('--dir-commit-limit', type=int,
                          help='Skip directories '
                          'with less than this number of commits in the last '
                          'DAYS_AGO days.', default=100)
  arg_parser.add_argument('--skip-cache', action='store_true',
                          help='Do not read from cache.', default=False)
  options = arg_parser.parse_args()
  # Normalise the ignore list into a set of stripped author names.
  ignored = set()
  if options.ignore_authors:
    ignored = {author.strip() for author in options.ignore_authors.split(',')}
  options.ignore_authors = ignored
  do(options)
|
|
|
|
|
|
# Script entry point; keeps the module importable without side effects.
if __name__ == '__main__':
  main()
|