167 lines
5.3 KiB
Python
Executable File
167 lines
5.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
Origin : https://github.com/qznc/dot/blob/master/bin/git-overview

Gather a general overview from a git repository.
It shall identify the major contributors within a project.
It shall identify interesting parts in the code.
"""
|
|
|
|
from subprocess import check_output
|
|
from datetime import datetime
|
|
from os.path import splitext
|
|
|
|
# `git log` invocation that emits one line per commit with four
# ";"-separated fields: author name (%an), author date in ISO format (%ad),
# signature status (%G?) and full commit hash (%H).
_CMD_AUTHOR_HISTORY = 'git log --date=iso --format="format:%an;%ad;%G?;%H"'
# Lists the files tracked by git (not referenced by the code visible here).
_CMD_FILELIST = "git ls-files"
# Command prefix listing the paths changed by a single commit; the commit
# hash is appended directly, hence the trailing space.
_CMD_COMMIT_FILES_CHANGES = "git diff-tree --no-commit-id --name-only -r "
|
|
|
|
from collections import namedtuple
|
|
|
|
# Record type describing one file: its path, the set of authors, the number
# of commits and the first/last commit timestamps.
# NOTE(review): not referenced by the code visible here; read_log_data builds
# plain dicts with the same keys instead.
FileInfo = namedtuple("FileInfo", "path,authors,commits,firstc,lastc")
|
|
|
|
|
|
def read_log_data():
    """Collect per-author and per-file commit statistics from git.

    Runs ``_CMD_AUTHOR_HISTORY`` once and then ``_CMD_COMMIT_FILES_CHANGES``
    once for every commit, both in the current working directory.

    Returns:
        A tuple ``(authors, files, total_commits)``:

        * ``authors`` maps an author name to ``(first, last, count)``: the
          earliest and latest commit timestamps and the number of commits.
        * ``files`` maps a path to a dict with the keys ``path``,
          ``authors`` (set of author names), ``commits``, ``firstc`` and
          ``lastc``.
        * ``total_commits`` is the number of log entries processed.

    Raises:
        subprocess.CalledProcessError: if a git command exits non-zero.
        ValueError: if a log line does not have the expected four fields or
            its date cannot be parsed.
    """
    total_commits = 0
    authors = {}
    files = {}
    raw = check_output(_CMD_AUTHOR_HISTORY, shell=True, universal_newlines=True)
    for line in raw.split("\n"):
        if not line:
            # An empty history (or a stray blank line) yields "" here, which
            # must not be counted or parsed as a commit.
            continue
        # Only the author name can contain ";", so split from the right to
        # keep such names intact.
        name, dt, _signed, commit_hash = line.rsplit(";", 3)
        total_commits += 1
        # NOTE(review): dt[:-6] drops the trailing " +HHMM" offset, so the
        # timestamps are naive and not normalised across time zones.
        when = datetime.strptime(dt[:-6], "%Y-%m-%d %H:%M:%S")

        # Seed first/last with the commit's own timestamp instead of
        # now()/1900 sentinels, which break for commits dated in the future
        # or before 1900.
        first, last, count = authors.get(name, (when, when, 0))
        authors[name] = (min(first, when), max(last, when), count + 1)

        craw = check_output(
            _CMD_COMMIT_FILES_CHANGES + commit_hash,
            shell=True,
            universal_newlines=True,
        )
        for path in craw.split("\n"):
            if path == "":
                continue
            fi = files.get(path)
            if fi is None:
                fi = files[path] = dict(
                    path=path,
                    authors=set(),
                    commits=0,
                    firstc=when,
                    lastc=when,
                )
            fi["authors"].add(name)
            fi["commits"] += 1
            fi["firstc"] = min(fi["firstc"], when)
            fi["lastc"] = max(fi["lastc"], when)
    return authors, files, total_commits
|
|
|
|
|
|
def readable_duration(dur):
    """Render a timedelta as a short, approximate human-readable string.

    Spans of at least one whole day are reported in years (more than two
    years), months (more than five 30-day months) or days.  Shorter spans are
    reported in hours (more than two hours), minutes (more than five minutes)
    or seconds.  Values are truncated to whole units.
    """
    whole_days = dur.days
    if whole_days > 0:
        if whole_days > 2 * 365:
            return "%d years" % (whole_days // 365)
        if whole_days > 5 * 30:
            return "%d months" % (whole_days // 30)
        return "%d days" % whole_days

    # Less than one whole day: only the seconds component matters.
    secs = dur.seconds
    if secs > 2 * 60 * 60:
        return "%d hours" % (secs // 3600)
    if secs > 5 * 60:
        return "%d minutes" % (secs // 60)
    return "%d seconds" % secs
|
|
|
|
|
|
def partition_by_extension(fileinfo):
    """Group file records by filename extension.

    Args:
        fileinfo: dict mapping a file path to its record.

    Returns:
        A tuple ``(groups, meta, total_filecount, total_extfilecount)``:
        ``groups`` maps each extension (including the leading dot) to a dict
        of ``path -> record``; ``meta`` maps each extension to
        ``{"filecount": n}``; ``total_filecount`` counts all files and
        ``total_extfilecount`` only those that have an extension.
    """
    groups = {}
    for path, record in fileinfo.items():
        suffix = splitext(path)[1]
        if suffix:
            # Paths without an extension are counted but not grouped.
            groups.setdefault(suffix, {})[path] = record

    meta = {suffix: dict(filecount=len(members)) for suffix, members in groups.items()}
    with_extension = sum(len(members) for members in groups.values())
    return groups, meta, len(fileinfo), with_extension
|
|
|
|
|
|
def print_top_committers(authorinfo, total_commits):
    """Print the most active authors until they cover over 80% of commits.

    Args:
        authorinfo: dict mapping author name to ``(first, last, count)``,
            where ``first``/``last`` are datetimes and ``count`` is the
            number of commits.
        total_commits: total number of commits, used for the 80% cut-off.
    """
    ranking = sorted(authorinfo.items(), key=lambda entry: entry[1][2], reverse=True)
    print("Top Committers (of %d authors):" % len(ranking))

    cutoff = total_commits * 0.8
    committed = 0
    for author, (first, last, count) in ranking:
        last_day = last.strftime("%Y-%m-%d")
        if count != 1:
            span = readable_duration(last - first)
            print(
                "%-20s %4d commits during %s until %s"
                % (author, count, span, last_day)
            )
        else:
            # A single commit has no meaningful duration to report.
            print("%-20s 1 commit on %s" % (author, last_day))
        committed += count
        if committed > cutoff:
            break

    print(
        " together these authors have 80+%% of the commits (%d/%d)"
        % (committed, total_commits)
    )
|
|
|
|
|
|
def print_important_files(fileinfo):
    """Print the five files with the most commits and with the most authors.

    Args:
        fileinfo: dict mapping a path to a record dict with the keys
            ``path``, ``authors``, ``commits``, ``firstc`` and ``lastc``.

    Both lists are printed in ascending order, so the top entry comes last.
    """
    by_commits = sorted(fileinfo.values(), key=lambda rec: rec["commits"])
    print("Files with most commits:")
    for rec in by_commits[-5:]:
        span = readable_duration(rec["lastc"] - rec["firstc"])
        until = rec["lastc"].strftime("%Y-%m-%d")
        print(
            "%4d commits: %-20s during %s until %s"
            % (rec["commits"], rec["path"], span, until)
        )

    print()

    # Sort the commit-ordered list (stable sort) so that ties in author count
    # keep their commit-count ordering.
    by_authors = sorted(by_commits, key=lambda rec: len(rec["authors"]))
    print("Files with most authors:")
    for rec in by_authors[-5:]:
        print("%2d authors: %-20s" % (len(rec["authors"]), rec["path"]))
|
|
|
|
|
|
def print_extension_info(fileinfo):
    """Print the most common file extensions by number of files.

    Extensions are listed in descending order of file count until the listed
    files exceed 80% of all files (files without an extension count towards
    the total but are never listed).

    Args:
        fileinfo: dict mapping a file path to its record, as accepted by
            ``partition_by_extension``.
    """
    pfileinfo, extinfo, total_filecount, _total_extfilecount = partition_by_extension(
        fileinfo
    )
    ranked = sorted(
        pfileinfo, key=lambda ext: extinfo[ext]["filecount"], reverse=True
    )
    print("By file extension:")
    threshold = total_filecount * 0.8
    fc = 0
    for ext in ranked:
        c = extinfo[ext]["filecount"]
        print("%5s: %d files" % (ext, c))
        fc += c
        if fc > threshold:
            break

    if fc > threshold or total_filecount == 0:
        print(
            " together these make up 80+%% of the files (%d/%d)"
            % (fc, total_filecount)
        )
    else:
        # The loop ran out of extensions before reaching 80% (many files have
        # no extension), so report the real share instead of claiming 80+%.
        print(
            " together these make up %d%% of the files (%d/%d)"
            % (100 * fc // total_filecount, fc, total_filecount)
        )
|
|
|
|
|
|
# Script entry: gather the statistics from the git repository in the current
# working directory, then print the three report sections separated by blank
# lines.
authorinfo, fileinfo, total_commits = read_log_data()
print_top_committers(authorinfo, total_commits)
print()
print_important_files(fileinfo)
print()
print_extension_info(fileinfo)
|