dotfiles/dot_scripts/git-overview.py
2020-07-23 07:18:03 +02:00

167 lines
5.3 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Origin : https://github.com/qznc/dot/blob/master/bin/git-overview
Gather a general overview from a git repository.
It shall identify the major contributors within a project.
It shall identify interesting parts in the code.
"""
from subprocess import check_output
from datetime import datetime
from os.path import splitext
# Shell command lines used to query git.
# One output line per commit with four ";"-separated fields, which
# read_log_data() unpacks as: author name, date (ISO format), signature
# status, commit hash.
_CMD_AUTHOR_HISTORY = 'git log --date=iso --format="format:%an;%ad;%G?;%H"'
# Lists tracked files. NOTE(review): not referenced by the visible code.
_CMD_FILELIST = "git ls-files"
# Prefix for listing the paths touched by one commit; read_log_data()
# appends the commit hash (hence the trailing space).
_CMD_COMMIT_FILES_CHANGES = "git diff-tree --no-commit-id --name-only -r "
from collections import namedtuple
# Record shape for per-file statistics.
# NOTE(review): read_log_data() builds plain dicts with these same keys
# rather than FileInfo instances, so this type appears unused here.
FileInfo = namedtuple("FileInfo", "path,authors,commits,firstc,lastc")
def read_log_data():
    """Collect per-author and per-file commit statistics from git.

    Runs ``git log`` once for the commit list, then ``git diff-tree`` once
    per commit to learn which paths that commit touched. Both commands run
    through the shell in the current working directory.

    Returns:
        A tuple ``(authors, files, total_commits)`` where

        * ``authors`` maps an author name to ``(first, last, count)``: the
          earliest and latest commit timestamps and the number of commits.
        * ``files`` maps a path to a dict with the keys ``path``,
          ``authors`` (set of author names), ``commits`` (int), ``firstc``
          and ``lastc`` (earliest / latest commit timestamps).
        * ``total_commits`` is the number of commit lines parsed.

    Raises:
        subprocess.CalledProcessError: if a git command exits non-zero.
    """
    total_commits = 0
    authors = dict()
    files = dict()
    raw = check_output(_CMD_AUTHOR_HISTORY, shell=True, universal_newlines=True)
    for line in raw.split("\n"):
        # Tolerate blank lines (e.g. a trailing newline) instead of crashing
        # on the unpack below.
        if not line:
            continue
        # Split from the right: the date, signature flag and hash never
        # contain ";", but an author name may.
        name, dt, _signed, commit = line.rsplit(";", 3)
        # The trailing " +HHMM" offset is dropped, so timestamps are naive
        # wall-clock values as recorded by each author.
        when = datetime.strptime(dt[:-6], "%Y-%m-%d %H:%M:%S")
        total_commits += 1

        # Seed first/last with the commit's own timestamp rather than
        # sentinel dates, so commits dated in the future are handled too.
        if name in authors:
            first, last, count = authors[name]
            authors[name] = (min(first, when), max(last, when), count + 1)
        else:
            authors[name] = (when, when, 1)

        craw = check_output(
            _CMD_COMMIT_FILES_CHANGES + commit, shell=True, universal_newlines=True
        )
        for path in craw.split("\n"):
            if not path:
                continue
            fi = files.get(path)
            if fi is None:
                files[path] = fi = dict(
                    path=path,
                    authors=set(),
                    commits=0,
                    firstc=when,
                    lastc=when,
                )
            fi["authors"].add(name)
            fi["commits"] += 1
            fi["firstc"] = min(fi["firstc"], when)
            fi["lastc"] = max(fi["lastc"], when)
    return authors, files, total_commits
def readable_duration(dur):
    """Render a timedelta as a short, coarse, human-readable string.

    Spans with at least one whole day are reported in years (more than two
    years), months (more than five 30-day months) or days. Shorter spans
    are reported in hours (more than two hours), minutes (more than five
    minutes) or seconds. Values are truncated, not rounded.
    """
    days = dur.days
    if days > 0:
        if days > 2 * 365:
            return "%d years" % (days // 365)
        if days > 5 * 30:
            return "%d months" % (days // 30)
        return "%d days" % days

    # Less than a full day: only the seconds component is considered.
    secs = dur.seconds
    if secs > 2 * 60 * 60:
        return "%d hours" % (secs // 3600)
    if secs > 5 * 60:
        return "%d minutes" % (secs // 60)
    return "%d seconds" % secs
def partition_by_extension(fileinfo):
    """Group file records by filename extension.

    Args:
        fileinfo: mapping of path -> per-file record.

    Returns:
        A tuple ``(by_ext, counts, total, with_ext)``:
        ``by_ext`` maps extension -> {path: record}, ``counts`` maps
        extension -> {"filecount": n}, ``total`` is the number of paths
        seen and ``with_ext`` the number of those that have an extension.
        Paths without an extension are counted in ``total`` only.
    """
    by_ext = {}
    counts = {}
    seen = 0
    with_ext = 0
    for path, record in fileinfo.items():
        seen += 1
        extension = splitext(path)[1]
        if extension:
            with_ext += 1
            by_ext.setdefault(extension, {})[path] = record
            counts.setdefault(extension, {"filecount": 0})["filecount"] += 1
    return by_ext, counts, seen, with_ext
def print_top_committers(authorinfo, total_commits):
    """Print the busiest authors until they cover over 80% of all commits.

    Args:
        authorinfo: mapping of author name -> (first, last, count), where
            first/last are datetimes and count is the number of commits.
        total_commits: overall number of commits, used for the 80% cutoff.
    """
    ranking = sorted(authorinfo.items(), key=lambda item: item[1][2], reverse=True)
    print("Top Committers (of %d authors):" % len(ranking))

    cutoff = total_commits * 0.8
    covered = 0
    for author, (first, last, count) in ranking:
        last_day = last.strftime("%Y-%m-%d")
        if count != 1:
            span = readable_duration(last - first)
            print(
                "%-20s %4d commits during %s until %s"
                % (author, count, span, last_day)
            )
        else:
            # A single commit has no meaningful duration to report.
            print("%-20s 1 commit on %s" % (author, last_day))
        covered += count
        if covered > cutoff:
            break

    summary = " together these authors have 80+%% of the commits (%d/%d)"
    print(summary % (covered, total_commits))
def print_important_files(fileinfo):
    """Print the five files with the most commits and with the most authors.

    Args:
        fileinfo: mapping of path -> dict with the keys ``path``,
            ``authors``, ``commits``, ``firstc`` and ``lastc``.

    Both lists are printed in ascending order, so the top entry comes last.
    """
    by_commits = sorted(fileinfo.values(), key=lambda entry: entry["commits"])
    print("Files with most commits:")
    for entry in by_commits[-5:]:
        first_seen, last_seen = entry["firstc"], entry["lastc"]
        print(
            "%4d commits: %-20s during %s until %s"
            % (
                entry["commits"],
                entry["path"],
                readable_duration(last_seen - first_seen),
                last_seen.strftime("%Y-%m-%d"),
            )
        )
    print()

    # Sort the commit-ordered list again: the sort is stable, so files with
    # the same number of authors stay ordered by commit count.
    by_authors = sorted(by_commits, key=lambda entry: len(entry["authors"]))
    print("Files with most authors:")
    for entry in by_authors[-5:]:
        print("%2d authors: %-20s" % (len(entry["authors"]), entry["path"]))
def print_extension_info(fileinfo):
    """Print the most common file extensions by number of files.

    Extensions are listed from most to least frequent until the listed ones
    cover more than 80% of all files (including files without extension).
    The summary line is printed unconditionally, even when the extensions
    run out before that threshold is reached.

    Args:
        fileinfo: mapping of path -> per-file record.
    """
    by_ext, ext_meta, total_filecount, _with_ext = partition_by_extension(fileinfo)
    ranked = sorted(
        by_ext, key=lambda ext: ext_meta[ext]["filecount"], reverse=True
    )

    print("By file extension:")
    cutoff = total_filecount * 0.8
    covered = 0
    for ext in ranked:
        count = ext_meta[ext]["filecount"]
        print("%5s: %d files" % (ext, count))
        covered += count
        if covered > cutoff:
            break
    print(
        " together these make up 80+%% of the files (%d/%d)"
        % (covered, total_filecount)
    )
# Run the report only when executed as a script, so that importing this
# module neither spawns git subprocesses nor prints anything.
if __name__ == "__main__":
    # Gather all statistics once, then print the three report sections
    # separated by blank lines.
    authorinfo, fileinfo, total_commits = read_log_data()
    print_top_committers(authorinfo, total_commits)
    print()
    print_important_files(fileinfo)
    print()
    print_extension_info(fileinfo)