-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgit_ls_all.py
More file actions
executable file
·79 lines (60 loc) · 2.92 KB
/
Copy pathgit_ls_all.py
File metadata and controls
executable file
·79 lines (60 loc) · 2.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
"""Program to list absolutely all objects in a git repository, including unreachable ones."""
import subprocess
import sys
def check_output_list(*args, **kwargs):
    """Run a command like :py:func:`subprocess.check_output` and return
    its output as a list of unicode strings, one per line.

    Positional and keyword arguments are forwarded unchanged to
    `check_output`.  Byte output is decoded with
    ``sys.getdefaultencoding()``.  The empty entry produced by the final
    line terminator is dropped, so a command that prints nothing yields
    an empty list and ``len()`` of the result is the number of lines.

    Raises whatever `check_output` raises, e.g.
    :py:exc:`subprocess.CalledProcessError` on a non-zero exit status.
    """
    raw_output = subprocess.check_output(*args, **kwargs)
    # check_output already returns str when the caller asked for text mode.
    if isinstance(raw_output, bytes):
        raw_output = raw_output.decode(sys.getdefaultencoding())
    # Split on "\n" only (not str.splitlines) so that other characters
    # str.splitlines treats as line breaks stay inside their line.
    lines = raw_output.split("\n")
    if lines[-1] == "":
        lines.pop()
    return lines
def format_git_hash(int_hash, width=40):
    """Returns `int_hash`, which must be a non-negative integer, as a
    lowercase hex string zero-padded on the left to `width` digits.

    `width` defaults to 40; pass a different value for object ids of
    another length.  A value that needs more than `width` digits is
    returned unpadded and untruncated.

    Raises :py:exc:`ValueError` if `int_hash` is negative, because a
    negative number cannot denote an object id.
    """
    if int_hash < 0:
        raise ValueError(
            "git object ids are non-negative, got {}".format(int_hash))
    # Nested replacement field: "0<width>x" = zero-padded lowercase hex.
    return "{:0{}x}".format(int_hash, width)
def git_object_type(int_hash):
    """Ask ``git cat-file -t`` for the type of the object whose id is
    the integer `int_hash` and return it as a string without newlines.
    """
    command = ["git", "cat-file", "-t", format_git_hash(int_hash)]
    reported = subprocess.check_output(command)
    text = reported.decode(sys.getdefaultencoding())
    # git terminates its answer with a newline; callers compare bare names.
    return text.replace("\n", "")
def git_object_size(int_hash):
    """Ask ``git cat-file -s`` for the size of the object whose id is
    the integer `int_hash` and return that size as an int.
    """
    command = ["git", "cat-file", "-s", format_git_hash(int_hash)]
    reported = subprocess.check_output(command)
    text = reported.decode(sys.getdefaultencoding())
    # Strip the newline git appends before converting to a number.
    return int(text.replace("\n", ""))
def main():
    """Print object counts for the current repository, then the size of
    every named non-tree object.

    Three git queries are run: ``git rev-list --objects --all``,
    ``git rev-list --objects -g --no-walk --all`` and
    ``git fsck --unreachable --strict --no-progress``.  The number of
    non-empty output lines of each is printed.  Then, for every line of
    the first query that carries a path, ``<path>: <size>`` is printed
    unless the object is a tree.
    """
    # Empty lines are filtered from every listing so the printed counts
    # are the real number of entries.
    object_list = [
        line for line in
        check_output_list(["git", "rev-list", "--objects", "--all"])
        if line]
    print("Found {:,} objects in object_list.".format(len(object_list)))

    from_ref_logs = [
        line for line in
        check_output_list(
            ["git", "rev-list", "--objects", "-g", "--no-walk", "--all"])
        if line]
    print("Found {:,} objects from rev-list -g".format(len(from_ref_logs)))

    unreachable_objects = [
        line for line in
        check_output_list(
            ["git", "fsck", "--unreachable", "--strict", "--no-progress"])
        if line]
    print("Found {:,} unreachable objects.".format(len(unreachable_objects)))

    # hashes[file_name] = git hash
    # NOTE(review): keyed by path, so when several listed objects share a
    # path only the last one is kept -- confirm this is intended.
    hashes = {}
    for line in object_list:
        # Split at the FIRST space only: everything after it is the path,
        # which may itself contain spaces.  Lines without a space carry
        # no path and are skipped.
        object_id, separator, path = line.partition(" ")
        if separator:
            hashes[path] = int(object_id, 16)
    print("Found {:,} objects with file names.".format(len(hashes)))

    # TODO: from_ref_logs and unreachable_objects are only counted above;
    # their objects are not merged into the listing below.
    for fname, int_hash in hashes.items():
        if git_object_type(int_hash) != "tree":
            print("{}: {}".format(fname, git_object_size(int_hash)))
            # The object contents could be fetched here with
            # ["git", "cat-file", "blob", format_git_hash(int_hash)].
# git rev-list --objects --no-walk \
# $(git fsck --unreachable |
# grep '^unreachable commit' |
# cut -d' ' -f3)
# git fsck --unreachable | grep "^unreachable blob" | cut -d' ' -f3
# } 2> /dev/null | cut -d' ' -f1 | sort | uniq); do
# if git cat-file blob $hash 2> /dev/null | grep -i $content > /dev/null ; then
# echo $hash
# fi
# Run the listing only when executed as a script, not when imported.
if __name__ == "__main__":
    main()