#!/usr/bin/python
# analyze.py -- forked from udienz/vis-ubuntu-pkg
import sys
import gzip
import os
import json
import config
def d(*msg):
    """Write a diagnostic line to standard error.

    Every positional argument is converted with ``str`` and the pieces are
    joined by single spaces, followed by a newline.  Calling ``d()`` with no
    arguments emits an empty line.
    """
    # sys.stderr.write behaves the same on Python 2 and Python 3, unlike the
    # ``print >>stream`` statement, which only exists on Python 2.
    sys.stderr.write(' '.join(map(str, msg)) + '\n')
def get_path(data_dir, dist, section, arch):
    """Build the path of the gzipped package index for one archive slice.

    The result has the shape
    ``<data_dir>/<dist>/<section>/binary-<arch>/Packages.gz`` and is joined
    with the path separator of the running platform.
    """
    arch_dir = 'binary-%s' % arch
    components = (data_dir, dist, section, arch_dir, 'Packages.gz')
    return os.path.join(*components)
def read_packages(f):
    """Yield one dict per stanza found in an iterable of control-file lines.

    Each ``Field: value`` line becomes a ``field -> value`` entry.  Lines
    that start with a space directly after a ``Description`` field are
    collected, with a lone ``.`` standing for an empty line, and stored
    newline-joined under the extra key ``LongDescription``.  A blank line
    ends the current stanza.

    Args:
        f: Any iterable of lines (an open file, a list of strings, ...).
            Lines may be text or, on Python 3, bytes; bytes are decoded as
            UTF-8 with undecodable sequences replaced.

    Yields:
        dict: The fields of each stanza, in input order.  A final stanza
        that is not followed by a blank line is yielded as well.
    """
    data = {}
    description = None
    for line in f:
        # Binary streams hand out bytes on Python 3.  On Python 2 ``bytes``
        # is ``str``, so this branch never runs and lines are left untouched.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8', 'replace')

        # startswith() is safe for empty strings, unlike indexing line[0].
        is_continuation = line.startswith(' ')

        # Ignore blank or continuation lines that precede any field.
        if not data and (is_continuation or line.strip() == ''):
            continue

        if description is not None:
            if is_continuation:
                text = line.strip()
                description.append('' if text == '.' else text)
                continue
            # First non-continuation line closes the long description.
            data['LongDescription'] = '\n'.join(description)
            description = None

        line = line.strip()
        if line == '':
            yield data
            data = {}
            continue

        # Split on the first ': ' only, so values may contain ': ' themselves.
        field, _, value = line.partition(': ')
        if field == 'Description':
            description = []
        data[field] = value

    # Input ended without a terminating blank line: flush what was gathered
    # instead of silently dropping the last stanza.
    if data:
        if description is not None:
            data['LongDescription'] = '\n'.join(description)
        yield data
def analyze(data_dir, dist, section, arch):
    """Count the stanzas of one package index and total their ``Size`` fields.

    Args:
        data_dir: Root directory that holds the downloaded indices.
        dist: Distribution name used as a path component.
        section: Section name used as a path component.
        arch: Architecture; the index is looked up under ``binary-<arch>``.

    Returns:
        ``None`` when the index file does not exist.  Otherwise a dict with
        ``count`` (number of stanzas read) and ``size`` (integer sum of the
        ``Size`` field of every stanza).

    Raises:
        KeyError: If a stanza has no ``Size`` field.
        ValueError: If a ``Size`` value is not an integer.
    """
    path = get_path(data_dir, dist, section, arch)
    if not os.path.exists(path):
        return None

    count = 0
    size = 0
    # The context manager closes the gzip handle even when a malformed stanza
    # raises part-way through the file.
    with gzip.open(path, 'r') as f:
        for package in read_packages(f):
            count += 1
            # int() has arbitrary precision on Python 3 and promotes to long
            # automatically on Python 2, so no explicit long() is required.
            size += int(package['Size'])
    return dict(count=count,
                size=size)
def collect_data(data_dir):
    """Analyse every arch/dist/section combination listed in ``config``.

    A progress line is reported through ``d`` after each combination.

    Returns:
        A dict with two nested mappings, ``sizes`` and ``packages``, each
        keyed ``[arch][dist][section]``.  Combinations for which ``analyze``
        returned ``None`` have no section entry; their arch and dist levels
        are still present.
    """
    expected = len(config.DISTS) * len(config.SECTIONS) * len(config.ARCHS)
    done = 0
    sizes = {}
    packages = {}
    for arch in config.ARCHS:
        arch_sizes = sizes[arch] = {}
        arch_packages = packages[arch] = {}
        for dist in config.DISTS:
            dist_sizes = arch_sizes[dist] = {}
            dist_packages = arch_packages[dist] = {}
            for section in config.SECTIONS:
                result = analyze(data_dir, dist, section, arch)
                if result is None:
                    status = 'FAIL'
                else:
                    dist_sizes[section] = result['size']
                    dist_packages[section] = result['count']
                    status = 'OK'
                done += 1
                d('%s of %s: %s %s %s -> %s'
                  % (done, expected, dist, section, arch, status))
    return {'sizes': sizes, 'packages': packages}
def main():
    """Command-line entry point.

    Reads the data directory from ``sys.argv[1]``, collects the statistics
    for it and writes them to standard output as a single line of JSON.

    Raises:
        SystemExit: With status 2, after a usage message on standard error,
            when no data directory argument was supplied.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'analyze.py'
        sys.stderr.write('usage: %s DATA_DIR\n' % prog)
        sys.exit(2)
    data_dir = sys.argv[1]
    data = collect_data(data_dir)
    # write() instead of the print statement so the script runs unchanged on
    # both Python 2 and Python 3.
    sys.stdout.write(json.dumps(data) + '\n')


if __name__ == '__main__':
    main()