-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdb.py
97 lines (88 loc) · 3.7 KB
/
db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
'''
This file contains functions that are responsible for maintaining the database
'''
import time
from pymongo import MongoClient
def create_db(path, drive_service, log_file):
    '''
    Build the Drive metadata database and record that it was initialized.

    Opens a default MongoClient connection, fills the `drivedb` collection
    of the `googledrivedb` database via initialize_db(), and writes a
    timestamped "Database initialized!" line to log_file.

    path          -- forwarded unchanged to initialize_db()
    drive_service -- forwarded unchanged to initialize_db()
    log_file      -- object with a write() method that receives the log line

    Returns the `googledrivedb` database object.
    '''
    database = MongoClient().googledrivedb

    # Fill the collection on this database from Drive
    initialize_db(path, drive_service, database.drivedb)

    # Log line format: "MM.DD.YY HH:MM Database initialized!"
    stamp = time.strftime("%m.%d.%y %H:%M ", time.localtime())
    log_file.write(stamp + 'Database initialized!\n')
    return database
def initialize_db(path, drive_service, json_info):
    '''
    Rebuild the metadata collection from the files listed by drive_service.

    The collection is emptied, every listed file is stored as a record
    holding a fixed subset of its metadata plus a 'path' field set to None,
    and remove_orphans() is then called to drop unreachable entries and
    fill in the paths.

    path          -- prefix handed to remove_orphans() for the stored paths
    drive_service -- object whose files().list(...).execute() returns a dict
                     with an 'items' list and, when more results exist, a
                     'nextPageToken' (presumably a Drive API v2 client;
                     confirm against the caller)
    json_info     -- collection exposing remove(), find_one() and insert()

    Raises KeyError if a listed file lacks one of the copied metadata keys.
    '''
    wanted_keys = ('id', 'title', 'parents', 'labels',
                   'mimeType', 'createdDate', 'modifiedDate')

    # Clear database just to be sure
    json_info.remove()

    # The listing is paginated: keep requesting pages until the response
    # no longer carries a continuation token, otherwise only the first
    # page of files would ever reach the database.
    page_token = None
    while True:
        params = {}
        if page_token:
            params['pageToken'] = page_token
        response = drive_service.files().list(**params).execute()

        for item in response['items']:
            record = dict((key, item[key]) for key in wanted_keys)
            record['path'] = None
            # Only insert a file id the first time it is seen
            if json_info.find_one({'id': record['id']}) is None:
                json_info.insert(record)
            else:
                print('Duplicate\n')

        page_token = response.get('nextPageToken')
        if not page_token:
            break

    remove_orphans(path, json_info)
def remove_orphans(path, json_info):
    '''
    Remove entries that cannot be traced to the root and set 'path' on the rest.

    For every entry the chain of first parents (parents[0]) is followed
    until a parent reference flagged 'isRoot' is reached. The titles met
    along the way are joined with '/' and prefixed with `path` to form the
    entry's 'path'.

    An entry is removed when it has no 'parents' key or an empty parents
    list. When the chain breaks (an ancestor is missing from the
    collection, an ancestor has no parents, or the parent references loop
    back on themselves), every id walked through so far is removed.

    path      -- string prepended (with a '/') to each reconstructed path
    json_info -- collection exposing find(), find_one(), remove(), update()
    '''
    for entry in json_info.find():
        # No parents key, or an empty parents array: nothing to trace.
        if not entry.get('parents'):
            json_info.remove({'_id': entry['_id']})
            continue

        current_id = entry['id']
        current_entry = json_info.find_one({'id': current_id})
        if current_entry is None:
            # The entry is no longer in the collection (presumably removed
            # as part of an earlier broken chain); nothing left to trace.
            json_info.remove({'id': current_id})
            continue

        # Ids walked through so far. Doubles as the removal list when the
        # chain turns out to be broken and as the cycle detector.
        visited = set()
        file_path = entry['title']
        broken = False

        while True:
            parents = current_entry.get('parents')
            if not parents:
                # Ancestor without parent data: the chain cannot reach root
                broken = True
                break
            if parents[0]['isRoot']:
                # Reached the root, the chain is intact
                break

            visited.add(current_id)
            current_id = parents[0]['id']
            if current_id in visited:
                # Parent references form a loop; without this check the
                # walk would never terminate.
                broken = True
                break

            current_entry = json_info.find_one({'id': current_id})
            if current_entry is None:
                # Parent is not in the collection
                broken = True
                break
            file_path = current_entry['title'] + '/' + file_path

        if broken:
            # Remove everything that was walked through on the broken chain
            for orphan_id in visited:
                json_info.remove({'id': orphan_id})
        else:
            # Set path appropriately
            json_info.update({'_id': entry['_id']},
                             {"$set": {'path': path + '/' + file_path}})