-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.py
128 lines (108 loc) · 4.01 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import datetime
import time
import json
import sys
from pymongo import MongoClient
import pymongo as Pymongo
import time
from pprint import pprint
# Module-wide MongoDB client for the server at localhost:27017; the functions
# in this file select their databases from it.
connection = MongoClient(host="localhost", port=27017)
def loadJson(path):
    """Read the JSON document stored at ``path`` and return the parsed value.

    Args:
        path: Filesystem path of the JSON file to read.

    Returns:
        The decoded JSON value (for example a ``dict`` or a ``list``).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid JSON.
    """
    # JSON text is UTF-8 by specification, so decode it explicitly instead of
    # relying on the platform's locale encoding.
    with open(path, encoding="utf-8") as data_file:
        return json.load(data_file)
def projectsjson(jsonFile):
    """Insert project records into the ``projects.projectsNames`` collection.

    Each record is stored as ``{"name": ..., "visited": <bool>}``. Records
    that MongoDB rejects with ``DuplicateKeyError`` are collected, reported on
    stdout (at most the first 100 are printed) and appended to
    ``./log/projectsDuplicate.json``.

    Args:
        jsonFile: Sized iterable of dict-like records; ``name`` and
            ``visited`` are read from each with ``.get``.

    Returns:
        Number of records that were inserted, i.e. the number of records
        given minus the number of duplicates.

    Raises:
        OSError: If duplicates were found and the log file cannot be opened
            (for instance because ``./log`` does not exist).
    """
    db = connection["projects"]
    projectsDuplicate = []
    for file in jsonFile:
        # `== True` (rather than plain truthiness) keeps non-boolean truthy
        # values such as a non-empty string stored as "not visited"; a
        # missing key yields None and is therefore stored as False.
        visited = file.get("visited") == True  # noqa: E712
        try:
            # Collection.insert() was removed in PyMongo 4; insert_one() is
            # the single-document replacement.
            db.projectsNames.insert_one(
                {"name": file.get("name"), "visited": visited}
            )
        except Pymongo.errors.DuplicateKeyError:
            projectsDuplicate.append(file)

    if projectsDuplicate:
        print("Duplicate projects {}".format(len(projectsDuplicate)))
        # Show at most 100 entries. Slicing also covers the case of exactly
        # 100 duplicates, which was previously printed by neither branch.
        pprint(projectsDuplicate[:100])
        if len(projectsDuplicate) > 100:
            print("........")
        with open("./log/projectsDuplicate.json", "a") as fp:
            json.dump(projectsDuplicate, fp)
    return len(jsonFile) - len(projectsDuplicate)
def keysJson(jsonFile):
    """Insert GitHub credentials into ``github_credencials.credencials``.

    Each record is stored as ``{"client_id": ..., "client_secret": ...}``.
    Records that MongoDB rejects with ``DuplicateKeyError`` are collected,
    reported on stdout (at most the first 100 are printed) and appended to
    ``./log/credencialsDuplicate.json``.

    Args:
        jsonFile: Sized iterable of dict-like records; ``client_id`` and
            ``client_secret`` are read from each with ``.get``.

    Returns:
        Number of records that were inserted, i.e. the number of records
        given minus the number of duplicates.

    Raises:
        OSError: If duplicates were found and the log file cannot be opened
            (for instance because ``./log`` does not exist).
    """
    db = connection["github_credencials"]
    credencialsDuplicate = []
    for file in jsonFile:
        try:
            # Collection.insert() was removed in PyMongo 4; insert_one() is
            # the single-document replacement.
            db.credencials.insert_one(
                {
                    "client_id": file.get("client_id"),
                    "client_secret": file.get("client_secret"),
                }
            )
        except Pymongo.errors.DuplicateKeyError:
            credencialsDuplicate.append(file)

    if credencialsDuplicate:
        print("Duplicate Keys {}".format(len(credencialsDuplicate)))
        # Show at most 100 entries. Slicing also covers the case of exactly
        # 100 duplicates, which was previously printed by neither branch.
        pprint(credencialsDuplicate[:100])
        if len(credencialsDuplicate) > 100:
            print("........")
        with open("./log/credencialsDuplicate.json", "a") as fp:
            json.dump(credencialsDuplicate, fp)
    return len(jsonFile) - len(credencialsDuplicate)
def projectsCompleted():
    """Print how many ``projects.projectsNames`` documents have ``visited`` true."""
    db = connection["projects"]
    # Count on the server instead of pulling every matching document into
    # memory just to take the length of the list.
    finished = db.projectsNames.count_documents({"visited": {"$eq": True}})
    print("Projects Finished {}".format(finished))
def projectsLen():
    """Print the total number of documents in ``projects.projectsNames``."""
    db = connection["projects"]
    # Count on the server instead of loading the whole collection into a
    # list just to take its length.
    total = db.projectsNames.count_documents({})
    print("Total Projects {}".format(total))
def keysLen():
    """Print the total number of documents in ``github_credencials.credencials``."""
    db = connection["github_credencials"]
    # Count on the server instead of loading the whole collection into a
    # list just to take its length.
    total = db.credencials.count_documents({})
    print("Total GitHub Keys {}".format(total))
def statusCrawler():
    """Print the crawler status stored in ``global_configs.configs``.

    Looks up the document whose ``operation`` field is ``"status"`` and
    prints its ``status`` field. ``None`` is printed when the document has no
    such field or when no matching document exists.
    """
    db = connection["global_configs"]
    status_doc = db.configs.find_one({"operation": "status"}, {"_id": 0})
    # find_one() returns None when nothing matches; guard against that so a
    # database without a status document does not raise AttributeError.
    status = status_doc.get("status") if status_doc is not None else None
    print("Crawler Status {}".format(status))
def main():
    """Command-line entry point; dispatches on the option in ``sys.argv[1]``.

    Options:
        --projects [FILE]  With FILE, insert the projects it contains and
                           print how many were inserted; without it, print
                           the total number of projects.
        --keys [FILE]      Same as ``--projects`` but for GitHub credentials.
        --finished         Print the number of projects marked as visited.
        --status           Print the crawler status followed by the project,
                           finished-project and key counts.

    Raises:
        SystemExit: After printing ``missing arguments`` when no option is
            given, or when the only argument is not one of the options above.
    """
    oneArgument = ("--finished", "--projects", "--keys", "--status")
    # Check the argument count before touching sys.argv[1]; running the
    # script with no arguments used to fail with IndexError here.
    if len(sys.argv) < 2 or (
        len(sys.argv) == 2 and sys.argv[1] not in oneArgument
    ):
        print('missing arguments')
        raise SystemExit

    option = sys.argv[1]
    if option == "--projects":
        if len(sys.argv) <= 2:
            projectsLen()
        else:
            myjson = loadJson(sys.argv[2])
            # A file holding a single JSON object is treated as a one-record
            # list. Testing the type (not len() == 1) keeps multi-key objects
            # working and leaves one-element lists untouched.
            if isinstance(myjson, dict):
                myjson = [myjson]
            inserted = projectsjson(myjson)
            print("{} inserted projects in BD".format(inserted))
    elif option == "--keys":
        if len(sys.argv) <= 2:
            keysLen()
        else:
            myjson = loadJson(sys.argv[2])
            # Same single-object handling as for --projects.
            if isinstance(myjson, dict):
                myjson = [myjson]
            inserted = keysJson(myjson)
            print("{} inserted keys in BD".format(inserted))
    elif option == "--finished":
        projectsCompleted()
    elif option == "--status":
        statusCrawler()
        projectsLen()
        projectsCompleted()
        keysLen()
    else:
        print("{} option not allowed".format(option))
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()