dry_run.py
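
"""Dry run for scrapy_autoproxy: a threaded smoke test of the proxy manager.

WARNING: this script calls flushall() on the configured Redis instance,
deleting every key it holds.

Five worker threads each lease proxies for randomly chosen test sites,
simulate crawl outcomes at random, and report them back; a supervisor
thread prints a running scoreboard and syncs the results to the database
once all the workers have finished.
"""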
import random
import threading
import time

from scrapy_autoproxy.config import configuration
from scrapy_autoproxy.proxy_manager import ProxyManager
from scrapy_autoproxy.storage_manager import StorageManager, Redis
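
# Connect to Redis and start from a clean slate; flushall() deletes every
# key in the configured instance, so point it at a disposable database.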
redis = Redis(**configuration.redis_config)
redis.flushall()
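
# Simulated crawl targets, the two possible crawl outcomes, and per-site tallies.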
test_sites = [
    'https://api.dev.proxycrawler.com',
    'http://gatherproxy.com',
    'http://foo.com',
    'http://bar.com',
    'http://baz.com',
    'http://google.com',
    'http://bing.com',
]
crawl_statuses = [True, False]
successful = {k: 0 for k in test_sites}
failures = {k: 0 for k in test_sites}
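
# Print a snapshot of the per-site success and failure counts.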
def scoreboard():
    print("--------------------------------------")
    print("successes:")
    print(successful)
    print("--------------------------------------")
    print("failures:")
    print(failures)
    print("--------------------------------------")
def get_running_threads():
    running = 0
    for t in threading.enumerate():
        print(t)
        if t.is_alive():
            running += 1
    return running
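
# Worker: lease a proxy for a randomly chosen site, simulate a crawl with a
# random outcome, and report the result back through proxy.callback(). The
# shared tallies are updated without a lock; dict increments are not strictly
# atomic, but for a dry run an occasional lost count does not matter.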
def worker():
    print(threading.current_thread().name, "starting")
    time.sleep(random.randint(1, 10))
    pm = ProxyManager()
    for i in range(5):
        url = random.choice(test_sites)
        print(threading.current_thread().name, "crawling %s" % url)
        proxy = pm.get_proxy(url)
        time.sleep(random.randint(1, 12))
        success = random.choice(crawl_statuses)
        print(threading.current_thread().name, "crawl success=%s" % success)
        if success:
            successful[url] += 1
        else:
            failures[url] += 1
        proxy.callback(success=success)
        time.sleep(random.randint(1, 6))
    print(threading.current_thread().name, "stopping")
    return
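
# Build five named worker threads; they are not started until daemon() runs.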
def make_workers():
    workers = []
    for i in range(5):
        worker_name = "worker_%s" % i
        wkr = threading.Thread(name=worker_name, target=worker)
        workers.append(wkr)
    return workers
workers = make_workers()
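
# Supervisor: start all workers, print the scoreboard every few seconds until
# only this thread is left running, then persist the results to the database.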
def daemon():
    print(threading.current_thread().name, 'Starting daemon.')
    for w in workers:
        w.start()
    time.sleep(15)
    while True:
        scoreboard()
        if get_running_threads() == 1:
            break
        time.sleep(5)
    sm = StorageManager()
    sm.sync_to_db()
    return scoreboard()
#print(threading.currentThread().getName(),'stopping daemon')
#for w in workers:
# w.join()
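
# The supervisor runs as an ordinary (non-daemon) thread, so the interpreter
# waits for it at exit and sync_to_db() always gets to finish.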
dmn = threading.Thread(name='daemon', target=daemon)
dmn.start()
#sm = StorageManager()
#sm.sync_to_db()