-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtemplate_pl.spider
52 lines (42 loc) · 1.56 KB
/
template_pl.spider
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# -*- coding: utf-8 -*-
import re
import sys
# Python 2 only: make UTF-8 the interpreter-wide default codec for implicit
# str <-> unicode conversions. reload(sys) comes first because
# sys.setdefaultencoding is removed from the module during interpreter
# start-up and only reappears after a reload.
reload(sys)
sys.setdefaultencoding('utf-8')
import scrapy
import base64
import hashlib
from Crypto.Cipher import AES
# Key-derivation and cipher parameters read by generate_key() and decrypt().
# NOTE(review): the DECRYPT_* tokens look like template placeholders that are
# substituted before this file is executed -- confirm against the generator.
iteration_count = DECRYPT_ROUNDS  # iteration count passed to hashlib.pbkdf2_hmac (bare name, not a string)
hashfunc = 'DECRYPT_HASHFUNC'  # hash name passed to hashlib.pbkdf2_hmac
salt = 'DECRYPT_SALT'  # hex text; decoded with .decode('hex') before use
iv = 'DECRYPT_IV'  # hex text; decoded with .decode('hex') and used as the AES-CBC IV
def generate_key(salt, password):
    """Derive a 16-byte key from *password* with PBKDF2-HMAC.

    The hash name and iteration count come from the module-level
    ``hashfunc`` and ``iteration_count`` values.

    Args:
        salt: Salt as hex text; it is hex-decoded before use.
        password: Passphrase as a byte string or as text. Text is encoded
            to UTF-8 explicitly so the result does not depend on the
            interpreter's default encoding.

    Returns:
        The derived key as a 16-byte string.
    """
    # type(u'') is ``unicode`` on Python 2; only text needs encoding,
    # byte strings are passed through untouched.
    if isinstance(password, type(u'')):
        password = password.encode('utf-8')
    return hashlib.pbkdf2_hmac(hashfunc, password, salt.decode('hex'), iteration_count, 16)
def unpad(s):
    """Strip trailing padding whose last byte stores the padding length.

    Only the final length byte is inspected, so any scheme that ends with
    the pad length (PKCS#7 style) is accepted.

    Args:
        s: Padded byte string.

    Returns:
        *s* without its last ``n`` bytes, where ``n`` is the value of the
        final byte.

    Raises:
        ValueError: If *s* is empty, or the length byte is 0 or larger than
            ``len(s)``. Previously an empty input failed inside ``ord()``
            and a zero or oversized length byte silently returned empty data.
    """
    if not s:
        raise ValueError("cannot unpad empty data")
    # Slice instead of index so ord() always receives a 1-length string.
    pad_len = ord(s[-1:])
    if pad_len < 1 or pad_len > len(s):
        raise ValueError("invalid padding length: %d" % pad_len)
    return s[:-pad_len]
def decrypt(data, passphrase):
    """Decrypt a base64-encoded AES-CBC payload and return it as text.

    The key is derived from *passphrase* and the module-level ``salt`` via
    generate_key(); the IV is the hex-decoded module-level ``iv``.

    Args:
        data: Base64-encoded ciphertext.
        passphrase: Passphrase handed to generate_key().

    Returns:
        The unpadded plaintext decoded as UTF-8.
    """
    aes_key = generate_key(salt, passphrase)
    encrypted = base64.b64decode(data)
    raw_iv = iv.decode('hex')
    padded = AES.new(aes_key, AES.MODE_CBC, raw_iv).decrypt(encrypted)
    plaintext = unpad(padded)
    return plaintext.decode('utf-8')
class SensorSpider(scrapy.Spider):
    """Fetch a CSRF token, request the encrypted AQI payload and save it decrypted.

    Flow: GET ``start_url`` -> read ``window.csrf`` from an inline script ->
    POST to ``aqi_url`` with that token -> decrypt the payload using the
    token as passphrase -> write the plaintext to ``OUTPUT_FILE``.

    NOTE(review): the upper-case string literals (``SPIDER_NAME``,
    ``LINK_CSRF``, ``LINK_AQI``, ``DATA_REGEXP``, ``OUTPUT_FILE``) look like
    template placeholders filled in before the spider runs -- confirm.
    """

    name = "SPIDER_NAME"
    # Token scraped by get_csrf(); reused by get_aqi() as the passphrase.
    csrf = ""
    # A real boolean rather than the string 'False', so the setting does not
    # depend on Scrapy's string-to-bool coercion.
    custom_settings = {'DOWNLOAD_FAIL_ON_DATALOSS': False}
    start_url = 'LINK_CSRF'
    aqi_url = 'LINK_AQI'

    def start_requests(self):
        """Request the page that embeds the CSRF token."""
        yield scrapy.Request(self.start_url, callback=self.get_csrf)

    def get_csrf(self, response):
        """Extract the CSRF token from *response* and POST the bulk data request."""
        # NOTE(review): both [1] indexes select the second item (second
        # script text node, second regex result) -- confirm against the page.
        self.csrf = response.xpath('//script/text()')[1].re('window.csrf = (.*);')[1].strip('"')
        print("-------- %s %s" % ("CSRF:", self.csrf))
        # get bulk data
        headers = {'_csrf_token': self.csrf}
        post_data = {'param': 'AQI', 'station': '%'}
        yield scrapy.FormRequest(self.aqi_url, headers=headers, formdata=post_data, callback=self.get_aqi)

    def get_aqi(self, response):
        """Decrypt the payload captured from *response* and write it to disk.

        Raises:
            ValueError: If the response text does not match the data pattern
                (previously an AttributeError on ``None``).
        """
        match = re.search('DATA_REGEXP', response.text)
        if match is None:
            raise ValueError("response did not match the data pattern")
        decrypted = decrypt(match.group(1), self.csrf)
        # Context manager guarantees the file is flushed and closed.
        with open('OUTPUT_FILE', 'w') as output:
            output.write(decrypted)
        print("OK")