-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathbaidupoi_city_20180426.py
130 lines (117 loc) · 4.36 KB
/
baidupoi_city_20180426.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright @ Yang Chen
School of Urban Design, Wuhan University
2018/04/25.
"""
# -*- coding:utf-8 -*-
import json
import urllib2
import sys
import time
# Python 2 only: make UTF-8 the process-wide default string encoding.
# NOTE(review): the str/unicode concatenation and the plain file writes
# further down presumably rely on this setting - confirm before removing.
reload(sys)
sys.setdefaultencoding('utf8')
class BaiDuPOI_City(object):
    """Collect POI records for one keyword inside one city.

    Requests the Baidu ``place/v2/search`` endpoint page by page and turns
    every returned POI into a single tab-separated text record.
    """

    BASE_URL = 'http://api.map.baidu.com/place/v2/search'
    PAGE_SIZE = 20   # value sent as ``page_size``
    MAX_PAGES = 50   # pages 0..49 are requested at most

    def __init__(self, api_key, itemy, city):
        """Store the request parameters.

        Args:
            api_key: API key sent as the ``ak`` parameter.
            itemy: Search keyword sent as the ``query`` parameter.
            city: Region name sent as the ``region`` parameter.
        """
        self.api_key = api_key
        self.itemy = itemy
        self.city = city

    @staticmethod
    def _quote(value):
        """Percent-encode *value* (as UTF-8) for use in a query string."""
        try:
            from urllib import quote  # Python 2
        except ImportError:
            from urllib.parse import quote  # Python 3
        if not isinstance(value, bytes):
            value = value.encode('utf-8')
        return quote(value)

    def urls(self):
        """Return the list of request URLs, one per result page."""
        prefix = (
            self.BASE_URL
            + '?query=' + self._quote(self.itemy)
            + '&region=' + self._quote(self.city)
            + '&city_limit=true&scope=2&page_size=' + str(self.PAGE_SIZE)
            + '&page_num='
        )
        suffix = '&output=json&ak=' + self.api_key
        return [prefix + str(page) + suffix for page in range(self.MAX_PAGES)]

    @staticmethod
    def _format_item(item):
        """Flatten one POI dict into a tab-separated record.

        Field order: lat, lng, uid, name, address, type, first tag,
        second tag. ``address``, ``detail_info``, ``type`` and ``tag`` are
        optional and default to empty strings.

        Raises:
            KeyError: if ``location``, ``uid`` or ``name`` is missing.
            TypeError: if a field is not text or *item* is not a mapping.
        """
        location = item['location']
        detail = item.get('detail_info') or {}
        # The tag looks like "first;second"; the second part may be absent.
        tag_parts = (detail.get('tag') or '').split(';')
        first_tag = tag_parts[0]
        second_tag = tag_parts[1] if len(tag_parts) > 1 else ''
        fields = [
            str(location['lat']),
            str(location['lng']),
            item['uid'],
            item['name'],
            item.get('address') or '',
            detail.get('type') or '',
            first_tag,
            second_tag,
        ]
        return '\t'.join(fields)

    def baidu_search(self):
        """Download every page and return the formatted POI records.

        Progress is printed to stdout ('status:ok' / 'status:error', and
        'error line' for anything that cannot be converted). Malformed
        POIs are skipped one at a time instead of discarding the rest of
        their page.

        Returns:
            A list of tab-separated record strings.
        """
        records = []
        for url in self.urls():
            response = urllib2.urlopen(url)
            try:
                data = json.load(response)
            finally:
                # Always release the connection, even if parsing fails.
                response.close()

            status = data.get('status')
            if status == 0:
                print(str(data.get('total')))
                print('status:ok')
            else:
                print('status:error')

            results = data.get('results')
            if not isinstance(results, list):
                print('error line')
                continue
            if not results and status == 0:
                # A successful but empty page means the result set is
                # exhausted; later pages would be empty as well.
                break

            for item in results:
                try:
                    records.append(self._format_item(item))
                except (KeyError, TypeError):
                    print('error line')
        return records
class LocaDiv(object):
    """Split a bounding box into a grid of fixed-size cells.

    ``loc_all`` is a comma-separated string whose four fields are read as
    ``lat_sw,lng_sw,lat_ne,lng_ne``.
    """

    # Class-level default so instances always have a step.
    step = 0.05

    def __init__(self, loc_all, step=0.05):
        """Store the bounding box string and the grid spacing.

        Args:
            loc_all: Bounding box as ``"lat_sw,lng_sw,lat_ne,lng_ne"``.
            step: Grid spacing in the same unit as the coordinates.

        Raises:
            ValueError: if *step* is not positive.
        """
        if step <= 0:
            raise ValueError('step must be positive')
        self.loc_all = loc_all
        self.step = step

    def _axis(self, low_index, high_index):
        """Return the grid ticks between two fields of ``loc_all``.

        Ticks start at the low bound and advance by ``step``; the high
        bound itself is always appended as the final tick.
        """
        parts = self.loc_all.split(',')
        low = float(parts[low_index])
        high = float(parts[high_index])
        # The 0.0001 slack is kept from the original code; presumably it
        # guards against float noise just below a whole number of steps.
        count = int((high - low + 0.0001) / self.step)
        ticks = [low + self.step * i for i in range(count)]
        ticks.append(high)
        return ticks

    def lat_all(self):
        """Return the latitude ticks (fields 0 and 2 of ``loc_all``)."""
        return self._axis(0, 2)

    def lng_all(self):
        """Return the longitude ticks (fields 1 and 3 of ``loc_all``)."""
        return self._axis(1, 3)

    def ls_com(self):
        """Return every grid point as ``"lat,lng"``, latitude-major."""
        lngs = self.lng_all()
        return [
            str(lat) + ',' + str(lng)
            for lat in self.lat_all()
            for lng in lngs
        ]

    def ls_row(self):
        """Return every grid cell as ``"lat_lo,lng_lo,lat_hi,lng_hi"``.

        Cells are listed row by row (latitude outer, longitude inner).
        Each cell pairs a grid point with its diagonal neighbour one step
        up in both axes, so a grid of R x C points yields (R-1) x (C-1)
        cells.
        """
        lats = self.lat_all()
        lngs = self.lng_all()
        cells = []
        for lat_lo, lat_hi in zip(lats, lats[1:]):
            for lng_lo, lng_hi in zip(lngs, lngs[1:]):
                cells.append(','.join(
                    [str(lat_lo), str(lng_lo), str(lat_hi), str(lng_hi)]
                ))
        return cells
if __name__ == '__main__':
    # Script entry point: crawl every category below for one city and
    # append the records to one text file per category under ./mark.
    import os

    # Fill in your own Baidu API key here.
    # NOTE: avoid committing a real key to version control.
    baidu_api = 'YyLVKiRGYcFlmPZHZn7aRo7G4IuGll0K'

    print("开始爬数据,请稍等...")
    start_time = time.time()

    ct = '武汉市'
    cit = ct.decode('utf-8')
    c = "丽人,生活服务,商务大厦,地产小区,汽车服务,购物,餐饮,宾馆,休闲娱乐,金融,旅游景点,交通设施,教育,医疗,公司企业,美食,酒店,运动健身,教育培训,文化传媒,房地产,政府机构"

    # Create the output directory up front so open() cannot fail on it.
    out_dir = 'mark'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for item in c.split(","):
        print(item)
        cate = item.decode('utf-8')
        # Adjust these arguments to change the key, category or city.
        par = BaiDuPOI_City(baidu_api, cate, cit)
        records = par.baidu_search()
        # Append mode is kept from the original: re-running the script
        # adds to existing files rather than replacing them.
        with open(os.path.join(out_dir, cit + cate + '.txt'), 'a') as doc:
            for record in records:
                doc.write(record)
                doc.write('\n')

    end_time = time.time()
    print("数据爬取完毕,用时%.2f秒" % (end_time - start_time))