-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathBDpoi20180425.py
146 lines (132 loc) · 5.02 KB
/
BDpoi20180425.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Copyright @ Yang Chen
School of Urban Design, Wuhan University
2018/04/25.
Revised on 2018/04/26
POI in WGS-84 Coordinate
"""
# -*- coding:utf-8 -*-
import json
import urllib2
import sys
import time
# Python 2 only: sys.setdefaultencoding is removed from the sys namespace at
# interpreter start-up, so the module is reloaded to get it back. Setting the
# default to UTF-8 makes implicit str <-> unicode conversions (used below when
# byte strings and JSON text are concatenated) use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf8')
class BaiDuPOI(object):
    """Query the Baidu Place search API for one keyword inside one bounding box.

    Results are requested in WGS-84 coordinates, 20 records per page, for at
    most 20 pages per bounding box.
    """

    _ENDPOINT = 'http://api.map.baidu.com/place/v2/search'
    # coord_type values: 1 = WGS84, 2 = GCJ02, 3 = BD09 lat/lng, 4 = BD09 mercator
    _COORD_TYPE = 1
    _PAGE_SIZE = 20
    _MAX_PAGES = 20

    def __init__(self, api_key, itemy, loc):
        """
        :param api_key: Baidu API key, sent as the ``ak`` query parameter.
        :param itemy: search keyword, sent as the ``query`` parameter.
        :param loc: bounding box string, sent as the ``bounds`` parameter.
        """
        self.api_key = api_key
        self.itemy = itemy
        self.loc = loc

    def urls(self):
        """Return the request URLs for pages 0 .. 19 of this query."""
        urls = []
        for page in range(self._MAX_PAGES):
            urls.append(
                self._ENDPOINT
                + '?query=' + self.itemy
                + '&bounds=' + self.loc
                + '&scope=2&page_size=' + str(self._PAGE_SIZE)
                + '&page_num=' + str(page)
                + '&output=json&coord_type=' + str(self._COORD_TYPE)
                + '&ak=' + self.api_key
            )
        return urls

    def baidu_search(self):
        """Fetch the result pages and return one tab-separated row per POI.

        Each row is ``lat, lng, uid, name, address, type, tag1, tag2`` joined
        with tabs. Records that lack any of these fields are reported with
        ``error line`` and skipped; the rest of the page is still kept.

        Network and JSON decoding errors are not caught here and propagate to
        the caller.

        :return: list of row strings (possibly empty).
        """
        rows = []
        for url in self.urls():
            data = self._fetch(url)
            succeeded = data.get('status') == 0
            if succeeded:
                print(str(data.get('total')))
                print('status:ok')
            else:
                print('status:error')

            # A successful but empty page means the result set is exhausted,
            # so the remaining page requests would only burn API quota.
            # Failed pages do not stop the loop: the error may be transient.
            if succeeded and not data.get('results'):
                break

            rows.extend(self._rows_from_page(data))
        return rows

    def _fetch(self, url):
        """Download ``url`` and return the decoded JSON document."""
        response = urllib2.urlopen(url)
        try:
            return json.load(response)
        finally:
            # The response holds an open socket; release it even if the body
            # is not valid JSON.
            response.close()

    def _rows_from_page(self, data):
        """Convert one decoded response page into a list of output rows."""
        rows = []
        for item in data.get('results') or []:
            try:
                rows.append(self._format_item(item))
            except (KeyError, IndexError, TypeError, ValueError):
                # Missing field, tag without a ';' separator, or a field of an
                # unexpected type: skip this record only.
                print('error line')
        return rows

    @staticmethod
    def _format_item(item):
        """Build the tab-separated row for a single POI record."""
        detail = item["detail_info"]
        # The tag is expected to look like "first;second"; both parts are
        # written as separate columns.
        tag_parts = str(detail["tag"]).split(';')
        return '\t'.join([
            str(item["location"]["lat"]),
            str(item["location"]["lng"]),
            item["uid"],
            item["name"],
            item["address"],
            detail["type"],
            tag_parts[0],
            tag_parts[1],
        ])
class LocaDiv(object):
    """Split a bounding box into a grid of smaller bounding boxes.

    The box is given as the string ``'lat_sw,lng_sw,lat_ne,lng_ne'``. Each axis
    is cut every ``step`` units starting from the south-west value, and the
    north-east value is always the final grid line, so the last row/column may
    be narrower than ``step``.
    """

    # Added to an axis span before dividing by the step (presumably to absorb
    # float rounding when the span is a whole multiple of the step).
    _SLACK = 0.0001

    def __init__(self, loc_all, step=0.05):
        """
        :param loc_all: ``'lat_sw,lng_sw,lat_ne,lng_ne'`` comma-separated string.
        :param step: grid spacing, in the same units as the coordinates.
        :raises ValueError: if ``step`` is not positive.
        """
        if step <= 0:
            raise ValueError('step must be positive, got %r' % (step,))
        self.loc_all = loc_all
        self.step = step

    def _corner(self, index):
        """Return field ``index`` of ``loc_all`` as a float."""
        return float(self.loc_all.split(',')[index])

    def _grid_lines(self, start, stop):
        """Return the grid line positions from ``start`` to ``stop`` inclusive."""
        count = int((stop - start + self._SLACK) / self.step)
        if count < 1 and stop > start:
            # Span shorter than one step: the start edge must still be
            # present, otherwise the axis has a single line and no cell.
            count = 1
        lines = [start + self.step * i for i in range(count)]
        lines.append(stop)
        return lines

    def lat_all(self):
        """Return the latitude grid lines, south to north."""
        return self._grid_lines(self._corner(0), self._corner(2))

    def lng_all(self):
        """Return the longitude grid lines, west to east."""
        return self._grid_lines(self._corner(1), self._corner(3))

    def ls_com(self):
        """Return every grid node as ``'lat,lng'``, latitude-major order."""
        lngs = self.lng_all()
        nodes = []
        for lat in self.lat_all():
            for lng in lngs:
                nodes.append(str(lat) + ',' + str(lng))
        return nodes

    def ls_row(self):
        """Return every grid cell as ``'lat_sw,lng_sw,lat_ne,lng_ne'``.

        Cells are listed row by row (south to north), west to east within a
        row. Each cell pairs a grid node with its diagonal neighbour one row
        up and one column to the right.
        """
        lats = [str(value) for value in self.lat_all()]
        lngs = [str(value) for value in self.lng_all()]
        cells = []
        # Index rows and columns directly; the flat-index arithmetic used
        # previously only lined up when len(lats) + 1 == len(lngs).
        for row in range(len(lats) - 1):
            for col in range(len(lngs) - 1):
                cells.append(','.join(
                    (lats[row], lngs[col], lats[row + 1], lngs[col + 1])
                ))
        return cells
if __name__ == '__main__':
    import os

    # Baidu API key ("ak") used for every request; put your own key here.
    baidu_api = 'YyLVKiRGYcFlmPZHZn7aRo7G4IuGll0K'
    # Alternate keys kept from the original script (swap one in above if
    # needed). Keys are better kept out of source control.
    #   Vn1ebvMZ1DCw2nj5LSQ738gmOB8AkCce  (Liu Haiyi)
    #   WwMkKI0MYGXv639HQvunrMOHdN8rnK8H  (Hu)
    #   KWSoddWr4bd1CXh1gLe4GfmIuPKXuDO9  (Cai)
    #   NQ4u00oIVst9I6sCYhMI9Tql3ssfF3rk  (Cai)

    print("开始爬数据,请稍等...")
    start_time = time.time()

    # Bounding box to crawl, as 'lat_sw,lng_sw,lat_ne,lng_ne'.
    # Other boxes used previously:
    #loc = LocaDiv('30.411,114.043,30.897,114.618')  # Wuhan
    #loc = LocaDiv('30.411,114.294,30.,114.382')
    #loc = LocaDiv('22.44550,113.75719,22.86239,114.62854')  # Shenzhen
    #loc = LocaDiv('22.134,113.833,22.564,114.516')  # Hong Kong
    loc = LocaDiv('30.414,114.080,30.741,114.532')  # Wuhan

    # Comma-separated search keywords; the full list used previously was:
    #c ="丽人,生活服务,商务大厦,地产小区,汽车服务,购物,餐饮,宾馆,休闲娱乐,金融,旅游景点,交通设施,教育,医疗,公司企业,美食,酒店,运动健身,教育培训,文化传媒,房地产,政府机构"
    c = "丽人"
    n = c.split(",")

    # The grid depends only on the bounding box, so build it once.
    locs_to_use = loc.ls_row()

    # One output file per keyword lives in this directory.
    out_dir = '11mark'
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    for item in n:
        print(item)
        cate = item.decode('utf-8')
        out_path = os.path.join(out_dir, cate + '.txt')
        for loc_to_use in locs_to_use:
            par = BaiDuPOI(baidu_api, cate, loc_to_use)  # adjust these arguments as needed
            a = par.baidu_search()
            # Append after every grid cell so finished cells are already on
            # disk if a later request fails.
            with open(out_path, 'a') as doc:
                for ax in a:
                    doc.write(ax)
                    doc.write('\n')

    end_time = time.time()
    print("数据爬取完毕,用时%.2f秒" % (end_time - start_time))