-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclustering.py
90 lines (72 loc) · 2.31 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
"""Clustering.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1oHWnSAo_tOYKekEWU-7QTdEgmkI4to2L
"""
# Notebook-only shell magic ("!pip ...") is a SyntaxError in a plain .py file.
# Install the dependency from a shell instead:  pip install googlemaps
import json
import os
import re
import urllib.parse

import googlemaps
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from sklearn.cluster import KMeans
# Landmarks to geocode and split into geographic clusters.
attractions = [
    'Eiffel Tower',
    'Louvre Museum',
    'Notre Dame Cathedral',
    'Basilica Sacre Coeur',
    'Arc de Triomphe',
    'Disneyland Paris',
    'La Vallee Village',
    'Pantheon',
    'Musee d\'Orsay',
]

# Read the Google Maps key from the environment so the credential is not
# committed together with the source.
_api_key = os.environ.get('GOOGLE_MAPS_API_KEY')
if not _api_key:
    raise RuntimeError(
        'Set the GOOGLE_MAPS_API_KEY environment variable to a valid '
        'Google Maps API key before running this script.'
    )
gmaps = googlemaps.Client(key=_api_key)

# Column-oriented table of the geocoding results (one entry per attraction).
myDict = {'location': attractions, 'lat': [], 'lon': []}

for add in attractions:
    address = add + ' France'
    # Use the documented query-parameter form of the Nominatim search API;
    # ``params`` also takes care of URL-encoding the address.
    response = requests.get(
        'https://nominatim.openstreetmap.org/search',
        params={'q': address, 'format': 'json'},
        # Nominatim's usage policy asks clients to identify themselves.
        headers={'User-Agent': 'clustering-notebook'},
        timeout=10,
    )
    response.raise_for_status()
    results = response.json()
    try:
        # Nominatim returns coordinates as strings; convert them right away.
        lat = float(results[0]['lat'])
        lon = float(results[0]['lon'])
    except (IndexError, KeyError, TypeError, ValueError):
        # No usable match for this address: record it as missing.
        lat = None
        lon = None
    myDict['lat'].append(lat)
    myDict['lon'].append(lon)

df = pd.DataFrame(myDict)
df['lat'] = df['lat'].astype(float)
df['lon'] = df['lon'].astype(float)

# KMeans cannot fit rows with missing coordinates, so fail early and name the
# attractions that could not be geocoded.
_missing = df.loc[df[['lat', 'lon']].isna().any(axis=1), 'location']
if not _missing.empty:
    raise ValueError('Could not geocode: ' + ', '.join(_missing))

# Numeric [lat, lon] pairs used as the clustering features.
arr = df[['lat', 'lon']].values.tolist()

kmeans = KMeans(n_clusters=3, random_state=0).fit(arr)
df['group'] = kmeans.labels_
# Only has a visible effect as the last expression of a notebook cell (it
# displays the sorted table); ``df`` itself is left in its original order.
df.sort_values('group')
def getShortest(start, visited, grp):
    """Return the unvisited location of ``grp`` that is closest to ``start``.

    One Distance Matrix request is issued per candidate through the
    module-level ``gmaps`` client.

    Args:
        start: Origin handed to ``gmaps.distance_matrix``.
        visited: Container of location names that must not be returned.
        grp: Table whose ``'location'`` entry lists the candidate names.

    Returns:
        A ``(location, distance_km)`` tuple for the nearest candidate. When
        several candidates are equally near, the first one listed wins.

    Raises:
        ValueError: If every location has already been visited, or if the
            API reports no usable route to a candidate.
    """
    # NOTE(review): names are sent to Google without any country/city
    # qualifier, so an ambiguous name may resolve somewhere unexpected --
    # confirm this is intended.
    candidates = [loc for loc in grp['location'] if loc not in visited]
    if not candidates:
        raise ValueError('No unvisited location left in the group.')

    dist_lst = []
    for loc in candidates:
        element = gmaps.distance_matrix(start, loc)['rows'][0]['elements'][0]
        # Elements without a route (e.g. ZERO_RESULTS, NOT_FOUND) carry no
        # 'distance' entry, so report them explicitly.
        status = element.get('status', 'OK')
        if status != 'OK':
            raise ValueError(
                'No route from %r to %r (status: %s)' % (start, loc, status)
            )
        # 'value' is the numeric distance in metres; unlike the display
        # 'text' it does not depend on the unit the API chose to print.
        dist_lst.append((loc, element['distance']['value'] / 1000))

    return min(dist_lst, key=lambda pair: pair[1])
def getPath(i):
    """Build a greedy nearest-neighbour route through cluster ``i``.

    The rows of the module-level ``df`` whose ``'group'`` equals ``i`` are
    selected. The route starts at the first of those locations and
    repeatedly moves to the closest not-yet-visited one, as reported by
    ``getShortest``.

    Args:
        i: Cluster label to build the route for.

    Returns:
        A ``(route, total_distance)`` tuple: the ordered list of location
        names and the sum of the hop distances returned by ``getShortest``.
        A group with a single location yields ``([location], 0)`` and an
        empty group yields ``([], 0)``.
    """
    grp = df[df['group'] == i].reset_index(drop=True)
    if grp.empty:
        # No location carries this label, so there is nothing to route.
        return [], 0

    start = grp['location'][0]
    route = [start]
    tot_dist = 0
    cur = start
    # Every hop adds exactly one location, so len(grp) - 1 hops visit all.
    for _ in range(len(grp) - 1):
        nearest, hop_dist = getShortest(cur, route, grp)
        route.append(nearest)
        tot_dist += hop_dist
        cur = nearest
    return route, tot_dist
# Print the greedy route and its total distance for every cluster; the count
# comes from the fitted model so it cannot drift from ``n_clusters``.
for cluster_id in range(kmeans.n_clusters):
    print(getPath(cluster_id))