-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmeansbasic
31 lines (29 loc) · 1.05 KB
/
kmeansbasic
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# K-means on two iris features (petal length vs. sepal length).
# Part 1 clusters with stats::kmeans() as a reference.
# Part 2 re-implements the algorithm by hand (Lloyd iteration) and plots
# the random start and the final result so both can be compared visually.
# Both parts are random (nstart restarts / random initial labels); call
# set.seed() before running the script if reproducible plots are needed.

k <- 3L  # number of clusters used by both parts

# Reference clustering with the built-in implementation ----
i <- data.frame(iris$Petal.Length, iris$Sepal.Length)
x <- i  # copy of the two features taken before the class column is added
c1 <- kmeans(i, centers = k, nstart = 5)
i$class <- c1$cluster
plot(i$iris.Petal.Length, i$iris.Sepal.Length, col = i$class, pch = 20)

# Hand-written k-means ----

# Returns the centers (one row per label 1..nrow(old_centers)) as the
# column means of the rows of `pts` carrying that label.
#   pts         numeric matrix, one observation per row
#   labels      integer vector of cluster labels, one per row of `pts`
#   old_centers matrix of current centers; also fixes shape and dimnames
# Row j of the result always belongs to label j. A label without any
# member keeps its row from `old_centers` instead of turning into NaN.
update_centers <- function(pts, labels, old_centers) {
  new_centers <- old_centers
  for (j in seq_len(nrow(old_centers))) {
    members <- labels == j
    if (any(members)) {
      new_centers[j, ] <- colMeans(pts[members, , drop = FALSE])
    }
  }
  new_centers
}

pts <- as.matrix(x)  # numeric matrix view of the features for the arithmetic

init <- sample(k, nrow(i), replace = TRUE)  # initial class assignment
# Every label must occur at least once, otherwise its start center is
# undefined.
stopifnot(all(seq_len(k) %in% init))
plot(x, col = init)

max.iter <- 20     # maximum number of passes over the data
threshold <- 0.01  # stop once the centers move less than this
n.iter <- 1
delta <- threshold + 1  # guarantees that the first pass is executed

# Calculate the center of each subgroup as the average position of all
# observations in that subgroup. Labels are visited in the fixed order
# 1..k so that row j, label j and plot colour j always refer to the same
# cluster.
center <- update_centers(
  pts, init,
  matrix(NA_real_, nrow = k, ncol = ncol(pts),
         dimnames = list(NULL, colnames(pts)))
)
points(center, pch = 10, col = seq_len(k))

while (delta > threshold && n.iter <= max.iter) {
  c0 <- center  # centers before this pass, used to measure the movement

  # Squared Euclidean distance from every observation to every center
  # (n x k matrix). Squaring does not change which center is the nearest.
  sq_dist <- vapply(
    seq_len(k),
    function(j) rowSums(sweep(pts, 2, center[j, ])^2),
    numeric(nrow(pts))
  )
  k.iter <- apply(sq_dist, 1, which.min)  # label = index of nearest center

  center <- update_centers(pts, k.iter, c0)

  # Spectral norm (largest singular value) of the matrix of center
  # shifts; it is exactly 0 when no center moved.
  delta <- norm(center - c0, "2")
  print(n.iter)
  print(delta)
  n.iter <- n.iter + 1
}

plot(x, col = k.iter)
points(center, pch = 8, col = seq_len(k))