forked from Introtocs/Week9_Lec
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkmeans.py
84 lines (68 loc) · 2.3 KB
/
kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
"""
Created on Thu Oct 29 16:36:11 2015
@author: zhengzhang
"""
import random
import util
import cluster_student as cluster
import helper
def kmeans_iter(samples, clusters, k):
'''
implement one iteration of kmeans:
- each sample finds the closest cluster by computing
its distance to the centroids of all the clusters
- then compute every cluster's new centroid
- the algorithm converges if cluster's centroid does not
move any more
'''
converged = True
return converged
# Kmeans: take a list of samples and make k clusters
def kmeans(samples, k, verbose):
"""Assumes samples is a list of samples of class Sample,
k is a positive int, verbose is a Boolean
Returns a list containing k clusters. """
#Get k randomly chosen initial centroids
initialCentroids = random.sample(samples, k)
#Create a singleton cluster for each centroid
clusters = []
for e in initialCentroids:
clusters.append(cluster.Cluster([e]))
#Iterate until centroids do not change
converged = False
numIterations = 0
while not converged:
numIterations += 1
# replace the following line by implementing
# kmeans_iter(samples, clusters, k) in this file
converged = helper.kmeans_iter(samples, clusters, k)
#converged = kmeans_iter(samples, clusters, k)
if verbose:
print('Iteration #' + str(numIterations))
for c in clusters:
print(c)
print('\n') #add blank line
return clusters
# one run of kmeans, like:
# kmeansTest(4, False)
def kmeansTest(k=2, n=20, verbose=False):
random.seed(0)
xMean = 3
xSD = 1
yMean = 5
ySD = 1
d1Samples = util.genDistribution(xMean, xSD, yMean, ySD, n, '1.')
d2Samples = util.genDistribution(xMean+3, xSD, yMean+1, ySD, n, '2.')
allSamples = d1Samples + d2Samples
print("before clustering")
util.plot_cluster([cluster.Cluster(allSamples)])
print("after clustering")
clusters = kmeans(allSamples, k, verbose)
util.plot_cluster(clusters, verbose)
print('Final result')
for c in clusters:
print('', c)
if __name__ == "__main__":
random.seed(0)
kmeansTest(k=5)