Skip to content

Commit

Permalink
clustering still work in progress
Browse files Browse the repository at this point in the history
  • Loading branch information
seekshreyas committed Dec 8, 2013
1 parent d477cf2 commit 92a48ce
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 3 deletions.
45 changes: 42 additions & 3 deletions clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,17 @@
==========
Cluster the data after extracting features from them
"""

from __future__ import division
from optparse import OptionParser
from pprint import pprint
from classifier import getAnalysisData
import numpy as np
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as mpl
from scipy.spatial import distance
from nltk import cluster
from nltk.cluster import util
from nltk.cluster import api


def getUserInput():
Expand Down Expand Up @@ -73,15 +78,49 @@ def buildColumnData(data):
n_revlength = np.array(revlength )
n_label = np.array(label )

pprint(n_hasPrivacy)
# pprint(n_hasPrivacy)
return {
'avgrating' : avgrating,
'hasDevEmail' : hasDevEmail,
'hasDevWeb' : hasDevWeb,
'hasPrivacy' : hasPrivacy,
'install' : install,
'price' : price,
'revlength' : revlength,
'label' : label,
}




def _featureVectors(data):
    """Build one numeric feature vector per record in *data*.

    Two layouts are accepted:

    * a sequence of rows whose first element is a feature dict (the layout
      the original loop indexed as ``data[0][0]``);
    * a column-oriented mapping of name -> list of values, as returned by
      ``buildColumnData``.

    Feature names are sorted so every vector uses the same component order
    regardless of dict iteration order.
    """
    if hasattr(data, 'keys'):
        # NOTE(review): 'label' is assumed to be the class label rather than
        # a feature, so it is left out of the vectors -- confirm.
        names = sorted(name for name in data if name != 'label')
        columns = [data[name] for name in names]
        # zip(*columns) turns the per-feature columns into per-record rows.
        return [np.array(values, dtype=float) for values in zip(*columns)]

    vectors = []
    for row in data:
        # Use the current row's feature dict, not the first row's.
        features = row[0]
        vectors.append(
            np.array([features[name] for name in sorted(features)],
                     dtype=float))
    return vectors


def clusterer(data, num_clusters=4):
    """Cluster the records in *data* with NLTK's GAAClusterer and show the dendrogram.

    Args:
        data: either a sequence of rows whose first element is a dict of
            numeric features, or a mapping of column name -> list of values.
        num_clusters: number of clusters requested from the clusterer
            (defaults to 4, the value that used to be hard-coded).

    Raises:
        ValueError: if no feature vectors can be built from *data*.
    """
    pprint(data)

    vectors = _featureVectors(data)
    if not vectors:
        raise ValueError('no feature vectors to cluster')

    # Named `gaac` so the local does not shadow this function's own name.
    gaac = cluster.GAAClusterer(num_clusters=num_clusters)

    # Second argument asks the clusterer to also assign each vector a cluster.
    clusters = gaac.cluster(vectors, True)

    # Single-argument print(...) produces the same output under Python 2's
    # print statement and Python 3's print function.
    print('Clusterer: %s' % (gaac,))
    print('Clustered: %s' % (vectors,))
    print('As: %s' % (clusters,))
    gaac.dendrogram().show()



def main():
    """Run the pipeline: get user input, load the analysis data, cluster it."""
    userinput = getUserInput()
    data = getAnalysisData(userinput)

    # Build the column-oriented view once and hand it to the clusterer.
    dataframe = buildColumnData(data)

    clusterer(dataframe)



Expand Down
Binary file added dbscan.pdf
Binary file not shown.

0 comments on commit 92a48ce

Please sign in to comment.