diff --git a/clustering.py b/clustering.py
index 95e2432..794c633 100644
--- a/clustering.py
+++ b/clustering.py
@@ -6,12 +6,17 @@
 ==========
 Cluster the data after extracting features from them
 """
-
+from __future__ import division
 from optparse import OptionParser
 from pprint import pprint
 from classifier import getAnalysisData
 import numpy as np
 from sklearn.cluster import DBSCAN
+import matplotlib.pyplot as mpl
+from scipy.spatial import distance
+from nltk import cluster
+from nltk.cluster import util
+from nltk.cluster import api
 
 
 def getUserInput():
@@ -73,7 +78,39 @@ def buildColumnData(data):
     n_revlength       = np.array(revlength  )
     n_label           = np.array(label      )
 
-    pprint(n_hasPrivacy)
+    # pprint(n_hasPrivacy)
+    return {
+        'avgrating'      : avgrating,
+        'hasDevEmail'    : hasDevEmail,
+        'hasDevWeb'      : hasDevWeb,
+        'hasPrivacy'     : hasPrivacy,
+        'install'        : install,
+        'price'          : price,
+        'revlength'      : revlength,
+        'label'          : label,
+    }
+
+
+
+
+def clusterer(data):
+    """Cluster the columns of `data` with NLTK's GAAClusterer (4 clusters).
+
+    data: mapping of column name -> sequence, as buildColumnData returns.
+    Prints the clusterer, the vectors and their assignments, then shows
+    the dendrogram.  NOTE(review): confirm per-column (not per-row) intent.
+    """
+    pprint(data)
+    gaa = cluster.GAAClusterer(num_clusters=4)
+    # Was `data[0][0].iteritems()`: KeyError, since `data` is a dict keyed
+    # by column name.  Sorted keys give a reproducible vector order.
+    vectors = [np.array(data[k]) for k in sorted(data)]
+    clusters = gaa.cluster(vectors, True)
+    print 'Clusterer:', gaa
+    print 'Clustered:', vectors
+    print 'As:', clusters
+    gaa.dendrogram().show()
+
 
 
 def main():
@@ -81,7 +118,9 @@ def main():
 
     data = getAnalysisData(userinput)
 
-    datacol = buildColumnData(data)
+    dataframe = buildColumnData(data)
+
+    clusterer(dataframe)
 
 
 
diff --git a/dbscan.pdf b/dbscan.pdf
new file mode 100644
index 0000000..222e558
Binary files /dev/null and b/dbscan.pdf differ