We now build the KD-tree faster

dainis-boumber · Jun 22, 2018 · 2e0462e · 2e0462e
1 parent b2bb9e7
commit 2e0462e
Show file tree

Hide file tree

Showing 3 changed files with 11 additions and 8 deletions.
diff --git a/complexity.py b/complexity.py
@@ -50,8 +50,8 @@ def plot_ds(grid_size, loc, X, y, xx, yy, title, seeds=None, colspan=1, rowspan=
 def active(classifiers, datasets, experiments, query_strat, quota=25, plot_every_n=5):
     for dataset_index, ((X_src, y_src), (X_tgt, y_tgt)) in enumerate(datasets):
         u_tgt = [None] * len(X_tgt)
-        est_src = ce.ComplexityEstimator(X_src, y_src, n_windows=10, nK=1)
-        est_tgt = ce.ComplexityEstimator(X_tgt, y_tgt, n_windows=10, nK=1)
+        est_src = ce.ComplexityEstimator(X_src, y_src, n_windows=10, nK=10)
+        est_tgt = ce.ComplexityEstimator(X_tgt, y_tgt, n_windows=10, nK=10)
 
         # Declare Dataset instance, X is the feature, y is the label (None if unlabeled)
         X = np.vstack((X_src, X_tgt))
@@ -152,7 +152,8 @@ def active(classifiers, datasets, experiments, query_strat, quota=25, plot_every
     plt.show()
 
 def main():
-    clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
+    #clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
+    clfs = [SVC()]
     datasets = []
     experiments = []
     query_strat = 'RandomSampling'

diff --git a/modules/complexity_estimator.py b/modules/complexity_estimator.py
@@ -1,5 +1,5 @@
 import numpy as np
-import scipy
+import scipy.spatial
 import sklearn.metrics as metr
 
 class ComplexityEstimator:
@@ -11,14 +11,16 @@ def __init__(self, X, y, n_windows=10, nK=1):
         self.X = X
         self.y = y
         self.seeds = np.random.random_integers(0, len(X) - 1, n_windows)
-        self.tree = scipy.spatial.cKDTree(X)
+        self.tree = scipy.spatial.cKDTree(X, leafsize=32, compact_nodes=False, balanced_tree=False)
+        print('build done')
         self.labels = set(y)
 
         self.Ks = np.arange(1, len(self.X) + 1, step=nK)  # ckdTree starts counting from 1
         self.Hs = np.zeros(len(self.Ks))
         self.ws = np.ndarray((n_windows, len(self.Ks)))
 
         for i, k in enumerate(self.Ks):
+            print(k)
             for j, seed in enumerate(self.seeds):
                 h = self._H(k=k, seed=seed)
                 self.ws[j, i] = h

diff --git a/modules/mnist.py b/modules/mnist.py
@@ -8,7 +8,7 @@
 # Import data and preprocess 
 mnist = pd.read_csv('./data/mnist.csv') # Using 100 samples only for this test run
 labels = mnist.as_matrix(columns=['label'])
-dataset = mnist.drop('label', axis = 1).as_matrix()
+dataset = mnist.drop('label', axis = 1).values
 dataset[dataset > 0] = 1 # Binarize each pixel to either 0 (white) or 1 (black) for better classification
 
 
@@ -24,7 +24,7 @@ def load_mnist():
         index += 1
     X = np.array(X).reshape(rows, -1)
     mnist = pd.DataFrame(X)
-    mnist = mnist.as_matrix()
+    mnist = mnist.values
     y = labels.flatten()
 
     print("Completed with X shape: ", mnist.shape)
@@ -49,7 +49,7 @@ def load_mnist_rotated():
 
     mnist_rotated = pd.DataFrame(X)
     # mnist_rotated.to_csv('./data/mnist_rotated/minst_rotated_21000.csv', index=False, header=False)
-    mnist_rotated = mnist_rotated.as_matrix()
+    mnist_rotated = mnist_rotated.values
 
     y = labels.flatten()
     print("Completed with X shape: ", mnist_rotated.shape)