Skip to content

Commit

Permalink
we now build tree faster
Browse files Browse the repository at this point in the history
  • Loading branch information
dainis-boumber committed Jun 22, 2018
1 parent b2bb9e7 commit 2e0462e
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 8 deletions.
7 changes: 4 additions & 3 deletions complexity.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def plot_ds(grid_size, loc, X, y, xx, yy, title, seeds=None, colspan=1, rowspan=
def active(classifiers, datasets, experiments, query_strat, quota=25, plot_every_n=5):
for dataset_index, ((X_src, y_src), (X_tgt, y_tgt)) in enumerate(datasets):
u_tgt = [None] * len(X_tgt)
est_src = ce.ComplexityEstimator(X_src, y_src, n_windows=10, nK=1)
est_tgt = ce.ComplexityEstimator(X_tgt, y_tgt, n_windows=10, nK=1)
est_src = ce.ComplexityEstimator(X_src, y_src, n_windows=10, nK=10)
est_tgt = ce.ComplexityEstimator(X_tgt, y_tgt, n_windows=10, nK=10)

# Declare the Dataset instance: X holds the features, y holds the labels (None if unlabeled)
X = np.vstack((X_src, X_tgt))
Expand Down Expand Up @@ -152,7 +152,8 @@ def active(classifiers, datasets, experiments, query_strat, quota=25, plot_every
plt.show()

def main():
clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
#clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
clfs = [SVC()]
datasets = []
experiments = []
query_strat = 'RandomSampling'
Expand Down
6 changes: 4 additions & 2 deletions modules/complexity_estimator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import numpy as np
import scipy
import scipy.spatial
import sklearn.metrics as metr

class ComplexityEstimator:
Expand All @@ -11,14 +11,16 @@ def __init__(self, X, y, n_windows=10, nK=1):
self.X = X
self.y = y
self.seeds = np.random.random_integers(0, len(X) - 1, n_windows)
self.tree = scipy.spatial.cKDTree(X)
self.tree = scipy.spatial.cKDTree(X, leafsize=32, compact_nodes=False, balanced_tree=False)
print('build done')
self.labels = set(y)

self.Ks = np.arange(1, len(self.X) + 1, step=nK) # cKDTree starts counting from 1
self.Hs = np.zeros(len(self.Ks))
self.ws = np.ndarray((n_windows, len(self.Ks)))

for i, k in enumerate(self.Ks):
print(k)
for j, seed in enumerate(self.seeds):
h = self._H(k=k, seed=seed)
self.ws[j, i] = h
Expand Down
6 changes: 3 additions & 3 deletions modules/mnist.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# Import data and preprocess
mnist = pd.read_csv('./data/mnist.csv') # Using 100 samples only for this test run
labels = mnist.as_matrix(columns=['label'])
dataset = mnist.drop('label', axis = 1).as_matrix()
dataset = mnist.drop('label', axis = 1).values
dataset[dataset > 0] = 1 # Convert each pixel to either 0 (white) or 1 (black) for better classification


Expand All @@ -24,7 +24,7 @@ def load_mnist():
index += 1
X = np.array(X).reshape(rows, -1)
mnist = pd.DataFrame(X)
mnist = mnist.as_matrix()
mnist = mnist.values
y = labels.flatten()

print("Completed with X shape: ", mnist.shape)
Expand All @@ -49,7 +49,7 @@ def load_mnist_rotated():

mnist_rotated = pd.DataFrame(X)
# mnist_rotated.to_csv('./data/mnist_rotated/minst_rotated_21000.csv', index=False, header=False)
mnist_rotated = mnist_rotated.as_matrix()
mnist_rotated = mnist_rotated.values

y = labels.flatten()
print("Completed with X shape: ", mnist_rotated.shape)
Expand Down

0 comments on commit 2e0462e

Please sign in to comment.