CADA Test

dainis-boumber · Aug 24, 2018 · 73d3727 · 73d3727
1 parent 41801f2
commit 73d3727
Show file tree

Hide file tree

Showing 5 changed files with 29 additions and 14 deletions.
diff --git a/complexity.py b/complexity.py
@@ -17,9 +17,11 @@
 from modules.oracle import Oracle
 import modules.complexity_estimator as ce
 from nd_boundary_plot.plots import nd_boundary_plot
+from modules.active_da import CADA
 
 # Data pre-processing and import
-from modules import mnist
+# from modules import mnist
+from modules import mnist 
 
 ####################################################
 
@@ -153,9 +155,21 @@ def active(classifiers, datasets, experiments, query_strat, quota=25, plot_every
     plt.tight_layout()
     plt.show()
 
+def bsda_active(datasets=(), baseline_clf=None, N=100):
+    """Train a baseline on CADA-selected target samples and print its predictions.
+
+    For every ``((X_src, y_src), (X_tgt, y_tgt))`` pair in ``datasets`` a
+    ``CADA`` instance is built from the source data and asked to ``query`` the
+    target data. The baseline classifier is fitted on the queried target rows
+    and its predictions for the remaining target rows are printed.
+
+    Args:
+        datasets: iterable of ``((X_src, y_src), (X_tgt, y_tgt))`` pairs.
+            An empty iterable is a no-op.
+        baseline_clf: object exposing ``fit(X, y)`` and ``predict(X)``. When
+            ``None`` a fresh ``SVC()`` is created for each dataset pair, so no
+            fitted state is shared between calls.
+        N: forwarded as the second argument of ``CADA.query`` (presumably the
+            number of target samples to select -- confirm against CADA).
+
+    Returns:
+        None. Predictions are written to stdout.
+    """
+    # Local import: the module's import block is not visible in this hunk.
+    import numpy as np
+
+    for (X_src, y_src), (X_tgt, y_tgt) in datasets:
+        clf = SVC() if baseline_clf is None else baseline_clf
+
+        # NOTE(review): assumes query() returns integer row indices (or a
+        # boolean mask) into X_tgt, and that X_tgt / y_tgt support NumPy-style
+        # indexing -- confirm against CADA.query and the dataset loaders.
+        ixs = CADA(X_src, y_src).query(X_tgt, N)
+
+        # Select the rows that were NOT queried. The previous `X_tgt[-ixs]`
+        # negated the indices (rows counted from the end) instead of taking
+        # the complement of the queried set.
+        held_out = np.ones(len(X_tgt), dtype=bool)
+        held_out[ixs] = False
+
+        clf.fit(X_tgt[ixs], y_tgt[ixs])
+        print(clf.predict(X_tgt[held_out]))
+
+
+
 def main():
-    #clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
-    clfs = [SVC()]
+    #baseline_clfs = [SVC(), GaussianNB(), DecisionTreeClassifier(), MLPClassifier(hidden_layer_sizes=(10,10,10,10,10,10), solver='lbfgs', alpha=2, random_state=1, activation='relu')]
     datasets = []
     experiments = []
     query_strat = 'RandomSampling'
@@ -168,14 +182,15 @@ def main():
     # experiments.append('moons')
     # datasets.append((u.hastie(1000), u.hastie(1000)))
 
-    # datasets.append((make_gaussian_quantiles(n_samples=2000, n_features=10, n_classes=3),
-    #                 make_gaussian_quantiles(n_samples=2000, n_features=10, n_classes=3)))
+    # datasets.append((make_gaussian_quantiles(n_samples=500, n_features=5, n_classes=3),
+    #                 make_gaussian_quantiles(n_samples=500, n_features=5, n_classes=3)))
     # experiments.append('gauus')
 
     datasets.append((mnist.load_mnist(), mnist.load_mnist_rotated()))
     experiments.append('MNIST_vs_MNIST_Rotated')
 
-    active(classifiers=clfs, datasets=datasets, experiments=experiments, query_strat=query_strat)
+    #baseline_active(classifiers=clfs, datasets=datasets, experiments=experiments, query_strat=query_strat)
+    bsda_active(datasets=datasets)
 
 if __name__ == "__main__":
     main()
diff --git a/modules/active_da.py b/modules/active_da.py
@@ -18,7 +18,7 @@
 
 class CADA(object):
     '''
-    1.  Compute complexity measure on source domain at different levels of locality (size of neighborhood).
+    1. Compute complexity measure on source domain at different levels of locality (size of neighborhood).
     2. Choose the single neighborhood size K* that keeps entropy below a predefined threshold.
     3. Sample N examples from target domain randomly.
     4. Grow a window around each example of size K*.
@@ -53,8 +53,8 @@ def __init__(self, source_X, source_y, max_entropy=0.1, f_samples=0.01, window_g
             stepsize = 1
         self.Ks = np.arange(mink, maxk, step=stepsize)  # ckdTree starts counting from 1
         self.Hs = np.zeros(len(self.Ks))
+        print(self.Hs)
         self.ws = np.zeros((len(self.seeds), len(self.Ks)))
-        self.banned = np.zeros((self.seeds, len(self.Ks)))
         self.K = 0
 
         for i, k in enumerate(self.Ks):

diff --git a/modules/complexity_estimator.py b/modules/complexity_estimator.py
@@ -18,7 +18,7 @@ def __init__(self, X, y, n_windows=10, nK=1):
         self.Ks = np.arange(1, len(self.X) + 1, step=nK)  # ckdTree starts counting from 1
         self.Hs = np.zeros(len(self.Ks))
         self.ws = np.ndarray((n_windows, len(self.Ks)))
-
+        
         for i, k in enumerate(self.Ks):
             print(k)
             for j, seed in enumerate(self.seeds):

diff --git a/modules/mnist.py b/modules/mnist.py
@@ -14,7 +14,7 @@
 
 def load_mnist():
 
-    rows = 42000
+    rows = 100
     columns = 784
     index = 1
     X = []
@@ -30,13 +30,13 @@ def load_mnist():
     print("Completed with X shape: ", mnist.shape)
     print("Flattened y shape: ", y.shape)
 
-    mnist, y = shuffle(X, y, random_state = 5)
+    mnist, y = shuffle(X, y, random_state = 2)
     return mnist, y
 
 
 def load_mnist_rotated():
 
-    rows = 42000
+    rows = 100
     columns = 784
     indx = 1
     X = []
@@ -55,5 +55,5 @@ def load_mnist_rotated():
     print("Completed with X shape: ", mnist_rotated.shape)
     print("Flattened y shape: ", y.shape)
 
-    mnist_rotated, y = shuffle(X, y, random_state = 15)
+    mnist_rotated, y = shuffle(X, y, random_state = 7)
     return mnist_rotated, y
diff --git a/nd_boundary_plot b/nd_boundary_plot