Skip to content

Commit

Permalink
TODO COMPLETE: Generalize visualization from 2 to N dimensions (and c…
Browse files Browse the repository at this point in the history
…lasses). See @nd_boundary_plot.plots.nd_boundary_plot(), or check it out on GitHub.

TODO: Graph more than one dataset.
TODO: Improve visualization
TODO: Run on synthetic 2D data
TODO: Run on gaussian_quantiles src, hastie_10_2 tgt
TODO: Run on digits/reversed digits
TODO: Run on text data like Enron e-mails (need to think about this one)
Signed-off-by: Dainis Boumber <[email protected]>
  • Loading branch information
dainis-boumber committed Oct 27, 2017
1 parent daebee5 commit e62f84e
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 22 deletions.
6 changes: 3 additions & 3 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "modules/nd-boundary-plot"]
path = modules/nd-boundary-plot
url = https://github.com/dainis-boumber/nd-boundary-plot.git
[submodule "nd_boundary_plot"]
path = nd_boundary_plot
url = https://github.com/dainis-boumber/nd_boundary_plot.git
26 changes: 7 additions & 19 deletions complexity.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import matplotlib.pyplot as plt

import numpy as np
from sklearn.datasets import make_blobs
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC, LinearSVC

import modules.complexity_estimator as ce
import modules.util as u
from modules.oracle import Oracle
from nd_boundary_plot.plots import nd_boundary_plot


################################################################################################
Expand Down Expand Up @@ -75,22 +76,7 @@ def active(classifiers, src_datasets, tgt_datasets, quota=25, plot_every_n=5):
model.fit(X_known, y_known) # train model with newly-updated Dataset
score = model.score(X_tgt, y_tgt)
ax = plt.subplot2grid(grid_size, (n + 1, w))
if hasattr(model, "decision_function") or len(set(y_known)) != 2:
Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
else:
Z = model.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1]

# Put the result into a color plot
Z = Z.reshape(xx.shape)

ax.contourf(xx, yy, Z, alpha=.3)

# Plot also the training points
ax.scatter(X_tgt[:, 0], X_tgt[:, 1], c=y_tgt)
ax.set_xlim(xx.min(), xx.max())
ax.set_ylim(yy.min(), yy.max())
ax.set_xticks(())
ax.set_yticks(())
nd_boundary_plot(X_tgt, model, (x_min, x_max, y_min, y_max), ax)
if i == 0:
ax.set_ylabel(u.classname(model))
if n == 0:
Expand All @@ -106,8 +92,10 @@ def main():
src_datasets = []
tgt_datasets = []

src_datasets.append(make_blobs(n_samples=200, centers=3, cluster_std=3.0))
tgt_datasets.append(make_blobs(n_samples=100, centers=3, cluster_std=5.0))
src_datasets.append(
make_classification(n_features=6, n_classes=3, n_redundant=0, n_informative=4, n_clusters_per_class=1))
tgt_datasets.append(
make_classification(n_features=6, n_classes=3, n_redundant=0, n_informative=4, n_clusters_per_class=1))

active(classifiers=clfs, src_datasets=src_datasets, tgt_datasets=tgt_datasets)
#make_hastie_10_2
Expand Down
1 change: 1 addition & 0 deletions nd_boundary_plot
Submodule nd_boundary_plot added at 838d62

0 comments on commit e62f84e

Please sign in to comment.