-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathsimul_example.py
69 lines (57 loc) · 2.72 KB
/
simul_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import sys
import os
## Setting up paths
# Resolve the directory that contains this script itself rather than the
# current working directory, so that the local modules can be imported no
# matter where the script is launched from.
try:
    cur_dir = os.path.dirname(os.path.realpath(__file__))
except NameError:
    # __file__ is undefined when the code is pasted into an interactive
    # session; fall back to resolving the script name against the current
    # working directory, which is what this script did originally.
    cur_dir = os.path.dirname(os.path.realpath('simul_example.py'))
path_cython = os.path.join(cur_dir, 'cython_cuturi')
# Both directories are put at the front of sys.path (path_cython ends up
# first) so the imports that follow find the local modules.
sys.path.insert(0, cur_dir)
sys.path.insert(0, path_cython)
from W_means_class import W_means, simulate_mixture_nc, simulate_mixture_lc
import numpy as np
### Simulation experiments
# Seed NumPy's global random generator.
np.random.seed(1)

# --- Size of the simulated data ---
# dimensions of observations
d = 10
# number of groups
M = 2000
# number of observations in groups; a per-group alternative is
#   N = np.random.choice([50, 100, 150], M)
N = 50

### Set Multilevel K-means parameters
# number of global barycenters
K = 5
# number of atoms in global barycenters (can be list)
K_a = 6
# number of atoms in local barycenters (can be list)
k = 4
# number of atoms in constraint set
k_S = 50
# number of iterations to run
n_iter = 10
# whether to use Gaussian with non-constant variance
var = True
## Generating NC model. Z is the data, rest of the output is used to evaluate the fit
# Every print below uses the call form with one pre-formatted string: it
# prints exactly the same text under Python 2 and is also valid Python 3.
print('\nRunning NC simulations')
h_atoms, h_probs, labels_M, atoms_M, g_probs, Z = simulate_mixture_nc(K, K_a, d, N, M, k, var=var)
# Reference quantities handed to score() after each fit (labels_M is not used below).
truth = [h_atoms, h_probs, atoms_M, g_probs]

# Fitting NC
print('\nFitting MWM')
nc_cluster = W_means(K=K, K_a=K_a, k=k, n_iter=n_iter, method='NC', verbose=1).fit(Z)
# score() is negated before it is reported as a distance.
print('NC Wasserstain distance to true model is %f' % -nc_cluster.score(truth))
# presumably the per-observation local labels -- confirm against W_means_class
nc_loc_labels = nc_cluster.loc_label(Z)

# Fitting LC
# The LC method is given k_S (size of the constraint set) as its k.
# NOTE(review): the LC fit in the LC-simulation section of this script also
# passes k_init=k; no k_init is passed here -- confirm that is intended.
print('\nFitting MWMS')
lc_cluster = W_means(K=K, K_a=K_a, k=k_S, n_iter=n_iter, method='LC', verbose=1).fit(Z)
print('LC Wasserstain distance to true model is %f' % -lc_cluster.score(truth))
lc_loc_labels = lc_cluster.loc_label(Z)

# Fitting multistage k-means
print('\nFitting 3-stage K-means')
k_cluster = W_means(K=K, K_a=K_a, k=k, n_iter=n_iter, method='3means', verbose=1).fit(Z)
print('3-means Wasserstain distance to true model is %f' % -k_cluster.score(truth))
k_loc_labels = k_cluster.loc_label(Z)
## Generating LC model. Z is the data, rest of the output is used to evaluate the fit
# Every print below uses the call form with one pre-formatted string: it
# prints exactly the same text under Python 2 and is also valid Python 3.
print('\nRunning LC simulations')
h_atoms, h_probs, labels_M, S_set, g_probs, Z = simulate_mixture_lc(K, K_a, d, N, M, k_S, var=var)
# The single constraint set S_set is repeated once per group; the list holds
# M references to the same object (labels_M is not used below).
truth = [h_atoms, h_probs, M*[S_set], g_probs]

# Fitting NC
print('\nFitting MWM')
nc_cluster = W_means(K=K, K_a=K_a, k=k, n_iter=n_iter, method='NC', verbose=1).fit(Z)
# score() is negated before it is reported as a distance.
print('NC Wasserstain distance to true model is %f' % -nc_cluster.score(truth))

# Fitting LC
# The LC method is given k_S (size of the constraint set) as its k and the
# local-barycenter size k as k_init.
print('\nFitting MWMS')
lc_cluster = W_means(K=K, K_a=K_a, k=k_S, k_init=k, n_iter=n_iter, method='LC', verbose=1).fit(Z)
print('LC Wasserstain distance to true model is %f' % -lc_cluster.score(truth))

# Fitting multistage k-means
print('\nFitting 3-stage K-means')
k_cluster = W_means(K=K, K_a=K_a, k=k, n_iter=n_iter, method='3means', verbose=1).fit(Z)
print('3-means Wasserstain distance to true model is %f' % -k_cluster.score(truth))