>> import ite + >>> co1 = ite.cost.BASpearmanCondLT() + >>> co2 = ite.cost.BASpearmanCondLT(p=0.4) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # p: + self.p = p + + def estimation(self, y, ds=None): + """ Estimate multivariate conditional version of Spearman's rho. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + ds[i] = 1 (for all i): the i^th subspace is one-dimensional. + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + a : float + Estimated multivariate conditional version of Spearman's rho. + + References + ---------- + Friedrich Schmid and Rafael Schmidt. Multivariate conditional + versions of Spearman's rho and related measures of tail dependence. + Journal of Multivariate Analysis, 98:1123-1140, 2007. + + C. Spearman. The proof and measurement of association between two + things. The American Journal of Psychology, 15:72-101, 1904. + + Examples + -------- + a = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + num_of_samples, dim = y.shape # number of samples, dimension + u = copula_transformation(y) + c1 = (self.p**2 / 2)**dim + c2 = self.p**(dim + 1) / (dim + 1) + + a = (mean(prod(maximum(self.p - u, 0), axis=1)) - c1) / (c2 - c1) + + return a + + +class BASpearmanCondUT(InitX, VerOneDSubspaces, VerCompSubspaceDims): + """ Estimate multivariate conditional version of Spearman's rho. + + The measure weights the upper tail of the copula. + + Partial initialization comes from 'InitX'; verification capabilities + are inherited from 'VerOneDSubspaces' and 'VerCompSubspaceDims' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, p=0.5): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + p : float, 0
>> import ite + >>> co1 = ite.cost.BASpearmanCondUT() + >>> co2 = ite.cost.BASpearmanCondUT(p=0.4) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # p: + self.p = p + + def estimation(self, y, ds=None): + """ Estimate multivariate conditional version of Spearman's rho. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + ds[i] = 1 (for all i): the i^th subspace is one-dimensional. + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + a : float + Estimated multivariate conditional version of Spearman's rho. + + References + ---------- + Friedrich Schmid and Rafael Schmidt. Multivariate conditional + versions of Spearman's rho and related measures of tail + dependence. Journal of Multivariate Analysis, 98:1123-1140, 2007. + + C. Spearman. The proof and measurement of association between two + things. The American Journal of Psychology, 15:72-101, 1904. + + Examples + -------- + a = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + num_of_samples, dim = y.shape # number of samples, dimension + u = copula_transformation(y) + + c = mean(prod(1 - maximum(u, 1 - self.p), axis=1)) + c1 = (self.p * (2 - self.p) / 2)**dim + c2 = self.p**dim * (dim + 1 - self.p * dim) / (dim + 1) + + a = (c - c1) / (c2 - c1) + + return a + + +class BABlomqvist(InitX, VerOneDSubspaces, VerCompSubspaceDims): + """ Estimator of the multivariate extension of Blomqvist's beta. + + Blomqvist's beta is also known as the medial correlation coefficient. + + Initialization is inherited from 'InitX', verification capabilities + come from 'VerOneDSubspaces' and 'VerCompSubspaceDims' + ('ite.cost.x_classes.py'). + + Initialization is inherited from 'InitX', verification capabilities + come from 'VerOneDSubspaces' and 'VerCompSubspaceDims' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co = ite.cost.BABlomqvist() + + """ + + def estimation(self, y, ds=None): + """ Estimate multivariate extension of Blomqvist's beta. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + ds[i] = 1 (for all i): the i^th subspace is one-dimensional. + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + a : float + Estimated multivariate extension of Blomqvist's beta. + + References + ---------- + Friedrich Schmid, Rafael Schmidt, Thomas Blumentritt, Sandra + Gaiser, and Martin Ruppert. Copula Theory and Its Applications, + Chapter Copula based Measures of Multivariate Association. Lecture + Notes in Statistics. Springer, 2010. (multidimensional case, + len(ds)>=2) + + Manuel Ubeda-Flores. Multivariate versions of Blomqvist's beta and + Spearman's footrule. Annals of the Institute of Statistical + Mathematics, 57:781-788, 2005. + + Nils Blomqvist. On a measure of dependence between two random + variables. The Annals of Mathematical Statistics, 21:593-600, 1950. + (2D case, statistical properties) + + Frederick Mosteller. On some useful ''inefficient'' statistics. 
+ Annals of Mathematical Statistics, 17:377--408, 1946. (2D case, + def) + + + Examples + -------- + a = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + num_of_samples, dim = y.shape # number of samples, dimension + u = copula_transformation(y) + + h = 2**(dim - 1) / (2**(dim - 1) - 1) # h(dim) + c1 = mean(all(u <= 1/2, axis=1)) # C(1/2) + c2 = mean(all(u > 1/2, axis=1)) # \bar{C}(1/2) + a = h * (c1 + c2 - 2**(1 - dim)) + + return a diff --git a/ite-in-python/ite/cost/base_c.py b/ite-in-python/ite/cost/base_c.py new file mode 100644 index 0000000..6a621d0 --- /dev/null +++ b/ite-in-python/ite/cost/base_c.py @@ -0,0 +1,66 @@ +""" Base cross-quantity estimators. """ + +from scipy.special import psi +from numpy import mean, log + +from ite.cost.x_initialization import InitKnnK +from ite.cost.x_verification import VerEqualDSubspaces +from ite.shared import volume_of_the_unit_ball, knn_distances + + +class BCCE_KnnK(InitKnnK, VerEqualDSubspaces): + """ Cross-entropy estimator using the kNN method (S={k}) + + Initialization is inherited from 'InitKnnK', verification comes from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BCCE_KnnK() + >>> co2 = ite.cost.BCCE_KnnK(knn_method='cKDTree', k=4, eps=0.1) + >>> co3 = ite.cost.BCCE_KnnK(k=4) + + """ + + def estimation(self, y1, y2): + """ Estimate cross-entropy. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + c : float + Estimated cross-entropy. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Examples + -------- + c = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples2, dim = y2.shape # number of samples, dimension + + # computation: + v = volume_of_the_unit_ball(dim) + distances_y2y1 = knn_distances(y2, y1, False, self.knn_method, + self.k, self.eps, 2)[0] + c = log(v) + log(num_of_samples2) - psi(self.k) + \ + dim * mean(log(distances_y2y1[:, -1])) + + return c diff --git a/ite-in-python/ite/cost/base_d.py b/ite-in-python/ite/cost/base_d.py new file mode 100644 index 0000000..8a11042 --- /dev/null +++ b/ite-in-python/ite/cost/base_d.py @@ -0,0 +1,1350 @@ +""" Base divergence estimators. """ + +from numpy import mean, log, absolute, sqrt, floor, sum, arange, vstack, \ + dot, abs +from scipy.spatial.distance import cdist, pdist + +from ite.cost.x_initialization import InitX, InitKnnK, InitKnnKiTi, \ + InitKnnKAlpha, InitKnnKAlphaBeta, \ + InitKernel, InitEtaKernel +from ite.cost.x_verification import VerEqualDSubspaces, \ + VerEqualSampleNumbers, \ + VerEvenSampleNumbers + +from ite.shared import knn_distances, estimate_d_temp2, estimate_i_alpha,\ + estimate_d_temp3, volume_of_the_unit_ball,\ + estimate_d_temp1 + + +class BDKL_KnnK(InitKnnK, VerEqualDSubspaces): + """ Kullback-Leibler divergence estimator using the kNN method (S={k}). 
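    In brief (an informal reading of the computation in 'estimation'
    below; the shorthand T1, T2, rho_k, nu_k is introduced only for this
    note): with rho_k(i) the Euclidean distance of y1[i] to its k-th
    nearest neighbour among the other y1 points and nu_k(i) its k-th
    nearest neighbour distance among the y2 points, the returned value is

        d / T1 * \sum_i log( nu_k(i) / rho_k(i) ) + log( T2 / (T1 - 1) ),

    where d is the dimension and T1, T2 are the two sample sizes.

    A minimal usage sketch (the Gaussian toy samples are an assumption of
    this note, not part of the library):

    >>> import ite
    >>> from numpy.random import randn
    >>> co = ite.cost.BDKL_KnnK()
    >>> d = co.estimation(randn(2000, 3), randn(3000, 3) + 1)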
+ + Initialization is inherited from 'InitKnnK', verification comes from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDKL_KnnK() + >>> co2 = ite.cost.BDKL_KnnK(knn_method='cKDTree', k=5, eps=0.1) + >>> co3 = ite.cost.BDKL_KnnK(k=4) + + """ + + def estimation(self, y1, y2): + """ Estimate KL divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated KL divergence. + + References + ---------- + Fernando Perez-Cruz. Estimation of Information Theoretic Measures + for Continuous Random Variables. Advances in Neural Information + Processing Systems (NIPS), pp. 1257-1264, 2008. + + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Quing Wang, Sanjeev R. Kulkarni, and Sergio Verdu. Divergence + estimation for multidimensional densities via k-nearest-neighbor + distances. IEEE Transactions on Information Theory, 55:2392-2405, + 2009. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # sizes: + num_of_samples1, dim = y1.shape + num_of_samples2 = y2.shape[0] + + # computation: + distances_y1y1 = knn_distances(y1, y1, True, self.knn_method, + self.k, self.eps, 2)[0] + distances_y2y1 = knn_distances(y2, y1, False, self.knn_method, + self.k, self.eps, 2)[0] + d = dim * mean(log(distances_y2y1[:, -1] / + distances_y1y1[:, -1])) + \ + log(num_of_samples2/(num_of_samples1-1)) + + return d + + +class BDEnergyDist(InitX, VerEqualDSubspaces): + """ Energy distance estimator using pairwise distances of the samples. + + Initialization is inherited from 'InitX', verification comes from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co = ite.cost.BDEnergyDist() + + """ + + def estimation(self, y1, y2): + """ Estimate energy distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated energy distance. + + References + ---------- + Gabor J. Szekely and Maria L. Rizzo. A new test for multivariate + normality. Journal of Multivariate Analysis, 93:58-80, 2005. + (metric space of negative type) + + Gabor J. Szekely and Maria L. Rizzo. Testing for equal + distributions in high dimension. InterStat, 5, 2004. (R^d) + + Ludwig Baringhaus and C. Franz. On a new multivariate + two-sample test. Journal of Multivariate Analysis, 88, 190-206, + 2004. (R^d) + + Lev Klebanov. N-Distances and Their Applications. Charles + University, Prague, 2005. (N-distance) + + A. A. Zinger and A. V. Kakosyan and L. B. Klebanov. A + characterization of distributions by mean values of statistics + and certain probabilistic metrics. Journal of Soviet + Mathematics, 1992 (N-distance, general case). 
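        Sketch (informal; X, X' ~ p and Y, Y' ~ q denote independent
        copies, a notational convenience of this note): the energy
        distance

            D(p,q) = 2 E||X - Y|| - E||X - X'|| - E||Y - Y'||

        is estimated below by the corresponding averages of pairwise
        Euclidean distances; the within-sample terms are computed as
        2*sum(pdist(.))/T^2, i.e. in V-statistic style with the zero
        diagonal terms included.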
+ + Examples + -------- + d = co.estimation(y1, y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # Euclidean distances: + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + mean_dist_y1y1 = 2 * sum(pdist(y1)) / num_of_samples1**2 + mean_dist_y2y2 = 2 * sum(pdist(y2)) / num_of_samples2**2 + mean_dist_y1y2 = mean(cdist(y1, y2)) + + d = 2 * mean_dist_y1y2 - mean_dist_y1y1 - mean_dist_y2y2 + + return d + + +class BDBhattacharyya_KnnK(InitKnnK, VerEqualDSubspaces): + """ Bhattacharyya distance estimator using the kNN method (S={k}). + + Partial initialization comes from 'InitKnnK', verification is + inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, + pxdx=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + pxdx : boolean, optional + If pxdx == True, then we rewrite the Bhattacharyya distance + as \int p^{1/2}(x)q^{1/2}(x)dx = \int p^{-1/2}(x)q^{1/2}(x) + p(x)dx. [p(x)dx] Else, the Bhattacharyya distance is + rewritten as \int p^{1/2}(x)q^{1/2}(x)dx = + \int q^{-1/2}(x)p^{1/2}(x) q(x)dx. [q(x)dx] + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDBhattacharyya_KnnK() + >>> co2 = ite.cost.BDBhattacharyya_KnnK(k=4) + + """ + + # initialize with 'InitKnnK': + super().__init__(mult=mult, knn_method=knn_method, k=k, eps=eps) + + # other attributes (pxdx,_a,_b): + self.pxdx, self._a, self._b = pxdx, -1/2, 1/2 + + def estimation(self, y1, y2): + """ Estimate Bhattacharyya distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Bhattacharyya distance. + + References + ---------- + Barnabas Poczos and Liang Xiong and Dougal Sutherland and Jeff + Schneider. Support Distribution Machines. Technical Report, 2012. + "http://arxiv.org/abs/1202.0302" (estimation of d_temp2) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + if self.pxdx: + d_ab = estimate_d_temp2(y1, y2, self) + else: + d_ab = estimate_d_temp2(y2, y1, self) + # absolute() to avoid possible 'log(negative)' values due to the + # finite number of samples: + d = -log(absolute(d_ab)) + + return d + + +class BDBregman_KnnK(InitKnnKAlpha, VerEqualDSubspaces): + """ Bregman distance estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlpha', verification is inherited + from 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDBregman_KnnK() + >>> co2 = ite.cost.BDBregman_KnnK(alpha=0.9, k=5, eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate Bregman distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. 
+ y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Bregman distance. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Imre Csiszar. Generalized projections for non-negative functions. + Acta Mathematica Hungarica, 68:161-185, 1995. + + Lev M. Bregman. The relaxation method of finding the common points + of convex sets and its application to the solution of problems in + convex programming. USSR Computational Mathematics and + Mathematical Physics, 7:200-217, 1967. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + i_alpha_y1 = estimate_i_alpha(y1, self) + i_alpha_y2 = estimate_i_alpha(y2, self) + d_temp3 = estimate_d_temp3(y1, y2, self) + + d = i_alpha_y2 + i_alpha_y1 / (self.alpha - 1) -\ + self.alpha / (self.alpha - 1) * d_temp3 + + return d + + +class BDChi2_KnnK(InitKnnK, VerEqualDSubspaces): + """ Chi-square distance estimator using the kNN method (S={k}). + + Partial initialization comes from 'InitKnnK', verification is + inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, + pxdx=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + pxdx : boolean, optional + If pxdx == True, then we rewrite the Pearson chi-square + divergence as \int p^2(x)q^{-1}(x)dx - 1 = + \int p^1(x)q^{-1}(x) p(x)dx - 1. [p(x)dx] + Else, the Pearson chi-square divergence is rewritten as + \int p^2(x)q^{-1}(x)dx - 1= \int q^{-2}(x)p^2(x) q(x)dx -1. + [q(x)dx] + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDChi2_KnnK() + >>> co2 = ite.cost.BDChi2_KnnK(k=4) + + """ + + # initialize with 'InitKnnK': + super().__init__(mult=mult, knn_method=knn_method, k=k, eps=eps) + + # other attributes (pxdx,_a,_b): + self.pxdx = pxdx + if pxdx: + self._a, self._b = 1, -1 + else: + self._a, self._b = -2, 2 + + def estimation(self, y1, y2): + """ Estimate Pearson chi-square divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Pearson chi-square divergence. + + References + ---------- + Barnabas Poczos, Liang Xiong, Dougal Sutherland, and Jeff + Schneider. Support distribution machines. Technical Report, + Carnegie Mellon University, 2012. http://arxiv.org/abs/1202.0302. + (estimation of d_temp2) + + Karl Pearson. On the criterion that a given system of deviations + from the probable in the case of correlated system of variables is + such that it can be reasonable supposed to have arisen from random + sampling. Philosophical Magazine Series, 50:157-172, 1900. 
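        Sketch (informal): the target quantity is the Pearson chi-square
        divergence \int p^2(x) q^{-1}(x) dx - 1. Below it is obtained as
        'estimate_d_temp2(y1, y2, self) - 1' when pxdx=True (exponents
        (_a,_b)=(1,-1), i.e. an average of p(x)/q(x) over the y1 sample)
        and as 'estimate_d_temp2(y2, y1, self) - 1' when pxdx=False
        ((_a,_b)=(-2,2), an average of p^2(x)/q^2(x) over the y2 sample).
        The k-NN based density plug-in and its bias-correcting constants
        are assumed to live inside 'estimate_d_temp2'; its body is not
        shown in this file.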
+ + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + if self.pxdx: + d = estimate_d_temp2(y1, y2, self) - 1 + else: + d = estimate_d_temp2(y2, y1, self) - 1 + + return d + + +class BDHellinger_KnnK(InitKnnK, VerEqualDSubspaces): + """ Hellinger distance estimator using the kNN method (S={k}). + + Partial initialization comes from 'InitKnnK', verification is + inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, + pxdx=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + pxdx : boolean, optional + If pxdx == True, then we rewrite the Pearson chi-square + divergence as \int p^{1/2}(x)q^{1/2}(x)dx = + \int p^{-1/2}(x)q^{1/2}(x) p(x)dx. [p(x)dx] + Else, the Pearson chi-square divergence is rewritten as + \int p^{1/2}(x)q^{1/2}(x)dx = + \int q^{-1/2}(x)p^{1/2}(x) q(x)dx. [q(x)dx] + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDHellinger_KnnK() + >>> co2 = ite.cost.BDHellinger_KnnK(k=4) + + """ + + # initialize with 'InitKnnK': + super().__init__(mult=mult, knn_method=knn_method, k=k, eps=eps) + + # other attributes (pxdx,_a,_b): + self.pxdx, self._a, self._b = pxdx, -1/2, 1/2 + + def estimation(self, y1, y2): + """ Estimate Hellinger distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Hellinger distance. + + References + ---------- + Barnabas Poczos, Liang Xiong, Dougal Sutherland, and Jeff + Schneider. Support distribution machines. Technical Report, + Carnegie Mellon University, 2012. http://arxiv.org/abs/1202.0302. + (estimation of d_temp2) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # D_ab (Bhattacharyya coefficient): + if self.pxdx: + d_ab = estimate_d_temp2(y1, y2, self) + else: + d_ab = estimate_d_temp2(y2, y1, self) + # absolute() to avoid possible 'sqrt(negative)' values due to the + # finite number of samples: + d = sqrt(absolute(1 - d_ab)) + + return d + + +class BDKL_KnnKiTi(InitKnnKiTi, VerEqualDSubspaces): + """ Kullback-Leibler divergence estimator using the kNN method. + + In the kNN method: S_1={k_1}, S_2={k_2}; ki-s depend on the number of + samples. + + Initialization is inherited from 'InitKnnKiTi', verification comes + from 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDKL_KnnKiTi() + >>> co2 = ite.cost.BDKL_KnnKiTi(knn_method='cKDTree', eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate KL divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. 
+ y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated KL divergence. + + References + ---------- + Quing Wang, Sanjeev R. Kulkarni, and Sergio Verdu. Divergence + estimation for multidimensional densities via k-nearest-neighbor + distances. IEEE Transactions on Information Theory, 55:2392-2405, + 2009. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # sizes: + num_of_samples1, dim = y1.shape + num_of_samples2 = y2.shape[0] + + # ki-s depend on the number of samples: + k1 = int(floor(sqrt(num_of_samples1))) + k2 = int(floor(sqrt(num_of_samples2))) + + # computation: + dist_k1_y1y1 = knn_distances(y1, y1, True, self.knn_method, k1, + self.eps, 2)[0] + dist_k2_y2y1 = knn_distances(y2, y1, False, self.knn_method, k2, + self.eps, 2)[0] + + d = dim * mean(log(dist_k2_y2y1[:, -1] / dist_k1_y1y1[:, -1])) +\ + log(k1 / k2 * num_of_samples2 / (num_of_samples1 - 1)) + + return d + + +class BDL2_KnnK(InitKnnK, VerEqualDSubspaces): + """ L2 divergence estimator using the kNN method (S={k}). + + Initialization is inherited from 'InitKnnK', verification comes from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDL2_KnnK() + >>> co2 = ite.cost.BDL2_KnnK(knn_method='cKDTree', k=5, eps=0.1) + >>> co3 = ite.cost.BDL2_KnnK(k=4) + + """ + + def estimation(self, y1, y2): + """ Estimate L2 divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated L2 divergence. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Liang Xiong, Jeff Schneider. Nonparametric + Divergence: Estimation with Applications to Machine Learning on + Distributions. Uncertainty in Artificial Intelligence (UAI), 2011. + + Barnabas Poczos and Jeff Schneider. On the Estimation of + alpha-Divergences. International Conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # sizes: + num_of_samples1, dim = y1.shape + num_of_samples2 = y2.shape[0] + + c = volume_of_the_unit_ball(dim) + dist_k_y1y1 = knn_distances(y1, y1, True, self.knn_method, self.k, + self.eps, 2)[0][:, -1] + dist_k_y2y1 = knn_distances(y2, y1, False, self.knn_method, self.k, + self.eps, 2)[0][:, -1] + + term1 = \ + mean(dist_k_y1y1**(-dim)) * (self.k - 1) /\ + ((num_of_samples1 - 1) * c) + term2 = \ + mean(dist_k_y2y1**(-dim)) * 2 * (self.k - 1) /\ + (num_of_samples2 * c) + term3 = \ + mean((dist_k_y1y1**dim) / (dist_k_y2y1**(2 * dim))) *\ + (num_of_samples1 - 1) * (self.k - 2) * (self.k - 1) /\ + (num_of_samples2**2 * c * self.k) + l2 = term1 - term2 + term3 + # absolute() to avoid possible 'sqrt(negative)' values due to the + # finite number of samples: + d = sqrt(absolute(l2)) + + return d + + +class BDRenyi_KnnK(InitKnnKAlpha, VerEqualDSubspaces): + """ Renyi divergence estimator using the kNN method (S={k}). 
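    In brief (informal): the Renyi divergence is

        D_{R,alpha}(p,q) =
            log( \int p^{alpha}(x) q^{1-alpha}(x) dx ) / (alpha - 1),

    and 'estimation' below applies exactly this log(.)/(alpha-1)
    transform to the output of 'estimate_d_temp1', which is assumed (its
    body is not shown in this file) to be a k-NN based estimate of the
    integral \int p^{alpha}(x) q^{1-alpha}(x) dx.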
+ + Initialization comes from 'InitKnnKAlpha', verification is inherited + from 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + The Renyi divergence (D_{R,alpha}) equals to the Kullback-Leibler + divergence (D) in limit: D_{R,alpha} -> D, as alpha -> 1. + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDRenyi_KnnK() + >>> co2 = ite.cost.BDRenyi_KnnK(alpha=0.9, k=5, eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate Renyi divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Renyi divergence. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Jeff Schneider. On the Estimation of + alpha-Divergences. International conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. + + Barnabas Poczos, Liang Xiong, Jeff Schneider. Nonparametric + Divergence: Estimation with Applications to Machine Learning on + Distributions. Uncertainty in Artificial Intelligence (UAI), 2011. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d_temp1 = estimate_d_temp1(y1, y2, self) + d = log(d_temp1) / (self.alpha - 1) + return d + + +class BDTsallis_KnnK(InitKnnKAlpha, VerEqualDSubspaces): + """ Tsallis divergence estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlpha', verification is inherited + from 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + The Tsallis divergence (D_{T,alpha}) equals to the Kullback-Leibler + divergence (D) in limit: D_{T,alpha} -> D, as alpha -> 1. + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDTsallis_KnnK() + >>> co2 = ite.cost.BDTsallis_KnnK(alpha=0.9, k=5, eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate Tsallis divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Tsallis divergence. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Jeff Schneider. On the Estimation of + alpha-Divergences. International conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d_temp1 = estimate_d_temp1(y1, y2, self) + d = (d_temp1 - 1) / (self.alpha - 1) + + return d + + +class BDSharmaMittal_KnnK(InitKnnKAlphaBeta, VerEqualDSubspaces): + """ Sharma-Mittal divergence estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlphaBeta', verification is + inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). 
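    In brief (informal, reusing the same \int p^{alpha}(x) q^{1-alpha}(x)
    dx integral as the Renyi and Tsallis estimators above): the
    Sharma-Mittal divergence

        D_{SM,alpha,beta}(p,q) =
            ( ( \int p^{alpha}(x) q^{1-alpha}(x) dx )^{(1-beta)/(1-alpha)}
              - 1 ) / (beta - 1)

    is obtained below by applying this transform to the output of
    'estimate_d_temp1'.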
+ + The Sharma-Mittal divergence (D_{SM,alpha,beta}) equals to the + 1)Tsallis divergence (D_{T,alpha}): D_{SM,alpha,beta} = D_{T,alpha}, + if alpha = beta. + 2)Kullback-Leibler divergence (D): D_{SM,alpha,beta} -> D, as + (alpha,beta) -> (1,1). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDSharmaMittal_KnnK() + >>> co2 = ite.cost.BDSharmaMittal_KnnK(alpha=0.9, beta=0.7, k=5,\ + eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate Sharma-Mittal divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Sharma-Mittal divergence. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Jeff Schneider. On the Estimation of + alpha-Divergences. International conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. + + Marco Massi. A step beyond Tsallis and Renyi entropies. Physics + Letters A, 338:217-224, 2005. (Sharma-Mittal divergence definition) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d_temp1 = estimate_d_temp1(y1, y2, self) + + d = (d_temp1**((1 - self.beta) / (1 - self.alpha)) - 1) /\ + (self.beta - 1) + + return d + + +class BDSymBregman_KnnK(InitKnnKAlpha, VerEqualDSubspaces): + """ Symmetric Bregman distance estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlpha', verification is inherited + from 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BDSymBregman_KnnK() + >>> co2 = ite.cost.BDSymBregman_KnnK(alpha=0.9, k=5, eps=0.1) + + """ + + def estimation(self, y1, y2): + """ Estimate symmetric Bregman distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated symmetric Bregman distance. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Imre Csiszar. Generalized projections for non-negative functions. + Acta Mathematica Hungarica, 68:161-185, 1995. + + Lev M. Bregman. The relaxation method of finding the common points + of convex sets and its application to the solution of problems in + convex programming. USSR Computational Mathematics and + Mathematical Physics, 7:200-217, 1967. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + i_alpha_y1 = estimate_i_alpha(y1, self) + i_alpha_y2 = estimate_i_alpha(y2, self) + + d_temp3_y1y2 = estimate_d_temp3(y1, y2, self) + d_temp3_y2y1 = estimate_d_temp3(y2, y1, self) + + d = (i_alpha_y1 + i_alpha_y2 - d_temp3_y1y2 - d_temp3_y2y1) /\ + (self.alpha - 1) + + return d + + +class BDMMD_UStat(InitKernel, VerEqualDSubspaces): + """ MMD (maximum mean discrepancy) estimator applying U-statistic. 
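    In brief (informal; k denotes the chosen kernel, T1 and T2 the two
    sample sizes -- shorthand of this note): the square root of the
    absolute value of the unbiased U-statistic

        \sum_{i \ne j} k(y1_i, y1_j) / (T1 (T1-1))
        + \sum_{i \ne j} k(y2_i, y2_j) / (T2 (T2-1))
        - 2 \sum_{i,j} k(y1_i, y2_j) / (T1 T2)

    is returned; the diagonals of the two within-sample Gram matrices are
    zeroed out below, and absolute() guards against a slightly negative
    U-statistic value caused by the finite sample size.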
+ + Initialization comes from 'InitKernel', verification is inherited from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BDMMD_UStat() + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BDMMD_UStat(kernel=k2) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BDMMD_UStat(kernel=k3) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BDMMD_UStat(kernel=k4) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BDMMD_UStat(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BDMMD_UStat(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BDMMD_UStat(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial','exponent':2,'c': 1}) + >>> co8 = ite.cost.BDMMD_UStat(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BDMMD_UStat(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BDMMD_UStat(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated value of MMD. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard + Scholkopf and Alexander Smola. A Kernel Two-Sample Test. Journal + of Machine Learning Research 13 (2012) 723-773. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + + kernel = self.kernel + ky1y1 = kernel.gram_matrix1(y1) + ky2y2 = kernel.gram_matrix1(y2) + ky1y2 = kernel.gram_matrix2(y1, y2) + + # make the diagonal zero in ky1y1 and ky2y2: + ky1y1[arange(num_of_samples1), arange(num_of_samples1)] = 0 + ky2y2[arange(num_of_samples2), arange(num_of_samples2)] = 0 + + term1 = sum(ky1y1) / (num_of_samples1 * (num_of_samples1-1)) + term2 = sum(ky2y2) / (num_of_samples2 * (num_of_samples2-1)) + term3 = -2 * sum(ky1y2) / (num_of_samples1 * num_of_samples2) + + # absolute(): to avoid 'sqrt(negative)' values: + d = sqrt(absolute(term1 + term2 + term3)) + + return d + + +class BDMMD_VStat(InitKernel, VerEqualDSubspaces): + """ MMD (maximum mean discrepancy) estimator applying V-statistic. + + Initialization comes from 'InitKernel', verification is inherited from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). 
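    In brief (informal; same shorthand as in the U-statistic variant):
    the V-statistic version keeps the Gram-matrix diagonals, i.e. it
    returns the square root of the absolute value of

        \sum_{i,j} k(y1_i, y1_j) / T1^2
        + \sum_{i,j} k(y2_i, y2_j) / T2^2
        - 2 \sum_{i,j} k(y1_i, y2_j) / (T1 T2),

    a biased but simpler plug-in estimate of MMD^2.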
+ + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BDMMD_VStat() + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BDMMD_VStat(kernel=k2) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BDMMD_VStat(kernel=k3) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BDMMD_VStat(kernel=k4) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BDMMD_VStat(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BDMMD_VStat(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BDMMD_VStat(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial','exponent':2,'c': 1}) + >>> co8 = ite.cost.BDMMD_VStat(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BDMMD_VStat(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BDMMD_VStat(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated value of MMD. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard + Scholkopf and Alexander Smola. A Kernel Two-Sample Test. Journal + of Machine Learning Research 13 (2012) 723-773. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + + kernel = self.kernel + ky1y1 = kernel.gram_matrix1(y1) + ky2y2 = kernel.gram_matrix1(y2) + ky1y2 = kernel.gram_matrix2(y1, y2) + + term1 = sum(ky1y1) / (num_of_samples1**2) + term2 = sum(ky2y2) / (num_of_samples2**2) + term3 = -2 * sum(ky1y2) / (num_of_samples1 * num_of_samples2) + + # absolute(): to avoid 'sqrt(negative)' values: + d = sqrt(absolute(term1 + term2 + term3)) + + return d + + +class BDMMD_Online(InitKernel, VerEqualDSubspaces, VerEqualSampleNumbers, + VerEvenSampleNumbers): + """ Online MMD (maximum mean discrepancy) estimator. + + Initialization comes from 'InitKernel', verification is inherited from + 'VerEqualDSubspaces', 'VerEqualSampleNumbers', 'VerEvenSampleNumbers' + (see 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). 
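    In brief (informal; it is assumed here that kernel.sum(a, b) returns
    \sum_m k(a_m, b_m) over paired rows -- its body is not shown in this
    file): the samples are grouped into consecutive pairs and the average
    of

        h_i = k(y1_{2i-1}, y1_{2i}) + k(y2_{2i-1}, y2_{2i})
              - k(y1_{2i-1}, y2_{2i}) - k(y1_{2i}, y2_{2i-1})

    over the T/2 pairs is returned, giving a linear-time, one-pass
    estimate of MMD^2; this pairing is why equal and even sample numbers
    are required.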
+ + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BDMMD_Online() + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BDMMD_Online(kernel=k2) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BDMMD_Online(kernel=k3) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BDMMD_Online(kernel=k4) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BDMMD_Online(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BDMMD_Online(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BDMMD_Online(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial', 'exponent': 2, 'c': 1}) + >>> co8 = ite.cost.BDMMD_Online(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BDMMD_Online(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BDMMD_Online(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + Assumption: number of samples1 = number of samples2 = even. + + Returns + ------- + d : float + Estimated value of MMD. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard + Scholkopf and Alexander Smola. A Kernel Two-Sample Test. Journal + of Machine Learning Research 13 (2012) 723-773. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + self.verification_equal_sample_numbers(y1, y2) + self.verification_even_sample_numbers(y1) + # the order of 'verification_equal_sample_numbers' and + # 'verification_even_sample_numbers' is important here + + num_of_samples = y1.shape[0] # = y2.shape[0] + + # y1i,y1j,y2i,y2j: + y1i = y1[0:num_of_samples:2, :] + y1j = y1[1:num_of_samples:2, :] + y2i = y2[0:num_of_samples:2, :] + y2j = y2[1:num_of_samples:2, :] + + kernel = self.kernel + + d = (kernel.sum(y1i, y1j) + kernel.sum(y2i, y2j) - + kernel.sum(y1i, y2j) - + kernel.sum(y1j, y2i)) / (num_of_samples / 2) + + return d + + +class BDMMD_UStat_IChol(InitEtaKernel, VerEqualDSubspaces): + """ MMD estimator with U-statistic & incomplete Cholesky decomposition. + + MMD refers to maximum mean discrepancy. + + Initialization comes from 'InitKernel', verification is inherited from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). 
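    In brief (informal; it is assumed that kernel.ichol(y, tol) returns
    an incomplete Cholesky factor L with Gram matrix K approximately
    equal to L L^T -- its body is not shown in this file): L is computed
    once for the stacked sample [y1; y2] with tolerance eta*(T1+T2) and
    split into the row blocks L1, L2. The three MMD terms are then
    assembled from column sums and squared norms of these blocks, e.g.
    \sum_{i \ne j} k(y1_i, y1_j) is approximated by
    ||\sum_i L1[i,:]||^2 - \sum_i ||L1[i,:]||^2, so the full Gram
    matrices are never formed.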
+ + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> eta = 1e-2 + >>> co1 = ite.cost.BDMMD_UStat_IChol() + >>> co1b = ite.cost.BDMMD_UStat_IChol(eta=eta) + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BDMMD_UStat_IChol(kernel=k2) + >>> co2b = ite.cost.BDMMD_UStat_IChol(kernel=k2,eta=eta) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BDMMD_UStat_IChol(kernel=k3) + >>> co3b = ite.cost.BDMMD_UStat_IChol(kernel=k3,eta=eta) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BDMMD_UStat_IChol(kernel=k4) + >>> co4b = ite.cost.BDMMD_UStat_IChol(kernel=k4,eta=eta) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BDMMD_UStat_IChol(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BDMMD_UStat_IChol(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BDMMD_UStat_IChol(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial','exponent':2,'c': 1}) + >>> co8 = ite.cost.BDMMD_UStat_IChol(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BDMMD_UStat_IChol(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BDMMD_UStat_IChol(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated value of MMD. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard + Scholkopf and Alexander Smola. A Kernel Two-Sample Test. Journal + of Machine Learning Research 13 (2012) 723-773. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # sample numbers: + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + num_of_samples = num_of_samples1 + num_of_samples2 # total + + # low-rank approximation of the joint Gram matrix: + kernel = self.kernel + tolerance = self.eta * num_of_samples + l = kernel.ichol(vstack((y1, y2)), tolerance) + l1 = l[0:num_of_samples1] # broadcast + l2 = l[num_of_samples1:] # broadcast + e1l1 = sum(l1, axis=0) # row vector + e2l2 = sum(l2, axis=0) # row vector + + term1 = \ + (dot(e1l1, e1l1) - sum(l1**2)) / \ + (num_of_samples1 * (num_of_samples1 - 1)) + term2 = \ + (dot(e2l2, e2l2) - sum(l2**2)) / \ + (num_of_samples2 * (num_of_samples2 - 1)) + term3 = -2 * dot(e1l1, e2l2) / (num_of_samples1 * num_of_samples2) + + # abs(): to avoid 'sqrt(negative)' values + d = sqrt(abs(term1 + term2 + term3)) + + return d + + +class BDMMD_VStat_IChol(InitEtaKernel, VerEqualDSubspaces): + """ MMD estimator with V-statistic & incomplete Cholesky decomposition. + + MMD refers to maximum mean discrepancy. + + Initialization comes from 'InitKernel', verification is inherited from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). 
+ + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BDMMD_VStat_IChol() + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BDMMD_VStat_IChol(kernel=k2) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BDMMD_VStat_IChol(kernel=k3) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BDMMD_VStat_IChol(kernel=k4) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BDMMD_VStat_IChol(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BDMMD_VStat_IChol(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BDMMD_VStat_IChol(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial','exponent':2,'c': 1}) + >>> co8 = ite.cost.BDMMD_VStat_IChol(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BDMMD_VStat_IChol(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BDMMD_VStat_IChol(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated value of MMD. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, Bernhard + Scholkopf and Alexander Smola. A Kernel Two-Sample Test. Journal + of Machine Learning Research 13 (2012) 723-773. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + num_of_samples = num_of_samples1 + num_of_samples2 # total + + # low-rank approximation of the joint Gram matrix: + kernel = self.kernel + tolerance = self.eta * num_of_samples + l = kernel.ichol(vstack((y1, y2)), tolerance) + # broadcasts; result:row vector: + e1l1 = sum(l[:num_of_samples1], axis=0) + e2l2 = sum(l[num_of_samples1:], axis=0) + + term1 = dot(e1l1, e1l1) / num_of_samples1**2 + term2 = dot(e2l2, e2l2) / num_of_samples2**2 + term3 = -2 * dot(e1l1, e2l2) / (num_of_samples1 * num_of_samples2) + + # abs(): to avoid 'sqrt(negative)' values + d = sqrt(abs(term1 + term2 + term3)) + + return d diff --git a/ite-in-python/ite/cost/base_h.py b/ite-in-python/ite/cost/base_h.py new file mode 100644 index 0000000..9bf22da --- /dev/null +++ b/ite-in-python/ite/cost/base_h.py @@ -0,0 +1,646 @@ +""" Base entropy estimators on distributions. """ + +from scipy.special import psi, gamma +# from scipy.special import psi, gammaln +from numpy import floor, sqrt, concatenate, ones, sort, mean, log, absolute,\ + exp, pi, sum, max + +from ite.cost.x_initialization import InitKnnK, InitX, InitKnnKAlpha, \ + InitKnnKAlphaBeta, InitKnnSAlpha +from ite.cost.x_verification import VerOneDSignal +from ite.shared import volume_of_the_unit_ball, knn_distances, \ + estimate_i_alpha, replace_infs_with_max + + +class BHShannon_KnnK(InitKnnK): + """ Shannon differential entropy estimator using kNNs (S = {k}). + + Initialization is inherited from 'InitKnnK' (see + 'ite.cost.x_initialization.py'). 
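    In brief (informal; rho_k(i) denotes the distance of y[i] to its k-th
    nearest neighbour among the remaining samples, V_d the volume of the
    d-dimensional Euclidean unit ball -- shorthand of this note): the
    Kozachenko-Leonenko type estimate computed below is

        H_hat = log(T - 1) - psi(k) + log(V_d)
                + d / T * \sum_i log( rho_k(i) ),

    with T the number of samples and d the dimension.

    A minimal usage sketch (the toy standard normal sample is an
    assumption of this note, not part of the library):

    >>> import ite
    >>> from numpy.random import randn
    >>> co = ite.cost.BHShannon_KnnK()
    >>> h = co.estimation(randn(2000, 2))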
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BHShannon_KnnK() + >>> co2 = ite.cost.BHShannon_KnnK(knn_method='cKDTree', k=3, eps=0.1) + >>> co3 = ite.cost.BHShannon_KnnK(k=5) + + """ + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + References + ---------- + M. N. Goria, Nikolai N. Leonenko, V. V. Mergel, and P. L. Novi + Inverardi. A new class of random vector entropy estimators and its + applications in testing statistical hypotheses. Journal of + Nonparametric Statistics, 17: 277-297, 2005. (S={k}) + + Harshinder Singh, Neeraj Misra, Vladimir Hnizdo, Adam Fedorowicz + and Eugene Demchuk. Nearest neighbor estimates of entropy. + American Journal of Mathematical and Management Sciences, 23, + 301-321, 2003. (S={k}) + + L. F. Kozachenko and Nikolai N. Leonenko. A statistical estimate + for the entropy of a random vector. Problems of Information + Transmission, 23:9-16, 1987. (S={1}) + + Examples + -------- + h = co.estimation(y) + + """ + + num_of_samples, dim = y.shape + distances_yy = knn_distances(y, y, True, self.knn_method, self.k, + self.eps, 2)[0] + v = volume_of_the_unit_ball(dim) + h = log(num_of_samples - 1) - psi(self.k) + log(v) + \ + dim * sum(log(distances_yy[:, self.k-1])) / num_of_samples + + return h + + +class BHShannon_SpacingV(InitX, VerOneDSignal): + """ Shannon entropy estimator using Vasicek's spacing method. + + Initialization is inherited from 'InitX', verification comes from + 'VerOneDSignal' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co = ite.cost.BHShannon_SpacingV() + + """ + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, 1)-ndarray (column vector) + One coordinate of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + References + ---------- + Oldrich Vasicek. A test for normality based on sample entropy. + Journal of the Royal Statistical Society, Series B, 38(1):54-59, + 1976. + + Examples + -------- + h = co.estimation(y) + + """ + + # verification: + self.verification_one_d_signal(y) + + # estimation: + num_of_samples = y.shape[0] # y : Tx1 + m = int(floor(sqrt(num_of_samples))) + y = sort(y, axis=0) + y = concatenate((y[0] * ones((m, 1)), y, y[-1] * ones((m, 1)))) + diffy = y[2*m:] - y[:num_of_samples] + h = mean(log(num_of_samples / (2*m) * diffy)) + + return h + + +class BHRenyi_KnnK(InitKnnKAlpha): + """ Renyi entropy estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlpha' (see + 'ite.cost.x_initialization.py'). + + Notes + ----- + The Renyi entropy (H_{R,alpha}) equals to the Shannon differential (H) + entropy in limit: H_{R,alpha} -> H, as alpha -> 1. + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BHRenyi_KnnK() + >>> co2 = ite.cost.BHRenyi_KnnK(knn_method='cKDTree', k=4, eps=0.01, \ + alpha=0.9) + >>> co3 = ite.cost.BHRenyi_KnnK(k=5, alpha=0.9) + + """ + + def estimation(self, y): + """ Estimate Renyi entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Renyi entropy. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. 
A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Joseph E. Yukich. Probability Theory of Classical Euclidean + Optimization Problems, Lecture Notes in Mathematics, 1998, vol. + 1675. + + Examples + -------- + h = co.estimation(y) + + """ + + i_alpha = estimate_i_alpha(y, self) + h = log(i_alpha) / (1 - self.alpha) + + return h + + +class BHTsallis_KnnK(InitKnnKAlpha): + """ Tsallis entropy estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlpha' (see + 'ite.cost.x_initialization.py'). + + Notes + ----- + The Tsallis entropy (H_{T,alpha}) equals to the Shannon differential + (H) entropy in limit: H_{T,alpha} -> H, as alpha -> 1. + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BHTsallis_KnnK() + >>> co2 = ite.cost.BHTsallis_KnnK(knn_method='cKDTree', k=4,\ + eps=0.01, alpha=0.9) + >>> co3 = ite.cost.BHTsallis_KnnK(k=5, alpha=0.9) + + """ + + def estimation(self, y): + """ Estimate Tsallis entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Tsallis entropy. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Examples + -------- + h = co.estimation(y) + + """ + + i_alpha = estimate_i_alpha(y, self) + h = (1 - i_alpha) / (self.alpha - 1) + + return h + + +class BHSharmaMittal_KnnK(InitKnnKAlphaBeta): + """ Sharma-Mittal entropy estimator using the kNN method (S={k}). + + Initialization comes from 'InitKnnKAlphaBeta' (see + 'ite.cost.x_initialization.py'). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BHSharmaMittal_KnnK() + >>> co2 = ite.cost.BHSharmaMittal_KnnK(knn_method='cKDTree', k=4,\ + eps=0.01, alpha=0.9, beta=0.9) + >>> co3 = ite.cost.BHSharmaMittal_KnnK(k=5, alpha=0.9, beta=0.9) + + Notes + ----- + The Sharma-Mittal entropy (H_{SM,alpha,beta}) equals to the + 1)Renyi entropy (H_{R,alpha}): H_{SM,alpha,beta} -> H_{R,alpha}, as + beta -> 1. + 2)Tsallis entropy (H_{T,alpha}): H_{SM,alpha,beta} = H_{T,alpha}, if + alpha = beta. + 3)Shannon entropy (H): H_{SM,alpha,beta} -> H, as (alpha,beta) -> + (1,1). + + """ + + def estimation(self, y): + """ Estimate Sharma-Mittal entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Sharma-Mittal entropy. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. (i_alpha estimation) + + Joseph E. Yukich. Probability Theory of Classical Euclidean + Optimization Problems, Lecture Notes in Mathematics, 1998, vol. + 1675. (i_alpha estimation) + + Ethem Akturk, Baris Bagci, and Ramazan Sever. Is Sharma-Mittal + entropy really a step beyond Tsallis and Renyi entropies? + Technical report, 2007. http://arxiv.org/abs/cond-mat/0703277. + (Sharma-Mittal entropy) + + Bhudev D. Sharma and Dharam P. Mittal. New nonadditive measures of + inaccuracy. Journal of Mathematical Sciences, 10:122-133, 1975. 
+ (Sharma-Mittal entropy) + + Examples + -------- + h = co.estimation(y) + + """ + + i_alpha = estimate_i_alpha(y, self) + h = (i_alpha**((1-self.beta) / (1-self.alpha)) - 1) / (1 - + self.beta) + + return h + + +class BHShannon_MaxEnt1(InitX, VerOneDSignal): + """ Maximum entropy distribution based Shannon entropy estimator. + + The used Gi functions are G1(x) = x exp(-x^2/2) and G2(x) = abs(x). + + Initialization is inherited from 'InitX', verification comes from + 'VerOneDSignal' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co = ite.cost.BHShannon_MaxEnt1() + + """ + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, 1)-ndarray (column vector) + One coordinate of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + References + ---------- + Aapo Hyvarinen. New approximations of differential entropy for + independent component analysis and projection pursuit. In Advances + in Neural Information Processing Systems (NIPS), pages 273-279, + 1997. (entropy approximation based on the maximum entropy + distribution) + + Thomas M. Cover and Joy A. Thomas. Elements of Information Theory. + John Wiley and Sons, New York, USA, 1991. (maximum entropy + distribution) + + Examples + -------- + h = co.estimation(y) + + """ + + # verification: + self.verification_one_d_signal(y) + + # estimation: + num_of_samples = y.shape[0] + + # normalize 'y' to have zero mean and unit std: + # step-1 [E=0, this step does not change the Shannon entropy of + # the variable]: + y = y - mean(y) + + # step-2 [std(Y) = 1]: + s = sqrt(sum(y**2) / (num_of_samples - 1)) + # print(s) + y /= s + + # we will take this scaling into account via the entropy + # transformation rule [ H(wz) = H(z) + log(|w|) ] at the end: + h_whiten = log(s) + + # h1, h2 -> h: + h1 = (1 + log(2 * pi)) / 2 # =H[N(0,1)] + # H2: + k1 = 36 / (8 * sqrt(3) - 9) + k2a = 1 / (2 - 6 / pi) + h2 = \ + k1 * mean(y * exp(-y**2 / 2))**2 +\ + k2a * (mean(absolute(y)) - sqrt(2 / pi))**2 + h = h1 - h2 + + # take into account the 'std=1' pre-processing: + h += h_whiten + + return h + + +class BHShannon_MaxEnt2(InitX, VerOneDSignal): + """ Maximum entropy distribution based Shannon entropy estimator. + + The used Gi functions are G1(x) = x exp(-x^2/2) and G2(x) = + exp(-x^2/2). + + Initialization is inherited from 'InitX', verification comes from + 'VerOneDSignal' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> co = ite.cost.BHShannon_MaxEnt2() + + """ + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, 1)-ndarray (column vector) + One coordinate of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + References + ---------- + Aapo Hyvarinen. New approximations of differential entropy for + independent component analysis and projection pursuit. In Advances + in Neural Information Processing Systems (NIPS), pages 273-279, + 1997. (entropy approximation based on the maximum entropy + distribution) + + Thomas M. Cover and Joy A. Thomas. Elements of Information Theory. + John Wiley and Sons, New York, USA, 1991. 
(maximum entropy + distribution) + + Examples + -------- + h = co.estimation(y) + + """ + + # verification: + self.verification_one_d_signal(y) + + # estimation: + num_of_samples = y.shape[0] + + # normalize 'y' to have zero mean and unit std: + # step-1 [E=0, this step does not change the Shannon entropy of + # the variable]: + + y = y - mean(y) + + # step-2 [std(y) = 1]: + s = sqrt(sum(y**2) / (num_of_samples - 1)) + y /= s + + # we will take this scaling into account via the entropy + # transformation rule [ H(wz) = H(z) + log(|w|) ] at the end: + h_whiten = log(s) + + # h1, h2 -> h: + h1 = (1 + log(2 * pi)) / 2 # =H[N(0,1)] + # h2: + k1 = 36 / (8 * sqrt(3) - 9) + k2b = 24 / (16 * sqrt(3) - 27) + h2 = \ + k1 * mean(y * exp(-y**2 / 2))**2 + \ + k2b * (mean(exp(-y**2 / 2)) - sqrt(1/2))**2 + + h = h1 - h2 + + # take into account the 'std=1' pre-processing: + h += h_whiten + + return h + + +class BHPhi_Spacing(InitX, VerOneDSignal): + """ Phi entropy estimator using the spacing method. + + Partial initialization is inherited from 'InitX', verification comes + from 'VerOneDSignal' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, w=lambda x: 1, phi=lambda x: x**2): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + w : function, optional + This weight function is used in the Phi entropy (default + is w=lambda x: 1, i.e., x-> 1). + phi : function, optional + This is the Phi function in the Phi entropy (default is + phi=lambda x: x**2, i.e. x->x**2) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BHPhi_Spacing() + >>> co2 = ite.cost.BHPhi_Spacing(phi=lambda x: x**2) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.w = w + self.phi = phi + + def estimation(self, y): + """ Estimate Phi entropy. + + Parameters + ---------- + y : (number of samples, 1)-ndarray (column vector) + One coordinate of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Phi entropy. + + References + ---------- + Bert van Es. Estimating Functionals Related to a Density by a + Class of Statistics Based on Spacings. Scandinavian Journal of + Statistics, 19:61-72, 1992. + + Examples + -------- + h = co.estimation(y) + + """ + + # verification: + self.verification_one_d_signal(y) + + num_of_samples = y.shape[0] # y : Tx1 + # m / num_of_samples -> 0, m / log(num_of_samples) -> infty a.s., + # m, num_of_samples -> infty: + m = int(floor(sqrt(num_of_samples))) + + y = sort(y, axis=0) + y1 = y[0:num_of_samples-m] # y_{(0)},...,y_{(T-m-1)} + y2 = y[m:] # y_{m},...,y_{T-1} + h = mean(self.phi((m / (num_of_samples + 1)) / (y2 - y1)) * + (self.w(y1) + self.w(y2))) / 2 + + return h + + +class BHRenyi_KnnS(InitKnnSAlpha): + """ Renyi entropy estimator using the generalized kNN method. + + In this case the kNN parameter is a set: S \subseteq {1,...,k}). + Initialization comes from 'InitKnnSAlpha' (see + 'ite.cost.x_initialization.py'). + + Notes + ----- + The Renyi entropy (H_{R,alpha}) equals to the Shannon differential (H) + entropy in limit: H_{R,alpha} -> H, as alpha -> 1. 
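        As a hedged numerical sketch of this limit (the sample size, the
        dimension and the alpha value below are illustrative choices, not
        requirements of the estimator), one can compare the estimate for
        alpha close to 1 with the closed-form Shannon entropy of a
        standard normal, H[N(0,I_d)] = d/2 * log(2*pi*e):

        >>> from numpy import log, pi, e
        >>> from numpy.random import randn
        >>> import ite
        >>> co = ite.cost.BHRenyi_KnnS(alpha=0.999)  # alpha close to 1
        >>> d = 2
        >>> y = randn(5000, d)                    # samples from N(0, I_d)
        >>> h_est = co.estimation(y)
        >>> h_true = d / 2 * log(2 * pi * e)      # Shannon entropy of N(0, I_d)
        >>> # for large sample sizes h_est is expected to approach h_true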
+ + Examples + -------- + >>> from numpy import array + >>> import ite + >>> co1 = ite.cost.BHRenyi_KnnS() + >>> co2 = ite.cost.BHRenyi_KnnS(knn_method='cKDTree', k=4, eps=0.01, \ + alpha=0.9) + >>> co3 = ite.cost.BHRenyi_KnnS(k=array([1,2,6]), eps=0.01, alpha=0.9) + + >>> co4 = ite.cost.BHRenyi_KnnS(k=5, alpha=0.9) + + """ + + def estimation(self, y): + """ Estimate Renyi entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Renyi entropy. + + References + ---------- + David Pal, Barnabas Poczos, Csaba Szepesvari. Estimation of Renyi + Entropy and Mutual Information Based on Generalized + Nearest-Neighbor Graphs. Advances in Neural Information Processing + Systems (NIPS), pages 1849-1857, 2010. (general S) + + Barnabas Poczos, Andras Lorincz. Independent Subspace Analysis + Using k-Nearest Neighborhood Estimates. International Conference on + Artificial Neural Networks (ICANN), pages 163-168, 2005. (S = + {1,...,k}) + + Examples + -------- + h = co.estimation(y) + + """ + + num_of_samples, dim = y.shape + + # compute length (L): + distances_yy = knn_distances(y, y, True, self.knn_method, + max(self.k), self.eps, 2)[0] + gam = dim * (1 - self.alpha) + # S = self.k: + l = sum(replace_infs_with_max(distances_yy[:, self.k-1]**gam)) + # Note: if 'distances_yy[:, self.k-1]**gam' contains inf elements + # (this may accidentally happen in small dimensions in case of + # large sample numbers, e.g., for d=1, T=10000), then the inf-s + # are replaced with the maximal, non-inf element. + + # compute const = const(S): + + # Solution-1 (normal k): + const = sum(gamma(self.k + 1 - self.alpha) / gamma(self.k)) + + # Solution-2 (if k is 'extreme large', say self.k=180 [=> + # gamma(self.k)=inf], then use this alternative form of + # 'const', after importing gammaln). Note: we used the + # 'gamma(a) / gamma(b) = exp(gammaln(a) - gammaln(b))' + # identity. + # const = sum(exp(gammaln(self.k + 1 - self.alpha) - + # gammaln(self.k))) + + vol = volume_of_the_unit_ball(dim) + const *= ((num_of_samples - 1) / num_of_samples * vol) ** \ + (self.alpha - 1) + + h = log(l / (const * num_of_samples**self.alpha)) / (1 - + self.alpha) + + return h diff --git a/ite-in-python/ite/cost/base_i.py b/ite-in-python/ite/cost/base_i.py new file mode 100644 index 0000000..d7ef2a5 --- /dev/null +++ b/ite-in-python/ite/cost/base_i.py @@ -0,0 +1,805 @@ +""" Base mutual information estimators. """ + +from numpy import sum, sqrt, isnan, exp, mean, eye, ones, dot, cumsum, \ + hstack, newaxis, maximum, prod, abs, arange, log +from numpy.linalg import norm +from scipy.spatial.distance import pdist, squareform +from scipy.special import factorial +from scipy.linalg import det +from scipy.sparse.linalg import eigsh + +from ite.cost.x_initialization import InitX, InitEtaKernel +from ite.cost.x_verification import VerCompSubspaceDims, \ + VerSubspaceNumberIsK,\ + VerOneDSubspaces +from ite.shared import compute_dcov_dcorr_statistics, median_heuristic,\ + copula_transformation, compute_matrix_r_kcca_kgv +from ite.cost.x_kernel import Kernel + + +class BIDistCov(InitX, VerCompSubspaceDims, VerSubspaceNumberIsK): + """ Distance covariance estimator using pairwise distances. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumber' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). 
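    A hedged usage sketch (the subspace dimensions, the sample size and
    the Gaussian data below are illustrative choices of this example, not
    requirements of the estimator):

    >>> from numpy import array
    >>> from numpy.random import randn
    >>> import ite
    >>> co = ite.cost.BIDistCov()
    >>> ds = array([2, 3])        # a 2D and a 3D subspace
    >>> y = randn(2000, 5)        # here the two subspaces are independent
    >>> i = co.estimation(y, ds)  # expected to be close to zero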
+ + """ + + def __init__(self, mult=True, alpha=1): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, optional + Parameter of the distance covariance: 0 < alpha < 2 + (default is 1). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BIDistCov() + >>> co2 = ite.cost.BIDistCov(alpha = 1.2) + + """ + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attribute: + if alpha <= 0 or alpha >= 2: + raise Exception('0 < alpha < 2 is needed for this estimator!') + + self.alpha = alpha + + def estimation(self, y, ds): + """ Estimate distance covariance. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. len(ds) = 2. + + Returns + ------- + i : float + Estimated distance covariance. + + References + ---------- + Gabor J. Szekely and Maria L. Rizzo. Brownian distance covariance. + The Annals of Applied Statistics, 3:1236-1265, 2009. + + Gabor J. Szekely, Maria L. Rizzo, and Nail K. Bakirov. Measuring + and testing dependence by correlation of distances. The Annals of + Statistics, 35:2769-2794, 2007. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_subspace_number_is_k(ds, 2) + + num_of_samples = y.shape[0] # number of samples + a = compute_dcov_dcorr_statistics(y[:, :ds[0]], self.alpha) + b = compute_dcov_dcorr_statistics(y[:, ds[0]:], self.alpha) + i = sqrt(sum(a*b)) / num_of_samples + + return i + + +class BIDistCorr(InitX, VerCompSubspaceDims, VerSubspaceNumberIsK): + """ Distance correlation estimator using pairwise distances. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumber' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=1): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, optional + Parameter of the distance covariance: 0 < alpha < 2 + (default is 1). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BIDistCorr() + >>> co2 = ite.cost.BIDistCorr(alpha = 1.2) + + """ + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attribute: + if alpha <= 0 or alpha >= 2: + raise Exception('0 < alpha < 2 is needed for this estimator!') + + self.alpha = alpha + + def estimation(self, y, ds): + """ Estimate distance correlation. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. len(ds) = 2. + + Returns + ------- + i : float + Estimated distance correlation. + + References + ---------- + Gabor J. Szekely and Maria L. Rizzo. Brownian distance covariance. + The Annals of Applied Statistics, 3:1236-1265, 2009. + + Gabor J. Szekely, Maria L. Rizzo, and Nail K. Bakirov. Measuring + and testing dependence by correlation of distances. The Annals of + Statistics, 35:2769-2794, 2007. 
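        As a hedged illustration (the construction of the dependent pair
        below is an arbitrary choice of this example): distance
        correlation lies in [0, 1] and grows with the strength of
        dependence between the two subspaces.

        >>> from numpy import array, hstack
        >>> from numpy.random import randn
        >>> import ite
        >>> co = ite.cost.BIDistCorr()
        >>> ds = array([1, 1])
        >>> x = randn(2000, 1)
        >>> y_indep = hstack((x, randn(2000, 1)))          # independent pair
        >>> y_dep = hstack((x, x + 0.1 * randn(2000, 1)))  # strongly dependent pair
        >>> i_indep = co.estimation(y_indep, ds)  # expected to be close to zero
        >>> i_dep = co.estimation(y_dep, ds)      # expected to be close to one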
+ + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_subspace_number_is_k(ds, 2) + + a = compute_dcov_dcorr_statistics(y[:, :ds[0]], self.alpha) + b = compute_dcov_dcorr_statistics(y[:, ds[0]:], self.alpha) + + n = sum(a*b) # numerator + d1 = sum(a**2) # denumerator-1 (without sqrt) + d2 = sum(b**2) # denumerator-2 (without sqrt) + + if (d1 * d2) == 0: # >=1 of the random variables is constant + i = 0 + else: + i = n / sqrt(d1 * d2) # / sqrt() + i = sqrt(i) + + return i + + +class BI3WayJoint(InitX, VerCompSubspaceDims, VerSubspaceNumberIsK): + """ Joint dependency from the mean embedding of the 'joint minus the + product of the marginals'. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumber' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, sigma1=0.1, sigma2=0.1, sigma3=0.1): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + sigma1 : float, optional + Std in the RBF kernel on the first subspace (default is + sigma1 = 0.1). sigma1 = nan means 'use median heuristic'. + sigma2 : float, optional + Std in the RBF kernel on the second subspace (default is + sigma2 = 0.1). sigma2 = nan means 'use median heuristic'. + sigma3 : float, optional + Std in the RBF kernel on the third subspace (default is + sigma3 = 0.1). sigma3 = nan means 'use median heuristic'. + + Examples + -------- + >>> from numpy import nan + >>> import ite + >>> co1 = ite.cost.BI3WayJoint() + >>> co2 = ite.cost.BI3WayJoint(sigma1=0.1,sigma2=0.1,sigma3=0.1) + >>> co3 = ite.cost.BI3WayJoint(sigma1=nan,sigma2=nan,sigma3=nan) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.sigma1, self.sigma2, self.sigma3 = sigma1, sigma2, sigma3 + + def estimation(self, y, ds): + """ Estimate joint dependency. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. len(ds) = 3. + + Returns + ------- + i : float + Estimated joint dependency. + + References + ---------- + Dino Sejdinovic, Arthur Gretton, and Wicher Bergsma. A kernel test + for three-variable interactions. In Advances in Neural Information + Processing Systems (NIPS), pages 1124-1132, 2013. (Lancaster + three-variable interaction based dependency index). + + Henry Oliver Lancaster. The Chi-squared Distribution. John Wiley + and Sons Inc, 1969. 
(Lancaster interaction) + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_subspace_number_is_k(ds, 3) + + # Gram matrices (k1,k2,k3): + sigma1, sigma2, sigma3 = self.sigma1, self.sigma2, self.sigma3 + # k1 (set co.sigma1 using median heuristic, if needed): + if isnan(sigma1): + sigma1 = median_heuristic(y[:, 0:ds[0]]) + + k1 = squareform(pdist(y[:, 0:ds[0]])) + k1 = exp(-k1**2 / (2 * sigma1**2)) + + # k2 (set co.sigma2 using median heuristic, if needed): + if isnan(sigma2): + sigma2 = median_heuristic(y[:, ds[0]:ds[0]+ds[1]]) + + k2 = squareform(pdist(y[:, ds[0]:ds[0]+ds[1]])) + k2 = exp(-k2**2 / (2 * sigma2**2)) + + # k3 (set co.sigma3 using median heuristic, if needed): + if isnan(sigma3): + sigma3 = median_heuristic(y[:, ds[0]+ds[1]:]) + + k3 = squareform(pdist(y[:, ds[0]+ds[1]:], 'euclidean')) + k3 = exp(-k3**2 / (2 * sigma3**2)) + + prod_of_ks = k1 * k2 * k3 # Hadamard product + term1 = mean(prod_of_ks) + term2 = -2 * mean(mean(k1, axis=1) * mean(k2, axis=1) * + mean(k3, axis=1)) + term3 = mean(k1) * mean(k2) * mean(k3) + i = term1 + term2 + term3 + + return i + + +class BI3WayLancaster(InitX, VerCompSubspaceDims, VerSubspaceNumberIsK): + """ Estimate the Lancaster three-variable interaction measure. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumber' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, sigma1=0.1, sigma2=0.1, sigma3=0.1): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + sigma1 : float, optional + Std in the RBF kernel on the first subspace (default is + sigma1 = 0.1). sigma1 = nan means 'use median heuristic'. + sigma2 : float, optional + Std in the RBF kernel on the second subspace (default is + sigma2 = 0.1). sigma2 = nan means 'use median heuristic'. + sigma3 : float, optional + Std in the RBF kernel on the third subspace (default is + sigma3 = 0.1). sigma3 = nan means 'use median heuristic'. + + Examples + -------- + >>> from numpy import nan + >>> import ite + >>> co1 = ite.cost.BI3WayLancaster() + >>> co2 = ite.cost.BI3WayLancaster(sigma1=0.1, sigma2=0.1,\ + sigma3=0.1) + >>> co3 = ite.cost.BI3WayLancaster(sigma1=nan, sigma2=nan,\ + sigma3=nan) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.sigma1, self.sigma2, self.sigma3 = sigma1, sigma2, sigma3 + + def estimation(self, y, ds): + """ Estimate Lancaster three-variable interaction measure. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. len(ds) = 3. + + Returns + ------- + i : float + Estimated Lancaster three-variable interaction measure. + + References + ---------- + Dino Sejdinovic, Arthur Gretton, and Wicher Bergsma. A kernel test + for three-variable interactions. In Advances in Neural Information + Processing Systems (NIPS), pages 1124-1132, 2013. (Lancaster + three-variable interaction based dependency index). + + Henry Oliver Lancaster. The Chi-squared Distribution. John Wiley + and Sons Inc, 1969. 
(Lancaster interaction) + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_subspace_number_is_k(ds, 3) + + num_of_samples = y.shape[0] # number of samples + + # Gram matrices (k1,k2,k3): + sigma1, sigma2, sigma3 = self.sigma1, self.sigma2, self.sigma3 + # k1 (set co.sigma1 using median heuristic, if needed): + if isnan(sigma1): + sigma1 = median_heuristic(y[:, 0:ds[0]]) + + k1 = squareform(pdist(y[:, 0:ds[0]])) + k1 = exp(-k1**2 / (2 * sigma1**2)) + + # k2 (set co.sigma2 using median heuristic, if needed): + if isnan(sigma2): + sigma2 = median_heuristic(y[:, ds[0]:ds[0]+ds[1]]) + + k2 = squareform(pdist(y[:, ds[0]:ds[0]+ds[1]])) + k2 = exp(-k2**2 / (2 * sigma2**2)) + + # k3 set co.sigma3 using median heuristic, if needed(): + if isnan(sigma3): + sigma3 = median_heuristic(y[:, ds[0]+ds[1]:]) + + k3 = squareform(pdist(y[:, ds[0]+ds[1]:])) + k3 = exp(-k3**2 / (2 * sigma3**2)) + + # centering of k1, k2, k3: + h = eye(num_of_samples) -\ + ones((num_of_samples, num_of_samples)) / num_of_samples + k1 = dot(dot(h, k1), h) + k2 = dot(dot(h, k2), h) + k3 = dot(dot(h, k3), h) + i = mean(k1 * k2 * k3) + + return i + + +class BIHSIC_IChol(InitEtaKernel, VerCompSubspaceDims): + """ Estimate HSIC using incomplete Cholesky decomposition. + + HSIC refers to Hilbert-Schmidt Independence Criterion. + + Partial initialization comes from 'InitEtaKernel', verification is + from 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + Notes + ----- + The current implementation uses the same kernel an all the subspaces: + k = k_1 = ... = k_M, where y = [y^1;...;y^M]. + + Examples + -------- + >>> from ite.cost.x_kernel import Kernel + >>> import ite + >>> co1 = ite.cost.BIHSIC_IChol() + >>> co2 = ite.cost.BIHSIC_IChol(eta=1e-3) + >>> k = Kernel({'name': 'RBF','sigma': 1}) + >>> co3 = ite.cost.BIHSIC_IChol(kernel=k, eta=1e-3) + + """ + + def estimation(self, y, ds): + """ Estimate HSIC. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated value of HSIC. + + References + ---------- + Arthur Gretton, Olivier Bousquet, Alexander Smola and Bernhard + Scholkopf. Measuring Statistical Dependence with Hilbert-Schmidt + Norms. International Conference on Algorithmic Learnng Theory + (ALT), 63-78, 2005. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + # initialization: + num_of_samples = y.shape[0] # number of samples + num_of_subspaces = len(ds) + + # Step-1 (g1, g2, ...): + # 0,d_1,d_1+d_2,...,d_1+...+d_{M-1}; starting indices of the + # subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + gs = list() + for m in range(num_of_subspaces): + idx = range(cum_ds[m], cum_ds[m] + ds[m]) + g = self.kernel.ichol(y[:, idx], num_of_samples * self.eta) + g = g - mean(g, axis=0) # center the Gram matrix: dot(g,g.T) + gs.append(g) + + # Step-2 (g1, g2, ... -> i): + i = 0 + for i1 in range(num_of_subspaces-1): # i1 = 0:M-2 + for i2 in range(i1+1, num_of_subspaces): # i2 = i1+1:M-1 + i += norm(dot(gs[i2].T, gs[i1]))**2 # norm = Frob. 
norm + + i /= num_of_samples**2 + + return i + + +class BIHoeffding(InitX, VerOneDSubspaces, VerCompSubspaceDims): + """ Estimate the multivariate version of Hoeffding's Phi. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumber' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, small_sample_adjustment=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + small_sample_adjustment: boolean, optional + Whether we want small-sample adjustment. + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BIHoeffding() + >>> co2 = ite.cost.BIHoeffding(small_sample_adjustment=False) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.small_sample_adjustment = small_sample_adjustment + + def estimation(self, y, ds): + """ Estimate multivariate version of Hoeffding's Phi. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension = 1 for this estimator. + + Returns + ------- + i : float + Estimated value of the multivariate version of Hoeffding's Phi. + + References + ---------- + Sandra Gaiser, Martin Ruppert, Friedrich Schmid. A multivariate + version of Hoeffding's Phi-Square. Journal of Multivariate + Analysis. 101: pages 2571-2586, 2010. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + num_of_samples, dim = y.shape + u = copula_transformation(y) + + # term1: + m = 1 - maximum(u[:, 0][:, newaxis], u[:, 0]) + for i in range(1, dim): + m *= 1 - maximum(u[:, i][:, newaxis], u[:, i]) + + term1 = mean(m) + + # term2: + if self.small_sample_adjustment: + term2 = \ + - mean(prod(1 - u**2 - (1 - u) / num_of_samples, + axis=1)) / \ + (2**(dim - 1)) + else: + term2 = - mean(prod(1 - u**2, axis=1)) / (2 ** (dim - 1)) + + # term3: + if self.small_sample_adjustment: + term3 = \ + ((num_of_samples - 1) * (2 * num_of_samples-1) / + (3 * 2 * num_of_samples**2))**dim + else: + term3 = 1 / 3**dim + + i = term1 + term2 + term3 + + if self.mult: + if self.small_sample_adjustment: + t1 = \ + sum((1 - arange(1, + num_of_samples) / num_of_samples)**dim + * (2*arange(1, num_of_samples) - 1)) \ + / num_of_samples**2 + t2 = \ + -2 * mean(((num_of_samples * (num_of_samples - 1) - + arange(1, num_of_samples+1) * + arange(num_of_samples)) / + (2 * num_of_samples ** 2))**dim) + t3 = term3 + inv_hd = t1 + t2 + t3 # 1 / h(d, n) + else: + inv_hd = \ + 2 / ((dim + 1) * (dim + 2)) - factorial(dim) / \ + (2 ** dim * prod(arange(dim + 1) + 1 / 2)) + \ + 1 / 3 ** dim # 1 / h(d)s + + i /= inv_hd + + i = sqrt(abs(i)) + + return i + + +class BIKGV(InitEtaKernel, VerCompSubspaceDims): + """ Estimate kernel generalized variance (KGV). + + Partial initialization comes from 'InitEtaKernel', verification is + from 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kernel=Kernel(), eta=1e-2, kappa=0.01): + """ Initialize the estimator. 
+ + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kernel : Kernel, optional + For examples, see 'ite.cost.x_kernel.Kernel' + eta : float, >0, optional + It is used to control the quality of the incomplete Cholesky + decomposition based Gram matrix approximation. Smaller 'eta' + means larger sized Gram factor and better approximation. + (default is 1e-2) + kappa: float, >0 + Regularization parameter. + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BIKGV() + >>> co2 = ite.cost.BIKGV(eta=1e-4) + >>> co3 = ite.cost.BIKGV(eta=1e-4, kappa=0.02) + >>> k = Kernel({'name': 'RBF', 'sigma': 0.3}) + >>> co4 = ite.cost.BIKGV(eta=1e-4, kernel=k) + + """ + + # initialize with 'InitEtaKernel': + super().__init__(mult=mult, kernel=kernel, eta=eta) + + # other attributes: + self.kappa = kappa + + def estimation(self, y, ds): + """ Estimate KGV. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated value of KGV. + + References + ---------- + Francis Bach, Michael I. Jordan. Kernel Independent Component + Analysis. Journal of Machine Learning Research, 3: 1-48, 2002. + + Francis Bach, Michael I. Jordan. Learning graphical models with + Mercer kernels. International Conference on Neural Information + Processing Systems (NIPS), pages 1033-1040, 2002. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + num_of_samples = y.shape[0] + tol = num_of_samples * self.eta + + r = compute_matrix_r_kcca_kgv(y, ds, self.kernel, tol, self.kappa) + i = -log(det(r)) / 2 + + return i + + +class BIKCCA(InitEtaKernel, VerCompSubspaceDims): + """ Kernel canonical correlation analysis (KCCA) based estimator. + + Partial initialization comes from 'InitEtaKernel', verification is + from 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kernel=Kernel(), eta=1e-2, kappa=0.01): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kernel : Kernel, optional + For examples, see 'ite.cost.x_kernel.Kernel' + eta : float, >0, optional + It is used to control the quality of the incomplete Cholesky + decomposition based Gram matrix approximation. Smaller 'eta' + means larger sized Gram factor and better approximation. + (default is 1e-2) + kappa: float, >0 + Regularization parameter. + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BIKCCA() + >>> co2 = ite.cost.BIKCCA(eta=1e-4) + >>> co3 = ite.cost.BIKCCA(eta=1e-4, kappa=0.02) + >>> k = Kernel({'name': 'RBF', 'sigma': 0.3}) + >>> co4 = ite.cost.BIKCCA(eta=1e-4, kernel=k) + + """ + + # initialize with 'InitEtaKernel': + super().__init__(mult=mult, kernel=kernel, eta=eta) + + # other attributes: + self.kappa = kappa + + def estimation(self, y, ds): + """ Estimate KCCA. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. 
+ ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated value of KCCA. + + References + ---------- + Francis Bach, Michael I. Jordan. Learning graphical models with + Mercer kernels. International Conference on Neural Information + Processing Systems (NIPS), pages 1033-1040, 2002. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + num_of_samples = y.shape[0] + tol = num_of_samples * self.eta + + r = compute_matrix_r_kcca_kgv(y, ds, self.kernel, tol, self.kappa) + eig_min = eigsh(r, k=1, which='SM')[0][0] + i = -log(eig_min) / 2 + + return i diff --git a/ite-in-python/ite/cost/base_k.py b/ite-in-python/ite/cost/base_k.py new file mode 100644 index 0000000..d8e0817 --- /dev/null +++ b/ite-in-python/ite/cost/base_k.py @@ -0,0 +1,205 @@ +""" Base kernel estimators on distributions. """ + +from ite.cost.x_initialization import InitKernel, InitKnnK, InitBagGram +from ite.cost.x_verification import VerEqualDSubspaces +from ite.shared import estimate_d_temp2 +from numpy import mean + +# scipy.spatial.distance.cdist is slightly slow; you can obtain some +# speed-up in case of larger dimensions by using +# ite.shared.cdist_large_dim: +# from ite.shared import cdist_large_dim + + +class BKProbProd_KnnK(InitKnnK, InitBagGram, VerEqualDSubspaces): + """ Probability product kernel estimator using the kNN method (S={k}). + + Partial initialization comes from 'InitKnnK' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, rho=2, + pxdx=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + rho: float, >0, optional + Parameter of the probability product kernel (default is 2). + Specially, for rho=1/2, one gets the Bhattacharyya kernel + (also known as the Bhattacharyya coefficient, Hellinger + affinity). + pxdx : boolean, optional + If pxdx == True, then we rewrite the probability product + kernel as \int p^{rho}(x)q^{rho}(x)dx = + \int p^{rho-1}(x)q^{rho}(x) p(x)dx. [p(x)dx] + Else, the probability product kernel is rewritten as + \int p^{rho}(x)q^{rho}(x)dx= \int q^{rho-1}(x)p^{rho}(x) + q(x)dx. [q(x)dx] + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BKProbProd_KnnK() + >>> co2 = ite.cost.BKProbProd_KnnK(rho=0.5) + >>> co3 = ite.cost.BKProbProd_KnnK(k=4, pxdx=False, rho=1.4) + + """ + + # initialize with 'InitKnnK': + super().__init__(mult=mult, knn_method=knn_method, k=k, eps=eps) + + # other attributes: + self.rho, self.pxdx, self._a, self._b = rho, pxdx, rho-1, rho + + def estimation(self, y1, y2): + """ Estimate probability product kernel. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. 
+ + Returns + ------- + k : float + Estimated probability product kernel. + + References + ---------- + Barnabas Poczos and Liang Xiong and Dougal Sutherland and + Jeff Schneider. Support Distribution Machines. Technical Report, + 2012. "http://arxiv.org/abs/1202.0302" (k-nearest neighbor based + estimation of d_temp2) + + Tony Jebara, Risi Kondor, and Andrew Howard. Probability product + kernels. Journal of Machine Learning Research, 5:819-844, 2004. + (probability product kernels --specifically--> Bhattacharyya + kernel) + + Anil K. Bhattacharyya. On a measure of divergence between two + statistical populations defined by their probability distributions. + Bulletin of the Calcutta Mathematical Society, 35:99-109, 1943. + (Bhattacharyya kernel) + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + if self.pxdx: + k = estimate_d_temp2(y1, y2, self) + else: + k = estimate_d_temp2(y2, y1, self) + + return k + + +class BKExpected(InitKernel, InitBagGram, VerEqualDSubspaces): + """ Estimator for the expected kernel. + + Initialization comes from 'InitKernel' and 'InitBagGram', verification + is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.BKExpected() + >>> k2 = Kernel({'name': 'RBF','sigma': 1}) + >>> co2 = ite.cost.BKExpected(kernel=k2) + >>> k3 = Kernel({'name': 'exponential','sigma': 1}) + >>> co3 = ite.cost.BKExpected(kernel=k3) + >>> k4 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> co4 = ite.cost.BKExpected(kernel=k4) + >>> k5 = Kernel({'name': 'student','d': 1}) + >>> co5 = ite.cost.BKExpected(kernel=k5) + >>> k6 = Kernel({'name': 'Matern3p2','l': 1}) + >>> co6 = ite.cost.BKExpected(kernel=k6) + >>> k7 = Kernel({'name': 'Matern5p2','l': 1}) + >>> co7 = ite.cost.BKExpected(kernel=k7) + >>> k8 = Kernel({'name': 'polynomial','exponent': 2,'c': 1}) + >>> co8 = ite.cost.BKExpected(kernel=k8) + >>> k9 = Kernel({'name': 'ratquadr','c': 1}) + >>> co9 = ite.cost.BKExpected(kernel=k9) + >>> k10 = Kernel({'name': 'invmquadr','c': 1}) + >>> co10 = ite.cost.BKExpected(kernel=k10) + + """ + + def estimation(self, y1, y2): + """ Estimate the value of the expected kernel. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated value of the expected kernel. + + References + ---------- + Arthur Gretton, Karsten M. Borgwardt, Malte J. Rasch, + Bernhard Scholkopf, and Alexander Smola. A kernel two-sample test. + Journal of Machine Learning Research, 13:723-773, 2012. + + Krikamol Muandet, Kenji Fukumizu, Francesco Dinuzzo, and Bernhard + Scholkopf. Learning from distributions via support measure + machines. In Advances in Neural Information Processing Systems + (NIPS), pages 10-18, 2011. + + Alain Berlinet and Christine Thomas-Agnan. Reproducing Kernel + Hilbert Spaces in Probability and Statistics. Kluwer, 2004. (mean + embedding) + + Thomas Gartner, Peter A. Flach, Adam Kowalczyk, and Alexander + Smola. Multi-instance kernels. In International Conference on + Machine Learning (ICML), pages 179-186, 2002. + (multi-instance/set/ensemble kernel) + + David Haussler. Convolution kernels on discrete structures. 
+ Technical report, Department of Computer Science, University of + California at Santa Cruz, 1999. (convolution kernel -spec-> set + kernel) + + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + kernel = self.kernel + ky1y2 = kernel.gram_matrix2(y1, y2) + + k = mean(ky1y2) + + return k diff --git a/ite-in-python/ite/cost/meta_a.py b/ite-in-python/ite/cost/meta_a.py new file mode 100644 index 0000000..d4bdee6 --- /dev/null +++ b/ite-in-python/ite/cost/meta_a.py @@ -0,0 +1,202 @@ +""" Meta association measure estimators. """ + +from numpy import sqrt, floor, ones + +from ite.cost.x_initialization import InitX +from ite.cost.x_verification import VerOneDSubspaces, VerCompSubspaceDims +from ite.cost.x_factory import co_factory + + +class MASpearmanLT(InitX, VerOneDSubspaces, VerCompSubspaceDims): + """ Estimate lower tail dependence based on conditional Spearman's rho. + + Partial initialization comes from 'InitX'; verification capabilities + are inherited from 'VerOneDSubspaces' and 'VerCompSubspaceDims' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, + spearman_cond_lt_co_name='BASpearmanCondLT', + spearman_cond_lt_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + spearman_cond_lt_co_name : str, optional + You can change it to any conditional Spearman's rho + (of lower tail) estimator. (default is + 'BASpearmanCondLT') + spearman_cond_lt_co_pars : dictionary, optional + Parameters for the conditional Spearman's rho + estimator. (default is None (=> {}); in this case the + default parameter values of the conditional + Spearman's rho estimator are used) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MASpearmanLT() + >>> co2 = ite.cost.MASpearmanLT(spearman_cond_lt_co_name=\ + 'BASpearmanCondLT') + + """ + + # initialize with 'InitX': + spearman_cond_lt_co_pars = spearman_cond_lt_co_pars or {} + super().__init__(mult=mult) + + # initialize the conditional Spearman's rho estimator: + spearman_cond_lt_co_pars['mult'] = mult # guarantee this property + self.spearman_cond_lt_co = co_factory(spearman_cond_lt_co_name, + **spearman_cond_lt_co_pars) + + def estimation(self, y, ds=None): + """ Estimate lower tail dependence. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + ds[i] = 1 (for all i): the i^th subspace is one-dimensional. + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + a : float + Estimated lower tail dependence. + + References + ---------- + Friedrich Schmid and Rafael Schmidt. Multivariate conditional + versions of Spearman's rho and related measures of tail + dependence. Journal of Multivariate Analysis, 98:1123-1140, 2007. + + C. Spearman. The proof and measurement of association between two + things. The American Journal of Psychology, 15:72-101, 1904. 
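        The code following this docstring sets the cutoff of the
        underlying conditional Spearman estimator to
        p = floor(sqrt(T)) / T, so the examined lower-tail region shrinks
        as the sample size T grows. A hedged usage sketch (the sample
        size and dimension below are illustrative choices):

        >>> from numpy.random import rand
        >>> import ite
        >>> co = ite.cost.MASpearmanLT()
        >>> y = rand(1000, 3)     # three one-dimensional subspaces
        >>> a = co.estimation(y)  # ds defaults to a vector of ones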
+ + Examples + -------- + a = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + # p: + num_of_samples = y.shape[0] + k = int(floor(sqrt(num_of_samples))) + self.spearman_cond_lt_co.p = k / num_of_samples # set p + + a = self.spearman_cond_lt_co.estimation(y, ds) + + return a + + +class MASpearmanUT(InitX, VerOneDSubspaces, VerCompSubspaceDims): + """ Estimate upper tail dependence based on conditional Spearman's rho. + + Partial initialization comes from 'InitX'; verification capabilities + are inherited from 'VerOneDSubspaces' and 'VerCompSubspaceDims' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, + spearman_cond_ut_co_name='BASpearmanCondUT', + spearman_cond_ut_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + spearman_cond_ut_co_name : str, optional + You can change it to any conditional Spearman's rho + (of upper tail) estimator. (default is + 'BASpearmanCondUT') + spearman_cond_ut_co_pars : dictionary, optional + Parameters for the conditional Spearman's rho + estimator. (default is None (=> {}); in this case the + default parameter values of the conditional + Spearman's rho estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MASpearmanUT() + >>> co2 = ite.cost.MASpearmanUT(spearman_cond_ut_co_name=\ + 'BASpearmanCondUT') + + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the conditional Spearman's rho estimator: + spearman_cond_ut_co_pars = spearman_cond_ut_co_pars or {} + spearman_cond_ut_co_pars['mult'] = mult # guarantee this property + self.spearman_cond_ut_co = co_factory(spearman_cond_ut_co_name, + **spearman_cond_ut_co_pars) + + def estimation(self, y, ds=None): + """ Estimate upper tail dependence. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + ds[i] = 1 (for all i): the i^th subspace is one-dimensional. + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + a : float + Estimated upper tail dependence. + + References + ---------- + Friedrich Schmid and Rafael Schmidt. Multivariate conditional + versions of Spearman's rho and related measures of tail + dependence. Journal of Multivariate Analysis, 98:1123-1140, 2007. + + C. Spearman. The proof and measurement of association between two + things. The American Journal of Psychology, 15:72-101, 1904. 
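        A hedged usage sketch of this upper-tail counterpart (the
        explicit ds below is optional; it is the same vector of ones that
        would be emulated by default, and the data is illustrative):

        >>> from numpy import ones
        >>> from numpy.random import rand
        >>> import ite
        >>> co = ite.cost.MASpearmanUT()
        >>> y = rand(1000, 3)
        >>> a = co.estimation(y, ones(3, dtype='int'))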
+ + Examples + -------- + a = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + # p: + num_of_samples = y.shape[0] + k = int(floor(sqrt(num_of_samples))) + self.spearman_cond_ut_co.p = k / num_of_samples # set p + + a = self.spearman_cond_ut_co.estimation(y, ds) + + return a diff --git a/ite-in-python/ite/cost/meta_c.py b/ite-in-python/ite/cost/meta_c.py new file mode 100644 index 0000000..185d882 --- /dev/null +++ b/ite-in-python/ite/cost/meta_c.py @@ -0,0 +1 @@ +""" Meta cross-quantity estimators. """ diff --git a/ite-in-python/ite/cost/meta_d.py b/ite-in-python/ite/cost/meta_d.py new file mode 100644 index 0000000..467e29e --- /dev/null +++ b/ite-in-python/ite/cost/meta_d.py @@ -0,0 +1,1111 @@ +""" Meta divergence estimators. """ + +from numpy import sqrt, floor, array, sum + +from ite.cost.x_initialization import InitX, InitAlpha +from ite.cost.x_verification import VerEqualDSubspaces, \ + VerEqualSampleNumbers +from ite.cost.x_factory import co_factory +from ite.shared import mixture_distribution + + +class MDBlockMMD(InitX, VerEqualDSubspaces, VerEqualSampleNumbers): + """ Block MMD estimator using average of U-stat. based MMD estimators. + + MMD stands for maximum mean discrepancy. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces', 'VerEqualSampleNumbers' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, mmd_co_name='BDMMD_UStat', + mmd_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + mmd_co_name : str, optional + You can change it to any U-statistic based MMD + estimator. (default is 'BDMMD_UStat') + mmd_co_pars : dictionary, optional + Parameters for the U-statistic based MMD estimator + (default is None (=> {}); in this case the default + parameter values of the U-statistic based MMD + estimator are used). + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.MDBlockMMD() + >>> co2 = ite.cost.MDBlockMMD(mmd_co_name='BDMMD_UStat') + >>> dict_ch = {'kernel': \ + Kernel({'name': 'RBF','sigma': 0.1}), 'mult': True} + >>> co3 = ite.cost.MDBlockMMD(mmd_co_name='BDMMD_UStat',\ + mmd_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the U-statistic based MMD estimator: + mmd_co_pars = mmd_co_pars or {} + mmd_co_pars['mult'] = mult # guarantee this property + self.mmd_co = co_factory(mmd_co_name, **mmd_co_pars) + + def estimation(self, y1, y2): + """ Estimate MMD. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + y : float + Estimated MMD. + + References + ---------- + Wojciech Zaremba, Arthur Gretton, and Matthew Blaschko. B-tests: + Low variance kernel two-sample tests. In Advances in Neural + Information Processing Systems (NIPS), pages 755-763, 2013. 
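        The code following this docstring splits the two samples into
        blocks of size b = floor(sqrt(T)) and averages the U-statistic
        based MMD estimates over the blocks; note that the two sample
        sets must have equal sizes. A hedged usage sketch (the Gaussian
        samples and the mean shift below are illustrative choices):

        >>> from numpy.random import randn
        >>> import ite
        >>> co = ite.cost.MDBlockMMD()
        >>> y1 = randn(4000, 2)        # samples from f_1
        >>> y2 = randn(4000, 2) + 1.0  # samples from f_2 (shifted mean)
        >>> d = co.estimation(y1, y2)  # expected to grow with the discrepancy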
+ + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + self.verification_equal_sample_numbers(y1, y2) + + num_of_samples = y1.shape[0] # =y2.shape[0] + b = int(floor(sqrt(num_of_samples))) # size of a block + num_of_blocks = int(floor(num_of_samples / b)) + + d = 0 + for k in range(num_of_blocks): + d += self.mmd_co.estimation(y1[k*b:(k+1)*b], y2[k*b:(k+1)*b]) + + d /= num_of_blocks + + return d + + +class MDEnergyDist_DMMD(InitX, VerEqualDSubspaces): + """ Energy distance estimator using MMD (maximum mean discrepancy). + + The estimation is based on the relation D(f_1,f_2;rho) = + 2 [MMD(f_1,f_2;k)]^2, where k is a kernel that generates rho, a + semimetric of negative type. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, mmd_co_name='BDMMD_UStat_IChol', + mmd_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + mmd_co_name : str, optional + You can change it to any MMD estimator (default is + 'BDMMD_UStat_IChol'). + mmd_co_pars : dictionary, optional + Parameters for the MMD estimator (default is None + (=> {}); in this case the default parameter values + of the MMD estimator are used). + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.MDEnergyDist_DMMD() + >>> co2 =\ + ite.cost.MDEnergyDist_DMMD(mmd_co_name='BDMMD_UStat_IChol') + >>> dict_ch = {'kernel': \ + Kernel({'name': 'RBF','sigma': 0.1}), 'eta': 1e-2} + >>> co3 =\ + ite.cost.MDEnergyDist_DMMD(mmd_co_name='BDMMD_UStat_IChol',\ + mmd_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the MMD estimator: + mmd_co_pars = mmd_co_pars or {} + mmd_co_pars['mult'] = mult # guarantee this property + self.mmd_co = co_factory(mmd_co_name, **mmd_co_pars) + + def estimation(self, y1, y2): + """ Estimate energy distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated energy distance. + + References + ---------- + Dino Sejdinovic, Arthur Gretton, Bharath Sriperumbudur, and Kenji + Fukumizu. Hypothesis testing using pairwise distances and + associated kernels. International Conference on Machine Learning + (ICML), pages 1111-1118, 2012. (semimetric space; energy distance + <=> MMD, with a suitable kernel) + + Russell Lyons. Distance covariance in metric spaces. Annals of + Probability, 41:3284-3305, 2013. (energy distance, metric space of + negative type; pre-equivalence to MMD). + + Gabor J. Szekely and Maria L. Rizzo. A new test for multivariate + normality. Journal of Multivariate Analysis, 93:58-80, 2005. + (energy distance; metric space of negative type) + + Gabor J. Szekely and Maria L. Rizzo. Testing for equal + distributions in high dimension. InterStat, 5, 2004. (energy + distance; R^d) + + Ludwig Baringhaus and C. Franz. On a new multivariate + two-sample test. Journal of Multivariate Analysis, 88, 190-206, + 2004. (energy distance; R^d) + + Lev Klebanov. N-Distances and Their Applications. 
Charles + University, Prague, 2005. (N-distance) + + A. A. Zinger and A. V. Kakosyan and L. B. Klebanov. A + characterization of distributions by mean values of statistics + and certain probabilistic metrics. Journal of Soviet + Mathematics, 1992 (N-distance, general case). + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d = 2 * self.mmd_co.estimation(y1, y2)**2 + + return d + + +class MDf_DChi2(InitX, VerEqualDSubspaces): + """ f-divergence estimator based on Taylor expansion & chi^2 distance. + + Assumption: f convex and f(1) = 0. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, hess=2, mult=True, chi_square_co_name='BDChi2_KnnK', + chi_square_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + hess : float, optional + =f^{(2)}(1), the second derivative of f at 1 (default is 2). + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + chi_square_co_name : str, optional + You can change it to any Pearson chi square + divergence estimator (default is + 'BDChi2_KnnK'). + chi_square_co_pars : dictionary, optional + Parameters for the Pearson chi-square + divergence estimator (default is None + (=> {}); in this case the default parameter + values of the Pearson chi square divergence + estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDf_DChi2(hess=2) + >>> co2 = ite.cost.MDf_DChi2(hess=1,\ + chi_square_co_name='BDChi2_KnnK') + >>> dict_ch = {'k': 6} + >>> co3 = ite.cost.MDf_DChi2(hess=2,\ + chi_square_co_name='BDChi2_KnnK',\ + chi_square_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the chi^2 divergence estimator: + chi_square_co_pars = chi_square_co_pars or {} + chi_square_co_pars['mult'] = mult # guarantee this property + self.chi_square_co = co_factory(chi_square_co_name, + **chi_square_co_pars) + # other attributes (hess): + self.hess = hess + + def estimation(self, y1, y2): + """ Estimate f-divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated f-divergence. + + References + ---------- + Frank Nielsen and Richard Nock. On the chi square and higher-order + chi distances for approximating f-divergences. IEEE Signal + Processing Letters, 2:10-13, 2014. + + Neil S. Barnett, Pietro Cerone, Sever Silvestru Dragomir, and A. + Sofo. Approximating Csiszar f-divergence by the use of Taylor's + formula with integral remainder. Mathematical Inequalities and + Applications, 5:417-432, 2002. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d = self.hess / 2 * self.chi_square_co.estimation(y1, y2) + + return d + + +class MDJDist_DKL(InitX, VerEqualDSubspaces): + """ J distance estimator. + + J distance is also known as the symmetrised Kullback-Leibler + divergence. + + The estimation is based on the relation D_J(f_1,f_2) = D(f_1,f_2) + + D(f_2,f_1), where D_J is the J distance and D denotes the + Kullback-Leibler divergence. 
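
    As a hedged sketch of this symmetrisation (the data below is an
    illustrative choice; 'BDKL_KnnK' is the default base estimator of
    this class), the meta estimator reproduces the sum of the two
    directed Kullback-Leibler estimates:

    >>> from numpy.random import randn
    >>> import ite
    >>> co_j = ite.cost.MDJDist_DKL()
    >>> co_kl = ite.cost.BDKL_KnnK()
    >>> y1 = randn(3000, 1)
    >>> y2 = randn(3000, 1) + 0.5
    >>> d_j = co_j.estimation(y1, y2)
    >>> d_sum = co_kl.estimation(y1, y2) + co_kl.estimation(y2, y1)
    >>> # with the shared default parameters d_j and d_sum should coincide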
+ + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator (default is 'BDKL_KnnK'). + kl_co_pars : dictionary, optional + Parameters for the Kullback-Leibler divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Kullback-Leibler + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDJDist_DKL() + >>> co2 = ite.cost.MDJDist_DKL(kl_co_name='BDKL_KnnK') + >>> co3 = ite.cost.MDJDist_DKL(kl_co_name='BDKL_KnnK',\ + kl_co_pars={'k': 6}) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the KL divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = mult # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y1, y2): + """ Estimate J distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated J distance. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d = self.kl_co.estimation(y1, y2) + self.kl_co.estimation(y2, y1) + + return d + + +class MDJR_HR(InitAlpha, VerEqualDSubspaces): + """ Jensen-Renyi divergence estimator based on Renyi entropy. + + The estimation is based on the relation D_JR(f_1,f_2) = + D_{JR,alpha}(f_1,f_2) = H_{R,alpha}(w1*y^1+w2*y^2) - + [w1*H_{R,alpha}(y^1) + w2*H_{R,alpha}(y^2)], where y^i has density f_i + (i=1,2), w1*y^1+w2*y^2 is the mixture distribution of y^1 and y^2 with + w1, w2 positive weights, D_JR is the Jensen-Renyi divergence, + H_{R,alpha} denotes the Renyi entropy. + + + Partial initialization comes from 'InitAlpha', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, w=array([1/2, 1/2]), + renyi_co_name='BHRenyi_KnnK', renyi_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, \ne 1, optional + Parameter of the Jensen-Renyi divergence (default is 0.99). + w : ndarray, w = [w1,w2], w_i > 0, w_2 > 0, w1 + w2 = 1, optional. + Parameters of the Jensen-Renyi divergence (default is w = + array([1/2,1/2]) ) + renyi_co_name : str, optional + You can change it to any Renyi entropy estimator + (default is 'BHRenyi_KnnK'). + renyi_co_pars : dictionary, optional + Parameters for the Renyi entropy estimator + (default is None (=> {}); in this case the default + parameter values of the Renyi entropy estimator + are used). 
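        Note that w has to contain two positive weights that sum to one;
        otherwise initialization raises an exception. A hedged usage
        sketch (alpha, the weights and the Gaussian data below are
        illustrative choices):

        >>> from numpy import array
        >>> from numpy.random import randn
        >>> import ite
        >>> co = ite.cost.MDJR_HR(alpha=0.9, w=array([1/4, 3/4]))
        >>> y1 = randn(2000, 2)
        >>> y2 = randn(2000, 2) + 1.0
        >>> d = co.estimation(y1, y2)  # expected to grow with the discrepancy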
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDJR_HR() + >>> co2 = ite.cost.MDJR_HR(renyi_co_name='BHRenyi_KnnK', alpha=0.8) + >>> co3 = ite.cost.MDJR_HR(renyi_co_name='BHRenyi_KnnK',\ + renyi_co_pars={'k': 6}, alpha=0.5,\ + w=array([1/4,3/4])) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Renyi entropy estimator: + renyi_co_pars = renyi_co_pars or {} + renyi_co_pars['mult'] = mult # guarantee this property + renyi_co_pars['alpha'] = alpha # -||- + self.renyi_co = co_factory(renyi_co_name, **renyi_co_pars) + + # other attributes (w): + # verification: + if sum(w) != 1: + raise Exception('sum(w) has to be 1!') + + if not all(w > 0): + raise Exception('The coordinates of w have to be positive!') + + if len(w) != 2: + raise Exception('The length of w has to be 2!') + + self.w = w + + def estimation(self, y1, y2): + """ Estimate Jensen-Renyi divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Jensen-Renyi divergence. + + References + ---------- + A.B. Hamza and H. Krim. Jensen-Renyi divergence measure: + theoretical and computational perspectives. In IEEE International + Symposium on Information Theory (ISIT), page 257, 2003. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + w = self.w + mixture_y = mixture_distribution((y1, y2), w) + d = self.renyi_co.estimation(mixture_y) -\ + (w[0] * self.renyi_co.estimation(y1) + + w[1] * self.renyi_co.estimation(y2)) + + return d + + +class MDJT_HT(InitAlpha, VerEqualDSubspaces): + """ Jensen-Tsallis divergence estimator based on Tsallis entropy. + + The estimation is based on the relation D_JT(f_1,f_2) = + D_{JT,alpha}(f_1,f_2) = H_{T,alpha}((y^1+y^2)/2) - + [1/2*H_{T,alpha}(y^1) + 1/2*H_{T,alpha}(y^2)], where y^i has density + f_i (i=1,2), (y^1+y^2)/2 is the mixture distribution of y^1 and y^2 + with 1/2-1/2 weights, D_JT is the Jensen-Tsallis divergence, + H_{T,alpha} denotes the Tsallis entropy. + + Partial initialization comes from 'InitAlpha', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, + tsallis_co_name='BHTsallis_KnnK', tsallis_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, \ne 1, optional + Parameter of the Jensen-Tsallis divergence (default is + 0.99). + tsallis_co_name : str, optional + You can change it to any Tsallis entropy + estimator (default is 'BHTsallis_KnnK'). + tsallis_co_pars : dictionary, optional + Parameters for the Tsallis entropy estimator + (default is None (=> {}); in this case the + default parameter values of the Tsallis entropy + estimator are used). 
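        Unlike the Jensen-Renyi meta estimator, the mixture weights are
        fixed here to 1/2-1/2. A hedged usage sketch (alpha and the data
        below are illustrative choices):

        >>> from numpy.random import rand
        >>> import ite
        >>> co = ite.cost.MDJT_HT(alpha=0.8)
        >>> y1 = rand(2000, 3)
        >>> y2 = rand(2000, 3) ** 2    # a different distribution on [0,1]^3
        >>> d = co.estimation(y1, y2)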
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDJT_HT() + >>> co2 = ite.cost.MDJT_HT(tsallis_co_name='BHTsallis_KnnK',\ + alpha=0.8) + >>> co3 = ite.cost.MDJT_HT(tsallis_co_name='BHTsallis_KnnK',\ + tsallis_co_pars={'k':6}, alpha=0.5) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Tsallis entropy estimator: + tsallis_co_pars = tsallis_co_pars or {} + tsallis_co_pars['mult'] = mult # guarantee this property + tsallis_co_pars['alpha'] = alpha # -||- + self.tsallis_co = co_factory(tsallis_co_name, **tsallis_co_pars) + + def estimation(self, y1, y2): + """ Estimate Jensen-Tsallis divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Jensen-Tsallis divergence. + + References + ---------- + J. Burbea and C.R. Rao. On the convexity of some divergence + measures based on entropy functions. IEEE Transactions on + Information Theory, 28:489-495, 1982. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + w = array([1/2, 1/2]) + mixture_y = mixture_distribution((y1, y2), w) + d = self.tsallis_co.estimation(mixture_y) -\ + (w[0] * self.tsallis_co.estimation(y1) + + w[1] * self.tsallis_co.estimation(y2)) + + return d + + +class MDJS_HS(InitX, VerEqualDSubspaces): + """ Jensen-Shannon divergence estimator based on Shannon entropy. + + The estimation is based on the relation D_JS(f_1,f_2) = + H(w1*y^1+w2*y^2) - [w1*H(y^1) + w2*H(y^2)], where y^i has density f_i + (i=1,2), w1*y^1+w2*y^2 is the mixture distribution of y^1 and y^2 with + w1, w2 positive weights, D_JS is the Jensen-Shannon divergence, H + denotes the Shannon entropy. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, w=array([1/2, 1/2]), + shannon_co_name='BHShannon_KnnK', shannon_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + w : ndarray, w = [w1,w2], w_i > 0, w_2 > 0, w1 + w2 = 1, optional. + Parameters of the Jensen-Shannon divergence (default is + w = array([1/2,1/2]) ) + shannon_co_name : str, optional + You can change it to any Shannon entropy + estimator (default is 'BHShannon_KnnK'). + shannon_co_pars : dictionary, optional + Parameters for the Shannon entropy estimator + (default is None (=> {}); in this case the + default parameter values of the Shannon entropy + estimator are used). 
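The weight vector w is validated at construction time (see the checks right after the Examples): it must have length 2, strictly positive entries, and sum to 1. A short illustrative sketch of a valid and an invalid call:

    from numpy import array
    import ite

    co = ite.cost.MDJS_HS(w=array([1/4, 3/4]))   # valid: positive weights summing to 1
    try:
        ite.cost.MDJS_HS(w=array([0.3, 0.3]))    # invalid: sum(w) != 1
    except Exception as e:
        print(e)                                 # 'sum(w) has to be 1!'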
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDJS_HS() + >>> co2 = ite.cost.MDJS_HS(shannon_co_name='BHShannon_KnnK') + >>> co3 = ite.cost.MDJS_HS(shannon_co_name='BHShannon_KnnK',\ + shannon_co_pars={'k':6,'eps':0.2},\ + w=array([1/4,3/4])) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Shannon entropy estimator: + shannon_co_pars = shannon_co_pars or {} + shannon_co_pars['mult'] = mult # guarantee this property + self.shannon_co = co_factory(shannon_co_name, **shannon_co_pars) + + # other attributes (w): + # verification: + if sum(w) != 1: + raise Exception('sum(w) has to be 1!') + + if not all(w > 0): + raise Exception('The coordinates of w have to be positive!') + + if len(w) != 2: + raise Exception('The length of w has to be 2!') + + self.w = w + + def estimation(self, y1, y2): + """ Estimate Jensen-Shannon divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated Jensen-Shannon divergence. + + References + ---------- + Jianhua Lin. Divergence measures based on the Shannon entropy. + IEEE Transactions on Information Theory, 37:145-151, 1991. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + w = self.w + mixture_y = mixture_distribution((y1, y2), w) + d = self.shannon_co.estimation(mixture_y) -\ + (w[0] * self.shannon_co.estimation(y1) + + w[1] * self.shannon_co.estimation(y2)) + + return d + + +class MDK_DKL(InitX, VerEqualDSubspaces): + """ K divergence estimator based on Kullback-Leibler divergence. + + The estimation is based on the relation D_K(f_1,f_2) = + D(f_1,(f_1+f_2)/2), where D_K is the K divergence, D denotes the + Kullback-Leibler divergence. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator (default is 'BDKL_KnnK'). + kl_co_pars : dictionary, optional + Parameters for the Kullback-Leibler divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Kullback-Leibler + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDK_DKL() + >>> co2 = ite.cost.MDK_DKL(kl_co_name='BDKL_KnnK') + >>> co3 = ite.cost.MDK_DKL(kl_co_name='BDKL_KnnK',\ + kl_co_pars={'k':6,'eps':0.2}) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Kullback-Leibler divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = mult # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y1, y2): + """ Estimate K divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. 
+ + Returns + ------- + d : float + Estimated K divergence. + + References + ---------- + Jianhua Lin. Divergence measures based on the Shannon entropy. + IEEE Transactions on Information Theory, 37:145-151, 1991. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + + # mixture of y1 and y2 with 1/2, 1/2 weights: + w = array([1/2, 1/2]) + # samples to the mixture (second part of y1 and y2; =:y1m, y2m): + # (max) number of samples to the mixture from y1 and from y2: + num_of_samples1m = int(floor(num_of_samples1 / 2)) + num_of_samples2m = int(floor(num_of_samples2 / 2)) + y1m = y1[num_of_samples1m:] # broadcasting + y2m = y2[num_of_samples2m:] # broadcasting + mixture_y = mixture_distribution((y1m, y2m), w) + + # with broadcasting: + d = self.kl_co.estimation(y1[:num_of_samples1m], mixture_y) + + return d + + +class MDL_DKL(InitX, VerEqualDSubspaces): + """ L divergence estimator based on Kullback-Leibler divergence. + + The estimation is based on the relation D_L(f_1,f_2) = + D(f_1,(f_1+f_2)/2) + D(f_2,(f_1+f_2)/2), where D_L is the L divergence + and D denotes the Kullback-Leibler divergence. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator (default is 'BDKL_KnnK'). + kl_co_pars : dictionary, optional + Parameters for the Kullback-Leibler divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Kullback-Leibler + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDL_DKL() + >>> co2 = ite.cost.MDL_DKL(kl_co_name='BDKL_KnnK') + >>> co3 = ite.cost.MDL_DKL(kl_co_name='BDKL_KnnK',\ + kl_co_pars={'k':6,'eps':0.2}) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Kullback-Leibler divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = mult # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y1, y2): + """ Estimate L divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated L divergence. + + References + ---------- + Jianhua Lin. Divergence measures based on the Shannon entropy. + IEEE Transactions on Information Theory, 37:145-151, 1991. 
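Because D_L(f_1,f_2) = D(f_1,(f_1+f_2)/2) + D(f_2,(f_1+f_2)/2), the L divergence is by construction the sum of the two K-divergence directions. A rough numerical cross-check on toy data (the two meta estimators split their samples independently, so agreement is only approximate):

    from numpy.random import randn
    import ite

    y1, y2 = randn(4000, 2), randn(4000, 2) + 1

    d_l = ite.cost.MDL_DKL().estimation(y1, y2)
    d_k = ite.cost.MDK_DKL().estimation(y1, y2) + \
        ite.cost.MDK_DKL().estimation(y2, y1)
    # d_l and d_k estimate the same population quantity and should roughly agree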
+ + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + + # mixture of y1 and y2 with 1/2, 1/2 weights: + w = array([1/2, 1/2]) + # samples to the mixture (second part of y1 and y2; =:y1m, y2m): + # (max) number of samples to the mixture from y1 and from y2: + num_of_samples1m = int(floor(num_of_samples1 / 2)) + num_of_samples2m = int(floor(num_of_samples2 / 2)) + y1m = y1[num_of_samples1m:] # broadcasting + y2m = y2[num_of_samples2m:] # broadcasting + mixture_y = mixture_distribution((y1m, y2m), w) + + # with broadcasting: + d = self.kl_co.estimation(y1[:num_of_samples1m], mixture_y) +\ + self.kl_co.estimation(y2[:num_of_samples1m], mixture_y) + + return d + + +class MDSymBregman_DB(InitAlpha, VerEqualDSubspaces): + """ Symmetric Bregman distance estimator from the nonsymmetric one. + + The estimation is based on the relation D_S = + (D_NS(f1,f2) + D_NS (f2,f1)) / alpha, where D_S is the symmetric + Bregman distance, D_NS is the nonsymmetric Bregman distance. + + Partial initialization comes from 'InitAlpha', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, + bregman_co_name='BDBregman_KnnK', bregman_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, \ne 1, optional + Parameter of the symmetric Bregman distance (default is + 0.99). + bregman_co_name : str, optional + You can change it to any nonsymmetric Bregman + distance estimator (default is 'BDBregman_KnnK'). + bregman_co_pars : dictionary, optional + Parameters for the nonsymmetric Bregman distance + estimator (default is None (=> {}); in this case + the default parameter values of the nonsymmetric + Bregman distance estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDSymBregman_DB() + >>> co2 =\ + ite.cost.MDSymBregman_DB(bregman_co_name='BDBregman_KnnK') + >>> co3 =\ + ite.cost.MDSymBregman_DB(bregman_co_name='BDBregman_KnnK',\ + bregman_co_pars={'k':6,'eps':0.2}) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the nonsymmetric Bregman distance estimator: + bregman_co_pars = bregman_co_pars or {} + bregman_co_pars['mult'] = mult # guarantee this property + bregman_co_pars['alpha'] = alpha # guarantee this property + self.bregman_co = co_factory(bregman_co_name, **bregman_co_pars) + + def estimation(self, y1, y2): + """ Estimate symmetric Bregman distance. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated symmetric Bregman distance. + + References + ---------- + Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of + Renyi information estimators for multidimensional densities. + Annals of Statistics, 36(5):2153-2182, 2008. + + Imre Csiszar. Generalized projections for non-negative functions. + Acta Mathematica Hungarica, 68:161-185, 1995. + + Lev M. Bregman. 
The relaxation method of finding the common points + of convex sets and its application to the solution of problems in + convex programming. USSR Computational Mathematics and + Mathematical Physics, 7:200-217, 1967. + + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + d = (self.bregman_co.estimation(y1, y2) + + self.bregman_co.estimation(y2, y1)) / self.alpha + + return d + + +class MDKL_HSCE(InitX, VerEqualDSubspaces): + """ Kullback-Leibler divergence from cross-entropy and Shannon entropy. + + The estimation is based on the relation D(f_1,f_2) = + CE(f_1,f_2) - H(f_1), where D denotes the Kullback-Leibler divergence, + CE is the cross-entropy, and H stands for the Shannon differential + entropy. + + Partial initialization comes from 'InitX', verification is from + 'VerEqualDSubspaces' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, shannon_co_name='BHShannon_KnnK', + shannon_co_pars=None, ce_co_name='BCCE_KnnK', + ce_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + shannon_co_name : str, optional + You can change it to any Shannon entropy + (default is 'BHShannon_KnnK'). + shannon_co_pars : dictionary, optional + Parameters for the Shannon entropy estimator (default + is None (=> {}); in this case the default parameter + values of the Shannon entropy estimator are used). + ce_co_name : str, optional + You can change it to any cross-entropy estimator + (default is 'BCCE_KnnK'). + ce_co_pars : dictionary, optional + Parameters for the cross-entropy estimator (default + is None (=> {}); in this case the default parameter + values of the cross-entropy estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MDKL_HSCE() + >>> co2 = ite.cost.MDKL_HSCE(shannon_co_name='BHShannon_KnnK') + >>> co3 = ite.cost.MDKL_HSCE(shannon_co_name='BHShannon_KnnK',\ + shannon_co_pars={'k':6,'eps':0.2}) + >>> co4 = ite.cost.MDKL_HSCE(shannon_co_name='BHShannon_KnnK',\ + shannon_co_pars={'k':5,'eps':0.2},\ + ce_co_name='BCCE_KnnK',\ + ce_co_pars={'k':6,'eps':0.1}) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Shannon entropy estimator: + shannon_co_pars = shannon_co_pars or {} + shannon_co_pars['mult'] = True # guarantee this property + self.shannon_co = co_factory(shannon_co_name, **shannon_co_pars) + + # initialize the cross-entropy estimator: + ce_co_pars = ce_co_pars or {} + ce_co_pars['mult'] = True # guarantee this property + self.ce_co = co_factory(ce_co_name, **ce_co_pars) + + def estimation(self, y1, y2): + """ Estimate Kullback-Leibler divergence. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : float + Estimated KL divergence. + + References + ---------- + Jianhua Lin. Divergence measures based on the Shannon entropy. + IEEE Transactions on Information Theory, 37:145-151, 1991. 
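Since D(f_1,f_2) = CE(f_1,f_2) - H(f_1), the estimate produced here should roughly agree with a direct k-nearest-neighbour KL estimate on the same data. A small illustrative cross-check (toy data, default estimator parameters):

    from numpy.random import randn
    from ite.cost.x_factory import co_factory
    import ite

    y1, y2 = randn(4000, 3), randn(4000, 3) + 0.5

    d_meta = ite.cost.MDKL_HSCE().estimation(y1, y2)                  # CE - H route
    d_direct = co_factory('BDKL_KnnK', mult=True).estimation(y1, y2)  # direct KL estimate
    # for sample sizes this large the two values should be close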
+ + Examples + -------- + d = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + c = self.ce_co.estimation(y1, y2) + h = self.shannon_co.estimation(y1) + d = c - h + + return d diff --git a/ite-in-python/ite/cost/meta_h.py b/ite-in-python/ite/cost/meta_h.py new file mode 100644 index 0000000..abaec54 --- /dev/null +++ b/ite-in-python/ite/cost/meta_h.py @@ -0,0 +1,278 @@ +""" Meta entropy estimators. """ + +from numpy import mean, cov, log, pi, exp, array, min, max, prod +from numpy.random import multivariate_normal, rand +from scipy.linalg import det + +from ite.cost.x_initialization import InitX, InitAlpha +from ite.cost.x_factory import co_factory + + +class MHShannon_DKLN(InitX): + """ Shannon entropy estimator using a Gaussian auxiliary variable. + + The estimtion relies on H(Y) = H(G) - D(Y,G), where G is Gaussian + [N(E(Y),cov(Y)] and D is the Kullback-Leibler divergence. + + Partial initialization comes from 'InitX' (see + 'ite.cost.x_initialization.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator. (default is 'BDKL_KnnK') + kl_co_pars : dictionary, optional + Parameters for the KL divergence estimator. (default + is None (=> {}); in this case the default parameter + values of the KL divergence estimator are used) + + -------- + >>> import ite + >>> co1 = ite.cost.MHShannon_DKLN() + >>> co2 = ite.cost.MHShannon_DKLN(kl_co_name='BDKL_KnnK') + + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.2} + >>> co3 = ite.cost.MHShannon_DKLN(kl_co_name='BDKL_KnnK', \ + kl_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the KL divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = True # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + References + ---------- + Quing Wang, Sanjeev R. Kulkarni, and Sergio Verdu. Universal + estimation of information measures for analog sources. Foundations + And Trends In Communications And Information Theory, 5:265-353, + 2009. + + Examples + -------- + h = co.estimation(y,ds) + + """ + + num_of_samples, dim = y.shape # number of samples, dimension + + # estimate the mean and the covariance of y: + m = mean(y, axis=0) + c = cov(y, rowvar=False) # 'rowvar=False': 1 row = 1 observation + + # entropy of N(m,c): + if dim == 1: + det_c = c # det(): 'expected square matrix' exception + # multivariate_normal(): 'cov must be 2 dimensional and square' + # exception: + c = array([[c]]) + + else: + det_c = det(c) + + h_normal = 1/2 * log((2*pi*exp(1))**dim * det_c) + + # generate samples from N(m,c): + y_normal = multivariate_normal(m, c, num_of_samples) + + h = h_normal - self.kl_co.estimation(y, y_normal) + + return h + + +class MHShannon_DKLU(InitX): + """ Shannon entropy estimator using a uniform auxiliary variable. 
+ + + The estimation relies on H(y) = -D(y',u) + log(\prod_i(b_i-a_i)), + where y\in U[a,b] = \times_{i=1}^d U[a_i,b_i], D is the + Kullback-Leibler divergence, y' = linearly transformed version of y to + [0,1]^d, and U is the uniform distribution on [0,1]^d. + + Partial initialization comes from 'InitX' (see + 'ite.cost.x_initialization.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator. (default is 'BDKL_KnnK') + kl_co_pars : dictionary, optional + Parameters for the KL divergence estimator. (default + is None (=> {}); in this case the default parameter + values of the KL divergence estimator are used) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MHShannon_DKLU() + >>> co2 = ite.cost.MHShannon_DKLU(kl_co_name='BDKL_KnnK') + + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 5, 'eps': 0.3} + >>> co3 = ite.cost.MHShannon_DKLU(kl_co_name='BDKL_KnnK', \ + kl_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the KL divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = mult # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y): + """ Estimate Shannon entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Shannon entropy. + + Examples + -------- + h = co.estimation(y,ds) + + """ + + # estimate the support (a,b) of y, transform y to [0,1]^d: + a, b = min(y, axis=0), max(y, axis=0) + y = y/(b-a) + a/(a-b) + + # generate samples from U[0,1]^d: + u = rand(*y.shape) # '*': seq unpacking + + h = - self.kl_co.estimation(y, u) + log(prod(b-a)) + + return h + + +class MHTsallis_HR(InitAlpha): + """ Tsallis entropy estimator from Renyi entropy. + + The estimation relies on H_{T,alpha} = (e^{H_{R,alpha}(1-alpha)} - 1) / + (1-alpha), where H_{T,alpha} and H_{R,alpha} denotes the Tsallis and + the Renyi entropy, respectively. + + Partial initialization comes from 'InitAlpha' see + 'ite.cost.x_initialization.py'). + + Notes + ----- + The Tsallis entropy (H_{T,alpha}) equals to the Shannon differential + (H) entropy in limit: H_{T,alpha} -> H, as alpha -> 1. + + """ + + def __init__(self, mult=True, alpha=0.99, renyi_co_name='BHRenyi_KnnK', + renyi_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, alpha \ne 1, optional + alpha in the Tsallis entropy. (default is 0.99) + renyi_co_name : str, optional + You can change it to any Renyi entropy estimator. + (default is 'BHRenyi_KnnK') + renyi_co_pars : dictionary, optional + Parameters for the Renyi entropy estimator. 
(default + is None (=> {}); in this case the default parameter + values of the Renyi entropy estimator are used) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MHTsallis_HR() + >>> co2 = ite.cost.MHTsallis_HR(renyi_co_name='BHRenyi_KnnK') + >>> co3 = ite.cost.MHTsallis_HR(alpha=0.9, \ + renyi_co_name='BHRenyi_KnnK') + + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 5, 'eps': 0.1} + >>> co4 = ite.cost.MHTsallis_HR(alpha=0.9, \ + renyi_co_name='BHRenyi_KnnK', \ + renyi_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Renyi entropy estimator: + renyi_co_pars = renyi_co_pars or {} + renyi_co_pars['mult'] = mult # guarantee this property + renyi_co_pars['alpha'] = alpha # -||- + self.renyi_co = co_factory(renyi_co_name, **renyi_co_pars) + + def estimation(self, y): + """ Estimate Tsallis entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + h : float + Estimated Tsallis entropy. + + Examples + -------- + h = co.estimation(y,ds) + + """ + + # Renyi entropy: + h = self.renyi_co.estimation(y) + + # transform Renyi entropy to Tsallis entropy: + h = (exp(h * (1 - self.alpha)) - 1) / (1 - self.alpha) + + return h diff --git a/ite-in-python/ite/cost/meta_h_cond.py b/ite-in-python/ite/cost/meta_h_cond.py new file mode 100644 index 0000000..9859953 --- /dev/null +++ b/ite-in-python/ite/cost/meta_h_cond.py @@ -0,0 +1,87 @@ +""" Meta conditional entropy estimators. """ + +from ite.cost.x_initialization import InitX +from ite.cost.x_factory import co_factory + + +class BcondHShannon_HShannon(InitX): + """ Conditional Shannon entropy estimator based on unconditional one. + + The estimation relies on the identity H(y^1|y^2) = H([y^1;y^2]) - + H(y^2), where H is the Shannon differential entropy. + + Partial initialization comes from 'InitX' (see + 'ite.cost.x_initialization.py'). + + """ + + def __init__(self, mult=True, h_shannon_co_name='BHShannon_KnnK', + h_shannon_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + h_shannon_co_name : str, optional + You can change it to any Shannon entropy + estimator. (default is 'BHShannon_KnnK') + h_shannon_co_pars : dictionary, optional + Parameters for the Shannon entropy estimator. + (default is None (=> {}); in this case the + default parameter values of the Shannon + entropy estimator are used) + + -------- + >>> import ite + >>> co1 = ite.cost.BcondHShannon_HShannon() + >>> co2 = ite.cost.BcondHShannon_HShannon(\ + h_shannon_co_name='BHShannon_KnnK') + >>> dict_ch = {'k': 2, 'eps': 0.2} + >>> co3 = ite.cost.BcondHShannon_HShannon(\ + h_shannon_co_name='BHShannon_KnnK', \ + h_shannon_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Shannon entropy estimator: + h_shannon_co_pars = h_shannon_co_pars or {} + h_shannon_co_pars['mult'] = True # guarantee this property + self.h_shannon_co = co_factory(h_shannon_co_name, + **h_shannon_co_pars) + + def estimation(self, y, dim1): + """ Estimate conditional Shannon entropy. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample from [y1; y2]. + dim1: integer, >0 + Dimension of y1. 
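A small usage sketch of the identity H(y^1|y^2) = H([y^1;y^2]) - H(y^2) on toy data: when the two blocks are independent, conditioning changes nothing, so the estimate should be close to the unconditional Shannon entropy of y^1 (obtained here via 'co_factory' purely for the comparison).

    from numpy import hstack
    from numpy.random import randn
    from ite.cost.x_factory import co_factory
    import ite

    y1 = randn(3000, 2)      # first block, dim1 = 2
    y2 = randn(3000, 1)      # second block, independent of y1
    y = hstack((y1, y2))     # joint sample [y1; y2]; one row = one sample

    cond_h = ite.cost.BcondHShannon_HShannon().estimation(y, dim1=2)
    h1 = co_factory('BHShannon_KnnK', mult=True).estimation(y1)
    # with independent blocks, cond_h should be close to h1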
+ + Returns + ------- + cond_h : float + Estimated conditional Shannon entropy. + + + Examples + -------- + cond_h = co.estimation(y,dim1) + + """ + + # Shannon entropy of y2: + h2 = self.h_shannon_co.estimation(y[:, dim1:]) + + # Shannon entropy of [y1;y2]: + h12 = self.h_shannon_co.estimation(y) + + cond_h = h12 - h2 + return cond_h diff --git a/ite-in-python/ite/cost/meta_i.py b/ite-in-python/ite/cost/meta_i.py new file mode 100644 index 0000000..08a3f71 --- /dev/null +++ b/ite-in-python/ite/cost/meta_i.py @@ -0,0 +1,842 @@ +""" Meta mutual information estimators. """ + +from numpy.random import rand +from numpy import ones + +from ite.cost.x_initialization import InitX, InitAlpha +from ite.cost.x_verification import VerCompSubspaceDims, VerOneDSubspaces,\ + VerSubspaceNumberIsK +from ite.cost.x_factory import co_factory +from ite.shared import joint_and_product_of_the_marginals_split,\ + copula_transformation + + +class MIShannon_DKL(InitX, VerCompSubspaceDims): + """ Shannon mutual information estimator based on KL divergence. + + The estimation is based on the relation I(y^1,...,y^M) = + D(f_y,\prod_{m=1}^M f_{y^m}), where I is the Shannon mutual + information, D is the Kullback-Leibler divergence. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, kl_co_name='BDKL_KnnK', kl_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kl_co_name : str, optional + You can change it to any Kullback-Leibler divergence + estimator (default is 'BDKL_KnnK'). + kl_co_pars : dictionary, optional + Parameters for the KL divergence estimator (default + is None (=> {}); in this case the default parameter + values of the KL divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIShannon_DKL() + >>> co2 = ite.cost.MIShannon_DKL(kl_co_name='BDKL_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co3 = ite.cost.MIShannon_DKL(kl_co_name='BDKL_KnnK',\ + kl_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the KL divergence estimator: + kl_co_pars = kl_co_pars or {} + kl_co_pars['mult'] = mult # guarantee this property + self.kl_co = co_factory(kl_co_name, **kl_co_pars) + + def estimation(self, y, ds): + """ Estimate Shannon mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated Shannon mutual information. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + y1, y2 = joint_and_product_of_the_marginals_split(y, ds) + i = self.kl_co.estimation(y1, y2) + + return i + + +class MIChi2_DChi2(InitX, VerCompSubspaceDims): + """ Chi-square mutual information estimator based on chi^2 distance. + + The estimation is based on the relation I(y^1,...,y^M) = + D(f_y,\prod_{m=1}^M f_{y^m}), where I is the chi-square mutual + information, D is the chi^2 distance. 
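All of the mutual information estimators built on the D(f_y, \prod_m f_{y^m}) relation share the same calling pattern: the joint sample y plus the vector ds of subspace dimensions. A short end-to-end sketch on correlated toy data (illustrative values only):

    from numpy import array, zeros
    from numpy.random import multivariate_normal
    import ite

    # two one-dimensional, correlated subspaces:
    c = array([[1.0, 0.8], [0.8, 1.0]])
    y = multivariate_normal(zeros(2), c, 5000)
    ds = array([1, 1])                     # subspace dimensions

    i_shannon = ite.cost.MIShannon_DKL().estimation(y, ds)  # population value is positive here
    i_chi2 = ite.cost.MIChi2_DChi2().estimation(y, ds)      # same pattern, chi^2 distance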
+ + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, chi2_co_name='BDChi2_KnnK', + chi2_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + chi2_co_name : str, optional + You can change it to any Pearson chi-square + divergence estimator (default is 'BDChi2_KnnK'). + chi2_co_pars : dictionary, optional + Parameters for the Pearson chi-square divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Pearson chi-square + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIChi2_DChi2() + >>> co2 = ite.cost.MIChi2_DChi2(chi2_co_name='BDChi2_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co3 = ite.cost.MIChi2_DChi2(chi2_co_name='BDChi2_KnnK', \ + chi2_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the chi-square divergence estimator: + chi2_co_pars = chi2_co_pars or {} + chi2_co_pars['mult'] = mult # guarantee this property + self.chi2_co = co_factory(chi2_co_name, **chi2_co_pars) + + def estimation(self, y, ds): + """ Estimate chi-square mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated chi-square mutual information. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + y1, y2 = joint_and_product_of_the_marginals_split(y, ds) + i = self.chi2_co.estimation(y1, y2) + + return i + + +class MIL2_DL2(InitX, VerCompSubspaceDims): + """ L2 mutual information estimator based on L2 divergence. + + The estimation is based on the relation I(y^1,...,y^M) = + D(f_y,\prod_{m=1}^M f_{y^m}), where I is the L2 mutual + information, D is the L2 divergence. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, l2_co_name='BDL2_KnnK', l2_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + l2_co_name : str, optional + You can change it to any L2 divergence estimator + (default is 'BDL2_KnnK'). + l2_co_pars : dictionary, optional + Parameters for the L2 divergence estimator (default + is None (=> {}); in this case the default parameter + values of the L2 divergence estimator are used). 
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIL2_DL2() + >>> co2 = ite.cost.MIL2_DL2(l2_co_name='BDL2_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 2, 'eps': 0.1} + >>> co3 = ite.cost.MIL2_DL2(l2_co_name='BDL2_KnnK',\ + l2_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the L2 divergence estimator: + l2_co_pars = l2_co_pars or {} + l2_co_pars['mult'] = mult # guarantee this property + self.l2_co = co_factory(l2_co_name, **l2_co_pars) + + def estimation(self, y, ds): + """ Estimate L2 mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated L2 mutual information. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider: Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + y1, y2 = joint_and_product_of_the_marginals_split(y, ds) + i = self.l2_co.estimation(y1, y2) + + return i + + +class MIRenyi_DR(InitAlpha, VerCompSubspaceDims): + """ Renyi mutual information estimator based on Renyi divergence. + + The estimation is based on the relation I(y^1,...,y^M) = + D(f_y,\prod_{m=1}^M f_{y^m}), where I is the Renyi mutual + information, D is the Renyi divergence. + + Partial initialization comes from 'InitAlpha', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, renyi_co_name='BDRenyi_KnnK', + renyi_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, optional + Parameter of the Renyi mutual information (default is + 0.99). + renyi_co_name : str, optional + You can change it to any Renyi divergence + estimator (default is 'BDRenyi_KnnK'). + renyi_co_pars : dictionary, optional + Parameters for the Renyi divergence estimator + (default is None (=> {}); in this case the default + parameter values of the Renyi divergence estimator + are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIRenyi_DR() + >>> co2 = ite.cost.MIRenyi_DR(renyi_co_name='BDRenyi_KnnK') + >>> co3 = ite.cost.MIRenyi_DR(renyi_co_name='BDRenyi_KnnK',\ + alpha=0.4) + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 2, 'eps': 0.1} + >>> co4 = ite.cost.MIRenyi_DR(mult=True,alpha=0.9,\ + renyi_co_name='BDRenyi_KnnK',\ + renyi_co_pars=dict_ch) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Renyi divergence estimator: + renyi_co_pars = renyi_co_pars or {} + renyi_co_pars['mult'] = mult # guarantee this property + renyi_co_pars['alpha'] = alpha # -||- + self.renyi_co = co_factory(renyi_co_name, **renyi_co_pars) + + def estimation(self, y, ds): + """ Estimate Renyi mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. 
+ + Returns + ------- + i : float + Estimated Renyi mutual information. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Jeff Schneider. On the Estimation of + alpha-Divergences. International Conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + y1, y2 = joint_and_product_of_the_marginals_split(y, ds) + i = self.renyi_co.estimation(y1, y2) + + return i + + +class MITsallis_DT(InitAlpha, VerCompSubspaceDims): + """ Tsallis mutual information estimator based on Tsallis divergence. + + The estimation is based on the relation I(y^1,...,y^M) = + D(f_y,\prod_{m=1}^M f_{y^m}), where I is the Tsallis mutual + information, D is the Tsallis divergence. + + Partial initialization comes from 'InitAlpha', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, + tsallis_co_name='BDTsallis_KnnK', tsallis_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, optional + Parameter of the Renyi mutual information (default is + 0.99). + tsallis_co_name : str, optional + You can change it to any Tsallis divergence + estimator (default is 'BDTsallis_KnnK'). + tsallis_co_pars : dictionary, optional + Parameters for the Tsallis divergence estimator + (default is None (=> {}); in this case the + default parameter values of the Tsallis + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MITsallis_DT() + >>> co2 = ite.cost.MITsallis_DT(tsallis_co_name='BDTsallis_KnnK') + >>> co3 = ite.cost.MITsallis_DT(tsallis_co_name='BDTsallis_KnnK',\ + alpha=0.4) + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 2, 'eps': 0.1} + >>> co4 = ite.cost.MITsallis_DT(mult=True,alpha=0.9,\ + tsallis_co_name='BDTsallis_KnnK',\ + tsallis_co_pars=dict_ch) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Tsallis divergence estimator: + tsallis_co_pars = tsallis_co_pars or {} + tsallis_co_pars['mult'] = mult # guarantee this property + tsallis_co_pars['alpha'] = alpha # -||- + self.tsallis_co = co_factory(tsallis_co_name, **tsallis_co_pars) + + def estimation(self, y, ds): + """ Estimate Tsallis mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated Tsallis mutual information. + + References + ---------- + Barnabas Poczos, Zoltan Szabo, Jeff Schneider. Nonparametric + divergence estimators for Independent Subspace Analysis. European + Signal Processing Conference (EUSIPCO), pages 1849-1853, 2011. + + Barnabas Poczos, Jeff Schneider. On the Estimation of + alpha-Divergences. International Conference on Artificial + Intelligence and Statistics (AISTATS), pages 609-617, 2011. 
+ + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + y1, y2 = joint_and_product_of_the_marginals_split(y, ds) + i = self.tsallis_co.estimation(y1, y2) + + return i + + +class MIMMD_CopulaDMMD(InitX, VerCompSubspaceDims, VerOneDSubspaces): + """ Copula and MMD based kernel dependency estimator. + + MMD stands for maximum mean discrepancy. + + The estimation is based on the relation I(Y_1,...,Y_d) = MMD(P_Z,P_U), + where (i) Z =[F_1(Y_1);...;F_d(Y_d)] is the copula transformation of + Y; F_i is the cdf of Y_i, (ii) P_U is the uniform distribution on + [0,1]^d, (iii) dim(Y_1) = ... = dim(Y_d) = 1. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerOneDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, mmd_co_name='BDMMD_UStat', + mmd_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + mmd_co_name : str, optional + You can change it to any MMD estimator (default is + 'BDMMD_UStat'). + mmd_co_pars : dictionary, optional + Parameters for the MMD estimator (default is None + (=> {}); in this case the default parameter values + of the MMD estimator are used). + + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.MIMMD_CopulaDMMD() + >>> co2 = ite.cost.MIMMD_CopulaDMMD(mmd_co_name='BDMMD_UStat') + >>> dict_ch = {'kernel': Kernel({'name': 'RBF','sigma': 0.1})} + >>> co3 = ite.cost.MIMMD_CopulaDMMD(mmd_co_name='BDMMD_UStat',\ + mmd_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the MMD estimator: + mmd_co_pars = mmd_co_pars or {} + mmd_co_pars['mult'] = mult # guarantee this property + self.mmd_co = co_factory(mmd_co_name, **mmd_co_pars) + + def estimation(self, y, ds=None): + """ Estimate copula and MMD based kernel dependency. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + i : float + Estimated copula and MMD based kernel dependency. + + References + ---------- + Barnabas Poczos, Zoubin Ghahramani, Jeff Schneider. Copula-based + Kernel Dependency Measures. International Conference on Machine + Learning (ICML), 2012. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + z = copula_transformation(y) + u = rand(z.shape[0], z.shape[1]) + + i = self.mmd_co.estimation(z, u) + + return i + + +class MIRenyi_HR(InitAlpha, VerCompSubspaceDims, VerOneDSubspaces): + """ Renyi mutual information estimator based on Renyi entropy. + + The estimation is based on the relation I_{alpha}(X) = -H_{alpha}(Z), + where Z =[F_1(X_1);...;F_d(X_d)] is the copula transformation of X, + F_i is the cdf of X_i; I_{alpha} is the Renyi mutual information, + H_{alpha} is the Renyi entropy. 
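The copula step in this relation can also be carried out by hand with 'copula_transformation' from 'ite.shared' (the helper imported at the top of this module), which is exactly what 'MIRenyi_HR.estimation' does below. An illustrative sketch on dependent toy data:

    from numpy import hstack
    from numpy.random import randn
    from ite.cost.x_factory import co_factory
    from ite.shared import copula_transformation

    x = randn(4000, 1)
    y = hstack((x, x + 0.1 * randn(4000, 1)))   # two strongly dependent coordinates

    z = copula_transformation(y)                # Z = [F_1(X_1); F_2(X_2)]
    renyi_co = co_factory('BHRenyi_KnnK', mult=True, alpha=0.99)
    i_renyi = -renyi_co.estimation(z)           # I_alpha(X) = -H_alpha(Z)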
+ + Partial initialization comes from 'InitAlpha', verification is from + 'VerCompSubspaceDims' and 'VerOneDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, renyi_co_name='BHRenyi_KnnK', + renyi_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, \ne 1 + Parameter of the Renyi mutual information. + renyi_co_name : str, optional + You can change it to any Renyi entropy estimator + (default is 'BHRenyi_KnnK'). + renyi_co_pars : dictionary, optional + Parameters for the Renyi entropy estimator + (default is None (=> {}); in this case the default + parameter values of the Renyi entropy estimator + are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIRenyi_HR() + >>> co2 = ite.cost.MIRenyi_HR(renyi_co_name='BHRenyi_KnnK') + >>> dict_ch = {'k': 2, 'eps': 0.4} + >>> co3 = ite.cost.MIRenyi_HR(renyi_co_name='BHRenyi_KnnK',\ + renyi_co_pars=dict_ch) + + """ + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Renyi entropy estimator: + renyi_co_pars = renyi_co_pars or {} + renyi_co_pars['mult'] = mult # guarantee this property + renyi_co_pars['alpha'] = alpha # -||- + self.renyi_co = co_factory(renyi_co_name, **renyi_co_pars) + + def estimation(self, y, ds=None): + """ Estimate Renyi mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector, vector of ones + If ds is not given (ds=None), the vector of ones [ds = + ones(y.shape[1],dtype='int')] is emulated inside the function. + + Returns + ------- + i : float + Estimated Renyi mutual information. + + References + ---------- + David Pal, Barnabas Poczos, Csaba Szepesvari. Estimation of Renyi + Entropy and Mutual Information Based on Generalized + Nearest-Neighbor Graphs. Advances in Neural Information Processing + Systems (NIPS), pages 1849-1857, 2010. + + Barnabas Poczos, Sergey Krishner, Csaba Szepesvari. REGO: + Rank-based Estimation of Renyi Information using Euclidean Graph + Optimization. International Conference on Artificial Intelligence + and Statistics (AISTATS), pages 605-612, 2010. + + Examples + -------- + i = co.estimation(y,ds) + + """ + + if ds is None: # emulate 'ds = vector of ones' + ds = ones(y.shape[1], dtype='int') + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_one_dimensional_subspaces(ds) + + z = copula_transformation(y) + i = -self.renyi_co.estimation(z) + + return i + + +class MIShannon_HS(InitX, VerCompSubspaceDims): + """ Shannon mutual information estimator based on Shannon entropy. + + The estimation is based on the relation I(y^1,...,y^M) = \sum_{m=1}^M + H(y^m) - H([y^1,...,y^M]), where I is the Shannon mutual information, + H is the Shannon entropy. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, shannon_co_name='BHShannon_KnnK', + shannon_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. 
+ (default is True) + shannon_co_name : str, optional + You can change it to any Shannon differential + entropy estimator (default is 'BHShannon_KnnK'). + shannon_co_pars : dictionary, optional + Parameters for the Shannon differential entropy + estimator (default is None (=> {}); in this case + the default parameter values of the Shannon + differential entropy estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MIShannon_HS() + >>> co2 = ite.cost.MIShannon_HS(shannon_co_name='BHShannon_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co3 = ite.cost.MIShannon_HS(shannon_co_name='BHShannon_KnnK',\ + shannon_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Shannon differential entropy estimator: + shannon_co_pars = shannon_co_pars or {} + shannon_co_pars['mult'] = True # guarantee this property + self.shannon_co = co_factory(shannon_co_name, **shannon_co_pars) + + def estimation(self, y, ds): + """ Estimate Shannon mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. + + Returns + ------- + i : float + Estimated Shannon mutual information. + + References + ---------- + Thomas M. Cover, Joy A. Thomas. Elements of Information Theory, + John Wiley and Sons, New York, USA (1991). + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + + # I = - H([y^1,...,y^M]): + i = -self.shannon_co.estimation(y) + + # I = I + \sum_{m=1}^M H(y^m): + idx_start = 0 + for k in range(len(ds)): + dim_k = ds[k] + idx_stop = idx_start + dim_k + # print("{0}:{1}".format(idx_start,idx_stop)) + i += self.shannon_co.estimation(y[:, idx_start:idx_stop]) + idx_start = idx_stop + + return i + + +class MIDistCov_HSIC(InitX, VerCompSubspaceDims, VerSubspaceNumberIsK): + """ Estimate distance covariance from HSIC. + + The estimation is based on the relation I(y^1,y^2;rho_1,rho_2) = + 2 HSIC(y^1,y^2;k), where HSIC stands for the Hilbert-Schmidt + independence criterion, y = [y^1; y^2] and k = k_1 x k_2, where k_i-s + generates rho_i-s, semimetrics of negative type used in distance + covariance. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' and 'VerSubspaceNumberIsK' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + + """ + + def __init__(self, mult=True, hsic_co_name='BIHSIC_IChol', + hsic_co_pars=None): + + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + hsic_co_name : str, optional + You can change it to any HSIC estimator + (default is 'BIHSIC_IChol'). + hsic_co_pars : dictionary, optional + Parameters for the HSIC estimator (default is + None (=> {}); in this case the default parameter + values of the HSIC estimator are used. 
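A short end-to-end sketch of the 2 x HSIC relation for two dependent subspaces (default HSIC estimator; a custom kernel can be passed via 'hsic_co_pars' as in the Examples below). The data are illustrative only:

    from numpy import array, hstack
    from numpy.random import randn
    import ite

    y1 = randn(3000, 2)
    y2 = y1 + 0.2 * randn(3000, 2)   # second subspace, dependent on the first
    y = hstack((y1, y2))
    ds = array([2, 2])               # exactly two subspaces (len(ds) = 2)

    i = ite.cost.MIDistCov_HSIC().estimation(y, ds)   # distance covariance, i.e. 2 x HSIC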
+ + Examples + -------- + >>> import ite + >>> from ite.cost.x_kernel import Kernel + >>> co1 = ite.cost.MIDistCov_HSIC() + >>> co2 = ite.cost.MIDistCov_HSIC(hsic_co_name='BIHSIC_IChol') + >>> k = Kernel({'name': 'RBF','sigma': 0.3}) + >>> dict_ch = {'kernel': k, 'eta': 1e-3} + >>> co3 = ite.cost.MIDistCov_HSIC(hsic_co_name='BIHSIC_IChol',\ + hsic_co_pars=dict_ch) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the HSIC estimator: + hsic_co_pars = hsic_co_pars or {} + hsic_co_pars['mult'] = mult # guarantee this property + self.hsic_co = co_factory(hsic_co_name, **hsic_co_pars) + + def estimation(self, y, ds): + """ Estimate distance covariance. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. Length(ds) = 2. + + Returns + ------- + i : float + Estimated distance covariance. + + References + ---------- + + Examples + -------- + i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + self.verification_subspace_number_is_k(ds, 2) + + i = 2 * self.hsic_co.estimation(y, ds) + + return i diff --git a/ite-in-python/ite/cost/meta_i_cond.py b/ite-in-python/ite/cost/meta_i_cond.py new file mode 100644 index 0000000..de95731 --- /dev/null +++ b/ite-in-python/ite/cost/meta_i_cond.py @@ -0,0 +1,111 @@ +""" Meta conditional mutual information estimators. """ + +from numpy import cumsum, hstack + +from ite.cost.x_initialization import InitX +from ite.cost.x_verification import VerCompSubspaceDims +from ite.cost.x_factory import co_factory + + +class BcondIShannon_HShannon(InitX, VerCompSubspaceDims): + """ Estimate conditional mutual information from unconditional Shannon + entropy. + + Partial initialization comes from 'InitX', verification is from + 'VerCompSubspaceDims' (see 'ite.cost.x_initialization.py', + 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, h_shannon_co_name='BHShannon_KnnK', + h_shannon_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + h_shannon_co_name : str, optional + You can change it to any Shannon entropy + estimator. (default is 'BHShannon_KnnK') + h_shannon_co_pars : dictionary, optional + Parameters for the Shannon entropy estimator. + (default is None (=> {}); in this case the + default parameter values of the Shannon + entropy estimator are used) + Examples + -------- + >>> import ite + >>> co1 = ite.cost.BcondIShannon_HShannon() + >>> co2 = ite.cost.BcondIShannon_HShannon(\ + h_shannon_co_name='BHShannon_KnnK') + >>> dict_ch = {'k': 2, 'eps': 0.2} + >>> co3 = ite.cost.BcondIShannon_HShannon(\ + h_shannon_co_name='BHShannon_KnnK', \ + h_shannon_co_pars=dict_ch) + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Shannon entropy estimator: + h_shannon_co_pars = h_shannon_co_pars or {} + h_shannon_co_pars['mult'] = True # guarantee this property + self.h_shannon_co = co_factory(h_shannon_co_name, + **h_shannon_co_pars) + + def estimation(self, y, ds): + """ Estimate conditional Shannon mutual information. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. 
+ ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th + subspace dimension. The last block is the conditioning + variable. + + Returns + ------- + cond_i : float + Estimated conditional mutual information. + + Examples + -------- + cond_i = co.estimation(y,ds) + + """ + + # verification: + self.verification_compatible_subspace_dimensions(y, ds) + len_ds = len(ds) + if len_ds <= 2: + raise Exception('At least two non-conditioning subspaces are ' + 'needed!') + + # initialization: + # 0,d_1,d_1+d_2,...,d_1+...+d_M; starting indices of the subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + idx_condition = range(cum_ds[len_ds-1], + cum_ds[len_ds-1] + ds[len_ds-1]) + + # h_joint: + h_joint = self.h_shannon_co.estimation(y) + + # h_cross: + h_cross = 0 + for m in range(len_ds-1): # non-conditioning subspaces + idx_m = range(cum_ds[m], cum_ds[m] + ds[m]) + h_cross += \ + self.h_shannon_co.estimation(y[:, hstack((idx_m, + idx_condition))]) + + # h_condition: + h_condition = self.h_shannon_co.estimation(y[:, idx_condition]) + + cond_i = -h_joint + h_cross - (len_ds - 2) * h_condition + + return cond_i diff --git a/ite-in-python/ite/cost/meta_k.py b/ite-in-python/ite/cost/meta_k.py new file mode 100644 index 0000000..afde60c --- /dev/null +++ b/ite-in-python/ite/cost/meta_k.py @@ -0,0 +1,731 @@ +""" Meta kernel estimators on distributions. """ + +from numpy import array, exp, log + +from ite.cost.x_factory import co_factory +from ite.cost.x_initialization import InitX, InitAlpha, InitUAlpha, \ + InitBagGram +from ite.cost.x_verification import VerEqualDSubspaces +from ite.shared import mixture_distribution + + +class MKExpJR1_HR(InitUAlpha, InitBagGram, VerEqualDSubspaces): + """ Exponentiated Jensen-Renyi kernel-1 estimator based on Renyi + entropy. + + The estimation is based on the relation K_EJR1(f_1,f_2) = + exp[-u x H_R((y^1+y^2)/2)], where K_EJR1 is the exponentiated + Jensen-Renyi kernel-1, H_R is the Renyi entropy, (y^1+y^2)/2 is the + mixture of y^1~f_1 and y^2~f_2 with 1/2-1/2 weights, u>0. + + Partial initialization comes from 'InitUAlpha' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, u=1, + renyi_co_name='BHRenyi_KnnK', renyi_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha: float, 0 < alpha < 1, optional + Parameter of the exponentiated Jensen-Renyi kernel-1 + (default is 0.99). + u: float, 0 < u, optional + Parameter of the exponentiated Jensen-Renyi kernel-1 (default + is 1). + renyi_co_name : str, optional + You can change it to any Renyi entropy estimator + (default is 'BDKL_KnnK'). + renyi_co_pars : dictionary, optional + Parameters for the Renyi entropy estimator + (default is None (=> {}); in this case the default + parameter values of the Renyi entropy estimator + are used). 
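A quick illustrative sketch of how the kernel behaves on pairs of sample 'bags': since K_EJR1 is the exponential of minus the Renyi entropy of the 1/2-1/2 mixture, its value should typically shrink as the two bags drift apart (the mixture spreads out and its entropy grows). Toy data and parameter values only:

    from numpy.random import randn
    import ite

    co = ite.cost.MKExpJR1_HR(alpha=0.9, u=1)
    y1 = randn(2000, 2)
    for shift in (0.0, 1.0, 3.0):
        k = co.estimation(y1, randn(2000, 2) + shift)
        print(shift, k)   # the kernel value typically decreases with the separation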
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKExpJR1_HR() + >>> co2 = ite.cost.MKExpJR1_HR(renyi_co_name='BHRenyi_KnnK') + >>> co3 = ite.cost.MKExpJR1_HR(alpha=0.7,u=1.2,\ + renyi_co_name='BHRenyi_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co4 = ite.cost.MKExpJR1_HR(renyi_co_name='BHRenyi_KnnK',\ + renyi_co_pars=dict_ch) + + """ + + # verification (alpha == 1 is checked via 'InitUAlpha'): + # if alpha <= 0 or alpha > 1: + # raise Exception('0 < alpha < 1 has to hold!') + + # initialize with 'InitUAlpha': + super().__init__(mult=mult, u=u, alpha=alpha) + + # initialize the Renyi entropy estimator: + renyi_co_pars = renyi_co_pars or {} + renyi_co_pars['mult'] = True # guarantee this property + renyi_co_pars['alpha'] = alpha # -||- + self.renyi_co = co_factory(renyi_co_name, **renyi_co_pars) + + # other attributes (u): + self.u = u + + def estimation(self, y1, y2): + """ Estimate the value of the exponentiated Jensen-Renyi kernel-1. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # mixture: + w = array([1/2, 1/2]) + mixture_y = mixture_distribution((y1, y2), w) + + k = exp(-self.u * self.renyi_co.estimation(mixture_y)) + + return k + + +class MKExpJR2_DJR(InitUAlpha, InitBagGram, VerEqualDSubspaces): + """ Exponentiated Jensen-Renyi kernel-2 estimator based on + Jensen-Renyi divergence + + The estimation is based on the relation K_EJR2(f_1,f_2) = + exp[-u x D_JR(f_1,f_2)], where K_EJR2 is the exponentiated + Jensen-Renyi kernel-2, D_JR is the Jensen-Renyi divergence with + uniform weights (w=(1/2,1/2)), u>0. + + Partial initialization comes from 'InitUAlpha' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, u=1, jr_co_name='MDJR_HR', + jr_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha: float, 0 < alpha < 1, optional + Parameter of the exponentiated Jensen-Renyi kernel-2 + (default is 0.99). + u: float, 0 < u, optional + Parameter of the exponentiated Jensen-Renyi kernel-2 (default + is 1). + jr_co_name : str, optional + You can change it to any Jensen-Renyi divergence + estimator (default is 'MDJR_HR'). + jr_co_pars : dictionary, optional + Parameters for the Jensen-Renyi divergence estimator + (default is None (=> {}); in this case the default + parameter values of the Jensen-Renyi divergence + estimator are used). 
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKExpJR2_DJR() + >>> co2 = ite.cost.MKExpJR2_DJR(jr_co_name='MDJR_HR') + >>> co3 = ite.cost.MKExpJR2_DJR(alpha=0.7,u=1.2,\ + jr_co_name='MDJR_HR') + + """ + + # verification (alpha == 1 is checked via 'InitUAlpha'): + # if alpha <= 0 or alpha > 1: + # raise Exception('0 < alpha < 1 has to hold!') + + # initialize with 'InitUAlpha': + super().__init__(mult=mult, u=u, alpha=alpha) + + # initialize the Jensen-Renyi divergence estimator: + jr_co_pars = jr_co_pars or {} + jr_co_pars['mult'] = True # guarantee this property + jr_co_pars['alpha'] = alpha # -||- + jr_co_pars['w'] = array([1/2, 1/2]) # uniform weights + self.jr_co = co_factory(jr_co_name, **jr_co_pars) + + # other attributes (u): + self.u = u + + def estimation(self, y1, y2): + """ Estimate the value of the exponentiated Jensen-Renyi kernel-2. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + k = exp(-self.u * self.jr_co.estimation(y1, y2)) + + return k + + +class MKExpJS_DJS(InitX, InitBagGram, VerEqualDSubspaces): + """ Exponentiated Jensen-Shannon kernel estimator based on + Jensen-Shannon divergence + + The estimation is based on the relation K_JS(f_1,f_2) = + exp[-u x D_JS(f_1,f_2)], where K_JS is the exponentiated + Jensen-Shannon kernel, D_JS is the Jensen-Shannon divergence with + uniform weights (w=(1/2,1/2)), u>0. + + Partial initialization comes from 'InitX' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, u=1, js_co_name='MDJS_HS', + js_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + u: float, 0 < u, optional + Parameter of the exponentiated Jensen-Shannon kernel (default + is 1). + js_co_name : str, optional + You can change it to any Jensen-Shannon divergence + estimator (default is 'MDJS_HS'). + js_co_pars : dictionary, optional + Parameters for the Jensen-Shannnon divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Jensen-Shannon + divergence estimator are used). 
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKExpJS_DJS() + >>> co2 = ite.cost.MKExpJS_DJS(u=1.2, js_co_name='MDJS_HS') + + """ + + if u <= 0: + raise Exception('u has to be positive!') + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Jensen-Shannon divergence estimator: + js_co_pars = js_co_pars or {} + js_co_pars['mult'] = True # guarantee this property + js_co_pars['w'] = array([1/2, 1/2]) # uniform weights + self.js_co = co_factory(js_co_name, **js_co_pars) + + # other attributes (u): + self.u = u + + def estimation(self, y1, y2): + """ Estimate the value of the exponentiated Jensen-Shannon kernel. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Andre F. T. Martins, Pedro M. Q. Aguiar, and Mario A. T. + Figueiredo. Tsallis kernels on measures. In Information Theory + Workshop (ITW), pages 298-302, 2008. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + k = exp(-self.u * self.js_co.estimation(y1, y2)) + + return k + + +class MKExpJT1_HT(InitUAlpha, InitBagGram, VerEqualDSubspaces): + """ Exponentiated Jensen-Tsallis kernel-1 estimator based on Tsallis + entropy. + + The estimation is based on the relation K_EJT1(f_1,f_2) = + exp[-u x H_T((y^1+y^2)/2)], where K_EJT1 is the exponentiated + Jensen-Tsallis kernel-1, H_T is the Tsallis entropy, (y^1+y^2)/2 is + the mixture of y^1~f_1 and y^2~f_2 with uniform (1/2,1/2) weights, u>0. + + Partial initialization comes from 'InitUAlpha' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, u=1, + tsallis_co_name='BHTsallis_KnnK', tsallis_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha: float, 0 < alpha <= 2, \ne 1, optional + Parameter of the exponentiated Jensen-Tsallis kernel-1 + (default is 0.99). + u: float, 0 < u, optional + Parameter of the exponentiated Jensen-Tsallis kernel-1 (default + is 1). + tsallis_co_name : str, optional + You can change it to any Tsallis entropy + estimator (default is 'BHTsallis_KnnK'). + tsallis_co_pars : dictionary, optional + Parameters for the Tsallis entropy estimator + (default is None (=> {}); in this case the + default parameter values of the Tsallis entropy + estimator are used). 
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKExpJT1_HT() + >>> co2 = ite.cost.MKExpJT1_HT(tsallis_co_name='BHTsallis_KnnK') + >>> co3 = ite.cost.MKExpJT1_HT(alpha=0.7,u=1.2,\ + tsallis_co_name='BHTsallis_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co4 = ite.cost.MKExpJT1_HT(tsallis_co_name='BHTsallis_KnnK',\ + tsallis_co_pars=dict_ch) + + """ + + # verification (alpha == 1 is checked via 'InitUAlpha'): + # if alpha <= 0 or alpha > 2: + # raise Exception('0 < alpha <= 2 has to hold!') + + # initialize with 'InitUAlpha': + super().__init__(mult=mult, u=u, alpha=alpha) + + # initialize the Tsallis entropy estimator: + tsallis_co_pars = tsallis_co_pars or {} + tsallis_co_pars['mult'] = True # guarantee this property + tsallis_co_pars['alpha'] = alpha # -||- + self.tsallis_co = co_factory(tsallis_co_name, **tsallis_co_pars) + + # other attributes (u): + self.u = u + + def estimation(self, y1, y2): + """ Estimate exponentiated Jensen-Tsallis kernel-1. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # mixture: + w = array([1/2, 1/2]) + mixture_y = mixture_distribution((y1, y2), w) + + k = exp(-self.u * self.tsallis_co.estimation(mixture_y)) + + return k + + +class MKExpJT2_DJT(InitUAlpha, InitBagGram, VerEqualDSubspaces): + """ Exponentiated Jensen-Tsallis kernel-2 estimator based on + Jensen-Tsallis divergence. + + The estimation is based on the relation K_EJT2(f_1,f_2) = + exp[-u x D_JT(f_1,f_2)], where K_EJT2 is the exponentiated + Jensen-Tsallis kernel-2, D_JT is the Jensen-Tsallis divergence, u>0. + + Partial initialization comes from 'InitUAlpha' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, u=1, jt_co_name='MDJT_HT', + jt_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha: float, 0 < alpha <= 2, \ne 1, optional + Parameter of the exponentiated Jensen-Tsallis kernel-2 + (default is 0.99). + u: float, 0 < u, optional + Parameter of the exponentiated Jensen-Tsallis kernel-2 (default + is 1). + jt_co_name : str, optional + You can change it to any Jensen-Tsallis divergence + estimator (default is 'MDJT_HT'). + jt_co_pars : dictionary, optional + Parameters for the Jensen-Tsallis divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Jensen-Tsallis + divergence estimator are used). 
+ + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKExpJT2_DJT() + >>> co2 = ite.cost.MKExpJT2_DJT(jt_co_name='MDJT_HT') + >>> co3 = ite.cost.MKExpJT2_DJT(alpha=0.7,u=1.2,\ + jt_co_name='MDJT_HT') + + """ + + # verification (alpha == 1 is checked via 'InitUAlpha'): + # if alpha <= 0 or alpha > 2: + # raise Exception('0 < alpha <= 2 has to hold!') + + # initialize with 'InitUAlpha': + super().__init__(mult=mult, u=u, alpha=alpha) + + # initialize the Jensen-Tsallis divergence estimator: + jt_co_pars = jt_co_pars or {} + jt_co_pars['mult'] = True # guarantee this property + jt_co_pars['alpha'] = alpha # -||- + self.jt_co = co_factory(jt_co_name, **jt_co_pars) + + # other attributes (u): + self.u = u + + def estimation(self, y1, y2): + """ Estimate exponentiated Jensen-Tsallis kernel-2. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + k = exp(-self.u * self.jt_co.estimation(y1, y2)) + + return k + + +class MKJS_DJS(InitX, InitBagGram, VerEqualDSubspaces): + """ Jensen-Shannon kernel estimator based on Jensen-Shannon divergence. + + The estimation is based on the relation K_JS(f_1,f_2) = log(2) - + D_JS(f_1,f_2), where K_JS is the Jensen-Shannon kernel, and D_JS is + the Jensen-Shannon divergence with uniform weights (w=(1/2,1/2)). + + Partial initialization comes from 'InitX' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, js_co_name='MDJS_HS', js_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + js_co_name : str, optional + You can change it to any Jensen-Shannon divergence + estimator (default is 'MDJS_HS'). + js_co_pars : dictionary, optional + Parameters for the Jensen-Shannnon divergence + estimator (default is None (=> {}); in this case the + default parameter values of the Jensen-Shannon + divergence estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKJS_DJS() + >>> co2 = ite.cost.MKJS_DJS(js_co_name='MDJS_HS') + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # initialize the Jensen-Shannon divergence estimator: + js_co_pars = js_co_pars or {} + js_co_pars['mult'] = True # guarantee this property + js_co_pars['w'] = array([1/2, 1/2]) # uniform weights + self.js_co = co_factory(js_co_name, **js_co_pars) + + def estimation(self, y1, y2): + """ Estimate the value of the Jensen-Shannon kernel. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. 
Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. + + Andre F. T. Martins, Pedro M. Q. Aguiar, and Mario A. T. + Figueiredo. Tsallis kernels on measures. In Information Theory + Workshop (ITW), pages 298-302, 2008. + + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + k = log(2) - self.js_co.estimation(y1, y2) + + return k + + +class MKJT_HT(InitAlpha, InitBagGram, VerEqualDSubspaces): + """ Jensen-Tsallis kernel estimator based on Tsallis entropy. + + The estimation is based on the relation K_JT(f_1,f_2) = log_{alpha}(2) + - T_alpha(f_1,f_2), where (i) K_JT is the Jensen-Tsallis kernel, (ii) + log_{alpha} is the alpha-logarithm, (iii) T_alpha is the + Jensen-Tsallis alpha-difference (that can be expressed in terms of the + Tsallis entropy) + + Partial initialization comes from 'InitAlpha' and 'InitBagGram', + verification is inherited from 'VerEqualDSubspaces' (see + 'ite.cost.x_initialization.py', 'ite.cost.x_verification.py'). + + """ + + def __init__(self, mult=True, alpha=0.99, + tsallis_co_name='BHTsallis_KnnK', tsallis_co_pars=None): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha: float, 0 < alpha <= 2, \ne 1, optional + Parameter of the Jensen-Tsallis kernel (default is 0.99). + tsallis_co_name : str, optional + You can change it to any Tsallis entropy + estimator (default is 'BHTsallis_KnnK'). + tsallis_co_pars : dictionary, optional + Parameters for the Tsallis entropy estimator + (default is None (=> {}); in this case the + default parameter values of the Tsallis entropy + estimator are used). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.MKJT_HT() + >>> co2 = ite.cost.MKJT_HT(tsallis_co_name='BHTsallis_KnnK') + >>> co3 = ite.cost.MKJT_HT(alpha=0.7,\ + tsallis_co_name='BHTsallis_KnnK') + >>> dict_ch = {'knn_method': 'cKDTree', 'k': 4, 'eps': 0.1} + >>> co4 = ite.cost.MKJT_HT(tsallis_co_name='BHTsallis_KnnK',\ + tsallis_co_pars=dict_ch) + + """ + + # verification (alpha == 1 is checked via 'InitAlpha'): + if alpha <= 0 or alpha > 2: + raise Exception('0 < alpha <= 2 has to hold!') + + # initialize with 'InitAlpha': + super().__init__(mult=mult, alpha=alpha) + + # initialize the Tsallis entropy estimator: + tsallis_co_pars = tsallis_co_pars or {} + tsallis_co_pars['mult'] = True # guarantee this property + tsallis_co_pars['alpha'] = alpha # -||- + self.tsallis_co = co_factory(tsallis_co_name, **tsallis_co_pars) + + # other attribute (log_alpha_2 = alpha-logarithm of 2): + self.alpha = alpha + self.log_alpha_2 = (2**(1 - alpha) - 1) / (1 - alpha) + + def estimation(self, y1, y2): + """ Estimate the value of the Jensen-Tsallis kernel. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + k : float + Estimated kernel value. + + References + ---------- + Andre F. T. Martins, Noah A. Smith, Eric P. Xing, Pedro M. Q. + Aguiar, and Mario A. T. Figueiredo. Nonextensive information + theoretical kernels on measures. Journal of Machine Learning + Research, 10:935-975, 2009. 
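+
+ With uniform weights w = (1/2,1/2), the function body below
+ computes the Jensen-Tsallis alpha-difference
+ T_alpha(y1,y2) = H_T((y1+y2)/2) - [w[0]^alpha x H_T(y1) +
+ w[1]^alpha x H_T(y2)], where H_T is the Tsallis entropy, and
+ returns k = log_alpha(2) - T_alpha(y1,y2) with
+ log_alpha(2) = (2^(1-alpha) - 1) / (1 - alpha).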
+ + Examples + -------- + k = co.estimation(y1,y2) + + """ + + # verification: + self.verification_equal_d_subspaces(y1, y2) + + # Jensen-Tsallis alpha-difference (jt): + a = self.alpha + + w = array([1/2, 1/2]) + mixture_y = mixture_distribution((y1, y2), w) # mixture + jt = \ + self.tsallis_co.estimation(mixture_y) -\ + (w[0]**a * self.tsallis_co.estimation(y1) + + w[1]**a * self.tsallis_co.estimation(y2)) + + k = self.log_alpha_2 - jt + + return k diff --git a/ite-in-python/ite/cost/x_analytical_values.py b/ite-in-python/ite/cost/x_analytical_values.py new file mode 100644 index 0000000..e0a44b6 --- /dev/null +++ b/ite-in-python/ite/cost/x_analytical_values.py @@ -0,0 +1,1043 @@ +""" Analytical expressions of information theoretical quantities. """ + +from scipy.linalg import det, inv +from numpy import log, prod, absolute, exp, pi, trace, dot, cumsum, \ + hstack, ix_, sqrt, eye, diag, array, sum + +from ite.shared import compute_h2 + + +def analytical_value_h_shannon(distr, par): + """ Analytical value of the Shannon entropy for the given distribution. + + Parameters + ---------- + distr : str + Name of the distribution. + par : dictionary + Parameters of the distribution. If distr = 'uniform': par["a"], + par["b"], par["l"] <- lxU[a,b]. If distr = 'normal' : par["cov"] + is the covariance matrix. + + Returns + ------- + h : float + Analytical value of the Shannon entropy. + + """ + + if distr == 'uniform': + # par = {"a": a, "b": b, "l": l} + h = log(prod(par["b"] - par["a"])) + log(absolute(det(par["l"]))) + elif distr == 'normal': + # par = {"cov": c} + dim = par["cov"].shape[0] # =c.shape[1] + h = 1/2 * log((2 * pi * exp(1))**dim * det(par["cov"])) + # = 1/2 * log(det(c)) + d / 2 * log(2*pi) + d / 2 + else: + raise Exception('Distribution=?') + + return h + + +def analytical_value_c_cross_entropy(distr1, distr2, par1, par2): + """ Analytical value of the cross-entropy for the given distributions. + + Parameters + ---------- + distr1, distr2 : str + Name of the distributions. + par1, par2 : dictionaries + Parameters of the distribution. If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + c : float + Analytical value of the cross-entropy. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + dim = len(m1) + + invc2 = inv(c2) + diffm = m1 - m2 + + c = 1/2 * (dim * log(2*pi) + log(det(c2)) + trace(dot(invc2, c1)) + + dot(diffm, dot(invc2, diffm))) + else: + raise Exception('Distribution=?') + + return c + + +def analytical_value_d_kullback_leibler(distr1, distr2, par1, par2): + """ Analytical value of the KL divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of the distributions. + par1, par2 : dictionary-s + Parameters of the distributions. If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + d : float + Analytical value of the Kullback-Leibler divergence. 
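+
+ In the Gaussian case handled below, the evaluated closed form is
+ D(N(m1,c1)||N(m2,c2)) = 1/2 x [log(det(c2)/det(c1)) +
+ trace(inv(c2) x c1) + (m1-m2)^T x inv(c2) x (m1-m2) - dim],
+ which is exactly what the function body computes.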
+ + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + dim = len(m1) + + invc2 = inv(c2) + diffm = m1 - m2 + + d = 1/2 * (log(det(c2)/det(c1)) + trace(dot(invc2, c1)) + + dot(diffm, dot(invc2, diffm)) - dim) + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_i_shannon(distr, par): + """ Analytical value of mutual information for the given distribution. + + Parameters + ---------- + distr : str + Name of the distribution. + par : dictionary + Parameters of the distribution. If distr = 'normal': par["ds"], + par["cov"] are the vector of component dimensions and the (joint) + covariance matrix. + + Returns + ------- + i : float + Analytical value of the Shannon mutual information. + + """ + + if distr == 'normal': + c, ds = par["cov"], par["ds"] + # 0,d_1,d_1+d_2,...,d_1+...+d_{M-1}; starting indices of the + # subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + i = 1 + for m in range(len(ds)): + idx = range(cum_ds[m], cum_ds[m] + ds[m]) + i *= det(c[ix_(idx, idx)]) + + i = log(i / det(c)) / 2 + else: + raise Exception('Distribution=?') + + return i + + +def analytical_value_h_renyi(distr, alpha, par): + """ Analytical value of the Renyi entropy for the given distribution. + + Parameters + ---------- + distr : str + Name of the distribution. + alpha : float, alpha \ne 1 + Parameter of the Renyi entropy. + par : dictionary + Parameters of the distribution. If distr = 'uniform': par["a"], + par["b"], par["l"] <- lxU[a,b]. If distr = 'normal' : par["cov"] + is the covariance matrix. + + Returns + ------- + h : float + Analytical value of the Renyi entropy. + + References + ---------- + Kai-Sheng Song. Renyi information, loglikelihood and an intrinsic + distribution measure. Journal of Statistical Planning and Inference + 93: 51-69, 2001. + + """ + + if distr == 'uniform': + # par = {"a": a, "b": b, "l": l} + # We also apply the transformation rule of the Renyi entropy in + # case of linear transformations: + h = log(prod(par["b"] - par["a"])) + log(absolute(det(par["l"]))) + elif distr == 'normal': + # par = {"cov": c} + dim = par["cov"].shape[0] # =c.shape[1] + h = log((2*pi)**(dim / 2) * sqrt(absolute(det(par["cov"])))) -\ + dim * log(alpha) / 2 / (1 - alpha) + else: + raise Exception('Distribution=?') + + return h + + +def analytical_value_h_tsallis(distr, alpha, par): + """ Analytical value of the Tsallis entropy for the given distribution. + + Parameters + ---------- + distr : str + Name of the distribution. + alpha : float, alpha \ne 1 + Parameter of the Tsallis entropy. + par : dictionary + Parameters of the distribution. If distr = 'uniform': par["a"], + par["b"], par["l"] <- lxU[a,b]. If distr = 'normal' : par["cov"] + is the covariance matrix. + + Returns + ------- + h : float + Analytical value of the Tsallis entropy. + + """ + + # Renyi entropy: + h = analytical_value_h_renyi(distr, alpha, par) + + # Renyi entropy -> Tsallis entropy: + h = (exp((1 - alpha) * h) - 1) / (1 - alpha) + + return h + + +def analytical_value_k_prob_product(distr1, distr2, rho, par1, par2): + """ Analytical value of the probability product kernel. + + Parameters + ---------- + distr1, distr2 : str + Name of the distributions. + rho: float, >0 + Parameter of the probability product kernel. + par1, par2 : dictionary-s + Parameters of the distributions. 
If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + k : float + Analytical value of the probability product kernel. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + dim = len(m1) + + # inv1, inv2, inv12: + inv1, inv2 = inv(c1), inv(c2) + inv12 = inv(inv1+inv2) + + m12 = dot(inv1, m1) + dot(inv2, m2) + exp_arg = \ + dot(m1, dot(inv1, m1)) + dot(m2, dot(inv2, m2)) -\ + dot(m12, dot(inv12, m12)) + + k = (2 * pi)**((1 - 2 * rho) * dim / 2) * rho**(-dim / 2) *\ + absolute(det(inv12))**(1 / 2) * \ + absolute(det(c1))**(-rho / 2) * \ + absolute(det(c2))**(-rho / 2) * exp(-rho / 2 * exp_arg) + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_k_expected(distr1, distr2, kernel, par1, par2): + """ Analytical value of expected kernel for the given distributions. + + Parameters + ---------- + distr1, distr2 : str + Names of the distributions. + kernel: Kernel class. + par1, par2 : dictionary-s + Parameters of the distributions. If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + k : float + Analytical value of the expected kernel. + + References + ---------- + Krikamol Muandet, Kenji Fukumizu, Francesco Dinuzzo, and Bernhard + Scholkopf. Learning from distributions via support measure machines. + In Advances in Neural Information Processing Systems (NIPS), pages + 10-18, 2011. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + + if kernel.name == 'RBF': + dim = len(m1) + gam = 1 / kernel.sigma ** 2 + diffm = m1 - m2 + exp_arg = dot(dot(diffm, inv(c1 + c2 + eye(dim) / gam)), diffm) + k = exp(-exp_arg / 2) / \ + sqrt(absolute(det(gam * c1 + gam * c2 + eye(dim)))) + + elif kernel.name == 'polynomial': + if kernel.exponent == 2: + if kernel.c == 1: + k = (dot(m1, m2) + 1)**2 + sum(c1 * c2) + \ + dot(m1, dot(c2, m1)) + dot(m2, dot(c1, m2)) + else: + raise Exception('The offset of the polynomial kernel' + + ' (c) should be one!') + + elif kernel.exponent == 3: + if kernel.c == 1: + k = (dot(m1, m2) + 1)**3 + \ + 6 * dot(dot(c1, m1), dot(c2, m2)) + \ + 3 * (dot(m1, m2) + 1) * (sum(c1 * c2) + + dot(m1, dot(c2, m1)) + + dot(m2, dot(c1, m2))) + else: + raise Exception('The offset of the polynomial kernel' + + ' (c) should be one!') + + else: + raise Exception('The exponent of the polynomial kernel ' + + 'should be either 2 or 3!') + else: + raise Exception('Kernel=?') + + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_d_mmd(distr1, distr2, kernel, par1, par2): + """ Analytical value of MMD for the given distributions. + + Parameters + ---------- + distr1, distr2 : str + Names of the distributions. + kernel: Kernel class. + par1, par2 : dictionary-s + Parameters of the distributions. If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + d : float + Analytical value of MMD. 
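+
+ The returned value is assembled from analytical expected kernel
+ values as MMD(P,Q) = sqrt(E_k(P,P) + E_k(Q,Q) - 2 x E_k(P,Q)),
+ where E_k(.,.) is computed by 'analytical_value_k_expected'; see
+ the function body below.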
+ + """ + + d_pp = analytical_value_k_expected(distr1, distr1, kernel, par1, par1) + d_qq = analytical_value_k_expected(distr2, distr2, kernel, par2, par2) + d_pq = analytical_value_k_expected(distr1, distr2, kernel, par1, par2) + d = sqrt(d_pp + d_qq - 2 * d_pq) + + return d + + +def analytical_value_h_sharma_mittal(distr, alpha, beta, par): + """ Analytical value of the Sharma-Mittal entropy. + + Parameters + ---------- + distr : str + Name of the distribution. + alpha : float, 0 < alpha \ne 1 + Parameter of the Sharma-Mittal entropy. + beta : float, beta \ne 1 + Parameter of the Sharma-Mittal entropy. + + par : dictionary + Parameters of the distribution. If distr = 'normal' : par["cov"] + = covariance matrix. + + Returns + ------- + h : float + Analytical value of the Sharma-Mittal entropy. + + References + ---------- + Frank Nielsen and Richard Nock. A closed-form expression for the + Sharma-Mittal entropy of exponential families. Journal of Physics A: + Mathematical and Theoretical, 45:032003, 2012. + + """ + + if distr == 'normal': + # par = {"cov": c} + c = par['cov'] + dim = c.shape[0] # =c.shape[1] + h = (((2*pi)**(dim / 2) * sqrt(absolute(det(c))))**(1 - beta) / + alpha**(dim * (1 - beta) / (2 * (1 - alpha))) - 1) / \ + (1 - beta) + + else: + raise Exception('Distribution=?') + + return h + + +def analytical_value_h_phi(distr, par, c): + """ Analytical value of the Phi entropy for the given distribution. + + Parameters + ---------- + distr : str + Name of the distribution. + par : dictionary + Parameters of the distribution. If distr = 'uniform': par.a, + par.b in U[a,b]. + c : float, >=1 + Parameter of the Phi-entropy: phi = lambda x: x**c + + Returns + ------- + h : float + Analytical value of the Phi entropy. + + """ + + if distr == 'uniform': + a, b = par['a'], par['b'] + h = 1 / (b-a)**c + else: + raise Exception('Distribution=?') + + return h + + +def analytical_value_d_chi_square(distr1, distr2, par1, par2): + """ Analytical value of chi^2 divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s. + Names of distributions. + par1, par2 : dictionary-s. + Parameters of distributions. If (distr1, distr2) = + ('uniform', 'uniform'), then both distributions are + uniform: distr1 = U[0,a] with a = par1['a'], distr2 = + U[0,b] with b = par2['a']. If (distr1, distr2) = + ('normalI', 'normalI'), then distr1 = N(m1,I) where m1 = + par1['mean'], distr2 = N(m2,I), where m2 = par2['mean']. + + Returns + ------- + d : float + Analytical value of the (Pearson) chi^2 divergence. + + References + ---------- + Frank Nielsen and Richard Nock. On the chi square and higher-order chi + distances for approximating f-divergence. IEEE Signal Processing + Letters, 2:10-13, 2014. + + """ + + if distr1 == 'uniform' and distr2 == 'uniform': + a = par1['a'] + b = par2['a'] + d = prod(b) / prod(a) - 1 + elif distr1 == 'normalI' and distr2 == 'normalI': + m1 = par1['mean'] + m2 = par2['mean'] + diffm = m2 - m1 + d = exp(dot(diffm, diffm)) - 1 + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_l2(distr1, distr2, par1, par2): + """ Analytical value of the L2 divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('uniform', 'uniform'), then both distributions are + uniform: distr1 = U[0,a] with a = par1['a'], distr2 = + U[0,b] with b = par2['a']. 
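+
+ In this uniform setting the function body below evaluates the
+ closed form d = sqrt(1/prod(b) - 1/prod(a)); this expression
+ presumes that the support of distr2 (U[0,b]) lies inside the
+ support of distr1 (U[0,a]), i.e. b <= a coordinate-wise.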
+ + Returns + ------- + d : float + Analytical value of the L2 divergence. + + """ + + if distr1 == 'uniform' and distr2 == 'uniform': + a = par1['a'] + b = par2['a'] + d = sqrt(1 / prod(b) - 1 / prod(a)) + + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_renyi(distr1, distr2, alpha, par1, par2): + """ Analytical value of Renyi divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + alpha : float, \ne 1 + Parameter of the Sharma-Mittal divergence. + par1, par2 : dictionary-s + Parameters of distributions. + If (distr1,distr2) = ('normal','normal'), then distr1 = + N(m1,c1), where m1 = par1['mean'], c1 = par1['cov'], + distr2 = N(m2,c2), where m2 = par2['mean'], c2 = + par2['cov']. + + Returns + ------- + d : float + Analytical value of the Renyi divergence. + + References + ---------- + Manuel Gil. On Renyi Divergence Measures for Continuous Alphabet + Sources. Phd Thesis, Queen’s University, 2011. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + + mix_c = alpha * c2 + (1 - alpha) * c1 + diffm = m1 - m2 + + d = alpha * (1/2 * dot(dot(diffm, inv(mix_c)), diffm) - + 1 / (2 * alpha * (alpha - 1)) * + log(absolute(det(mix_c)) / + (det(c1)**(1 - alpha) * det(c2)**alpha))) + + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_tsallis(distr1, distr2, alpha, par1, par2): + """ Analytical value of Tsallis divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + alpha : float, \ne 1 + Parameter of the Sharma-Mittal divergence. + par1, par2 : dictionary-s + Parameters of distributions. + If (distr1,distr2) = ('normal','normal'), then distr1 = + N(m1,c1), where m1 = par1['mean'], c1 = par1['cov'], + distr2 = N(m2,c2), where m2 = par2['mean'], c2 = + par2['cov']. + + Returns + ------- + d : float + Analytical value of the Tsallis divergence. + + + """ + + if distr1 == 'normal' and distr2 == 'normal': + d = analytical_value_d_renyi(distr1, distr2, alpha, par1, par2) + d = (exp((alpha - 1) * d) - 1) / (alpha - 1) + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_sharma_mittal(distr1, distr2, alpha, beta, par1, + par2): + """ Analytical value of the Sharma-Mittal divergence. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + alpha : float, 0 < alpha \ne 1 + Parameter of the Sharma-Mittal divergence. + beta : float, beta \ne 1 + Parameter of the Sharma-Mittal divergence. + par1, par2 : dictionary-s + Parameters of distributions. + If (distr1,distr2) = ('normal','normal'), then distr1 = + N(m1,c1), where m1 = par1['mean'], c1 = par1['cov'], + distr2 = N(m2,c2), where m2 = par2['mean'], c2 = + par2['cov']. + + Returns + ------- + D : float + Analytical value of the Tsallis divergence. + + References + ---------- + Frank Nielsen and Richard Nock. A closed-form expression for the + Sharma-Mittal entropy of exponential families. Journal of Physics A: + Mathematical and Theoretical, 45:032003, 2012. 
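+
+ For two Gaussians the function body below evaluates the closed
+ form d = (rho^((1-beta)/(1-alpha)) - 1) / (beta - 1), where
+ rho = exp(-J),
+ J = [log(|det(c1)|^alpha x |det(c2)|^(1-alpha) / |det(c)|) +
+ alpha x (1-alpha) x (m1-m2)^T x inv(c) x (m1-m2)] / 2 and
+ c = inv(alpha x inv(c1) + (1-alpha) x inv(c2)).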
+ + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + + c = inv(alpha * inv(c1) + (1 - alpha) * inv(c2)) + diffm = m1 - m2 + + # Jensen difference divergence, c2: + j = (log(absolute(det(c1))**alpha * absolute(det(c2))**(1 - + alpha) / + absolute(det(c))) + alpha * (1 - alpha) * + dot(dot(diffm, inv(c)), diffm)) / 2 + c2 = exp(-j) + + d = (c2**((1 - beta) / (1 - alpha)) - 1) / (beta - 1) + + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_bregman(distr1, distr2, alpha, par1, par2): + """ Analytical value of Bregman divergence for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + alpha : float, \ne 1 + Parameter of the Bregman divergence. + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('uniform', 'uniform'), then both distributions are + uniform: distr1 = U[0,a] with a = par1['a'], distr2 = + U[0,b] with b = par2['a']. + + Returns + ------- + d : float + Analytical value of the Bregman divergence. + + """ + + if distr1 == 'uniform' and distr2 == 'uniform': + a = par1['a'] + b = par2['a'] + d = \ + -1 / (alpha - 1) * prod(b)**(1 - alpha) +\ + 1 / (alpha - 1) * prod(a)**(1 - alpha) + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_d_jensen_renyi(distr1, distr2, w, par1, par2): + """ Analytical value of the Jensen-Renyi divergence. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + w : vector, w[i] > 0 (for all i), sum(w) = 1 + Weight used in the Jensen-Renyi divergence. + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('normal', 'normal'), then both distributions are normal: + distr1 = N(m1,s1^2 I) with m1 = par1['mean'], s1 = + par1['std'], distr2 = N(m2,s2^2 I) with m2 = + par2['mean'], s2 = par2['std']. + + Returns + ------- + d : float + Analytical value of the Jensen-Renyi divergence. + + References + ---------- + Fei Wang, Tanveer Syeda-Mahmood, Baba C. Vemuri, David Beymer, and + Anand Rangarajan. Closed-Form Jensen-Renyi Divergence for Mixture of + Gaussians and Applications to Group-Wise Shape Registration. Medical + Image Computing and Computer-Assisted Intervention, 12: 648–655, 2009. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + m1, s1 = par1['mean'], par1['std'] + m2, s2 = par2['mean'], par2['std'] + term1 = compute_h2(w, (m1, m2), (s1, s2)) + term2 = \ + w[0] * compute_h2((1,), (m1,), (s1,)) +\ + w[1] * compute_h2((1,), (m2,), (s2,)) + + # H2(\sum_i wi yi) - \sum_i w_i H2(yi), where H2 is the quadratic + # Renyi entropy: + d = term1 - term2 + + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_i_renyi(distr, alpha, par): + """ Analytical value of the Renyi mutual information. + + Parameters + ---------- + distr : str + Name of the distribution. + alpha : float + Parameter of the Renyi mutual information. + par : dictionary + Parameters of the distribution. If distr = 'normal': par["cov"] + is the covariance matrix. + + Returns + ------- + i : float + Analytical value of the Renyi mutual information. 
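+
+ For a normal distribution with (joint) covariance matrix c, the
+ function body below evaluates
+ i = [-alpha/2 x log(det(c)) - (1-alpha)/2 x log(prod(diag(c))) -
+ 1/2 x log(det(alpha x inv(c) + (1-alpha) x diag(1/diag(c))))] /
+ (alpha - 1).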
+ + """ + + if distr == 'normal': + c = par["cov"] + + t1 = -alpha / 2 * log(det(c)) + t2 = -(1 - alpha) / 2 * log(prod(diag(c))) + t3 = log(det(alpha * inv(c) + (1 - alpha) * diag(1 / diag(c)))) / 2 + i = 1 / (alpha - 1) * (t1 + t2 - t3) + else: + raise Exception('Distribution=?') + + return i + + +def analytical_value_k_ejr1(distr1, distr2, u, par1, par2): + """ Analytical value of the Jensen-Renyi kernel-1. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + u : float, >0 + Parameter of the Jensen-Renyi kernel-1 (alpha = 2: fixed). + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('normal', 'normal'), then both distributions are normal: + distr1 = N(m1,s1^2 I) with m1 = par1['mean'], s1 = + par1['std'], distr2 = N(m2,s2^2 I) with m2 = + par2['mean'], s2 = par2['std']. + + References + ---------- + Fei Wang, Tanveer Syeda-Mahmood, Baba C. Vemuri, David Beymer, and + Anand Rangarajan. Closed-Form Jensen-Renyi Divergence for Mixture of + Gaussians and Applications to Group-Wise Shape Registration. Medical + Image Computing and Computer-Assisted Intervention, 12: 648–655, 2009. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + m1, s1 = par1['mean'], par1['std'] + m2, s2 = par2['mean'], par2['std'] + w = array([1/2, 1/2]) + h = compute_h2(w, (m1, m2), (s1, s2)) # quadratic Renyi entropy + k = exp(-u * h) + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_k_ejr2(distr1, distr2, u, par1, par2): + """ Analytical value of the Jensen-Renyi kernel-2. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + u : float, >0 + Parameter of the Jensen-Renyi kernel-2 (alpha = 2: fixed). + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('normal', 'normal'), then both distributions are normal: + distr1 = N(m1,s1^2 I) with m1 = par1['mean'], s1 = + par1['std'], distr2 = N(m2,s2^2 I) with m2 = + par2['mean'], s2 = par2['std']. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + w = array([1/2, 1/2]) + d = analytical_value_d_jensen_renyi(distr1, distr2, w, par1, par2) + k = exp(-u * d) + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_k_ejt1(distr1, distr2, u, par1, par2): + """ Analytical value of the Jensen-Tsallis kernel-1. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + u : float, >0 + Parameter of the Jensen-Tsallis kernel-1 (alpha = 2: fixed). + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('normal', 'normal'), then both distributions are normal: + distr1 = N(m1,s1^2 I) with m1 = par1['mean'], s1 = + par1['std'], distr2 = N(m2,s2^2 I) with m2 = + par2['mean'], s2 = par2['std']. + + References + ---------- + Fei Wang, Tanveer Syeda-Mahmood, Baba C. Vemuri, David Beymer, and + Anand Rangarajan. Closed-Form Jensen-Renyi Divergence for Mixture of + Gaussians and Applications to Group-Wise Shape Registration. Medical + Image Computing and Computer-Assisted Intervention, 12: 648–655, 2009. 
+ (Renyi entropy) + + """ + + if distr1 == 'normal' and distr2 == 'normal': + m1, s1 = par1['mean'], par1['std'] + m2, s2 = par2['mean'], par2['std'] + w = array([1/2, 1/2]) + h = compute_h2(w, (m1, m2), (s1, s2)) # quadratic Renyi entropy + # quadratic Renyi entropy -> quadratic Tsallis entropy: + h = 1 - exp(-h) + k = exp(-u * h) + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_k_ejt2(distr1, distr2, u, par1, par2): + """ Analytical value of the Jensen-Tsallis kernel-2. + + Parameters + ---------- + distr1, distr2 : str-s + Names of distributions. + u : float, >0 + Parameter of the Jensen-Tsallis kernel-2 (alpha = 2: fixed). + par1, par2 : dictionary-s + Parameters of distributions. If (distr1, distr2) = + ('normal', 'normal'), then both distributions are normal: + distr1 = N(m1,s1^2 I) with m1 = par1['mean'], s1 = + par1['std'], distr2 = N(m2,s2^2 I) with m2 = + par2['mean'], s2 = par2['std']. + + References + ---------- + Fei Wang, Tanveer Syeda-Mahmood, Baba C. Vemuri, David Beymer, and + Anand Rangarajan. Closed-Form Jensen-Renyi Divergence for Mixture of + Gaussians and Applications to Group-Wise Shape Registration. Medical + Image Computing and Computer-Assisted Intervention, 12: 648–655, 2009. + (analytical value of the Jensen-Renyi divergence) + + """ + + if distr1 == 'normal' and distr2 == 'normal': + m1, s1 = par1['mean'], par1['std'] + m2, s2 = par2['mean'], par2['std'] + w = array([1/2, 1/2]) + # quadratic Renyi entropy -> quadratic Tsallis entropy: + term1 = 1 - exp(-compute_h2(w, (m1, m2), (s1, s2))) + term2 = \ + w[0] * (1 - exp(-compute_h2((1, ), (m1, ), (s1,)))) +\ + w[1] * (1 - exp(-compute_h2((1,), (m2,), (s2,)))) + # H2(\sum_i wi Yi) - \sum_i w_i H2(Yi), where H2 is the quadratic + # Tsallis entropy: + d = term1 - term2 + + k = exp(-u * d) + else: + raise Exception('Distribution=?') + + return k + + +def analytical_value_d_hellinger(distr1, distr2, par1, par2): + """ Analytical value of Hellinger distance for the given distributions. + + Parameters + ---------- + distr1, distr2 : str-s + Names of the distributions. + par1, par2 : dictionary-s + Parameters of the distributions. If distr1 = distr2 = + 'normal': par1["mean"], par1["cov"] and par2["mean"], + par2["cov"] are the means and the covariance matrices. + + Returns + ------- + d : float + Analytical value of the Hellinger distance. + + """ + + if distr1 == 'normal' and distr2 == 'normal': + # covariance matrices, expectations: + c1, m1 = par1['cov'], par1['mean'] + c2, m2 = par2['cov'], par2['mean'] + + # "https://en.wikipedia.org/wiki/Hellinger_distance": Examples: + diffm = m1 - m2 + avgc = (c1 + c2) / 2 + inv_avgc = inv(avgc) + d = 1 - det(c1)**(1/4) * det(c2)**(1/4) / sqrt(det(avgc)) * \ + exp(-dot(diffm, dot(inv_avgc, diffm))/8) # D^2 + + d = sqrt(d) + else: + raise Exception('Distribution=?') + + return d + + +def analytical_value_cond_h_shannon(distr, par): + """ Analytical value of the conditional Shannon entropy. + + Parameters + ---------- + distr : str-s + Names of the distributions; 'normal'. + par : dictionary + Parameters of the distribution. If distr is 'normal': par["cov"] + and par["dim1"] are the covariance matrix and the dimension of + y1. + + Returns + ------- + cond_h : float + Analytical value of the conditional Shannon entropy. 
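+
+ The value follows the chain rule H(y1|y2) = H(y1,y2) - H(y2): the
+ joint entropy and the entropy of the conditioning block (the
+ coordinates after the first dim1 ones) are both obtained from
+ 'analytical_value_h_shannon'; see the function body below.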
+ + """ + + if distr == 'normal': + # h12 (=joint entropy): + h12 = analytical_value_h_shannon(distr, par) + + # h2 (=entropy of the conditioning variable): + c, dim1 = par['cov'], par['dim1'] # covariance matrix, dim(y1) + par = {"cov": c[dim1:, dim1:]} + h2 = analytical_value_h_shannon(distr, par) + + cond_h = h12 - h2 + + else: + raise Exception('Distribution=?') + + return cond_h + + +def analytical_value_cond_i_shannon(distr, par): + """ Analytical value of the conditional Shannon mutual information. + + Parameters + ---------- + distr : str-s + Names of the distributions; 'normal'. + par : dictionary + Parameters of the distribution. If distr is 'normal': + par["cov"] and par["ds"] are the (joint) covariance matrix and + the vector of subspace dimensions. + + Returns + ------- + cond_i : float + Analytical value of the conditional Shannon mutual + information. + + """ + + # initialization: + ds = par['ds'] + len_ds = len(ds) + # 0,d_1,d_1+d_2,...,d_1+...+d_M; starting indices of the subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + idx_condition = range(cum_ds[len_ds - 1], + cum_ds[len_ds - 1] + ds[len_ds - 1]) + + if distr == 'normal': + c = par['cov'] + + # h_joint: + h_joint = analytical_value_h_shannon(distr, par) + + # h_cross: + h_cross = 0 + for m in range(len_ds-1): # non-conditioning subspaces + idx_m = range(cum_ds[m], cum_ds[m] + ds[m]) + idx_m_and_condition = hstack((idx_m, idx_condition)) + par = {"cov": c[ix_(idx_m_and_condition, idx_m_and_condition)]} + h_cross += analytical_value_h_shannon(distr, par) + + # h_condition: + par = {"cov": c[ix_(idx_condition, idx_condition)]} + h_condition = analytical_value_h_shannon(distr, par) + + cond_i = -h_joint + h_cross - (len_ds - 2) * h_condition + + else: + raise Exception('Distribution=?') + + return cond_i diff --git a/ite-in-python/ite/cost/x_factory.py b/ite-in-python/ite/cost/x_factory.py new file mode 100644 index 0000000..4e00b4d --- /dev/null +++ b/ite-in-python/ite/cost/x_factory.py @@ -0,0 +1,43 @@ +""" Factory for information theoretical estimators. + +For entropy / mutual information / divergence / cross quantity / +association / distribution kernel estimators. + +""" + +# assumption: the 'cost_name' entropy estimator is in module 'ite.cost' +import ite.cost + + +def co_factory(cost_name, **kwargs): + """ Creates any entropy / mutual information / divergence / cross + quantity / association / distribution kernel estimator by its name and + its parameters. + + Parameters + ---------- + cost_name : str + Name of the cost object to be created. + kwargs : dictionary + It can be used to override default parameter values in + the estimator (if needed). + Returns + ------- + co : class + Initialized estimator (cost object). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.co_factory('BHShannon_KnnK') + + >>> dict_par = {'mult': False,'k': 2} + >>> co2 = ite.cost.co_factory('BHShannon_KnnK', **dict_par) # mapping\ + # unpacking + + """ + + co = getattr(ite.cost, cost_name)(**kwargs) + # print(co) # commented out so that doctests should not give errors + + return co diff --git a/ite-in-python/ite/cost/x_initialization.py b/ite-in-python/ite/cost/x_initialization.py new file mode 100644 index 0000000..9ccd656 --- /dev/null +++ b/ite-in-python/ite/cost/x_initialization.py @@ -0,0 +1,534 @@ +""" Initialization classes for estimators. + +For entropy / mutual information / divergence / cross quantity / +association / distribution kernel estimators. 
+ +These initialization classes are not called directly, but they are used by +inheritance. For example one typically derives a k-nearest neighbor based +estimation method from InitKnnK. InitKnnK sets (default values) for (i) +the kNN computation technique called (kNN_method), (ii) the number of +neighbors (k) and (iii) the accuracy required in kNN computation (eps). + +Note: InitKnnK (and all other classes here, except for InitBagGram) are +subclasses of InitX, which makes them printable. InitBagGram is used with +classes derived from InitX. + +""" + +from numpy import zeros, mod, array +# from numpy import zeros, floor, mod +from ite.cost.x_kernel import Kernel + + +class InitX(object): + """ Base class of all estimators giving string representation and mult. + + """ + + def __init__(self, mult=True): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + + """ + + self.mult = mult + + def __str__(self): + """ String representation of the estimator. + + Application: print(cost_object) + + Examples + -------- + >>> import ite + >>> co = ite.cost.x_initialization.InitX() + >>> print(co) + InitX -> {'mult': True} + + """ + + return ''.join((self.__class__.__name__, ' -> ', + str(self.__dict__))) + + +class InitKnnK(InitX): + """ Initialization class for estimators based on kNNs. + + k-nearest neighbors: S = {k}. + + Partial initialization comes from 'InitX'. + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0, optional + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + + Examples + -------- + >>> import ite + >>> co = ite.cost.x_initialization.InitKnnK() + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.knn_method, self.k, self.eps = knn_method, k, eps + + +class InitKnnKiTi(InitX): + """ Initialization class for estimators based on k-nearest neighbors. + + k here depends on the number of samples: S = {ki(Ti)}. + + Partial initialization comes from 'InitX'. + + """ + + def __init__(self, mult=True, knn_method='cKDTree', eps=0): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + eps : float, >= 0, optional + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.x_initialization.InitKnnKiTi() + >>> co2 = ite.cost.x_initialization.InitKnnKiTi(eps=0.1) + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.knn_method, self.eps = knn_method, eps + + +class InitAlpha(InitX): + """ Initialization class for estimators using an alpha \ne 1 parameter. 
+ + Partial initialization comes from 'InitX'. + + """ + + def __init__(self, mult=True, alpha=0.99): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + alpha : float, alpha \ne 1, optional + (default is 0.99) + + Examples + -------- + >>> import ite + >>> co = ite.cost.x_initialization.InitAlpha() + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # alpha: + if alpha == 1: + raise Exception('Alpha can not be 1 for this estimator!') + + self.alpha = alpha + + +class InitUAlpha(InitAlpha): + """ Initialization for estimators with an u>0 & alpha \ne 1 parameter. + + Partial initialization comes from 'InitAlpha'. + + """ + + def __init__(self, mult=True, u=1.0, alpha=0.99): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + u : float, 0 < u, optional + (default is 1.0) + alpha : float, alpha \ne 1, optional + (default is 0.99) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.x_initialization.InitUAlpha() + >>> co2 = ite.cost.x_initialization.InitUAlpha(alpha=0.7) + >>> co3 = ite.cost.x_initialization.InitUAlpha(u=1.2) + >>> co4 = ite.cost.x_initialization.InitUAlpha(u=1.2, alpha=0.7) + + + """ + + # initialize with 'InitAlpha' (it also checks the validity of + # alpha): + super().__init__(mult=mult, alpha=alpha) + + # u verification: + if u <= 0: + raise Exception('u has to positive for this estimator!') + self.u = u + + +class InitKnnKAlpha(InitAlpha): + """ Initialization for estimators based on kNNs and an alpha \ne 1. + + k-nearest neighbors: S = {k}. + + Partial initialization comes from 'InitAlpha'. + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, + alpha=0.99): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + alpha : float, alpha \ne 1, optional + (default is 0.99) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.x_initialization.InitKnnKAlpha() + >>> co2 = ite.cost.x_initialization.InitKnnKAlpha(k=2) + >>> co3 = ite.cost.x_initialization.InitKnnKAlpha(alpha=0.9) + >>> co4 = ite.cost.x_initialization.InitKnnKAlpha(k=2, alpha=0.9) + + """ + + # initialize with 'InitAlpha' (it also checks the validity of + # alpha): + super().__init__(mult=mult, alpha=alpha) + + # kNN attributes: + self.knn_method, self.k, self.eps = knn_method, k, eps + + +class InitKnnKAlphaBeta(InitKnnKAlpha): + """ Initialization for estimators based on kNNs; alpha & beta \ne 1. + + k-nearest neighbors: S = {k}. + + Partial initialization comes from 'InitKnnKAlpha'. + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=3, eps=0, + alpha=0.9, beta=0.99): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 
'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors + (default is 3). + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). + alpha : float, alpha \ne 1, optional + (default is 0.9) + beta : float, beta \ne 1, optional + (default is 0.99) + + Examples + -------- + >>> import ite + >>> co1 = ite.cost.x_initialization.InitKnnKAlphaBeta() + >>> co2 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2) + >>> co3 = ite.cost.x_initialization.InitKnnKAlphaBeta(alpha=0.8) + >>> co4 = ite.cost.x_initialization.InitKnnKAlphaBeta(beta=0.7) + >>> co5 = ite.cost.x_initialization.InitKnnKAlphaBeta(eps=0.1) + + >>> co6 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + alpha=0.8) + >>> co7 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + beta=0.7) + >>> co8 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + eps=0.1) + >>> co9 = ite.cost.x_initialization.InitKnnKAlphaBeta(alpha=0.8,\ + beta=0.7) + >>> co10 = ite.cost.x_initialization.InitKnnKAlphaBeta(alpha=0.8,\ + eps=0.1) + >>> co11 = ite.cost.x_initialization.InitKnnKAlphaBeta(beta=0.7,\ + eps=0.1) + >>> co12 = ite.cost.x_initialization.InitKnnKAlphaBeta(alpha=0.8,\ + beta=0.7,\ + eps=0.2) + >>> co13 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + beta=0.7,\ + eps=0.2) + >>> co14 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + alpha=0.8,\ + eps=0.2) + >>> co15 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + alpha=0.8,\ + beta=0.7) + >>> co16 = ite.cost.x_initialization.InitKnnKAlphaBeta(k=2,\ + alpha=0.8,\ + beta=0.7,\ + eps=0.2) + + """ + + # initialize with 'InitKnnKAlpha' (it also checks the validity of + # alpha): + super().__init__(mult=mult, knn_method=knn_method, k=k, eps=eps, + alpha=alpha) + + # b eta verification: + if beta == 1: + raise Exception('Beta can not be 1 for this estimator!') + + self.beta = beta + + +class InitKnnSAlpha(InitAlpha): + """ Initialization for methods based on generalized kNNs & alpha \ne 1. + + k-nearest neighbors: S \subseteq {1,...,k}. + + Partial initialization comes from 'InitAlpha'. + + """ + + def __init__(self, mult=True, knn_method='cKDTree', k=None, eps=0, + alpha=0.99): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. + k : int, >= 1, optional + k-nearest neighbors. In case of 'None' a default + array([1,2,4]) is taken. + eps : float, >= 0 + The k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN (default is 0). 
+ alpha : float, alpha \ne 1, optional + (default is 0.99) + + Examples + -------- + >>> from numpy import array + >>> import ite + >>> co1 = ite.cost.x_initialization.InitKnnSAlpha() + >>> co2a = ite.cost.x_initialization.InitKnnSAlpha(k=2) + >>> co2b =\ + ite.cost.x_initialization.InitKnnSAlpha(k=array([1,2,5])) + >>> co3 = ite.cost.x_initialization.InitKnnSAlpha(alpha=0.8) + >>> co4 = ite.cost.x_initialization.InitKnnSAlpha(eps=0.1) + + >>> co5a = ite.cost.x_initialization.InitKnnSAlpha(k=2, alpha=0.8) + >>> co5b =\ + ite.cost.x_initialization.InitKnnSAlpha(k=array([1,2,5]),\ + alpha=0.8) + >>> co6a = ite.cost.x_initialization.InitKnnSAlpha(k=2, eps=0.1) + >>> co6b =\ + ite.cost.x_initialization.InitKnnSAlpha(k=array([1,2,5]),\ + eps=0.1) + >>> co7 = ite.cost.x_initialization.InitKnnSAlpha(alpha=0.8,\ + eps=0.1) + >>> co8 = ite.cost.x_initialization.InitKnnSAlpha(k=2, alpha=0.8,\ + eps=0.2) + >>> co9 =\ + ite.cost.x_initialization.InitKnnSAlpha(k=array([1,2,5]),\ + alpha=0.8,\ + eps=0.2) + + """ + + # initialize with 'InitAlpha' (it also checks the validity of + # alpha): + super().__init__(mult=mult, alpha=alpha) + + # kNN attribute: + if k is None: + k = array([1, 2, 4]) + + self.knn_method, self.k, self.eps = knn_method, k, eps + + # alpha: + if alpha == 1: + raise Exception('Alpha can not be 1 for this estimator!') + + self.alpha = alpha + + +class InitKernel(InitX): + """ Initialization class for kernel based estimators. + + Partial initialization comes from 'InitX'. + + """ + + def __init__(self, mult=True, kernel=Kernel()): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kernel : Kernel, optional + For examples, see 'ite.cost.x_kernel.Kernel' + + + """ + + # initialize with 'InitX': + super().__init__(mult=mult) + + # other attributes: + self.kernel = kernel + + +class InitBagGram(object): + """ Initialization class for kernels on distributions. + + The class provides Gram matrix computation capability. + + """ + + def gram_matrix(self, ys): + """ Gram matrix computation on a collection of bags. + + Examples + -------- + See 'ite/demos/other/demo_k_positive_semidefinite.py'. + + """ + + num_of_distributions = len(ys) + g = zeros((num_of_distributions, num_of_distributions)) + print('G computation: started.') + + for k1 in range(num_of_distributions): # k1^th distribution + if mod(k1, 10) == 0: + print('k1=' + str(k1+1) + '/' + str(num_of_distributions) + + ': started.') + + for k2 in range(k1, num_of_distributions): # k2^th distr. + # K(y[k1], y[k2]): + + # version-1 (we care about independence for k1 == k2; + # import floor): + # if k1 == k2: + # num_of_samples_half = int(floor(ys[k1].shape[0] / 2)) + # g[k1, k1] = \ + # self.estimation(ys[k1][:num_of_samples_half], + # ys[k1][num_of_samples_half:]) + # else: + # g[k1, k2] = self.estimation(ys[k1], ys[k2]) + # g[k2, k1] = g[k1, k2] # assumption: symmetry + + # version-2 (we do not care): + g[k1, k2] = self.estimation(ys[k1], ys[k2]) + # Note: '.estimation()' is implemented in the kernel + # classes + g[k2, k1] = g[k1, k2] # assumption: symmetry + + return g + + +class InitEtaKernel(InitKernel): + """ Initialization for kernel based methods with an eta > 0 parameter. + + Eta is a tolerance parameter; it is used to control the approximation + quality of incomplete Cholesky decomposition based approximation. + + Partial initialization comes from 'InitKernel'. 
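+
+ Examples
+ --------
+ >>> import ite
+ >>> co1 = ite.cost.x_initialization.InitEtaKernel()
+ >>> co2 = ite.cost.x_initialization.InitEtaKernel(eta=1e-3)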
+ + """ + + def __init__(self, mult=True, kernel=Kernel(), eta=1e-2): + """ Initialize the estimator. + + Parameters + ---------- + mult : bool, optional + 'True': multiplicative constant relevant (needed) in the + estimation. 'False': estimation up to 'proportionality'. + (default is True) + kernel : Kernel, optional + For examples, see 'ite.cost.x_kernel.Kernel' + eta : float, >0, optional + It is used to control the quality of the incomplete Cholesky + decomposition based Gram matrix approximation. Smaller 'eta' + means larger-sized Gram factor and better approximation. + (default is 1e-2) + """ + + # initialize with 'InitKernel': + super().__init__(mult=mult, kernel=kernel) + + # other attributes: + self.eta = eta diff --git a/ite-in-python/ite/cost/x_kernel.py b/ite-in-python/ite/cost/x_kernel.py new file mode 100644 index 0000000..4cabae8 --- /dev/null +++ b/ite-in-python/ite/cost/x_kernel.py @@ -0,0 +1,419 @@ +""" Kernel class. + +It provides Gram matrix computation and incomplete Cholesky decomposition +capabilities. + +""" + +from scipy.spatial.distance import pdist, cdist, squareform +from numpy import sum, sqrt, exp, dot, ones, array, zeros, argmax, \ + hstack, newaxis, copy, argsort + + +class Kernel(object): + """ Kernel class """ + + def __init__(self, par=None): + """ Initialization. + + Parameters + ---------- + par : dictionary, optional + Name of the kernel and its parameters (default is + {'name': 'RBF','sigma': 1}). The name of the kernel comes + from 'RBF', 'exponential', 'Cauchy', 'student', 'Matern3p2', + 'Matern5p2', 'polynomial', 'ratquadr' (rational quadratic), + 'invmquadr' (inverse multiquadr). + + Examples + -------- + >>> from ite.cost.x_kernel import Kernel + >>> k1 = Kernel({'name': 'RBF','sigma': 1}) + >>> k2 = Kernel({'name': 'exponential','sigma': 1}) + >>> k3 = Kernel({'name': 'Cauchy','sigma': 1}) + >>> k4 = Kernel({'name': 'student','d': 1}) + >>> k5 = Kernel({'name': 'Matern3p2','l': 1}) + >>> k6 = Kernel({'name': 'Matern5p2','l': 1}) + >>> k7 = Kernel({'name': 'polynomial','exponent': 2,'c': 1}) + >>> k8 = Kernel({'name': 'ratquadr','c': 1}) + >>> k9 = Kernel({'name': 'invmquadr','c': 1}) + + from numpy.random import rand + num_of_samples, dim = 5, 2 + y1, y2 = rand(num_of_samples, dim), rand(num_of_samples+1, dim) + y1b = rand(num_of_samples, dim) + k1.gram_matrix1(y1) + k1.gram_matrix2(y1, y2) + k1.sum(y1,y1b) + k1.gram_matrix_diagonal(y1) + + """ + + # if par is None: + # par = {'name': 'RBF', 'sigma': 0.01} + if par is None: + par = {'name': 'RBF', 'sigma': 1} + # if par is None: + # par = {'name': 'exponential', 'sigma': 1} + # if par is None: + # par = {'name': 'Cauchy', 'sigma': 1} + # if par is None: + # par = {'name': 'student', 'd': 1} + # if par is None: + # par = {'name': 'Matern3p2', 'l': 1} + # if par is None: + # par = {'name': 'Matern5p2', 'l': 1} + # if par is None: + # par = {'name': 'polynomial','exponent': 2, 'c': 1} + # if par is None: + # par = {'name': 'polynomial', 'exponent': 3, 'c': 1} + # if par is None: + # par = {'name': 'ratquadr', 'c': 1} + # if par is None: + # par = {'name': 'invmquadr', 'c': 1} + + # name: + name = par['name'] + self.name = name + + # other attributes: + if name == 'RBF' or name == 'exponential' or name == 'Cauchy': + self.sigma = par['sigma'] + elif name == 'student': + self.d = par['d'] + elif name == 'Matern3p2' or name == 'Matern5p2': + self.l = par['l'] + elif name == 'polynomial': + self.c = par['c'] + self.exponent = par['exponent'] + elif name == 'ratquadr' or name == 'invmquadr': + self.c = 
par['c'] + else: + raise Exception('kernel=?') + + def __str__(self): + """ String representation of the kernel. + + Examples + -------- + print(kernel) + + """ + + return ''.join((self.__class__.__name__, ' -> ', + str(self.__dict__))) + + def gram_matrix1(self, y): + """ Compute the Gram matrix = [k(y[i,:],y[j,:])]; i, j: running. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + g : ndarray. + Gram matrix of y. + + Examples + -------- + g = k.gram_matrix1(y) + + """ + + if self.name == 'RBF': + sigma = self.sigma + g = squareform(pdist(y)) + g = exp(-g ** 2 / (2 * sigma ** 2)) + elif self.name == 'exponential': + sigma = self.sigma + g = squareform(pdist(y)) + g = exp(-g / (2 * sigma ** 2)) + elif self.name == 'Cauchy': + sigma = self.sigma + g = squareform(pdist(y)) + g = 1 / (1 + g ** 2 / sigma ** 2) + elif self.name == 'student': + d = self.d + g = squareform(pdist(y)) + g = 1 / (1 + g ** d) + elif self.name == 'Matern3p2': + l = self.l + g = squareform(pdist(y)) + g = (1 + sqrt(3) * g / l) * exp(-sqrt(3) * g / l) + elif self.name == 'Matern5p2': + l = self.l + g = squareform(pdist(y)) + g = (1 + sqrt(5) * g / l + 5 * g ** 2 / (3 * l ** 2)) * \ + exp(-sqrt(5) * g / l) + elif self.name == 'polynomial': + c = self.c + exponent = self.exponent + g = (dot(y, y.T) + c) ** exponent + elif self.name == 'ratquadr': + c = self.c + g = squareform(pdist(y)) ** 2 + g = 1 - g / (g + c) + elif self.name == 'invmquadr': + c = self.c + g = squareform(pdist(y)) + g = 1 / sqrt(g ** 2 + c ** 2) + else: + raise Exception('kernel=?') + + return g + + def gram_matrix2(self, y1, y2): + """ Compute the Gram matrix = [k(y1[i,:],y2[j,:])]; i, j: running. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + g : ndarray. + Gram matrix of y1 and y2. + + Examples + -------- + g = k.gram_matrix2(y1,y2) + + """ + + if self.name == 'RBF': + sigma = self.sigma + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = exp(-g ** 2 / (2 * sigma ** 2)) + elif self.name == 'exponential': + sigma = self.sigma + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = exp(-g / (2 * sigma ** 2)) + elif self.name == 'Cauchy': + sigma = self.sigma + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = 1 / (1 + g ** 2 / sigma ** 2) + elif self.name == 'student': + d = self.d + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = 1 / (1 + g ** d) + elif self.name == 'Matern3p2': + l = self.l + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = (1 + sqrt(3) * g / l) * exp(-sqrt(3) * g / l) + elif self.name == 'Matern5p2': + l = self.l + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = (1 + sqrt(5) * g / l + 5 * g ** 2 / (3 * l ** 2)) * \ + exp(-sqrt(5) * g / l) + elif self.name == 'polynomial': + c = self.c + exponent = self.exponent + g = (dot(y1, y2.T) + c) ** exponent + elif self.name == 'ratquadr': + c = self.c + # alternative: g = cdist_large_dim(y1,y2)**2 + g = cdist(y1, y2) ** 2 + g = 1 - g / (g + c) + elif self.name == 'invmquadr': + c = self.c + g = cdist(y1, y2) # alternative: g = cdist_large_dim(y1,y2) + g = 1 / sqrt(g ** 2 + c ** 2) + else: + raise Exception('kernel=?') + + return g + + def sum(self, y1, y2): + """ Compute \sum_i k(y1[i,:],y2[i,:]). 
+ + Parameters + ---------- + y1 : (number of samples, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples, dimension)-ndarray + One row of y2 corresponds to one sample. There has to be the + same number of samples in y1 and y2. + + Returns + ------- + s : float + s = \sum_i k(y1[i,:],y2[i,:]). + + """ + + # verification: + if y1.shape[0] != y2.shape[0]: + raise Exception('There should be the same number of samples ' + 'in y1 and y2!') + + if self.name == 'RBF': + sigma = self.sigma + dist2 = sum((y1 - y2) ** 2, axis=1) + s = sum(exp(-dist2 / (2 * sigma ** 2))) + elif self.name == 'exponential': + sigma = self.sigma + dist = sqrt(sum((y1 - y2) ** 2, axis=1)) + s = sum(exp(-dist / (2 * sigma ** 2))) + elif self.name == 'Cauchy': + sigma = self.sigma + dist2 = sum((y1 - y2) ** 2, axis=1) + s = sum(1 / (1 + dist2 / sigma ** 2)) + elif self.name == 'student': + d = self.d + dist = sqrt(sum((y1 - y2) ** 2, axis=1)) + s = sum(1 / (1 + dist ** d)) + elif self.name == 'Matern3p2': + l = self.l + dist = sqrt(sum((y1 - y2) ** 2, axis=1)) + s = sum((1 + sqrt(3) * dist / l) * exp(-sqrt(3) * dist / l)) + elif self.name == 'Matern5p2': + l = self.l + dist = sqrt(sum((y1 - y2) ** 2, axis=1)) + s = sum((1 + sqrt(5) * dist / l + 5 * dist ** 2 / + (3 * l ** 2)) * exp(-sqrt(5) * dist / l)) + elif self.name == 'polynomial': + c = self.c + exponent = self.exponent + s = sum((sum(y1 * y2, axis=1) + c) ** exponent) + elif self.name == 'ratquadr': + c = self.c + dist2 = sum((y1 - y2) ** 2, axis=1) + s = sum(1 - dist2 / (dist2 + c)) + elif self.name == 'invmquadr': + c = self.c + dist2 = sum((y1 - y2) ** 2, axis=1) + s = sum(1 / sqrt(dist2 + c ** 2)) + else: + raise Exception('kernel=?') + + return s + + def gram_matrix_diagonal(self, y): + """ Diagonal of the Gram matrix: [k(y[i,:],y[i,:])]; i is running. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + diag_g : num_of_samples-ndarray + Diagonal of the Gram matrix. + + """ + + num_of_samples = y.shape[0] + + if self.name == 'RBF' or\ + self.name == 'exponential' or\ + self.name == 'Cauchy' or\ + self.name == 'student' or\ + self.name == 'ratquadr' or\ + self.name == 'Matern3p2' or\ + self.name == 'Matern5p2': + diag_g = ones(num_of_samples, dtype='float') + elif self.name == 'polynomial': + diag_g = (sum(y**2, axis=1) + self.c)**self.exponent + elif self.name == 'invmquadr': + diag_g = ones(num_of_samples, dtype='float') / self.c + else: + raise Exception('kernel=?') + + return diag_g + + def ichol(self, y, tol): + """ Incomplete Cholesky decomposition defined by the data & kernel. + + If 'a' is the true Gram matrix: a \approx dot(g_hat, g_hat.T). + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + tol : float, > 0 + Tolerance parameter; smaller 'tol' means larger-sized Gram + factor and better approximation. + + Returns + ------- + g_hat : (number_of_samples, smaller dimension)-ndarray + Incomplete Cholesky(/Gram) factor. + + Notes + ----- + Symmetric pivoting is used and the algorithm stops when the sum + of the remaining pivots is less than 'tol'. + + This function is a Python implementation for general kernels of + 'chol_gauss.m', 'chol_poly.m', 'chol_hermite.m' which were written + by Francis Bach for the TCA topic (see + "http://www.di.ens.fr/~fbach/tca/tca1_0.tar.gz"). + + References + ---------- + Francis R. Bach, Michael I. Jordan. Beyond independent components: + trees and clusters.
Journal of Machine Learning Research, 4:1205- + 1233, 2003. + + """ + + num_of_samples = y.shape[0] + pvec = array(range(num_of_samples)) + g_diag = self.gram_matrix_diagonal(y) + # .copy(): so that if 'g_diag' changes 'k_diag' should not also do + # so: + k_diag = g_diag.copy() + # 'i' and 'jast' follow 'Matlab indexing'; the rest is adjusted + # accordingly: + + i = 1 + + while sum(g_diag[i-1:num_of_samples]) > tol: + # g update with a new zero column to the right: + if i == 1: + g = zeros((num_of_samples, 1)) + else: + g = hstack((g, zeros((num_of_samples, 1)))) + + if i > 1: + # argmax returns the index of the (first) max + jast = argmax(g_diag[i-1:num_of_samples]) + (i - 1) + 1 + pvec[i-1], pvec[jast-1] = pvec[jast-1], pvec[i-1] + # swap the 2 rows of g: + t = copy(g[i-1]) # broadcast + g[i-1] = g[jast-1] # broadcast + g[jast-1] = t # broadcast + else: + jast = 1 + + g[i-1, i-1] = sqrt(g_diag[jast-1]) + if i < num_of_samples: + # with broadcasting: + yq = y[pvec[i-1]] + newacol = \ + self.gram_matrix2(y[pvec[i:num_of_samples]], + yq[newaxis, :]).T + if i > 1: + g[i:num_of_samples, i-1] = \ + 1 / g[i-1, i-1] * \ + (newacol - dot(g[i:num_of_samples, 0:i-1], + g[i-1, 0:i-1].T)) + else: + g[i:num_of_samples, i-1] = 1 / g[i-1, i-1] * newacol + + if i < num_of_samples: + g_diag[i:num_of_samples] = \ + k_diag[pvec[i:num_of_samples]] \ + - sum(g[i:num_of_samples]**2, axis=1) # broadcast + + i += 1 + + # permute the rows of 'g' in accord with 'pvec': + pvec = argsort(pvec) + + return g[pvec] # broadcast diff --git a/ite-in-python/ite/cost/x_python_to_matlab.py b/ite-in-python/ite/cost/x_python_to_matlab.py new file mode 100644 index 0000000..344070b --- /dev/null +++ b/ite-in-python/ite/cost/x_python_to_matlab.py @@ -0,0 +1,206 @@ +""" Python ITE <-> Matlab ITE transitions (where it exists). + +Here we define dictionaries (Python -> Matlab), and their inversions. The +inversion means a 'key<->value' change, in other words given a cost type +(A/C/D/H/I/K/condH/condI) it provides the Matlab -> Python transitions. + +""" + + +def inverted_dict(dict1): + """" Performs key <-> value inversion in the dictionary + + Parameters + ---------- + dict1 : dict + + Returns + ------- + dict2 : dict + Dictionary with inverted key-values. + + Examples + -------- + dict1 = dict(a=1,b=2,c=3,d=4) + inverted_dict(dict1) # result in {1: 'a', 2: 'b', 3: 'c', 4: 'd'} + (up to possible permutation of the elements) + + """ + + dict2 = {v: k for k, v in dict1.items()} + + return dict2 + + +def merge_dicts(dict1, dict2): + """ Merge two dictionaries. + + Parameters + ---------- + dict1, dict2 : dict + + Returns + ------- + dict_merged : dict + Merged dictionaries. 
+ + """ + + dict_merged = dict1.copy() + dict_merged.update(dict2) + + return dict_merged + +# ################# +# Python -> Matlab: +# ################# + +# unconditional quantities: +dict_base_a_PythonToMatlab = dict(BASpearman1="Spearman1", + BASpearman2="Spearman2", + BASpearman3="Spearman3", + BASpearman4="Spearman4", + BASpearmanCondLT="Spearman_lt", + BASpearmanCondUT="Spearman_ut", + BABlomqvist="Blomqvist") + +dict_meta_a_PythonToMatlab = dict(MASpearmanLT="Spearman_L", + MASpearmanUT="Spearman_U") + +dict_base_c_PythonToMatlab = dict(BCCE_KnnK="CE_kNN_k") +dict_meta_c_PythonToMatlab = dict() + +dict_base_d_PythonToMatlab \ + = dict(BDKL_KnnK="KL_kNN_k", + BDEnergyDist="EnergyDist", + BDBhattacharyya_KnnK="Bhattacharyya_kNN_k", + BDBregman_KnnK="Bregman_kNN_k", + BDChi2_KnnK="ChiSquare_kNN_k", + BDHellinger_KnnK="Hellinger_kNN_k", + BDKL_KnnKiTi="KL_kNN_kiTi", + BDL2_KnnK="L2_kNN_k", + BDRenyi_KnnK="Renyi_kNN_k", + BDTsallis_KnnK="Tsallis_kNN_k", + BDSharmaMittal_KnnK="SharmaM_kNN_k", + BDSymBregman_KnnK="symBregman_kNN_k", + BDMMD_UStat="MMD_Ustat", + BDMMD_VStat="MMD_Vstat", + BDMMD_Online="MMD_online", + BDMMD_UStat_IChol="MMD_Ustat_iChol", + BDMMD_VStat_IChol="MMD_Vstat_iChol") + +dict_meta_d_PythonToMatlab = dict(MDBlockMMD="BMMD_DMMD_Ustat", + MDEnergyDist_DMMD="EnergyDist_DMMD", + MDf_DChi2="f_DChiSquare", + MDJDist_DKL="Jdistance", + MDJR_HR="JensenRenyi_HRenyi", + MDJT_HT="JensenTsallis_HTsallis", + MDJS_HS="JensenShannon_HShannon", + MDK_DKL="K_DKL", + MDL_DKL="L_DKL", + MDSymBregman_DB="symBregman_DBregman", + MDKL_HSCE="KL_CCE_HShannon") + +dict_base_h_PythonToMatlab = dict(BHShannon_KnnK="Shannon_kNN_k", + BHShannon_SpacingV="Shannon_spacing_V", + BHRenyi_KnnK="Renyi_kNN_k", + BHTsallis_KnnK="Tsallis_kNN_k", + BHSharmaMittal_KnnK="SharmaM_kNN_k", + BHShannon_MaxEnt1="Shannon_MaxEnt1", + BHShannon_MaxEnt2="Shannon_MaxEnt2", + BHPhi_Spacing="Phi_spacing", + BHRenyi_KnnS="Renyi_kNN_S") + +dict_meta_h_PythonToMatlab = dict(MHShannon_DKLN="Shannon_DKL_N", + MHShannon_DKLU="Shannon_DKL_U", + MHTsallis_HR="Tsallis_HRenyi") + +dict_base_i_PythonToMatlab = dict(BIDistCov="dCov", + BIDistCorr="dCor", + BI3WayJoint="3way_joint", + BI3WayLancaster="3way_Lancaster", + BIHSIC_IChol="HSIC", + BIHoeffding="Hoeffding", + BIKGV="KGV", + BIKCCA="KCCA") + +dict_meta_i_PythonToMatlab = dict(MIShannon_DKL="Shannon_DKL", + MIChi2_DChi2="ChiSquare_DChiSquare", + MIL2_DL2="L2_DL2", + MIRenyi_DR="Renyi_DRenyi", + MITsallis_DT="Tsallis_DTsallis", + MIMMD_CopulaDMMD="MMD_DMMD", + MIRenyi_HR="Renyi_HRenyi", + MIShannon_HS="Shannon_HShannon", + MIDistCov_HSIC="dCov_IHSIC") + +dict_base_k_PythonToMatlab = dict(BKProbProd_KnnK="PP_kNN_k", + BKExpected="expected") + +dict_meta_k_PythonToMatlab = dict(MKExpJR1_HR="EJR1_HR", + MKExpJR2_DJR="EJR2_DJR", + MKExpJS_DJS="EJS_DJS", + MKExpJT1_HT="EJT1_HT", + MKExpJT2_DJT="EJT2_DJT", + MKJS_DJS="JS_DJS", + MKJT_HT="JT_HJT") + +# conditional quantities: +dict_base_h_cond_PythonToMatlab = dict() +dict_meta_h_cond_PythonToMatlab = \ + dict(BcondHShannon_HShannon="Shannon_HShannon") + +dict_base_i_cond_PythonToMatlab = dict() +dict_meta_i_cond_PythonToMatlab = \ + dict(BcondIShannon_HShannon="Shannon_HShannon") + +# ################################################## +# merge the dictionaries of 'base' and 'meta' names: +# ################################################## + +# unconditional quantities: +dict_A_PythonToMatlab = merge_dicts(dict_base_a_PythonToMatlab, + dict_meta_a_PythonToMatlab) +dict_C_PythonToMatlab = merge_dicts(dict_base_c_PythonToMatlab, + 
dict_meta_c_PythonToMatlab) +dict_D_PythonToMatlab = merge_dicts(dict_base_d_PythonToMatlab, + dict_meta_d_PythonToMatlab) +dict_H_PythonToMatlab = merge_dicts(dict_base_h_PythonToMatlab, + dict_meta_h_PythonToMatlab) +dict_I_PythonToMatlab = merge_dicts(dict_base_i_PythonToMatlab, + dict_meta_i_PythonToMatlab) +dict_K_PythonToMatlab = merge_dicts(dict_base_k_PythonToMatlab, + dict_meta_k_PythonToMatlab) + +# conditional ones: +dict_H_Cond_PythonToMatlab = merge_dicts(dict_base_h_cond_PythonToMatlab, + dict_meta_h_cond_PythonToMatlab) +dict_I_Cond_PythonToMatlab = merge_dicts(dict_base_i_cond_PythonToMatlab, + dict_meta_i_cond_PythonToMatlab) + +# ############################################## +# Matlab -> Python by inverted the dictionaries: +# ############################################## + +# unconditional quantities: +dict_A_MatlabToPython = inverted_dict(dict_A_PythonToMatlab) +dict_C_MatlabToPython = inverted_dict(dict_C_PythonToMatlab) +dict_D_MatlabToPython = inverted_dict(dict_D_PythonToMatlab) +dict_H_MatlabToPython = inverted_dict(dict_H_PythonToMatlab) +dict_I_MatlabToPython = inverted_dict(dict_I_PythonToMatlab) +dict_K_MatlabToPython = inverted_dict(dict_K_PythonToMatlab) + +# conditional quantities: +dict_H_Cond_MatlabToPython = inverted_dict(dict_H_Cond_PythonToMatlab) +dict_I_Cond_MatlabToPython = inverted_dict(dict_I_Cond_PythonToMatlab) + + +# Examples +# -------- +# Python -> Matlab: +# >>> dict_A_PythonToMatlab['BASpearman1'] +# => 'Spearman1' is the Matlab name of 'BASpearman1' +# +# Matlab -> Python, given a cost type (A): +# >>>dict_A_MatlabToPython['Spearman1'] +# => 'BASpearman1' is the Python name of the 'Spearman1' association diff --git a/ite-in-python/ite/cost/x_verification.py b/ite-in-python/ite/cost/x_verification.py new file mode 100644 index 0000000..0ead05e --- /dev/null +++ b/ite-in-python/ite/cost/x_verification.py @@ -0,0 +1,251 @@ +""" Verification and exception classes for estimators. + +In other words, for entropy / mutual information / divergence / cross +quantity / association / distribution kernel estimators. + +The verification classes are not called directly, but they are used by +inheritance: the cost objects get them as method(s) for checking before +estimation; for example in case of divergence measures whether the samples +(in y1 and y2) have the same dimension. Each verification class is +accompanied by an exception class (ExceptionX, classX); if the required +property is violated (classX) and exception (ExceptionX) is raised. + +""" + + +class ExceptionOneDSignal(Exception): + """ Exception for VerOneDSignal '""" + + def __str__(self): + return 'The samples must be one-dimensional for this estimator!' + + +class VerOneDSignal(object): + """ Verification class with 'one-dimensional signal' capability. """ + + def verification_one_d_signal(self, y): + """ Verify if y is one-dimensional. + + If this is not the case, an ExceptionOneDSignal exception is + raised. + + Examples + -------- + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerOneDSignal() # <-> 'simple co' + >>> y = rand(100,1) # 100 samples from an 1D random variable + >>> Ver.verification_one_d_signal(y) + + """ + + if (y.ndim != 2) or (y.shape[1] != 1): + raise ExceptionOneDSignal() + + +class ExceptionOneDSubspaces(Exception): + """ Exception for VerOneDSubspaces """ + + def __str__(self): + return 'The subspaces must be one-dimensional for this estimator!' 
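+
+
+# A brief illustrative sketch (not a doctest of this module; the sample
+# sizes are arbitrary): when a check fails, the corresponding exception is
+# raised, so cost objects can refuse malformed input before estimation, e.g.
+#
+# >>> from numpy.random import rand
+# >>> import ite
+# >>> ver = ite.cost.x_verification.VerOneDSignal()
+# >>> ver.verification_one_d_signal(rand(100, 1))  # passes silently
+# >>> ver.verification_one_d_signal(rand(100, 2))  # raises ExceptionOneDSignal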
+ + +class VerOneDSubspaces(object): + """ Verification class with 'one-dimensional subspaces' capability. """ + + def verification_one_dimensional_subspaces(self, ds): + """ Verify if ds encodes one-dimensional subspaces. + + If this is not the case, an ExceptionOneDSubspaces exception is + raised. + + Examples + -------- + >>> from numpy import ones + >>> import ite + >>> Ver = ite.cost.x_verification.VerOneDSubspaces() # 'simple co' + >>> ds = ones(4) + >>> Ver.verification_one_dimensional_subspaces(ds) + + """ + + if not(all(ds == 1)): + raise ExceptionOneDSubspaces() + + +class ExceptionCompSubspaceDims(Exception): + """ Exception for VerCompSubspaceDims """ + + def __str__(self): + return 'The subspace dimensions are not compatible with y!' + + +class VerCompSubspaceDims(object): + """ Verification with 'compatible subspace dimensions' capability. + + """ + + def verification_compatible_subspace_dimensions(self, y, ds): + """ Verify if y and ds are compatible. + + If this is not the case, an ExceptionCompSubspaceDims exception is + raised. + + Examples + -------- + >>> from numpy import array + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerCompSubspaceDims() # simple co + >>> ds = array([2, 2]) # 2 pieces of 2-dimensional subspaces + >>> y = rand(100, 4) + >>> Ver.verification_compatible_subspace_dimensions(y, ds) + + """ + + if y.shape[1] != sum(ds): + raise ExceptionCompSubspaceDims() + + +class ExceptionSubspaceNumberIsK(Exception): + """ Exception for VerSubspaceNumberIsK """ + + def __init__(self, k): + self.k = k + + def __str__(self): + return 'The number of subspaces must be ' + str(self.k) + \ + ' for this estimator!' + + +class VerSubspaceNumberIsK(object): + """ Verification class with 'the # of subspaces is k' capability. """ + + def verification_subspace_number_is_k(self, ds, k): + """ Verify if the number of subspaces is k. + + If this is not the case, an ExceptionSubspaceNumberIsK exception is + raised. + + Examples + -------- + >>> from numpy import array + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerSubspaceNumberIsK() # 'co' + >>> ds = array([3, 3]) # 2 pieces of 3-dimensional subspaces + >>> y = rand(1000, 6) + >>> Ver.verification_subspace_number_is_k(ds, 2) + + """ + + if len(ds) != k: + raise ExceptionSubspaceNumberIsK(k) + + +class ExceptionEqualDSubspaces(Exception): + """ Exception for VerEqualDSubspaces """ + + def __str__(self): + return 'The dimension of the samples in y1 and y2 must be equal!' + + +class VerEqualDSubspaces(object): + """ Verification class with 'equal subspace dimensions' capability. """ + + def verification_equal_d_subspaces(self, y1, y2): + """ Verify if y1 and y2 have the same dimensionality. + + If this is not the case, an ExceptionEqualDSubspaces exception is + raised. + + Examples + -------- + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerEqualDSubspaces() # 'co' + >>> y1 = rand(100, 2) + >>> y2 = rand(200, 2) + >>> Ver.verification_equal_d_subspaces(y1, y2) + + """ + + d1, d2 = y1.shape[1], y2.shape[1] + + if d1 != d2: + raise ExceptionEqualDSubspaces() + + +class ExceptionEqualSampleNumbers(Exception): + """ Exception for VerEqualSampleNumbers """ + + def __str__(self): + return 'There must be equal number of samples in y1 and' + \ + ' y2 for this estimator!' + + +class VerEqualSampleNumbers(object): + """ Verification class with 'the # of samples is equal' capability. 
""" + + def verification_equal_sample_numbers(self, y1, y2): + """ Verify if y1 and y2 have the same dimensionality. + + If this is not the case, an ExceptionEqualDSubspaces exception is + raised. + + Examples + -------- + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerEqualSampleNumbers() # 'co' + >>> y1 = rand(100, 2) + >>> y2 = rand(100, 2) + >>> Ver.verification_equal_sample_numbers(y1, y2) + + """ + + num_of_samples1, num_of_samples2 = y1.shape[0], y2.shape[0] + + if num_of_samples1 != num_of_samples2: + raise ExceptionEqualSampleNumbers() + + +class ExceptionEvenSampleNumbers(Exception): + """ Exception for VerEvenSampleNumbers """ + + def __str__(self): + return 'The number of samples must be even for this' +\ + ' estimator!' + + +class VerEvenSampleNumbers(object): + """ Verification class with 'even sample numbers' capability. + + Assumption: y1.shape[0] = y2.shape[0]. (see class + 'VerEqualSampleNumbers' above) + + """ + + def verification_even_sample_numbers(self, y1): + """ + Examples + -------- + >>> from numpy.random import rand + >>> import ite + >>> Ver = ite.cost.x_verification.VerEvenSampleNumbers() # 'co' + >>> y1 = rand(100, 2) + >>> y2 = rand(100, 2) + >>> Ver.verification_even_sample_numbers(y1) + """ + + num_of_samples = y1.shape[0] # = y2.shape[0] by assumption + if num_of_samples % 2 != 0: # check if num_of_samples is even + raise ExceptionEvenSampleNumbers() + +# Template: +# class ExceptionX(Exception): +# """ Exception for X """ +# +# def __str__(self): +# return 'XY' +# diff --git a/ite-in-python/ite/shared.py b/ite-in-python/ite/shared.py new file mode 100644 index 0000000..ba669fc --- /dev/null +++ b/ite-in-python/ite/shared.py @@ -0,0 +1,775 @@ +from scipy.spatial import KDTree, cKDTree +from scipy.spatial.distance import pdist, squareform +# from scipy.spatial.distance cdist +from scipy.special import gamma +from scipy.linalg import eigh +from scipy.stats import rankdata + +# from scipy.special import gammaln # estimate_d_temp1/2/3 +from numpy.random import permutation, choice +from numpy import pi, cumsum, hstack, zeros, sum, ix_, mean, newaxis, \ + sqrt, dot, median, exp, min, floor, log, eye, absolute, \ + array, max, any, place, inf, isinf, where, diag +from scipy.linalg import det, inv + +# scipy.spatial.distance.cdist is slightly slow; you can obtain some +# speed-up in case of larger dimensions by using +# ite.shared.cdist_large_dim: +# from ite.shared import cdist_large_dim + + +def knn_distances(y, q, y_equals_to_q, knn_method='cKDTree', knn_k=3, + knn_eps=0, knn_p=2): + """ Compute the k-nearest neighbors (kNN-s) of Q in y. + + Parameters + ---------- + q : (number of samples in q, dimension)-ndarray + Query points. + y : (number of samples in y, dimension)-ndarray + Data from which the kNN-s are searched. + y_equals_to_q : boolean + 'True' if y is equal to q; otherwise it is 'False'. + knn_method : str, optional + kNN computation method; 'cKDTree' or 'KDTree'. (default + is 'cKDTree') + knn_k : int, >= 1, optional + kNN_k-nearest neighbors. If 'y_equals_to_q' = True, then + 'knn_k' + 1 <= 'num_of_samples in y'; otherwise 'knn_k' <= + 'num_of_samples in y'. (default is 3) + knn_eps : float, >= 0, optional + The kNN_k^th returned value is guaranteed to be no further + than (1+eps) times the distance to the real knn_k. (default + is 0, i.e. the exact kNN-s are computed) + knn_p : float, 1 <= p <= infinity, optional + Which Minkowski p-norm to use. (default is 2, i.e. 
Euclidean + norm is taken) + + Returns + ------- + distances : array of floats + The distances to the kNNs; size: 'number of samples in q' + x 'knn_k'. + indices : array of integers + indices[iq,ik] = distance of the iq^th point in q and the + ik^th NN in q (iq = 1,...,number of samples in q; ik = + 1,...,k); it has the same shape as 'distances'. + + """ + + if knn_method == 'cKDTree': + tree = cKDTree(y) + elif knn_method == 'KDTree': + tree = KDTree(y) + + if y_equals_to_q: + if knn_k+1 > y.shape[0]: + raise Exception("'knn_k' + 1 <= 'num_of_samples in y' " + + "is not satisfied!") + + # distances, indices: |q| x (knn_k+1): + distances, indices = tree.query(q, k=knn_k+1, eps=knn_eps, p=knn_p) + + # exclude the points themselves => distances, indices: |q| x knn_k: + distances, indices = distances[:, 1:], indices[:, 1:] + else: + if knn_k > y.shape[0]: + raise Exception("'knn_k' <= 'num_of_samples in y' " + + "is not satisfied!") + + # distances, indices: |q| x knn_k: + distances, indices = tree.query(q, k=knn_k, eps=knn_eps, p=knn_p) + + return distances, indices + + +def volume_of_the_unit_ball(d): + """ Volume of the d-dimensional unit ball. + + Parameters + ---------- + d : int + dimension. + + Returns + ------- + vol : float + volume. + + """ + + vol = pi**(d/2) / gamma(d/2+1) # = 2 * pi^(d/2) / ( d*gamma(d/2) ) + + return vol + + +def joint_and_product_of_the_marginals_split(z, ds): + """ Split to samples from the joint and the product of the marginals. + + Parameters + ---------- + z : (number of samples, dimension)-ndarray + Sample points. + ds : int vector + Dimension of the individual subspaces in z; ds[i] = i^th subspace + dimension. + + Returns + ------- + x : (number of samplesx, dimension)-ndarray + Samples from the joint. + y : (number of samplesy, dimension)-ndarray + Sample from the product of the marginals; it is independent of x. + + """ + + # verification (sum(ds) = z.shape[1]): + if sum(ds) != z.shape[1]: + raise Exception('sum(ds) must be equal to z.shape[1]; in other ' + + 'words the subspace dimensions do not sum to the' + + ' total dimension!') + + # 0,d_1,d_1+d_2,...,d_1+...+d_{M-1}; starting indices of the subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + + num_of_samples, dim = z.shape + num_of_samples2 = num_of_samples//2 # integer division + + # x, y: + x = z[:num_of_samples2, :] + y = zeros((num_of_samples2, dim)) # preallocation + for m in range(len(ds)): + idx = range(cum_ds[m], cum_ds[m] + ds[m]) + y[:, idx] = z[ix_(num_of_samples2 + permutation(num_of_samples2), + idx)] + + return x, y + + +def estimate_i_alpha(y, co): + """ Estimate i_alpha = \int p^{\alpha}(y)dy. + + The Renyi and Tsallis entropies are simple functions of this quantity. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + co : cost object; details below. + co.knn_method : str + kNN computation method; 'cKDTree' or 'KDTree'. + co.k : int, >= 1 + k-nearest neighbors. + co.eps : float, >= 0 + the k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN. + co.alpha : float + alpha in the definition of i_alpha + + Returns + ------- + i_alpha : float + Estimated i_alpha value. 
+ + Examples + -------- + i_alpha = estimate_i_alpha(y,co) + + """ + + num_of_samples, dim = y.shape + distances_yy = knn_distances(y, y, True, co.knn_method, co.k, co.eps, + 2)[0] + v = volume_of_the_unit_ball(dim) + + # Solution-1 (normal k): + c = (gamma(co.k)/gamma(co.k + 1 - co.alpha))**(1 / (1 - co.alpha)) + + # Solution-2 (if k is 'extreme large', say self.k=180 [ => + # gamma(self.k)=inf], then use this alternative form of + # 'c', after importing gammaln). Note: we used the + # 'gamma(a) / gamma(b) = exp(gammaln(a) - gammaln(b))' + # identity. + # c = exp(gammaln(co.k) - gammaln(co.k+1-co.alpha))**(1 / (1-co.alpha)) + + s = sum(distances_yy[:, co.k-1]**(dim * (1 - co.alpha))) + i_alpha = \ + (num_of_samples - 1) / num_of_samples * v**(1 - co.alpha) * \ + c**(1 - co.alpha) * s / (num_of_samples - 1)**co.alpha + + return i_alpha + + +def copula_transformation(y): + """ Compute the copula transformation of signal y. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + z : (number of samples, dimension)-ndarray + Estimated copula transformed variable. + + Examples + -------- + z = copula_transformation(y) + + """ + + # rank transformation (z): + num_of_samples, dim = y.shape + z = zeros((num_of_samples, dim)) + for k in range(0, dim): + z[:, k] = rankdata(y[:, k]) + + return z / y.shape[0] + + +def estimate_d_temp1(y1, y2, co): + """ Estimate d_temp1 = \int p^{\alpha}(u)q^{1-\alpha}(u)du. + + For example, the Renyi and the Tsallis divergences are simple + functions of this quantity. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + co : cost object; details below. + co.knn_method : str + kNN computation method; 'cKDTree' or 'KDTree'. + co.k : int, >= 1 + k-nearest neighbors. + co.eps : float, >= 0 + the k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN. + + Returns + ------- + d_temp2 : float + Estimated d_temp2 value. + + Examples + -------- + d_temp2 = estimate_d_temp2(y1,y2,co) + + """ + + # initialization: + num_of_samples1, dim1 = y1.shape + num_of_samples2, dim2 = y2.shape + + # verification: + if dim1 != dim2: + raise Exception('The dimension of the samples in y1 and y2 must' + + ' be equal!') + # k, knn_method, eps, dim (= dim1 = dim2): + k, knn_method, eps, alpha, dim = \ + co.k, co.knn_method, co.eps, co.alpha, dim1 + + # kNN distances: + dist_k_y1y1 = knn_distances(y1, y1, True, knn_method, k, eps, + 2)[0][:, -1] + dist_k_y2y1 = knn_distances(y2, y1, False, knn_method, k, eps, + 2)[0][:, -1] + + # b: + # Solution-I ('normal' k): + b = gamma(k)**2 / (gamma(k - alpha + 1) * gamma(k + alpha - 1)) + # Solution-II (if k is 'extreme large', say k=180 [=> gamma(k)=inf], + # then use this alternative form of 'b'; the identity + # used is gamma(a)^2 / (gamma(b) * gamma(c)) = + # = exp( 2 * gammaln(a) - gammaln(b) - gammaln(c) ) + # b = exp( 2 * gammaln(k) - gammaln(k - alpha + 1) - + # gammaln(k + alpha - 1)) + + d_temp1 = mean(((num_of_samples1 - 1) / num_of_samples2 * + (dist_k_y1y1 / dist_k_y2y1)**dim)**(1 - alpha)) * b + + return d_temp1 + + +def estimate_d_temp2(y1, y2, co): + """ Estimate d_temp2 = \int p^a(u)q^b(u)p(u)du. + + For example, the Hellinger distance and the Bhattacharyya distance are + simple functions of this quantity. 
+ + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + co : cost object; details below. + co.knn_method : str + kNN computation method; 'cKDTree' or 'KDTree'. + co.k : int, >= 1 + k-nearest neighbors. + co.eps : float, >= 0 + the k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN. + co._a : float + co._b : float + + Returns + ------- + d_temp2 : float + Estimated d_temp2 value. + + Examples + -------- + d_temp2 = estimate_d_temp2(y1,y2,co) + + """ + + # initialization: + num_of_samples1, dim1 = y1.shape + num_of_samples2, dim2 = y2.shape + + # verification: + if dim1 != dim2: + raise Exception('The dimension of the samples in y1 and y2 must' + + ' be equal!') + + # k, knn_method, eps, a, b, dim: + k, knn_method, eps, a, b, dim = \ + co.k, co.knn_method, co.eps, co._a, co._b, dim1 # =dim2 + + # kNN distances: + dist_k_y1y1 = knn_distances(y1, y1, True, knn_method, k, eps, + 2)[0][:, -1] + dist_k_y2y1 = knn_distances(y2, y1, False, knn_method, k, eps, + 2)[0][:, -1] + + # b2 computation: + c = volume_of_the_unit_ball(dim) + # Solution-I ('normal' k): + b2 = c**(-(a+b)) * gamma(k)**2 / (gamma(k-a) * gamma(k-b)) + # Solution-II (if k is 'extreme large', say k=180 [=> gamma(k)=inf], + # then use this alternative form of 'b2'; the identity + # used is gamma(a)^2 / (gamma(b) * gamma(c)) = + # = exp( 2 * gammaln(a) - gammaln(b) - gammaln(c) ) + # b2 = c**(-(a+b)) * exp( 2 * gammaln(k) - gammaln(k-a) -gammaln(k-b) ) + + # b2 -> d_temp2: + d_temp2 = \ + (num_of_samples1 - 1)**(-a) * num_of_samples2**(-b) * b2 *\ + mean(dist_k_y1y1**(-dim * a) * dist_k_y2y1**(-dim * b)) + + return d_temp2 + + +def estimate_d_temp3(y1, y2, co): + """ Estimate d_temp3 = \int p(u)q^{a-1}(u)du. + + For example, the Bregman distance can be computed based on this + quantity. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + co : cost object; details below. + co.knn_method : str + kNN computation method; 'cKDTree' or 'KDTree'. + co.k : int, >= 1 + k-nearest neighbors. + co.eps : float, >= 0 + the k^th returned value is guaranteed to be no further than + (1+eps) times the distance to the real kNN. + + Returns + ------- + d_temp3 : float + Estimated d_temp3 value. 
+ + Examples + -------- + d_temp3 = estimate_d_temp3(y1,y2,co) + + """ + + # initialization: + num_of_samples1, dim1 = y1.shape + num_of_samples2, dim2 = y2.shape + + # verification: + if dim1 != dim2: + raise Exception('The dimension of the samples in y1 and y2 must' + + ' be equal!') + + dim, a, k, knn_method, eps = \ + dim1, co.alpha, co.k, co.knn_method, co.eps + + # kNN distances: + distances_y2y1 = knn_distances(y2, y1, False, knn_method, k, eps, 2)[0] + + # 'ca' computation: + v = volume_of_the_unit_ball(dim) + # Solution-I ('normal' k): + ca = gamma(k) / gamma(k + 1 - a) # C^a + # Solution-II (if k is 'extreme large', say k=180 [=> gamma(k)=inf], + # then use this alternative form of 'ca'; the identity + # used is gamma(a) / gamma(b) = exp(gammaln(a) - gammaln(b))): + # ca = exp(gammaln(k) - gammaln(k + 1 - a)) + + d_temp3 = \ + num_of_samples2**(1 - a) * ca * v**(1 - a) * \ + mean(distances_y2y1[:, co.k-1]**(dim * (1 - a))) + + return d_temp3 + + +def cdist_large_dim(y1, y2): + """ Pairwise Euclidean distance computation. + + Parameters + ---------- + y1 : (number of samples1, dimension)-ndarray + One row of y1 corresponds to one sample. + y2 : (number of samples2, dimension)-ndarray + One row of y2 corresponds to one sample. + + Returns + ------- + d : ndarray + (number of samples1) x (number of samples2)-sized distance matrix: + d[i,j] = euclidean_distance(y1[i,:],y2[j,:]). + + Notes + ----- + The function provides a faster pairwise distance computation method + than scipy.spatial.distance.cdist, if the dimension is 'large'. + + Examples + -------- + d = cdist_large_dim(y1,y2) + + """ + + d = sqrt(sum(y1**2, axis=1)[:, newaxis] + sum(y2**2, axis=1) + - 2 * dot(y1, y2.T)) + + return d + + +def compute_dcov_dcorr_statistics(y, alpha): + """ Compute the statistics for distance covariance/correlation. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + alpha : float + 0 < alpha < 2 + + Returns + ------- + c : (number of samples, number of samples)-ndarray + Computed statistics. + + """ + d = squareform(pdist(y))**alpha + ck = mean(d, axis=0) + c = d - ck - ck[:, newaxis] + mean(ck) + + return c + + +def median_heuristic(y): + """ Estimate the RBF bandwidth using the median heuristic. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + + Returns + ------- + bandwidth : float + Estimated RBF bandwidth. + + """ + + num_of_samples = y.shape[0] # number of samples + # if y contains more samples, then it is subsampled to this cardinality + num_of_samples_used = 100 + + # subsample y (if necessary; select 'num_of_samples_used' random rows): + if num_of_samples > num_of_samples_used: + idx = choice(num_of_samples, num_of_samples_used, replace=False) + y = y[idx] # row subsampling + + dist_vector = pdist(y) # pairwise Euclidean distances + bandwidth = median(dist_vector) / sqrt(2) + + return bandwidth + + +def mixture_distribution(ys, w): + """ Sampling from a mixture distribution. + + The samples are generated from the given samples of the individual + distributions and the mixing weights. + + Parameters + ---------- + ys : tuple of ndarrays + ys[i]: samples from i^th distribution, ys[i][j,:]: j^th sample + from the i^th distribution. Requirement: the samples (ys[i][j,:]) + have the same dimensions (for all i, j). + w : vector, w[i] > 0 (for all i), sum(w) = 1 + Mixing weights. Requirement: len(ys) = len(w).
+ + """ + + # verification: + if sum(w) != 1: + raise Exception('sum(w) has to be 1!') + + if not(all(w > 0)): + raise Exception('The coordinates of w have to be positive!') + + if len(w) != len(ys): + raise Exception('len(w)=len(ys) has to hold!') + + # number of samples, dimensions: + num_of_samples_v = array([y.shape[0] for y in ys]) + dim_v = array([y.shape[1] for y in ys]) + if len(set(dim_v)) != 1: # test if all the dimensions are identical + raise Exception('All the distributions in ys need to have the ' + + 'same dimensionality!') + + # take the maximal number of samples (t) for which 't*w1<=t1, ..., + # t*wM<=tM', then tm:=floor(t*wm), i.e. compute the trimmed number of + # samples: + t = min(num_of_samples_v / w) + tw = tuple(int(e) for e in floor(t * w)) + + # mix ys[i]-s: + num_of_samples = sum(tw) + mixture = zeros((num_of_samples, dim_v[0])) + idx_start = 0 + for k in range(len(ys)): + tw_k = tw[k] + idx_stop = idx_start + tw_k + # trim the 'irrelevant' part, the result is added to the mixture: + mixture[idx_start:idx_stop] = ys[k][:tw_k] # broadcasting + + idx_start = idx_stop + + # permute the samples to obtain the mixture (the weights have been + # taken into account in the trimming part): + mixture = permutation(mixture) # permute along the first dimension + + return mixture + + +def compute_h2(ws, ms, ss): + """ Compute quadratic Renyi entropy for the mixture of Gaussians model. + + + Weights, means and standard deviations are given as input. + + Parameters + ---------- + ws : tuple of floats, ws[i] > 0 (for all i), sum(ws) = 1 + Weights. + ms : tuple of vectors. + Means: ms[i] = i^th mean. + ss : tuple of floats, ss[i] > 0 (for all i). + Standard deviations: ss[i] = i^th std. + Requirement: len(ws) = len(ms) = len(ss) + + Returns + ------- + h2 : float, + Computed quadratic Renyi entropy. + + """ + + # Verification: + if sum(ws) != 1: + raise Exception('sum(w) has to be 1!') + + if not(all(tuple(i > j for i, j in zip(ws, zeros(len(ws)))))): + raise Exception('The coordinates of w have to be positive!') + + if len(ws) != len(ms) or len(ws) != len(ss): + raise Exception('len(ws)=len(ms)=len(ss) has hold!') + + # initialization: + num_of_comps = len(ws) # number of componnents + id_mtx = eye(ms[0].size) # identity matrix + term = 0 + + # without -log(): + for n1 in range(num_of_comps): + for n2 in range(num_of_comps): + term += ws[n1] * ws[n2] *\ + normal_density_at_zero(ms[n1] - ms[n2], + (ss[n1]**2 + ss[n2]**2) * + id_mtx) + + h2 = -log(term) + + return h2 + + +def normal_density_at_zero(m, c): + """ Compute the normal density with given mean and covariance at zero. + + Parameters + ---------- + m : vector + Mean. + c : ndarray + Covariance matrix. Assumption: c is square matrix and its size is + compatible with that of m. + + Returns + ------- + g : float + Computed density value. + + """ + + dim = len(m) + g = 1 / ((2 * pi)**(dim / 2) * sqrt(absolute(det(c)))) *\ + exp(-1/2 * dot(dot(m, inv(c)), m)) + + return g + + +def replace_infs_with_max(m): + """ Replace the inf elements of matrix 'm' with its largest element. + + The 'largest' is selected from the non-inf entries. If 'm' does not + contain inf-s, then the output of the function equals to its input. + + Parameters + ---------- + m : (d1, d2)-ndarray + Matrix what we want to 'clean'. + + Returns + ------- + m : float + Original 'm' but its Inf elements replaced with the max non-Inf + entry. 
+ + Examples + -------- + >>> from numpy import inf, array + >>> m = array([[0.0,1.0,inf], [3.0,inf,5.0]]) + >>> m = replace_infs_with_max(m) + inf elements: changed to the maximal non-inf one. + >>> print(m) + [[ 0. 1. 5.] + [ 3. 5. 5.]] + >>> m = array([[0.0,1.0,2.0], [3.0,4.0,5.0]]) + >>> m = replace_infs_with_max(m) + >>> print(m) + [[ 0. 1. 2.] + [ 3. 4. 5.]] + + """ + + if any(isinf(m)): + place(m, m == inf, -inf) # they will not be maximal + max_value = max(m) + place(m, m == -inf, max_value) + print('inf elements: changed to the maximal non-inf one.') + + return m + + +def compute_matrix_r_kcca_kgv(y, ds, kernel, tol, kappa): + """ Computation of the 'r' matrix of KCCA/KGV. + + KCCA is kernel canononical correlation analysis, KGV stands for kernel + generalized variance. + + This function is a Python implementation, and an extension for the + subspace case [ds(i)>=1] of 'contrast_tca_kgv.m' which was written by + Francis Bach for the TCA topic + (see "http://www.di.ens.fr/~fbach/tca/tca1_0.tar.gz"). + + References + ---------- + Francis R. Bach, Michael I. Jordan. Beyond independent components: + trees and clusters. Journal of Machine Learning Research, 4:1205-1233, + 2003. + + Parameters + ---------- + y : (number of samples, dimension)-ndarray + One row of y corresponds to one sample. + ds : int vector + Dimensions of the individual subspaces in y; ds[i] = i^th subspace + dimension. + kernel: Kernel. + See 'ite.cost.x_kernel.py' + tol: float, > 0 + Tolerance parameter; smaller 'tol' means larger-sized Gram factor + and better approximation. + kappa: float, >0 + Regularization parameter. + + """ + + # initialization: + num_of_samples = y.shape[0] + num_of_subspaces = len(ds) + # 0,d_1,d_1+d_2,...,d_1+...+d_{M-1}; starting indices of the subspaces: + cum_ds = cumsum(hstack((0, ds[:-1]))) + + sizes = zeros(num_of_subspaces, dtype='int') + us = list() + eigs_reg = list() # regularized eigenvalues + + for m in range(num_of_subspaces): + # centered g: + idx = range(cum_ds[m], cum_ds[m] + ds[m]) + g = kernel.ichol(y[:, idx], tol) + g = g - mean(g, axis=0) # center the Gram matrix: dot(g,g.T) + + # select the 'relevant' ('>= tol') eigenvalues (eigh => + # eigenvalues are real and are in increasing order), + # eigenvectors[:,i] = i^th eigenvector; + eigenvalues, eigenvectors = eigh(dot(g.T, g)) + relevant_indices = where(eigenvalues >= tol) + if relevant_indices[0].size == 0: # empty + relevant_indices = array([0]) + eigenvalues = eigenvalues[relevant_indices] + + # append: + r1 = eigenvectors[:, relevant_indices[0]] + r2 = diag(sqrt(1 / eigenvalues)) + us.append(dot(g, dot(r1, r2))) + eigs_reg.append(eigenvalues / (num_of_samples * kappa + + eigenvalues)) + sizes[m] = len(eigenvalues) + + # 'us', 'eigenvalues_regularized' -> 'rkappa': + rkappa = eye(sum(sizes)) + # 0,d_1,d_1+d_2,...,d_1+...+d_{M-1}; starting indices of the block: + cum_sizes = cumsum(hstack((0, sizes[:-1]))) + for i in range(1, num_of_subspaces): + for j in range(i): + newbottom = dot(dot(diag(eigs_reg[i]), dot(us[i].T, us[j])), + diag(eigs_reg[j])) + idx_i = range(cum_sizes[i], cum_sizes[i] + sizes[i]) + idx_j = range(cum_sizes[j], cum_sizes[j] + sizes[j]) + rkappa[ix_(idx_i, idx_j)] = newbottom + rkappa[ix_(idx_j, idx_i)] = newbottom.T + + return rkappa
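+
+
+# Illustrative usage sketches for the helpers above (hypothetical snippets;
+# the sample sizes are arbitrary and the package is assumed to be importable
+# as 'ite' with the layout shown above).
+#
+# kNN distances and the unit-ball volume used by the kNN based estimators:
+#
+# >>> from numpy.random import rand
+# >>> from ite.shared import knn_distances, volume_of_the_unit_ball
+# >>> y = rand(1000, 2)
+# >>> distances, indices = knn_distances(y, y, True, knn_k=3)
+# >>> distances.shape            # (1000, 3); the points themselves are excluded
+# >>> volume_of_the_unit_ball(2) # pi, i.e. the area of the unit disk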
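+# Bandwidth selection via the median heuristic, plugged into an RBF kernel
+# (one common, though not mandatory, way to pick 'sigma'):
+#
+# >>> from numpy.random import randn
+# >>> from ite.shared import median_heuristic
+# >>> from ite.cost.x_kernel import Kernel
+# >>> y = randn(500, 3)
+# >>> sigma = median_heuristic(y)
+# >>> k = Kernel({'name': 'RBF', 'sigma': sigma})
+# >>> g = k.gram_matrix1(y)      # (500, 500) Gram matrix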
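+# Incomplete Cholesky factor as a low-rank approximation of the Gram matrix
+# (continuing the previous snippet); per 'Kernel.ichol', the reconstruction
+# error is expected to shrink as 'tol' decreases:
+#
+# >>> from numpy import dot
+# >>> g_hat = k.ichol(y, tol=1e-4)
+# >>> err = abs(dot(g_hat, g_hat.T) - g).max()   # small for small 'tol'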