与Matlab代码类似,我们也可以用Python对TCA进行实现,其主要依赖于Numpy和Scipy两个强大的科学计算库。Python版本的TCA代码如下:
import numpy as np
import scipy.io
import scipy.linalg
import sklearn.metrics
from sklearn.neighbors import KNeighborsClassifier
def kernel(ker, X1, X2, gamma):
K = None
if not ker or ker == 'primal':
K = X1
elif ker == 'linear':
if X2 is not None:
K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T, np.asarray(X2).T)
else:
K = sklearn.metrics.pairwise.linear_kernel(np.asarray(X1).T)
elif ker == 'rbf':
if X2 is not None:
K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, np.asarray(X2).T, gamma)
else:
K = sklearn.metrics.pairwise.rbf_kernel(np.asarray(X1).T, None, gamma)
return K
class TCA:
def __init__(self, kernel_type='primal', dim=30, lamb=1, gamma=1):
'''
Init func
:param kernel_type: kernel, values: 'primal' | 'linear' | 'rbf'
:param dim: dimension after transfer
:param lamb: lambda value in equation
:param gamma: kernel bandwidth for rbf kernel
'''
self.kernel_type = kernel_type
self.dim = dim
self.lamb = lamb
self.gamma = gamma
def fit(self, Xs, Xt):
'''
Transform Xs and Xt
:param Xs: ns * n_feature, source feature
:param Xt: nt * n_feature, target feature
:return: Xs_new and Xt_new after TCA
'''
X = np.hstack((Xs.T, Xt.T))
X /= np.linalg.norm(X, axis=0)
m, n = X.shape
ns, nt = len(Xs), len(Xt)
e = np.vstack((1 / ns * np.ones((ns, 1)), -1 / nt * np.ones((nt, 1))))
M = e * e.T
M = M / np.linalg.norm(M, 'fro')
H = np.eye(n) - 1 / n * np.ones((n, n))
K = kernel(self.kernel_type, X, None, gamma=self.gamma)
n_eye = m if self.kernel_type == 'primal' else n
a, b = np.linalg.multi_dot([K, M, K.T]) + self.lamb * np.eye(n_eye), np.linalg.multi_dot([K, H, K.T])
w, V = scipy.linalg.eig(a, b)
ind = np.argsort(w)
A = V[:, ind[:self.dim]]
Z = np.dot(A.T, K)
Z /= np.linalg.norm(Z, axis=0)
Xs_new, Xt_new = Z[:, :ns].T, Z[:, ns:].T
return Xs_new, Xt_new
def fit_predict(self, Xs, Ys, Xt, Yt):
'''
Transform Xs and Xt, then make predictions on target using 1NN
:param Xs: ns * n_feature, source feature
:param Ys: ns * 1, source label
:param Xt: nt * n_feature, target feature
:param Yt: nt * 1, target label
:return: Accuracy and predicted_labels on the target domain
'''
Xs_new, Xt_new = self.fit(Xs, Xt)
clf = KNeighborsClassifier(n_neighbors=1)
clf.fit(Xs_new, Ys.ravel())
y_pred = clf.predict(Xt_new)
acc = sklearn.metrics.accuracy_score(Yt, y_pred)
return acc, y_pred
if __name__ == '__main__':
domains = ['caltech.mat', 'amazon.mat', 'webcam.mat', 'dslr.mat']
for i in [2]:
for j in [3]:
if i != j:
src, tar = 'data/' + domains[i], 'data/' + domains[j]
src_domain, tar_domain = scipy.io.loadmat(src), scipy.io.loadmat(tar)
Xs, Ys, Xt, Yt = src_domain['feas'], src_domain['label'], tar_domain['feas'], tar_domain['label']
tca = TCA(kernel_type='linear', dim=30, lamb=1, gamma=1)
acc, ypre = tca.fit_predict(Xs, Ys, Xt, Yt)
print(acc)
通过以上过程,我们分别使用Matlab代码和Python代码对经典的TCA方法进行了实验,完成了一个迁移学习任务。其他的非深度迁移学习方法,均可以参考上面的过程。值得庆幸的是,许多论文的作者都公布了他们的文章代码,以方便我们进行接下来的研究。读者可以从Github或者相关作者的网站上获取其他许多方法的代码。