_kernel_pca.py
13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
"""Kernel Principal Components Analysis"""
# Author: Mathieu Blondel <mathieu@mblondel.org>
# License: BSD 3 clause
import numpy as np
from scipy import linalg
from scipy.sparse.linalg import eigsh
from ..utils import check_random_state
from ..utils.extmath import svd_flip
from ..utils.validation import check_is_fitted, _check_psd_eigenvalues
from ..exceptions import NotFittedError
from ..base import BaseEstimator, TransformerMixin
from ..preprocessing import KernelCenterer
from ..metrics.pairwise import pairwise_kernels
from ..utils.validation import _deprecate_positional_args
class KernelPCA(TransformerMixin, BaseEstimator):
"""Kernel Principal component analysis (KPCA)
Non-linear dimensionality reduction through the use of kernels (see
:ref:`metrics`).
Read more in the :ref:`User Guide <kernel_PCA>`.
Parameters
----------
n_components : int, default=None
Number of components. If None, all non-zero components are kept.
kernel : "linear" | "poly" | "rbf" | "sigmoid" | "cosine" | "precomputed"
Kernel. Default="linear".
gamma : float, default=1/n_features
Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
kernels.
degree : int, default=3
Degree for poly kernels. Ignored by other kernels.
coef0 : float, default=1
Independent term in poly and sigmoid kernels.
Ignored by other kernels.
kernel_params : mapping of string to any, default=None
Parameters (keyword arguments) and values for kernel passed as
callable object. Ignored by other kernels.
alpha : int, default=1.0
Hyperparameter of the ridge regression that learns the
inverse transform (when fit_inverse_transform=True).
fit_inverse_transform : bool, default=False
Learn the inverse transform for non-precomputed kernels.
(i.e. learn to find the pre-image of a point)
eigen_solver : string ['auto'|'dense'|'arpack'], default='auto'
Select eigensolver to use. If n_components is much less than
the number of training samples, arpack may be more efficient
than the dense eigensolver.
tol : float, default=0
Convergence tolerance for arpack.
If 0, optimal value will be chosen by arpack.
max_iter : int, default=None
Maximum number of iterations for arpack.
If None, optimal value will be chosen by arpack.
remove_zero_eig : boolean, default=False
If True, then all components with zero eigenvalues are removed, so
that the number of components in the output may be < n_components
(and sometimes even zero due to numerical instability).
When n_components is None, this parameter is ignored and components
with zero eigenvalues are removed regardless.
random_state : int, RandomState instance, default=None
Used when ``eigen_solver`` == 'arpack'. Pass an int for reproducible
results across multiple function calls.
See :term:`Glossary <random_state>`.
.. versionadded:: 0.18
copy_X : boolean, default=True
If True, input X is copied and stored by the model in the `X_fit_`
attribute. If no further changes will be done to X, setting
`copy_X=False` saves memory by storing a reference.
.. versionadded:: 0.18
n_jobs : int or None, optional (default=None)
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. versionadded:: 0.18
Attributes
----------
lambdas_ : array, (n_components,)
Eigenvalues of the centered kernel matrix in decreasing order.
If `n_components` and `remove_zero_eig` are not set,
then all values are stored.
alphas_ : array, (n_samples, n_components)
Eigenvectors of the centered kernel matrix. If `n_components` and
`remove_zero_eig` are not set, then all components are stored.
dual_coef_ : array, (n_samples, n_features)
Inverse transform matrix. Only available when
``fit_inverse_transform`` is True.
X_transformed_fit_ : array, (n_samples, n_components)
Projection of the fitted data on the kernel principal components.
Only available when ``fit_inverse_transform`` is True.
X_fit_ : (n_samples, n_features)
The data used to fit the model. If `copy_X=False`, then `X_fit_` is
a reference. This attribute is used for the calls to transform.
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.decomposition import KernelPCA
>>> X, _ = load_digits(return_X_y=True)
>>> transformer = KernelPCA(n_components=7, kernel='linear')
>>> X_transformed = transformer.fit_transform(X)
>>> X_transformed.shape
(1797, 7)
References
----------
Kernel PCA was introduced in:
Bernhard Schoelkopf, Alexander J. Smola,
and Klaus-Robert Mueller. 1999. Kernel principal
component analysis. In Advances in kernel methods,
MIT Press, Cambridge, MA, USA 327-352.
"""
@_deprecate_positional_args
def __init__(self, n_components=None, *, kernel="linear",
gamma=None, degree=3, coef0=1, kernel_params=None,
alpha=1.0, fit_inverse_transform=False, eigen_solver='auto',
tol=0, max_iter=None, remove_zero_eig=False,
random_state=None, copy_X=True, n_jobs=None):
if fit_inverse_transform and kernel == 'precomputed':
raise ValueError(
"Cannot fit_inverse_transform with a precomputed kernel.")
self.n_components = n_components
self.kernel = kernel
self.kernel_params = kernel_params
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.alpha = alpha
self.fit_inverse_transform = fit_inverse_transform
self.eigen_solver = eigen_solver
self.remove_zero_eig = remove_zero_eig
self.tol = tol
self.max_iter = max_iter
self.random_state = random_state
self.n_jobs = n_jobs
self.copy_X = copy_X
@property
def _pairwise(self):
return self.kernel == "precomputed"
def _get_kernel(self, X, Y=None):
if callable(self.kernel):
params = self.kernel_params or {}
else:
params = {"gamma": self.gamma,
"degree": self.degree,
"coef0": self.coef0}
return pairwise_kernels(X, Y, metric=self.kernel,
filter_params=True, n_jobs=self.n_jobs,
**params)
def _fit_transform(self, K):
""" Fit's using kernel K"""
# center kernel
K = self._centerer.fit_transform(K)
if self.n_components is None:
n_components = K.shape[0]
else:
n_components = min(K.shape[0], self.n_components)
# compute eigenvectors
if self.eigen_solver == 'auto':
if K.shape[0] > 200 and n_components < 10:
eigen_solver = 'arpack'
else:
eigen_solver = 'dense'
else:
eigen_solver = self.eigen_solver
if eigen_solver == 'dense':
self.lambdas_, self.alphas_ = linalg.eigh(
K, eigvals=(K.shape[0] - n_components, K.shape[0] - 1))
elif eigen_solver == 'arpack':
random_state = check_random_state(self.random_state)
# initialize with [-1,1] as in ARPACK
v0 = random_state.uniform(-1, 1, K.shape[0])
self.lambdas_, self.alphas_ = eigsh(K, n_components,
which="LA",
tol=self.tol,
maxiter=self.max_iter,
v0=v0)
# make sure that the eigenvalues are ok and fix numerical issues
self.lambdas_ = _check_psd_eigenvalues(self.lambdas_,
enable_warnings=False)
# flip eigenvectors' sign to enforce deterministic output
self.alphas_, _ = svd_flip(self.alphas_,
np.zeros_like(self.alphas_).T)
# sort eigenvectors in descending order
indices = self.lambdas_.argsort()[::-1]
self.lambdas_ = self.lambdas_[indices]
self.alphas_ = self.alphas_[:, indices]
# remove eigenvectors with a zero eigenvalue (null space) if required
if self.remove_zero_eig or self.n_components is None:
self.alphas_ = self.alphas_[:, self.lambdas_ > 0]
self.lambdas_ = self.lambdas_[self.lambdas_ > 0]
# Maintenance note on Eigenvectors normalization
# ----------------------------------------------
# there is a link between
# the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'
# if v is an eigenvector of K
# then Phi(X)v is an eigenvector of Phi(X)Phi(X)'
# if u is an eigenvector of Phi(X)Phi(X)'
# then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)
#
# At this stage our self.alphas_ (the v) have norm 1, we need to scale
# them so that eigenvectors in kernel feature space (the u) have norm=1
# instead
#
# We COULD scale them here:
# self.alphas_ = self.alphas_ / np.sqrt(self.lambdas_)
#
# But choose to perform that LATER when needed, in `fit()` and in
# `transform()`.
return K
def _fit_inverse_transform(self, X_transformed, X):
if hasattr(X, "tocsr"):
raise NotImplementedError("Inverse transform not implemented for "
"sparse matrices!")
n_samples = X_transformed.shape[0]
K = self._get_kernel(X_transformed)
K.flat[::n_samples + 1] += self.alpha
self.dual_coef_ = linalg.solve(K, X, sym_pos=True, overwrite_a=True)
self.X_transformed_fit_ = X_transformed
def fit(self, X, y=None):
"""Fit the model from data in X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training vector, where n_samples in the number of samples
and n_features is the number of features.
Returns
-------
self : object
Returns the instance itself.
"""
X = self._validate_data(X, accept_sparse='csr', copy=self.copy_X)
self._centerer = KernelCenterer()
K = self._get_kernel(X)
self._fit_transform(K)
if self.fit_inverse_transform:
# no need to use the kernel to transform X, use shortcut expression
X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
self._fit_inverse_transform(X_transformed, X)
self.X_fit_ = X
return self
def fit_transform(self, X, y=None, **params):
"""Fit the model from data in X and transform X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Training vector, where n_samples in the number of samples
and n_features is the number of features.
Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
self.fit(X, **params)
# no need to use the kernel to transform X, use shortcut expression
X_transformed = self.alphas_ * np.sqrt(self.lambdas_)
if self.fit_inverse_transform:
self._fit_inverse_transform(X_transformed, X)
return X_transformed
def transform(self, X):
"""Transform X.
Parameters
----------
X : array-like, shape (n_samples, n_features)
Returns
-------
X_new : array-like, shape (n_samples, n_components)
"""
check_is_fitted(self)
# Compute centered gram matrix between X and training data X_fit_
K = self._centerer.transform(self._get_kernel(X, self.X_fit_))
# scale eigenvectors (properly account for null-space for dot product)
non_zeros = np.flatnonzero(self.lambdas_)
scaled_alphas = np.zeros_like(self.alphas_)
scaled_alphas[:, non_zeros] = (self.alphas_[:, non_zeros]
/ np.sqrt(self.lambdas_[non_zeros]))
# Project with a scalar product between K and the scaled eigenvectors
return np.dot(K, scaled_alphas)
def inverse_transform(self, X):
"""Transform X back to original space.
Parameters
----------
X : array-like, shape (n_samples, n_components)
Returns
-------
X_new : array-like, shape (n_samples, n_features)
References
----------
"Learning to Find Pre-Images", G BakIr et al, 2004.
"""
if not self.fit_inverse_transform:
raise NotFittedError("The fit_inverse_transform parameter was not"
" set to True when instantiating and hence "
"the inverse transform is not available.")
K = self._get_kernel(X, self.X_transformed_fit_)
n_samples = self.X_transformed_fit_.shape[0]
K.flat[::n_samples + 1] += self.alpha
return np.dot(K, self.dual_coef_)