# _mocking.py
import numpy as np
from ..base import BaseEstimator, ClassifierMixin
from .validation import _num_samples, check_array
class ArraySlicingWrapper:
"""
Parameters
----------
array
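
    Examples
    --------
    A minimal usage sketch; the input below is illustrative only:

    >>> import numpy as np
    >>> wrapped = ArraySlicingWrapper(np.arange(4))
    >>> wrapped[:2].shape
    (2,)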
"""
def __init__(self, array):
self.array = array
def __getitem__(self, aslice):
return MockDataFrame(self.array[aslice])
class MockDataFrame:
"""
Parameters
----------
array
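
    Examples
    --------
    A minimal usage sketch; the array below is illustrative only:

    >>> import numpy as np
    >>> df = MockDataFrame(np.arange(6).reshape(3, 2))
    >>> len(df)
    3
    >>> df.shape
    (3, 2)
    >>> df.iloc[:2].shape
    (2, 2)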
"""
    # Has shape and length but does not support direct indexing (only ``iloc``).
def __init__(self, array):
self.array = array
self.values = array
self.shape = array.shape
self.ndim = array.ndim
# ugly hack to make iloc work.
self.iloc = ArraySlicingWrapper(array)
def __len__(self):
return len(self.array)
def __array__(self, dtype=None):
# Pandas data frames also are array-like: we want to make sure that
# input validation in cross-validation does not try to call that
# method.
return self.array
def __eq__(self, other):
return MockDataFrame(self.array == other.array)
def __ne__(self, other):
return not self == other
class CheckingClassifier(ClassifierMixin, BaseEstimator):
"""Dummy classifier to test pipelining and meta-estimators.

    Checks some property of `X` and `y` in fit / predict.

    This allows testing whether pipelines / cross-validation or meta-estimators
    changed the input.

    Parameters
    ----------
    check_y : callable, default=None
        The callable used to validate `y` in ``fit``. It should return a bool;
        `False` triggers an `AssertionError`.
    check_X : callable, default=None
        The callable used to validate `X` in ``fit`` and ``predict``. It should
        return a bool; `False` triggers an `AssertionError`.
    foo_param : int, default=0
        A dummy parameter: ``score`` returns 1. when `foo_param > 1` and
        0. otherwise.
    expected_fit_params : list of str, default=None
        The names of the fit parameters expected to be passed to ``fit``.

    Attributes
    ----------
    classes_ : ndarray
        The unique classes seen during ``fit``.
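
    Examples
    --------
    A minimal usage sketch; the data and the ``check_X`` callable below are
    illustrative only:

    >>> import numpy as np
    >>> X, y = np.ones((4, 2)), np.array([0, 1, 0, 1])
    >>> clf = CheckingClassifier(check_X=lambda X: X.shape[1] == 2)
    >>> clf.fit(X, y).predict(X)
    array([0, 0, 0, 0])
    >>> clf.score(X, y)
    0.0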
"""
def __init__(self, check_y=None, check_X=None, foo_param=0,
expected_fit_params=None):
self.check_y = check_y
self.check_X = check_X
self.foo_param = foo_param
self.expected_fit_params = expected_fit_params
def fit(self, X, y, **fit_params):
"""
        Fit classifier.

Parameters
----------
X : array-like of shape (n_samples, n_features)
Training vector, where n_samples is the number of samples and
n_features is the number of features.
y : array-like of shape (n_samples, n_output) or (n_samples,), optional
Target relative to X for classification or regression;
None for unsupervised learning.
        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator.

        Returns
        -------
        self : object
            The fitted estimator.
        """
assert len(X) == len(y)
if self.check_X is not None:
assert self.check_X(X)
if self.check_y is not None:
assert self.check_y(y)
self.n_features_in_ = len(X)
self.classes_ = np.unique(check_array(y, ensure_2d=False,
allow_nd=True))
if self.expected_fit_params:
missing = set(self.expected_fit_params) - set(fit_params)
assert len(missing) == 0, 'Expected fit parameter(s) %s not ' \
'seen.' % list(missing)
for key, value in fit_params.items():
assert len(value) == len(X), (
'Fit parameter %s has length %d; '
'expected %d.'
% (key, len(value), len(X)))
return self
def predict(self, T):
"""
Parameters
----------
T : indexable, length n_samples
"""
if self.check_X is not None:
assert self.check_X(T)
        return self.classes_[np.zeros(_num_samples(T), dtype=int)]
def score(self, X=None, Y=None):
"""
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data, where n_samples is the number of samples and
n_features is the number of features.
Y : array-like of shape (n_samples, n_output) or (n_samples,), optional
Target relative to X for classification or regression;
None for unsupervised learning.
"""
if self.foo_param > 1:
score = 1.
else:
score = 0.
return score
def _more_tags(self):
return {'_skip_test': True, 'X_types': ['1dlabel']}
class NoSampleWeightWrapper(BaseEstimator):
"""Wrap estimator which will not expose `sample_weight`.
Parameters
----------
est : estimator, default=None
The estimator to wrap.
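
    Examples
    --------
    A minimal usage sketch; any estimator providing ``fit``/``predict`` works
    here, ``DecisionTreeClassifier`` is used only for illustration:

    >>> import numpy as np
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X, y = np.ones((4, 2)), np.array([0, 1, 0, 1])
    >>> wrapper = NoSampleWeightWrapper(DecisionTreeClassifier())
    >>> _ = wrapper.fit(X, y)
    >>> wrapper.predict(X).shape
    (4,)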
"""
def __init__(self, est=None):
self.est = est
def fit(self, X, y):
return self.est.fit(X, y)
def predict(self, X):
return self.est.predict(X)
def predict_proba(self, X):
return self.est.predict_proba(X)
def _more_tags(self):
return {'_skip_test': True}