-
Notifications
You must be signed in to change notification settings - Fork 3
/
test_searchgrid.py
113 lines (96 loc) · 4.03 KB
/
test_searchgrid.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pytest
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.pipeline import make_pipeline as skl_make_pipeline
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.feature_selection import SelectKBest, SelectPercentile
from sklearn.datasets import load_iris
from searchgrid import set_grid, build_param_grid, make_grid_search
from searchgrid import make_pipeline, make_union
@pytest.mark.parametrize(
    ('estimator', 'param_grid'),
    [
        # A single parameter attached directly to a bare estimator.
        (
            set_grid(SVC(), C=[1, 2]),
            {'C': [1, 2]},
        ),
        # Two parameters attached to the same bare estimator.
        (
            set_grid(SVC(), C=[1, 2], gamma=[1, 2]),
            {'C': [1, 2], 'gamma': [1, 2]},
        ),
        # The annotated estimator sits inside a scikit-learn pipeline, so
        # the expected keys carry the ``svc__`` step prefix.
        (
            skl_make_pipeline(set_grid(SVC(), C=[1, 2], gamma=[1, 2])),
            {'svc__C': [1, 2], 'svc__gamma': [1, 2]},
        ),
    ],
)
def test_build_param_grid(estimator, param_grid):
    """Check ``build_param_grid`` against the expected grid for each case."""
    built = build_param_grid(estimator)
    assert built == param_grid
def test_build_param_grid_set_estimator():
    """Check the grid built when a pipeline step itself is searched over.

    The ``clf`` step is given four candidate estimators, two of which carry
    their own grids.  The expected result has one entry per candidate with
    its own grid, followed by a single entry grouping the candidates
    without one; every entry also carries the ``sel`` step's grid.
    """
    linear_svc = SVC()
    logreg = LogisticRegression()
    poly_svc = SVC()
    sgd = SGDClassifier()

    selector = set_grid(SelectKBest(), k=[2, 3])
    pipeline = Pipeline([('sel', selector), ('clf', None)])
    candidates = [
        set_grid(linear_svc, kernel=['linear']),
        logreg,
        set_grid(poly_svc, kernel=['poly'], degree=[2, 3]),
        sgd,
    ]
    estimator = set_grid(pipeline, clf=candidates)

    expected = [
        {'clf': [linear_svc], 'clf__kernel': ['linear'], 'sel__k': [2, 3]},
        {
            'clf': [poly_svc],
            'clf__kernel': ['poly'],
            'clf__degree': [2, 3],
            'sel__k': [2, 3],
        },
        {'clf': [logreg, sgd], 'sel__k': [2, 3]},
    ]
    assert build_param_grid(estimator) == expected
def test_step_estimator_grid_not_shared():
    """Regression test for issue #10.

    An estimator with its own grid is both the pipeline's initial ``root``
    step and one of the alternatives for that step.  Its ``C`` grid must
    appear only in the grid entry that selects it, not in the entry for
    the other alternative.
    """
    logreg = set_grid(LogisticRegression(), C=[1, 2, 3])
    plain_svc = SVC()
    pipeline = set_grid(
        Pipeline([('root', logreg)]),
        root=[logreg, plain_svc],
    )

    grid = build_param_grid(pipeline)
    assert len(grid) == 2
    logreg_entry, svc_entry = grid

    # First entry: the logistic regression together with its own C grid.
    assert logreg in logreg_entry['root']
    assert plain_svc not in logreg_entry['root']
    assert 'root__C' in logreg_entry

    # Second entry: the SVC alone, with no leaked C grid.
    assert plain_svc in svc_entry['root']
    assert logreg not in svc_entry['root']
    assert 'root__C' not in svc_entry
def test_make_grid_search():
    """Fit searches built by ``make_grid_search`` and check their results.

    Three searches are exercised on the iris data: an estimator with no
    grid, an estimator with a two-point grid, and a list of both.  Each
    must keep the ``cv`` argument and report the expected number of
    parameter settings.
    """
    X, y = load_iris(return_X_y=True)
    logreg = LogisticRegression()
    poly_svc = set_grid(SVC(), kernel=['poly'], degree=[2, 3])

    empty_search = make_grid_search(logreg, cv=5)  # empty grid
    svc_search = make_grid_search(poly_svc, cv=5)
    combined_search = make_grid_search([logreg, poly_svc], cv=5)

    expectations = (
        (empty_search, 1),
        (svc_search, 2),
        (combined_search, 3),
    )
    for search, n_candidates in expectations:
        search.fit(X, y)
        assert search.cv == 5
        assert len(search.cv_results_['params']) == n_candidates

    # In the combined search the estimators are alternatives for 'root'.
    results = combined_search.cv_results_
    is_svc = results['param_root'] == poly_svc
    assert is_svc.sum() == 2
    assert results['param_root__degree'][is_svc].tolist() == [2, 3]
    assert results['param_root'][~is_svc].tolist() == [logreg]
def test_make_pipeline():
    """Check step naming and grids from ``make_pipeline`` / ``make_union``.

    Both helpers receive the same mix of single steps and lists of
    alternatives.  The test verifies the generated step names, the
    estimator initially placed in each step, and the ``_param_grid``
    recorded for every step that was given as a list of alternatives.
    """
    t1 = SelectKBest()
    t2 = SelectKBest()
    t3 = SelectKBest()
    t4 = SelectKBest()
    t5 = SelectPercentile()
    t6 = SelectKBest()
    t7 = SelectKBest()
    t8 = SelectKBest()
    t9 = SelectPercentile()

    in_steps = [
        [t1, None],
        [t2, t3],
        [t4, t5],  # mixed
        t6,
        [None, t7],
        [t8, None, t9],  # mixed
        None,
    ]
    pipe = make_pipeline(*in_steps, memory='/path/to/nowhere')
    union = make_union(*in_steps)

    expected_names = (
        'selectkbest-1', 'selectkbest-2', 'alt-1',
        'selectkbest-3', 'selectkbest-4', 'alt-2', 'nonetype',
    )
    expected_steps = (t1, t2, t4, t6, None, t8, None)
    # Step position -> alternatives expected in that step's grid entry.
    expected_grid = {
        0: [t1, None],
        1: [t2, t3],
        2: [t4, t5],
        4: [None, t7],
        5: [t8, None, t9],
    }

    composites = [(pipe, pipe.steps), (union, union.transformer_list)]
    for est, est_steps in composites:
        names, steps = zip(*est_steps)
        assert names == expected_names
        assert steps == expected_steps

        assert len(est._param_grid) == 5
        for position, alternatives in expected_grid.items():
            assert est._param_grid[names[position]] == alternatives

    assert type(pipe) is Pipeline
    assert type(union) is FeatureUnion
    assert pipe.memory == '/path/to/nowhere'