-
Notifications
You must be signed in to change notification settings - Fork 42
/
multiprocessing_example.py
113 lines (90 loc) · 3.46 KB
/
multiprocessing_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
"""
Hyperactive can perform optimizations of multiple different objective functions
in parallel. This can be done via multiprocessing, joblib or a custom wrapper-function.
The processes won't communicate with each other.
You can add as many searches as you like to the optimization run
(.add_search(...)) and run each of those searches n times (n_jobs).
In the example below we are performing 4 searches in parallel:
- model_etc one time
- model_rfc one time
- model_gbc two times
"""
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost import XGBClassifier
from sklearn.datasets import load_breast_cancer
from hyperactive import Hyperactive
# Load the breast-cancer dataset once at import time; the objective functions
# read X (features) and y (targets) as module-level globals.
data = load_breast_cancer()
X = data.data
y = data.target
def model_etc(opt):
    """Objective function: score an ExtraTreesClassifier configured from ``opt``.

    ``opt`` is indexed by hyperparameter name; the names read here are the
    ones listed in ``hyperparameter_names``.

    Returns the mean of the ``cross_val_score`` results (``cv=3``) computed
    on the module-level ``X`` and ``y``.
    """
    hyperparameter_names = (
        "n_estimators",
        "criterion",
        "max_features",
        "min_samples_split",
        "min_samples_leaf",
        "bootstrap",
    )
    # Look each value up by name, in the same order the estimator receives it.
    params = {name: opt[name] for name in hyperparameter_names}
    classifier = ExtraTreesClassifier(**params)
    return cross_val_score(classifier, X, y, cv=3).mean()
def model_rfc(opt):
    """Objective function: score a RandomForestClassifier configured from ``opt``.

    ``opt`` is indexed by hyperparameter name; the names read here are the
    ones listed in ``hyperparameter_names``.

    Returns the mean of the ``cross_val_score`` results (``cv=3``) computed
    on the module-level ``X`` and ``y``.
    """
    hyperparameter_names = (
        "n_estimators",
        "criterion",
        "max_features",
        "min_samples_split",
        "min_samples_leaf",
        "bootstrap",
    )
    # Look each value up by name, in the same order the estimator receives it.
    params = {name: opt[name] for name in hyperparameter_names}
    classifier = RandomForestClassifier(**params)
    return cross_val_score(classifier, X, y, cv=3).mean()
def model_gbc(opt):
    """Objective function: score a GradientBoostingClassifier configured from ``opt``.

    ``opt`` is indexed by hyperparameter name; the names read here are the
    ones listed in ``hyperparameter_names``.

    Returns the mean of the ``cross_val_score`` results (``cv=3``) computed
    on the module-level ``X`` and ``y``.
    """
    hyperparameter_names = (
        "n_estimators",
        "learning_rate",
        "max_depth",
        "min_samples_split",
        "min_samples_leaf",
        "subsample",
        "max_features",
    )
    # Look each value up by name, in the same order the estimator receives it.
    params = {name: opt[name] for name in hyperparameter_names}
    classifier = GradientBoostingClassifier(**params)
    return cross_val_score(classifier, X, y, cv=3).mean()
# Candidate values for every hyperparameter that model_etc reads from `opt`.
search_space_etc = {
    # 10, 20, ..., 190 trees
    "n_estimators": list(range(10, 200, 10)),
    "criterion": ["gini", "entropy"],
    # Fractions 0.05, 0.10, ..., 1.00; the 1.01 stop keeps 1.0 in the grid.
    "max_features": list(np.arange(0.05, 1.01, 0.05)),
    # 2 through 20
    "min_samples_split": list(range(2, 21)),
    # 1 through 20
    "min_samples_leaf": list(range(1, 21)),
    "bootstrap": [True, False],
}
# Candidate values for every hyperparameter that model_rfc reads from `opt`.
search_space_rfc = {
    # 10, 20, ..., 190 trees
    "n_estimators": list(range(10, 200, 10)),
    "criterion": ["gini", "entropy"],
    # Fractions 0.05, 0.10, ..., 1.00; the 1.01 stop keeps 1.0 in the grid.
    "max_features": list(np.arange(0.05, 1.01, 0.05)),
    # 2 through 20
    "min_samples_split": list(range(2, 21)),
    # 1 through 20
    "min_samples_leaf": list(range(1, 21)),
    "bootstrap": [True, False],
}
# Candidate values for every hyperparameter that model_gbc reads from `opt`.
search_space_gbc = {
    # 10, 20, ..., 190 boosting stages
    "n_estimators": list(range(10, 200, 10)),
    "learning_rate": [0.001, 0.01, 0.1, 0.5, 1.0],
    # 1 through 10
    "max_depth": list(range(1, 11)),
    # 2 through 20
    "min_samples_split": list(range(2, 21)),
    # 1 through 20
    "min_samples_leaf": list(range(1, 21)),
    # Fractions 0.05, 0.10, ..., 1.00; the 1.01 stop keeps 1.0 in the grid.
    "subsample": list(np.arange(0.05, 1.01, 0.05)),
    "max_features": list(np.arange(0.05, 1.01, 0.05)),
}
# Entry-point guard: the searches run in parallel worker processes. With
# multiprocessing start methods that re-import the main module (e.g. "spawn"),
# unguarded top-level code would be executed again inside every worker, so
# the run is only launched when this file is executed as a script.
if __name__ == "__main__":
    hyper = Hyperactive()

    # Three objective functions are registered; the gradient-boosting search
    # is run twice (n_jobs=2), giving the four searches described in the
    # module docstring.
    hyper.add_search(model_etc, search_space_etc, n_iter=50)
    hyper.add_search(model_rfc, search_space_rfc, n_iter=50)
    hyper.add_search(model_gbc, search_space_gbc, n_iter=50, n_jobs=2)

    # NOTE(review): max_time presumably caps the whole run's duration; confirm
    # the unit against the Hyperactive documentation.
    hyper.run(max_time=5)

    # Results are looked up by the objective function they belong to.
    search_data_etc = hyper.search_data(model_etc)
    search_data_rfc = hyper.search_data(model_rfc)
    search_data_gbc = hyper.search_data(model_gbc)

    print("\n ExtraTreesClassifier search data \n", search_data_etc)
    # Previously fetched but never shown; print it like the other two results.
    print("\n RandomForestClassifier search data \n", search_data_rfc)
    print("\n GradientBoostingClassifier search data \n", search_data_gbc)