12. SPACE = [skopt.space.Real(0.01, 0.5, name='learning_rate', prior='log-uniform'),
         skopt.space.Integer(1, 30, name='max_depth'),
         skopt.space.Integer(2, 100, name='num_leaves'),
         skopt.space.Integer(10, 1000, name='min_data_in_leaf'),
         skopt.space.Real(0.1, 1.0, name='feature_fraction', prior='uniform'),
         skopt.space.Real(0.1, 1.0, name='subsample', prior='uniform'),
         ]
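For context, a minimal sketch of how this SPACE could be consumed with scikit-optimize's named-args helper; train_evaluate, X, and y are assumed from the surrounding talk:

import skopt

@skopt.utils.use_named_args(SPACE)
def objective(**params):
    # skopt minimizes, so negate the score we want to maximize
    return -1.0 * train_evaluate(X, y, params)

results = skopt.forest_minimize(objective, SPACE, n_calls=100)
print(results.x, results.fun)  # best parameters and best (negated) score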
34. from collections import OrderedDict

def objective(trial):
    params = OrderedDict([
        ('learning_rate', trial.suggest_loguniform('learning_rate', 0.01, 0.5)),
        ('max_depth', trial.suggest_int('max_depth', 1, 30)),
        ('num_leaves', trial.suggest_int('num_leaves', 2, 100)),
        ('min_data_in_leaf', trial.suggest_int('min_data_in_leaf', 10, 1000)),
        ('feature_fraction', trial.suggest_uniform('feature_fraction', 0.1, 1.0)),
        ('subsample', trial.suggest_uniform('subsample', 0.1, 1.0))])
    score = -1.0 * train_evaluate(X, y, params)
    return score
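Running it is then a two-liner; a minimal sketch (the default study direction is minimize, which matches the negated score):

import optuna

study = optuna.create_study()
study.optimize(objective, n_trials=100)
print(study.best_params, study.best_value)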
36. def objective(trial):
    classifier_name = trial.suggest_categorical('classifier', ['SVC', 'RandomForest'])
    if classifier_name == 'SVC':
        svc_c = trial.suggest_loguniform('svc_c', 1e-10, 1e10)
        classifier_obj = sklearn.svm.SVC(C=svc_c)
    else:
        rf_max_depth = int(trial.suggest_loguniform('rf_max_depth', 2, 32))
        classifier_obj = sklearn.ensemble.RandomForestClassifier(max_depth=rf_max_depth)
    …
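The elided tail has to score whichever classifier was sampled; a plausible, self-contained version, assuming cross-validation accuracy as the score (X and y come from the surrounding talk):

import sklearn.ensemble
import sklearn.model_selection
import sklearn.svm

def objective(trial):
    classifier_name = trial.suggest_categorical('classifier', ['SVC', 'RandomForest'])
    if classifier_name == 'SVC':
        svc_c = trial.suggest_loguniform('svc_c', 1e-10, 1e10)
        classifier_obj = sklearn.svm.SVC(C=svc_c)
    else:
        rf_max_depth = int(trial.suggest_loguniform('rf_max_depth', 2, 32))
        classifier_obj = sklearn.ensemble.RandomForestClassifier(max_depth=rf_max_depth)
    # assumed scoring step: 3-fold cross-validation accuracy
    score = sklearn.model_selection.cross_val_score(classifier_obj, X, y, cv=3)
    return score.mean()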
39. from optuna.integration import LightGBMPruningCallback

def objective(trial):
    params = OrderedDict([
        ('max_depth', trial.suggest_int('max_depth', 1, 30)),
        ('num_leaves', trial.suggest_int('num_leaves', 2, 100))])
    pruning_callback = LightGBMPruningCallback(trial, 'auc')
    score = -1.0 * train_evaluate_with_pruning(X, y, params, pruning_callback)
    return score

def train_evaluate_with_pruning(X, y, params, callback):
    ...
    model = lgb.train(params, train_data, ..., callbacks=[callback])
    return model.best_score['valid']['auc']
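For the callback to actually stop unpromising trials, the study needs a pruner; a minimal sketch using Optuna's built-in MedianPruner:

import optuna

study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=100)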
40. def monitor(score):
    neptune.send_metric('run_score', score)

def objective(trial):
    params = OrderedDict([
        ('max_depth', trial.suggest_int('max_depth', 1, 30)),
        ('num_leaves', trial.suggest_int('num_leaves', 2, 100))])
    score = -1.0 * train_evaluate(X, y, params)
    monitor(score)
    return score
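An alternative to calling monitor() inside the objective is Optuna's callback hook on optimize(); a minimal sketch, keeping the Neptune channel name from above:

def neptune_callback(study, trial):
    # invoked by Optuna after every finished trial
    neptune.send_metric('run_score', trial.value)

study = optuna.create_study()
study.optimize(objective, n_trials=100, callbacks=[neptune_callback])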
45. # optuna_search.py
…
study = optuna.load_study(study_name='distributed-search', storage='sqlite:///example.db')
study.optimize(objective, n_trials=100)
...

$ optuna create-study --study-name "distributed-search" --storage "sqlite:///example.db"   # terminal 1
$ python optuna_search.py                                                                  # terminal 2
$ python optuna_search.py                                                                  # terminal 3
58. from hpbandster.core.worker import Worker

class TrainEvalWorker(Worker):
    ...
    def compute(self, config, budget, working_directory, *args, **kwargs):
        loss = -1.0 * train_evaluate(self.X, self.y, config)
        return ({'loss': loss,
                 'info': {'auxiliary_stuff': 'worked'}})
60. import ConfigSpace as CS
import ConfigSpace.hyperparameters as CSH

class TrainEvalWorker(Worker):
    ...
    @staticmethod
    def get_configspace():
        cs = CS.ConfigurationSpace()
        learning_rate = CSH.UniformFloatHyperparameter('learning_rate',
                                                       lower=0.01, upper=0.5,
                                                       default_value=0.01, log=True)
        subsample = CSH.UniformFloatHyperparameter('subsample',
                                                   lower=0.1, upper=1.0,
                                                   default_value=0.5, log=False)
        cs.add_hyperparameters([learning_rate, subsample])
        return cs
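A quick sanity check of the space is to draw a random configuration from it (sample_configuration is part of the ConfigSpace API):

cs = TrainEvalWorker.get_configspace()
print(cs.sample_configuration().get_dictionary())
# e.g. {'learning_rate': 0.04, 'subsample': 0.73} (values will vary)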
62. from hpbandster.optimizers import BOHB

optim = BOHB(configspace=worker.get_configspace(),
             run_id=RUN_ID,
             nameserver=ns_host,
             nameserver_port=ns_port,
             result_logger=NeptuneLogger(),
             eta=3, min_budget=0.1, max_budget=1,
             num_samples=64, top_n_percent=15,
             min_bandwidth=1e-3, bandwidth_factor=3)
study = optim.run(n_iterations=100)
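The snippet above assumes a running Pyro4 nameserver that yields ns_host and ns_port; a minimal sketch of the standard hpbandster setup that precedes it:

import hpbandster.core.nameserver as hpns

ns = hpns.NameServer(run_id=RUN_ID, host='127.0.0.1', port=None)
ns_host, ns_port = ns.start()

worker = TrainEvalWorker(run_id=RUN_ID, nameserver=ns_host, nameserver_port=ns_port)
worker.run(background=True)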
63. class NeptuneLogger:
    def new_config(self, *args, **kwargs):
        pass

    def __call__(self, job):
        neptune.send_metric('run_score', job.result['loss'])
        neptune.send_text('run_parameters', str(job.kwargs['config']))
74. workers = []
for i in range(N_WORKERS):
    w = TrainEvalWorker(run_id=RUN_ID, id=i, sleep_interval=0.5,
                        nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=True)
    workers.append(w)

optim = BOHB(configspace=TrainEvalWorker.get_configspace(),
             run_id=RUN_ID,
             nameserver=ns_host,
             nameserver_port=ns_port)
study = optim.run(n_iterations=100, min_n_workers=N_WORKERS)
75. workers = []
for i in range(N_WORKERS):
    w = TrainEvalWorker(run_id=RUN_ID, id=i, sleep_interval=0.5,
                        nameserver=ns_host, nameserver_port=ns_port)
    w.run(background=False)  # blocks: this process acts as a dedicated worker
    exit(0)

optim = BOHB(configspace=TrainEvalWorker.get_configspace(),
             run_id=RUN_ID,
             nameserver=ns_host,
             nameserver_port=ns_port)
study = optim.run(n_iterations=100, min_n_workers=N_WORKERS)
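Once run() returns, the hpbandster Result object can be queried for the incumbent, and everything should be shut down cleanly; a minimal sketch (ns is the nameserver started earlier):

id2config = study.get_id2config_mapping()
incumbent = study.get_incumbent_id()
print('Best found configuration:', id2config[incumbent]['config'])

# clean shutdown of master, workers, and nameserver
optim.shutdown(shutdown_workers=True)
ns.shutdown()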
82.
                         Scikit-Optimize            Optuna           HpBandSter          Hyperopt
API/ease of use          Great                      Great            Difficult           Good
Documentation            Great                      Great            Ok(ish)             Bad
Speed/Parallelization    Fast if sequential / None  Great            Good                Ok
Visualizations           Amazing                    None             Very lib specific   Some
*Experimental results    0.8566 (100)               0.8419 (100)     0.8629 (100)        0.8420 (100)
                                                    0.8597 (10000)