logo

Optuna

A hyperparameter optimization framework

王哲峰 / 2023-04-07


目录

Optuna 简介

Optuna是一种自动超参数优化软件框架,专为机器学习而设计。它具有命令式、define-by-run API。 得益于 define-by-run API,使用 Optuna 编写的代码享有高度模块化, Optuna 的用户可以动态构建超参数的搜索空间

关键特征:

基本概念:

Optuna 安装

Optuna

# PyPI
$ pip install optuna

Optuna Web Dashboard

$ pip install optuna-dashboard
$ optuna-dashboard sqllite:///db.sqlite3

Optuna Demo

快速开始

import optuna
from optuna.visualization import plot_intermediate_values

def objective(trial):
    x = trial.suggest_float("x", -10, 10)
    return (x - 2) ** 2

study = optuna.create_study()
study.optimize(objective, n_trials = 100)

print(study.best_params)  # E.g. {"x": 2.002108042}
plot_intermediate_values(study)

Scikit-Learn

from sklearn import datasets
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import optuna


def objective(trial):
    regressor_name = trial.suggest_categorical(
        'regressor', 
        ['SVR', 'RandomForest']
    )
    if regressor_name == 'SVR':
        svr_c = trial.suggest_float('svr_c', 1e-10, 1e10, log = True)
        regressor_obj = SVR(C = svr_c)
    else:
        rf_max_depth = trial.suggest_int('rf_max_depth', 2, 32)
        regressor_obj = RandomForestRegressor(max_depth = rf_max_depth, n_estimators = 10)
    
    # data
    X, y = datasets.fetch_california_housing(return_X_y = True)
    X_train, X_val, y_train, y_val = train_test_split(X, y, random_state = 0)

    # model training
    regressor_obj.fit(X_train, y_train)
    y_pred = regressor_obj.predict(X_val)

    # metric
    error = mean_squared_error(y_val, y_pred)

    return error


study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 100)
print(study.best_params)

XGBoost

import xgboost as xgb
import optuna


def objective(trial):
    params = {
        "silent": 1,
        "objective": "binary:logistic",
        "booster": trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]),
        "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log = True),
        "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log = True),
    }
    bst = xgb.train(params, dtrain)

    return accuracy

study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 100)
print(study.best_params)

LightGBM

import lightgbm as lgb
import optuna

def objective(trial):
    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
    }

    gbm = lgb.train(param, dtrain)

    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

Keras

import keras
import optuna

def objective(trial):
    # model
    model = Sequential()
    # model params
    model.add(
        Conv2D(
            filters = trial.suggest_categorical('filters', [32, 64]),
            kernel_size = trial.suggest_categorical('kernel_size', [3, 5]),
            strides = trial.suggest_categorical('strides', [1, 2]),
            activation = trial.suggest_categorical('activation', ['relu', 'linear']),
            input_shape = input_shape
        )
    )
    model.add(Flatten())
    model.add(Dense(CLASSES, activation='softmax'))
    # learning rate
    lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
    # model compile
    model.compile(
        loss = 'sparse_categorical_crossentropy', 
        optimizer = RMSprop(lr = lr), 
        metrics=['accuracy']
    )

    return accuracy

study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = 100)

TensorFlow

import tensorflow as tf
import optuna

def objective(trial):
    n_layers = trial.suggest_int('n_layers', 1, 3)
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for i in range(n_layers):
        num_hidden = trial.suggest_int(f'n_units_l{i}', 4, 128, log = True)
        model.add(tf.keras.layers.Dense(num_hidden, activation = 'relu'))
    model.add(tf.keras.layers.Dense(CLASSES))

    return accuracy

study = optuna.create_study(direction = 'maximize')
study.optimize(objective, n_trials = 100)

PyTorch

import torch
import optuna

def objective(trial):
    n_layers = trial.suggest_int('n_layers', 1, 3)
    layers = []
    in_features = 28 * 28
    for i in range(n_layers):
        out_features = trial.suggest_int(f'n_units_l{i}', 4, 128)
        layers.append(torch.nn.Linear(in_features, out_features))
        layers.append(torch.nn.ReLU())
        in_features = out_features
    layers.append(torch.nn.Linear(in_features, 10))
    layers.append(torch.nn.LogSoftmax(dim=1))
    model = torch.nn.Sequential(*layers).to(torch.device('cpu'))

    return accuracy

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

参考