import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# load the data
data = pd.read_csv("https://cdn.coggle.club/kaggle-flight-delays/flights_10k.csv.zip")
# keep only the useful columns
data = data[["MONTH", "DAY", "DAY_OF_WEEK", "AIRLINE", "FLIGHT_NUMBER", "DESTINATION_AIRPORT",
             "ORIGIN_AIRPORT", "AIR_TIME", "DEPARTURE_TIME", "DISTANCE", "ARRIVAL_DELAY"]]
data.dropna(inplace=True)
# binarize the label: flights arriving more than 10 minutes late count as delayed
data["ARRIVAL_DELAY"] = (data["ARRIVAL_DELAY"] > 10) * 1
# integer-encode the categorical columns
cols = ["AIRLINE", "FLIGHT_NUMBER", "DESTINATION_AIRPORT", "ORIGIN_AIRPORT"]
for item in cols:
    data[item] = data[item].astype("category").cat.codes + 1
# split into train and test sets
train, test, y_train, y_test = train_test_split(data.drop(["ARRIVAL_DELAY"], axis=1),
                                                data["ARRIVAL_DELAY"],
                                                random_state=10, test_size=0.25)
data  # inspect the processed dataframe
import xgboost as xgb
from sklearn import metrics
from sklearn.model_selection import GridSearchCV
def auc(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict_proba(train)[:, 1]),
            metrics.roc_auc_score(y_test, m.predict_proba(test)[:, 1]))
# Parameter Tuning
model = xgb.XGBClassifier()
param_dist = {"max_depth": [10, 30, 50],
              "min_child_weight": [1, 3, 6],
              "n_estimators": [200],
              "learning_rate": [0.05, 0.1, 0.16]}
grid_search = GridSearchCV(model, param_grid=param_dist, cv = 3,
verbose=10, n_jobs=-1)
grid_search.fit(train, y_train)
grid_search.best_estimator_
model = xgb.XGBClassifier(max_depth=3, min_child_weight=1, n_estimators=20,
                          n_jobs=-1, verbosity=1, learning_rate=0.16)
model.fit(train,y_train)
print(auc(model, train, test))
import lightgbm as lgb
from sklearn import metrics
def auc2(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict(train)),
            metrics.roc_auc_score(y_test, m.predict(test)))
lg = lgb.LGBMClassifier(silent=False)
param_dist = {"max_depth": [25, 50, 75],
              "learning_rate": [0.01, 0.05, 0.1],
              "num_leaves": [300, 900, 1200],
              "n_estimators": [200]}
grid_search = GridSearchCV(lg, n_jobs=-1, param_grid=param_dist, cv = 3, scoring="roc_auc", verbose=5)
grid_search.fit(train,y_train)
grid_search.best_estimator_
d_train = lgb.Dataset(train, label=y_train, free_raw_data=False)
params = {"max_depth": 3, "learning_rate" : 0.1, "num_leaves": 900, "n_estimators": 20}
# Without Categorical Features
model2 = lgb.train(params, d_train)
print(auc2(model2, train, test))
# With categorical features
cate_features_name = ["MONTH","DAY","DAY_OF_WEEK","AIRLINE","DESTINATION_AIRPORT",
"ORIGIN_AIRPORT"]
model2 = lgb.train(params, d_train, categorical_feature = cate_features_name)
print(auc2(model2, train, test))
import catboost
cat_features_index = [0,1,2,3,4,5,6]
def auc(m, train, test):
    return (metrics.roc_auc_score(y_train, m.predict_proba(train)[:, 1]),
            metrics.roc_auc_score(y_test, m.predict_proba(test)[:, 1]))
params = {'depth': [4, 7, 10],
          'learning_rate': [0.03, 0.1, 0.15],
          'l2_leaf_reg': [1, 4, 9],
          'iterations': [20],
          'silent': [True]}
cb = catboost.CatBoostClassifier()
cb_model = GridSearchCV(cb, params, scoring="roc_auc", cv = 3)
cb_model.fit(train, y_train)
# Without categorical features
clf = catboost.CatBoostClassifier(eval_metric="AUC", depth=3, silent=True,
iterations= 20, l2_leaf_reg= 9, learning_rate= 0.15)
clf.fit(train,y_train)
print(auc(clf, train, test))
# With Categorical features
clf = catboost.CatBoostClassifier(eval_metric="AUC",one_hot_max_size=31, silent=True,
depth=3, iterations= 20, l2_leaf_reg= 9, learning_rate= 0.15)
clf.fit(train,y_train, cat_features= cat_features_index)
print(auc(clf, train, test))
LightGBM tuning tips (adapted from the official docs); a combined example follows the lists.

For faster speed:
- Use bagging by setting bagging_fraction and bagging_freq
- Use feature sub-sampling by setting feature_fraction
- Use small max_bin
- Use save_binary to speed up data loading in future runs
- Use parallel learning (see the LightGBM Parallel Learning Guide)

For better accuracy:
- Use large max_bin (may be slower)
- Use small learning_rate with large num_iterations
- Use large num_leaves (may cause over-fitting)
- Use bigger training data
- Try dart

To deal with over-fitting:
- Use small max_bin
- Use small num_leaves
- Use min_data_in_leaf and min_sum_hessian_in_leaf
- Use bagging by setting bagging_fraction and bagging_freq
- Use feature sub-sampling by setting feature_fraction
- Use bigger training data
- Try lambda_l1, lambda_l2 and min_gain_to_split for regularization
- Try max_depth to avoid growing deep trees
- Try extra_trees
- Try increasing path_smooth
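As a rough sketch of how the over-fitting knobs combine, here is a params dict biased toward regularization; the concrete values below are illustrative assumptions, not tuned recommendations.

# illustrative over-fitting-oriented settings (values are assumptions, not tuned)
overfit_safe_params = {
    "objective": "binary",
    "metric": "auc",
    "max_bin": 63,            # small max_bin
    "num_leaves": 31,         # small num_leaves
    "min_data_in_leaf": 50,   # require enough samples per leaf
    "bagging_fraction": 0.8,  # row sub-sampling ...
    "bagging_freq": 5,        # ... re-drawn every 5 iterations
    "feature_fraction": 0.8,  # column sub-sampling
    "lambda_l1": 0.1,         # L1 regularization
    "lambda_l2": 0.1,         # L2 regularization
    "max_depth": 6,           # cap tree depth
}
# e.g. model = lgb.train(overfit_safe_params, d_train)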
# coding: utf-8
import json
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
try:
    import cPickle as pickle  # Python 2
except BaseException:
    import pickle  # Python 3
print('Loading data...')
# load or create your dataset
df_train = pd.read_csv('https://cdn.coggle.club/LightGBM/examples/binary_classification/binary.train', header=None, sep='\t')
df_test = pd.read_csv('https://cdn.coggle.club/LightGBM/examples/binary_classification/binary.test', header=None, sep='\t')
W_train = pd.read_csv('https://cdn.coggle.club/LightGBM/examples/binary_classification/binary.train.weight', header=None)[0]
W_test = pd.read_csv('https://cdn.coggle.club/LightGBM/examples/binary_classification/binary.test.weight', header=None)[0]
y_train = df_train[0]
y_test = df_test[0]
X_train = df_train.drop(0, axis=1)
X_test = df_test.drop(0, axis=1)
num_train, num_feature = X_train.shape
# create dataset for lightgbm
# if you want to re-use data, remember to set free_raw_data=False
lgb_train = lgb.Dataset(X_train, y_train,
weight=W_train, free_raw_data=False)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
weight=W_test, free_raw_data=False)
# specify your configurations as a dict
params = {
    'boosting_type': 'gbdt',
    'objective': 'binary',
    'metric': 'binary_logloss',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}
# generate feature names
feature_name = ['feature_' + str(col) for col in range(num_feature)]
print('Starting training...')
# feature_name and categorical_feature
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                valid_sets=lgb_train,  # eval training data
                feature_name=feature_name,
                categorical_feature=[21])
print('Finished first 10 rounds...')
# check feature name
print('7th feature name is:', lgb_train.feature_name[6])
print('Saving model...')
# save model to file
gbm.save_model('model.txt')
print('Dumping model to JSON...')
# dump model to JSON (and save to file)
model_json = gbm.dump_model()
with open('model.json', 'w+') as f:
    json.dump(model_json, f, indent=4)
# feature names
print('Feature names:', gbm.feature_name())
# feature importances
print('Feature importances:', list(gbm.feature_importance()))
print('Loading model to predict...')
# load model to predict
bst = lgb.Booster(model_file='model.txt')
# can only predict with the best iteration (or the saving iteration)
y_pred = bst.predict(X_test)
# eval with loaded model
print("The rmse of loaded model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
print('Dumping and loading model with pickle...')
# dump model with pickle
with open('model.pkl', 'wb') as fout:
    pickle.dump(gbm, fout)
# load model with pickle to predict
with open('model.pkl', 'rb') as fin:
    pkl_bst = pickle.load(fin)
# can predict with any iteration when loaded in pickle way
y_pred = pkl_bst.predict(X_test, num_iteration=7)
# eval with loaded model
print("The rmse of pickled model's prediction is:", mean_squared_error(y_test, y_pred) ** 0.5)
# continue training
# init_model accepts:
# 1. model file name
# 2. Booster()
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model='model.txt',
                valid_sets=lgb_eval)
print('Finished 10 - 20 rounds with model file...')
# decay learning rates
# learning_rates accepts:
# 1. list/tuple with length = num_boost_round
# 2. function(curr_iter)
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                learning_rates=lambda iter: 0.05 * (0.99 ** iter),
                valid_sets=lgb_eval)
print('Finished 20 - 30 rounds with decay learning rates...')
# change other parameters during training
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                valid_sets=lgb_eval,
                callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
print('Finished 30 - 40 rounds with changing bagging_fraction...')
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss
def loglikelihood(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))  # sigmoid: raw margin -> probability
    grad = preds - labels
    hess = preds * (1. - preds)
    return grad, hess
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, eval_result: float, is_higher_better: bool
# binary error
# NOTE: with a customized objective, the default prediction is the raw margin
# (for log-likelihood loss, the score before the logistic transformation),
# which can make built-in evaluation metrics compute wrong results.
# Keep this in mind when using customization.
def binary_error(preds, train_data):
    labels = train_data.get_label()
    preds = 1. / (1. + np.exp(-preds))  # convert margins to probabilities first
    return 'error', np.mean(labels != (preds > 0.5)), False
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=10,
                init_model=gbm,
                fobj=loglikelihood,
                feval=binary_error,
                valid_sets=lgb_eval)
print('Finished 40 - 50 rounds with self-defined objective function and eval metric...')
d_train = lgb.Dataset(train, label=y_train)
params = {"max_depth": 4, "learning_rate": 0.05, "num_leaves": 250}
# the number of boosting rounds is set via num_boost_round below
cv_results = lgb.cv(params, d_train, num_boost_round=350, nfold=5, metrics='auc')
print(pd.DataFrame(cv_results))
lg = lgb.LGBMClassifier(silent=False)
param_dist = {"max_depth": [4, 5, 7],
              "learning_rate": [0.01, 0.05, 0.1],
              "num_leaves": [300, 900, 1200],
              "n_estimators": [50, 100, 150]}
grid_search = GridSearchCV(lg, n_jobs=-1, param_grid=param_dist, cv = 5, scoring="roc_auc", verbose=5)
grid_search.fit(train,y_train)
grid_search.best_estimator_, grid_search.best_score_
import warnings
import time
warnings.filterwarnings("ignore")
from bayes_opt import BayesianOptimization
def lgb_eval(max_depth, learning_rate, num_leaves, n_estimators):
    params = {"metric": 'auc'}
    params['max_depth'] = int(max(max_depth, 1))
    params['learning_rate'] = np.clip(learning_rate, 0, 1)  # keep the rate in [0, 1]
    params['num_leaves'] = int(max(num_leaves, 1))
    params['n_estimators'] = int(max(n_estimators, 1))
    cv_result = lgb.cv(params, d_train, nfold=5, seed=0, verbose_eval=200, stratified=False)
    return 1.0 * np.array(cv_result['auc-mean']).max()
lgbBO = BayesianOptimization(lgb_eval, {'max_depth': (4, 8),
                                        'learning_rate': (0.05, 0.2),
                                        'num_leaves': (20, 1500),
                                        'n_estimators': (5, 200)}, random_state=0)
lgbBO.maximize(init_points=5, n_iter=50, acq='ei')
print(lgbBO.max)
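To reuse the best configuration found by the search, a minimal sketch along these lines works; it assumes lgbBO.max['params'] (the record bayes_opt keeps of the best probe) and casts the float-valued dimensions back to ints, as the objective above does.

# refit on the training set with the best parameters found above
best = lgbBO.max['params']
final_model = lgb.LGBMClassifier(max_depth=int(best['max_depth']),
                                 learning_rate=best['learning_rate'],
                                 num_leaves=int(best['num_leaves']),
                                 n_estimators=int(best['n_estimators']))
final_model.fit(train, y_train)
print(metrics.roc_auc_score(y_test, final_model.predict_proba(test)[:, 1]))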