XGBoost和LightGBM常用参数

论坛 期权论坛 脚本     
匿名技术用户   2020-12-27 01:22   1662   0

记录一下常用的,xgb和lgb的参数模板函数

辅助函数

def create_xgb_feature_map(features):
    """Write an XGBoost feature-map file ('xgb.fmap') mapping feature
    indices to feature names.

    Call this when the training data is built, otherwise the feature
    names cannot be shown when printing feature importance.

    Each output line has the form ``<index>\t<name>\tq`` ('q' marks a
    quantitative feature, the format expected by
    ``Booster.get_fscore(fmap=...)``).

    Args:
        features: iterable of feature-name strings, in column order.
    """
    # 'with' guarantees the handle is closed even if a write fails
    # (the original left the file open on error); enumerate replaces
    # the manual counter.
    with open('xgb.fmap', 'w') as outfile:
        for i, feat in enumerate(features):
            outfile.write('{0}\t{1}\tq\n'.format(i, feat))

def save_file(filename, data):
    """Pickle *data* to ``cache_path + filename``.

    Uses pickle protocol 4 (supports objects larger than 4 GB).

    NOTE(review): relies on a module-level ``cache_path`` prefix that
    must be defined elsewhere in the file — confirm it ends with a
    path separator.
    """
    # 'with' ensures the handle is closed even if pickling raises;
    # the original left it open until garbage collection.
    with open(cache_path + filename, 'wb') as f:
        pickle.dump(data, f, protocol=4)

def load_file(filename):
    """Unpickle and return the object stored at ``cache_path + filename``.

    NOTE(review): ``pickle.load`` executes arbitrary code from the file —
    only load files produced by :func:`save_file`, never untrusted input.
    NOTE(review): relies on a module-level ``cache_path`` defined
    elsewhere in the file.
    """
    # 'with' ensures the handle is closed promptly; the original relied
    # on garbage collection to close it.
    with open(cache_path + filename, 'rb') as f:
        return pickle.load(f)

def initLogger(loggerName, loggerFile):
    """Create (or fetch) a logger emitting INFO+ to the console and to
    *loggerFile*.

    Fixes two defects in the original:
    - ``loggerFile`` was accepted but never used (no FileHandler was
      attached, so 'train.log' was never written);
    - calling the function twice with the same name stacked duplicate
      handlers, producing repeated log lines.

    Args:
        loggerName: name passed to ``logging.getLogger``.
        loggerFile: path of the log file to append to.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    logger = logging.getLogger(loggerName)
    logger.setLevel(logging.INFO)
    # getLogger returns the same object for the same name, so a second
    # call must not attach another pair of handlers.
    if not logger.handlers:
        formatter = logging.Formatter('%(asctime)s - %(message)s',
                                      datefmt='%Y-%m-%d %H:%M:%S')
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(formatter)
        logger.addHandler(consoleHandler)
        fileHandler = logging.FileHandler(loggerFile)
        fileHandler.setFormatter(formatter)
        logger.addHandler(fileHandler)
    return logger

# Example usage of the logging helper: a module-level logger writing to train.log
logger = initLogger('logger', 'train.log')

XGB

def XGB(*data):
    """Train a binary-classification XGBoost model, early-stopped on
    test-set logloss.

    Args:
        data: exactly four positional items —
            X_train, X_test, y_train, y_test.

    Returns:
        The trained ``xgb.Booster``.
    """
    X_train, X_test, y_train, y_test = data
    train = xgb.DMatrix(X_train, label=y_train)
    # Consistency fix: pass the label by keyword, matching the train
    # matrix above (the original passed it positionally).
    test = xgb.DMatrix(X_test, label=y_test)
    params = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'gamma': 0.1,              # min loss reduction to split (limits leaf count)
        'min_child_weight': 1,
        'max_depth': 6,
        'lambda': 10,              # L2 regularisation on leaf scores
        'alpha': 0.1,              # L1 regularisation on leaf scores
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'eta': 0.01,               # learning rate
        'seed': 0,
    }
    # Both sets are watched; early stopping tracks the last entry ('test').
    watchlist = [(train, 'train'), (test, 'test')]
    model = xgb.train(params, train, num_boost_round=5000,
                      evals=watchlist, early_stopping_rounds=10)
    return model

LGB

def LGB(*data):
    """Train a binary-classification LightGBM model, early-stopped on
    validation logloss.

    Args:
        data: exactly four positional items —
            X_train, X_test, y_train, y_test.

    Returns:
        The trained ``lgb.Booster``.
    """
    X_train, X_test, y_train, y_test = data
    dtrain = lgb.Dataset(X_train, y_train)
    dvalid = lgb.Dataset(X_test, y_test)
    # Release the raw arrays early to keep peak memory down during training.
    del X_train, X_test, y_train, y_test
    params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'max_depth': 7,
        'num_leaves': 128,
        'max_bin': 200,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.9,
        'bagging_freq': 1,
        'nthread': -1,
        'learning_rate': 0.05,
        'lambda_l1': 0.1,
        'lambda_l2': 10,
        # 'min_data_in_bin':2000,
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbose': -1,
    }
    return lgb.train(train_set=dtrain,
                     params=params,
                     num_boost_round=5000,
                     valid_sets=[dtrain, dvalid],
                     early_stopping_rounds=50,
                     verbose_eval=5000)
分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:7942463
帖子:1588486
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP