|
# Commonly used parameter-template functions for XGBoost (xgb) and LightGBM (lgb).
# Helper functions
def create_xgb_feature_map(features):
    """Write the XGBoost feature-map file ``xgb.fmap`` for *features*.

    Call this once feature engineering is finished and the training data is
    being produced; without the map, printed feature importances cannot show
    the feature names.

    One line is written per feature, in iteration order, in the form
    ``<index>\\t<name>\\tq\\n`` with indices starting at 0. An existing
    ``xgb.fmap`` in the current working directory is overwritten.

    Args:
        features: iterable of feature names.
    """
    # The context manager closes the file even if formatting or writing a
    # line raises part-way through.
    with open('xgb.fmap', 'w') as outfile:
        for index, feat in enumerate(features):
            outfile.write('{0}\t{1}\tq\n'.format(index, feat))
def save_file(filename, data):
    """Pickle *data* to the file ``cache_path + filename``.

    The target path is built by plain string concatenation with the
    module-level ``cache_path``, and the object is serialized with pickle
    protocol 4.

    Args:
        filename: file name appended to ``cache_path``.
        data: any picklable object.
    """
    # Use a context manager so the handle is flushed and closed immediately
    # instead of whenever the anonymous file object gets garbage-collected.
    with open(cache_path + filename, 'wb') as handle:
        pickle.dump(data, handle, protocol=4)
def load_file(filename):
    """Load and return the pickled object stored at ``cache_path + filename``.

    The path is built by plain string concatenation with the module-level
    ``cache_path``.

    Args:
        filename: file name appended to ``cache_path``.

    Returns:
        The object produced by ``pickle.load``.
    """
    # SECURITY: unpickling can execute arbitrary code; only load cache files
    # that this project wrote itself.
    # The context manager closes the handle even if unpickling fails.
    with open(cache_path + filename, 'rb') as handle:
        return pickle.load(handle)
def initLogger(loggerName, loggerFile):
    """Return the logger named *loggerName*, set to INFO with console output.

    Records are formatted as ``<timestamp> - <message>`` with the timestamp
    rendered as ``YYYY-MM-DD HH:MM:SS``.

    Args:
        loggerName: name passed to ``logging.getLogger``.
        loggerFile: currently unused -- no file handler is attached. The
            parameter is kept so existing callers keep working.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(loggerName)
    logger.setLevel(logging.INFO)
    # logging.getLogger returns the same object for the same name, so only
    # attach a handler when none is present; otherwise each repeated call
    # would add another handler and every message would be printed again.
    if not logger.handlers:
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(
            logging.Formatter('%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
        )
        logger.addHandler(consoleHandler)
    return logger
# Module-level logger created at import time via initLogger; 'train.log' is
# passed as the log-file argument.
logger = initLogger('logger', 'train.log')
# XGB
def XGB(*data):
    """Train an XGBoost model with a fixed binary-logistic parameter template.

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Returns:
        The model returned by ``xgb.train``.
    """
    X_train, X_test, y_train, y_test = data
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Parameter template: gbtree booster, log-loss metric, row/column
    # subsampling at 0.7 and a 0.01 learning rate.
    template = {
        'booster': 'gbtree',
        'objective': 'binary:logistic',
        'eval_metric': 'logloss',
        'gamma': 0.1,
        'min_child_weight': 1,
        'max_depth': 6,
        'lambda': 10,
        'alpha': 0.1,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'eta': 0.01,
        'seed': 0,
    }

    # Both sets are evaluated each round; training is capped at 5000 rounds
    # with an early-stopping patience of 10.
    return xgb.train(
        template,
        dtrain,
        num_boost_round=5000,
        evals=[(dtrain, 'train'), (dtest, 'test')],
        early_stopping_rounds=10,
    )
# LGB
def LGB(*data):
    """Train a LightGBM model with a fixed binary-classification template.

    Args:
        *data: exactly four positional values, in the order
            ``X_train, X_test, y_train, y_test``.

    Returns:
        The model returned by ``lgb.train``.
    """
    X_train, X_test, y_train, y_test = data
    train_set = lgb.Dataset(X_train, y_train)
    valid_set = lgb.Dataset(X_test, y_test)
    # Drop the local names for the raw inputs once the Datasets exist.
    del X_train, X_test, y_train, y_test

    # Parameter template: gbdt boosting, binary objective with log-loss
    # metric, L1/L2 regularisation and 0.9 feature/bagging fractions.
    template = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'max_depth': 7,
        'num_leaves': 128,
        'max_bin': 200,
        'feature_fraction': 0.9,
        'bagging_fraction': 0.9,
        'bagging_freq': 1,
        'nthread': -1,
        'learning_rate': 0.05,
        'lambda_l1': 0.1,
        'lambda_l2': 10,
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbose': -1,
    }

    # NOTE(review): the early_stopping_rounds / verbose_eval keyword
    # arguments appear to have been removed from lgb.train in LightGBM 4.0
    # in favour of callbacks -- confirm against the installed version.
    return lgb.train(
        params=template,
        train_set=train_set,
        num_boost_round=5000,
        valid_sets=[train_set, valid_set],
        early_stopping_rounds=50,
        verbose_eval=5000,
    )
|