""" Demo for GLM ============ """ import os import xgboost as xgb ## # this script demonstrate how to fit generalized linear model in xgboost # basically, we are using linear model, instead of tree for our boosters ## CURRENT_DIR = os.path.dirname(__file__) dtrain = xgb.DMatrix( os.path.join(CURRENT_DIR, "../data/agaricus.txt.train?format=libsvm") ) dtest = xgb.DMatrix( os.path.join(CURRENT_DIR, "../data/agaricus.txt.test?format=libsvm") ) # change booster to gblinear, so that we are fitting a linear model # alpha is the L1 regularizer # lambda is the L2 regularizer # you can also set lambda_bias which is L2 regularizer on the bias term param = { "objective": "binary:logistic", "booster": "gblinear", "alpha": 0.0001, "lambda": 1, } # normally, you do not need to set eta (step_size) # XGBoost uses a parallel coordinate descent algorithm (shotgun), # there could be affection on convergence with parallelization on certain cases # setting eta to be smaller value, e.g 0.5 can make the optimization more stable # param['eta'] = 1 ## # the rest of settings are the same ## watchlist = [(dtest, "eval"), (dtrain, "train")] num_round = 4 bst = xgb.train(param, dtrain, num_round, watchlist) preds = bst.predict(dtest) labels = dtest.get_label() print( "error=%f" % ( sum(1 for i in range(len(preds)) if int(preds[i] > 0.5) != labels[i]) / float(len(preds)) ) )