【上海校区】基于Python的Xgboost模型实现

python 未结 0 45
网络营销的特点
网络营销的特点 2021-06-22 14:36
悬赏:3
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 28 13:22:08 2016
@author: yy
"""

## part1: 模型训练
import pandas as pd
import numpy as np
import os
from datetime import datetime
import xgboost as xgb

# Load the raw order data and parse the order date into a datetime column.
traindata = pd.read_csv("/home/op1/yy/traindata.csv")
traindata["add_date"] = pd.to_datetime(traindata["orderdate"])  # convert to datetime

# Date-based split: everything strictly before 2016-10-31 trains the model;
# the week 2016-10-31 .. 2016-11-06, restricted to the top-500 hotels by
# rank1, is held out for evaluation.
train_mask = traindata["add_date"] < "2016-10-31"
test_mask = (
    (traindata["add_date"] >= "2016-10-31")
    & (traindata["add_date"] <= "2016-11-06")
    & (traindata["rank1"] <= 500)
)
traindata_a = traindata[train_mask]   # training set
testdata_a = traindata[test_mask]     # evaluation set

#特征列表
# Column list for modelling: element 0 ("order_cii_notcancelcii") is the
# regression target; every remaining entry is an input feature.
colnames = [
    # "masterhotel",  # id column, deliberately excluded from features
    "order_cii_notcancelcii",  # target
    "city",
    "order_cii_ahead_1day",
    "order_cii_ahead_3days_avg",
    "order_cii_ahead_7days_avg",
    "order_cii_30days_avg",
    "order_cii_ahead_sameoneweek",
    "order_cii_ahead_sametwoweeks_avg",
    "star",
    "goldstar",
    "level",
    "ratingservice",
    "novoters",
    "week_day",
    "working_day",
    "cii_ahead_sameoneweek",
    "cii_ahead_sametwoweeks_avg",
    "cii_ahead_samethreeweeks_avg",
    "cii_ahead_samefourweeks_avg",
    "simple_estimate_constant",
    "cii_ahead_1day_avg",
    "cii_ahead_3days_avg",
    "cii_ahead_7days_avg",
    "order_ahead_lt_1days",
    "order_ahead_lt_2days",
    "order_ahead_lt_3days",
    "order_ahead_lt_7days",
    "order_ahead_lt_14days",
    "order_alldays",
    "click_ahead_1day",
    "click_ahead_2days",
    "click_ahead_3days",
    "click_ahead_7days",
    "click_ahead_14days",
    "browse_0day_uv",
    "browse_1day_uv",
    "browse_2day_uv",
    "browse_3day_uv",
    "browse_4day_uv",
    "browse_5day_uv",
    "browse_6day_uv",
    "browse_7_14day_uv",
    "browse_14daymore_uv",
    "order_cii_14days_avg",
    "order_cii_21days_avg",
    "order_cii_ahead_samethreeweeks_avg",
    "order_cii_ahead_samefourweeks_avg",
]

#dtrain = xgb.DMatrix( data, label=label)
#dtrain = xgb.DMatrix(data, label=label)
#训练集构建

# Build the training DMatrix: column 0 of `colnames` is the label,
# the remaining columns are the input features.
label = traindata_a[colnames[0]]
dtrain = xgb.DMatrix(traindata_a[colnames[1:]], label=label)  # reuse `label` instead of recomputing

# Booster configuration.
num_round = 800  # number of boosting rounds
params = {
    # Squared-error regression. "reg:linear" is the legacy alias, renamed to
    # "reg:squarederror" and removed in XGBoost >= 1.0.
    "objective": "reg:squarederror",
    "eta": 0.1,            # learning rate
    "max_depth": 4,        # maximum tree depth
    "eval_metric": "rmse",
    # "silent": 0 (default verbose) is deprecated; verbosity=1 is the
    # equivalent setting in current XGBoost releases.
    "verbosity": 1,
}

# xgb.train accepts the params dict directly; converting it to a list of
# items (the old `plst` idiom) is unnecessary.
bst = xgb.train(params, dtrain, num_round)

#预测集构建
# Build the DMatrix for the evaluation set (features only, no label).
dtest = xgb.DMatrix(testdata_a[colnames[1:]])

# Predict the target for the held-out week.
y_bar = bst.predict(dtest)

# Per-day evaluation frame. `.copy()` is required: testdata_a is itself a
# slice of traindata, so assigning new columns to a further slice would
# raise SettingWithCopyWarning and write into an ambiguous view.
actual_values = testdata_a[
    ["masterhotel", "add_date", "order_cii_notcancelcii", "rank1"]
].copy()
actual_values["y_bar"] = y_bar
# Absolute error per hotel-day.
actual_values["mae"] = (
    actual_values["y_bar"] - actual_values["order_cii_notcancelcii"]
).abs()

# Daily mean metrics for the top-100 subset and the full top-500 set.
top100 = actual_values[actual_values["rank1"] <= 100]
mae100 = top100.groupby("add_date").mean()
mae500 = actual_values.groupby("add_date").mean()

## part2: persist the model so it can be reloaded later without retraining
bst.save_model("xgb.model")  # write the trained booster to disk

# Reload: construct an empty Booster first, then load the saved weights.
# NOTE(review): the original loaded from "/home/op1/yuanmin/xgb.model",
# which is a different path than the one saved above; loading the file we
# just wrote keeps the script self-consistent.
bst = xgb.Booster()
bst.load_model("xgb.model")
y_bar = bst.predict(dtest)


## Feature importance
# Compute the fscore series once (the original rebuilt it three times),
# then derive the normalized importance from it.
importance = pd.Series(bst.get_fscore()).sort_values(ascending=False)
importance_normalized = importance / importance.sum()  # normalized to sum to 1


---------------------
作者:山谷來客
来源:CSDN
原文:https://blog.csdn.net/u010035907/article/details/53418486
版权声明:本文为博主原创文章,转载请附上博文链接!

相关标签:
回答
  • 消灭零回复
提交回复