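# A small Python program written for a competition; it scored 0.93.
# Approach: 1) read the training set, 2) preprocess the data, 3) fit the
# model, 4) apply the model to the prediction set, 5) generate the
# prediction results.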
import types

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, make_scorer

try:
    # scikit-learn >= 0.18: train_test_split lives in model_selection.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the legacy cross_validation module.
    from sklearn.cross_validation import train_test_split
# Step 1: load the training set.
purchase1 = pd.read_csv(u'aa.csv', sep=',')

# Step 2: preprocessing.
# Cells holding the literal token \N are treated as missing: they are turned
# into NaN and then forward-filled from the previous row of the same column.
# DataFrame.ffill() is the non-deprecated spelling of fillna(method='pad').
# Forward-filling cannot fill a gap that has no earlier row, so NaN values in
# the leading row(s) of a column remain NaN.
# NOTE(review): columns in which \N appeared were presumably parsed as strings
# and therefore are removed together with the other object columns below --
# confirm that losing those columns is intended.
purchase1 = purchase1.replace('\\N', np.nan)
purchase1 = purchase1.ffill()

# Drop the cust_id column (presumably a row identifier rather than a feature).
purchase = purchase1.drop(['cust_id'], axis=1)

# Collect the names of all object-dtype columns and drop them as well.
# The intermediate names are kept at module level because later code may
# reuse them; `leibie` appears to be pinyin for "category".
col_type = purchase.dtypes
a1 = col_type.values == object
leibie = col_type[a1].index
purchase = purchase.drop(leibie, axis=1)

# Separate the features from the target column 'false_flag'.
X = purchase.drop('false_flag', axis=1)
y = purchase['false_flag']

# Hold out data for evaluation: half of the rows are used for training, and
# the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.5, random_state=0)