使用XGBoost和随机森林进行泰坦尼克号生还者预测实例

enlist95766 6 0 zip 2023-03-10 21:03:58

import pandas as pd

# Load the training set and split it into the label vector and feature frame.
train = pd.read_csv('train.csv')
target = train['Survived']
# Take an explicit copy so the column assignments below operate on an
# independent frame rather than on a selection of `train`.
data = train[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
# Encode sex numerically: 1 for 'male', 0 for every other value.
data['Sex'] = data['Sex'].eq('male').astype('int64')
# Impute missing ages with the mean age of this frame. The result is assigned
# back instead of calling fillna(inplace=True) on the selected column, because
# that chained in-place form does not update the DataFrame when pandas
# Copy-on-Write is active (and is deprecated in pandas 2.x).
data['Age'] = data['Age'].fillna(data['Age'].mean())

# Load the test set and select the same feature columns used for training.
test = pd.read_csv('test.csv')
# Take an explicit copy so the column assignments below operate on an
# independent frame rather than on a selection of `test`.
data_test = test[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']].copy()
# Encode sex numerically: 1 for 'male', 0 for every other value.
data_test['Sex'] = data_test['Sex'].eq('male').astype('int64')
# Impute missing ages with the mean age of this frame. The result is assigned
# back instead of calling fillna(inplace=True) on the selected column, because
# that chained in-place form does not update the DataFrame when pandas
# Copy-on-Write is active (and is deprecated in pandas 2.x).
# NOTE(review): this uses the test-set mean, as the original did; consider
# reusing the training-set mean so both frames share one imputation value.
data_test['Age'] = data_test['Age'].fillna(data_test['Age'].mean())

# One-hot encode the passenger class in both frames and drop the raw column.
data1 = pd.get_dummies(data['Pclass'], prefix='Pclass')
data2 = pd.get_dummies(data_test['Pclass'], prefix='Pclass')
data = pd.concat([data.drop(columns='Pclass'), data1], axis=1)
data_test = pd.concat([data_test.drop(columns='Pclass'), data2], axis=1)
# The two frames are encoded independently, so a class level that occurs in
# only one of them would leave their columns out of sync and break prediction.
# Force the test frame onto the training layout: indicator columns missing
# from the test set are added as all-zero, extras are dropped, order matches.
data_test = data_test.reindex(columns=data.columns, fill_value=0)

# Train an XGBoost classifier with the library's default hyper-parameters
# on the prepared training features, then predict labels for the test set.
from xgboost import XGBClassifier

model = XGBClassifier()
model.fit(X=data, y=target)
result = model.predict(X=data_test)

# Write the predictions to disk: one row per test passenger, pairing the
# passenger id with the predicted label, without the index column.
result_df = test[['PassengerId']].assign(Survived=result)
result_df.to_csv('result.csv', index=False)

用户评论
请输入评论内容
评分:
暂无评论