import numpy as np # 导入NumPy数学工具箱
import pandas as pd # 导入Pandas数据处理工具箱
df_heart = pd.read_csv("heart.csv") # 读取文件
df_heart.head() # 显示前5行数据
import matplotlib.pyplot as plt
import seaborn as sns #导入seaborn画图工具箱
%matplotlib inline
sns.countplot(x="target", data=df_heart, palette="bwr")
plt.show()
# 将某些特征转换为数值类型的哑变量
a = pd.get_dummies(df_heart['cp'], prefix = "cp")
b = pd.get_dummies(df_heart['thal'], prefix = "thal")
c = pd.get_dummies(df_heart['slope'], prefix = "slope")
frames = [df_heart, a, b, c]
df_heart = pd.concat(frames, axis = 1)
df_heart.head()
df_heart = df_heart.drop(columns = ['cp', 'thal', 'slope'])
df_heart.head()
# 构建特征和标签集
y = df_heart.target.values
X = df_heart.drop(['target'], axis = 1)
from sklearn.model_selection import train_test_split # 拆分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2,random_state=0)
from sklearn import preprocessing
scaler = preprocessing.MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# 从训练集里再切一次出验证集
X_train, X_check, y_train, y_check = train_test_split(X_train,y_train,test_size = 0.2,random_state=0)
X_check = scaler.transform(X_check)
from sklearn.ensemble import RandomForestClassifier # 导入随机森林分类器
from sklearn.metrics import f1_score, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
rf = RandomForestClassifier(n_estimators = 1000, random_state = 1)
rf.fit(X_train, y_train)
rf_acc = rf.score(X_test,y_test)*100
y_pred = rf.predict(X_test) # 预测心脏病结果
print("随机森林算法下的测试集准确率:: {:.2f}%".format(rf.score(X_test, y_test)*100))
print("随机森林算法下的测试集F1分数: {:.2f}%".format(f1_score(y_test, y_pred)*100))
print('随机森林算法下的测试集混淆矩阵:\n', confusion_matrix(y_pred, y_test))
rf_acc = rf.score(X_check,y_check)*100
y_check_pred = rf.predict(X_check) # 预测心脏病结果
print("随机森林算法下的验证集准确率:: {:.2f}%".format(rf.score(X_check, y_check)*100))
print("随机森林算法下的验证集F1分数: {:.2f}%".format(f1_score(y_check, y_check_pred)*100))
print('随机森林算法下的验证集混淆矩阵:\n', confusion_matrix(y_check_pred, y_check))
参考资料《零基础学机器学习》一书,但“切分出一份验证集“,第6跟第8点是自己从中延伸而来