随机森林是一种集成学习方法,它是由多棵决策树组成的模型,其中每棵树都是在随机化的条件下生成的。这类基于随机化决策树的集成方法主要有两种:随机森林和极端随机树。
废话不多说,直接上代码:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn import metrics
# Load the Iris dataset and unpack its feature matrix and class labels.
iris = load_iris()
X, y = iris.data, iris.target
# Hold out 20% of the samples as a test set; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Random forest: fit 100 trees on the training split, then predict and
# score both splits.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_pred_train = rf_model.predict(X_train)
rf_pred_test = rf_model.predict(X_test)
rf_accuracy_train = metrics.accuracy_score(y_train, rf_pred_train)
rf_accuracy_test = metrics.accuracy_score(y_test, rf_pred_test)
# Extremely randomized trees: same ensemble size and seed, same
# predict-and-score steps.
et_model = ExtraTreesClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
et_pred_train = et_model.predict(X_train)
et_pred_test = et_model.predict(X_test)
et_accuracy_train = metrics.accuracy_score(y_train, et_pred_train)
et_accuracy_test = metrics.accuracy_score(y_test, et_pred_test)
# Plot each model's predictions on the training and test splits in a 2x2 grid.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 8))
axes = axes.flatten()
# One entry per panel, in row-major order:
# (model name, split name, feature matrix, predicted labels, accuracy).
panels = [
    ("Random Forest", "Training", X_train, rf_pred_train, rf_accuracy_train),
    ("Random Forest", "Testing", X_test, rf_pred_test, rf_accuracy_test),
    ("Extra Trees", "Training", X_train, et_pred_train, et_accuracy_train),
    ("Extra Trees", "Testing", X_test, et_pred_test, et_accuracy_test),
]
# Pin the colour scale to the full label range. Without this, each panel
# normalises to its own predictions, so a class could be drawn in a
# different colour whenever one panel's predictions miss a class.
label_min, label_max = y.min(), y.max()
for ax, (model_name, split_name, features, predictions, accuracy) in zip(axes, panels):
    # Only the first two feature columns are drawn; points are coloured by
    # the predicted class, not the true one.
    ax.scatter(
        features[:, 0],
        features[:, 1],
        c=predictions,
        cmap='viridis',
        edgecolors='k',
        vmin=label_min,
        vmax=label_max,
    )
    ax.set_xlabel(iris.feature_names[0])
    ax.set_ylabel(iris.feature_names[1])
    ax.set_title(f"{model_name} - {split_name} Accuracy: {accuracy:.2f}")
plt.tight_layout()
plt.show()