"""Small random-forest demo using scikit-learn.

A single tree does not make a forest: a random forest is an ensemble of
decision trees. This demo uses sklearn's wine dataset to show basic
random-forest usage and to compare it against a single decision tree.
"""

# 导入需要用到的包
from sklearn.tree import DecisionTreeClassifier  # 决策树
from sklearn.ensemble import RandomForestClassifier  # 随机森林
from sklearn.datasets import load_wine  # 红酒数据集
from sklearn.model_selection import train_test_split  # 划分数据集
from sklearn.model_selection import cross_val_score  # 交叉验证
import matplotlib.pyplot as plt  # 画图

# Load the wine dataset bundled with scikit-learn.
wine = load_wine()

# Hold-out split: 70% training, 30% testing.
# Returns: train features, test features, train labels, test labels.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3
)

# One decision tree and one random forest, seeded so the comparison is
# reproducible (random_state fixes the RNG; it does not limit tree count).
clf = DecisionTreeClassifier(random_state=0)
rfc = RandomForestClassifier(random_state=0)

# Fit both models on the same training split.
for model in (clf, rfc):
    model.fit(x_train, y_train)

# Hold-out accuracy of each model on the test split.
score_c = clf.score(x_test, y_test)
score_r = rfc.score(x_test, y_test)

# Single 10-fold cross-validation comparison on the FULL dataset
# (cross_val_score takes the unsplit data and labels).
# n_estimators is the number of trees in the forest.
rfc = RandomForestClassifier(n_estimators=25)
rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10)

clf = DecisionTreeClassifier()
clf_s = cross_val_score(clf, wine.data, wine.target, cv=10)

# Repeat the cross-validated comparison ten times, recording the mean
# 10-fold score of a fresh forest and a fresh tree on each run.
rfc_s1, clf_s1 = [], []

for _ in range(10):
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    rfc_s1.append(rfc_s)
    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    clf_s1.append(clf_s)

# Plot both score curves over the ten runs.
# plt.plot arguments: x values, y data, legend label.
plt.plot(range(1, 11), rfc_s1, label='randomForest')
plt.plot(range(1, 11), clf_s1, label='DecisionTree')
plt.legend()
plt.show()

# Best mean forest score and the (0-based) run index where it occurred.
best = max(rfc_s1)
print(best, rfc_s1.index(best))