"""Small random-forest demo with scikit-learn.

A single tree does not make a forest: a random forest is an ensemble of
decision trees. This demo uses sklearn's wine dataset to show basic
random-forest usage and compare it against a single decision tree.
"""
# 导入需要用到的包
from sklearn.tree import DecisionTreeClassifier # 决策树
from sklearn.ensemble import RandomForestClassifier # 随机森林
from sklearn.datasets import load_wine # 红酒数据集
from sklearn.model_selection import train_test_split # 划分数据集
from sklearn.model_selection import cross_val_score # 交叉验证
import matplotlib.pyplot as plt # 画图
# Fetch the bundled wine dataset (features in .data, class labels in .target).
wine = load_wine()

# Hold out 30% of the samples for testing; the rest is used for training.
# Returns: training features, test features, training labels, test labels.
x_train, x_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3
)
# Train one decision tree and one random forest on the same split, then
# measure each model's accuracy on the held-out test set.
clf = DecisionTreeClassifier(random_state=0)   # fixed seed -> reproducible tree
rfc = RandomForestClassifier(random_state=0)   # fixed seed -> reproducible forest

clf.fit(x_train, y_train)
rfc.fit(x_train, y_train)

score_c = clf.score(x_test, y_test)  # single-tree test accuracy
score_r = rfc.score(x_test, y_test)  # forest test accuracy
# One round of 10-fold cross-validation on the FULL dataset (data + labels),
# for both models. cross_val_score takes the unfitted estimator, the complete
# data, the complete labels, and the number of folds; it returns one score
# per fold.
rfc = RandomForestClassifier(n_estimators=25)  # n_estimators: trees in the forest
rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10)

clf = DecisionTreeClassifier()
clf_s = cross_val_score(clf, wine.data, wine.target, cv=10)
# Repeat the 10-fold cross-validated comparison ten times, recording each
# run's mean accuracy, then plot both curves side by side.
rfc_s1 = []
clf_s1 = []
for _ in range(10):
    rfc = RandomForestClassifier(n_estimators=25)
    rfc_s = cross_val_score(rfc, wine.data, wine.target, cv=10).mean()
    rfc_s1.append(rfc_s)

    clf = DecisionTreeClassifier()
    clf_s = cross_val_score(clf, wine.data, wine.target, cv=10).mean()
    clf_s1.append(clf_s)

# plt.plot arguments: x values, y values, legend label.
plt.plot(range(1, 11), rfc_s1, label='randomForest')
plt.plot(range(1, 11), clf_s1, label='DecisionTree')
plt.legend()
plt.show()

# Report the best mean forest accuracy and the (0-based) run that produced it.
best_rfc = max(rfc_s1)
print(best_rfc, rfc_s1.index(best_rfc))