"""Train a decision tree on passenger records and export the fitted tree.

Reads ``tietan.xls``, uses the ``pclass``, ``age`` and ``sex`` columns as
features and ``survived`` as the target, prints the accuracy on a held-out
20% split, and writes the fitted tree to ``tree.dot``.
"""
import graphviz
import pandas as pd
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

data = pd.read_excel(r'tietan.xls')
print(data)

# Take an explicit copy so the imputation below modifies an independent
# frame instead of a slice of ``data`` (avoids chained-assignment warnings).
feature = data.loc[:, ['pclass', 'age', 'sex']].copy()

# DataFrame.info() prints its own report and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
feature.info()

# Replace missing ages with the mean of the ages that are present.
feature['age'] = feature['age'].fillna(feature['age'].mean())

# One dict per row; DictVectorizer turns these into a dense numeric matrix.
X = feature.to_dict(orient='records')
print(X)

dic = DictVectorizer(sparse=False)
x = dic.fit_transform(X)

# get_feature_names() is absent from newer scikit-learn releases; prefer
# get_feature_names_out() and fall back only when it is unavailable.
if hasattr(dic, 'get_feature_names_out'):
    feature_name = list(dic.get_feature_names_out())
else:
    feature_name = list(dic.get_feature_names())

y = data['survived']

# No random_state is set, so the split (and the score) varies between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

dc = DecisionTreeClassifier(criterion='entropy', max_depth=5)
dc.fit(x_train, y_train)
y_pred = dc.predict(x_test)
print(dc.score(x_test, y_test))

# Label the nodes with the vectorizer's column names so the exported tree
# is readable rather than showing anonymous feature indices.
tree.export_graphviz(dc, out_file='tree.dot', feature_names=feature_name)

# graphviz.Source(...) expects DOT source text, not a path; from_file loads
# the DOT file that was just written.
a = graphviz.Source.from_file('tree.dot')
print(a)
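# This article was submitted by an internet user. The views expressed are solely the author's; they do not represent this site's position, and the site assumes no related legal liability. If you repost it, please credit the source. If the content is infringing, illegal, or factually inaccurate, please click [Report Content] to file a complaint.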