【Python】机器学习模型在红酒质量等级评价中的案例应用
【Python】机器学习模型在红酒质量等级评价中的案例应用
文章目录
- 【Python】机器学习模型在红酒质量等级评价中的案例应用
- 1.红酒质量数据
- 2. 划分数据并训练机器学习模型
1.红酒质量数据
import pandas as pd
import numpy as np
import warnings
# Silence every warning for the rest of the session; this also hides any
# warnings emitted by the libraries used further down.
warnings.filterwarnings("ignore")
# Load the wine data set; the path is relative to the current working directory.
df = pd.read_csv('white wine data.csv')
# Bare expression: a notebook renders the DataFrame here, while a plain
# script evaluates it and discards the result.
df
| | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 1 | 7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 2 | 7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 3 | 11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 4 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2192 | 6.4 | 0.450 | 0.07 | 1.1 | 0.030 | 10.0 | 131.0 | 0.9905 | 2.97 | 0.28 | 10.8 | 5 |
| 2193 | 6.4 | 0.475 | 0.06 | 1.0 | 0.030 | 9.0 | 131.0 | 0.9904 | 2.97 | 0.29 | 10.8 | 5 |
| 2194 | 6.3 | 0.270 | 0.38 | 0.9 | 0.051 | 7.0 | 140.0 | 0.9926 | 3.45 | 0.50 | 10.5 | 7 |
| 2195 | 6.9 | 0.410 | 0.33 | 10.1 | 0.043 | 28.0 | 152.0 | 0.9968 | 3.20 | 0.52 | 9.4 | 5 |
| 2196 | 7.0 | 0.290 | 0.37 | 4.9 | 0.034 | 26.0 | 127.0 | 0.9928 | 3.17 | 0.44 | 10.8 | 6 |
2197 rows × 12 columns
2. 划分数据并训练机器学习模型
# Features are every column except the last; the last column is the label,
# kept as an (n, 1) column vector.
X, y = df.iloc[:, 0:-1].values, df.iloc[:, -1].values.reshape([-1, 1])

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features.
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
# Learn mean/variance from the training set only ...
X_train_std = ss.fit_transform(X_train)
# ... and reuse those statistics for the test set. Re-fitting on X_test would
# scale the test features differently from what the models were trained on
# and would leak test-set information into preprocessing.
X_test_std = ss.transform(X_test)
# The labels are class labels and are deliberately left unscaled.
# --------------- Modeling
# SVM Classifier
def svm_classifier(train_x, train_y):
    """Fit and return a support vector classifier with an RBF kernel.

    Args:
        train_x: Training feature matrix.
        train_y: Training labels.

    Returns:
        The fitted ``SVC`` instance, built with ``probability=True``.
    """
    from sklearn.svm import SVC

    model = SVC(kernel='rbf', probability=True)
    model.fit(train_x, train_y)
    return model


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Fit and return a k-nearest-neighbours classifier with default settings.

    Args:
        train_x: Training feature matrix.
        train_y: Training labels.

    Returns:
        The fitted ``KNeighborsClassifier`` instance.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    return model


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Fit and return an L2-penalized logistic regression classifier.

    Args:
        train_x: Training feature matrix.
        train_y: Training labels.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(penalty='l2')
    model.fit(train_x, train_y)
    return model


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Fit and return a random forest classifier made of 8 trees.

    Args:
        train_x: Training feature matrix.
        train_y: Training labels.

    Returns:
        The fitted ``RandomForestClassifier`` instance. No ``random_state``
        is set, so the fitted forest can differ from run to run.
    """
    from sklearn.ensemble import RandomForestClassifier

    model = RandomForestClassifier(n_estimators=8)
    model.fit(train_x, train_y)
    return model


# The models are trained on the standardized training features.
train_x = X_train_std
# Labels are used as produced by the train/test split (not scaled).
train_y = y_train

# Fit one model of each kind on the same training data.
model_svc = svm_classifier(train_x, train_y)
model_knn = knn_classifier(train_x, train_y)
model_logistic = logistic_regression_classifier(train_x, train_y)
model_rf = random_forest_classifier(train_x, train_y)
# ---------- Predict on the standardized test set with every fitted model.
y_svc = model_svc.predict(X_test_std)
y_knn = model_knn.predict(X_test_std)
y_logistic = model_logistic.predict(X_test_std)
y_rf = model_rf.predict(X_test_std)

# Result analysis
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

# Column order of every printed line: SVM, KNN, logistic regression, random forest.
predictions = (y_svc, y_knn, y_logistic, y_rf)


def _report(label, metric, **metric_kwargs):
    """Print *label* followed by *metric* evaluated on each model's predictions."""
    print(label, *(metric(y_test, y_pred, **metric_kwargs) for y_pred in predictions))


_report('分类准确率为:', accuracy_score)
# NOTE: the next two lines report precision (precision_score), macro- and
# micro-averaged, even though their labels reuse the word for "accuracy".
_report('宏平均准确率:', precision_score, average='macro')
_report('微平均准确率:', precision_score, average='micro')
_report('宏平均召回率为:', recall_score, average='macro')
_report('微平均召回率为:', recall_score, average='micro')
_report('宏平均f1值为:', f1_score, average='macro')
_report('微平均f1值为:', f1_score, average='micro')
# 误差评估
分类准确率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均准确率: 0.280583340709923 0.2986745934975547 0.3499023740988492 0.3984623113419726
微平均准确率: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均召回率为: 0.27642774299410267 0.27744523345842165 0.2858453966079045 0.3401924573344921
微平均召回率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均f1值为: 0.27488434754737406 0.27950149117164064 0.29807537284434943 0.3552209623496858
微平均f1值为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
