【Python】机器学习模型在红酒质量等级评价中的案例应用

【Python】机器学习模型在红酒质量等级评价中的案例应用

文章目录

  • 【Python】机器学习模型在红酒质量等级评价中的案例应用
    • 1.红酒质量数据
    • 2. 划分数据并训练机器学习模型

1.红酒质量数据

import pandas as pd
import numpy as np
import warnings
# Suppress every warning category for the remainder of the session; note that
# this also hides any warnings raised later by the modelling code.
warnings.filterwarnings(action="ignore")

# Read the wine-quality table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='white wine data.csv')

# Bare expression: when run as the last line of a notebook cell this renders
# a preview of the frame.
df
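| | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| 1 | 7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
| 2 | 7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
| 3 | 11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
| 4 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2192 | 6.4 | 0.450 | 0.07 | 1.1 | 0.030 | 10.0 | 131.0 | 0.9905 | 2.97 | 0.28 | 10.8 | 5 |
| 2193 | 6.4 | 0.475 | 0.06 | 1.0 | 0.030 | 9.0 | 131.0 | 0.9904 | 2.97 | 0.29 | 10.8 | 5 |
| 2194 | 6.3 | 0.270 | 0.38 | 0.9 | 0.051 | 7.0 | 140.0 | 0.9926 | 3.45 | 0.50 | 10.5 | 7 |
| 2195 | 6.9 | 0.410 | 0.33 | 10.1 | 0.043 | 28.0 | 152.0 | 0.9968 | 3.20 | 0.52 | 9.4 | 5 |
| 2196 | 7.0 | 0.290 | 0.37 | 4.9 | 0.034 | 26.0 | 127.0 | 0.9928 | 3.17 | 0.44 | 10.8 | 6 |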

2197 rows × 12 columns

2. 划分数据并训练机器学习模型

# Features are every column except the last; the target is the last column,
# kept as an (n_samples, 1) column vector exactly as before.
X, y = df.iloc[:, 0:-1].values, df.iloc[:, -1].values.reshape([-1, 1])

from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features. The scaler's statistics are estimated from
# X_train only.
from sklearn.preprocessing import StandardScaler

ss = StandardScaler()
ss = ss.fit(X_train)
X_train_std = ss.transform(X_train)
# Apply the *training* mean/std to the test set. Calling fit_transform here
# would re-fit the scaler on X_test, so train and test features would be
# scaled with different statistics and `ss` would no longer describe the
# data the models were trained on.
X_test_std = ss.transform(X_test)
# ss1 = StandardScaler()
# ss1 = ss1.fit(y_train)
# y_train_std = ss1.fit_transform(y_train)
# y_test_std = ss1.fit_transform(y_test)
# --------------- Modeling
# SVM Classifier
def svm_classifier(train_x, train_y):
    """Fit an RBF-kernel support vector classifier and return it.

    Args:
        train_x: Training features, passed unchanged to ``SVC.fit``.
        train_y: Training labels, passed unchanged to ``SVC.fit``.

    Returns:
        The fitted ``sklearn.svm.SVC`` instance.
    """
    from sklearn.svm import SVC

    # probability=True asks scikit-learn to also fit probability estimates.
    model = SVC(kernel='rbf', probability=True)
    model.fit(train_x, train_y)
    return model


# KNN Classifier
def knn_classifier(train_x, train_y):
    """Fit a k-nearest-neighbours classifier with library defaults and return it.

    Args:
        train_x: Training features, passed unchanged to ``fit``.
        train_y: Training labels, passed unchanged to ``fit``.

    Returns:
        The fitted ``sklearn.neighbors.KNeighborsClassifier`` instance.
    """
    from sklearn.neighbors import KNeighborsClassifier

    model = KNeighborsClassifier()
    model.fit(train_x, train_y)
    return model


# Logistic Regression Classifier
def logistic_regression_classifier(train_x, train_y):
    """Fit an L2-penalized logistic regression classifier and return it.

    Args:
        train_x: Training features, passed unchanged to ``fit``.
        train_y: Training labels, passed unchanged to ``fit``.

    Returns:
        The fitted ``sklearn.linear_model.LogisticRegression`` instance.
    """
    from sklearn.linear_model import LogisticRegression

    model = LogisticRegression(penalty='l2')
    model.fit(train_x, train_y)
    return model


# Random Forest Classifier
def random_forest_classifier(train_x, train_y):
    """Fit an 8-tree random forest classifier and return it.

    Args:
        train_x: Training features, passed unchanged to ``fit``.
        train_y: Training labels, passed unchanged to ``fit``.

    Returns:
        The fitted ``sklearn.ensemble.RandomForestClassifier`` instance.
    """
    from sklearn.ensemble import RandomForestClassifier

    # No random_state is passed, so the fitted forest (and its scores) may
    # differ from run to run.
    model = RandomForestClassifier(n_estimators=8)
    model.fit(train_x, train_y)
    return model


# Train on the standardized training features.
train_x = X_train_std
# The two statements below were fused into the chained assignment
# `train_y = y_trainmodel_svc = svm_classifier(...)`, which bound train_y to
# the fitted SVM and never defined model_svc. They are separate statements.
train_y = y_train

# Fit each candidate model on the same training data.
model_svc = svm_classifier(train_x, train_y)
model_knn = knn_classifier(train_x, train_y)
model_logistic = logistic_regression_classifier(train_x, train_y)
model_rf = random_forest_classifier(train_x, train_y)
# ---------- Predict on the standardized test features
# The y_svc assignment used to sit on the same line as the separator comment
# and was therefore never executed; it is a real statement again.
y_svc = model_svc.predict(X_test_std)
y_knn = model_knn.predict(X_test_std)
y_logistic = model_logistic.predict(X_test_std)
y_rf = model_rf.predict(X_test_std)

# Result analysis
# This import was previously swallowed by the trailing comment on the y_rf line.
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score


def _report(label, metric, **metric_kwargs):
    """Print *label* followed by *metric* for each model's test predictions.

    Scores are printed in the fixed order SVM, KNN, logistic regression,
    random forest, separated by spaces on a single line.

    Args:
        label: Text printed before the four scores.
        metric: A scikit-learn metric called as ``metric(y_test, y_pred, ...)``.
        **metric_kwargs: Extra keyword arguments forwarded to *metric*
            (e.g. ``average='macro'``).
    """
    predictions = (y_svc, y_knn, y_logistic, y_rf)
    print(label, *(metric(y_test, y_pred, **metric_kwargs) for y_pred in predictions))


# NOTE(review): the two labels containing '平均准确率' report precision_score,
# not accuracy; the strings are kept byte-identical so the printed output
# matches the original.
_report('分类准确率为:', accuracy_score)
_report('宏平均准确率:', precision_score, average='macro')
_report('微平均准确率:', precision_score, average='micro')
_report('宏平均召回率为:', recall_score, average='macro')
_report('微平均召回率为:', recall_score, average='micro')
_report('宏平均f1值为:', f1_score, average='macro')
_report('微平均f1值为:', f1_score, average='micro')
# 误差评估
分类准确率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均准确率: 0.280583340709923 0.2986745934975547 0.3499023740988492 0.3984623113419726
微平均准确率: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均召回率为: 0.27642774299410267 0.27744523345842165 0.2858453966079045 0.3401924573344921
微平均召回率为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575
宏平均f1值为: 0.27488434754737406 0.27950149117164064 0.29807537284434943 0.3552209623496858
微平均f1值为: 0.5818181818181818 0.5515151515151515 0.5575757575757576 0.6075757575757575


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部