Python 计算四分位数及绘制箱型图

一、计算四分位

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Compute the quartiles of the ``time`` column of ``1.xlsx`` in three ways.

@author:ZSW
@file:quantile_distance.py
@time:2021/02/05
"""
import numpy as np
import pandas as pd


def median(x):
    """Split *x* around its median.

    Args:
        x: A non-empty iterable of numbers. It is sorted internally, so the
            caller does not need to pre-sort it.

    Returns:
        A tuple ``(lower_half, upper_half, median_value)``. With an odd
        number of items the middle element is the median and belongs to
        neither half; with an even number the median is the mean of the two
        middle elements and the halves split the data evenly.

    Raises:
        IndexError: If *x* is empty.
    """
    x = sorted(x)
    mid, rem = divmod(len(x), 2)  # divmod returns (quotient, remainder)
    if rem:
        return x[:mid], x[mid + 1:], x[mid]
    return x[:mid], x[mid:], (x[mid - 1] + x[mid]) / 2


def main():
    """Print the quartiles, the IQR and the upper outlier threshold."""
    # Read the Excel file and take the "time" column as a sorted list.
    excel_data = pd.read_excel("1.xlsx")
    # print(excel_data)
    data_list = sorted(excel_data["time"].tolist())

    # Method 1: NumPy. ``method=`` replaces the deprecated ``interpolation=``
    # keyword and requires NumPy >= 1.22.
    res = np.percentile(data_list, (25, 50, 75), method="midpoint")
    print(res)

    # Method 2: direct computation. The quartiles are the medians of the
    # lower and upper halves; compute each once and reuse it.
    lower_half, upper_half, q2 = median(data_list)
    q1 = median(lower_half)[2]
    q3 = median(upper_half)[2]
    iqr = q3 - q1
    # NOTE(review): the first label below is attached to the lower-half
    # median (Q1) and the third to the upper-half median (Q3). The label
    # text is kept unchanged so the output matches the published results;
    # confirm whether the two labels should be swapped.
    print('上四分位:{}'.format(q1))
    print('中位数:{}'.format(q2))
    print('下四分位:{}'.format(q3))
    print('四分位距:{}'.format(iqr))
    # Upper outlier threshold: Q3 + 1.5 * IQR.
    res = iqr * 1.5 + q3
    print('异常阈值:{}'.format(res))

    # Method 3: the describe() summary from pandas.
    print(pd.DataFrame(data_list).describe())


if __name__ == "__main__":
    main()

结果(法一使用 midpoint 插值,而 describe() 默认使用线性插值,因此两者的 75% 分位数略有差异):

[0.12638889 0.59694444 2.32527778]
上四分位:0.126388888888889
中位数:0.596944444444444
下四分位:2.3252777777777798
四分位距:2.1988888888888907
异常阈值:5.6236111111111150
count  37932.000000
mean       3.041283
std        5.612899
min        0.000000
25%        0.126389
50%        0.596944
75%        2.325139
max       25.071944

二、绘制箱型图

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
Draw a box plot of the ``avg_time`` column of ``1.xlsx``.

@author:ZSW
@file:plot_quantile_distance.py
@time:2021/02/05
"""
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd

# Work around matplotlib not rendering Chinese text: use a font that has
# the glyphs, and keep the minus sign displayable with that font.
mpl.rcParams["font.sans-serif"] = ["SimHei"]
mpl.rcParams["axes.unicode_minus"] = False

df = pd.read_excel("1.xlsx")
data = df['avg_time']
data.plot.box(title="箱型图")
plt.show()

                                                                                             

                                                                                                    

三、绘制多箱型图

"""Draw one box plot per sheet of ``5.xlsx`` on a shared 2x4 subplot grid."""
import matplotlib.pyplot as plt
import pandas as pd

# (sheet name, column name) of each panel, in plotting order. A panel's
# 1-based position in this list is both its subplot index and tick label.
PANELS = [
    ('Sheet1', 'A类'),
    ('Sheet2', 'B类'),
    ('Sheet3', 'C类'),
    ('Sheet4', 'D类'),
    ('Sheet5', 'E类'),
    ('Sheet6', 'F类'),
    ('Sheet7', 'G类'),
]

# Line style shared by the box outline and the whisker caps.
LINE_STYLE = {'color': 'black', 'linewidth': 2.0}


def draw_panel(position, sheet_name, column):
    """Read one sheet, print its summary and draw its box plot.

    Args:
        position: 1-based index of the panel in the 2x4 grid; also used as
            the tick label under the box.
        sheet_name: Name of the sheet in ``5.xlsx`` to read.
        column: Name of the column to plot; also used as the x-axis label.
    """
    df = pd.read_excel("5.xlsx", sheet_name=sheet_name)
    print(df.describe())  # shows the median, quartiles, standard deviation, etc.

    plt.subplot(2, 4, position)
    # NOTE: Matplotlib 3.9 renamed ``labels`` to ``tick_labels``; ``labels``
    # is kept here so the script also runs on older releases.
    plt.boxplot(
        [df[column].tolist()],
        notch=False,
        labels=[str(position)],
        patch_artist=False,
        boxprops=LINE_STYLE,
        capprops=LINE_STYLE,
    )
    plt.xlabel(column, fontsize=16)
    plt.ylabel('时长(时)', fontsize=16)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)


def main():
    """Configure fonts, draw every panel and show the figure."""
    # Let matplotlib render the Chinese labels and the minus sign.
    plt.rcParams['axes.unicode_minus'] = False
    plt.rcParams['font.sans-serif'] = ['Simhei']

    for position, (sheet_name, column) in enumerate(PANELS, start=1):
        draw_panel(position, sheet_name, column)
    plt.show()


if __name__ == "__main__":
    main()

结果:

                                   

    

参考:

http://www.79tui.com/happy/605215.html

https://blog.csdn.net/sinat_28252525/article/details/80671238

https://blog.csdn.net/qq_27217709/article/details/89483025


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部