import pandas as pd
from apyori import apriori
import matplotlib.pyplot as plt
# Load the stationery order records; the CSV file is decoded as GBK.
data = pd.read_csv('文具市场购物篮案例练习/工作/StationeryOrder.csv', encoding='gbk')
# Exploratory checks. head(), the duplicated-row filter and describe() are bare
# expressions: their results are only shown in an interactive session (e.g. a
# notebook) and are discarded when this runs as a plain script. info() prints
# its summary itself.
data.head(10)
data.info()
# Rows that repeat an earlier row in every column.
data[data.duplicated()]
data.describe()
def conversion_data(s: object) -> list:
    """Wrap a single value in a one-element list.

    Turning each value into a list lets list concatenation (for example a
    grouped ``sum``) merge individual values into one combined list.

    Args:
        s: The value to wrap; it is stored as-is, not copied or converted.

    Returns:
        A new list whose only element is ``s``.
    """
    return [s]
# Wrap every purchased item in its own one-element list so that summing the
# column per order concatenates the items into a single basket.
# ``apply`` is used because this is an element-wise transform, not an
# aggregation: passing a non-reducing callable to ``Series.agg`` triggers a
# FutureWarning in recent pandas releases and is slated to receive the whole
# Series instead of each element.
data['stationery'] = data['stationery'].apply(conversion_data)
# ``sum`` over lists is concatenation, so each ``id`` ends up with one row
# whose 'stationery' cell lists all items of that order.
new_data = data.groupby('id').sum().reset_index()
# Mine association rules from the per-order baskets, keeping only itemsets and
# rules that meet the support / confidence / lift thresholds below.
results = apriori(new_data['stationery'], min_support=0.02,
                  min_confidence=0.45, min_lift=1)
# Flatten the mining output into rows of
# [antecedent, consequent, support, confidence, lift].
extract_result = []
for result in results:
    # Support is a property of the whole itemset, so every rule derived from
    # this record shares the same value.
    support = round(result.support, 3)
    for rule in result.ordered_statistics:
        # The item collections are sets (per apyori's documented record
        # types - confirm for the installed version), whose iteration order
        # for strings can differ between interpreter runs. Sorting keeps the
        # resulting labels reproducible; key=str avoids comparison errors if
        # an item is not a string.
        head_set = sorted(rule.items_base, key=str)
        tail_set = sorted(rule.items_add, key=str)
        # Skip entries with an empty antecedent: they describe no "if" side.
        if not head_set:
            continue
        confidence = round(rule.confidence, 3)
        lift = round(rule.lift, 3)
        extract_result.append([head_set, tail_set, support, confidence, lift])
# Tabulate the extracted rules. The antecedent and consequent columns hold
# lists, so both are converted to their string form to allow plain equality
# comparisons on them later.
result_df = pd.DataFrame(
    extract_result,
    columns=['前件', '后件', '支持度', '置信度', '提升度'],
).astype({'前件': 'str', '后件': 'str'})
# Keep only the rules whose consequent is exactly the single item '中性笔'.
# The consequent column stores the string form of a list, hence the literal
# compared against below. Rows are ordered by ascending support.
is_gel_pen_rule = result_df['后件'] == "['中性笔']"
gel_pens = result_df.loc[is_gel_pen_rule].sort_values('支持度')

# Positions 0..n-1 serve as the centre of each bar pair on the x axis; the two
# bars of a pair are shifted half a bar width to either side of that centre.
width = 0.2
df_index = gel_pens.reset_index().index
x = df_index
x1, x2 = x - width / 2, x + width / 2
y1, y2 = gel_pens['支持度'], gel_pens['置信度']
# Select the SimHei font for the Chinese title, labels and tick text
# (the font has to be installed on the machine running this).
plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei']

# Grouped bar chart: for every antecedent, support (left bar) next to
# confidence (right bar).
plt.figure(figsize=(18, 10))
plt.bar(x1, y1, width=width)
plt.bar(x2, y2, width=width)
plt.title('“中性笔”对应前件的支持度、置信度数值比较', fontsize=20)
plt.xticks(x, gel_pens['前件'])
plt.xlabel('前件', fontsize=15)
plt.ylabel('数值', fontsize=15)

# Keep the 0-0.6 range while every bar fits. Confidence is only bounded below
# (by the mining threshold), so grow the axis with some headroom when a bar
# would otherwise be clipped. With no rows the maximum is NaN, the comparison
# is False, and the default range is kept.
tallest = max(y1.max(), y2.max())
plt.ylim(0, tallest * 1.1 if tallest > 0.6 else 0.6)

# Legend entries follow the order in which the two bar series were drawn.
plt.legend(['支持度', '置信度'], fontsize=15)

# Write each bar's value as a percentage just above the bar.
for positions, heights in ((x1, y1), (x2, y2)):
    for a, b in zip(positions, heights):
        plt.text(a, b, f'{round(b * 100, 1)}%',
                 ha='center', va='bottom', fontsize=12)