import os
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from math import *
import json
plt.rcParams['font.sans-serif'] = 'SimHei' # use the SimHei font so Chinese labels render in figures
# Draw minus signs as ASCII hyphens instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
# Directory holding the competition workbooks and every cached .npy/.json artefact.
root_path = r"D:\win10浏览器\CUMCM2023Problems\C题"
def get_path(str):
    """Return ``root_path`` and *str* joined by a single backslash.

    The parameter shadows the builtin ``str``; the name is kept because it is
    part of the existing call interface.
    """
    return '\\'.join((root_path, str))
# 共251种作物
def read_datas(path_data1, path_data2, path_data3, path_data2_sort):
    """Read the four Excel workbooks and return them as DataFrames.

    Returns:
        ``(data1, data2, data3, data22)`` where ``data22`` is the workbook at
        *path_data2_sort*.
    """
    # The read order (2, 1, 3, sorted-2) is kept from the original code.
    frames = {}
    for key, workbook in (
        ("data2", path_data2),
        ("data1", path_data1),
        ("data3", path_data3),
        ("data22", path_data2_sort),
    ):
        frames[key] = pd.read_excel(workbook)
    return frames["data1"], frames["data2"], frames["data3"], frames["data22"]
class Data():
    """Raw competition workbooks plus the lookup tables and arrays derived from them.

    Two ways to populate an instance:

    * ``function_integration()`` followed by the ``record_*`` /
      ``function_deal_*`` builders computes everything from the workbooks;
    * ``load_array()`` restores results previously cached under ``root_path``.

    NOTE: tables restored by ``load_array`` come from JSON, whose object keys
    are always strings, so they must be indexed with ``str(id)``. Tables built
    in memory by ``function1`` / ``function2`` use ``int`` keys, which is what
    ``record_one_food_function`` and ``function_deal_data3`` expect.
    """

    def __init__(self, path_data1=r"D:\win10浏览器\CUMCM2023Problems\C题\附件1.xlsx",
                 path_data2=r"D:\win10浏览器\CUMCM2023Problems\C题\附件2_做第一题简化版.xlsx",
                 path_data3=r"D:\win10浏览器\CUMCM2023Problems\C题\附件3_去除改版.xlsx",
                 path_data2_sort=r"D:\win10浏览器\CUMCM2023Problems\C题\附件2_打折与否排序.xlsx",
                 read_ori_plus=False
                 ):
        """Read the four workbooks and keep both the DataFrames and their ``.values``.

        Args:
            path_data1: workbook whose rows are read as
                (item id, item name, category id, category name).
            path_data2: workbook whose rows are read as (time, item id, weight).
            path_data3: workbook whose rows are read as (time, item id, cost).
            path_data2_sort: second variant of workbook 2 (kept as ``data22``).
            read_ori_plus: when truthy, ``load_array`` also restores the
                markup/weight arrays written by ``cost_plus_pricing``.
        """
        self.data1, self.data2, self.data3, self.data22 = read_datas(path_data1, path_data2, path_data3,
                                                                     path_data2_sort)
        self.types = dict()
        self.data1_values = self.data1.values
        self.data2_values = self.data2.values
        self.data3_values = self.data3.values
        self.data22_values = self.data22.values
        self.data2_values_fullSort = self.data22_values
        self.read_ori_plus = read_ori_plus

    @staticmethod
    def _write_json(name, obj):
        """Dump *obj* as JSON to ``get_path(name)``, closing the file even on error."""
        with open(get_path(name), "w") as tf:
            json.dump(obj, tf)

    @staticmethod
    def _read_json(name):
        """Load and return the JSON document stored at ``get_path(name)``."""
        with open(get_path(name), "r") as tf:
            return json.load(tf)

    def function_integration(self):
        """Build and persist the item-level and category-level lookup tables."""
        self.function1()
        self.function2()
        # self.record_one_food_function()

    def function1(self):
        """Build ``rows_type`` (item id -> row index) and ``type_names``, then save both as JSON."""
        rows_type = dict()  # dict[item id] = row index
        type_names = list()
        for i, row in enumerate(self.data1_values):
            rows_type[int(row[0])] = i
            type_names.append(row[1])
        self.type_names = type_names
        self.rows_type = rows_type
        # Persist to disk.
        self._write_json("names_type.json", type_names)
        self._write_json("findIndexFromID.json", rows_type)

    def function2(self):
        """Build the category tables from workbook 1 and save them as JSON.

        Sets ``rows_type_big`` (category id -> list of item ids),
        ``typeBig_ids`` / ``typeBig_names`` (in order of first appearance) and
        ``find_bigtype_fromsmallID`` (item id -> category id).
        """
        rows_type_big = dict()  # dict[category id] = list(item ids)
        typeBig_names = list()
        find_bigtype_fromsmallID = dict()  # dict[item id] = parent category id
        typeBig_ids = list()  # category ids
        for row in self.data1_values:
            big_id = int(row[2])
            big_name = row[3]
            item_id = int(row[0])
            # Register a category the first time it is seen. A membership test
            # (instead of comparing with the previous row) keeps the tables
            # correct even when a category's rows are not contiguous.
            if big_id not in rows_type_big:
                typeBig_names.append(big_name)
                typeBig_ids.append(big_id)
                rows_type_big[big_id] = list()
            rows_type_big[big_id].append(item_id)
            find_bigtype_fromsmallID[item_id] = big_id
        self.typeBig_ids = typeBig_ids
        self.rows_type_big = rows_type_big  # dict[category id] = list(item ids)
        self.typeBig_names = typeBig_names
        self.find_bigtype_fromsmallID = find_bigtype_fromsmallID
        # Persist to disk.
        self._write_json("findIndexFromBigID.json", rows_type_big)
        self._write_json("names_bigType.json", typeBig_names)
        self._write_json("listBig_ids.json", typeBig_ids)
        self._write_json("find_bigtype_fromsmallID.json", find_bigtype_fromsmallID)

    def record_one_food_function(self):
        """Accumulate daily sold weight per item and per category from workbook 2.

        A new day column starts whenever the time value in column 0 changes, so
        rows are expected to be grouped by day. Sets ``record_one_type``
        (251 x 1085), ``record_one_bigtype`` (6 x 1085) and ``record_time``.
        Requires the int-keyed tables built by ``function1`` / ``function2``.
        """
        time = '0'  # sentinel that differs from any real time value
        record_one_type = np.zeros(shape=(251, 1085))
        record_one_bigtype = np.zeros(shape=(6, 1085))
        i = -1  # index of the current day column
        record_time = list()
        for current in self.data2_values:
            current_time = current[0]
            current_id = int(current[1])
            current_weight = float(current[2])
            if time != current_time:
                time = current_time
                i += 1
                record_time.append(time)
            food_number = self.rows_type[current_id]
            record_one_type[food_number][i] += current_weight
            big_id = self.find_bigtype_fromsmallID[current_id]
            index_bigId = self.typeBig_ids.index(big_id)
            record_one_bigtype[index_bigId][i] += current_weight
        self.record_one_type = record_one_type
        self.record_one_bigtype = record_one_bigtype
        self.record_time = record_time

    def function_time_count(self):
        """Print and return the number of runs of equal time values in workbook 2.

        With rows grouped by day this is the number of distinct days (a comment
        in the original code records the result 1085).
        """
        sum_time = 0
        current = '0'
        for row in self.data2_values:
            if current != row[0]:
                current = row[0]
                sum_time += 1
        print(sum_time)
        return sum_time

    def function_deal_data3(self):
        """Build the per-item daily cost matrix from workbook 3 and save it.

        Writes ``record_cost.npy`` (251 x 1085) and ``time3_list.npy`` (the day
        values in order of appearance) under ``root_path``. Requires the
        int-keyed ``rows_type`` built by ``function1``.
        """
        values3 = self.data3_values
        times3_list = list()
        record_cost = np.zeros(shape=(251, 1085))
        time = '0'
        i = -1
        for current in values3:
            current_time = current[0]
            current_id = int(current[1])
            current_cost = float(current[2])
            if time != current_time:
                time = current_time
                i += 1
                times3_list.append(time)
            food_number = self.rows_type[current_id]
            record_cost[food_number][i] = current_cost
        # save
        np.save(get_path('record_cost.npy'), record_cost)
        np.save(get_path('time3_list.npy'), np.array(times3_list))

    def deal_with_time_save(self):
        """Convert ``time3_list`` to yyyymmdd integers and save them as ``time_Int.npy``.

        ``time3_list`` is only set by ``load_array``, which must run first.
        """
        self.time_Int = time_Int(self.time3_list)
        np.save(get_path('time_Int.npy'), self.time_Int)

    def save_array(self):
        """Save the per-item and per-category daily sales matrices."""
        np.save(get_path('record_one_type.npy'), self.record_one_type)
        np.save(get_path('record_one_bigtype.npy'), self.record_one_bigtype)

    def load_array(self):
        """Restore cached arrays and JSON lookup tables from ``root_path``.

        Each JSON table is stored under two attribute names (the file-based
        name and the name used by the in-memory builders).
        """
        # Outputs of record_one_food_function / function_deal_data3.
        self.record_one_type = np.load(get_path('record_one_type.npy'))
        self.record_one_bigtype = np.load(get_path('record_one_bigtype.npy'))
        self.record_cost = np.load(get_path('record_cost.npy'))
        # Object array of time values, hence allow_pickle; only load files this
        # project wrote itself.
        self.time3_list = np.load(get_path('time3_list.npy'), allow_pickle=True)
        # NOTE(review): the file's indentation was lost; the conditional is
        # assumed to cover exactly the eight markup/weight arrays below.
        if self.read_ori_plus:
            self.plus_type = np.load(get_path('plus_type.npy'))
            self.weight_type_noDiscount = np.load(get_path('weight_type_noDiscount.npy'))
            self.weight_type_Discount = np.load(get_path('weight_type_Discount.npy'))
            self.plus_lower_type = np.load(get_path('plus_lower_type.npy'))
            self.plus_Bigtype = np.load(get_path('plus_Bigtype.npy'))
            self.weight_Bigtype_noDiscount = np.load(get_path('weight_Bigtype_noDiscount.npy'))
            self.weight_Bigtype_Discount = np.load(get_path('weight_Bigtype_Discount.npy'))
            self.plus_lower_Bigtype = np.load(get_path('plus_lower_Bigtype.npy'))
        self.time_Int = np.load(get_path('time_Int.npy'))
        self.time_Int_list = list(self.time_Int)
        # Tables written by function1.
        self.names_type = self.type_names = self._read_json("names_type.json")  # list
        self.findIndexFromID = self.rows_type = self._read_json("findIndexFromID.json")  # dict, str keys
        # Tables written by function2.
        self.findIndexFromBigID = self.rows_type_big = self._read_json("findIndexFromBigID.json")
        self.names_bigType = self.typeBig_names = self._read_json("names_bigType.json")
        self.listBig_ids = self.typeBig_ids = self._read_json("listBig_ids.json")  # list
        self.find_bigtype_fromsmallID = self._read_json("find_bigtype_fromsmallID.json")  # dict, str keys

    def addition(self):
        """Placeholder for the markup computation; not implemented."""
        pass

    def save_array2(self):
        """Placeholder; not implemented."""
        pass
def time3_2Test(d):
    """Check that ``d.record_time`` and ``d.time3_list`` hold the same dates in order.

    Compares year, month and day position by position. On the first mismatch
    prints ``error`` and the differing fields and stops; prints ``noError``
    only when every compared pair matches.

    Raises:
        IndexError: if ``d.time3_list`` is shorter than ``d.record_time``.
    """
    times2 = d.record_time
    times3 = d.time3_list
    for index, time2 in enumerate(times2):
        time3 = times3[index]
        if time2.year != time3.year or time2.month != time3.month or time2.day != time3.day:
            print('error')
            print(
                '({} {})({} {})({} {})'.format(time2.year, time3.year, time2.month, time3.month, time2.day, time3.day))
            # Return instead of break so that 'noError' is not printed as well.
            return
    print('noError')
# 热力图
def heat_map_type(d):
    """Draw and save a heat map of the pairwise correlation between items.

    ``d.record_one_type`` has one row per item, so it is transposed before
    ``DataFrame.corr`` (which correlates columns). Without the transpose the
    matrix would be day-by-day while the tick labels are item names. ``coff``
    uses the same transposed form.
    """
    np.set_printoptions(threshold=np.inf)
    corr = pd.DataFrame(d.record_one_type.T).corr()
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    p1 = sns.heatmap(corr, xticklabels=d.type_names, yticklabels=d.type_names, cmap='viridis', ax=ax)
    ax.set_title('Heat Map2')
    s1 = p1.get_figure()
    plt.show()
    s1.savefig(r'D:\win10浏览器\CUMCM2023Problems\C题\HeatMap_small.jpg', dpi=300, bbox_inches='tight')
    print('ok')
# 计算相关系数
def coff(d):
    """Print the item-by-item correlation matrix and its most strongly correlated pairs.

    The threshold is the value sitting ``16 * 2`` places below the diagonal
    entries in the ascending list of non-NaN coefficients (each pair appears
    twice in the symmetric matrix). Every pair ``i < j`` whose absolute
    coefficient reaches that threshold is printed.
    """
    corr = pd.DataFrame(d.record_one_type.T).corr()
    print(corr)
    corr = np.array(corr)
    m, n = np.shape(corr)
    corr_sort = corr.reshape(-1)
    corr_sort = corr_sort[~np.isnan(corr_sort)]
    corr_sort = np.sort(corr_sort)
    finall_num = 16 * 2
    # Skip the diagonal self-correlations: one per item (251 for the full data set).
    finall_corr = corr_sort[len(corr_sort) - finall_num - m]
    print('finall_corr', finall_corr)
    record_best_list = list()
    for i in range(m):
        for j in range(i + 1, n):
            value = corr[i][j]
            # NaN must be tested with isnan: `value != np.nan` is always True.
            if not np.isnan(value) and abs(value) >= finall_corr:
                temp = SN()
                temp.data = value
                temp.name1 = d.type_names[i]
                temp.name2 = d.type_names[j]
                record_best_list.append(temp)
    print('最高相关系数')
    for sn in record_best_list:
        print('{} {} 相关系数为{}'.format(sn.name1, sn.name2, sn.data))
class SN:
    """Mutable record for one item pair: a value (``data``) and the two item names."""

    def __init__(self):
        self.data, self.name1, self.name2 = 0, '', ''
from sklearn.cluster import DBSCAN
# 欧式距离
def dist(x1, x2):
    """Return the Euclidean distance between two numpy arrays."""
    squared = (x1 - x2) ** 2
    return sqrt(squared.sum())
def eps_choice_k_distance(d):
    """Plot the sorted k-distance curve (k = 4) of the 251 item series.

    For every item the distances to all items are sorted ascending and the
    entry at index ``k`` is taken. Those values are then plotted in
    descending order.
    """
    k = 4
    n_items = 251
    dist_all = np.zeros(shape=(n_items, n_items))
    for row in range(n_items):
        for col in range(n_items):
            dist_all[row][col] = dist(d.record_one_type[row, :], d.record_one_type[col, :])
    dist_all.sort(axis=1)
    dist_k = np.sort(dist_all[:, k])[::-1]
    plt.ylabel('{}-distance'.format(k))
    plt.xlabel('goods')
    plt.plot(dist_k)
    plt.show()
def DBSCAN_type(d):
    """Cluster the item series with DBSCAN (eps=280, min_samples=2) and print the labels.

    The original code also assigned an unused local ``eps = 300``; the value
    actually passed to DBSCAN has always been 280.
    """
    model = DBSCAN(eps=280, min_samples=2)
    model.fit(d.record_one_type)
    print(model.labels_)
from sklearn.cluster import KMeans
def KMS_type(d, k=80):
    """Cluster the item series with k-means and print each useful cluster's correlation matrix.

    The largest cluster is printed separately. Singleton clusters and clusters
    as large as the largest one are excluded from the "useful" list.

    Args:
        d: object providing ``record_one_type`` (items x days) and ``type_names``.
        k: number of k-means clusters.
    """
    kms = KMeans(n_clusters=k, random_state=0).fit(d.record_one_type)
    labels = kms.labels_
    # First set aside the largest cluster.
    count = np.bincount(labels)
    biggest_count = count.max()
    biggest_count_number = np.where(biggest_count == count)[0][0]
    # Then drop singleton clusters (and any cluster tying for largest).
    cluster_need_index = list()  # cluster labels
    cluster_need_count = list()  # cluster sizes
    for cluster_id, size in enumerate(count):
        if size > 1 and size != biggest_count:
            cluster_need_index.append(cluster_id)
            cluster_need_count.append(size)
    print('最大簇的簇的簇标记为{},其内商品有:'.format(biggest_count_number))
    for i in np.where(labels == biggest_count_number)[0]:
        print(d.type_names[i])
    # Report the useful clusters.
    print('以下为有效簇的信息:')
    for cluster_id, size in zip(cluster_need_index, cluster_need_count):
        print('簇标记为{},簇内有商品{}个,分别为:'.format(cluster_id, size))
        cluster_current = np.where(labels == cluster_id)[0]
        for j in cluster_current:
            # `end` belongs to print(); it was previously passed to str.format,
            # where it was silently ignored.
            print(d.type_names[j], end=' ')
        print()
        cluster_set = d.record_one_type[cluster_current, :]
        coff = np.corrcoef(cluster_set)
        print('其相关系数为')
        print(coff)
def _plot_cluster_series(d, members, end='\n'):
    """Print the names of the items in *members* and plot their daily series on one figure."""
    names_cs = list()
    for j in members:
        print(d.type_names[j], end=end)
        names_cs.append(d.type_names[j])
    if end != '\n':
        print()  # finish the space-separated line
    cluster_set = d.record_one_type[members, :]
    days = np.arange(cluster_set.shape[1])
    plt.plot(days, cluster_set.T)
    plt.legend(names_cs, loc="best")
    plt.show()


def KMS_type_show(d, k=80):
    """Cluster the item series with k-means and plot every useful cluster.

    The largest cluster is plotted first. Afterwards each cluster with more
    than one member that is smaller than the largest one gets its own figure.

    Args:
        d: object providing ``record_one_type`` (items x days) and ``type_names``.
        k: number of k-means clusters.
    """
    kms = KMeans(n_clusters=k, random_state=0).fit(d.record_one_type)
    labels = kms.labels_
    # First set aside the largest cluster.
    count = np.bincount(labels)
    biggest_count = count.max()
    biggest_count_number = np.where(biggest_count == count)[0][0]
    # Then drop singleton clusters (and any cluster tying for largest).
    cluster_need_index = list()  # cluster labels
    cluster_need_count = list()  # cluster sizes
    for cluster_id, size in enumerate(count):
        if size > 1 and size != biggest_count:
            cluster_need_index.append(cluster_id)
            cluster_need_count.append(size)
    print('最大簇的簇的簇标记为{},其内商品有:'.format(biggest_count_number))
    _plot_cluster_series(d, np.where(labels == biggest_count_number)[0])
    # Report the useful clusters.
    print('以下为有效簇的信息:')
    for cluster_id, size in zip(cluster_need_index, cluster_need_count):
        print('簇标记为{},簇内有商品{}个,分别为:'.format(cluster_id, size))
        # `end=' '` was previously passed to str.format (ignored); it is now
        # given to print() so the names share one line.
        _plot_cluster_series(d, np.where(labels == cluster_id)[0], end=' ')
def show_bigtype(d):
    """Plot the six category-level daily sales series over 1085 days."""
    days = np.tile(np.arange(1085), (6, 1))
    plt.plot(days.T, d.record_one_bigtype.T)
    plt.legend(d.typeBig_names, loc="best")
    plt.show()
def mounth_analysis():
    """Split the 12 months of each category into two k-means clusters.

    Sums the cached category sales by calendar month, clusters each category's
    12 monthly totals into two groups, saves the 6 x 12 label matrix as
    ``two_season.npy`` and prints it.
    """
    mounth_record = np.zeros(shape=(6, 12))
    two_season = np.zeros(shape=(6, 12))
    record = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\record_one_bigtype.npy')
    day_record = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time3_list.npy', allow_pickle=True)
    for big in range(6):
        for col, day in enumerate(day_record):
            # Add this day's sales to the bucket of its calendar month.
            mounth_record[big][day.month - 1] += record[big][col]
    for big in range(6):
        monthly = mounth_record[big, :].reshape(-1, 1)
        two_season[big, :] = KMeans(n_clusters=2, random_state=0).fit(monthly).labels_
    np.save(r"D:\win10浏览器\CUMCM2023Problems\C题\two_season.npy", two_season)
    print(two_season)
# 计算成本加成定价的系数
def _aggregate_to_big_types(plus, weight, big_index_of_food, n_big=6, n_day=1085):
    """Roll per-item markup and weight matrices up to category level.

    For each day and category, only items with markup > 0.005 and
    weight >= 0.005 take part. The category weight is their summed weight and
    the category markup is their weight-averaged markup, stored only when it
    is below 4 (larger values are treated as outliers and left at 0).

    Returns:
        ``(plus_big, weight_big)``, both of shape ``(n_big, n_day)``.
    """
    plus_big = np.zeros(shape=(n_big, n_day))
    weight_big = np.zeros(shape=(n_big, n_day))
    for day in range(n_day):
        plus_lists = [[] for _ in range(n_big)]
        weight_lists = [[] for _ in range(n_big)]
        for food, index_big in enumerate(big_index_of_food):
            # Only non-zero entries are meaningful.
            if plus[food, day] > 0.005 and weight[food, day] >= 0.005:
                plus_lists[index_big].append(plus[food, day])
                weight_lists[index_big].append(weight[food, day])
        for big in range(n_big):
            sum_wei = sum(weight_lists[big])
            weight_big[big][day] = sum_wei
            if weight_lists[big]:
                temp = np.sum(np.multiply(weight_lists[big], plus_lists[big])) / sum_wei
                if temp < 4:
                    plus_big[big][day] = temp
    return plus_big, weight_big


def cost_plus_pricing(d):
    """Aggregate the cached per-item markup/weight matrices to the six categories.

    Loads ``plus_type``, ``weight_type_noDiscount``, ``weight_type_Discount``
    and ``plus_lower_type`` (251 x 1085 each) and writes ``plus_Bigtype``,
    ``weight_Bigtype_noDiscount``, ``weight_Bigtype_Discount`` and
    ``plus_lower_Bigtype`` (6 x 1085 each).

    Args:
        d: a ``Data`` instance restored with ``load_array`` (the category
            lookup is indexed with string ids).
    """
    # ---- Disabled one-off preprocessing, kept for reference -----------------
    # It produced the four per-item .npy files loaded below.
    #
    # plus_type = np.zeros(shape=(251, 1085))               # daily markup
    # weight_type_noDiscount = np.zeros(shape=(251, 1085))  # undiscounted sales
    # weight_type_Discount = np.zeros(shape=(251, 1085))    # discounted sales
    # plus_lower_type = np.zeros(shape=(251, 1085))         # markup of discounted sales
    # for i in range(831136):  # undiscounted rows
    #     current_time = d.data22_values[i][0]
    #     cur_time_Int = time_Int(current_time)
    #     time_index = d.time_Int_list.index(cur_time_Int)
    #     current_id = d.data22_values[i][2]
    #     current_weight = float(d.data22_values[i][3])
    #     current_price = float(d.data22_values[i][4])
    #     food_index = d.findIndexFromID[str(current_id)]
    #     weight_type_noDiscount[food_index][time_index] += current_weight
    #     cost = d.record_cost[food_index][time_index]
    #     if cost != 0:
    #         plus_type[food_index][time_index] = current_price / cost
    # for i in range(831137, len(d.data22_values)):  # discounted rows
    #     current_time = d.data22_values[i][0]
    #     cur_time_Int = time_Int(current_time)
    #     time_index = d.time_Int_list.index(cur_time_Int)
    #     current_id = d.data22_values[i][2]
    #     current_weight = float(d.data22_values[i][3])
    #     current_price = float(d.data22_values[i][4])
    #     food_index = d.findIndexFromID[str(current_id)]
    #     weight_type_Discount[food_index][time_index] += current_weight
    #     if current_weight > 0.005:
    #         print(current_weight)
    #     cost = d.record_cost[food_index][time_index]
    #     if cost != 0:
    #         plus_lower_type[food_index][time_index] = current_price / cost
    # np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_type.npy', plus_type)
    # np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_noDiscount.npy', weight_type_noDiscount)
    # np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_Discount.npy', weight_type_Discount)
    # np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_type.npy', plus_lower_type)
    # --------------------------------------------------------------------------
    plus_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_type.npy')
    weight_type_noDiscount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_noDiscount.npy')
    weight_type_Discount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_Discount.npy')
    plus_lower_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_type.npy')

    # The item -> category position mapping does not depend on the day, so it
    # is resolved once. Ids are normalised with int() for both passes.
    big_index_of_food = []
    for food in range(251):
        item_id = int(d.data1_values[food, 0])
        big_id = d.find_bigtype_fromsmallID[str(item_id)]
        big_index_of_food.append(d.typeBig_ids.index(big_id))

    # Roll items up into categories: undiscounted first, then discounted.
    plus_Bigtype, weight_Bigtype_noDiscount = _aggregate_to_big_types(
        plus_type, weight_type_noDiscount, big_index_of_food)
    plus_lower_Bigtype, weight_Bigtype_Discount = _aggregate_to_big_types(
        plus_lower_type, weight_type_Discount, big_index_of_food)

    # save
    np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_Bigtype.npy', plus_Bigtype)
    np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_noDiscount.npy', weight_Bigtype_noDiscount)
    np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_Discount.npy', weight_Bigtype_Discount)
    np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_Bigtype.npy', plus_lower_Bigtype)
# 时间序列Int化
def time_Int(li):
    """Encode dates as ``yyyymmdd`` integers.

    Args:
        li: a ``pandas.Timestamp``, a ``list`` of date-like objects, or a
            numpy array of them (flattened before conversion). Elements need
            ``year``, ``month`` and ``day`` attributes.

    Returns:
        An ``int`` for a single Timestamp, a 1-D integer ndarray for a list or
        ndarray, and ``None`` for any other input type.
    """
    def _encode(moment):
        return int(moment.year * 10000 + moment.month * 100 + moment.day)

    # pd.Timestamp is the public name of the class previously reached through
    # the private pandas._libs path.
    if isinstance(li, pd.Timestamp):
        return _encode(li)
    if isinstance(li, np.ndarray):
        li = li.reshape(-1)
    elif not isinstance(li, list):
        return None
    time_return = np.zeros(shape=len(li), dtype=int)
    for i, moment in enumerate(li):
        time_return[i] = _encode(moment)
    return time_return
# 对转化过的时间求月份
def time_get_mounth(li):
    """Return the month (1-12) encoded in a ``yyyymmdd`` value."""
    month_and_day = int(li) % 10000
    return month_and_day // 100
def make_month_list():
    """Derive the month of every cached ``yyyymmdd`` day, print the list and save it as ``month_list.npy``."""
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    li_return = [time_get_mounth(stamp) for stamp in timeInt]
    print(li_return)
    np.save(r'D:\win10浏览器\CUMCM2023Problems\C题\month_list.npy', np.array(li_return, dtype=int))
def weight_plus_show_noDiscount():
    """Scatter-plot undiscounted sales weight against markup per category and season.

    One figure is shown for each of the 6 categories and each of the 2 season
    labels stored in ``two_season.npy``, using only the days whose month
    carries that label.
    """
    two_reason = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\two_season.npy")
    plus_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_Bigtype.npy')
    weight_Bty_noDiscount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_noDiscount.npy')
    # The next three arrays are loaded but not used by this function.
    weight_Bty_Discount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_Discount.npy')
    plus_lower_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_Bigtype.npy')
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    bigF_nameList = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\BigType_name_list.npy")
    month_npList = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\month_list.npy')
    for bigFood in range(6):  # every category
        reason_list = two_reason[bigFood]
        for reason in range(2):  # every season label
            # Collect the day columns whose month belongs to this season.
            indexs = list()
            for month in range(12):
                if reason_list[month] == reason:
                    indexs.extend(np.where(month_npList == month + 1)[0])
            x = plus_Bty[bigFood][indexs]
            y = weight_Bty_noDiscount[bigFood][indexs]
            plt.title('{}{}reason销量与加成系数散点图'.format(bigF_nameList[bigFood], reason))
            plt.xlabel('售价')
            plt.ylabel('销售量')
            plt.scatter(x, y)
            plt.show()
def weight_plus_show():
    """Collect (markup, weight) samples per category and season, selecting a subplot per season.

    Returns:
        ``save_sn`` with ``save_sn[category][season] == [x, y]``. ``x``
        concatenates the undiscounted and discounted markups of the season's
        days and ``y`` the matching sales weights.
    """
    two_reason = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\two_season.npy")
    plus_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_Bigtype.npy')
    weight_Bty_noDiscount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_noDiscount.npy')
    weight_Bty_Discount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_Discount.npy')
    plus_lower_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_Bigtype.npy')
    # The next two arrays are loaded but not used by this function.
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    bigF_nameList = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\BigType_name_list.npy")
    month_npList = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\month_list.npy')
    save_sn = [[[[] for _ in range(2)] for _ in range(2)] for _ in range(6)]
    for bigFood in range(6):  # every category
        reason_list = two_reason[bigFood]
        for reason in range(2):  # every season label
            plt.subplot(1, 2, reason + 1)
            # Collect the day columns whose month belongs to this season.
            indexs = list()
            for month in range(12):
                if reason_list[month] == reason:
                    indexs.extend(np.where(month_npList == month + 1)[0])
            slot = save_sn[bigFood][reason]
            slot[0] = np.hstack((plus_Bty[bigFood][indexs], plus_lower_Bty[bigFood][indexs]))
            slot[1] = np.hstack((weight_Bty_noDiscount[bigFood][indexs], weight_Bty_Discount[bigFood][indexs]))
    return save_sn
def weight_plus_without_show():
    """Collect (markup, weight) samples per category and season without touching any figure.

    Prints the types of each ``x``/``y`` pair as it is built.

    Returns:
        ``save_sn`` with ``save_sn[category][season] == [x, y]``. ``x``
        concatenates the undiscounted and discounted markups of the season's
        days and ``y`` the matching sales weights.
    """
    two_reason = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\two_season.npy")
    plus_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_Bigtype.npy')
    weight_Bty_noDiscount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_noDiscount.npy')
    weight_Bty_Discount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_Bigtype_Discount.npy')
    plus_lower_Bty = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_Bigtype.npy')
    # The next two arrays are loaded but not used by this function.
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    bigF_nameList = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\BigType_name_list.npy")
    month_npList = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\month_list.npy')
    save_sn = [[[[] for _ in range(2)] for _ in range(2)] for _ in range(6)]
    for bigFood in range(6):  # every category
        reason_list = two_reason[bigFood]
        for reason in range(2):  # every season label
            # Collect the day columns whose month belongs to this season.
            indexs = list()
            for month in range(12):
                if reason_list[month] == reason:
                    indexs.extend(np.where(month_npList == month + 1)[0])
            x = np.hstack((plus_Bty[bigFood][indexs], plus_lower_Bty[bigFood][indexs]))
            y = np.hstack((weight_Bty_noDiscount[bigFood][indexs], weight_Bty_Discount[bigFood][indexs]))
            print(type(x), type(y))
            slot = save_sn[bigFood][reason]
            slot[0] = x
            slot[1] = y
    return save_sn
# One 0/1 label per big category, passed to read_modelAnd_predict as its third
# argument in preidct_for_q3.
# NOTE(review): presumably the season cluster (see two_season.npy) that July
# falls into for each category - confirm.
month_7_reason = [1,1,0,0,1,0]
def preidct_for_q3():
    """Predict a 7 x 6 table for the six categories and write it to ``question3_result.xlsx``.

    Reads ``weit_predict.xlsx`` (the first column is dropped) and feeds each
    category's column, together with its index and its ``month_7_reason``
    label, to ``read_modelAnd_predict``.
    """
    # Raw string: the path contains backslash sequences such as "\w" and "\C".
    path_cur = r"D:\win10浏览器\CUMCM2023Problems\C题\weit_predict.xlsx"
    weit_predict = pd.read_excel(path_cur)
    wpredict_values = weit_predict.values[:, 1:]
    preidct_values = np.zeros(shape=(7, 6))
    for bigFood in range(6):
        result = read_modelAnd_predict(wpredict_values[:, bigFood], bigFood, month_7_reason[bigFood])
        preidct_values[:, bigFood] = result
    data_df = pd.DataFrame(preidct_values)
    # The context manager saves and closes the workbook; ExcelWriter.save() no
    # longer exists in pandas 2.
    with pd.ExcelWriter(get_path('question3_result.xlsx')) as writer:
        data_df.to_excel(writer, sheet_name='page_1')
def weekend_statistics():
    """Unfinished stub: loads the day codes and category sales and computes an unused per-day value.

    The value ``(yyyymmdd % 100) % 7`` is the day of the month modulo 7;
    nothing is stored or returned.
    """
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    record_one_bigtype = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\record_one_bigtype.npy')
    i = 0
    while i < 1085:
        day = (timeInt[i] % 100) % 7
        i += 1
#将可售写入了food_sale,是id值
def can_sale():
    """Save and return the distinct values in the second column of ``24-30批发.xlsx``.

    The result is written to ``food_sale.npy``; the original comment states
    that these are the ids of the items that can be sold.
    """
    sheet = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\24-30批发.xlsx")
    food_sale = np.unique(sheet.values[:, 1])
    np.save(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", food_sale)
    return food_sale
def before_seven_day():
    """Placeholder; does nothing and returns ``None``."""
def findI_food_sixty():
    """Export the daily sales series of every sellable item to ``pic3\\waitForJ2.xlsx``.

    The sheet has one column per id in ``food_sale.npy`` and one row per day.
    """
    record_one_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\record_one_type.npy')
    food_sale = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", allow_pickle=True)
    food_sale = food_sale.reshape(-1)
    with open(get_path("findIndexFromID.json"), "r") as tf:
        findIndexFromID = json.load(tf)  # JSON keys are strings
    print(len(food_sale))
    data_return = np.zeros(shape=(len(food_sale), 1085))
    for return_index, food_id in enumerate(food_sale):
        index = findIndexFromID[str(food_id)]
        data_return[return_index, :] = record_one_type[index, :]
    data_df = pd.DataFrame(data_return.T)
    # The context manager saves and closes the workbook; ExcelWriter.save() no
    # longer exists in pandas 2.
    with pd.ExcelWriter(r'D:\win10浏览器\CUMCM2023Problems\C题\pic3\waitForJ2.xlsx') as writer:
        data_df.to_excel(writer, sheet_name='page_1')
def re_count_getMoney():
    """Unfinished stub: loads the sales matrix, the sellable ids and the id lookup, then returns ``None``."""
    record_one_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\record_one_type.npy')
    food_sale = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", allow_pickle=True)
    # Close the JSON file deterministically instead of leaking the handle.
    with open(get_path("findIndexFromID.json"), "r") as tf:
        findIndexFromID = json.load(tf)
def read_excel_4():
    """Read ``附件4_4.xlsx`` and return ``(list of column 0, flattened column 2)``.

    Callers use the first result to look an item id up and the second as the
    matching loss value.
    """
    table = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\附件4_4.xlsx").values
    first_column = list(table[:, 0])
    third_column = table[:, 2].reshape(-1)
    return first_column, third_column
# Maximum number of selectable items is 61; used below as the length of the
# per-item vectors built from food_sale.
max_choice = 61
#优先使用后面的值
def cost_71_function():
    """Build the per-item cost vector from ``24-30批发.xlsx`` and save it as ``cost_71.npy``.

    Each row's cost (column 2) is stored at the position of its id (column 1)
    in ``food_sale.npy``. Rows are applied in order, so a later row for the
    same id overrides an earlier one.
    """
    data24_30 = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\24-30批发.xlsx")
    food_sale = list(np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", allow_pickle=True))
    # One slot per sellable item (61 in total).
    cost_71 = np.zeros(shape=(max_choice))
    for row in data24_30.values:
        item_id = int(row[1])
        cost = float(row[2])
        cost_71[food_sale.index(item_id)] = cost
    np.save(r"D:\win10浏览器\CUMCM2023Problems\C题\cost_71.npy", cost_71)
    print(cost_71)
from scipy.optimize import minimize
#from cvxpy import *
import pulp
from sko.PSO import PSO
def read_excel_71_predict():
    """Return the contents of ``71_predict.xlsx`` flattened to a 1-D array."""
    sheet = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\71_predict.xlsx")
    return sheet.values.reshape(-1)
#优化目标
# Module-level inputs shared by the optimisation functions below. They are
# loaded at import time, so the cached files must already exist.
food_sale = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", allow_pickle=True)
food_sale = list(food_sale)
cost_71 = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\cost_71.npy")  # predicted wholesale price per item
# Context managers close the JSON files instead of leaking the handles.
with open(get_path("find_bigtype_fromsmallID.json"), "r") as tf:
    find_bigtype_fromsmallID = json.load(tf)
with open(get_path("listBig_ids.json"), "r") as tf:
    listBig_ids = json.load(tf)  # list(int)
data4_index_list,loss_npList = read_excel_4()
#def read_modelAnd_predict_for4()
#优化目标
def fun_question(x):
    """Objective for the optimiser: the negated total profit of a candidate plan.

    Args:
        x: vector of length ``2 * max_choice``. The first half holds the
            purchase quantity per item and the second half the selection flag
            per item (an item counts as selected when its flag is > 0.5).

    Returns:
        ``-1 *`` the summed profit of the selected items, or ``-0.0`` when no
        item is selected.
    """
    es = x[:max_choice:]  # purchase quantities
    qs = x[max_choice::]  # selection flags
    w = 0.0
    for i in range(max_choice):
        item_id = food_sale[i]
        loss_index = data4_index_list.index(item_id)
        # Loss rate is given in percent; 0.75 is the scaling the author applied.
        loss = loss_npList[loss_index]*0.01*0.75
        e_loss = es[i]*(1-loss)  # quantity left after loss
        cost = cost_71[i]  # wholesale price of this item
        if qs[i]>0.5:
            one = read_modelAnd_predict(e_loss,i)*e_loss*qs[i]*cost-cost*es[i]
            w += one
    # w is still the plain float 0.0 when nothing was selected; atleast_1d
    # keeps the original `w[0]` result for arrays without raising on a scalar.
    return -1*np.atleast_1d(w)[0]
#ans4_final
def read_excel_71_predict_showDeal():
    """Write the id, name and ``71_predict.xlsx`` value of every sellable item to ``answer71.xlsx``.

    The three columns are combined with ``np.hstack``, so every cell is
    written as text.
    """
    data_71 = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\71_predict.xlsx")
    da71_values = data_71.values.reshape(-1)
    data1 = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\附件1.xlsx")
    data1_values = data1.values
    id_list = list(data1_values[:, 0])
    name_list = list(data1_values[:, 1])
    return_id_list = list()
    return_name_list = list()
    for i in range(max_choice):
        item_id = food_sale[i]
        return_id_list.append(item_id)
        return_name_list.append(name_list[id_list.index(item_id)])
    return_id_1 = np.array(return_id_list).reshape(-1,1)
    return_na_2 = np.array(return_name_list).reshape(-1,1)
    return_71_3 = np.array(da71_values).reshape(-1,1)
    return_data = pd.DataFrame(np.hstack([return_id_1,return_na_2,return_71_3]))
    print(return_data.shape)
    # The context manager saves and closes the workbook; ExcelWriter.save() no
    # longer exists in pandas 2.
    with pd.ExcelWriter(get_path('answer71.xlsx')) as writer:
        return_data.to_excel(writer, sheet_name='page_1')
def deal_answer3():
    """Turn the saved optimiser solution into the ``answer4.xlsx`` table.

    Loads ``answer3\\best_x.npy``, zeroes the quantity of every item whose
    selection flag is below 0.5, and writes id, name, quantity and the value
    predicted by ``read_modelAnd_predict`` for each item whose flag is at
    least 0.005. The columns are combined with ``np.hstack``, so every cell is
    written as text.
    """
    best_x = np.load(get_path(r'answer3\best_x.npy') )
    wei_ans = best_x[:max_choice]
    choice_ans = best_x[max_choice:]
    for i, choice in enumerate(choice_ans):
        if choice<0.5:
            wei_ans[i]=0  # an unselected item is not purchased
    data1 = pd.read_excel(r"D:\win10浏览器\CUMCM2023Problems\C题\附件1.xlsx")
    data1_values = data1.values
    id_list = list(data1_values[:,0])
    name_list = list(data1_values[:,1])
    return_name_list = list()
    return_id_list = list()
    return_plus_list = list()  # predicted markup coefficient
    return_wei_list = list()
    for i in range(max_choice):
        # NOTE(review): the skip threshold (0.005) differs from the 0.5 used
        # above; identical for 0/1 flags - confirm which one is intended.
        if choice_ans[i]<0.005:
            continue  # not selected, leave out of the table
        item_id = food_sale[i]
        return_id_list.append(item_id)
        # NOTE(review): the season label is looked up but not passed to the
        # model call below; kept so a missing id still fails loudly.
        father_id = find_bigtype_fromsmallID[str(item_id)]
        father_index = listBig_ids.index(father_id)
        reason = month_7_reason[father_index]
        predict = read_modelAnd_predict(wei_ans[i],i)[0]
        return_wei_list.append(wei_ans[i])
        return_plus_list.append(predict)
        return_name_list.append(name_list[id_list.index(item_id)])
    return_id = np.array(return_id_list).reshape(-1,1)
    return_name = np.array(return_name_list).reshape(-1,1)
    return_wei = np.array(return_wei_list).reshape(-1,1)
    return_plus = np.array(return_plus_list).reshape(-1,1)
    return_data = np.hstack([return_id,return_name,return_wei,return_plus])
    print(return_data.shape)
    return_data = pd.DataFrame(return_data)
    # The context manager saves and closes the workbook; ExcelWriter.save() no
    # longer exists in pandas 2.
    with pd.ExcelWriter(get_path('answer4.xlsx')) as writer:
        return_data.to_excel(writer, sheet_name='page_1')
#计算标准差(用每年624-707的数据)
def get_std():
    """Compute each sellable item's sales standard deviation over the 24 June - 7 July windows.

    Uses every cached day whose month-day code lies in ``[624, 707]``, saves
    the ``max_choice`` values as ``std_canSale.npy`` and prints them.
    """
    timeInt = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy')
    record_one_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\record_one_type.npy')  # daily sales
    food_sale = np.load(r"D:\win10浏览器\CUMCM2023Problems\C题\food_sale.npy", allow_pickle=True)
    food_sale = list(food_sale)
    # Close the JSON file deterministically instead of leaking the handle.
    with open(get_path("findIndexFromID.json"), "r") as tf:
        findIndexFromID = json.load(tf)
    # The row of each sellable item does not depend on the day: resolve it once.
    row_of_item = [findIndexFromID[str(food_sale[j])] for j in range(max_choice)]
    std_canSale_list = [[] for _ in range(max_choice)]  # samples per item
    for i in range(1085):
        time_monthAday = timeInt[i]%10000
        if 624 <= time_monthAday <= 707:  # inside the window: record this day
            for j, index in enumerate(row_of_item):
                std_canSale_list[j].append(record_one_type[index][i])
    std_canSale = np.std(std_canSale_list,axis=1)
    np.save(get_path('std_canSale.npy'),std_canSale)
    print(std_canSale)
def find_lb(lb,lower):
    """Return the element-wise larger of *lb* and *lower* as a float array.

    *lb* is the hard minimum; *lower* replaces it only where strictly greater.
    The result has ``len(lb)`` entries.
    """
    lb_return = np.zeros(shape=len(lb))
    for i, floor in enumerate(lb):
        candidate = lower[i]
        lb_return[i] = candidate if candidate > floor else floor
    return lb_return
def finb_up(min_put,upper):
    """Return upper bounds that always stay above the hard minimum.

    Each entry is ``upper[i]`` when that exceeds ``min_put[i]``, otherwise
    ``min_put[i] + 0.01``. The result has ``len(min_put)`` entries.
    """
    ub_return = np.zeros(shape=len(min_put))
    for i, floor in enumerate(min_put):
        ceiling = upper[i]
        ub_return[i] = ceiling if ceiling > floor else floor + 0.01
    return ub_return
from sko.GA import GA
#GA计算
def optimize_function():
    """Run the genetic algorithm on ``fun_question`` and save/plot the result.

    The decision vector has ``2 * max_choice`` entries: purchase quantities
    followed by selection flags. The two inequality constraints keep the sum
    of the flags between 27 and 33. Quantity bounds are the forecast +/- one
    standard deviation, with a floor of 2.5. ``best_x`` / ``best_y`` are saved
    under ``answer3`` and the optimisation history is plotted.
    """
    # Keep the number of selected items in [27, 33].
    constraint_ueq = (
        lambda x: sum(x[max_choice::]) - 33,
        lambda x: 27 - sum(x[max_choice::]),
    )
    arima_one = read_excel_71_predict()
    std_one = np.load(get_path('std_canSale.npy'))
    # The original comment mentions "arima +- 2*std" and a doubled list was
    # computed, but the bounds below have always used one standard deviation.
    std_two = [2 * value for value in std_one]
    # Hard minimum purchase quantity per item.
    min_put = [2.5]*max_choice
    upper = np.add(arima_one,std_one)
    lower = np.subtract(arima_one,std_one)
    quantity_lb = find_lb(min_put,lower)
    quantity_ub = finb_up(min_put,upper)
    # Append the [0, 1] bounds of the selection flags.
    lb = list(quantity_lb) + [0]*(max_choice)
    ub = list(quantity_ub) + [1]*(max_choice)
    precision = [1e-7]*max_choice + [1]*max_choice
    ga = GA(func=fun_question,n_dim=max_choice*2,
            size_pop=50,max_iter=300,prob_mut=0.001,
            lb = lb,ub=ub,
            constraint_ueq=constraint_ueq,
            precision=precision
            )
    best_x, best_y = ga.run()
    print('best_x:', best_x, '\n', 'best_y:', best_y)
    np.save(get_path(r'answer3\best_x.npy'),best_x)
    np.save(get_path(r'answer3\best_y.npy'), best_y)
    Y_history = pd.DataFrame(ga.all_history_Y)
    fig, ax = plt.subplots(2, 1)
    ax[0].plot(Y_history.index, Y_history.values, '.', color='red')
    Y_history.min(axis=1).cummin().plot(kind='line')
    plt.show()
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
import joblib
def Regression_show2(save_sn):
    """Train, plot and save one MLP regressor per (category, reason) pair.

    Args:
        save_sn: nested container indexed as ``save_sn[foodBig][reason][k]``
            with ``foodBig`` in 0..5, ``reason`` in 0..1 and ``k`` in 0..1,
            each leaf exposing ``reshape``.

    Returns:
        None.

    NOTE(review): the guard below is a deliberate kill-switch ("prevent the
    saved models from being refreshed"): any non-None argument returns
    immediately. The training loop is therefore only reached with
    ``save_sn=None``, where indexing it raises ``TypeError``. Remove the
    guard to retrain.
    """
    # `is not None` instead of `!= None`: the latter is evaluated
    # element-wise on a numpy array and then fails inside `if`.
    if save_sn is not None:
        return

    for foodBig in range(6):
        for reason in range(2):
            # Leaf 1 is the single feature column, leaf 0 the target (the
            # original swapped X and y after building them; the original
            # comment labels leaf 0 as markup pricing, leaf 1 as sales).
            X = save_sn[foodBig][reason][1].reshape(-1, 1)
            y = save_sn[foodBig][reason][0].reshape(-1)
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.38, random_state=42)
            # One hidden layer of 400 units; `(400)` in the original was
            # the plain int 400, written here as a real one-element tuple.
            mlp_regressor = MLPRegressor(hidden_layer_sizes=(400,),
                                         max_iter=5000, random_state=42)
            mlp_regressor.fit(X_train, y_train)
            y_pred = mlp_regressor.predict(X_test)
            plt.scatter(X_test, y_test, c='b', label='Data')
            plt.scatter(X_test, y_pred, c='r', label='Predictions')
            plt.xlabel('X')
            plt.ylabel('y')
            plt.legend()
            plt.show()
            # save model
            # NOTE(review): written under `model2\`, while
            # read_modelAnd_predict2 loads `model\model{}_{}.pkl` -- confirm
            # which directory is intended.
            joblib.dump(mlp_regressor,
                        r'D:\win10浏览器\CUMCM2023Problems\C题\model2\model{}_{}.pkl'.format(foodBig, reason))
# Extract the columns that fall inside the 624..707 date window
def questionn_ware_deal():
    """Collect the 624..707 columns of the markup and weight matrices.

    Loads the date vector plus the non-discounted and discounted markup and
    weight arrays, walks the first 1085 columns of each pair, and copies
    every column whose ``date % 10000`` lies in ``[624, 707]`` into
    251 x 84 output arrays (non-discounted columns first, then discounted).
    The results are saved as ``plus_624707.npy`` and ``wei_624707.npy``.
    """
    dates = list(np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\time_Int.npy'))
    plus_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_type.npy')
    plus_lower_type = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\plus_lower_type.npy')
    weight_type_noDiscount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_noDiscount.npy')
    weight_type_Discount = np.load(r'D:\win10浏览器\CUMCM2023Problems\C题\weight_type_Discount.npy')

    markup_sources = (plus_type, plus_lower_type)
    weight_sources = (weight_type_noDiscount, weight_type_Discount)

    # Fixed-size outputs; columns that are never filled stay zero.
    plus = np.zeros(shape=(251, 42 * 2))
    wei = np.zeros(shape=(251, 42 * 2))

    out_col = 0
    for markup_src, weight_src in zip(markup_sources, weight_sources):
        for day in range(1085):
            # presumably the MMDD part of a YYYYMMDD integer -- TODO confirm
            month_day = dates[day] % 10000
            if month_day < 624 or month_day > 707:
                continue
            plus[:, out_col] = markup_src[:, day]
            wei[:, out_col] = weight_src[:, day]
            out_col += 1

    np.save(get_path('plus_624707.npy'), plus)
    np.save(get_path('wei_624707.npy'), wei)
    print('ok')
# Module-level lookup table: item id (as a JSON string key) -> row index.
# Loaded once at import time; the context manager closes the file handle,
# which the previous bare open() never did.
with open(get_path("findIndexFromID.json"), "r") as tf:
    findIndexFromID = json.load(tf)
# Keep only the rows of the items listed in food_sale
def questionn_ware_deal2():
    """Reduce the 624..707 matrices to the rows of the items in ``food_sale``.

    Each id in ``food_sale`` is translated to a row index through
    ``findIndexFromID`` (string keys). Rows are taken in ``food_sale`` order
    (no sorting) from ``plus_624707.npy`` and ``wei_624707.npy``, and the
    results are written to ``plus_2_624707.npy`` and ``wei_2_624707.npy``.
    """
    rows = [findIndexFromID[str(item_id)] for item_id in food_sale]

    plus_all = np.load(get_path('plus_624707.npy'))
    wei_all = np.load(get_path('wei_624707.npy'))

    plus_selected = plus_all[rows, :]
    wei_selected = wei_all[rows, :]
    print(plus_selected.shape, wei_selected.shape)

    np.save(get_path('plus_2_624707.npy'), plus_selected)
    np.save(get_path('wei_2_624707.npy'), wei_selected)
    print('ok')
def Regression_show():
    """Fit and save one MLP regressor per selected item.

    For every row ``index`` in ``range(max_choice)`` the ``wei`` row is used
    as the single input feature and the ``plus`` row as the target (the
    original comment labels ``plus`` as markup pricing and ``wei`` as sales
    volume). A test-set scatter plot is shown for the first ten items only.
    Each model is dumped to ``model2\\model{index}.pkl``, the path that
    ``read_modelAnd_predict`` loads from.
    """
    markup_rows = np.load(get_path('plus_2_624707.npy'))
    weight_rows = np.load(get_path('wei_2_624707.npy'))

    for index in range(max_choice):
        features = weight_rows[index][:].reshape(-1, 1)
        target = markup_rows[index][:].reshape(-1)
        X_train, X_test, y_train, y_test = train_test_split(
            features, target, test_size=0.38, random_state=42)

        regressor = MLPRegressor(hidden_layer_sizes=400, max_iter=5000,
                                 random_state=42)
        regressor.fit(X_train, y_train)

        # Only the first ten items are visualised.
        if index < 10:
            predicted = regressor.predict(X_test)
            plt.scatter(X_test, y_test, c='b', label='Data')
            plt.scatter(X_test, predicted, c='r', label='Predictions')
            plt.xlabel('X')
            plt.ylabel('y')
            plt.legend()
            plt.show()

        # Persist the fitted model for this item.
        model_file = r'D:\win10浏览器\CUMCM2023Problems\C题\model2\model{}.pkl'.format(index)
        joblib.dump(regressor, model_file)
def read_modelAnd_predict2(predict_list, bigFoodIndex=0, reason=0):
    """Predict with the saved per-category model.

    Loads ``model\\model{bigFoodIndex}_{reason}.pkl`` from disk on every
    call, reshapes ``predict_list`` into a single-feature column and returns
    whatever the model's ``predict`` returns for it.
    """
    model_file = r"D:\win10浏览器\CUMCM2023Problems\C题\model\model{}_{}.pkl".format(bigFoodIndex, reason)
    regressor = joblib.load(model_file)
    feature_column = np.array(predict_list).reshape(-1, 1)
    return regressor.predict(feature_column)
def read_modelAnd_predict(predict_list, Index=0):
    """Predict with the saved per-item model.

    Loads ``model2\\model{Index}.pkl`` from disk on every call, reshapes
    ``predict_list`` into a single-feature column and returns whatever the
    model's ``predict`` returns for it.
    """
    model_file = r"D:\win10浏览器\CUMCM2023Problems\C题\model2\model{}.pkl".format(Index)
    regressor = joblib.load(model_file)
    feature_column = np.array(predict_list).reshape(-1, 1)
    return regressor.predict(feature_column)
# Script entry point. Exactly one pipeline stage is live at a time; the
# commented-out calls below are the other stages that were run manually
# from here, kept as a record of what can be invoked.
if __name__ == '__main__':
    #optimize_function()
    #questionn_ware_deal2()
    #Regression_show()
    #read_excel_71_predict_showDeal()
    # Currently active stage.
    deal_answer3()
    #read_excel_4()
    #optimize_function()
    #get_std()
    #cost_71_function()
    #print(help(PSO))
    #read_excel_71_predict()
    #optimize_function()
    #preidct_for_q3()
    #findI_food_sixty()
    # read_modelAnd_predict()
    # d = Data()
    # d.load_array()
    # cost_plus_pricing(d)
    # mounth_analysis()
    # data = weight_plus_without_show()
    # Regression_show(data)
    # The string literal below is a no-op expression used as a block
    # comment around an older driver sequence; it is never executed.
    """
d = Data()
#d.function_integration()
d.load_array()
cost_plus_pricing(d)
d = Data()
"""
    #d.load_array()