"""Pre-processing for customer value analysis (August meal orders).

Pipeline:
1. Load the order table, keep only orders whose ``order_status`` equals 1
   and write that subset to disk.
2. Load the user table and copy, for every user that placed an order, the
   start time of that user's last order row into the last-visit column.
3. Drop users that still have no last visit, keep four columns and write
   the result to disk.
"""
import pandas as pd

# Input / output locations used by the script.
ORDER_INFO_PATH = "E:/data/meal_order_info.csv"
FILTERED_ORDER_PATH = "E:/data/August_order_info.csv"
USERS_PATH = "E:/data/users.csv"
USERS_AUGUST_PATH = "E:/data/users_august.csv"

# Positional column indexes. The tables are addressed by position for these
# columns, so the positions are kept as the single source of truth.
ORDER_EMP_ID_COL = 1        # order table: id matched against users['USER_ID']
ORDER_START_TIME_COL = 9    # order table: value copied into the user table
USER_LAST_VISIT_COL = 14    # user table: column receiving the order time
USER_OUTPUT_COLS = [0, 2, 12, 14]  # user table: columns kept in the output


def filter_valid_orders(orders: pd.DataFrame) -> pd.DataFrame:
    """Return the orders whose ``order_status`` is 1, re-indexed from 0.

    Args:
        orders: Order table containing an ``order_status`` column.

    Returns:
        A new DataFrame holding only the matching rows, with a fresh
        0..n-1 index.
    """
    return orders[orders['order_status'] == 1].reset_index(drop=True)


def assign_last_visits(users: pd.DataFrame, orders: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``users`` with the last-visit column filled from ``orders``.

    For every user whose ``USER_ID`` appears in the order table's id column
    (position ``ORDER_EMP_ID_COL``), the column at position
    ``USER_LAST_VISIT_COL`` is set to the value found at position
    ``ORDER_START_TIME_COL`` of that user's *last* order row. Users without
    any order keep their existing value. Orders whose id matches no user are
    ignored instead of raising.

    NOTE(review): "last" means last in row order, not the maximum timestamp;
    this mirrors row-by-row assignment. Sort ``orders`` by time beforehand if
    the input file is not chronological.

    Args:
        users: User table with a ``USER_ID`` column and at least
            ``USER_LAST_VISIT_COL + 1`` columns.
        orders: Order table with at least ``ORDER_START_TIME_COL + 1`` columns.

    Returns:
        A new DataFrame; ``users`` itself is not modified.
    """
    emp_ids = orders.iloc[:, ORDER_EMP_ID_COL]
    start_times = orders.iloc[:, ORDER_START_TIME_COL]

    # Keep one row per id -- the last one, so later orders win -- and skip
    # rows with a missing id, which can never match a user.
    keep = emp_ids.notna() & ~emp_ids.duplicated(keep='last')
    last_by_emp = pd.Series(
        start_times[keep].to_numpy(),
        index=emp_ids[keep].to_numpy(),
    )

    result = users.copy()
    last_visit_col = result.columns[USER_LAST_VISIT_COL]
    user_ids = result['USER_ID']
    has_order = user_ids.isin(last_by_emp.index)
    latest = user_ids.map(last_by_emp)
    # Only overwrite users that actually have an order; everyone else keeps
    # whatever the user table already contained.
    result[last_visit_col] = latest.where(has_order, result[last_visit_col])
    return result


def select_visited_users(users: pd.DataFrame) -> pd.DataFrame:
    """Drop users without a ``LAST_VISITS`` value and keep the output columns.

    Args:
        users: User table containing a ``LAST_VISITS`` column.

    Returns:
        A new DataFrame restricted to rows with a non-missing ``LAST_VISITS``
        and to the columns at positions ``USER_OUTPUT_COLS``.
    """
    visited = users.dropna(subset=['LAST_VISITS'])
    return visited.iloc[:, USER_OUTPUT_COLS]


if __name__ == "__main__":
    # Read the August order table and keep only orders with status 1.
    August_order_info = pd.read_csv(ORDER_INFO_PATH, encoding='utf-8')
    August_order_info = filter_valid_orders(August_order_info)
    print("提取8月份订单数据的维数:", August_order_info.shape)
    August_order_info.to_csv(FILTERED_ORDER_PATH, encoding='utf-8')
    print("文件写出完成!")

    # Read the August user table and attach each user's last order time.
    August_users_info = pd.read_csv(USERS_PATH, encoding='gbk')
    August_users_info = assign_last_visits(August_users_info, August_order_info)

    # Keep only users that have a last visit, then write the result.
    user = select_visited_users(August_users_info)
    print(user.head())
    user.to_csv(USERS_AUGUST_PATH, index=False, encoding='utf-8')
    print("文件写出完成!")