代码笔记,仅供参考
前言:对不同客户的下一次返厂时间进行预测。大多数客户的返厂次数不足10次,仅有少量客户的返厂次数大于30次。
# -*- coding: utf-8 -*-
import pymysql
import time
import numpy as np
import pandas as pd
class CarTest:
    """Predict each customer's next service-visit interval from MySQL data.

    Workflow (see ``main``): read ``(username, ordertime)`` rows from the
    ``CarTest`` table, hold out every customer's latest visit, predict the
    interval leading to it from the earlier visits, and score the
    predictions.

    Attributes:
        db: Open pymysql connection.
        cur: Cursor created from ``db``.
        avertime: Unused placeholder (only referenced by commented-out SQL).
        dataframe: Rows fetched by ``getMysqData`` (despite the name, a
            sequence of ``(username, ordertime)`` tuples, not a pandas frame).
        preTime: ``{username: [predicted interval, actual interval,
            second-to-last visit, last visit]}`` filled by ``timePre``.
        Accuracy: Fraction of predictions scored correct by ``errorTest``.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='19970928', database='datacup', charset='utf8'):
        """Open the database connection.

        All parameters default to the values that used to be hard-coded, so
        ``CarTest()`` behaves as before.

        NOTE(review): credentials should not live in source; pass them in
        from configuration instead of relying on these defaults.
        """
        self.db = pymysql.connect(host=host,
                                  port=port,
                                  user=user,
                                  password=password,
                                  database=database,
                                  charset=charset)
        self.cur = self.db.cursor()
        self.avertime = None
        # Was misspelled ``datetframe``; every other method uses ``dataframe``.
        self.dataframe = None
        self.preTime = {}
        self.Accuracy = 0

    def getDate(self):
        """Load ``./data/car_srv_train.csv`` into MySQL, one insert per line.

        The first line is treated as a header and skipped; blank lines are
        ignored instead of being sent to the database.
        """
        with open("./data/car_srv_train.csv", 'r') as f:
            f.readline()  # header row
            for line in f:
                if not line.strip():
                    continue
                self.mysqlInteractive(line)

    def timePre(self):
        """Predict the held-out interval for every customer with > 5 visits.

        For each such customer the latest visit is held out. The prediction
        is the mean of (a) the average gap between the remaining visits and
        (b) the most recent gap among them. Results are stored in
        ``self.preTime[username]`` as
        ``[predicted interval, actual interval, second-to-last visit,
        last visit]``.

        Assumes ``ordertime`` values support subtraction yielding
        ``timedelta`` objects (``errorTest`` reads ``.days``) - TODO confirm
        against the column type.
        """
        visitsByUser = {}
        for key, value in self.dataframe or ():
            visitsByUser.setdefault(key, []).append(value)

        for key, times in visitsByUser.items():
            if len(times) <= 5:
                continue
            ordered = sorted(times, reverse=True)
            latest, history = ordered[0], ordered[1:]
            # ``history`` holds len(history) visits, hence len(history) - 1
            # gaps. The original divided by one more than that, which
            # underestimated the average interval.
            averTimeInterval = (history[0] - history[-1]) / (len(history) - 1)
            divTime = history[0] - history[1]
            preTime = (averTimeInterval + divTime) / 2
            # 1: predicted interval, 2: actual interval,
            # 3: second-to-last visit, 4: last visit
            self.preTime[key] = [preTime, latest - history[0],
                                 history[0], latest]

    def errorTest(self):
        """Set ``self.Accuracy`` to the share of near-miss predictions.

        A prediction counts as correct when the whole-day difference between
        the actual and predicted interval is below 3. The original comment
        says "3 working days", but the code compares calendar days.
        With no predictions at all, ``Accuracy`` is set to 0 instead of
        raising ``ZeroDivisionError``.
        """
        trueNum = 0
        countNum = 0
        for predicted, actual, *_ in self.preTime.values():
            countNum += 1
            if abs((actual - predicted).days) < 3:
                trueNum += 1
        self.Accuracy = trueNum / countNum if countNum else 0

    def mysqlInteractive(self, data):
        """Insert one CSV line into the ``CarTest`` table and commit.

        Only the first three comma-separated fields are used, in the order
        ``ordername, username, ordertime``. Failures are reported and rolled
        back so that one bad row does not abort the whole load.
        """
        # Strip the line terminator so it cannot end up inside the last field.
        dataList = data.rstrip("\r\n").split(",")[0:3]
        sqlLine = ("insert into CarTest(ordername, username, ordertime) "
                   "values(%s, %s, %s);")
        try:
            self.cur.execute(sqlLine, dataList)
            self.db.commit()
            print("提交成功...")
        except Exception as e:  # deliberate best-effort: keep loading rows
            self.db.rollback()
            print('错误信息:', e)

    def getMysqData(self):
        """Fetch all ``(username, ordertime)`` rows into ``self.dataframe``.

        Rows are ordered by username and then by ordertime.
        """
        sqlLine = ("select username, ordertime from CarTest "
                   "order by username,ordertime;")
        self.cur.execute(sqlLine)
        self.dataframe = self.cur.fetchall()

    def main(self):
        """Run fetch -> predict -> score, always releasing the connection.

        ``getDate`` is intentionally not called here: it is the one-off CSV
        import and was already commented out in the original workflow.
        """
        try:
            self.getMysqData()
            self.timePre()
            self.errorTest()
        finally:
            self.cur.close()
            self.db.close()
if __name__ == '__main__':
    # Timestamp taken before any database work starts.
    started_at = time.time()

    runner = CarTest()
    runner.main()
    print("准确率为:", runner.Accuracy)

    # Timestamp taken after the accuracy has been reported; print the
    # wall-clock duration in seconds with two decimals.
    elapsed = time.time() - started_at
    print('执行时间:%.2f' % elapsed)
输出:
准确率为: 0.016651248843663275
执行时间:23.67
好吧,准确率低得可以。
library(forecast)
# Read the data
getwd()
setwd("C:/Users/goatbishop/Desktop/data")
car_srv_train <- read.csv(
  "car_srv_train.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Quick look at the data
head(car_srv_train)
dim(car_srv_train)
str(car_srv_train)

# Parse the order date so that date arithmetic works.
car_srv_train[["ORDERDATE"]] <- as.Date(car_srv_train[["ORDERDATE"]])
test0624car <- car_srv_train
head(diff(test0624car$ORDERDATE))

# Sort by customer, then chronologically within each customer.
test0624car <- test0624car[with(test0624car, order(CUST_ID, ORDERDATE)), ]

# Visits per customer (printed), then keep customers with at least 3 visits.
visit_counts <- table(test0624car$CUST_ID)
visit_counts
innames <- names(visit_counts)[visit_counts >= 3]
test0624car3 <- test0624car[test0624car$CUST_ID %in% innames, ]
tablenum <- table(test0624car3$CUST_ID)
length(tablenum)

# cumsum(tablenum) is the position of each customer's last row; dropping those
# rows leaves n - 1 rows per customer, matching the n - 1 gaps diff() returns.
last_rows <- cumsum(tablenum)
newtest0624car <- test0624car3[-last_rows, ]
dim(test0624car3)
dim(newtest0624car)

# Gap between consecutive order dates, computed per customer and flattened.
test0624diff <- tapply(test0624car3$ORDERDATE, test0624car3$CUST_ID, diff)
dfg <- unlist(test0624diff)
dftest <- cbind(newtest0624car, dfg)
head(dftest[, c(1, 2, 3, 13)], n = 10)
write.csv(dftest, "dftest.csv")
##### Time series #####
# Customers with at least 30 visits get an individual ARIMA model.
innames2 <- names(which(table(test0624car$CUST_ID) >= 30))

# Pre-allocate the result vectors instead of growing them with c() on every
# iteration.
Pre <- numeric(length(innames2))
real <- numeric(length(innames2))

# seq_along() yields an empty sequence when no customer qualifies, whereas
# 1:length(x) would iterate over c(1, 0) and fail inside auto.arima().
for (item in seq_along(innames2)) {
  # Visit gaps (dfg) of the current customer.
  tempdf <- dftest$dfg[which(dftest$CUST_ID == innames2[item])]
  # Fit on everything except the last gap, which is held out as ground truth.
  temparima <- auto.arima(tempdf[-length(tempdf)])
  # One-step-ahead forecast of the held-out gap.
  preout <- forecast(temparima, h = 1)
  real[item] <- tempdf[length(tempdf)]
  # preout$mean is a length-1 time series; store it as a plain number.
  Pre[item] <- as.numeric(preout$mean)[1]
}

# Forecast error per customer: actual gap minus predicted gap.
errorPre <- real - Pre
write.csv(Pre, "Pre.csv")
### Accuracy ###
# b1: number of forecasts; a1: forecasts whose absolute error is below 10.
# Vectorised replacement for the counting loop. A missing error counts as a
# miss instead of aborting with "missing value where TRUE/FALSE needed", and an
# empty errorPre no longer sends a 1:0 loop over non-existent elements.
b1 <- length(errorPre)
a1 <- sum(abs(errorPre) < 10, na.rm = TRUE)

# Share of forecasts within the tolerance (printed); NA when there is nothing
# to score.
(accer <- if (b1 > 0) a1 / b1 else NA_real_)

write.csv(errorPre, "errorPre.csv")
write.csv(accer, "accer.csv")
# Positions of the forecasts whose absolute error is below 2 (auto-printed).
which(abs(errorPre) < 2)
# Inspect a single customer in detail: the 4th ID in innames2 (hard-coded).
tempdf <- dftest$dfg[which(dftest$CUST_ID == innames2[4])]
# Line-and-point plot of that customer's dfg values; the title string means
# "time series plot".
plot(tempdf, type = 'o', main = "时序图")
# Fit auto.arima on all values except the last one.
temparima <- auto.arima(tempdf[-length(tempdf)])
# Forecast with the package's default horizon (no h is passed here).
pretest <- forecast(temparima)
# Autocorrelation plot of the residuals stored on the forecast object, then
# save those residuals to CSV.
acf(pretest$residuals)
write.csv(pretest$residuals, "pretest_residuals.csv")