获取日期列表中缺失的日期
最近有个需求,就是有一个时间段的日期格式的字符串列表,其中有缺失的日期,想要写一个脚本来查出缺失的日期,算是个小工具,直接上代码。
import pandas as pd
# -*- coding: utf-8 -*-
# 导入必要模块
import pandas as pd
from sqlalchemy import create_engine
# 初始化数据库连接,使用pymysql模块
# MySQL的用户:root, 密码:123456, 端口:3306,数据库:mydb
engine = create_engine('mysql+pymysql://root:[email protected]:3306/shopdb')
# 查询语句,选出employee表中的所有数据
sql = '''
SELECT CAPTURED_TIME,`VALUE` FROM da_target_data WHERE FACTORY_ID=020299 AND TYPE='POWER_CONSUME' AND CAPTURED_TIME BETWEEN '2019-01-02 00:00:00' AND '2019-03-12 00:00:00';
'''
# read_sql_query的两个参数: sql语句, 数据库连接
df = pd.read_sql_query(sql, engine)
# 输出employee表的查询结果
print(df)
CAPTURED_TIME VALUE
0 2019-01-02 27751.84
1 2019-01-03 28782.38
2 2019-01-04 25096.68
3 2019-01-05 25278.54
4 2019-01-06 25157.30
5 2019-01-07 24696.59
6 2019-01-08 24405.61
7 2019-01-09 24563.22
8 2019-01-10 24041.89
9 2019-01-12 26648.55
10 2019-01-13 22150.55
create_time = df['CAPTURED_TIME']
create_time
0 2019-01-02
1 2019-01-03
2 2019-01-04
3 2019-01-05
4 2019-01-06
5 2019-01-07
6 2019-01-08
7 2019-01-09
8 2019-01-10
9 2019-01-12
10 2019-01-13
11 2019-01-14
12 2019-01-15
13 2019-01-16
14 2019-01-17
15 2019-01-18
16 2019-01-19
17 2019-01-20
18 2019-01-21
19 2019-01-22
20 2019-01-23
21 2019-01-24
22 2019-01-25
23 2019-01-26
24 2019-01-27
25 2019-01-28
26 2019-01-29
27 2019-01-30
28 2019-01-31
29 2019-02-01
...
39 2019-02-11
40 2019-02-12
41 2019-02-13
data_list=[]
for data_time in create_time:
data_time = str(data_time)
data_time = data_time[:10]
data_list.append(data_time)
print(data_list)
['2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08', '2019-01-09', '2019-01-10', '2019-01-12', '2019-01-13', '2019-01-14']
import datetime
def get_no_date(date_str_li, start_date='', end_date=''):
"""获取没有列表中没有包含的的日期区间的日期
args:
start_date: 查询的起始日期字符串,默认为date_li中最小值
end_date: 查询的终止日期的字符串, 默认为date_li中最大值
date_str_li: 所有需要查询的日期的列表
"""
if not date_str_li:
raise ValueError('list can\'t empty')
# 所有文件名称,日期的列表
try:
date_li = [datetime.datetime.strptime(date_time, '%Y-%m-%d') for date_time in date_str_li]
except:
raise ValueError('your values can\'t be converted')
if end_date:
date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
else:
date_end = max(date_li)
if start_date:
date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
else:
date_start = min(date_li)
no_list = []
while True:
if date_end not in date_li:
no_list.append(date_end)
if date_end == date_start:
break
date_end -= datetime.timedelta(1)
return [datetime.date.strftime(day, '%Y-%m-%d') for day in no_list]
get_no_date(data_list)
['2019-01-11']
简单方法
full_time =pd.date_range('2019-01-02 00:00:00','2019-02-13 00:00:00')
df =df.reindex(full_time,fill_value='ww')
dff =df[df['VALUE']=='ww']
missing_date = list(dff.index)
missing_date