获取日期列表中缺失的日期

获取日期列表中缺失的日期

最近有个需求,就是有一个时间段的日期格式的字符串列表,其中有缺失的日期,想要写一个脚本来查出缺失的日期,算是个小工具,直接上代码。

import pandas as pd
# -*- coding: utf-8 -*-

# 导入必要模块
import pandas as pd
from sqlalchemy import create_engine

# 初始化数据库连接,使用pymysql模块
# MySQL的用户:root, 密码:123456, 端口:3306,数据库:mydb
engine = create_engine('mysql+pymysql://root:[email protected]:3306/shopdb')

# 查询语句,选出employee表中的所有数据
sql = '''
      SELECT CAPTURED_TIME,`VALUE` FROM da_target_data WHERE FACTORY_ID=020299 AND TYPE='POWER_CONSUME' AND CAPTURED_TIME BETWEEN '2019-01-02 00:00:00' AND '2019-03-12 00:00:00';
      '''

# read_sql_query的两个参数: sql语句, 数据库连接
df = pd.read_sql_query(sql, engine)

# 输出employee表的查询结果
print(df)

   CAPTURED_TIME     VALUE
0     2019-01-02  27751.84
1     2019-01-03  28782.38
2     2019-01-04  25096.68
3     2019-01-05  25278.54
4     2019-01-06  25157.30
5     2019-01-07  24696.59
6     2019-01-08  24405.61
7     2019-01-09  24563.22
8     2019-01-10  24041.89
9     2019-01-12  26648.55
10    2019-01-13  22150.55
create_time = df['CAPTURED_TIME']
create_time
    0    2019-01-02
    1    2019-01-03
    2    2019-01-04
    3    2019-01-05
    4    2019-01-06
    5    2019-01-07
    6    2019-01-08
    7    2019-01-09
    8    2019-01-10
    9    2019-01-12
    10   2019-01-13
    11   2019-01-14
    12   2019-01-15
    13   2019-01-16
    14   2019-01-17
    15   2019-01-18
    16   2019-01-19
    17   2019-01-20
    18   2019-01-21
    19   2019-01-22
    20   2019-01-23
    21   2019-01-24
    22   2019-01-25
    23   2019-01-26
    24   2019-01-27
    25   2019-01-28
    26   2019-01-29
    27   2019-01-30
    28   2019-01-31
    29   2019-02-01
            ...    
    39   2019-02-11
    40   2019-02-12
    41   2019-02-13
data_list=[]
for data_time in create_time:
    data_time = str(data_time)
    data_time = data_time[:10]
    data_list.append(data_time)
print(data_list)
['2019-01-02', '2019-01-03', '2019-01-04', '2019-01-05', '2019-01-06', '2019-01-07', '2019-01-08', '2019-01-09', '2019-01-10', '2019-01-12', '2019-01-13', '2019-01-14']
import datetime
def get_no_date(date_str_li, start_date='', end_date=''):
    """获取没有列表中没有包含的的日期区间的日期
    args:
        start_date: 查询的起始日期字符串,默认为date_li中最小值
        end_date: 查询的终止日期的字符串, 默认为date_li中最大值
        date_str_li: 所有需要查询的日期的列表
    """
    if not date_str_li:
        raise ValueError('list can\'t empty')
    # 所有文件名称,日期的列表
    try:
        date_li = [datetime.datetime.strptime(date_time, '%Y-%m-%d') for date_time in date_str_li]
    except:
        raise ValueError('your values can\'t  be converted')
    if end_date:
        date_end = datetime.datetime.strptime(end_date, '%Y-%m-%d')
    else:
        date_end = max(date_li)
    if start_date:
        date_start = datetime.datetime.strptime(start_date, '%Y-%m-%d')
    else:
        date_start = min(date_li)
    no_list = []
    while True:
        if date_end not in date_li:
            no_list.append(date_end)
        if date_end == date_start:
            break
        date_end -= datetime.timedelta(1)
    return [datetime.date.strftime(day, '%Y-%m-%d') for day in no_list]
get_no_date(data_list)
['2019-01-11']

简单方法

full_time =pd.date_range('2019-01-02 00:00:00','2019-02-13 00:00:00')
df =df.reindex(full_time,fill_value='ww')
dff =df[df['VALUE']=='ww']
missing_date = list(dff.index)
missing_date

你可能感兴趣的:(获取日期列表中缺失的日期)