informer辅助笔记:utils/timefeatures.py

定义了一套与时间特征相关的类和函数,旨在从时间序列数据中提取有用的时间特征,以支持各种时间序列分析和预测任务 

from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset

1 TimeFeature 类

  • 这是一个基础类,其他与时间特征相关的类都继承自它。
  • 它提供了一个基本框架,但没有实现具体的功能。
class TimeFeature:
    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"

 2 时间特征类

SecondOfMinuteMinuteOfHourHourOfDayDayOfWeekDayOfMonthDayOfYearMonthOfYearWeekOfYear:这些类都继承自TimeFeature,每个类都实现了一个特定的时间特征提取方法。例如,HourOfDay类提取一天中的小时数并进行规范化处理,使得值在[-0.5, 0.5]之间。

class SecondOfMinute(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5

class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5

class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5

class DayOfWeek(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5

class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5

class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5

class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5

class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.week - 1) / 52.0 - 0.5

3 time_features_from_frwquency_str

def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    根据给定的频率字符串(如"12H", "5min", "1D"等)返回一组适当的时间特征类实例
    """

    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }
    '''
    特征映射字典 features_by_offsets:

    这个字典将pandas的时间偏移类(如YearEnd、QuarterEnd、MonthEnd等)映射到对应的时间特征类列表。
        例如,对于每月的数据(MonthEnd),它映射到MonthOfYear类;
        对于每小时的数据(Hour),它映射到HourOfDay、DayOfWeek、DayOfMonth和DayOfYear类。
    '''

    offset = to_offset(freq_str)
    #使用pandas的to_offset函数将频率字符串(如"12H")转换为相应的pandas时间偏移对象。

    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]
    '''
    遍历映射字典,检查提供的偏移对象是否属于字典中的某个偏移类型。
    如果找到匹配,为每个相关的特征类创建一个实例,并将这些实例作为列表返回。
    '''

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)

4 time_features

'''
从日期数据中提取有用的时间特征
'''
def time_features(dates, timeenc=0, freq='h'):
    """
    > `time_features` takes in a `dates` dataframe with a 'dates' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
    > * m - [month]
    > * w - [month]
    > * d - [month, day, weekday]
    > * b - [month, day, weekday]
    > * h - [month, day, weekday, hour]
    > * t - [month, day, weekday, hour, *minute]
    >
    > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
    > * Q - [month]
    > * M - [month]
    > * W - [Day of month, week of year]
    > * D - [Day of week, day of month, day of year]
    > * B - [Day of week, day of month, day of year]
    > * H - [Hour of day, day of week, day of month, day of year]
    > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
    > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.
    """
    if timeenc==0:
        dates['month'] = dates.date.apply(lambda row:row.month,1)
        dates['day'] = dates.date.apply(lambda row:row.day,1)
        dates['weekday'] = dates.date.apply(lambda row:row.weekday(),1)
        dates['hour'] = dates.date.apply(lambda row:row.hour,1)
        dates['minute'] = dates.date.apply(lambda row:row.minute,1)
        dates['minute'] = dates.minute.map(lambda x:x//15)
        freq_map = {
            'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
            'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
            't':['month','day','weekday','hour','minute'],
        }
        return dates[freq_map[freq.lower()]].values
        '''
        此模式下,函数直接从日期中提取特定的时间特征,如月份、日期、星期几、小时和分钟。

        freq参数指定要提取的时间特征的精度。例如,如果freq为'd',则提取月、日和星期几。

        对于分钟,它被转换为一个从0到3的数字,表示15分钟的时间段。
        '''
    if timeenc==1:
        dates = pd.to_datetime(dates.date.values)
        return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1,0)
        '''
        此模式下,函数使用time_features_from_frequency_str函数来获取一组特征提取器,并应用它们来转换时间数据。
    
这些特征提取器提取的特征被编码在[-0.5, 0.5]的范围内,以提供规范化的时间特征。
freq参数在这种情况下也指定了提取的时间特征的类型和精度。
        '''

你可能感兴趣的:(python库整理,笔记,python,开发语言)