I have previously written a series of articles on LSTM-based time series forecasting.
The Temporal Convolutional Network (TCN) and the CNN both extract features with convolution operations: a CNN uses convolutional layers to extract features from images, while a TCN uses temporal convolutional layers to process time series data. The key idea of TCN is combining very deep networks (via residual connections) with dilated convolutions to enlarge the receptive field and thereby capture longer-range context.
The principle behind TCN is fairly simple, so this article skips the theory and goes straight to the code.
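One quick formula for intuition: each residual block defined below stacks two dilated convolutions, so with kernel size k and L levels (dilations 1, 2, ..., 2^(L-1)) the receptive field is 1 + 2(k-1)(2^L - 1). A small helper (my own sketch, not part of the original code) makes this concrete:

def tcn_receptive_field(kernel_size, num_levels):
    # Each TemporalBlock below contains two dilated convolutions with
    # dilation 2 ** i, and each convolution extends the receptive field
    # by (kernel_size - 1) * dilation.
    return 1 + 2 * (kernel_size - 1) * (2 ** num_levels - 1)

print(tcn_receptive_field(kernel_size=2, num_levels=3))  # 15 time steps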
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm


class Chomp1d(nn.Module):
    def __init__(self, chomp_size):
        super(Chomp1d, self).__init__()
        self.chomp_size = chomp_size

    def forward(self, x):
        """
        Trim the extra padding off the right end of the sequence,
        restoring the original seq_len and keeping the convolution causal.
        """
        return x[:, :, :-self.chomp_size].contiguous()
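A minimal shape check (with made-up sizes) shows what Chomp1d is for: a Conv1d padded on both sides returns padding extra time steps, and chomping the right end restores seq_len while keeping the convolution causal:

x = torch.randn(8, 7, 24)  # (batch, channels, seq_len)
conv = nn.Conv1d(7, 32, kernel_size=2, padding=1, dilation=1)
print(conv(x).shape)              # torch.Size([8, 32, 25]) -- one step too long
print(Chomp1d(1)(conv(x)).shape)  # torch.Size([8, 32, 24]) -- back to seq_len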
class TemporalBlock(nn.Module):
    def __init__(self, n_inputs, n_outputs, kernel_size, stride, dilation, padding, dropout=0.2):
        """
        Equivalent to one residual block.
        :param n_inputs: int, number of input channels
        :param n_outputs: int, number of output channels
        :param kernel_size: int, convolution kernel size
        :param stride: int, stride, usually 1
        :param dilation: int, dilation factor
        :param padding: int, amount of padding
        :param dropout: float, dropout rate
        """
        super(TemporalBlock, self).__init__()
        self.conv1 = weight_norm(nn.Conv1d(n_inputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        # After conv1 the output size is (Batch, n_outputs, seq_len + padding)
        self.chomp1 = Chomp1d(padding)  # trim the extra padding so the time dimension stays seq_len
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = weight_norm(nn.Conv1d(n_outputs, n_outputs, kernel_size,
                                           stride=stride, padding=padding, dilation=dilation))
        self.chomp2 = Chomp1d(padding)  # trim the extra padding so the time dimension stays seq_len
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        self.net = nn.Sequential(self.conv1, self.chomp1, self.relu1, self.dropout1,
                                 self.conv2, self.chomp2, self.relu2, self.dropout2)
        # 1x1 convolution so the residual connection matches the output channel count
        self.downsample = nn.Conv1d(n_inputs, n_outputs, 1) if n_inputs != n_outputs else None
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """
        Weight initialization.
        """
        self.conv1.weight.data.normal_(0, 0.01)
        self.conv2.weight.data.normal_(0, 0.01)
        if self.downsample is not None:
            self.downsample.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """
        :param x: size of (Batch, input_channel, seq_len)
        """
        out = self.net(x)
        res = x if self.downsample is None else self.downsample(x)
        return self.relu(out + res)
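As a sanity check (illustrative numbers), a TemporalBlock changes the channel count but leaves the time dimension untouched:

block = TemporalBlock(n_inputs=7, n_outputs=32, kernel_size=2, stride=1,
                      dilation=1, padding=(2 - 1) * 1)
print(block(torch.randn(8, 7, 24)).shape)  # torch.Size([8, 32, 24])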
class TCN(nn.Module):
    def __init__(self, num_inputs, channels, kernel_size=2, dropout=0.2):
        """
        :param num_inputs: int, number of input channels
        :param channels: list, hidden channels per level, e.g. [25, 25, 25, 25]
                         means 4 hidden layers with 25 channels each
        :param kernel_size: int, convolution kernel size
        :param dropout: float, dropout rate
        """
        super(TCN, self).__init__()
        layers = []
        num_levels = len(channels)
        for i in range(num_levels):
            dilation_size = 2 ** i  # dilation factors: 1, 2, 4, 8, ...
            in_channels = num_inputs if i == 0 else channels[i - 1]  # input channels of this level
            out_channels = channels[i]  # output channels of this level
            layers += [TemporalBlock(in_channels, out_channels, kernel_size, stride=1, dilation=dilation_size,
                                     padding=(kernel_size - 1) * dilation_size, dropout=dropout)]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """
        :param x: size of (Batch, input_channel, seq_len)
        :return: size of (Batch, output_channel, seq_len)
        """
        return self.network(x)
As you can see, the TCN takes input of shape (batch_size, input_channel, seq_len) and produces output of shape (batch_size, output_channel, seq_len). This is broadly similar to the models in the earlier articles: to get a prediction directly from the TCN, take the last time step of the output and pass it through an nn.Linear:
self.fc = nn.Linear(channels[-1], output_size)  # defined in __init__
x = x[:, :, -1]  # take the last time step: (batch_size, output_channel)
x = self.fc(x)   # prediction: (batch_size, output_size)
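Assembled into a full model, a standalone TCN forecaster could look like the sketch below (TCN_Forecast is my own hypothetical wrapper, not from the original code; the hyperparameters mirror the hybrid models that follow):

class TCN_Forecast(nn.Module):
    def __init__(self, input_size=7, output_size=1, channels=(32, 32, 32)):
        super(TCN_Forecast, self).__init__()
        self.tcn = TCN(num_inputs=input_size, channels=list(channels))
        self.fc = nn.Linear(channels[-1], output_size)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, seq_len, input_size) -> (batch, input_size, seq_len)
        x = self.tcn(x)         # (batch, channels[-1], seq_len)
        x = x[:, :, -1]         # last time step: (batch, channels[-1])
        return self.fc(x)       # (batch, output_size)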
The TCN output, of shape (batch_size, output_channel, seq_len), is still a full sequence over time, so after a simple permute it matches the input expected by RNN-family models; feeding the data through a TCN first and then through an RNN is therefore a natural idea. The TCN-RNN model is built as follows:
class TCN_RNN(nn.Module):
    def __init__(self):
        super(TCN_RNN, self).__init__()
        self.tcn = TCN(num_inputs=7, channels=[32, 32, 32])
        self.rnn = nn.RNN(input_size=32, hidden_size=64,
                          num_layers=2, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, input_size, seq_len)
        x = self.tcn(x)         # (batch, hidden, seq_len)
        x = x.permute(0, 2, 1)  # (batch, seq_len, hidden)
        x, _ = self.rnn(x)      # (batch, seq_len, hidden)
        x = x[:, -1, :]         # last time step
        x = self.fc(x)          # (batch, output_size)
        return x
Since our inputs are built with shape (batch_size, seq_len, input_size) while the TCN expects (batch_size, input_channel, seq_len), a permute is needed first. After the TCN, the output has shape (batch_size, output_channel, seq_len), where output_channel is the last entry of channels=[32, 32, 32], i.e. 32. The RNN then expects (batch_size, seq_len, output_channel), so another permute is required. Finally, an nn.Linear produces the predictions for this batch.
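With the dataset used later (24 hourly steps, 7 features), the whole pipeline can be sanity-checked in a few lines (the batch size of 64 is arbitrary):

model = TCN_RNN()
x = torch.randn(64, 24, 7)  # (batch_size, seq_len, input_size)
print(model(x).shape)       # torch.Size([64, 1])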
Compared with TCN-RNN, TCN-LSTM is just a drop-in replacement:
class TCN_LSTM(nn.Module):
    def __init__(self):
        super(TCN_LSTM, self).__init__()
        self.tcn = TCN(num_inputs=7, channels=[32, 32, 32])
        self.lstm = nn.LSTM(input_size=32, hidden_size=64,
                            num_layers=2, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, input_size, seq_len)
        x = self.tcn(x)         # (batch, hidden, seq_len)
        x = x.permute(0, 2, 1)  # (batch, seq_len, hidden)
        x, _ = self.lstm(x)     # (batch, seq_len, hidden)
        x = x[:, -1, :]         # last time step
        x = self.fc(x)          # (batch, output_size)
        return x
TCN-GRU is similar:
class TCN_GRU(nn.Module):
    def __init__(self):
        super(TCN_GRU, self).__init__()
        self.tcn = TCN(num_inputs=7, channels=[32, 32, 32])
        self.gru = nn.GRU(input_size=32, hidden_size=64,
                          num_layers=2, batch_first=True)
        self.fc = nn.Linear(64, 1)

    def forward(self, x):
        x = x.permute(0, 2, 1)  # (batch, input_size, seq_len)
        x = self.tcn(x)         # (batch, hidden, seq_len)
        x = x.permute(0, 2, 1)  # (batch, seq_len, hidden)
        x, _ = self.gru(x)      # (batch, seq_len, hidden)
        x = x[:, -1, :]         # last time step
        x = self.fc(x)          # (batch, output_size)
        return x
The dataset is the same load forecasting dataset as before: the load of the previous 24 hours plus 6 other variables are used to predict the load of the next hour. Because TCN training is relatively slow, only the first 5,000 rows are used here.
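The preprocessing code is not shown in this article; a minimal sliding-window construction consistent with the description above might look like this (data, make_samples, and the column layout are all assumptions for illustration: a NumPy array of shape (5000, 7) with the load in column 0):

import numpy as np

def make_samples(data, seq_len=24):
    # data: (num_rows, 7); column 0 is the load, columns 1-6 are the other variables
    xs, ys = [], []
    for i in range(len(data) - seq_len):
        xs.append(data[i:i + seq_len])   # previous 24 hours, all 7 features
        ys.append(data[i + seq_len, 0])  # load of the next hour
    x = torch.tensor(np.array(xs), dtype=torch.float32)                # (N, 24, 7)
    y = torch.tensor(np.array(ys), dtype=torch.float32).unsqueeze(-1)  # (N, 1)
    return x, y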
Model comparison:

| Model | TCN | TCN-RNN | TCN-LSTM | TCN-GRU |
| --- | --- | --- | --- | --- |
| MAPE / % | 6.91 | 5.60 | 7.79 | 6.75 |
TCN-RNN comes out slightly ahead here, but these numbers are specific to this dataset and were obtained without any hyperparameter tuning, so they should not be read as a general ranking.