While learning how to build RNNs with PyTorch, I came across an introductory PyTorch code series from an HKUST course. One part of it, on pack/pad handling of RNN sequence data, was quite enlightening.
Attached: GitHub link
# Source code + my annotations
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools
# Return a flat list of all the characters contained in l
def flatten(l):
    return list(itertools.chain.from_iterable(l))
seqs = ['ghatmasala', 'nicela', 'chutpakodas']
# Build the character vocabulary (set() removes duplicate characters); indices start at 1
# because index 0 is reserved for the padding token '', and the characters are sorted in
# ascending alphabetical order
vocab = [''] + sorted(set(flatten(seqs)))
# Build the model: an embedding layer and an LSTM layer
embedding_size = 3
embed = nn.Embedding(len(vocab), embedding_size)
lstm = nn.LSTM(embedding_size, 5)
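# Side note (my addition, not from the tutorial): since index 0 of vocab is the padding token '',
# nn.Embedding could be given padding_idx=0 so that index 0 always maps to a zero vector and its
# embedding receives no gradient updates:
# embed = nn.Embedding(len(vocab), embedding_size, padding_idx=0)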
# Convert each sequence into its list of vocabulary indices, one per character
vectorized_seqs = [[vocab.index(tok) for tok in seq] for seq in seqs]
print("vectorized_seqs", vectorized_seqs)
# Output:
#('vectorized_seqs', [[5, 6, 1, 15, 10, 1, 14, 1, 9, 1], [11, 7, 2, 4, 9, 1], [2, 6, 16, 15, 13, 1, 8, 12, 3, 1, 14]])
print([x for x in map(len, vectorized_seqs)])
# Output:
#[10, 6, 11]
# get the length of each seq in your batch
seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)])
# dump padding everywhere, and place seqs on the left.
# NOTE: you only need a tensor as big as your longest sequence (the number of columns equals
# the length of the longest seq; this tensor is later converted into the model input)
seq_tensor = Variable(torch.zeros((len(vectorized_seqs), seq_lengths.max()))).long()
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)
print("seq_tensor", seq_tensor)
'''
('seq_tensor', Variable containing:
5 6 1 15 10 1 14 1 9 1 0
11 7 2 4 9 1 0 0 0 0 0
2 6 16 15 13 1 8 12 3 1 14
[torch.LongTensor of size 3x11]
)
'''
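# Side note (my addition; assumes a newer PyTorch release than the Variable-based API used in
# this tutorial): the same padded batch can be built with pad_sequence instead of the manual
# loop above, roughly:
# from torch.nn.utils.rnn import pad_sequence
# seq_tensor = pad_sequence([torch.LongTensor(s) for s in vectorized_seqs],
#                           batch_first=True, padding_value=0)  # LongTensor of size 3x11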
# Sort the tensor rows by sequence length in descending order, so the longest seq comes first
# (required by pack_padded_sequence in the PyTorch version used here)
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]
print("seq_tensor after sorting", seq_tensor)
'''
('seq_tensor after sorting', Variable containing:
2 6 16 15 13 1 8 12 3 1 14
5 6 1 15 10 1 14 1 9 1 0
11 7 2 4 9 1 0 0 0 0 0
[torch.LongTensor of size 3x11]
)
'''
# utils.rnn lets you give (B,L,D) tensors where B is the batch size, L is the maxlength, if you use batch_first=True
# Otherwise, give (L,B,D) tensors
seq_tensor = seq_tensor.transpose(0, 1) # (B,L,D) -> (L,B,D)
print("seq_tensor after transposing", seq_tensor.size(), seq_tensor.data)
'''
('seq_tensor after transposing', torch.Size([11, 3]),
2 5 11
6 6 7
16 1 2
15 15 4
13 10 9
1 1 1
8 14 0
12 1 0
3 9 0
1 1 0
14 0 0
[torch.LongTensor of size 11x3]
)
'''
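# Side note (my addition, a sketch only): the transpose above can be avoided entirely by keeping
# seq_tensor in its (B, L) form and putting the batch dimension first everywhere, i.e. building
# the LSTM with batch_first=True and also passing batch_first=True to pack_padded_sequence /
# pad_packed_sequence:
# lstm = nn.LSTM(embedding_size, 5, batch_first=True)
# packed_input = pack_padded_sequence(embed(seq_tensor), seq_lengths.cpu().numpy(),
#                                     batch_first=True)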
# embed your sequences: feed seq_tensor to the embedding layer; the output gains an embedding
# dimension, giving shape 11x3x3 (the print below still shows seq_tensor.data; the embedded
# values themselves are printed near the end)
embeded_seq_tensor = embed(seq_tensor)
print("seq_tensor after embeding", embeded_seq_tensor.size(), seq_tensor.data)
'''
('seq_tensor after embedding', torch.Size([11, 3, 3]),
2 5 11
6 6 7
16 1 2
15 15 4
13 10 9
1 1 1
8 14 0
12 1 0
3 9 0
1 1 0
14 0 0
[torch.LongTensor of size 11x3]
)
'''
# pack them up nicely: pack_padded_sequence compresses embeded_seq_tensor (the tensor was sized
# to the longest seq, so it contains empty/zero padded positions; packing squeezes them out) and
# returns a PackedSequence object that is fed to the LSTM
packed_input = pack_padded_sequence(
embeded_seq_tensor, seq_lengths.cpu().numpy())
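# Side note (my addition; applies to newer PyTorch versions): pack_padded_sequence later gained
# an enforce_sorted=False argument that sorts and unsorts the batch internally, which would make
# the manual descending sort above unnecessary:
# packed_input = pack_padded_sequence(embeded_seq_tensor, seq_lengths, enforce_sorted=False)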
# throw them through your LSTM (remember to give batch_first=True here if you packed with it)
packed_output, (ht, ct) = lstm(packed_input)
print(packed_output)
'''
PackedSequence(data=Variable containing:
0.1142 0.0952 -0.0453 0.2488 0.1718
0.2997 0.0641 0.1168 -0.1466 -0.0609
0.0767 0.1134 -0.0117 0.1579 0.1066
0.2155 0.1962 -0.1550 0.3445 0.1474
0.3370 0.1843 -0.0703 0.1179 0.0640
0.1523 0.2084 -0.0252 0.1508 0.1014
0.2912 0.1466 -0.1824 0.4241 0.2618
0.3821 0.2699 -0.0735 0.0349 0.0672
0.2373 0.1743 -0.0703 0.2885 0.2219
0.3259 0.1496 -0.1300 0.4009 0.3760
0.4194 0.1299 -0.0325 0.2450 0.2708
0.3058 0.2124 0.0008 0.1607 0.2048
0.3743 0.1569 -0.1730 0.4632 0.2992
0.1985 0.0846 0.1304 0.1277 0.2449
0.3712 0.1259 -0.0659 0.3300 0.2933
0.3800 0.2439 -0.1820 0.3006 0.2252
0.3389 0.1864 0.1256 0.0413 0.1255
0.3711 0.2257 -0.0789 0.1855 0.1880
0.2699 0.1420 0.0166 0.1272 0.2204
0.2637 0.1749 0.1914 -0.0575 0.0681
0.2670 0.0319 0.1951 0.0058 0.2326
0.3731 0.2367 0.1786 -0.0697 0.0204
0.4111 0.0972 -0.0186 0.3218 0.2449
0.4146 0.1350 0.0187 0.2434 0.2235
0.3786 0.1852 -0.0256 0.1529 0.1673
0.3871 0.2349 0.0117 0.1109 0.1390
0.2865 0.1581 0.0965 0.0066 0.1200
[torch.FloatTensor of size 27x5]
, batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1])
'''
# Print the final hidden state ht
print(ht)
'''
Variable containing:
(0 ,.,.) =
0.2865 0.1581 0.0965 0.0066 0.1200
0.3871 0.2349 0.0117 0.1109 0.1390
0.3711 0.2257 -0.0789 0.1855 0.1880
[torch.FloatTensor of size 1x3x5]
'''
# Print the final cell state ct
print(ct)
'''
Variable containing:
(0 ,.,.) =
0.5580 0.3379 0.1756 0.0119 0.2618
0.7071 0.4409 0.0174 0.1887 0.3112
0.6816 0.4281 -0.1172 0.3236 0.4330
[torch.FloatTensor of size 1x3x5]
'''
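# Note (my addition): ht and ct have shape (num_layers * num_directions, batch, hidden_size),
# here (1, 3, 5) since the LSTM is a single unidirectional layer with hidden_size=5; because the
# input was packed, each row holds the state after the last valid (non-padded) time step of the
# corresponding sequence.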
# unpack your output if required: sometimes the packed output needs to be turned back into a
# padded (uncompressed) tensor; pad_packed_sequence does this, restoring the dimensions of the
# original padded input
output, _ = pad_packed_sequence(packed_output)
print("Lstm output", output.size(), output.data)
# Output:
'''('Lstm output', torch.Size([11, 3, 5]),
(0 ,.,.) =
0.1142 0.0952 -0.0453 0.2488 0.1718
0.2997 0.0641 0.1168 -0.1466 -0.0609
0.0767 0.1134 -0.0117 0.1579 0.1066
(1 ,.,.) =
0.2155 0.1962 -0.1550 0.3445 0.1474
0.3370 0.1843 -0.0703 0.1179 0.0640
0.1523 0.2084 -0.0252 0.1508 0.1014
(2 ,.,.) =
0.2912 0.1466 -0.1824 0.4241 0.2618
0.3821 0.2699 -0.0735 0.0349 0.0672
0.2373 0.1743 -0.0703 0.2885 0.2219
(3 ,.,.) =
0.3259 0.1496 -0.1300 0.4009 0.3760
0.4194 0.1299 -0.0325 0.2450 0.2708
0.3058 0.2124 0.0008 0.1607 0.2048
(4 ,.,.) =
0.3743 0.1569 -0.1730 0.4632 0.2992
0.1985 0.0846 0.1304 0.1277 0.2449
0.3712 0.1259 -0.0659 0.3300 0.2933
(5 ,.,.) =
0.3800 0.2439 -0.1820 0.3006 0.2252
0.3389 0.1864 0.1256 0.0413 0.1255
0.3711 0.2257 -0.0789 0.1855 0.1880
(6 ,.,.) =
0.2699 0.1420 0.0166 0.1272 0.2204
0.2637 0.1749 0.1914 -0.0575 0.0681
0.0000 0.0000 0.0000 0.0000 0.0000
(7 ,.,.) =
0.2670 0.0319 0.1951 0.0058 0.2326
0.3731 0.2367 0.1786 -0.0697 0.0204
0.0000 0.0000 0.0000 0.0000 0.0000
(8 ,.,.) =
0.4111 0.0972 -0.0186 0.3218 0.2449
0.4146 0.1350 0.0187 0.2434 0.2235
0.0000 0.0000 0.0000 0.0000 0.0000
(9 ,.,.) =
0.3786 0.1852 -0.0256 0.1529 0.1673
0.3871 0.2349 0.0117 0.1109 0.1390
0.0000 0.0000 0.0000 0.0000 0.0000
(10,.,.) =
0.2865 0.1581 0.0965 0.0066 0.1200
0.0000 0.0000 0.0000 0.0000 0.0000
0.0000 0.0000 0.0000 0.0000 0.0000
[torch.FloatTensor of size 11x3x5]
)
'''
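# Note (my addition): the second value returned by pad_packed_sequence (discarded as _ above)
# holds the per-sequence lengths, i.e. the sorted [11, 10, 6] here (as a list or tensor,
# depending on the PyTorch version):
# output, output_lengths = pad_packed_sequence(packed_output)
# print(output_lengths)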
# Or if you just want the final hidden state?
print("Last output", ht[-1].size(), ht[-1].data)
# Output:
'''
('Last output', torch.Size([3, 5]),
0.2865 0.1581 0.0965 0.0066 0.1200
0.3871 0.2349 0.0117 0.1109 0.1390
0.3711 0.2257 -0.0789 0.1855 0.1880
[torch.FloatTensor of size 3x5]
)
'''
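# A small verification sketch of mine (not in the tutorial): the same final outputs can be read
# from the padded output tensor by indexing each sequence at its own last valid time step.
last_idx = (seq_lengths - 1).view(1, -1, 1).expand(1, output.size(1), output.size(2))
last_output = output.gather(0, Variable(last_idx))  # shape (1, 3, 5)
print(last_output.squeeze(0))  # should match ht[-1] above
# To map rows back to the original (unsorted) order of seqs, invert the sorting permutation:
# _, unperm_idx = perm_idx.sort(0)
# print(last_output.squeeze(0)[unperm_idx])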
# During these conversions, it is worth checking the dimensions of a few variables:
# First, embeded_seq_tensor
print(embeded_seq_tensor)
'''
Variable containing:
(0 ,.,.) =
1.1440 0.1978 -0.6949
-2.0943 1.3701 0.7476
0.9801 -0.5060 -0.3273
(1 ,.,.) =
1.7591 0.1092 0.4916
1.7591 0.1092 0.4916
0.6903 -0.4349 0.1548
(2 ,.,.) =
2.3382 0.2825 -0.5383
-0.5960 1.0383 0.3377
1.1440 0.1978 -0.6949
(3 ,.,.) =
0.3405 0.7053 -1.4355
0.3405 0.7053 -1.4355
-0.8852 0.7600 -0.3285
(4 ,.,.) =
1.3288 0.7557 -0.4967
-0.3825 -1.0906 -0.8747
1.1139 0.6303 -1.0870
(5 ,.,.) =
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
(6 ,.,.) =
-0.4494 -0.5962 -0.4374
-0.5976 -0.2818 -0.0220
-0.1059 0.3986 -0.3438
(7 ,.,.) =
-1.8133 0.2120 -1.0831
-0.5960 1.0383 0.3377
-0.1059 0.3986 -0.3438
(8 ,.,.) =
1.1747 1.3855 -0.6076
1.1139 0.6303 -1.0870
-0.1059 0.3986 -0.3438
(9 ,.,.) =
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
-0.1059 0.3986 -0.3438
(10,.,.) =
-0.5976 -0.2818 -0.0220
-0.1059 0.3986 -0.3438
-0.1059 0.3986 -0.3438
[torch.FloatTensor of size 11x3x3]
'''
# Next, packed_input, the packed version of it
print(packed_input)
'''
PackedSequence(data=Variable containing:
1.1440 0.1978 -0.6949
-2.0943 1.3701 0.7476
0.9801 -0.5060 -0.3273
1.7591 0.1092 0.4916
1.7591 0.1092 0.4916
0.6903 -0.4349 0.1548
2.3382 0.2825 -0.5383
-0.5960 1.0383 0.3377
1.1440 0.1978 -0.6949
0.3405 0.7053 -1.4355
0.3405 0.7053 -1.4355
-0.8852 0.7600 -0.3285
1.3288 0.7557 -0.4967
-0.3825 -1.0906 -0.8747
1.1139 0.6303 -1.0870
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
-0.4494 -0.5962 -0.4374
-0.5976 -0.2818 -0.0220
-1.8133 0.2120 -1.0831
-0.5960 1.0383 0.3377
1.1747 1.3855 -0.6076
1.1139 0.6303 -1.0870
-0.5960 1.0383 0.3377
-0.5960 1.0383 0.3377
-0.5976 -0.2818 -0.0220
[torch.FloatTensor of size 27x3]
, batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1])
'''
# From embeded_seq_tensor to packed_input the shape changes from 11x3x3 to 27x3, i.e.
# 11*3 - 27 = 6 padded rows were dropped. The batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1]
# follows from the three sorted sequence lengths 11, 10 and 6: the length-11 seq contributes one
# row to each of the 11 time steps, the length-10 seq to the first 10 time steps, and the
# length-6 seq to the first 6 time steps; summing the contributions at each of the 11 time steps
# gives [3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1].
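# The batch_sizes above can be reproduced with a few lines of plain Python (a small verification
# sketch of mine): at every time step, count how many of the sorted sequences are still active.
sorted_lengths = [11, 10, 6]
batch_sizes = [sum(1 for l in sorted_lengths if l > t) for t in range(max(sorted_lengths))]
print(batch_sizes)  # [3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1]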
That covers using pack_padded_sequence to pack the padded input and pad_packed_sequence to unpack it and restore the original dimensions. More notes on RNNs and PyTorch to follow~