PyTorch pack/pad handling for RNN sequence data

While learning how to build RNNs with PyTorch, I came across an introductory PyTorch code series from an HKUST course. One segment on pack/pad handling of RNN sequence data was quite enlightening after working through it.
Attached: github link

# Source code + personal annotations
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import torch.nn.functional as F
import numpy as np
import itertools

# Return a flat list of all the characters in l
def flatten(l):
    return list(itertools.chain.from_iterable(l))

seqs = ['ghatmasala', 'nicela', 'chutpakodas']

# Build the character vocabulary (a set is used, so characters are unique); character indices start at 1 (index 0 is the padding character ''), sorted in ascending alphabetical order
vocab = [''] + sorted(list(set(flatten(seqs))))

# Build the models: one Embedding layer and one LSTM layer
embedding_size = 3
embed = nn.Embedding(len(vocab), embedding_size)
lstm = nn.LSTM(embedding_size, 5)

# Convert each character to its index in the vocabulary
vectorized_seqs = [[vocab.index(tok) for tok in seq] for seq in seqs]
print("vectorized_seqs", vectorized_seqs)
# Output:
#('vectorized_seqs', [[5, 6, 1, 15, 10, 1, 14, 1, 9, 1], [11, 7, 2, 4, 9, 1], [2, 6, 16, 15, 13, 1, 8, 12, 3, 1, 14]])

print([x for x in map(len, vectorized_seqs)])
# Output:
#[10, 6, 11]

# get the length of each seq in your batch
seq_lengths = torch.LongTensor([x for x in map(len, vectorized_seqs)])

# dump padding everywhere, and place seqs on the left.
# NOTE: you only need a tensor as big as your longest sequence (the tensor's second dimension is the length of the longest seq; this tensor is built so it can later be fed to the model)
seq_tensor = Variable(torch.zeros((len(vectorized_seqs), seq_lengths.max()))).long()
for idx, (seq, seqlen) in enumerate(zip(vectorized_seqs, seq_lengths)):
    seq_tensor[idx, :seqlen] = torch.LongTensor(seq)

print("seq_tensor", seq_tensor)
'''
('seq_tensor', Variable containing:
5     6     1    15    10     1    14     1     9     1     0
11     7     2     4     9     1     0     0     0     0     0
2     6    16    15    13     1     8    12     3     1    14
[torch.LongTensor of size 3x11]
)
'''

# Sort the tensor by sequence length in descending order, so the longest seq comes first
seq_lengths, perm_idx = seq_lengths.sort(0, descending=True)
seq_tensor = seq_tensor[perm_idx]

print("seq_tensor after sorting", seq_tensor)
'''
('seq_tensor after sorting', Variable containing:
    2     6    16    15    13     1     8    12     3     1    14
   5     6     1    15    10     1    14     1     9     1     0
   11     7     2     4     9     1     0     0     0     0     0
[torch.LongTensor of size 3x11]
)
'''

# utils.rnn lets you give (B,L,D) tensors where B is the batch size and L is the max length, if you use batch_first=True
# Otherwise, give (L,B,D) tensors
seq_tensor = seq_tensor.transpose(0, 1)  # (B,L,D) -> (L,B,D)
print("seq_tensor after transposing", seq_tensor.size(), seq_tensor.data)
'''
('seq_tensor after transposing', torch.Size([11, 3]), 
    2     5    11
    6     6     7
   16     1     2
   15    15     4
   13    10     9
    1     1     1
    8    14     0
   12     1     0
    3     9     0
    1     1     0
   14     0     0
[torch.LongTensor of size 11x3]
)
'''

# embed your sequences — feed seq_tensor into the Embedding layer; the output expands to 11x3x3
embeded_seq_tensor = embed(seq_tensor)
print("seq_tensor after embeding", embeded_seq_tensor.size(), seq_tensor.data)
'''
('seq_tensor after embedding', torch.Size([11, 3, 3]), 
   2     5    11
   6     6     7
   16     1     2
   15    15     4
   13    10     9
   1     1     1
   8    14     0
   12     1     0
   3     9     0
   1     1     0
   14     0     0
[torch.LongTensor of size 11x3]
)
'''

# pack them up nicely — pack_padded_sequence compresses embeded_seq_tensor (the tensor was built to the length of the longest seq, so some positions are just zero padding; packing squeezes that padding out) and returns a PackedSequence object that is then fed to the LSTM
packed_input = pack_padded_sequence(
    embeded_seq_tensor, seq_lengths.cpu().numpy())

# throw them through your LSTM (remember to give batch_first=True here if you packed with it)
packed_output, (ht, ct) = lstm(packed_input)
print(packed_output)
'''
PackedSequence(data=Variable containing:
 0.1142  0.0952 -0.0453  0.2488  0.1718
 0.2997  0.0641  0.1168 -0.1466 -0.0609
 0.0767  0.1134 -0.0117  0.1579  0.1066
 0.2155  0.1962 -0.1550  0.3445  0.1474
 0.3370  0.1843 -0.0703  0.1179  0.0640
 0.1523  0.2084 -0.0252  0.1508  0.1014
 0.2912  0.1466 -0.1824  0.4241  0.2618
 0.3821  0.2699 -0.0735  0.0349  0.0672
 0.2373  0.1743 -0.0703  0.2885  0.2219
 0.3259  0.1496 -0.1300  0.4009  0.3760
 0.4194  0.1299 -0.0325  0.2450  0.2708
 0.3058  0.2124  0.0008  0.1607  0.2048
 0.3743  0.1569 -0.1730  0.4632  0.2992
 0.1985  0.0846  0.1304  0.1277  0.2449
 0.3712  0.1259 -0.0659  0.3300  0.2933
 0.3800  0.2439 -0.1820  0.3006  0.2252
 0.3389  0.1864  0.1256  0.0413  0.1255
 0.3711  0.2257 -0.0789  0.1855  0.1880
 0.2699  0.1420  0.0166  0.1272  0.2204
 0.2637  0.1749  0.1914 -0.0575  0.0681
 0.2670  0.0319  0.1951  0.0058  0.2326
 0.3731  0.2367  0.1786 -0.0697  0.0204
 0.4111  0.0972 -0.0186  0.3218  0.2449
 0.4146  0.1350  0.0187  0.2434  0.2235
 0.3786  0.1852 -0.0256  0.1529  0.1673
 0.3871  0.2349  0.0117  0.1109  0.1390
 0.2865  0.1581  0.0965  0.0066  0.1200
[torch.FloatTensor of size 27x5]
, batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1])
'''

# Print the hidden state ht
print(ht)
'''
Variable containing:
(0 ,.,.) = 
  0.2865  0.1581  0.0965  0.0066  0.1200
  0.3871  0.2349  0.0117  0.1109  0.1390
  0.3711  0.2257 -0.0789  0.1855  0.1880
[torch.FloatTensor of size 1x3x5]
'''

# Print the cell state ct
print(ct)
'''
Variable containing:
(0 ,.,.) = 
  0.5580  0.3379  0.1756  0.0119  0.2618
  0.7071  0.4409  0.0174  0.1887  0.3112
  0.6816  0.4281 -0.1172  0.3236  0.4330
[torch.FloatTensor of size 1x3x5]
'''

# unpack your output if required — sometimes the packed output needs to be converted back to a padded (uncompressed) tensor; pad_packed_sequence does this and restores the dimensions of the original, unpacked input
output, _ = pad_packed_sequence(packed_output)
print("Lstm output", output.size(), output.data)
# Output:
'''('Lstm output', torch.Size([11, 3, 5]), 
(0 ,.,.) = 
  0.1142  0.0952 -0.0453  0.2488  0.1718
  0.2997  0.0641  0.1168 -0.1466 -0.0609
  0.0767  0.1134 -0.0117  0.1579  0.1066

(1 ,.,.) = 
  0.2155  0.1962 -0.1550  0.3445  0.1474
  0.3370  0.1843 -0.0703  0.1179  0.0640
  0.1523  0.2084 -0.0252  0.1508  0.1014

(2 ,.,.) = 
  0.2912  0.1466 -0.1824  0.4241  0.2618
  0.3821  0.2699 -0.0735  0.0349  0.0672
  0.2373  0.1743 -0.0703  0.2885  0.2219

(3 ,.,.) = 
  0.3259  0.1496 -0.1300  0.4009  0.3760
  0.4194  0.1299 -0.0325  0.2450  0.2708
  0.3058  0.2124  0.0008  0.1607  0.2048

(4 ,.,.) = 
  0.3743  0.1569 -0.1730  0.4632  0.2992
  0.1985  0.0846  0.1304  0.1277  0.2449
  0.3712  0.1259 -0.0659  0.3300  0.2933

(5 ,.,.) = 
  0.3800  0.2439 -0.1820  0.3006  0.2252
  0.3389  0.1864  0.1256  0.0413  0.1255
  0.3711  0.2257 -0.0789  0.1855  0.1880

(6 ,.,.) = 
  0.2699  0.1420  0.0166  0.1272  0.2204
  0.2637  0.1749  0.1914 -0.0575  0.0681
  0.0000  0.0000  0.0000  0.0000  0.0000

(7 ,.,.) = 
  0.2670  0.0319  0.1951  0.0058  0.2326
  0.3731  0.2367  0.1786 -0.0697  0.0204
  0.0000  0.0000  0.0000  0.0000  0.0000

(8 ,.,.) = 
  0.4111  0.0972 -0.0186  0.3218  0.2449
  0.4146  0.1350  0.0187  0.2434  0.2235
  0.0000  0.0000  0.0000  0.0000  0.0000

(9 ,.,.) = 
  0.3786  0.1852 -0.0256  0.1529  0.1673
  0.3871  0.2349  0.0117  0.1109  0.1390
  0.0000  0.0000  0.0000  0.0000  0.0000

(10,.,.) = 
  0.2865  0.1581  0.0965  0.0066  0.1200
  0.0000  0.0000  0.0000  0.0000  0.0000
  0.0000  0.0000  0.0000  0.0000  0.0000
[torch.FloatTensor of size 11x3x5]
)
'''


# Or if you just want the final hidden state?
print("Last output", ht[-1].size(), ht[-1].data)
# Output:
'''
('Last output', torch.Size([3, 5]), 
 0.2865  0.1581  0.0965  0.0066  0.1200
 0.3871  0.2349  0.0117  0.1109  0.1390
 0.3711  0.2257 -0.0789  0.1855  0.1880
[torch.FloatTensor of size 3x5]
)
'''
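# As an aside (my own sketch, not part of the original course code): the same per-sequence
# final hidden states can be read from the padded `output` by indexing each batch column at
# its own last valid time step; the three rows should match ht[-1] above.
last_outputs = torch.stack([output[int(l) - 1, b] for b, l in enumerate(seq_lengths)])
print("Last valid outputs", last_outputs.size(), last_outputs.data)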
# Along the way, note the dimensions of a few of the variables:
# First, embeded_seq_tensor
print(embeded_seq_tensor)
'''
Variable containing:
(0 ,.,.) = 
  1.1440  0.1978 -0.6949
 -2.0943  1.3701  0.7476
  0.9801 -0.5060 -0.3273

(1 ,.,.) = 
  1.7591  0.1092  0.4916
  1.7591  0.1092  0.4916
  0.6903 -0.4349  0.1548

(2 ,.,.) = 
  2.3382  0.2825 -0.5383
 -0.5960  1.0383  0.3377
  1.1440  0.1978 -0.6949

(3 ,.,.) = 
  0.3405  0.7053 -1.4355
  0.3405  0.7053 -1.4355
 -0.8852  0.7600 -0.3285

(4 ,.,.) = 
  1.3288  0.7557 -0.4967
 -0.3825 -1.0906 -0.8747
  1.1139  0.6303 -1.0870

(5 ,.,.) = 
 -0.5960  1.0383  0.3377
 -0.5960  1.0383  0.3377
 -0.5960  1.0383  0.3377

(6 ,.,.) = 
 -0.4494 -0.5962 -0.4374
 -0.5976 -0.2818 -0.0220
 -0.1059  0.3986 -0.3438

(7 ,.,.) = 
 -1.8133  0.2120 -1.0831
 -0.5960  1.0383  0.3377
 -0.1059  0.3986 -0.3438

(8 ,.,.) = 
  1.1747  1.3855 -0.6076
  1.1139  0.6303 -1.0870
 -0.1059  0.3986 -0.3438

(9 ,.,.) = 
 -0.5960  1.0383  0.3377
 -0.5960  1.0383  0.3377
 -0.1059  0.3986 -0.3438

(10,.,.) = 
 -0.5976 -0.2818 -0.0220
 -0.1059  0.3986 -0.3438
 -0.1059  0.3986 -0.3438
[torch.FloatTensor of size 11x3x3]
'''

# Next, packed_input after packing
print(packed_input)
'''
PackedSequence(data=Variable containing:
 1.1440  0.1978 -0.6949
-2.0943  1.3701  0.7476
 0.9801 -0.5060 -0.3273
 1.7591  0.1092  0.4916
 1.7591  0.1092  0.4916
 0.6903 -0.4349  0.1548
 2.3382  0.2825 -0.5383
-0.5960  1.0383  0.3377
 1.1440  0.1978 -0.6949
 0.3405  0.7053 -1.4355
 0.3405  0.7053 -1.4355
-0.8852  0.7600 -0.3285
 1.3288  0.7557 -0.4967
-0.3825 -1.0906 -0.8747
 1.1139  0.6303 -1.0870
-0.5960  1.0383  0.3377
-0.5960  1.0383  0.3377
-0.5960  1.0383  0.3377
-0.4494 -0.5962 -0.4374
-0.5976 -0.2818 -0.0220
-1.8133  0.2120 -1.0831
-0.5960  1.0383  0.3377
 1.1747  1.3855 -0.6076
 1.1139  0.6303 -1.0870
-0.5960  1.0383  0.3377
-0.5960  1.0383  0.3377
-0.5976 -0.2818 -0.0220
[torch.FloatTensor of size 27x3]
, batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1])
'''
# Note that from embeded_seq_tensor to packed_input the shape goes from 11x3x3 to 27x3, i.e. 33 - 27 = 6 padded rows are dropped.
# The batch_sizes=[3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1] follow from the sorted sequence lengths 11, 10 and 6: the length-11 seq contributes to all 11 time steps, the length-10 seq to the first 10, and the length-6 seq to the first 6, so summing the contributions at each of the 11 steps gives exactly [3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1].
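# A small sanity check (my own addition, not from the course code): batch_sizes can be
# recomputed from the sorted lengths by counting, at each time step t, how many sequences
# are still at least t+1 characters long.
lengths = seq_lengths.tolist()                       # [11, 10, 6] after sorting
manual_batch_sizes = [sum(1 for l in lengths if l > t) for t in range(max(lengths))]
print(manual_batch_sizes)        # [3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1]
print(sum(manual_batch_sizes))   # 27 = 11 + 10 + 6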

That covers packing the input with pack_padded_sequence and unpacking it with pad_packed_sequence to restore the original dimensions; a compact end-to-end sketch follows below. More notes on RNNs and PyTorch to come~
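As a final aside, here is a minimal self-contained sketch of the same pad → sort → pack → LSTM → unpack pipeline written with batch_first=True. This is my own summary under the assumptions of the example above (vocabulary of 17 characters, embedding size 3, hidden size 5), not taken from the course code; on recent PyTorch versions Variable is no longer needed and plain tensors are used directly.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

embed = nn.Embedding(17, 3)                # 17 = len(vocab) in the example above
lstm = nn.LSTM(3, 5, batch_first=True)

# padded (B, L) batch of character indices, already sorted by length (descending)
seq_tensor = torch.zeros(3, 11).long()
seq_lengths = torch.LongTensor([11, 10, 6])

packed_input = pack_padded_sequence(embed(seq_tensor), seq_lengths, batch_first=True)
packed_output, (ht, ct) = lstm(packed_input)
output, output_lengths = pad_packed_sequence(packed_output, batch_first=True)
print(output.size())                       # torch.Size([3, 11, 5]), i.e. (B, L, hidden)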
