---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-383-d67388d2e4cc> in <module>
1 output_emb = myEmbed(total_words = total_words, embedding_dim = 8)
2 word_vector = torch.tensor(word_vector, dtype=torch.long).clone().detach()
----> 3 output = output_emb(word_vector)
4 print(output)
5 # word_vector
/opt/anaconda3/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
<ipython-input-382-10f2ec94e0ae> in forward(self, sentences_idx)
4 self.embed = nn.Embedding(total_words,embedding_dim)
5 def forward(self,sentences_idx):
----> 6 return self.embed(sentences_idx).clone().detach()
/opt/anaconda3/envs/py36/lib/python3.6/site-packages/torch/nn/modules/module.py in _call_impl(self, *input, **kwargs)
720 result = self._slow_forward(*input, **kwargs)
721 else:
--> 722 result = self.forward(*input, **kwargs)
723 for hook in itertools.chain(
724 _global_forward_hooks.values(),
/opt/anaconda3/envs/py36/lib/python3.6/site-packages/torch/nn/modules/sparse.py in forward(self, input)
124 return F.embedding(
125 input, self.weight, self.padding_idx, self.max_norm,
--> 126 self.norm_type, self.scale_grad_by_freq, self.sparse)
127
128 def extra_repr(self) -> str:
/opt/anaconda3/envs/py36/lib/python3.6/site-packages/torch/nn/functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse)
1812 # remove once script supports set_grad_enabled
1813 _no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1814 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
1815
1816
IndexError: index out of range in self
import torch
import torch.nn as nn

sentences = ['It is a good day.','how are you?','I want to study the nn.embedding.','I want to elmate my pox.','the experience that I have done today is my favriate experience.']
sentences = [sentence.split() for sentence in sentences]
all_words = []
total_words = 0
for sentence in sentences:
    all_words += [word for word in sentence]
no_repeat_words = set(all_words)
total_words = len(no_repeat_words)
word_to_idx = {word: i+1 for i, word in enumerate(no_repeat_words)}
word_to_idx[''] = 0          # index 0 is reserved for the padding token
idx_to_word = {i+1: word for i, word in enumerate(no_repeat_words)}
print('all_words:',all_words)
print('no_repeat_words:',no_repeat_words)
print('idx_to_word:',idx_to_word)
print('word_to_idx:',word_to_idx)
print('total_words:',total_words)
>>>all_words: ['It', 'is', 'a', 'good', 'day.', 'how', 'are', 'you?', 'I', 'want', 'to', 'study', 'the', 'nn.embedding.', 'I', 'want', 'to', 'elmate', 'my', 'pox.', 'the', 'experience', 'that', 'I', 'have', 'done', 'today', 'is', 'my', 'favriate', 'experience.']
>>>no_repeat_words: {'a', 'want', 'nn.embedding.', 'It', 'experience.', 'my', 'today', 'study', 'favriate', 'is', 'have', 'I', 'day.', 'you?', 'how', 'elmate', 'experience', 'to', 'pox.', 'the', 'that', 'good', 'done', 'are'}
>>>idx_to_word: {1: 'a', 2: 'want', 3: 'nn.embedding.', 4: 'It', 5: 'experience.', 6: 'my', 7: 'today', 8: 'study', 9: 'favriate', 10: 'is', 11: 'have', 12: 'I', 13: 'day.', 14: 'you?', 15: 'how', 16: 'elmate', 17: 'experience', 18: 'to', 19: 'pox.', 20: 'the', 21: 'that', 22: 'good', 23: 'done', 24: 'are'}
>>>word_to_idx: {'a': 1, 'want': 2, 'nn.embedding.': 3, 'It': 4, 'experience.': 5, 'my': 6, 'today': 7, 'study': 8, 'favriate': 9, 'is': 10, 'have': 11, 'I': 12, 'day.': 13, 'you?': 14, 'how': 15, 'elmate': 16, 'experience': 17, 'to': 18, 'pox.': 19, 'the': 20, 'that': 21, 'good': 22, 'done': 23, 'are': 24, '' : 0}
>>>total_words: 24
word_vector = []
sentences_pad = []
print('sentences before padding:', sentences)
max_len = max([len(sentence) for sentence in sentences])
for sentence in sentences:
    if len(sentence) < max_len:
        # extend() pads the sentence in place and returns None, so append the sentence itself
        sentence.extend("" for _ in range(max_len - len(sentence)))
    sentences_pad += [sentence]
for sentence in sentences_pad:
    word_vector += [[word_to_idx[word] for word in sentence]]
print('sentences after padding:', sentences_pad)
print('sentences converted to index vectors:', word_vector)
>>>sentences before padding: [['It', 'is', 'a', 'good', 'day.', '', '', '', '', '', ''], ['how', 'are', 'you?', '', '', '', '', '', '', '', ''], ['I', 'want', 'to', 'study', 'the', 'nn.embedding.', '', '', '', '', ''], ['I', 'want', 'to', 'elmate', 'my', 'pox.', '', '', '', '', ''], ['the', 'experience', 'that', 'I', 'have', 'done', 'today', 'is', 'my', 'favriate', 'experience.']]
>>>sentences after padding: [['It', 'is', 'a', 'good', 'day.', '', '', '', '', '', ''], ['how', 'are', 'you?', '', '', '', '', '', '', '', ''], ['I', 'want', 'to', 'study', 'the', 'nn.embedding.', '', '', '', '', ''], ['I', 'want', 'to', 'elmate', 'my', 'pox.', '', '', '', '', ''], ['the', 'experience', 'that', 'I', 'have', 'done', 'today', 'is', 'my', 'favriate', 'experience.']]
>>>sentences converted to index vectors: [[4, 10, 1, 22, 13, 0, 0, 0, 0, 0, 0], [15, 24, 14, 0, 0, 0, 0, 0, 0, 0, 0], [12, 2, 18, 8, 20, 3, 0, 0, 0, 0, 0], [12, 2, 18, 16, 6, 19, 0, 0, 0, 0, 0], [20, 17, 21, 12, 11, 23, 7, 10, 6, 9, 5]]
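Before passing word_vector to the embedding, it helps to compare the largest index that will be looked up with the table size the layer is about to be given. A minimal check, assuming the variables defined above:

max_idx = max(max(row) for row in word_vector)
# total_words counts only the 24 unique words, but the padding entry '' pushes the
# indices used in word_vector up to 24, so a 24-row table (valid rows 0..23) is one row short.
print('largest index used:', max_idx)        # 24
print('total_words:', total_words)           # 24 -> row 24 does not exist in a 24-row table
print('dictionary size:', len(word_to_idx))  # 25 -> what num_embeddings should be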
class myEmbed(nn.Module):
    def __init__(self, total_words, embedding_dim):
        super(myEmbed, self).__init__()
        self.embed = nn.Embedding(total_words, embedding_dim)
    def forward(self, sentences_idx):
        return self.embed(sentences_idx).clone().detach()

output_emb = myEmbed(total_words = total_words, embedding_dim = 8)
word_vector = torch.tensor(word_vector, dtype=torch.long).clone().detach()
output = output_emb(word_vector)
print(output)
>>> The error is the IndexError shown in the traceback at the top of this post.
The failure happens at step 3, where word_vector is passed into nn.Embedding(). The value passed as num_embeddings is total_words = 24 (the number of unique words), but word_vector uses 25 distinct indices: 1..24 for the words plus 0 for the padding token ''. Looking up row 24 in a 24-row table (valid rows 0..23) therefore overflows. In short, num_embeddings is set too small: it must be at least the size of the full dictionary, padding entry included (a short reproduction follows the parameter list below). The signature is:
class torch.nn.Embedding(num_embeddings, embedding_dim,
padding_idx=None, max_norm=None,
norm_type=2.0, scale_grad_by_freq=False,
sparse=False, _weight=None)
A simple lookup table that stores embeddings of a fixed dictionary and size. This module is often used to store word embeddings and retrieve them by index. The input to the module is a list of indices, and the output is the corresponding word embeddings.
1. num_embeddings (int) – the total number of entries in the (deduplicated) dictionary, including any padding entry;
2. embedding_dim (int) – the size of each embedding vector;
3. padding_idx (int, optional) – if given, positions equal to padding_idx are filled with the embedding vector at that index (initialized to zeros);
4. max_norm (float, optional) – if given, each embedding vector whose norm exceeds max_norm is renormalized to have norm max_norm;
5. norm_type (float, optional) – the p of the p-norm used for the max_norm option; default 2.0;
6. scale_grad_by_freq (boolean, optional) – if given, gradients are scaled by the inverse frequency of the words in the mini-batch; default False;
7. sparse (bool, optional) – if True, the gradient w.r.t. the weight matrix will be a sparse tensor; see the PyTorch notes for details on sparse gradients.
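A minimal reproduction of the off-by-one, using the same numbers as the example above (a sketch; only torch is assumed to be installed):

import torch
import torch.nn as nn

emb_too_small = nn.Embedding(num_embeddings=24, embedding_dim=8)  # valid rows: 0..23
emb_correct   = nn.Embedding(num_embeddings=25, embedding_dim=8)  # valid rows: 0..24

idx = torch.tensor([24], dtype=torch.long)  # 'are' is mapped to 24 in word_to_idx above

print(emb_correct(idx).shape)   # torch.Size([1, 8]) -- row 24 exists
try:
    emb_too_small(idx)
except IndexError as err:
    print(err)                  # index out of range in self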
# The preprocessing and vectorization code is unchanged from the version above.
# 1. Data preprocessing
sentences = ['It is a good day.','how are you?','I want to study the nn.embedding.','I want to elmate my pox.','the experience that I have done today is my favriate experience.']
sentences = [sentence.split() for sentence in sentences]
all_words = []
total_words = 0
for sentence in sentences:
    all_words += [word for word in sentence]
no_repeat_words = set(all_words)
total_words = len(no_repeat_words)
word_to_idx = {word: i+1 for i, word in enumerate(no_repeat_words)}
word_to_idx[''] = 0          # index 0 is reserved for the padding token
idx_to_word = {i+1: word for i, word in enumerate(no_repeat_words)}
# 2. Word to vector: convert the sentences into index vectors
word_vector = []
sentences_pad = []
max_len = max([len(sentence) for sentence in sentences])
for sentence in sentences:
    if len(sentence) < max_len:
        # extend() pads the sentence in place and returns None, so append the sentence itself
        sentence.extend("" for _ in range(max_len - len(sentence)))
    sentences_pad += [sentence]
for sentence in sentences_pad:
    word_vector += [[word_to_idx[word] for word in sentence]]
# 3. Pass the vectorized sentences to the embedding layer to generate word vectors
total_words = len(word_to_idx)   # 25: the 24 unique words plus the padding entry '', so num_embeddings now covers every index
class myEmbed(nn.Module):
    def __init__(self, total_words, embedding_dim):
        super(myEmbed, self).__init__()
        self.embed = nn.Embedding(total_words, embedding_dim)
    def forward(self, sentences_idx):
        return self.embed(sentences_idx).clone().detach()

output_emb = myEmbed(total_words = total_words, embedding_dim = 8)
word_vector = torch.tensor(word_vector, dtype=torch.long).clone().detach()
output = output_emb(word_vector)
print(output)
tensor([[[-0.9028, -1.0990, 1.0646, 1.4747, 1.2577, 0.6634, 0.0188,
0.6545],
[-0.2176, 0.5252, 0.2574, 1.2822, -0.8745, -1.2112, 0.0584,
-0.5189],
[ 0.5240, -0.8862, -1.3594, -1.1795, -0.8441, 0.7830, 0.9485,
0.5734],
[ 1.6141, 0.2254, -0.1457, 0.7620, -1.8222, 0.4634, -0.8187,
0.3283],
[-0.3710, 0.8392, -0.6133, 0.6381, -1.7941, 0.2950, 0.3148,
2.2896],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190]],
[[-0.1860, 1.8636, -0.6865, -0.3979, 1.1691, 1.2467, 1.5026,
0.2586],
[-0.9084, 0.0882, -0.0631, 0.0667, 0.9071, 1.6767, -0.1515,
1.1327],
[-2.6057, 0.6494, 0.0483, 0.5032, 0.5448, 0.7419, 0.8697,
-0.4805],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190]],
[[-0.2740, 0.7465, 0.7614, -1.3599, -0.7212, 0.0880, 0.9135,
1.8307],
[ 0.3974, -0.0467, -0.8352, 0.2649, 1.9399, -2.1667, 0.3023,
-1.7938],
[-0.8383, -0.6372, -0.1922, 0.5328, 0.5292, -0.8630, -0.0764,
-1.4630],
[ 0.2232, -0.2855, -0.5257, -1.4286, -1.3177, -0.5152, -1.1457,
0.3720],
[-0.6988, -0.3652, -0.9142, 0.5403, 0.1923, -1.6566, 0.8366,
-1.1495],
[-0.1142, -1.0301, 1.1789, 0.4901, -0.2576, 0.4898, 0.4154,
1.1342],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190]],
[[-0.2740, 0.7465, 0.7614, -1.3599, -0.7212, 0.0880, 0.9135,
1.8307],
[ 0.3974, -0.0467, -0.8352, 0.2649, 1.9399, -2.1667, 0.3023,
-1.7938],
[-0.8383, -0.6372, -0.1922, 0.5328, 0.5292, -0.8630, -0.0764,
-1.4630],
[-1.1177, -0.8047, 0.2185, -0.3761, 0.8753, 2.1269, 1.4648,
-0.1830],
[ 0.4993, 0.5043, -0.4541, -0.2609, 2.4289, 1.5842, -1.9878,
1.4654],
[ 1.8740, -0.1214, 0.6446, -0.4646, 0.3363, -0.3854, -0.4768,
0.7824],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190],
[ 1.0500, -0.7410, 1.4759, -0.9487, 1.4232, 0.1392, 0.8788,
-0.7190]],
[[-0.6988, -0.3652, -0.9142, 0.5403, 0.1923, -1.6566, 0.8366,
-1.1495],
[ 0.4606, 0.2213, -0.6970, -0.1618, -1.8748, -0.4962, 0.5517,
-0.4841],
[ 0.0738, 0.8394, -1.1480, -0.3829, -0.0931, 1.1793, 0.2737,
-0.9046],
[-0.2740, 0.7465, 0.7614, -1.3599, -0.7212, 0.0880, 0.9135,
1.8307],
[ 1.2459, 0.6663, 1.6969, -0.2072, -1.9603, -1.4282, 0.8382,
-0.3569],
[-1.6661, 0.0275, 0.5090, 0.4771, -0.7955, 0.9199, 0.9401,
0.8285],
[ 0.2445, 0.0742, 1.6497, -0.0338, 1.8325, 0.1709, 0.7659,
-0.7233],
[-0.2176, 0.5252, 0.2574, 1.2822, -0.8745, -1.2112, 0.0584,
-0.5189],
[ 0.4993, 0.5043, -0.4541, -0.2609, 2.4289, 1.5842, -1.9878,
1.4654],
[ 0.1651, -0.1232, 1.1650, -1.3531, 0.1082, 0.1277, -1.0091,
-1.3470],
[-0.2381, 1.7149, 1.0614, -1.1837, -0.5192, 0.9356, -0.1343,
0.9358]]])
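In the output above, every padded position (index 0) maps to the same row of the table, which is why the vector [1.0500, -0.7410, 1.4759, ...] repeats at the end of the shorter sentences. If padding positions should stay at zero and never receive gradient updates, nn.Embedding supports this through its padding_idx argument; a minimal sketch, reusing total_words (now 25) and the word_vector tensor from the corrected code:

import torch.nn as nn

# With padding_idx=0, row 0 of the weight table is initialized to zeros and
# its gradient is not updated during training.
embed_pad = nn.Embedding(num_embeddings=total_words, embedding_dim=8, padding_idx=0)
padded_output = embed_pad(word_vector)
print(padded_output[0, 5:])   # the padded tail of the first sentence: all zeros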
References:
1. The official PyTorch documentation for nn.Embedding()
2. 深度学习1 (blog post): how the word embedding layer is implemented in PyTorch via nn.Embedding
3. nn.Embedding() in PyTorch (blog post)
4. PyTorch embedding layer error "index out of range in self" (blog post)