Getting Started with PyTorch (2)

The FizzBuzz game

Multiples of 3 print "fizz", multiples of 5 print "buzz", multiples of 15 print "fizzbuzz", and every other number prints itself. For 1 through 15 the expected output is:

1
2
fizz
4
buzz
fizz
7
8
fizz
buzz
11
fizz
13
14
fizzbuzz

def fizz_buzz_encode(i):
    if i % 15 == 0: return 3
    elif i % 5 == 0: return 2
    elif i % 3 == 0: return 1
    else: return 0

def fizz_buzz_decode(i, prediction):
    return [str(i), "fizz", "buzz", "fizzbuzz"][prediction]

def helper(i):
    print(fizz_buzz_decode(i, fizz_buzz_encode(i)))

for i in range(1, 16):
    helper(i)

Letting a neural network learn the game on its own

import torch
import numpy as np
import torch.nn as nn

def fizz_buzz_encoder(i):
    if i % 15 == 0: return 3
    elif i % 5 == 0: return 2
    elif i % 3 == 0: return 1
    else: return 0

NUM_DIGITS = 10
NUM_HIDDEN = 100


def binary_encode(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)][::-1])  # encode the input as a 10-bit binary vector
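
As a quick sanity check (not part of the original post), binary_encode turns an integer into a fixed-width bit vector, most significant bit first because of the [::-1] reversal:

print(binary_encode(6, NUM_DIGITS))     # [0 0 0 0 0 0 0 1 1 0]
print(binary_encode(1023, NUM_DIGITS))  # [1 1 1 1 1 1 1 1 1 1]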

# Training data: numbers from 101 up to 2**10 - 1 (10-bit binary input -> 4-way classification)
trX = torch.Tensor([binary_encode(i, NUM_DIGITS) for i in range(101, 2 ** NUM_DIGITS)])
trY = torch.LongTensor([fizz_buzz_encoder(i) for i in range(101, 2 ** NUM_DIGITS)])


model = torch.nn.Sequential(
    torch.nn.Linear(NUM_DIGITS, NUM_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(NUM_HIDDEN, 4),  # 4 output classes
)

loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

BATCH_SIZE = 50
for epoch in range(500):
    for start in range(0, len(trX), BATCH_SIZE):
        end = start + BATCH_SIZE
        batchX = trX[start:end]
        batchY = trY[start:end]

        y_pred = model(batchX)
        loss = loss_fn(y_pred, batchY)
        print("EPOCH", epoch, loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Test data: the numbers below 101 (held out from training)
testX = torch.Tensor([binary_encode(i,NUM_DIGITS) for i in range(1,101)])
with torch.no_grad():
    testY = model(testX)

def fizz_buzz_decode(i,prediction):
    return [str(i),"fizz","buzz","fizzbuzz"][prediction]

# testY is a 100x4 matrix of class scores; pick the largest of the four for each row.
# testY.max(1) returns (values, indices) along dimension 1, and [1] selects the indices, i.e. the argmax.
predicts = zip(range(1, 101), testY.max(1)[1].tolist())
print([fizz_buzz_decode(i,x) for i,x in predicts])
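
As a small illustration of what .max(1)[1] returns, and to check how many of the 100 held-out numbers the network gets right, here is a short sketch that is not part of the original script (it reuses fizz_buzz_encoder and testY from above):

# argmax along dimension 1: for each row, the index of the largest score
scores = torch.tensor([[0.1, 2.0, 0.3, 0.0],
                       [1.5, 0.2, 0.1, 0.4]])
print(scores.max(1)[1])  # tensor([1, 0])

# accuracy of the trained model on 1..100
true_labels = torch.LongTensor([fizz_buzz_encoder(i) for i in range(1, 101)])
accuracy = (testY.max(1)[1] == true_labels).float().mean().item()
print("accuracy:", accuracy)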

Word vectors (skip-gram with negative sampling)

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as tud
import pandas as pd

from collections import Counter
import numpy as np
import random
import math
import sklearn
import scipy
from sklearn.metrics.pairwise import cosine_similarity

# Fix the random seeds so every run reproduces the same random values; the argument is the seed.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)

# Hyperparameters
C = 3                    # context window: 3 words on each side of the center word
K = 100                  # number of negative samples drawn at random per context word
NUM_EPOCHS = 2
MAX_VOCAB_SIZE = 30000   # vocabulary size cap
BATCH_SIZE = 128
LEARNING_RATE = 0.2
EMBEDDING_SIZE = 100

def word_tokenize(text):
    return text.split()

with open("text.train.txt", "r") as fin:
    text = fin.read()
text = word_tokenize(text)


# Keep the MAX_VOCAB_SIZE - 1 most common words; everything else is counted under "<unk>"
vocab = dict(Counter(text).most_common(MAX_VOCAB_SIZE - 1))
vocab["<unk>"] = len(text) - np.sum(list(vocab.values()))
#print(type(Counter(text)), Counter(text))
#print(dict(Counter(text)))

idx_to_word = [word for word in vocab.keys()]                   # index -> word
word_to_idx = {word: i for i, word in enumerate(idx_to_word)}   # word -> index
#print(word_to_idx)
word_counts = np.array([count for count in vocab.values()], dtype=np.float32)
word_freqs = word_counts / np.sum(word_counts)
word_freqs = word_freqs ** (3. / 4.)          # word2vec raises unigram frequencies to the 3/4 power
word_freqs = word_freqs / np.sum(word_freqs)  # renormalize to get the negative-sampling distribution
VOCAB_SIZE = len(idx_to_word)
#print(VOCAB_SIZE)
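
The class bodies and training loop below are left empty or broken in the original post, so they are filled in here with the standard skip-gram negative-sampling setup. First, a minimal sketch of a Dataset/DataLoader that yields (center word, context words, negative samples); the name WordEmbeddingDataset and its details are my assumption, not code from the original, with negatives drawn from word_freqs via torch.multinomial:

# Hypothetical helper: a Dataset yielding (center word, context words, negative samples).
class WordEmbeddingDataset(tud.Dataset):
    def __init__(self, text, word_to_idx, word_freqs):
        super().__init__()
        # map every word to its index; words outside the vocabulary go to "<unk>"
        self.text_encoded = torch.LongTensor(
            [word_to_idx.get(w, word_to_idx["<unk>"]) for w in text])
        self.word_freqs = torch.Tensor(word_freqs)

    def __len__(self):
        return len(self.text_encoded)

    def __getitem__(self, idx):
        center_word = self.text_encoded[idx]
        # indices of the C words on each side (wrap around at the ends)
        pos_indices = list(range(idx - C, idx)) + list(range(idx + 1, idx + C + 1))
        pos_indices = [i % len(self.text_encoded) for i in pos_indices]
        pos_words = self.text_encoded[pos_indices]
        # K negative samples per positive word, drawn from the 3/4-power distribution
        neg_words = torch.multinomial(self.word_freqs, K * pos_words.shape[0], True)
        return center_word, pos_words, neg_words

dataset = WordEmbeddingDataset(text, word_to_idx, word_freqs)
dataloader = tud.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)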
class WordEmbeddingModel(nn.Module):
    def __init__(self, vocab_size, embed_size):
        super().__init__()
        self.in_embed = nn.Embedding(vocab_size, embed_size)    # center-word vectors
        self.out_embed = nn.Embedding(vocab_size, embed_size)   # context-word vectors

    def forward(self, cen, co_nb, wr_nb):
        # cen: center words, co_nb: positive context words, wr_nb: negative samples
        cen_embed = self.in_embed(cen).unsqueeze(2)
        pos = F.logsigmoid(torch.bmm(self.out_embed(co_nb), cen_embed)).squeeze(2).sum(1)
        neg = F.logsigmoid(torch.bmm(self.out_embed(wr_nb), -cen_embed)).squeeze(2).sum(1)
        return -(pos + neg)  # per-example skip-gram negative-sampling loss

model = WordEmbeddingModel(VOCAB_SIZE, EMBEDDING_SIZE)

optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)

for e in range(NUM_EPOCHS):
    for cen, co_nb, wr_nb in dataloader:   # batches from the (hypothetical) dataloader sketched above
        optimizer.zero_grad()
        loss = model(cen, co_nb, wr_nb).mean()
        loss.backward()
        optimizer.step()
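
The cosine_similarity import at the top is never used in the original snippet; as a rough sketch of what it is presumably for, the trained input embeddings can be pulled out and used to look up a word's nearest neighbours (the helper find_nearest below is my own, hypothetical, name):

# Hypothetical usage: nearest neighbours in the learned embedding space.
embedding_weights = model.in_embed.weight.data.numpy()   # [VOCAB_SIZE, EMBEDDING_SIZE]

def find_nearest(word, topk=5):
    vec = embedding_weights[word_to_idx[word]].reshape(1, -1)
    sims = cosine_similarity(vec, embedding_weights)[0]   # similarity to every vocabulary word
    return [idx_to_word[i] for i in sims.argsort()[::-1][:topk]]

# pick any word that actually occurs in text.train.txt, e.g.:
# print(find_nearest("two"))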
