pytorch-SVD第二弹

## 利用 SGD 实现 SVD 算法
误差有点高,感觉像是写错了

import pandas as pd
import numpy as np

import torch

# --- Script-level data loading and device setup ---
torch.cuda.empty_cache()  # release cached GPU memory left over from prior runs
# assumes a MovieLens-style file with columns userId, movieId, rating — TODO confirm
data = pd.read_csv('ratings.csv')

# Pivot the long ratings table into a (user x movie) matrix; unrated cells become NaN.
data_train = data.pivot(index = 'userId', columns = 'movieId', values = 'rating')

matrax = data_train.fillna(0)  # encode "unrated" as 0 (MSE_loss later masks zeros out)
matrix = np.array(matrax)
# NOTE(review): this tensor inherits float64 from numpy while u/v below are
# float32 — the arithmetic relies on PyTorch type promotion; confirm intended.
data = torch.tensor(matrix)
device = torch.device('cuda', 0)  # assumes CUDA GPU 0 is available — TODO confirm
data = data.to(device)

def data_iter(data, batch_size):
    """Yield (rows, row_indices) mini-batches of users, in fixed row order.

    The last batch may be smaller than `batch_size` when the number of
    users is not an exact multiple of it.
    """
    total = data.shape[0]
    for start in range(0, total, batch_size):
        stop = min(start + batch_size, total)
        batch_idx = torch.arange(start, stop)
        yield data[batch_idx], batch_idx

def init_u_v(data, k):
    """Initialize the latent factor matrices for Funk-SVD.

    Args:
        data: (num_users, num_items) rating tensor; only its shape and
            device are used.
        k: number of latent factors.

    Returns:
        (u, v): u is (num_users, k) and v is (k, num_items), both drawn
        from N(0, 0.1^2) on the same device as `data`, with
        requires_grad=True so they can be trained by gradient descent.
    """
    # Use data.device rather than the module-level `device` global: this
    # removes a hidden dependency and lets the function work on CPU tensors.
    u = torch.normal(mean=0, std=0.1, size=(data.shape[0], k),
                     device=data.device, requires_grad=True)
    v = torch.normal(mean=0, std=0.1, size=(k, data.shape[1]),
                     device=data.device, requires_grad=True)
    return u, v

def funk_svd(u, v):
    """Predicted rating matrix: the product of user and item factor matrices."""
    return u @ v

def MSE_loss(y, y_hat):
    """Sum of squared errors over the observed (non-zero) entries of y.

    Zeros in `y` encode "unrated" and are masked out, so unobserved
    entries contribute nothing to the loss.
    """
    observed = (y != 0).to(y.dtype)  # 1 where rated, 0 where missing
    return (observed * (y_hat - y) ** 2).sum()

def SGD(u, v, lr, batch_size):
    """One vanilla gradient-descent step on `u` and `v`, in place.

    Each gradient is scaled by 1/batch_size (the loss is a sum over the
    batch, not a mean), and gradients are zeroed afterwards so the next
    backward() starts from a clean slate.
    """
    # torch.no_grad() + in-place ops replace the deprecated `.data` access;
    # numerics are identical, but autograd is properly disabled.
    with torch.no_grad():
        u -= lr * u.grad / batch_size
        v -= lr * v.grad / batch_size
        u.grad.zero_()
        v.grad.zero_()



# --- Hyperparameters and training loop (script-level) ---
k = 100           # number of latent factors
lr = 0.025        # learning rate
epochs = 500
lamb = 0.001      # L2 regularization strength
net = funk_svd
loss = MSE_loss
batch_size = 32
u, v = init_u_v(data, k)

# Mini-batch gradient descent over blocks of user rows.
# NOTE(review): data_iter visits rows in a fixed order (no shuffling), so
# batch ordering is deterministic rather than truly stochastic — confirm intent.
for epoch in range(epochs):
    for data_epoch, idx in data_iter(data, batch_size):
        # temp = u[idx]
        # print(temp.shape)
        # Masked squared error on this batch + L2 penalty on the batch's user
        # factors and on ALL item factors (v is penalized once per batch).
        l = loss(data_epoch, net(u[idx], v)) + lamb*torch.pow(u[idx], 2).sum() + lamb*torch.pow(v, 2).sum()
        l.sum().backward()  # l is already a scalar; .sum() is a no-op
        SGD(u, v, lr, batch_size)
        # u[idx]=temp
    with torch.no_grad():  # full-matrix loss is for monitoring only; no graph needed
        train_l = loss(data, net(u, v))
    print('epoch:{}, loss:{}'.format(epoch+1, train_l))

最终的 loss 为 88368.29624104682

你可能感兴趣的:(RS学习)