wine数据集包含三种葡萄酒类别,总共178个样本,每个样本具有13个特征,样本数据格式如下图所示。
从wine.data中读取数据(loadDateSet),并使用线性判别分析(LDA)将13维特征降到2维(LL)
def loadDateSet(filename):
    """Load the UCI wine dataset from a CSV file.

    Each row has the form "label,f1,...,f13" where the label is 1-3.

    Args:
        filename: path to the wine.data CSV file.

    Returns:
        (features, labels): an (n, 13) float ndarray and an (n,) int
        ndarray. Labels are shifted to 0-2 so they can be fed directly
        to torch.nn.CrossEntropyLoss, which requires class indices in
        [0, C-1] (the raw 1-3 labels would raise at train time).
    """
    dataMat = []
    labelMat = []
    # Context manager guarantees the file handle is closed.
    with open(filename) as fr:
        for line in fr:
            curLine = line.strip().split(',')
            if not curLine[0]:
                continue  # skip blank lines defensively
            dataMat.append(list(map(float, curLine[1:])))
            # Raw labels are 1..3; shift to 0..2 for CrossEntropyLoss.
            labelMat.append(int(curLine[0]) - 1)
    return np.array(dataMat), np.array(labelMat)
def LL(x, y):
    """Reduce the 13-D wine features to 2-D with LDA.

    Samples are first L2-normalized row-wise, then projected onto the two
    most discriminative directions found by Linear Discriminant Analysis
    (supervised, hence the labels y are required).

    Args:
        x: (n_samples, n_features) feature matrix.
        y: (n_samples,) class labels.

    Returns:
        (n_samples, 2) projected feature matrix.
    """
    # Fix: the original line ended with a stray markdown fence (```),
    # which is a syntax error in Python.
    x_norm = preprocessing.normalize(x, norm='l2')
    lda = LinearDiscriminantAnalysis(n_components=2)
    x_new = lda.fit_transform(x_norm, y)
    return x_new
构建tensor数据集(Data.TensorDataset),并划分训练集与测试集的DataLoader
# Load, project to 2-D, shuffle, then split into a training set and a
# held-out test set wrapped as DataLoaders.
dataMat, labelMat = loadDateSet('wine.data')
dataMat = LL(dataMat, labelMat)

# Shuffle samples and labels with the same random permutation.
pindex = np.random.permutation(dataMat.shape[0])
dataMat = dataMat[pindex, :]
labelMat = labelMat[pindex]

dataMat = torch.from_numpy(dataMat)
labelMat = torch.from_numpy(labelMat)

# First `split` samples form the test set, the rest the training set.
# Fix: the original used [0:27] and [28:], silently dropping sample 27.
split = 28
torch_dataset = Data.TensorDataset(dataMat[split:], labelMat[split:])
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=15,
    shuffle=True,
    num_workers=2,  # NOTE(review): workers require a __main__ guard on spawn platforms — confirm
)
torch_testset = Data.TensorDataset(dataMat[:split], labelMat[:split])
loader2 = Data.DataLoader(
    dataset=torch_testset,
    batch_size=29,
    shuffle=True,
    num_workers=2,
)
class MLP(torch.nn.Module):
    """2 -> 5 -> 3 multilayer perceptron for the LDA-projected wine data.

    forward() returns raw class logits: torch.nn.CrossEntropyLoss applies
    log-softmax internally, so emitting softmax probabilities (as the
    original did) double-applies softmax and flattens the gradients.
    Argmax over logits equals argmax over softmax probabilities, so
    prediction code is unaffected.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(2, 5)
        self.fc2 = torch.nn.Linear(5, 3)

    def forward(self, x):
        # torch.sigmoid replaces the deprecated F.sigmoid.
        y = torch.sigmoid(self.fc1(x))
        return self.fc2(y)  # raw logits, shape (batch, 3)


model = MLP()
def train():
    """Train `model` on `loader` for n_epoch epochs, then plot the
    per-epoch average loss and the accuracy reported by test()."""
    epochs = []
    losses = []
    accuracies = []
    criterion = torch.nn.CrossEntropyLoss()
    optimzer = torch.optim.SGD(params=model.parameters(), lr=1)
    for epoch in range(n_epoch):
        running = 0
        for batch_x, batch_y in loader:
            optimzer.zero_grad()
            prediction = model(batch_x)
            loss = criterion(prediction, batch_y)
            loss.backward()
            optimzer.step()
            # Weight by batch size so the epoch average is per-sample.
            running += loss.item() * batch_x.size(0)
        epochs.append(epoch + 1)
        losses.append(running / len(loader.dataset))
        accuracies.append(test())
    # Loss curve.
    plt.plot(epochs, losses, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('loss')
    plt.grid()
    plt.show()
    # Accuracy curve.
    plt.plot(epochs, accuracies, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('acc')
    plt.grid()
    plt.show()
def test():
    """Evaluate `model` on the held-out test set; return accuracy in percent.

    Bug fix: the original iterated `loader` (the TRAINING DataLoader), so
    the reported "test" accuracy was actually training accuracy. Iterate
    `loader2`, which wraps torch_testset.
    """
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_dataMat, batch_lableMat in loader2:
            output = model(batch_dataMat)
            # Predicted class = index of the max score per row.
            _, p = torch.max(output.data, 1)
            total += batch_lableMat.size(0)
            correct += (p == batch_lableMat).sum().item()
    print(100 * correct / total)
    return 100 * correct / total
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
import torch.utils.data as Data
import torchvision.transforms as transaforms
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn import preprocessing
# Default all new tensors to float64 so the model weights match the
# double-precision numpy arrays produced by loadDateSet/LL without casts.
torch.set_default_tensor_type(torch.DoubleTensor)
# Number of training epochs used by train().
n_epoch=10
def loadDateSet(filename):
    """Load the UCI wine dataset from a CSV file.

    Each row has the form "label,f1,...,f13" where the label is 1-3.

    Args:
        filename: path to the wine.data CSV file.

    Returns:
        (features, labels): an (n, 13) float ndarray and an (n,) int
        ndarray. Labels are shifted to 0-2 so they can be fed directly
        to torch.nn.CrossEntropyLoss, which requires class indices in
        [0, C-1] (the raw 1-3 labels would raise at train time).
    """
    dataMat = []
    labelMat = []
    # Context manager guarantees the file handle is closed.
    with open(filename) as fr:
        for line in fr:
            curLine = line.strip().split(',')
            if not curLine[0]:
                continue  # skip blank lines defensively
            dataMat.append(list(map(float, curLine[1:])))
            # Raw labels are 1..3; shift to 0..2 for CrossEntropyLoss.
            labelMat.append(int(curLine[0]) - 1)
    return np.array(dataMat), np.array(labelMat)
def LL(x, y):
    """L2-normalize the samples row-wise, then project the 13-D wine
    features onto 2 discriminant directions with (supervised) LDA."""
    scaled = preprocessing.normalize(x, norm='l2')
    projector = LinearDiscriminantAnalysis(n_components=2)
    return projector.fit_transform(scaled, y)
# Load, project to 2-D, shuffle, then split into a training set and a
# held-out test set wrapped as DataLoaders.
dataMat, labelMat = loadDateSet('wine.data')
dataMat = LL(dataMat, labelMat)

# Shuffle samples and labels with the same random permutation.
pindex = np.random.permutation(dataMat.shape[0])
dataMat = dataMat[pindex, :]
labelMat = labelMat[pindex]

dataMat = torch.from_numpy(dataMat)
labelMat = torch.from_numpy(labelMat)

# First `split` samples form the test set, the rest the training set.
# Fix: the original used [0:27] and [28:], silently dropping sample 27.
split = 28
torch_dataset = Data.TensorDataset(dataMat[split:], labelMat[split:])
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=15,
    shuffle=True,
    num_workers=2,  # NOTE(review): workers require a __main__ guard on spawn platforms — confirm
)
torch_testset = Data.TensorDataset(dataMat[:split], labelMat[:split])
loader2 = Data.DataLoader(
    dataset=torch_testset,
    batch_size=29,
    shuffle=True,
    num_workers=2,
)
class MLP(torch.nn.Module):
    """2 -> 5 -> 3 multilayer perceptron for the LDA-projected wine data.

    forward() returns raw class logits: torch.nn.CrossEntropyLoss applies
    log-softmax internally, so emitting softmax probabilities (as the
    original did) double-applies softmax and flattens the gradients.
    Argmax over logits equals argmax over softmax probabilities, so
    prediction code is unaffected. Commented-out fc3 dead code removed.
    """

    def __init__(self):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(2, 5)
        self.fc2 = torch.nn.Linear(5, 3)

    def forward(self, x):
        # torch.sigmoid replaces the deprecated F.sigmoid.
        y = torch.sigmoid(self.fc1(x))
        return self.fc2(y)  # raw logits, shape (batch, 3)


model = MLP()
def train():
    """Train `model` on `loader` for n_epoch epochs, then plot the
    per-epoch average loss and the accuracy reported by test()."""
    epochs = []
    losses = []
    accuracies = []
    criterion = torch.nn.CrossEntropyLoss()
    optimzer = torch.optim.SGD(params=model.parameters(), lr=1)
    for epoch in range(n_epoch):
        running = 0
        for batch_x, batch_y in loader:
            optimzer.zero_grad()
            prediction = model(batch_x)
            loss = criterion(prediction, batch_y)
            loss.backward()
            optimzer.step()
            # Weight by batch size so the epoch average is per-sample.
            running += loss.item() * batch_x.size(0)
        epochs.append(epoch + 1)
        losses.append(running / len(loader.dataset))
        accuracies.append(test())
    # Loss curve.
    plt.plot(epochs, losses, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('loss')
    plt.grid()
    plt.show()
    # Accuracy curve.
    plt.plot(epochs, accuracies, ls="-.", lw=2, c="c", label="plot figure")
    plt.xlabel('num of train')
    plt.ylabel('acc')
    plt.grid()
    plt.show()
def test():
    """Evaluate `model` on the held-out test set; return accuracy in percent.

    Bug fix: the original iterated `loader` (the TRAINING DataLoader), so
    the reported "test" accuracy was actually training accuracy. Iterate
    `loader2`, which wraps torch_testset.
    """
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_dataMat, batch_lableMat in loader2:
            output = model(batch_dataMat)
            # Predicted class = index of the max score per row.
            _, p = torch.max(output.data, 1)
            total += batch_lableMat.size(0)
            correct += (p == batch_lableMat).sum().item()
    print(100 * correct / total)
    return 100 * correct / total
def main():
    """Train the MLP, then report final test-set accuracy."""
    train()
    test()


# Guard the entry point: the DataLoaders above use num_workers=2, and on
# spawn-based platforms (Windows/macOS) worker processes re-import this
# module — an unguarded main() call would recursively launch training.
if __name__ == "__main__":
    main()