from pathlib import Path
import requests
pathlib 自 Python 3.4 起成为 Python 标准库的一部分。Python 文档对它的定义是 Object-oriented filesystem paths(面向对象的文件系统路径),基本上可以代替 os.path 来处理路径。
# Target directory for the dataset; it is created (parents included) if missing.
DATA_PATH = Path(r"D:\\data666")
PATH = DATA_PATH.joinpath("minist")
PATH.mkdir(exist_ok=True, parents=True)
这里给 pathlib 的 mkdir 传了两个参数:parents=True 表示父目录不存在时一并创建;exist_ok=True 表示目录已经存在时不报错(不会抛出 FileExistsError)。
# Base URL the MNIST archive is downloaded from.
# NOTE(review): this host may no longer serve the file -- confirm, and switch
# to a maintained mirror if the download fails.
URL = "http://deeplearning.net/data/mnist/"
FILENAME = 'mnist.pkl.gz'

# Download only when no local copy exists yet.
if not (PATH / FILENAME).exists():
    response = requests.get(URL + FILENAME, timeout=30)
    # Fail loudly on an HTTP error instead of saving an error page as the archive.
    response.raise_for_status()
    # write_bytes opens, writes and closes the file, so no handle is left open.
    (PATH / FILENAME).write_bytes(response.content)
import pickle
import gzip
# Unpickle the gzip-compressed archive into train / validation / test splits.
# NOTE(review): pickle.load can execute code embedded in the file -- only load
# an archive that comes from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), mode="rb") as archive:
    splits = pickle.load(archive, encoding="latin-1")
(x_train, y_train), (x_valid, y_valid), (x_test, y_test) = splits

# Print the shape of each array, then the shape of a single training sample.
for array in (x_train, y_train, x_valid, y_valid, x_test, y_test):
    print(array.shape)
print(x_train[0].shape)
运行结果:
(50000, 784)
(50000,)
(10000, 784)
(10000,)
(10000, 784)
(10000,)
(784,)
关于 matplotlib.pyplot.imshow() 显示图像时的颜色:如果想要改变分类结果图显示出来的颜色,可以通过指定不同的 Colormap(颜色图谱)来实现。imshow() 的基本调用格式为:
matplotlib.pyplot.imshow(X, cmap=None)
X: 要绘制的图像或数组。
cmap: 颜色图谱(colormap),只对二维(标量)数据生效,默认使用 viridis;如果 X 本身是 RGB(A) 图像,则会忽略 cmap。例如:matplotlib.pyplot.imshow(img, cmap="jet")
其它可选的颜色图谱请参照下面的链接:http://www.cnblogs.com/denny402/p/5122594.html
主线程序:
from matplotlib import pyplot
import numpy as np
import torch
from torch.utils.data import DataLoader
%matplotlib inline
# Show the first training sample as a 28x28 image using the grayscale colormap.
first_digit = x_train[0].reshape((28, 28))
pyplot.imshow(first_digit, cmap="gray")
运行结果:
主线程序:
# Same sample without a cmap argument, so matplotlib's default colormap is used
# (viridis unless rcParams["image.cmap"] has been changed).
pyplot.imshow(x_train[0].reshape(28, 28))
运行结果:
主线程序:
print(type(x_train))
# Convert every split to a torch tensor in one pass. The label arrays are
# included as well: y_train is later wrapped in a TensorDataset, which only
# accepts tensors.
x_train, y_train, x_valid, y_valid, x_test, y_test = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid, x_test, y_test)
)
print(type(x_train))
运行结果:
<class 'numpy.ndarray'>
<class 'torch.Tensor'>
主线程序:
class Mnist_Logistic(torch.nn.Module):
    """Single linear layer mapping 784 input features to 10 output scores."""

    def __init__(self):
        super().__init__()
        self.lin = torch.nn.Linear(in_features=784, out_features=10)

    def forward(self, xb):
        """Return the linear layer's output for the batch ``xb``."""
        scores = self.lin(xb)
        return scores
def get_model(lr=0.1):
    """Build a fresh ``Mnist_Logistic`` model and an SGD optimizer for it.

    Args:
        lr: Learning rate handed to ``torch.optim.SGD``. Defaults to 0.1,
            so existing ``get_model()`` calls behave as before.

    Returns:
        A ``(model, optimizer)`` tuple; the optimizer updates the model's
        parameters.
    """
    model = Mnist_Logistic()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    return model, optimizer
# Wrap the tensors in datasets and batch them; only the training data is shuffled.
# TensorDataset is referenced through torch.utils.data because only DataLoader
# is imported by name in this file.
train_ds = torch.utils.data.TensorDataset(x_train, y_train)
train_dl = DataLoader(dataset=train_ds, batch_size=256, shuffle=True)
valid_ds = torch.utils.data.TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(dataset=valid_ds, batch_size=512)

loss_func = torch.nn.CrossEntropyLoss()
model, opt = get_model()

for epoch in range(10):
    model.train()  # switch to training mode before the optimisation pass
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)
        loss.backward()
        opt.step()
        opt.zero_grad()

    # Validation pass: evaluation mode, no gradient tracking.
    model.eval()
    with torch.no_grad():
        # Weight each batch loss by its batch size so that a smaller final
        # batch does not skew the per-sample average.
        valid_loss = sum(loss_func(model(xb), yb) * len(xb) for xb, yb in valid_dl)
    print("%d %f " % (epoch, valid_loss / len(valid_ds)))
运行结果:
0 0.455661
1 0.382214
2 0.352814
3 0.335510
4 0.325002
5 0.316920
6 0.309989
7 0.305217
8 0.302514
9 0.298360