import torch.nn as nn
# Build a small MLP with nn.Sequential; children are registered under the
# index names "0", "1", "2", exactly as positional construction would do.
net = nn.Sequential()
net.add_module("0", nn.Linear(784, 256))
net.add_module("1", nn.ReLU())
net.add_module("2", nn.Linear(256, 10))
print(net)
Sequential(
(0): Linear(in_features=784, out_features=256, bias=True)
(1): ReLU()
(2): Linear(in_features=256, out_features=10, bias=True)
)
import collections
import torch.nn as nn
# Named variant: passing an OrderedDict gives every child an explicit name
# instead of a positional index.
net2 = nn.Sequential(
    collections.OrderedDict(
        fc1=nn.Linear(784, 256),
        relu1=nn.ReLU(),
        fc2=nn.Linear(256, 10),
    )
)
print(net2)
Sequential(
(fc1): Linear(in_features=784, out_features=256, bias=True)
(relu1): ReLU()
(fc2): Linear(in_features=256, out_features=10, bias=True)
)
# nn.ModuleList stores sub-modules and supports list-like operations.
net = nn.ModuleList()
net.extend([nn.Linear(784, 256), nn.ReLU()])
net.append(nn.Linear(256, 10))  # list-style append
print(net[-1])  # list-style index access
print(net)
Linear(in_features=256, out_features=10, bias=True)
ModuleList(
(0): Linear(in_features=784, out_features=256, bias=True)
(1): ReLU()
(2): Linear(in_features=256, out_features=10, bias=True)
)
# nn.ModuleDict stores sub-modules and supports dict-like operations.
net = nn.ModuleDict(dict(linear=nn.Linear(784, 256), act=nn.ReLU()))
net.update({'output': nn.Linear(256, 10)})  # add an entry
print(net['linear'])  # access by key
print(net.output)  # entries are also reachable as attributes
print(net)
Linear(in_features=784, out_features=256, bias=True)
Linear(in_features=256, out_features=10, bias=True)
ModuleDict(
(linear): Linear(in_features=784, out_features=256, bias=True)
(act): ReLU()
(output): Linear(in_features=256, out_features=10, bias=True)
)
ResNet 在 ImageNet 竞赛中获得分类任务第一名、目标检测第一名,并在 COCO 数据集上获得目标检测第一名、图像分割第一名。
ResNet50 讲解:网络的输入图片大小是 224x224,依次经过 conv1、conv2、conv3、conv4、conv5,最后再经过平均池化和全连接层。由于中间有重复使用的模块,所以我们需要将它们写成一个类,以便重复调用。
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-T62Ryf1x-1692613704487)(attachment:image.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-HLB5OCZK-1692613704487)(attachment:9d746212c1fad0ba497b73093a5db2b0_Center.png)]
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-cxrFganj-1692613704488)(attachment:image.png)]
import torch.nn as nn
import torch
class Block(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Sequence of exactly three ints: the output channels of
            the first 1x1, the 3x3 and the last 1x1 convolution.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
        downsample: If True, the shortcut branch is a 1x1 convolution followed
            by batch normalization instead of the identity.

    A projection shortcut is also created when ``downsample`` is False but the
    identity could not be added to the main branch (``stride != 1`` or
    ``in_channels != out_channels[2]``); previously that configuration failed
    in ``forward`` with a shape mismatch. ``self.downsample`` stores whether
    the projection is actually in use.

    Raises:
        ValueError: If ``out_channels`` does not unpack into three values.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=False):
        super(Block, self).__init__()
        out_channel_01, out_channel_02, out_channel_03 = out_channels
        # The identity shortcut only works when the main branch keeps both the
        # spatial size and the channel count of the input.
        shape_changes = stride != 1 or in_channels != out_channel_03
        self.downsample = bool(downsample) or shape_changes
        self.relu = nn.ReLU(inplace=True)

        # 1x1 convolution: change the channel count.
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channel_01, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channel_01),
            nn.ReLU(inplace=True),
        )
        # 3x3 convolution: the only place where spatial striding happens.
        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channel_01, out_channel_02, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channel_02),
            nn.ReLU(inplace=True),
        )
        # 1x1 convolution: no ReLU here, it is applied after the residual add.
        self.conv3 = nn.Sequential(
            nn.Conv2d(out_channel_02, out_channel_03, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channel_03),
        )
        if self.downsample:
            # Projection shortcut: matches the main branch in channels and stride.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channel_03, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel_03),
            )

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        x_shortcut = x
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        if self.downsample:
            x_shortcut = self.shortcut(x_shortcut)
        x = x + x_shortcut
        x = self.relu(x)
        return x
class Resnet50(nn.Module):
    """ResNet-50 classifier assembled from bottleneck ``Block`` modules.

    The network is a 7x7 stem convolution, a max pool, four stages of
    3, 4, 6 and 3 bottleneck blocks, a global average pool and a linear head.

    Args:
        num_classes: Number of output logits of the final linear layer.
            Defaults to 1000, the previously hard-coded value.
    """

    def __init__(self, num_classes=1000):
        super(Resnet50, self).__init__()
        # Stem. NOTE(review): unlike the convolutions inside Block, this one
        # keeps its bias even though BatchNorm follows; left as is so the
        # parameter set stays the same.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Number of bottleneck blocks in conv2 .. conv5.
        Layers = [3, 4, 6, 3]
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = self._make_layer(64, (64, 64, 256), Layers[0], 1)
        self.conv3 = self._make_layer(256, (128, 128, 512), Layers[1], 2)
        self.conv4 = self._make_layer(512, (256, 256, 1024), Layers[2], 2)
        self.conv5 = self._make_layer(1024, (512, 512, 2048), Layers[3], 2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Kept as a one-element Sequential so the parameter names stay
        # "fc.0.weight" / "fc.0.bias".
        self.fc = nn.Sequential(
            nn.Linear(2048, num_classes)
        )

    def forward(self, input):
        """Return the class logits, one row per sample of ``input``."""
        x = self.conv1(input)
        x = self.maxpool(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.conv5(x)
        x = self.avgpool(x)
        # Flatten everything except the batch dimension for the linear head.
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

    def _make_layer(self, in_channels, out_channels, blocks, stride=1):
        """Build one stage made of ``blocks`` bottleneck blocks.

        The first block uses a projection shortcut and the given ``stride``;
        the remaining blocks use stride 1 and an identity shortcut, taking
        ``out_channels[2]`` channels as input.
        """
        layers = [Block(in_channels, out_channels, stride=stride, downsample=True)]
        layers.extend(
            Block(out_channels[2], out_channels, stride=1, downsample=False)
            for _ in range(1, blocks)
        )
        return nn.Sequential(*layers)
# Print the output shape of every top-level child of the network
net = Resnet50()
x = torch.rand((10, 3, 224, 224))
# Only shapes are inspected here, so autograd is disabled to avoid keeping
# every intermediate activation of the batch in memory.
with torch.no_grad():
    for name, layer in net.named_children():
        if name == "fc":
            # The linear head needs a 2-D input: flatten all but the batch dim.
            x = x.view(x.size(0), -1)
        x = layer(x)
        print(name, 'output shaoe:', x.shape)
conv1 output shaoe: torch.Size([10, 64, 112, 112])
maxpool output shaoe: torch.Size([10, 64, 56, 56])
conv2 output shaoe: torch.Size([10, 256, 56, 56])
conv3 output shaoe: torch.Size([10, 512, 28, 28])
conv4 output shaoe: torch.Size([10, 1024, 14, 14])
conv5 output shaoe: torch.Size([10, 2048, 7, 7])
avgpool output shaoe: torch.Size([10, 2048, 1, 1])
fc output shaoe: torch.Size([10, 1000])
# Visualize the network structure with torchinfo
from torchinfo import summary

net = Resnet50()
summary(net, input_size=(10, 3, 224, 224))
D:\Users\xulele\Anaconda3\lib\site-packages\torchinfo\torchinfo.py:477: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
action_fn=lambda data: sys.getsizeof(data.storage()),
D:\Users\xulele\Anaconda3\lib\site-packages\torch\storage.py:665: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
return super().__sizeof__() + self.nbytes()
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
Resnet50 [10, 1000] --
├─Sequential: 1-1 [10, 64, 112, 112] --
│ └─Conv2d: 2-1 [10, 64, 112, 112] 9,472
│ └─BatchNorm2d: 2-2 [10, 64, 112, 112] 128
│ └─ReLU: 2-3 [10, 64, 112, 112] --
├─MaxPool2d: 1-2 [10, 64, 56, 56] --
├─Sequential: 1-3 [10, 256, 56, 56] --
│ └─Block: 2-4 [10, 256, 56, 56] --
│ │ └─Sequential: 3-1 [10, 64, 56, 56] 4,224
│ │ └─Sequential: 3-2 [10, 64, 56, 56] 36,992
│ │ └─Sequential: 3-3 [10, 256, 56, 56] 16,896
│ │ └─Sequential: 3-4 [10, 256, 56, 56] 16,896
│ │ └─ReLU: 3-5 [10, 256, 56, 56] --
│ └─Block: 2-5 [10, 256, 56, 56] --
│ │ └─Sequential: 3-6 [10, 64, 56, 56] 16,512
│ │ └─Sequential: 3-7 [10, 64, 56, 56] 36,992
│ │ └─Sequential: 3-8 [10, 256, 56, 56] 16,896
│ │ └─ReLU: 3-9 [10, 256, 56, 56] --
│ └─Block: 2-6 [10, 256, 56, 56] --
│ │ └─Sequential: 3-10 [10, 64, 56, 56] 16,512
│ │ └─Sequential: 3-11 [10, 64, 56, 56] 36,992
│ │ └─Sequential: 3-12 [10, 256, 56, 56] 16,896
│ │ └─ReLU: 3-13 [10, 256, 56, 56] --
├─Sequential: 1-4 [10, 512, 28, 28] --
│ └─Block: 2-7 [10, 512, 28, 28] --
│ │ └─Sequential: 3-14 [10, 128, 56, 56] 33,024
│ │ └─Sequential: 3-15 [10, 128, 28, 28] 147,712
│ │ └─Sequential: 3-16 [10, 512, 28, 28] 66,560
│ │ └─Sequential: 3-17 [10, 512, 28, 28] 132,096
│ │ └─ReLU: 3-18 [10, 512, 28, 28] --
│ └─Block: 2-8 [10, 512, 28, 28] --
│ │ └─Sequential: 3-19 [10, 128, 28, 28] 65,792
│ │ └─Sequential: 3-20 [10, 128, 28, 28] 147,712
│ │ └─Sequential: 3-21 [10, 512, 28, 28] 66,560
│ │ └─ReLU: 3-22 [10, 512, 28, 28] --
│ └─Block: 2-9 [10, 512, 28, 28] --
│ │ └─Sequential: 3-23 [10, 128, 28, 28] 65,792
│ │ └─Sequential: 3-24 [10, 128, 28, 28] 147,712
│ │ └─Sequential: 3-25 [10, 512, 28, 28] 66,560
│ │ └─ReLU: 3-26 [10, 512, 28, 28] --
│ └─Block: 2-10 [10, 512, 28, 28] --
│ │ └─Sequential: 3-27 [10, 128, 28, 28] 65,792
│ │ └─Sequential: 3-28 [10, 128, 28, 28] 147,712
│ │ └─Sequential: 3-29 [10, 512, 28, 28] 66,560
│ │ └─ReLU: 3-30 [10, 512, 28, 28] --
├─Sequential: 1-5 [10, 1024, 14, 14] --
│ └─Block: 2-11 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-31 [10, 256, 28, 28] 131,584
│ │ └─Sequential: 3-32 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-33 [10, 1024, 14, 14] 264,192
│ │ └─Sequential: 3-34 [10, 1024, 14, 14] 526,336
│ │ └─ReLU: 3-35 [10, 1024, 14, 14] --
│ └─Block: 2-12 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-36 [10, 256, 14, 14] 262,656
│ │ └─Sequential: 3-37 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-38 [10, 1024, 14, 14] 264,192
│ │ └─ReLU: 3-39 [10, 1024, 14, 14] --
│ └─Block: 2-13 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-40 [10, 256, 14, 14] 262,656
│ │ └─Sequential: 3-41 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-42 [10, 1024, 14, 14] 264,192
│ │ └─ReLU: 3-43 [10, 1024, 14, 14] --
│ └─Block: 2-14 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-44 [10, 256, 14, 14] 262,656
│ │ └─Sequential: 3-45 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-46 [10, 1024, 14, 14] 264,192
│ │ └─ReLU: 3-47 [10, 1024, 14, 14] --
│ └─Block: 2-15 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-48 [10, 256, 14, 14] 262,656
│ │ └─Sequential: 3-49 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-50 [10, 1024, 14, 14] 264,192
│ │ └─ReLU: 3-51 [10, 1024, 14, 14] --
│ └─Block: 2-16 [10, 1024, 14, 14] --
│ │ └─Sequential: 3-52 [10, 256, 14, 14] 262,656
│ │ └─Sequential: 3-53 [10, 256, 14, 14] 590,336
│ │ └─Sequential: 3-54 [10, 1024, 14, 14] 264,192
│ │ └─ReLU: 3-55 [10, 1024, 14, 14] --
├─Sequential: 1-6 [10, 2048, 7, 7] --
│ └─Block: 2-17 [10, 2048, 7, 7] --
│ │ └─Sequential: 3-56 [10, 512, 14, 14] 525,312
│ │ └─Sequential: 3-57 [10, 512, 7, 7] 2,360,320
│ │ └─Sequential: 3-58 [10, 2048, 7, 7] 1,052,672
│ │ └─Sequential: 3-59 [10, 2048, 7, 7] 2,101,248
│ │ └─ReLU: 3-60 [10, 2048, 7, 7] --
│ └─Block: 2-18 [10, 2048, 7, 7] --
│ │ └─Sequential: 3-61 [10, 512, 7, 7] 1,049,600
│ │ └─Sequential: 3-62 [10, 512, 7, 7] 2,360,320
│ │ └─Sequential: 3-63 [10, 2048, 7, 7] 1,052,672
│ │ └─ReLU: 3-64 [10, 2048, 7, 7] --
│ └─Block: 2-19 [10, 2048, 7, 7] --
│ │ └─Sequential: 3-65 [10, 512, 7, 7] 1,049,600
│ │ └─Sequential: 3-66 [10, 512, 7, 7] 2,360,320
│ │ └─Sequential: 3-67 [10, 2048, 7, 7] 1,052,672
│ │ └─ReLU: 3-68 [10, 2048, 7, 7] --
├─AdaptiveAvgPool2d: 1-7 [10, 2048, 1, 1] --
├─Sequential: 1-8 [10, 1000] --
│ └─Linear: 2-20 [10, 1000] 2,049,000
==========================================================================================
Total params: 25,557,096
Trainable params: 25,557,096
Non-trainable params: 0
Total mult-adds (G): 40.90
==========================================================================================
Input size (MB): 6.02
Forward/backward pass size (MB): 1778.32
Params size (MB): 102.23
Estimated Total Size (MB): 1886.57
==========================================================================================
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
import torchvision
import os
import numpy as np
import torch
# ---- Hyper-parameters ----
batch_size = 16    # mini-batch size; 32, 64 or 128 are other options
lr = 1e-4          # learning rate of the optimizer
max_epochs = 2     # number of training epochs

# Option 1: choose the GPUs through an environment variable
# os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'  # expose GPUs 0 and 1
# Option 2: create a "device" and call .to(device) on whatever should live on the GPU
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")  # GPU 0 if available

# ---- Data loading ----
# CIFAR-10 serves as the example of how to build a Dataset
from torchvision import datasets

# "data_transform" holds the per-image preprocessing (flip, crop, normalize, ...);
# it can be customized freely
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

train_cifar_dataset = datasets.CIFAR10(
    root='cifar10', train=True, download=False, transform=data_transform
)
test_cifar_dataset = datasets.CIFAR10(
    root='cifar10', train=False, download=False, transform=data_transform
)

# With the Datasets in place, DataLoader reads them batch by batch
train_loader = DataLoader(
    train_cifar_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)
test_loader = DataLoader(
    test_cifar_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
)
# from tensorboard import SummaryWriter
from torch.utils.tensorboard import SummaryWriter

# ---- Training & validation ----
# One writer is enough; a second SummaryWriter on the same directory only
# produces a second event file.
writer = SummaryWriter('./runs')

# Set fixed random number seed
torch.manual_seed(42)

# Model, loss function and optimizer
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# NOTE(review): Resnet50() ends in a 1000-way linear layer while CIFAR-10 has
# 10 classes; training still runs, but most logits are never a target.
My_model = Resnet50()
My_model = My_model.to(device)
# Cross-entropy; by default it returns the MEAN loss over the batch.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(My_model.parameters(), lr=lr)

epoch = max_epochs  # total number of epochs (not the current epoch index)
total_step = len(train_loader)
train_all_loss = []
test_all_loss = []

for epoch_idx in range(epoch):
    # ---------------- training ----------------
    My_model.train()
    train_total_loss = 0.0  # sum of per-sample losses over the epoch
    train_total_num = 0
    train_total_correct = 0
    for batch_idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        num_samples = labels.shape[0]

        if batch_idx == 0:
            # Write the network graph once, at the first batch of the first epoch.
            if epoch_idx == 0:
                writer.add_graph(My_model, input_to_model=images, verbose=True)
            # Write one example image per epoch. The inputs were normalized
            # with mean 0.5 / std 0.5, so map them back to [0, 1] first.
            example = (images[0] * 0.5 + 0.5).clamp(0, 1)
            writer.add_image("Example input", example, global_step=epoch_idx)

        outputs = My_model(images)
        loss = criterion(outputs, labels)

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()  # already averaged over the batch
        train_total_correct += (outputs.argmax(1) == labels).sum().item()
        train_total_num += num_samples
        # Weight by the batch size so the epoch average is a per-sample mean.
        train_total_loss += batch_loss * num_samples

        # Per-batch statistics; the step keeps increasing across epochs.
        global_step = epoch_idx * total_step + batch_idx
        writer.add_scalar("Loss/Minibatches", batch_loss, global_step)
        print("Epoch [{}/{}], Iter [{}/{}], train_loss:{:.4f}".format(
            epoch_idx + 1, epoch, batch_idx + 1, total_step, batch_loss
        ))

    train_epoch_loss = train_total_loss / train_total_num
    # Write the mean training loss of this epoch
    writer.add_scalar("Loss/Epochs", train_epoch_loss, epoch_idx)

    # ---------------- validation ----------------
    My_model.eval()
    test_total_loss = 0.0  # sum of per-sample losses over the test set
    test_total_correct = 0
    test_total_num = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = My_model(images)
            loss = criterion(outputs, labels)
            test_total_correct += (outputs.argmax(1) == labels).sum().item()
            test_total_loss += loss.item() * labels.shape[0]
            test_total_num += labels.shape[0]
    test_epoch_loss = test_total_loss / test_total_num

    print("Epoch [{}/{}], train_loss:{:.4f}, train_acc:{:.4f}%, test_loss:{:.4f}, test_acc:{:.4f}%".format(
        epoch_idx + 1,
        epoch,
        train_epoch_loss,
        train_total_correct / train_total_num * 100,
        test_epoch_loss,
        test_total_correct / test_total_num * 100,
    ))
    train_all_loss.append(np.round(train_epoch_loss, 4))
    test_all_loss.append(np.round(test_epoch_loss, 4))

# Make sure all pending events reach the disk.
writer.flush()
Epoch [1/2], Iter [1/3125], train_loss:0.430043
Epoch [1/2], Iter [2/3125], train_loss:0.399217
Epoch [1/2], Iter [3/3125], train_loss:0.391730
Epoch [1/2], Iter [4/3125], train_loss:0.381970
Epoch [1/2], Iter [5/3125], train_loss:0.337084
Epoch [1/2], Iter [6/3125], train_loss:0.322986
Epoch [1/2], Iter [7/3125], train_loss:0.328911
Epoch [1/2], Iter [8/3125], train_loss:0.287385
Epoch [1/2], Iter [9/3125], train_loss:0.289794
Epoch [1/2], Iter [10/3125], train_loss:0.247583
Epoch [1/2], Iter [11/3125], train_loss:0.239406
Epoch [1/2], Iter [12/3125], train_loss:0.252444
Epoch [1/2], Iter [13/3125], train_loss:0.204779
Epoch [1/2], Iter [14/3125], train_loss:0.197130
Epoch [1/2], Iter [15/3125], train_loss:0.198398
Epoch [1/2], Iter [16/3125], train_loss:0.234318
Epoch [1/2], Iter [17/3125], train_loss:0.175272
Epoch [1/2], Iter [18/3125], train_loss:0.175399
Epoch [1/2], Iter [19/3125], train_loss:0.166614
Epoch [1/2], Iter [20/3125], train_loss:0.193967
Epoch [1/2], Iter [21/3125], train_loss:0.197993
Epoch [1/2], Iter [22/3125], train_loss:0.159795
Epoch [1/2], Iter [23/3125], train_loss:0.164005
Epoch [1/2], Iter [24/3125], train_loss:0.170589
Epoch [1/2], Iter [25/3125], train_loss:0.138586
Epoch [1/2], Iter [26/3125], train_loss:0.160901
Epoch [1/2], Iter [27/3125], train_loss:0.159692
Epoch [1/2], Iter [28/3125], train_loss:0.174360
Epoch [1/2], Iter [29/3125], train_loss:0.166350
Epoch [1/2], Iter [30/3125], train_loss:0.163763
Epoch [1/2], Iter [31/3125], train_loss:0.174778
Epoch [1/2], Iter [32/3125], train_loss:0.169331
Epoch [1/2], Iter [33/3125], train_loss:0.151900
Epoch [1/2], Iter [34/3125], train_loss:0.167900
Epoch [1/2], Iter [35/3125], train_loss:0.174177
Epoch [1/2], Iter [36/3125], train_loss:0.174313
Epoch [1/2], Iter [37/3125], train_loss:0.165772
Epoch [1/2], Iter [38/3125], train_loss:0.163259
Epoch [1/2], Iter [39/3125], train_loss:0.157740
Epoch [1/2], Iter [40/3125], train_loss:0.176562
Epoch [1/2], Iter [41/3125], train_loss:0.173564
Epoch [1/2], Iter [42/3125], train_loss:0.167849
Epoch [1/2], Iter [43/3125], train_loss:0.158219
Epoch [1/2], Iter [44/3125], train_loss:0.153129
Epoch [1/2], Iter [45/3125], train_loss:0.165890
Epoch [1/2], Iter [46/3125], train_loss:0.175445
Epoch [1/2], Iter [47/3125], train_loss:0.161246
Epoch [1/2], Iter [48/3125], train_loss:0.152963
Epoch [1/2], Iter [49/3125], train_loss:0.159098
Epoch [1/2], Iter [50/3125], train_loss:0.149376
Epoch [1/2], Iter [51/3125], train_loss:0.169790
Epoch [1/2], Iter [52/3125], train_loss:0.156566
Epoch [1/2], Iter [53/3125], train_loss:0.137577
Epoch [1/2], Iter [54/3125], train_loss:0.154473
Epoch [1/2], Iter [55/3125], train_loss:0.170818
Epoch [1/2], Iter [56/3125], train_loss:0.168578
Epoch [1/2], Iter [57/3125], train_loss:0.127439
Epoch [1/2], Iter [58/3125], train_loss:0.130195
Epoch [1/2], Iter [59/3125], train_loss:0.170215
Epoch [1/2], Iter [60/3125], train_loss:0.137980
Epoch [1/2], Iter [61/3125], train_loss:0.190205
Epoch [1/2], Iter [62/3125], train_loss:0.173095
Epoch [1/2], Iter [63/3125], train_loss:0.172991
Epoch [1/2], Iter [64/3125], train_loss:0.185437
Epoch [1/2], Iter [65/3125], train_loss:0.143422
Epoch [1/2], Iter [66/3125], train_loss:0.167832
Epoch [1/2], Iter [67/3125], train_loss:0.143599
Epoch [1/2], Iter [68/3125], train_loss:0.140594
Epoch [1/2], Iter [69/3125], train_loss:0.136511
Epoch [1/2], Iter [70/3125], train_loss:0.148203
Epoch [1/2], Iter [71/3125], train_loss:0.136001
Epoch [1/2], Iter [72/3125], train_loss:0.127203
Epoch [1/2], Iter [73/3125], train_loss:0.148387
Epoch [1/2], Iter [74/3125], train_loss:0.160355
Epoch [1/2], Iter [75/3125], train_loss:0.142079
Epoch [1/2], Iter [76/3125], train_loss:0.178135
Epoch [1/2], Iter [77/3125], train_loss:0.169931
Epoch [1/2], Iter [78/3125], train_loss:0.164737
Epoch [1/2], Iter [79/3125], train_loss:0.137772
Epoch [1/2], Iter [80/3125], train_loss:0.140191
Epoch [1/2], Iter [81/3125], train_loss:0.168053
Epoch [1/2], Iter [82/3125], train_loss:0.169713
Epoch [1/2], Iter [83/3125], train_loss:0.166053
Epoch [1/2], Iter [84/3125], train_loss:0.146992
Epoch [1/2], Iter [85/3125], train_loss:0.138336
Epoch [1/2], Iter [86/3125], train_loss:0.133364
Epoch [1/2], Iter [87/3125], train_loss:0.147147
Epoch [1/2], Iter [88/3125], train_loss:0.165000
Epoch [1/2], Iter [89/3125], train_loss:0.187516
Epoch [1/2], Iter [90/3125], train_loss:0.152296
Epoch [1/2], Iter [91/3125], train_loss:0.159449
Epoch [1/2], Iter [92/3125], train_loss:0.155747
Epoch [1/2], Iter [93/3125], train_loss:0.186031
Epoch [1/2], Iter [94/3125], train_loss:0.161650
Epoch [1/2], Iter [95/3125], train_loss:0.180560
Epoch [1/2], Iter [96/3125], train_loss:0.152180
Epoch [1/2], Iter [97/3125], train_loss:0.156310
Epoch [1/2], Iter [98/3125], train_loss:0.157958
Epoch [1/2], Iter [99/3125], train_loss:0.153323
Epoch [1/2], Iter [100/3125], train_loss:0.163590
Epoch [1/2], Iter [101/3125], train_loss:0.139193
Epoch [1/2], Iter [102/3125], train_loss:0.182074
Epoch [1/2], Iter [103/3125], train_loss:0.171562
Epoch [1/2], Iter [104/3125], train_loss:0.135230
Epoch [1/2], Iter [105/3125], train_loss:0.157589
Epoch [1/2], Iter [106/3125], train_loss:0.193017
Epoch [1/2], Iter [107/3125], train_loss:0.149230
Epoch [1/2], Iter [108/3125], train_loss:0.122373
Epoch [1/2], Iter [109/3125], train_loss:0.145265
Epoch [1/2], Iter [110/3125], train_loss:0.152513
Epoch [1/2], Iter [111/3125], train_loss:0.156356
Epoch [1/2], Iter [112/3125], train_loss:0.141945
Epoch [1/2], Iter [113/3125], train_loss:0.160180
Epoch [1/2], Iter [114/3125], train_loss:0.140410
Epoch [1/2], Iter [115/3125], train_loss:0.141819
Epoch [1/2], Iter [116/3125], train_loss:0.150955
Epoch [1/2], Iter [117/3125], train_loss:0.135359
Epoch [1/2], Iter [118/3125], train_loss:0.166497
Epoch [1/2], Iter [119/3125], train_loss:0.142630
Epoch [1/2], Iter [120/3125], train_loss:0.174121
Epoch [1/2], Iter [121/3125], train_loss:0.158250
Epoch [1/2], Iter [122/3125], train_loss:0.146818
Epoch [1/2], Iter [123/3125], train_loss:0.149903
Epoch [1/2], Iter [124/3125], train_loss:0.150738
Epoch [1/2], Iter [125/3125], train_loss:0.152311
Epoch [1/2], Iter [126/3125], train_loss:0.148560
Epoch [1/2], Iter [127/3125], train_loss:0.134343
Epoch [1/2], Iter [128/3125], train_loss:0.144648
Epoch [1/2], Iter [129/3125], train_loss:0.150432
Epoch [1/2], Iter [130/3125], train_loss:0.126187
Epoch [1/2], Iter [131/3125], train_loss:0.137051
Epoch [1/2], Iter [132/3125], train_loss:0.145356
Epoch [1/2], Iter [133/3125], train_loss:0.140084
Epoch [1/2], Iter [134/3125], train_loss:0.158875
Epoch [1/2], Iter [135/3125], train_loss:0.152066
Epoch [1/2], Iter [136/3125], train_loss:0.147993
Epoch [1/2], Iter [137/3125], train_loss:0.137815
Epoch [1/2], Iter [138/3125], train_loss:0.157255
Epoch [1/2], Iter [139/3125], train_loss:0.172245
Epoch [1/2], Iter [140/3125], train_loss:0.119922
Epoch [1/2], Iter [141/3125], train_loss:0.147535
Epoch [1/2], Iter [142/3125], train_loss:0.135512
Epoch [1/2], Iter [143/3125], train_loss:0.132385
Epoch [1/2], Iter [144/3125], train_loss:0.167151
Epoch [1/2], Iter [145/3125], train_loss:0.173200
Epoch [1/2], Iter [146/3125], train_loss:0.153549
Epoch [1/2], Iter [147/3125], train_loss:0.147774
Epoch [1/2], Iter [148/3125], train_loss:0.138399
Epoch [1/2], Iter [149/3125], train_loss:0.147270
Epoch [1/2], Iter [150/3125], train_loss:0.146461
Epoch [1/2], Iter [151/3125], train_loss:0.127806
Epoch [1/2], Iter [152/3125], train_loss:0.143855
Epoch [1/2], Iter [153/3125], train_loss:0.162357
Epoch [1/2], Iter [154/3125], train_loss:0.099439
Epoch [1/2], Iter [155/3125], train_loss:0.156767
Epoch [1/2], Iter [156/3125], train_loss:0.141598
Epoch [1/2], Iter [157/3125], train_loss:0.144462
Epoch [1/2], Iter [158/3125], train_loss:0.144916
Epoch [1/2], Iter [159/3125], train_loss:0.140672
Epoch [1/2], Iter [160/3125], train_loss:0.141314
Epoch [1/2], Iter [161/3125], train_loss:0.159581
Epoch [1/2], Iter [162/3125], train_loss:0.130852
Epoch [1/2], Iter [163/3125], train_loss:0.141293
Epoch [1/2], Iter [164/3125], train_loss:0.146917
Epoch [1/2], Iter [165/3125], train_loss:0.147925
Epoch [1/2], Iter [166/3125], train_loss:0.152431
Epoch [1/2], Iter [167/3125], train_loss:0.151558
Epoch [1/2], Iter [168/3125], train_loss:0.141326
Epoch [1/2], Iter [169/3125], train_loss:0.165799
Epoch [1/2], Iter [170/3125], train_loss:0.174329
Epoch [1/2], Iter [171/3125], train_loss:0.138570
Epoch [1/2], Iter [172/3125], train_loss:0.117236
Epoch [1/2], Iter [173/3125], train_loss:0.116505
Epoch [1/2], Iter [174/3125], train_loss:0.169864
Epoch [1/2], Iter [175/3125], train_loss:0.180966
Epoch [1/2], Iter [176/3125], train_loss:0.157741
Epoch [1/2], Iter [177/3125], train_loss:0.158464
Epoch [1/2], Iter [178/3125], train_loss:0.169416
Epoch [1/2], Iter [179/3125], train_loss:0.135209
Epoch [1/2], Iter [180/3125], train_loss:0.149782
Epoch [1/2], Iter [181/3125], train_loss:0.145131
Epoch [1/2], Iter [182/3125], train_loss:0.163330
Epoch [1/2], Iter [183/3125], train_loss:0.148288
Epoch [1/2], Iter [184/3125], train_loss:0.162434
Epoch [1/2], Iter [185/3125], train_loss:0.138171
Epoch [1/2], Iter [186/3125], train_loss:0.174453
Epoch [1/2], Iter [187/3125], train_loss:0.152246
Epoch [1/2], Iter [188/3125], train_loss:0.145182
Epoch [1/2], Iter [189/3125], train_loss:0.138013
Epoch [1/2], Iter [190/3125], train_loss:0.129477
Epoch [1/2], Iter [191/3125], train_loss:0.167296
Epoch [1/2], Iter [192/3125], train_loss:0.151581
Epoch [1/2], Iter [193/3125], train_loss:0.129222
Epoch [1/2], Iter [194/3125], train_loss:0.144835
Epoch [1/2], Iter [195/3125], train_loss:0.155114
Epoch [1/2], Iter [196/3125], train_loss:0.159840
Epoch [1/2], Iter [197/3125], train_loss:0.140606
Epoch [1/2], Iter [198/3125], train_loss:0.120595
Epoch [1/2], Iter [199/3125], train_loss:0.166237
Epoch [1/2], Iter [200/3125], train_loss:0.139809
Epoch [1/2], Iter [201/3125], train_loss:0.152461
Epoch [1/2], Iter [202/3125], train_loss:0.180673
Epoch [1/2], Iter [203/3125], train_loss:0.152161
Epoch [1/2], Iter [204/3125], train_loss:0.162040
Epoch [1/2], Iter [205/3125], train_loss:0.116725
Epoch [1/2], Iter [206/3125], train_loss:0.149293
Epoch [1/2], Iter [207/3125], train_loss:0.133494
Epoch [1/2], Iter [208/3125], train_loss:0.151276
Epoch [1/2], Iter [209/3125], train_loss:0.135684
Epoch [1/2], Iter [210/3125], train_loss:0.146015
Epoch [1/2], Iter [211/3125], train_loss:0.154200
Epoch [1/2], Iter [212/3125], train_loss:0.163789
Epoch [1/2], Iter [213/3125], train_loss:0.143287
Epoch [1/2], Iter [214/3125], train_loss:0.156911
Epoch [1/2], Iter [215/3125], train_loss:0.148797
Epoch [1/2], Iter [216/3125], train_loss:0.135099
Epoch [1/2], Iter [217/3125], train_loss:0.147233
Epoch [1/2], Iter [218/3125], train_loss:0.132503
Epoch [1/2], Iter [219/3125], train_loss:0.131973
Epoch [1/2], Iter [220/3125], train_loss:0.142257
Epoch [1/2], Iter [221/3125], train_loss:0.131663
Epoch [1/2], Iter [222/3125], train_loss:0.165459
Epoch [1/2], Iter [223/3125], train_loss:0.140871
Epoch [1/2], Iter [224/3125], train_loss:0.176863
Epoch [1/2], Iter [225/3125], train_loss:0.125788
Epoch [1/2], Iter [226/3125], train_loss:0.145382
Epoch [1/2], Iter [227/3125], train_loss:0.133045
Epoch [1/2], Iter [228/3125], train_loss:0.147877
Epoch [1/2], Iter [229/3125], train_loss:0.133725
Epoch [1/2], Iter [230/3125], train_loss:0.122687
Epoch [1/2], Iter [231/3125], train_loss:0.160091
Epoch [1/2], Iter [232/3125], train_loss:0.158228
Epoch [1/2], Iter [233/3125], train_loss:0.149637
Epoch [1/2], Iter [234/3125], train_loss:0.115466
Epoch [1/2], Iter [235/3125], train_loss:0.119706
Epoch [1/2], Iter [236/3125], train_loss:0.165916
Epoch [1/2], Iter [237/3125], train_loss:0.127058
Epoch [1/2], Iter [238/3125], train_loss:0.135110
Epoch [1/2], Iter [239/3125], train_loss:0.131467
Epoch [1/2], Iter [240/3125], train_loss:0.149502
Epoch [1/2], Iter [241/3125], train_loss:0.147800
Epoch [1/2], Iter [242/3125], train_loss:0.164283
Epoch [1/2], Iter [243/3125], train_loss:0.152627
Epoch [1/2], Iter [244/3125], train_loss:0.139253
Epoch [1/2], Iter [245/3125], train_loss:0.140246
Epoch [1/2], Iter [246/3125], train_loss:0.128954
Epoch [1/2], Iter [247/3125], train_loss:0.148527
Epoch [1/2], Iter [248/3125], train_loss:0.132301
Epoch [1/2], Iter [249/3125], train_loss:0.154204
Epoch [1/2], Iter [250/3125], train_loss:0.128128
Epoch [1/2], Iter [251/3125], train_loss:0.157499
Epoch [1/2], Iter [252/3125], train_loss:0.134000
Epoch [1/2], Iter [253/3125], train_loss:0.153699
Epoch [1/2], Iter [254/3125], train_loss:0.153093
Epoch [1/2], Iter [255/3125], train_loss:0.134238
Epoch [1/2], Iter [256/3125], train_loss:0.151899
Epoch [1/2], Iter [257/3125], train_loss:0.129526
Epoch [1/2], Iter [258/3125], train_loss:0.118807
Epoch [1/2], Iter [259/3125], train_loss:0.140177
Epoch [1/2], Iter [260/3125], train_loss:0.155319
Epoch [1/2], Iter [261/3125], train_loss:0.138391
Epoch [1/2], Iter [262/3125], train_loss:0.150529
Epoch [1/2], Iter [263/3125], train_loss:0.144276
Epoch [1/2], Iter [264/3125], train_loss:0.140310
Epoch [1/2], Iter [265/3125], train_loss:0.121239
Epoch [1/2], Iter [266/3125], train_loss:0.167146
Epoch [1/2], Iter [267/3125], train_loss:0.189327
Epoch [1/2], Iter [268/3125], train_loss:0.110306
Epoch [1/2], Iter [269/3125], train_loss:0.151858
Epoch [1/2], Iter [270/3125], train_loss:0.166866
Epoch [1/2], Iter [271/3125], train_loss:0.153607
Epoch [1/2], Iter [272/3125], train_loss:0.120162
Epoch [1/2], Iter [273/3125], train_loss:0.173903
Epoch [1/2], Iter [274/3125], train_loss:0.161149
Epoch [1/2], Iter [275/3125], train_loss:0.170201
Epoch [1/2], Iter [276/3125], train_loss:0.145268
Epoch [1/2], Iter [277/3125], train_loss:0.136687
Epoch [1/2], Iter [278/3125], train_loss:0.144772
Epoch [1/2], Iter [279/3125], train_loss:0.151712
Epoch [1/2], Iter [280/3125], train_loss:0.120698
Epoch [1/2], Iter [281/3125], train_loss:0.144862
Epoch [1/2], Iter [282/3125], train_loss:0.160759
Epoch [1/2], Iter [283/3125], train_loss:0.143663
Epoch [1/2], Iter [284/3125], train_loss:0.152524
Epoch [1/2], Iter [285/3125], train_loss:0.147572
Epoch [1/2], Iter [286/3125], train_loss:0.170164
Epoch [1/2], Iter [287/3125], train_loss:0.139357
Epoch [1/2], Iter [288/3125], train_loss:0.137447
Epoch [1/2], Iter [289/3125], train_loss:0.153944
Epoch [1/2], Iter [290/3125], train_loss:0.120008
Epoch [1/2], Iter [291/3125], train_loss:0.125603
Epoch [1/2], Iter [292/3125], train_loss:0.169415
Epoch [1/2], Iter [293/3125], train_loss:0.156042
Epoch [1/2], Iter [294/3125], train_loss:0.140195
Epoch [1/2], Iter [295/3125], train_loss:0.102234
Epoch [1/2], Iter [296/3125], train_loss:0.133909
Epoch [1/2], Iter [297/3125], train_loss:0.139474
Epoch [1/2], Iter [298/3125], train_loss:0.162286
Epoch [1/2], Iter [299/3125], train_loss:0.151964
Epoch [1/2], Iter [300/3125], train_loss:0.155396
Epoch [1/2], Iter [301/3125], train_loss:0.137973
Epoch [1/2], Iter [302/3125], train_loss:0.161529
Epoch [1/2], Iter [303/3125], train_loss:0.137485
Epoch [1/2], Iter [304/3125], train_loss:0.134958
Epoch [1/2], Iter [305/3125], train_loss:0.151537
Epoch [1/2], Iter [306/3125], train_loss:0.115637
Epoch [1/2], Iter [307/3125], train_loss:0.146324
Epoch [1/2], Iter [308/3125], train_loss:0.135304
Epoch [1/2], Iter [309/3125], train_loss:0.161564
Epoch [1/2], Iter [310/3125], train_loss:0.140648
Epoch [1/2], Iter [311/3125], train_loss:0.165383
Epoch [1/2], Iter [312/3125], train_loss:0.171503
Epoch [1/2], Iter [313/3125], train_loss:0.128425
Epoch [1/2], Iter [314/3125], train_loss:0.137095
Epoch [1/2], Iter [315/3125], train_loss:0.147743
Epoch [1/2], Iter [316/3125], train_loss:0.136319
Epoch [1/2], Iter [317/3125], train_loss:0.140118
Epoch [1/2], Iter [318/3125], train_loss:0.129867
Epoch [1/2], Iter [319/3125], train_loss:0.140588
Epoch [1/2], Iter [320/3125], train_loss:0.140786
Epoch [1/2], Iter [321/3125], train_loss:0.131588
Epoch [1/2], Iter [322/3125], train_loss:0.118686
Epoch [1/2], Iter [323/3125], train_loss:0.145970
Epoch [1/2], Iter [324/3125], train_loss:0.144447
Epoch [1/2], Iter [325/3125], train_loss:0.140250
Epoch [1/2], Iter [326/3125], train_loss:0.144189
Epoch [1/2], Iter [327/3125], train_loss:0.151661
Epoch [1/2], Iter [328/3125], train_loss:0.153539
Epoch [1/2], Iter [329/3125], train_loss:0.161170
Epoch [1/2], Iter [330/3125], train_loss:0.135300
Epoch [1/2], Iter [331/3125], train_loss:0.123458
Epoch [1/2], Iter [332/3125], train_loss:0.139802
Epoch [1/2], Iter [333/3125], train_loss:0.169329
Epoch [1/2], Iter [334/3125], train_loss:0.145734
Epoch [1/2], Iter [335/3125], train_loss:0.184645
Epoch [1/2], Iter [336/3125], train_loss:0.138695
Epoch [1/2], Iter [337/3125], train_loss:0.121887
Epoch [1/2], Iter [338/3125], train_loss:0.131833
Epoch [1/2], Iter [339/3125], train_loss:0.154317
Epoch [1/2], Iter [340/3125], train_loss:0.131791
Epoch [1/2], Iter [341/3125], train_loss:0.111341
Epoch [1/2], Iter [342/3125], train_loss:0.123395
Epoch [1/2], Iter [343/3125], train_loss:0.161068
Epoch [1/2], Iter [344/3125], train_loss:0.138011
Epoch [1/2], Iter [345/3125], train_loss:0.172757
Epoch [1/2], Iter [346/3125], train_loss:0.141580
Epoch [1/2], Iter [347/3125], train_loss:0.144634
Epoch [1/2], Iter [348/3125], train_loss:0.133607
Epoch [1/2], Iter [349/3125], train_loss:0.151957
Epoch [1/2], Iter [350/3125], train_loss:0.153514
Epoch [1/2], Iter [351/3125], train_loss:0.132827
Epoch [1/2], Iter [352/3125], train_loss:0.165424
Epoch [1/2], Iter [353/3125], train_loss:0.151765
Epoch [1/2], Iter [354/3125], train_loss:0.123370
Epoch [1/2], Iter [355/3125], train_loss:0.133170
Epoch [1/2], Iter [356/3125], train_loss:0.134136
Epoch [1/2], Iter [357/3125], train_loss:0.134728
Epoch [1/2], Iter [358/3125], train_loss:0.130462
Epoch [1/2], Iter [359/3125], train_loss:0.140449
Epoch [1/2], Iter [360/3125], train_loss:0.115743
Epoch [1/2], Iter [361/3125], train_loss:0.135062
Epoch [1/2], Iter [362/3125], train_loss:0.170707
Epoch [1/2], Iter [363/3125], train_loss:0.125737
Epoch [1/2], Iter [364/3125], train_loss:0.144514
Epoch [1/2], Iter [365/3125], train_loss:0.167388
Epoch [1/2], Iter [366/3125], train_loss:0.136096
Epoch [1/2], Iter [367/3125], train_loss:0.150182
Epoch [1/2], Iter [368/3125], train_loss:0.173576
Epoch [1/2], Iter [369/3125], train_loss:0.129492
Epoch [1/2], Iter [370/3125], train_loss:0.142063
Epoch [1/2], Iter [371/3125], train_loss:0.103541
Epoch [1/2], Iter [372/3125], train_loss:0.156505
Epoch [1/2], Iter [373/3125], train_loss:0.154902
Epoch [1/2], Iter [374/3125], train_loss:0.115977
Epoch [1/2], Iter [375/3125], train_loss:0.119252
Epoch [1/2], Iter [376/3125], train_loss:0.171216
Epoch [1/2], Iter [377/3125], train_loss:0.132563
Epoch [1/2], Iter [378/3125], train_loss:0.118892
Epoch [1/2], Iter [379/3125], train_loss:0.114120
Epoch [1/2], Iter [380/3125], train_loss:0.133102
Epoch [1/2], Iter [381/3125], train_loss:0.148668
Epoch [1/2], Iter [382/3125], train_loss:0.088364
Epoch [1/2], Iter [383/3125], train_loss:0.139797
Epoch [1/2], Iter [384/3125], train_loss:0.109467
Epoch [1/2], Iter [385/3125], train_loss:0.120487
Epoch [1/2], Iter [386/3125], train_loss:0.129980
Epoch [1/2], Iter [387/3125], train_loss:0.133831
Epoch [1/2], Iter [388/3125], train_loss:0.129084
Epoch [1/2], Iter [389/3125], train_loss:0.143751
Epoch [1/2], Iter [390/3125], train_loss:0.145588
Epoch [1/2], Iter [391/3125], train_loss:0.141514
Epoch [1/2], Iter [392/3125], train_loss:0.134764
Epoch [1/2], Iter [393/3125], train_loss:0.135487
Epoch [1/2], Iter [394/3125], train_loss:0.158167
Epoch [1/2], Iter [395/3125], train_loss:0.128908
Epoch [1/2], Iter [396/3125], train_loss:0.104820
Epoch [1/2], Iter [397/3125], train_loss:0.126803
Epoch [1/2], Iter [398/3125], train_loss:0.119977
Epoch [1/2], Iter [399/3125], train_loss:0.167593
Epoch [1/2], Iter [400/3125], train_loss:0.120910
Epoch [1/2], Iter [401/3125], train_loss:0.133739
Epoch [1/2], Iter [402/3125], train_loss:0.143254
Epoch [1/2], Iter [403/3125], train_loss:0.128983
Epoch [1/2], Iter [404/3125], train_loss:0.148489
Epoch [1/2], Iter [405/3125], train_loss:0.138134
Epoch [1/2], Iter [406/3125], train_loss:0.159901
Epoch [1/2], Iter [407/3125], train_loss:0.116905
Epoch [1/2], Iter [408/3125], train_loss:0.131004
Epoch [1/2], Iter [409/3125], train_loss:0.128001
Epoch [1/2], Iter [410/3125], train_loss:0.126740
Epoch [1/2], Iter [411/3125], train_loss:0.132924
Epoch [1/2], Iter [412/3125], train_loss:0.131834
Epoch [1/2], Iter [413/3125], train_loss:0.124082
Epoch [1/2], Iter [414/3125], train_loss:0.141766
Epoch [1/2], Iter [415/3125], train_loss:0.146525
Epoch [1/2], Iter [416/3125], train_loss:0.174883
Epoch [1/2], Iter [417/3125], train_loss:0.154470
Epoch [1/2], Iter [418/3125], train_loss:0.130960
Epoch [1/2], Iter [419/3125], train_loss:0.146512
Epoch [1/2], Iter [420/3125], train_loss:0.133668
Epoch [1/2], Iter [421/3125], train_loss:0.165243
Epoch [1/2], Iter [422/3125], train_loss:0.132538
Epoch [1/2], Iter [423/3125], train_loss:0.115865
Epoch [1/2], Iter [424/3125], train_loss:0.134251
Epoch [1/2], Iter [425/3125], train_loss:0.144921
Epoch [1/2], Iter [426/3125], train_loss:0.128650
Epoch [1/2], Iter [427/3125], train_loss:0.124390
Epoch [1/2], Iter [428/3125], train_loss:0.120808
Epoch [1/2], Iter [429/3125], train_loss:0.117466
Epoch [1/2], Iter [430/3125], train_loss:0.133278
Epoch [1/2], Iter [431/3125], train_loss:0.121746
Epoch [1/2], Iter [432/3125], train_loss:0.124647
Epoch [1/2], Iter [433/3125], train_loss:0.115997
Epoch [1/2], Iter [434/3125], train_loss:0.135611
Epoch [1/2], Iter [435/3125], train_loss:0.149327
Epoch [1/2], Iter [436/3125], train_loss:0.113214
Epoch [1/2], Iter [437/3125], train_loss:0.152793
Epoch [1/2], Iter [438/3125], train_loss:0.158480
Epoch [1/2], Iter [439/3125], train_loss:0.116453
Epoch [1/2], Iter [440/3125], train_loss:0.127663
Epoch [1/2], Iter [441/3125], train_loss:0.140036
Epoch [1/2], Iter [442/3125], train_loss:0.166923
Epoch [1/2], Iter [443/3125], train_loss:0.120091
Epoch [1/2], Iter [444/3125], train_loss:0.153006
Epoch [1/2], Iter [445/3125], train_loss:0.150299
Epoch [1/2], Iter [446/3125], train_loss:0.117065
Epoch [1/2], Iter [447/3125], train_loss:0.124862
Epoch [1/2], Iter [448/3125], train_loss:0.138539
Epoch [1/2], Iter [449/3125], train_loss:0.130323
Epoch [1/2], Iter [450/3125], train_loss:0.144418
Epoch [1/2], Iter [451/3125], train_loss:0.133128
Epoch [1/2], Iter [452/3125], train_loss:0.154379
Epoch [1/2], Iter [453/3125], train_loss:0.131493
Epoch [1/2], Iter [454/3125], train_loss:0.150599
Epoch [1/2], Iter [455/3125], train_loss:0.121932
Epoch [1/2], Iter [456/3125], train_loss:0.094283
Epoch [1/2], Iter [457/3125], train_loss:0.106184
Epoch [1/2], Iter [458/3125], train_loss:0.155492
Epoch [1/2], Iter [459/3125], train_loss:0.149853
Epoch [1/2], Iter [460/3125], train_loss:0.159567
Epoch [1/2], Iter [461/3125], train_loss:0.142336
Epoch [1/2], Iter [462/3125], train_loss:0.120529
Epoch [1/2], Iter [463/3125], train_loss:0.178071
Epoch [1/2], Iter [464/3125], train_loss:0.138046
Epoch [1/2], Iter [465/3125], train_loss:0.136128
Epoch [1/2], Iter [466/3125], train_loss:0.137083
Epoch [1/2], Iter [467/3125], train_loss:0.092409
Epoch [1/2], Iter [468/3125], train_loss:0.154618
Epoch [1/2], Iter [469/3125], train_loss:0.119423
Epoch [1/2], Iter [470/3125], train_loss:0.141376
Epoch [1/2], Iter [471/3125], train_loss:0.144068
Epoch [1/2], Iter [472/3125], train_loss:0.152115
Epoch [1/2], Iter [473/3125], train_loss:0.138435
Epoch [1/2], Iter [474/3125], train_loss:0.111454
Epoch [1/2], Iter [475/3125], train_loss:0.127410
Epoch [1/2], Iter [476/3125], train_loss:0.141480
Epoch [1/2], Iter [477/3125], train_loss:0.118547
Epoch [1/2], Iter [478/3125], train_loss:0.116395
Epoch [1/2], Iter [479/3125], train_loss:0.131320
Epoch [1/2], Iter [480/3125], train_loss:0.135318
Epoch [1/2], Iter [481/3125], train_loss:0.130523
Epoch [1/2], Iter [482/3125], train_loss:0.113823
Epoch [1/2], Iter [483/3125], train_loss:0.145352
Epoch [1/2], Iter [484/3125], train_loss:0.114676
Epoch [1/2], Iter [485/3125], train_loss:0.118694
Epoch [1/2], Iter [486/3125], train_loss:0.155633
Epoch [1/2], Iter [487/3125], train_loss:0.154376
Epoch [1/2], Iter [488/3125], train_loss:0.150709
Epoch [1/2], Iter [489/3125], train_loss:0.140641
Epoch [1/2], Iter [490/3125], train_loss:0.113311
Epoch [1/2], Iter [491/3125], train_loss:0.125240
Epoch [1/2], Iter [492/3125], train_loss:0.165419
Epoch [1/2], Iter [493/3125], train_loss:0.126591
Epoch [1/2], Iter [494/3125], train_loss:0.135375
Epoch [1/2], Iter [495/3125], train_loss:0.108825
Epoch [1/2], Iter [496/3125], train_loss:0.146182
Epoch [1/2], Iter [497/3125], train_loss:0.145437
Epoch [1/2], Iter [498/3125], train_loss:0.125500
Epoch [1/2], Iter [499/3125], train_loss:0.115408
Epoch [1/2], Iter [500/3125], train_loss:0.158740
Epoch [1/2], Iter [501/3125], train_loss:0.138249
Epoch [1/2], Iter [502/3125], train_loss:0.126816
Epoch [1/2], Iter [503/3125], train_loss:0.147844
Epoch [1/2], Iter [504/3125], train_loss:0.128878
Epoch [1/2], Iter [505/3125], train_loss:0.114013
Epoch [1/2], Iter [506/3125], train_loss:0.160102
Epoch [1/2], Iter [507/3125], train_loss:0.151201
Epoch [1/2], Iter [508/3125], train_loss:0.149264
Epoch [1/2], Iter [509/3125], train_loss:0.159143
Epoch [1/2], Iter [510/3125], train_loss:0.142965
Epoch [1/2], Iter [511/3125], train_loss:0.138246
Epoch [1/2], Iter [512/3125], train_loss:0.124573
Epoch [1/2], Iter [513/3125], train_loss:0.148881
Epoch [1/2], Iter [514/3125], train_loss:0.149671
Epoch [1/2], Iter [515/3125], train_loss:0.140685
Epoch [1/2], Iter [516/3125], train_loss:0.143477
Epoch [1/2], Iter [517/3125], train_loss:0.116682
Epoch [1/2], Iter [518/3125], train_loss:0.140594
Epoch [1/2], Iter [519/3125], train_loss:0.126693
Epoch [1/2], Iter [520/3125], train_loss:0.131504
Epoch [1/2], Iter [521/3125], train_loss:0.152126
Epoch [1/2], Iter [522/3125], train_loss:0.152904
Epoch [1/2], Iter [523/3125], train_loss:0.146042
Epoch [1/2], Iter [524/3125], train_loss:0.128854
Epoch [1/2], Iter [525/3125], train_loss:0.123463
Epoch [1/2], Iter [526/3125], train_loss:0.130197
Epoch [1/2], Iter [527/3125], train_loss:0.153066
Epoch [1/2], Iter [528/3125], train_loss:0.165717
Epoch [1/2], Iter [529/3125], train_loss:0.165995
Epoch [1/2], Iter [530/3125], train_loss:0.130012
Epoch [1/2], Iter [531/3125], train_loss:0.124241
Epoch [1/2], Iter [532/3125], train_loss:0.126753
Epoch [1/2], Iter [533/3125], train_loss:0.141608
Epoch [1/2], Iter [534/3125], train_loss:0.130609
Epoch [1/2], Iter [535/3125], train_loss:0.140055
Epoch [1/2], Iter [536/3125], train_loss:0.141104
Epoch [1/2], Iter [537/3125], train_loss:0.129899
Epoch [1/2], Iter [538/3125], train_loss:0.152887
Epoch [1/2], Iter [539/3125], train_loss:0.147007
Epoch [1/2], Iter [540/3125], train_loss:0.140103
Epoch [1/2], Iter [541/3125], train_loss:0.123520
Epoch [1/2], Iter [542/3125], train_loss:0.158599
Epoch [1/2], Iter [543/3125], train_loss:0.147246
Epoch [1/2], Iter [544/3125], train_loss:0.118494
Epoch [1/2], Iter [545/3125], train_loss:0.140509
Epoch [1/2], Iter [546/3125], train_loss:0.155537
Epoch [1/2], Iter [547/3125], train_loss:0.164005
Epoch [1/2], Iter [548/3125], train_loss:0.124733
Epoch [1/2], Iter [549/3125], train_loss:0.143991
Epoch [1/2], Iter [550/3125], train_loss:0.166835
Epoch [1/2], Iter [551/3125], train_loss:0.131719
Epoch [1/2], Iter [552/3125], train_loss:0.123733
Epoch [1/2], Iter [553/3125], train_loss:0.114212
Epoch [1/2], Iter [554/3125], train_loss:0.131926
Epoch [1/2], Iter [555/3125], train_loss:0.126556
Epoch [1/2], Iter [556/3125], train_loss:0.127504
Epoch [1/2], Iter [557/3125], train_loss:0.127208
Epoch [1/2], Iter [558/3125], train_loss:0.117759
Epoch [1/2], Iter [559/3125], train_loss:0.115209
Epoch [1/2], Iter [560/3125], train_loss:0.114480
Epoch [1/2], Iter [561/3125], train_loss:0.117120
Epoch [1/2], Iter [562/3125], train_loss:0.114013
Epoch [1/2], Iter [563/3125], train_loss:0.149527
Epoch [1/2], Iter [564/3125], train_loss:0.128044
Epoch [1/2], Iter [565/3125], train_loss:0.150191
Epoch [1/2], Iter [566/3125], train_loss:0.120650
Epoch [1/2], Iter [567/3125], train_loss:0.131659
Epoch [1/2], Iter [568/3125], train_loss:0.122520
Epoch [1/2], Iter [569/3125], train_loss:0.121531
Epoch [1/2], Iter [570/3125], train_loss:0.129412
Epoch [1/2], Iter [571/3125], train_loss:0.135542
Epoch [1/2], Iter [572/3125], train_loss:0.138364
Epoch [1/2], Iter [573/3125], train_loss:0.107364
Epoch [1/2], Iter [574/3125], train_loss:0.184996
Epoch [1/2], Iter [575/3125], train_loss:0.139788
Epoch [1/2], Iter [576/3125], train_loss:0.149737
Epoch [1/2], Iter [577/3125], train_loss:0.158352
Epoch [1/2], Iter [578/3125], train_loss:0.182812
Epoch [1/2], Iter [579/3125], train_loss:0.131087
Epoch [1/2], Iter [580/3125], train_loss:0.128033
Epoch [1/2], Iter [581/3125], train_loss:0.118134
Epoch [1/2], Iter [582/3125], train_loss:0.121347
Epoch [1/2], Iter [583/3125], train_loss:0.111557
Epoch [1/2], Iter [584/3125], train_loss:0.120800
Epoch [1/2], Iter [585/3125], train_loss:0.138530
Epoch [1/2], Iter [586/3125], train_loss:0.135671
Epoch [1/2], Iter [587/3125], train_loss:0.130564
Epoch [1/2], Iter [588/3125], train_loss:0.123875
Epoch [1/2], Iter [589/3125], train_loss:0.131736
Epoch [1/2], Iter [590/3125], train_loss:0.119891
Epoch [1/2], Iter [591/3125], train_loss:0.128502
Epoch [1/2], Iter [592/3125], train_loss:0.125160
Epoch [1/2], Iter [593/3125], train_loss:0.129433
Epoch [1/2], Iter [594/3125], train_loss:0.149174
Epoch [1/2], Iter [595/3125], train_loss:0.148517
Epoch [1/2], Iter [596/3125], train_loss:0.129449
Epoch [1/2], Iter [597/3125], train_loss:0.140851
Epoch [1/2], Iter [598/3125], train_loss:0.127634
Epoch [1/2], Iter [599/3125], train_loss:0.112851
Epoch [1/2], Iter [600/3125], train_loss:0.132988
Epoch [1/2], Iter [601/3125], train_loss:0.125265
Epoch [1/2], Iter [602/3125], train_loss:0.123876
Epoch [1/2], Iter [603/3125], train_loss:0.130467
Epoch [1/2], Iter [604/3125], train_loss:0.129104
Epoch [1/2], Iter [605/3125], train_loss:0.117449
Epoch [1/2], Iter [606/3125], train_loss:0.117107
Epoch [1/2], Iter [607/3125], train_loss:0.119481
Epoch [1/2], Iter [608/3125], train_loss:0.127336
Epoch [1/2], Iter [609/3125], train_loss:0.120863
Epoch [1/2], Iter [610/3125], train_loss:0.129567
Epoch [1/2], Iter [611/3125], train_loss:0.105349
Epoch [1/2], Iter [612/3125], train_loss:0.115262
Epoch [1/2], Iter [613/3125], train_loss:0.114055
Epoch [1/2], Iter [614/3125], train_loss:0.088257
Epoch [1/2], Iter [615/3125], train_loss:0.132848
Epoch [1/2], Iter [616/3125], train_loss:0.147668
Epoch [1/2], Iter [617/3125], train_loss:0.138724
Epoch [1/2], Iter [618/3125], train_loss:0.143088
Epoch [1/2], Iter [619/3125], train_loss:0.120917
Epoch [1/2], Iter [620/3125], train_loss:0.135376
Epoch [1/2], Iter [621/3125], train_loss:0.108191
Epoch [1/2], Iter [622/3125], train_loss:0.130458
Epoch [1/2], Iter [623/3125], train_loss:0.120811
Epoch [1/2], Iter [624/3125], train_loss:0.157672
Epoch [1/2], Iter [625/3125], train_loss:0.140236
Epoch [1/2], Iter [626/3125], train_loss:0.129262
Epoch [1/2], Iter [627/3125], train_loss:0.154512
Epoch [1/2], Iter [628/3125], train_loss:0.135774
Epoch [1/2], Iter [629/3125], train_loss:0.117041
Epoch [1/2], Iter [630/3125], train_loss:0.134066
Epoch [1/2], Iter [631/3125], train_loss:0.136478
Epoch [1/2], Iter [632/3125], train_loss:0.125146
Epoch [1/2], Iter [633/3125], train_loss:0.128133
Epoch [1/2], Iter [634/3125], train_loss:0.159892
Epoch [1/2], Iter [635/3125], train_loss:0.144542
Epoch [1/2], Iter [636/3125], train_loss:0.174141
Epoch [1/2], Iter [637/3125], train_loss:0.099209
Epoch [1/2], Iter [638/3125], train_loss:0.123207
Epoch [1/2], Iter [639/3125], train_loss:0.108200
Epoch [1/2], Iter [640/3125], train_loss:0.150231
Epoch [1/2], Iter [641/3125], train_loss:0.140358
Epoch [1/2], Iter [642/3125], train_loss:0.129246
Epoch [1/2], Iter [643/3125], train_loss:0.119049
Epoch [1/2], Iter [644/3125], train_loss:0.119448
Epoch [1/2], Iter [645/3125], train_loss:0.130537
Epoch [1/2], Iter [646/3125], train_loss:0.133798
Epoch [1/2], Iter [647/3125], train_loss:0.132481
Epoch [1/2], Iter [648/3125], train_loss:0.133250
Epoch [1/2], Iter [649/3125], train_loss:0.104661
Epoch [1/2], Iter [650/3125], train_loss:0.152993
Epoch [1/2], Iter [651/3125], train_loss:0.119652
Epoch [1/2], Iter [652/3125], train_loss:0.128239
Epoch [1/2], Iter [653/3125], train_loss:0.132214
Epoch [1/2], Iter [654/3125], train_loss:0.129251
Epoch [1/2], Iter [655/3125], train_loss:0.149047
Epoch [1/2], Iter [656/3125], train_loss:0.153654
Epoch [1/2], Iter [657/3125], train_loss:0.133315
Epoch [1/2], Iter [658/3125], train_loss:0.128164
Epoch [1/2], Iter [659/3125], train_loss:0.134112
Epoch [1/2], Iter [660/3125], train_loss:0.103687
Epoch [1/2], Iter [661/3125], train_loss:0.125754
Epoch [1/2], Iter [662/3125], train_loss:0.132972
Epoch [1/2], Iter [663/3125], train_loss:0.153800
Epoch [1/2], Iter [664/3125], train_loss:0.110952
Epoch [1/2], Iter [665/3125], train_loss:0.120236
Epoch [1/2], Iter [666/3125], train_loss:0.115589
Epoch [1/2], Iter [667/3125], train_loss:0.132908
Epoch [1/2], Iter [668/3125], train_loss:0.159913
Epoch [1/2], Iter [669/3125], train_loss:0.131979
Epoch [1/2], Iter [670/3125], train_loss:0.136179
Epoch [1/2], Iter [671/3125], train_loss:0.131732
Epoch [1/2], Iter [672/3125], train_loss:0.106427
Epoch [1/2], Iter [673/3125], train_loss:0.094495
Epoch [1/2], Iter [674/3125], train_loss:0.139270
Epoch [1/2], Iter [675/3125], train_loss:0.148814
Epoch [1/2], Iter [676/3125], train_loss:0.121234
Epoch [1/2], Iter [677/3125], train_loss:0.135534
Epoch [1/2], Iter [678/3125], train_loss:0.163135
Epoch [1/2], Iter [679/3125], train_loss:0.143060
Epoch [1/2], Iter [680/3125], train_loss:0.125081
Epoch [1/2], Iter [681/3125], train_loss:0.129806
Epoch [1/2], Iter [682/3125], train_loss:0.122023
Epoch [1/2], Iter [683/3125], train_loss:0.134073
Epoch [1/2], Iter [684/3125], train_loss:0.134897
Epoch [1/2], Iter [685/3125], train_loss:0.106832
Epoch [1/2], Iter [686/3125], train_loss:0.111320
Epoch [1/2], Iter [687/3125], train_loss:0.103270
Epoch [1/2], Iter [688/3125], train_loss:0.126575
Epoch [1/2], Iter [689/3125], train_loss:0.146058
Epoch [1/2], Iter [690/3125], train_loss:0.122028
Epoch [1/2], Iter [691/3125], train_loss:0.111225
Epoch [1/2], Iter [692/3125], train_loss:0.133752
Epoch [1/2], Iter [693/3125], train_loss:0.147082
Epoch [1/2], Iter [694/3125], train_loss:0.152503
Epoch [1/2], Iter [695/3125], train_loss:0.140419
Epoch [1/2], Iter [696/3125], train_loss:0.105214
Epoch [1/2], Iter [697/3125], train_loss:0.101332
Epoch [1/2], Iter [698/3125], train_loss:0.119881
Epoch [1/2], Iter [699/3125], train_loss:0.139052
Epoch [1/2], Iter [700/3125], train_loss:0.130664
Epoch [1/2], Iter [701/3125], train_loss:0.139796
Epoch [1/2], Iter [702/3125], train_loss:0.144576
Epoch [1/2], Iter [703/3125], train_loss:0.148382
Epoch [1/2], Iter [704/3125], train_loss:0.155544
Epoch [1/2], Iter [705/3125], train_loss:0.124624
Epoch [1/2], Iter [706/3125], train_loss:0.129485
Epoch [1/2], Iter [707/3125], train_loss:0.112410
Epoch [1/2], Iter [708/3125], train_loss:0.117666
Epoch [1/2], Iter [709/3125], train_loss:0.123164
Epoch [1/2], Iter [710/3125], train_loss:0.118641
Epoch [1/2], Iter [711/3125], train_loss:0.126330
Epoch [1/2], Iter [712/3125], train_loss:0.149150
Epoch [1/2], Iter [713/3125], train_loss:0.136890
Epoch [1/2], Iter [714/3125], train_loss:0.138514
Epoch [1/2], Iter [715/3125], train_loss:0.135200
Epoch [1/2], Iter [716/3125], train_loss:0.162493
Epoch [1/2], Iter [717/3125], train_loss:0.124913
Epoch [1/2], Iter [718/3125], train_loss:0.136156
Epoch [1/2], Iter [719/3125], train_loss:0.124643
Epoch [1/2], Iter [720/3125], train_loss:0.111680
Epoch [1/2], Iter [721/3125], train_loss:0.142082
Epoch [1/2], Iter [722/3125], train_loss:0.125173
Epoch [1/2], Iter [723/3125], train_loss:0.155997
Epoch [1/2], Iter [724/3125], train_loss:0.130297
Epoch [1/2], Iter [725/3125], train_loss:0.118000
Epoch [1/2], Iter [726/3125], train_loss:0.121535
Epoch [1/2], Iter [727/3125], train_loss:0.132659
Epoch [1/2], Iter [728/3125], train_loss:0.147112
Epoch [1/2], Iter [729/3125], train_loss:0.127118
Epoch [1/2], Iter [730/3125], train_loss:0.117182
Epoch [1/2], Iter [731/3125], train_loss:0.131232
Epoch [1/2], Iter [732/3125], train_loss:0.114092
Epoch [1/2], Iter [733/3125], train_loss:0.109745
Epoch [1/2], Iter [734/3125], train_loss:0.145640
Epoch [1/2], Iter [735/3125], train_loss:0.129315
Epoch [1/2], Iter [736/3125], train_loss:0.139311
Epoch [1/2], Iter [737/3125], train_loss:0.144331
Epoch [1/2], Iter [738/3125], train_loss:0.147544
Epoch [1/2], Iter [739/3125], train_loss:0.122015
Epoch [1/2], Iter [740/3125], train_loss:0.118138
Epoch [1/2], Iter [741/3125], train_loss:0.131837
Epoch [1/2], Iter [742/3125], train_loss:0.134231
Epoch [1/2], Iter [743/3125], train_loss:0.107514
Epoch [1/2], Iter [744/3125], train_loss:0.134031
Epoch [1/2], Iter [745/3125], train_loss:0.104138
Epoch [1/2], Iter [746/3125], train_loss:0.137693
Epoch [1/2], Iter [747/3125], train_loss:0.111110
Epoch [1/2], Iter [748/3125], train_loss:0.105632
Epoch [1/2], Iter [749/3125], train_loss:0.107081
Epoch [1/2], Iter [750/3125], train_loss:0.116592
Epoch [1/2], Iter [751/3125], train_loss:0.106551
Epoch [1/2], Iter [752/3125], train_loss:0.125838
Epoch [1/2], Iter [753/3125], train_loss:0.120718
Epoch [1/2], Iter [754/3125], train_loss:0.132687
Epoch [1/2], Iter [755/3125], train_loss:0.151706
Epoch [1/2], Iter [756/3125], train_loss:0.135108
Epoch [1/2], Iter [757/3125], train_loss:0.113648
Epoch [1/2], Iter [758/3125], train_loss:0.110392
Epoch [1/2], Iter [759/3125], train_loss:0.126501
Epoch [1/2], Iter [760/3125], train_loss:0.138877
Epoch [1/2], Iter [761/3125], train_loss:0.133995
Epoch [1/2], Iter [762/3125], train_loss:0.125079
Epoch [1/2], Iter [763/3125], train_loss:0.117826
Epoch [1/2], Iter [764/3125], train_loss:0.116858
Epoch [1/2], Iter [765/3125], train_loss:0.126663
Epoch [1/2], Iter [766/3125], train_loss:0.105839
Epoch [1/2], Iter [767/3125], train_loss:0.131394
Epoch [1/2], Iter [768/3125], train_loss:0.152240
Epoch [1/2], Iter [769/3125], train_loss:0.149760
Epoch [1/2], Iter [770/3125], train_loss:0.138694
Epoch [1/2], Iter [771/3125], train_loss:0.126705
Epoch [1/2], Iter [772/3125], train_loss:0.138881
Epoch [1/2], Iter [773/3125], train_loss:0.101403
Epoch [1/2], Iter [774/3125], train_loss:0.112878
Epoch [1/2], Iter [775/3125], train_loss:0.134290
Epoch [1/2], Iter [776/3125], train_loss:0.148333
Epoch [1/2], Iter [777/3125], train_loss:0.134612
Epoch [1/2], Iter [778/3125], train_loss:0.136959
Epoch [1/2], Iter [779/3125], train_loss:0.120079
Epoch [1/2], Iter [780/3125], train_loss:0.115945
Epoch [1/2], Iter [781/3125], train_loss:0.126110
Epoch [1/2], Iter [782/3125], train_loss:0.129537
Epoch [1/2], Iter [783/3125], train_loss:0.135706
Epoch [1/2], Iter [784/3125], train_loss:0.119200
Epoch [1/2], Iter [785/3125], train_loss:0.149839
Epoch [1/2], Iter [786/3125], train_loss:0.118873
Epoch [1/2], Iter [787/3125], train_loss:0.118077
Epoch [1/2], Iter [788/3125], train_loss:0.125369
Epoch [1/2], Iter [789/3125], train_loss:0.147734
Epoch [1/2], Iter [790/3125], train_loss:0.143367
Epoch [1/2], Iter [791/3125], train_loss:0.110450
Epoch [1/2], Iter [792/3125], train_loss:0.137163
Epoch [1/2], Iter [793/3125], train_loss:0.113366
Epoch [1/2], Iter [794/3125], train_loss:0.119381
Epoch [1/2], Iter [795/3125], train_loss:0.131153
Epoch [1/2], Iter [796/3125], train_loss:0.161323
Epoch [1/2], Iter [797/3125], train_loss:0.125228
Epoch [1/2], Iter [798/3125], train_loss:0.134447
Epoch [1/2], Iter [799/3125], train_loss:0.123386
Epoch [1/2], Iter [800/3125], train_loss:0.116614
Epoch [1/2], Iter [801/3125], train_loss:0.122435
Epoch [1/2], Iter [802/3125], train_loss:0.130789
Epoch [1/2], Iter [803/3125], train_loss:0.120878
Epoch [1/2], Iter [804/3125], train_loss:0.121167
Epoch [1/2], Iter [805/3125], train_loss:0.120995
Epoch [1/2], Iter [806/3125], train_loss:0.104603
Epoch [1/2], Iter [807/3125], train_loss:0.116274
Epoch [1/2], Iter [808/3125], train_loss:0.113488
Epoch [1/2], Iter [809/3125], train_loss:0.139278
Epoch [1/2], Iter [810/3125], train_loss:0.133202
Epoch [1/2], Iter [811/3125], train_loss:0.142533
Epoch [1/2], Iter [812/3125], train_loss:0.140460
Epoch [1/2], Iter [813/3125], train_loss:0.160427
Epoch [1/2], Iter [814/3125], train_loss:0.108846
Epoch [1/2], Iter [815/3125], train_loss:0.102865
Epoch [1/2], Iter [816/3125], train_loss:0.169738
Epoch [1/2], Iter [817/3125], train_loss:0.141982
Epoch [1/2], Iter [818/3125], train_loss:0.120521
Epoch [1/2], Iter [819/3125], train_loss:0.110251
Epoch [1/2], Iter [820/3125], train_loss:0.124580
Epoch [1/2], Iter [821/3125], train_loss:0.120058
Epoch [1/2], Iter [822/3125], train_loss:0.128831
Epoch [1/2], Iter [823/3125], train_loss:0.116302
Epoch [1/2], Iter [824/3125], train_loss:0.126279
Epoch [1/2], Iter [825/3125], train_loss:0.122051
Epoch [1/2], Iter [826/3125], train_loss:0.101408
Epoch [1/2], Iter [827/3125], train_loss:0.133676
Epoch [1/2], Iter [828/3125], train_loss:0.114889
Epoch [1/2], Iter [829/3125], train_loss:0.154637
Epoch [1/2], Iter [830/3125], train_loss:0.110613
Epoch [1/2], Iter [831/3125], train_loss:0.107352
Epoch [1/2], Iter [832/3125], train_loss:0.113590
Epoch [1/2], Iter [833/3125], train_loss:0.127768
Epoch [1/2], Iter [834/3125], train_loss:0.158357
Epoch [1/2], Iter [835/3125], train_loss:0.156968
Epoch [1/2], Iter [836/3125], train_loss:0.139370
Epoch [1/2], Iter [837/3125], train_loss:0.160966
Epoch [1/2], Iter [838/3125], train_loss:0.125671
Epoch [1/2], Iter [839/3125], train_loss:0.130724
Epoch [1/2], Iter [840/3125], train_loss:0.148446
Epoch [1/2], Iter [841/3125], train_loss:0.125982
Epoch [1/2], Iter [842/3125], train_loss:0.139492
Epoch [1/2], Iter [843/3125], train_loss:0.116199
Epoch [1/2], Iter [844/3125], train_loss:0.103395
Epoch [1/2], Iter [845/3125], train_loss:0.154915
Epoch [1/2], Iter [846/3125], train_loss:0.129759
Epoch [1/2], Iter [847/3125], train_loss:0.111957
Epoch [1/2], Iter [848/3125], train_loss:0.097646
Epoch [1/2], Iter [849/3125], train_loss:0.104481
Epoch [1/2], Iter [850/3125], train_loss:0.117910
Epoch [1/2], Iter [851/3125], train_loss:0.111621
Epoch [1/2], Iter [852/3125], train_loss:0.152699
Epoch [1/2], Iter [853/3125], train_loss:0.132153
Epoch [1/2], Iter [854/3125], train_loss:0.096483
Epoch [1/2], Iter [855/3125], train_loss:0.128148
Epoch [1/2], Iter [856/3125], train_loss:0.118850
Epoch [1/2], Iter [857/3125], train_loss:0.125999
Epoch [1/2], Iter [858/3125], train_loss:0.128652
Epoch [1/2], Iter [859/3125], train_loss:0.141657
Epoch [1/2], Iter [860/3125], train_loss:0.156710
Epoch [1/2], Iter [861/3125], train_loss:0.117729
Epoch [1/2], Iter [862/3125], train_loss:0.121909
Epoch [1/2], Iter [863/3125], train_loss:0.124577
Epoch [1/2], Iter [864/3125], train_loss:0.121272
Epoch [1/2], Iter [865/3125], train_loss:0.117923
Epoch [1/2], Iter [866/3125], train_loss:0.095200
Epoch [1/2], Iter [867/3125], train_loss:0.140625
Epoch [1/2], Iter [868/3125], train_loss:0.140180
Epoch [1/2], Iter [869/3125], train_loss:0.126693
Epoch [1/2], Iter [870/3125], train_loss:0.133405
Epoch [1/2], Iter [871/3125], train_loss:0.134636
Epoch [1/2], Iter [872/3125], train_loss:0.151266
Epoch [1/2], Iter [873/3125], train_loss:0.154619
Epoch [1/2], Iter [874/3125], train_loss:0.113689
Epoch [1/2], Iter [875/3125], train_loss:0.108087
Epoch [1/2], Iter [876/3125], train_loss:0.128375
Epoch [1/2], Iter [877/3125], train_loss:0.122934
Epoch [1/2], Iter [878/3125], train_loss:0.107065
Epoch [1/2], Iter [879/3125], train_loss:0.116219
Epoch [1/2], Iter [880/3125], train_loss:0.106964
Epoch [1/2], Iter [881/3125], train_loss:0.088776
Epoch [1/2], Iter [882/3125], train_loss:0.137836
Epoch [1/2], Iter [883/3125], train_loss:0.131807
Epoch [1/2], Iter [884/3125], train_loss:0.128496
Epoch [1/2], Iter [885/3125], train_loss:0.124839
Epoch [1/2], Iter [886/3125], train_loss:0.159529
Epoch [1/2], Iter [887/3125], train_loss:0.131784
Epoch [1/2], Iter [888/3125], train_loss:0.102921
Epoch [1/2], Iter [889/3125], train_loss:0.127691
Epoch [1/2], Iter [890/3125], train_loss:0.143522
Epoch [1/2], Iter [891/3125], train_loss:0.112422
Epoch [1/2], Iter [892/3125], train_loss:0.116074
Epoch [1/2], Iter [893/3125], train_loss:0.125603
Epoch [1/2], Iter [894/3125], train_loss:0.129154
Epoch [1/2], Iter [895/3125], train_loss:0.098535
Epoch [1/2], Iter [896/3125], train_loss:0.113325
Epoch [1/2], Iter [897/3125], train_loss:0.128097
Epoch [1/2], Iter [898/3125], train_loss:0.113959
Epoch [1/2], Iter [899/3125], train_loss:0.121583
Epoch [1/2], Iter [900/3125], train_loss:0.126774
Epoch [1/2], Iter [901/3125], train_loss:0.131767
Epoch [1/2], Iter [902/3125], train_loss:0.128037
Epoch [1/2], Iter [903/3125], train_loss:0.133310
Epoch [1/2], Iter [904/3125], train_loss:0.111954
Epoch [1/2], Iter [905/3125], train_loss:0.151881
Epoch [1/2], Iter [906/3125], train_loss:0.116905
Epoch [1/2], Iter [907/3125], train_loss:0.115108
Epoch [1/2], Iter [908/3125], train_loss:0.113878
Epoch [1/2], Iter [909/3125], train_loss:0.153626
Epoch [1/2], Iter [910/3125], train_loss:0.101536
Epoch [1/2], Iter [911/3125], train_loss:0.128038
Epoch [1/2], Iter [912/3125], train_loss:0.113910
Epoch [1/2], Iter [913/3125], train_loss:0.132720
Epoch [1/2], Iter [914/3125], train_loss:0.117571
Epoch [1/2], Iter [915/3125], train_loss:0.134915
Epoch [1/2], Iter [916/3125], train_loss:0.142414
Epoch [1/2], Iter [917/3125], train_loss:0.102882
Epoch [1/2], Iter [918/3125], train_loss:0.152961
Epoch [1/2], Iter [919/3125], train_loss:0.130095
Epoch [1/2], Iter [920/3125], train_loss:0.135837
Epoch [1/2], Iter [921/3125], train_loss:0.131806
Epoch [1/2], Iter [922/3125], train_loss:0.106842
Epoch [1/2], Iter [923/3125], train_loss:0.114038
Epoch [1/2], Iter [924/3125], train_loss:0.139136
Epoch [1/2], Iter [925/3125], train_loss:0.119239
Epoch [1/2], Iter [926/3125], train_loss:0.118090
Epoch [1/2], Iter [927/3125], train_loss:0.127306
Epoch [1/2], Iter [928/3125], train_loss:0.128909
Epoch [1/2], Iter [929/3125], train_loss:0.143076
Epoch [1/2], Iter [930/3125], train_loss:0.109327
Epoch [1/2], Iter [931/3125], train_loss:0.141522
Epoch [1/2], Iter [932/3125], train_loss:0.151232
Epoch [1/2], Iter [933/3125], train_loss:0.125747
Epoch [1/2], Iter [934/3125], train_loss:0.138038
Epoch [1/2], Iter [935/3125], train_loss:0.127718
Epoch [1/2], Iter [936/3125], train_loss:0.106390
Epoch [1/2], Iter [937/3125], train_loss:0.092447
Epoch [1/2], Iter [938/3125], train_loss:0.133007
Epoch [1/2], Iter [939/3125], train_loss:0.158318
Epoch [1/2], Iter [940/3125], train_loss:0.150942
Epoch [1/2], Iter [941/3125], train_loss:0.115330
Epoch [1/2], Iter [942/3125], train_loss:0.125420
Epoch [1/2], Iter [943/3125], train_loss:0.133677
Epoch [1/2], Iter [944/3125], train_loss:0.103778
Epoch [1/2], Iter [945/3125], train_loss:0.117114
Epoch [1/2], Iter [946/3125], train_loss:0.138225
Epoch [1/2], Iter [947/3125], train_loss:0.126272
Epoch [1/2], Iter [948/3125], train_loss:0.145278
Epoch [1/2], Iter [949/3125], train_loss:0.119771
Epoch [1/2], Iter [950/3125], train_loss:0.127314
Epoch [1/2], Iter [951/3125], train_loss:0.129742
Epoch [1/2], Iter [952/3125], train_loss:0.145730
Epoch [1/2], Iter [953/3125], train_loss:0.143654
Epoch [1/2], Iter [954/3125], train_loss:0.153971
Epoch [1/2], Iter [955/3125], train_loss:0.129445
Epoch [1/2], Iter [956/3125], train_loss:0.123389
Epoch [1/2], Iter [957/3125], train_loss:0.098573
Epoch [1/2], Iter [958/3125], train_loss:0.136154
Epoch [1/2], Iter [959/3125], train_loss:0.089660
Epoch [1/2], Iter [960/3125], train_loss:0.128614
Epoch [1/2], Iter [961/3125], train_loss:0.108439
Epoch [1/2], Iter [962/3125], train_loss:0.120334
Epoch [1/2], Iter [963/3125], train_loss:0.142910
Epoch [1/2], Iter [964/3125], train_loss:0.119167
Epoch [1/2], Iter [965/3125], train_loss:0.147332
Epoch [1/2], Iter [966/3125], train_loss:0.137831
Epoch [1/2], Iter [967/3125], train_loss:0.135807
Epoch [1/2], Iter [968/3125], train_loss:0.122058
Epoch [1/2], Iter [969/3125], train_loss:0.089618
Epoch [1/2], Iter [970/3125], train_loss:0.130668
Epoch [1/2], Iter [971/3125], train_loss:0.113997
Epoch [1/2], Iter [972/3125], train_loss:0.095872
Epoch [1/2], Iter [973/3125], train_loss:0.130532
Epoch [1/2], Iter [974/3125], train_loss:0.119044
Epoch [1/2], Iter [975/3125], train_loss:0.125105
Epoch [1/2], Iter [976/3125], train_loss:0.122724
Epoch [1/2], Iter [977/3125], train_loss:0.098335
Epoch [1/2], Iter [978/3125], train_loss:0.104454
Epoch [1/2], Iter [979/3125], train_loss:0.133544
Epoch [1/2], Iter [980/3125], train_loss:0.126448
Epoch [1/2], Iter [981/3125], train_loss:0.136839
Epoch [1/2], Iter [982/3125], train_loss:0.152823
Epoch [1/2], Iter [983/3125], train_loss:0.139764
Epoch [1/2], Iter [984/3125], train_loss:0.149529
Epoch [1/2], Iter [985/3125], train_loss:0.120920
Epoch [1/2], Iter [986/3125], train_loss:0.101797
Epoch [1/2], Iter [987/3125], train_loss:0.158799
Epoch [1/2], Iter [988/3125], train_loss:0.113887
Epoch [1/2], Iter [989/3125], train_loss:0.106621
Epoch [1/2], Iter [990/3125], train_loss:0.153951
Epoch [1/2], Iter [991/3125], train_loss:0.136528
Epoch [1/2], Iter [992/3125], train_loss:0.104794
Epoch [1/2], Iter [993/3125], train_loss:0.132386
Epoch [1/2], Iter [994/3125], train_loss:0.110921
Epoch [1/2], Iter [995/3125], train_loss:0.143581
Epoch [1/2], Iter [996/3125], train_loss:0.112366
Epoch [1/2], Iter [997/3125], train_loss:0.150791
Epoch [1/2], Iter [998/3125], train_loss:0.114965
Epoch [1/2], Iter [999/3125], train_loss:0.144281
Epoch [1/2], Iter [1000/3125], train_loss:0.097253
Epoch [1/2], Iter [1001/3125], train_loss:0.107015
Epoch [1/2], Iter [1002/3125], train_loss:0.124313
Epoch [1/2], Iter [1003/3125], train_loss:0.108577
Epoch [1/2], Iter [1004/3125], train_loss:0.134294
Epoch [1/2], Iter [1005/3125], train_loss:0.129103
Epoch [1/2], Iter [1006/3125], train_loss:0.127533
Epoch [1/2], Iter [1007/3125], train_loss:0.114984
Epoch [1/2], Iter [1008/3125], train_loss:0.124624
Epoch [1/2], Iter [1009/3125], train_loss:0.136847
Epoch [1/2], Iter [1010/3125], train_loss:0.122541
Epoch [1/2], Iter [1011/3125], train_loss:0.107556
Epoch [1/2], Iter [1012/3125], train_loss:0.109197
Epoch [1/2], Iter [1013/3125], train_loss:0.119598
Epoch [1/2], Iter [1014/3125], train_loss:0.106924
Epoch [1/2], Iter [1015/3125], train_loss:0.151267
Epoch [1/2], Iter [1016/3125], train_loss:0.142139
Epoch [1/2], Iter [1017/3125], train_loss:0.105546
Epoch [1/2], Iter [1018/3125], train_loss:0.122640
Epoch [1/2], Iter [1019/3125], train_loss:0.122053
Epoch [1/2], Iter [1020/3125], train_loss:0.138856
Epoch [1/2], Iter [1021/3125], train_loss:0.152428
Epoch [1/2], Iter [1022/3125], train_loss:0.121946
Epoch [1/2], Iter [1023/3125], train_loss:0.096853
Epoch [1/2], Iter [1024/3125], train_loss:0.100939
Epoch [1/2], Iter [1025/3125], train_loss:0.132505
Epoch [1/2], Iter [1026/3125], train_loss:0.112318
Epoch [1/2], Iter [1027/3125], train_loss:0.132648
Epoch [1/2], Iter [1028/3125], train_loss:0.135367
Epoch [1/2], Iter [1029/3125], train_loss:0.127595
Epoch [1/2], Iter [1030/3125], train_loss:0.122608
Epoch [1/2], Iter [1031/3125], train_loss:0.125477
Epoch [1/2], Iter [1032/3125], train_loss:0.134335
Epoch [1/2], Iter [1033/3125], train_loss:0.154964
Epoch [1/2], Iter [1034/3125], train_loss:0.150042
Epoch [1/2], Iter [1035/3125], train_loss:0.133856
Epoch [1/2], Iter [1036/3125], train_loss:0.116784
Epoch [1/2], Iter [1037/3125], train_loss:0.102079
Epoch [1/2], Iter [1038/3125], train_loss:0.134110
Epoch [1/2], Iter [1039/3125], train_loss:0.122395
Epoch [1/2], Iter [1040/3125], train_loss:0.109360
Epoch [1/2], Iter [1041/3125], train_loss:0.142921
Epoch [1/2], Iter [1042/3125], train_loss:0.119808
Epoch [1/2], Iter [1043/3125], train_loss:0.144362
Epoch [1/2], Iter [1044/3125], train_loss:0.121404
Epoch [1/2], Iter [1045/3125], train_loss:0.119871
Epoch [1/2], Iter [1046/3125], train_loss:0.111753
Epoch [1/2], Iter [1047/3125], train_loss:0.106631
Epoch [1/2], Iter [1048/3125], train_loss:0.129624
Epoch [1/2], Iter [1049/3125], train_loss:0.139405
Epoch [1/2], Iter [1050/3125], train_loss:0.146612
Epoch [1/2], Iter [1051/3125], train_loss:0.130812
Epoch [1/2], Iter [1052/3125], train_loss:0.145417
Epoch [1/2], Iter [1053/3125], train_loss:0.124454
Epoch [1/2], Iter [1054/3125], train_loss:0.117862
Epoch [1/2], Iter [1055/3125], train_loss:0.127324
Epoch [1/2], Iter [1056/3125], train_loss:0.097558
Epoch [1/2], Iter [1057/3125], train_loss:0.102088
Epoch [1/2], Iter [1058/3125], train_loss:0.140332
Epoch [1/2], Iter [1059/3125], train_loss:0.148284
Epoch [1/2], Iter [1060/3125], train_loss:0.160273
Epoch [1/2], Iter [1061/3125], train_loss:0.131561
Epoch [1/2], Iter [1062/3125], train_loss:0.136726
Epoch [1/2], Iter [1063/3125], train_loss:0.109466
Epoch [1/2], Iter [1064/3125], train_loss:0.135302
Epoch [1/2], Iter [1065/3125], train_loss:0.122059
Epoch [1/2], Iter [1066/3125], train_loss:0.139268
Epoch [1/2], Iter [1067/3125], train_loss:0.141390
Epoch [1/2], Iter [1068/3125], train_loss:0.110667
Epoch [1/2], Iter [1069/3125], train_loss:0.114104
Epoch [1/2], Iter [1070/3125], train_loss:0.134630
Epoch [1/2], Iter [1071/3125], train_loss:0.133930
Epoch [1/2], Iter [1072/3125], train_loss:0.126191
Epoch [1/2], Iter [1073/3125], train_loss:0.117818
Epoch [1/2], Iter [1074/3125], train_loss:0.114748
Epoch [1/2], Iter [1075/3125], train_loss:0.119137
Epoch [1/2], Iter [1076/3125], train_loss:0.133567
Epoch [1/2], Iter [1077/3125], train_loss:0.129337
Epoch [1/2], Iter [1078/3125], train_loss:0.109689
Epoch [1/2], Iter [1079/3125], train_loss:0.106143
Epoch [1/2], Iter [1080/3125], train_loss:0.102661
Epoch [1/2], Iter [1081/3125], train_loss:0.117610
Epoch [1/2], Iter [1082/3125], train_loss:0.082699
Epoch [1/2], Iter [1083/3125], train_loss:0.111960
Epoch [1/2], Iter [1084/3125], train_loss:0.150622
Epoch [1/2], Iter [1085/3125], train_loss:0.147994
Epoch [1/2], Iter [1086/3125], train_loss:0.127080
Epoch [1/2], Iter [1087/3125], train_loss:0.110065
Epoch [1/2], Iter [1088/3125], train_loss:0.114176
Epoch [1/2], Iter [1089/3125], train_loss:0.113061
Epoch [1/2], Iter [1090/3125], train_loss:0.109248
Epoch [1/2], Iter [1091/3125], train_loss:0.088652
Epoch [1/2], Iter [1092/3125], train_loss:0.176266
Epoch [1/2], Iter [1093/3125], train_loss:0.145318
Epoch [1/2], Iter [1094/3125], train_loss:0.132436
Epoch [1/2], Iter [1095/3125], train_loss:0.143664
Epoch [1/2], Iter [1096/3125], train_loss:0.110644
Epoch [1/2], Iter [1097/3125], train_loss:0.099839
Epoch [1/2], Iter [1098/3125], train_loss:0.125293
Epoch [1/2], Iter [1099/3125], train_loss:0.126372
Epoch [1/2], Iter [1100/3125], train_loss:0.122323
Epoch [1/2], Iter [1101/3125], train_loss:0.107649
Epoch [1/2], Iter [1102/3125], train_loss:0.095684
Epoch [1/2], Iter [1103/3125], train_loss:0.122204
Epoch [1/2], Iter [1104/3125], train_loss:0.104475
Epoch [1/2], Iter [1105/3125], train_loss:0.134337
Epoch [1/2], Iter [1106/3125], train_loss:0.106109
Epoch [1/2], Iter [1107/3125], train_loss:0.117644
Epoch [1/2], Iter [1108/3125], train_loss:0.123394
Epoch [1/2], Iter [1109/3125], train_loss:0.104284
Epoch [1/2], Iter [1110/3125], train_loss:0.122454
Epoch [1/2], Iter [1111/3125], train_loss:0.121269
Epoch [1/2], Iter [1112/3125], train_loss:0.127860
Epoch [1/2], Iter [1113/3125], train_loss:0.144616
Epoch [1/2], Iter [1114/3125], train_loss:0.107651
Epoch [1/2], Iter [1115/3125], train_loss:0.141473
Epoch [1/2], Iter [1116/3125], train_loss:0.125693
Epoch [1/2], Iter [1117/3125], train_loss:0.131396
Epoch [1/2], Iter [1118/3125], train_loss:0.093923
Epoch [1/2], Iter [1119/3125], train_loss:0.134721
Epoch [1/2], Iter [1120/3125], train_loss:0.093752
Epoch [1/2], Iter [1121/3125], train_loss:0.128318
Epoch [1/2], Iter [1122/3125], train_loss:0.130023
Epoch [1/2], Iter [1123/3125], train_loss:0.127883
Epoch [1/2], Iter [1124/3125], train_loss:0.131423
Epoch [1/2], Iter [1125/3125], train_loss:0.121582
Epoch [1/2], Iter [1126/3125], train_loss:0.122645
Epoch [1/2], Iter [1127/3125], train_loss:0.132357
Epoch [1/2], Iter [1128/3125], train_loss:0.127798
Epoch [1/2], Iter [1129/3125], train_loss:0.130915
Epoch [1/2], Iter [1130/3125], train_loss:0.116867
Epoch [1/2], Iter [1131/3125], train_loss:0.117003
Epoch [1/2], Iter [1132/3125], train_loss:0.110279
Epoch [1/2], Iter [1133/3125], train_loss:0.123162
Epoch [1/2], Iter [1134/3125], train_loss:0.129390
Epoch [1/2], Iter [1135/3125], train_loss:0.124176
Epoch [1/2], Iter [1136/3125], train_loss:0.140684
Epoch [1/2], Iter [1137/3125], train_loss:0.128951
Epoch [1/2], Iter [1138/3125], train_loss:0.132136
Epoch [1/2], Iter [1139/3125], train_loss:0.100313
Epoch [1/2], Iter [1140/3125], train_loss:0.125512
Epoch [1/2], Iter [1141/3125], train_loss:0.143357
Epoch [1/2], Iter [1142/3125], train_loss:0.119749
Epoch [1/2], Iter [1143/3125], train_loss:0.089367
Epoch [1/2], Iter [1144/3125], train_loss:0.143185
Epoch [1/2], Iter [1145/3125], train_loss:0.125668
Epoch [1/2], Iter [1146/3125], train_loss:0.102639
Epoch [1/2], Iter [1147/3125], train_loss:0.119610
Epoch [1/2], Iter [1148/3125], train_loss:0.123779
Epoch [1/2], Iter [1149/3125], train_loss:0.100778
Epoch [1/2], Iter [1150/3125], train_loss:0.121607
Epoch [1/2], Iter [1151/3125], train_loss:0.101407
Epoch [1/2], Iter [1152/3125], train_loss:0.135673
Epoch [1/2], Iter [1153/3125], train_loss:0.126425
Epoch [1/2], Iter [1154/3125], train_loss:0.093462
Epoch [1/2], Iter [1155/3125], train_loss:0.126472
Epoch [1/2], Iter [1156/3125], train_loss:0.130557
Epoch [1/2], Iter [1157/3125], train_loss:0.128323
Epoch [1/2], Iter [1158/3125], train_loss:0.130056
Epoch [1/2], Iter [1159/3125], train_loss:0.122581
Epoch [1/2], Iter [1160/3125], train_loss:0.086433
Epoch [1/2], Iter [1161/3125], train_loss:0.107591
Epoch [1/2], Iter [1162/3125], train_loss:0.149391
Epoch [1/2], Iter [1163/3125], train_loss:0.119678
Epoch [1/2], Iter [1164/3125], train_loss:0.108670
Epoch [1/2], Iter [1165/3125], train_loss:0.141502
Epoch [1/2], Iter [1166/3125], train_loss:0.114156
Epoch [1/2], Iter [1167/3125], train_loss:0.104277
Epoch [1/2], Iter [1168/3125], train_loss:0.119293
Epoch [1/2], Iter [1169/3125], train_loss:0.116123
Epoch [1/2], Iter [1170/3125], train_loss:0.107151
Epoch [1/2], Iter [1171/3125], train_loss:0.123827
Epoch [1/2], Iter [1172/3125], train_loss:0.109402
Epoch [1/2], Iter [1173/3125], train_loss:0.106157
Epoch [1/2], Iter [1174/3125], train_loss:0.139650
Epoch [1/2], Iter [1175/3125], train_loss:0.152351
Epoch [1/2], Iter [1176/3125], train_loss:0.112824
Epoch [1/2], Iter [1177/3125], train_loss:0.116996
Epoch [1/2], Iter [1178/3125], train_loss:0.118954
Epoch [1/2], Iter [1179/3125], train_loss:0.106760
Epoch [1/2], Iter [1180/3125], train_loss:0.136774
Epoch [1/2], Iter [1181/3125], train_loss:0.098212
Epoch [1/2], Iter [1182/3125], train_loss:0.133383
Epoch [1/2], Iter [1183/3125], train_loss:0.142688
Epoch [1/2], Iter [1184/3125], train_loss:0.098366
Epoch [1/2], Iter [1185/3125], train_loss:0.138397
Epoch [1/2], Iter [1186/3125], train_loss:0.117988
Epoch [1/2], Iter [1187/3125], train_loss:0.154568
Epoch [1/2], Iter [1188/3125], train_loss:0.118643
Epoch [1/2], Iter [1189/3125], train_loss:0.140750
Epoch [1/2], Iter [1190/3125], train_loss:0.122152
Epoch [1/2], Iter [1191/3125], train_loss:0.126351
Epoch [1/2], Iter [1192/3125], train_loss:0.113274
Epoch [1/2], Iter [1193/3125], train_loss:0.125957
Epoch [1/2], Iter [1194/3125], train_loss:0.113587
Epoch [1/2], Iter [1195/3125], train_loss:0.116307
Epoch [1/2], Iter [1196/3125], train_loss:0.108461
Epoch [1/2], Iter [1197/3125], train_loss:0.132879
Epoch [1/2], Iter [1198/3125], train_loss:0.157118
Epoch [1/2], Iter [1199/3125], train_loss:0.109573
Epoch [1/2], Iter [1200/3125], train_loss:0.086982
Epoch [1/2], Iter [1201/3125], train_loss:0.139072
Epoch [1/2], Iter [1202/3125], train_loss:0.128344
Epoch [1/2], Iter [1203/3125], train_loss:0.110572
Epoch [1/2], Iter [1204/3125], train_loss:0.085608
Epoch [1/2], Iter [1205/3125], train_loss:0.113875
Epoch [1/2], Iter [1206/3125], train_loss:0.111099
Epoch [1/2], Iter [1207/3125], train_loss:0.100557
Epoch [1/2], Iter [1208/3125], train_loss:0.132341
Epoch [1/2], Iter [1209/3125], train_loss:0.116466
Epoch [1/2], Iter [1210/3125], train_loss:0.113626
Epoch [1/2], Iter [1211/3125], train_loss:0.121723
Epoch [1/2], Iter [1212/3125], train_loss:0.104577
Epoch [1/2], Iter [1213/3125], train_loss:0.096895
Epoch [1/2], Iter [1214/3125], train_loss:0.120486
Epoch [1/2], Iter [1215/3125], train_loss:0.107735
Epoch [1/2], Iter [1216/3125], train_loss:0.136918
Epoch [1/2], Iter [1217/3125], train_loss:0.101629
Epoch [1/2], Iter [1218/3125], train_loss:0.110400
Epoch [1/2], Iter [1219/3125], train_loss:0.123551
Epoch [1/2], Iter [1220/3125], train_loss:0.132686
Epoch [1/2], Iter [1221/3125], train_loss:0.105168
Epoch [1/2], Iter [1222/3125], train_loss:0.148806
Epoch [1/2], Iter [1223/3125], train_loss:0.103599
Epoch [1/2], Iter [1224/3125], train_loss:0.102260
Epoch [1/2], Iter [1225/3125], train_loss:0.139908
Epoch [1/2], Iter [1226/3125], train_loss:0.150834
Epoch [1/2], Iter [1227/3125], train_loss:0.074731
Epoch [1/2], Iter [1228/3125], train_loss:0.098475
Epoch [1/2], Iter [1229/3125], train_loss:0.144385
Epoch [1/2], Iter [1230/3125], train_loss:0.121909
Epoch [1/2], Iter [1231/3125], train_loss:0.114415
Epoch [1/2], Iter [1232/3125], train_loss:0.102998
Epoch [1/2], Iter [1233/3125], train_loss:0.130734
Epoch [1/2], Iter [1234/3125], train_loss:0.100877
Epoch [1/2], Iter [1235/3125], train_loss:0.108643
Epoch [1/2], Iter [1236/3125], train_loss:0.140781
Epoch [1/2], Iter [1237/3125], train_loss:0.131204
Epoch [1/2], Iter [1238/3125], train_loss:0.158854
Epoch [1/2], Iter [1239/3125], train_loss:0.127776
Epoch [1/2], Iter [1240/3125], train_loss:0.148763
Epoch [1/2], Iter [1241/3125], train_loss:0.120135
Epoch [1/2], Iter [1242/3125], train_loss:0.120117
Epoch [1/2], Iter [1243/3125], train_loss:0.161515
Epoch [1/2], Iter [1244/3125], train_loss:0.153187
Epoch [1/2], Iter [1245/3125], train_loss:0.130377
Epoch [1/2], Iter [1246/3125], train_loss:0.135746
Epoch [1/2], Iter [1247/3125], train_loss:0.133350
Epoch [1/2], Iter [1248/3125], train_loss:0.146740
Epoch [1/2], Iter [1249/3125], train_loss:0.106535
Epoch [1/2], Iter [1250/3125], train_loss:0.118668
Epoch [1/2], Iter [1251/3125], train_loss:0.131747
Epoch [1/2], Iter [1252/3125], train_loss:0.130888
Epoch [1/2], Iter [1253/3125], train_loss:0.115214
Epoch [1/2], Iter [1254/3125], train_loss:0.135826
Epoch [1/2], Iter [1255/3125], train_loss:0.126973
Epoch [1/2], Iter [1256/3125], train_loss:0.123112
Epoch [1/2], Iter [1257/3125], train_loss:0.116337
Epoch [1/2], Iter [1258/3125], train_loss:0.122621
Epoch [1/2], Iter [1259/3125], train_loss:0.111832
Epoch [1/2], Iter [1260/3125], train_loss:0.104192
Epoch [1/2], Iter [1261/3125], train_loss:0.098209
Epoch [1/2], Iter [1262/3125], train_loss:0.116020
Epoch [1/2], Iter [1263/3125], train_loss:0.124493
Epoch [1/2], Iter [1264/3125], train_loss:0.112971
Epoch [1/2], Iter [1265/3125], train_loss:0.128588
Epoch [1/2], Iter [1266/3125], train_loss:0.110129
Epoch [1/2], Iter [1267/3125], train_loss:0.131274
Epoch [1/2], Iter [1268/3125], train_loss:0.121199
Epoch [1/2], Iter [1269/3125], train_loss:0.125670
Epoch [1/2], Iter [1270/3125], train_loss:0.132897
Epoch [1/2], Iter [1271/3125], train_loss:0.149063
Epoch [1/2], Iter [1272/3125], train_loss:0.094635
Epoch [1/2], Iter [1273/3125], train_loss:0.137337
Epoch [1/2], Iter [1274/3125], train_loss:0.144458
Epoch [1/2], Iter [1275/3125], train_loss:0.112834
Epoch [1/2], Iter [1276/3125], train_loss:0.124261
Epoch [1/2], Iter [1277/3125], train_loss:0.129183
Epoch [1/2], Iter [1278/3125], train_loss:0.161575
Epoch [1/2], Iter [1279/3125], train_loss:0.106391
Epoch [1/2], Iter [1280/3125], train_loss:0.112518
Epoch [1/2], Iter [1281/3125], train_loss:0.110986
Epoch [1/2], Iter [1282/3125], train_loss:0.108414
Epoch [1/2], Iter [1283/3125], train_loss:0.152765
Epoch [1/2], Iter [1284/3125], train_loss:0.121458
Epoch [1/2], Iter [1285/3125], train_loss:0.108105
Epoch [1/2], Iter [1286/3125], train_loss:0.122133
Epoch [1/2], Iter [1287/3125], train_loss:0.119404
Epoch [1/2], Iter [1288/3125], train_loss:0.123093
Epoch [1/2], Iter [1289/3125], train_loss:0.110909
Epoch [1/2], Iter [1290/3125], train_loss:0.115075
Epoch [1/2], Iter [1291/3125], train_loss:0.094410
Epoch [1/2], Iter [1292/3125], train_loss:0.110264
Epoch [1/2], Iter [1293/3125], train_loss:0.146368
Epoch [1/2], Iter [1294/3125], train_loss:0.123814
Epoch [1/2], Iter [1295/3125], train_loss:0.112168
Epoch [1/2], Iter [1296/3125], train_loss:0.102267
Epoch [1/2], Iter [1297/3125], train_loss:0.115881
Epoch [1/2], Iter [1298/3125], train_loss:0.130322
Epoch [1/2], Iter [1299/3125], train_loss:0.131473
Epoch [1/2], Iter [1300/3125], train_loss:0.163199
Epoch [1/2], Iter [1301/3125], train_loss:0.113640
Epoch [1/2], Iter [1302/3125], train_loss:0.127416
Epoch [1/2], Iter [1303/3125], train_loss:0.113280
Epoch [1/2], Iter [1304/3125], train_loss:0.123337
Epoch [1/2], Iter [1305/3125], train_loss:0.091916
Epoch [1/2], Iter [1306/3125], train_loss:0.080357
Epoch [1/2], Iter [1307/3125], train_loss:0.094215
Epoch [1/2], Iter [1308/3125], train_loss:0.110574
Epoch [1/2], Iter [1309/3125], train_loss:0.122407
Epoch [1/2], Iter [1310/3125], train_loss:0.109602
Epoch [1/2], Iter [1311/3125], train_loss:0.092256
Epoch [1/2], Iter [1312/3125], train_loss:0.089961
Epoch [1/2], Iter [1313/3125], train_loss:0.138478
Epoch [1/2], Iter [1314/3125], train_loss:0.130750
Epoch [1/2], Iter [1315/3125], train_loss:0.098626
Epoch [1/2], Iter [1316/3125], train_loss:0.130637
Epoch [1/2], Iter [1317/3125], train_loss:0.113032
Epoch [1/2], Iter [1318/3125], train_loss:0.141212
Epoch [1/2], Iter [1319/3125], train_loss:0.159202
Epoch [1/2], Iter [1320/3125], train_loss:0.104703
Epoch [1/2], Iter [1321/3125], train_loss:0.130061
Epoch [1/2], Iter [1322/3125], train_loss:0.098450
Epoch [1/2], Iter [1323/3125], train_loss:0.118011
Epoch [1/2], Iter [1324/3125], train_loss:0.119083
Epoch [1/2], Iter [1325/3125], train_loss:0.122753
Epoch [1/2], Iter [1326/3125], train_loss:0.110272
Epoch [1/2], Iter [1327/3125], train_loss:0.124699
Epoch [1/2], Iter [1328/3125], train_loss:0.125460
Epoch [1/2], Iter [1329/3125], train_loss:0.120695
Epoch [1/2], Iter [1330/3125], train_loss:0.124485
Epoch [1/2], Iter [1331/3125], train_loss:0.110135
Epoch [1/2], Iter [1332/3125], train_loss:0.107310
Epoch [1/2], Iter [1333/3125], train_loss:0.114968
Epoch [1/2], Iter [1334/3125], train_loss:0.110071
Epoch [1/2], Iter [1335/3125], train_loss:0.103416
Epoch [1/2], Iter [1336/3125], train_loss:0.108320
Epoch [1/2], Iter [1337/3125], train_loss:0.133014
Epoch [1/2], Iter [1338/3125], train_loss:0.112441
Epoch [1/2], Iter [1339/3125], train_loss:0.104479
Epoch [1/2], Iter [1340/3125], train_loss:0.116247
Epoch [1/2], Iter [1341/3125], train_loss:0.130177
Epoch [1/2], Iter [1342/3125], train_loss:0.124418
Epoch [1/2], Iter [1343/3125], train_loss:0.131596
Epoch [1/2], Iter [1344/3125], train_loss:0.148934
Epoch [1/2], Iter [1345/3125], train_loss:0.131297
Epoch [1/2], Iter [1346/3125], train_loss:0.114347
Epoch [1/2], Iter [1347/3125], train_loss:0.105459
Epoch [1/2], Iter [1348/3125], train_loss:0.091900
Epoch [1/2], Iter [1349/3125], train_loss:0.121696
Epoch [1/2], Iter [1350/3125], train_loss:0.135702
Epoch [1/2], Iter [1351/3125], train_loss:0.084750
Epoch [1/2], Iter [1352/3125], train_loss:0.102412
Epoch [1/2], Iter [1353/3125], train_loss:0.136172
Epoch [1/2], Iter [1354/3125], train_loss:0.138000
Epoch [1/2], Iter [1355/3125], train_loss:0.080419
Epoch [1/2], Iter [1356/3125], train_loss:0.115543
Epoch [1/2], Iter [1357/3125], train_loss:0.124386
Epoch [1/2], Iter [1358/3125], train_loss:0.115385
Epoch [1/2], Iter [1359/3125], train_loss:0.127010
Epoch [1/2], Iter [1360/3125], train_loss:0.120455
Epoch [1/2], Iter [1361/3125], train_loss:0.117791
Epoch [1/2], Iter [1362/3125], train_loss:0.152406
Epoch [1/2], Iter [1363/3125], train_loss:0.109988
Epoch [1/2], Iter [1364/3125], train_loss:0.137212
Epoch [1/2], Iter [1365/3125], train_loss:0.104549
Epoch [1/2], Iter [1366/3125], train_loss:0.132258
Epoch [1/2], Iter [1367/3125], train_loss:0.116934
Epoch [1/2], Iter [1368/3125], train_loss:0.090230
Epoch [1/2], Iter [1369/3125], train_loss:0.109976
Epoch [1/2], Iter [1370/3125], train_loss:0.116305
Epoch [1/2], Iter [1371/3125], train_loss:0.124090
Epoch [1/2], Iter [1372/3125], train_loss:0.119928
Epoch [1/2], Iter [1373/3125], train_loss:0.140690
Epoch [1/2], Iter [1374/3125], train_loss:0.101751
Epoch [1/2], Iter [1375/3125], train_loss:0.094104
Epoch [1/2], Iter [1376/3125], train_loss:0.108286
Epoch [1/2], Iter [1377/3125], train_loss:0.100203
Epoch [1/2], Iter [1378/3125], train_loss:0.158961
Epoch [1/2], Iter [1379/3125], train_loss:0.128643
Epoch [1/2], Iter [1380/3125], train_loss:0.117819
Epoch [1/2], Iter [1381/3125], train_loss:0.109645
Epoch [1/2], Iter [1382/3125], train_loss:0.150495
Epoch [1/2], Iter [1383/3125], train_loss:0.115506
Epoch [1/2], Iter [1384/3125], train_loss:0.117302
Epoch [1/2], Iter [1385/3125], train_loss:0.132320
Epoch [1/2], Iter [1386/3125], train_loss:0.117862
Epoch [1/2], Iter [1387/3125], train_loss:0.088007
Epoch [1/2], Iter [1388/3125], train_loss:0.100484
Epoch [1/2], Iter [1389/3125], train_loss:0.152095
Epoch [1/2], Iter [1390/3125], train_loss:0.130487
Epoch [1/2], Iter [1391/3125], train_loss:0.107005
Epoch [1/2], Iter [1392/3125], train_loss:0.153524
Epoch [1/2], Iter [1393/3125], train_loss:0.106606
Epoch [1/2], Iter [1394/3125], train_loss:0.103809
Epoch [1/2], Iter [1395/3125], train_loss:0.112907
Epoch [1/2], Iter [1396/3125], train_loss:0.095083
Epoch [1/2], Iter [1397/3125], train_loss:0.115779
Epoch [1/2], Iter [1398/3125], train_loss:0.085522
Epoch [1/2], Iter [1399/3125], train_loss:0.124290
Epoch [1/2], Iter [1400/3125], train_loss:0.072803
Epoch [1/2], Iter [1401/3125], train_loss:0.106329
Epoch [1/2], Iter [1402/3125], train_loss:0.110175
Epoch [1/2], Iter [1403/3125], train_loss:0.135516
Epoch [1/2], Iter [1404/3125], train_loss:0.126846
Epoch [1/2], Iter [1405/3125], train_loss:0.125609
Epoch [1/2], Iter [1406/3125], train_loss:0.104507
Epoch [1/2], Iter [1407/3125], train_loss:0.110604
Epoch [1/2], Iter [1408/3125], train_loss:0.102211
Epoch [1/2], Iter [1409/3125], train_loss:0.127775
Epoch [1/2], Iter [1410/3125], train_loss:0.124930
Epoch [1/2], Iter [1411/3125], train_loss:0.113795
Epoch [1/2], Iter [1412/3125], train_loss:0.117095
Epoch [1/2], Iter [1413/3125], train_loss:0.108768
Epoch [1/2], Iter [1414/3125], train_loss:0.104051
Epoch [1/2], Iter [1415/3125], train_loss:0.114361
Epoch [1/2], Iter [1416/3125], train_loss:0.094833
Epoch [1/2], Iter [1417/3125], train_loss:0.122657
Epoch [1/2], Iter [1418/3125], train_loss:0.112632
Epoch [1/2], Iter [1419/3125], train_loss:0.107173
Epoch [1/2], Iter [1420/3125], train_loss:0.114673
Epoch [1/2], Iter [1421/3125], train_loss:0.108424
Epoch [1/2], Iter [1422/3125], train_loss:0.117980
Epoch [1/2], Iter [1423/3125], train_loss:0.108099
Epoch [1/2], Iter [1424/3125], train_loss:0.125009
Epoch [1/2], Iter [1425/3125], train_loss:0.103458
Epoch [1/2], Iter [1426/3125], train_loss:0.103903
Epoch [1/2], Iter [1427/3125], train_loss:0.087423
Epoch [1/2], Iter [1428/3125], train_loss:0.126077
Epoch [1/2], Iter [1429/3125], train_loss:0.138295
Epoch [1/2], Iter [1430/3125], train_loss:0.143625
Epoch [1/2], Iter [1431/3125], train_loss:0.116680
Epoch [1/2], Iter [1432/3125], train_loss:0.107513
Epoch [1/2], Iter [1433/3125], train_loss:0.090071
Epoch [1/2], Iter [1434/3125], train_loss:0.121352
Epoch [1/2], Iter [1435/3125], train_loss:0.143259
Epoch [1/2], Iter [1436/3125], train_loss:0.108410
Epoch [1/2], Iter [1437/3125], train_loss:0.131677
Epoch [1/2], Iter [1438/3125], train_loss:0.115317
Epoch [1/2], Iter [1439/3125], train_loss:0.114774
Epoch [1/2], Iter [1440/3125], train_loss:0.088071
Epoch [1/2], Iter [1441/3125], train_loss:0.127111
Epoch [1/2], Iter [1442/3125], train_loss:0.121695
Epoch [1/2], Iter [1443/3125], train_loss:0.123811
Epoch [1/2], Iter [1444/3125], train_loss:0.110418
Epoch [1/2], Iter [1445/3125], train_loss:0.112827
Epoch [1/2], Iter [1446/3125], train_loss:0.110010
Epoch [1/2], Iter [1447/3125], train_loss:0.108433
Epoch [1/2], Iter [1448/3125], train_loss:0.100427
Epoch [1/2], Iter [1449/3125], train_loss:0.132875
Epoch [1/2], Iter [1450/3125], train_loss:0.132393
Epoch [1/2], Iter [1451/3125], train_loss:0.135795
Epoch [1/2], Iter [1452/3125], train_loss:0.125536
Epoch [1/2], Iter [1453/3125], train_loss:0.126423
Epoch [1/2], Iter [1454/3125], train_loss:0.092239
Epoch [1/2], Iter [1455/3125], train_loss:0.154004
Epoch [1/2], Iter [1456/3125], train_loss:0.111715
Epoch [1/2], Iter [1457/3125], train_loss:0.128267
Epoch [1/2], Iter [1458/3125], train_loss:0.131167
Epoch [1/2], Iter [1459/3125], train_loss:0.122671
Epoch [1/2], Iter [1460/3125], train_loss:0.140966
Epoch [1/2], Iter [1461/3125], train_loss:0.114198
Epoch [1/2], Iter [1462/3125], train_loss:0.129094
Epoch [1/2], Iter [1463/3125], train_loss:0.109807
Epoch [1/2], Iter [1464/3125], train_loss:0.146480
Epoch [1/2], Iter [1465/3125], train_loss:0.105395
Epoch [1/2], Iter [1466/3125], train_loss:0.133418
Epoch [1/2], Iter [1467/3125], train_loss:0.131397
Epoch [1/2], Iter [1468/3125], train_loss:0.116122
Epoch [1/2], Iter [1469/3125], train_loss:0.114184
Epoch [1/2], Iter [1470/3125], train_loss:0.086669
Epoch [1/2], Iter [1471/3125], train_loss:0.098426
Epoch [1/2], Iter [1472/3125], train_loss:0.143860
Epoch [1/2], Iter [1473/3125], train_loss:0.109508
Epoch [1/2], Iter [1474/3125], train_loss:0.099417
Epoch [1/2], Iter [1475/3125], train_loss:0.137157
Epoch [1/2], Iter [1476/3125], train_loss:0.129953
Epoch [1/2], Iter [1477/3125], train_loss:0.112809
Epoch [1/2], Iter [1478/3125], train_loss:0.113120
Epoch [1/2], Iter [1479/3125], train_loss:0.090743
Epoch [1/2], Iter [1480/3125], train_loss:0.129271
Epoch [1/2], Iter [1481/3125], train_loss:0.137313
Epoch [1/2], Iter [1482/3125], train_loss:0.108650
Epoch [1/2], Iter [1483/3125], train_loss:0.137887
Epoch [1/2], Iter [1484/3125], train_loss:0.117343
Epoch [1/2], Iter [1485/3125], train_loss:0.114352
Epoch [1/2], Iter [1486/3125], train_loss:0.101056
Epoch [1/2], Iter [1487/3125], train_loss:0.120009
Epoch [1/2], Iter [1488/3125], train_loss:0.122330
Epoch [1/2], Iter [1489/3125], train_loss:0.117299
Epoch [1/2], Iter [1490/3125], train_loss:0.108325
Epoch [1/2], Iter [1491/3125], train_loss:0.119696
Epoch [1/2], Iter [1492/3125], train_loss:0.155192
Epoch [1/2], Iter [1493/3125], train_loss:0.134578
Epoch [1/2], Iter [1494/3125], train_loss:0.114686
Epoch [1/2], Iter [1495/3125], train_loss:0.143138
Epoch [1/2], Iter [1496/3125], train_loss:0.098434
Epoch [1/2], Iter [1497/3125], train_loss:0.085917
Epoch [1/2], Iter [1498/3125], train_loss:0.115986
Epoch [1/2], Iter [1499/3125], train_loss:0.142638
Epoch [1/2], Iter [1500/3125], train_loss:0.137145
Epoch [1/2], Iter [1501/3125], train_loss:0.097649
Epoch [1/2], Iter [1502/3125], train_loss:0.114596
Epoch [1/2], Iter [1503/3125], train_loss:0.114260
Epoch [1/2], Iter [1504/3125], train_loss:0.109256
Epoch [1/2], Iter [1505/3125], train_loss:0.116249
Epoch [1/2], Iter [1506/3125], train_loss:0.117468
Epoch [1/2], Iter [1507/3125], train_loss:0.106030
Epoch [1/2], Iter [1508/3125], train_loss:0.125583
Epoch [1/2], Iter [1509/3125], train_loss:0.126954
Epoch [1/2], Iter [1510/3125], train_loss:0.105045
Epoch [1/2], Iter [1511/3125], train_loss:0.091526
Epoch [1/2], Iter [1512/3125], train_loss:0.110302
Epoch [1/2], Iter [1513/3125], train_loss:0.106257
Epoch [1/2], Iter [1514/3125], train_loss:0.089856
Epoch [1/2], Iter [1515/3125], train_loss:0.122390
Epoch [1/2], Iter [1516/3125], train_loss:0.148043
Epoch [1/2], Iter [1517/3125], train_loss:0.089684
Epoch [1/2], Iter [1518/3125], train_loss:0.126691
Epoch [1/2], Iter [1519/3125], train_loss:0.093548
Epoch [1/2], Iter [1520/3125], train_loss:0.112327
Epoch [1/2], Iter [1521/3125], train_loss:0.128736
Epoch [1/2], Iter [1522/3125], train_loss:0.141749
Epoch [1/2], Iter [1523/3125], train_loss:0.095694
Epoch [1/2], Iter [1524/3125], train_loss:0.126285
Epoch [1/2], Iter [1525/3125], train_loss:0.117021
Epoch [1/2], Iter [1526/3125], train_loss:0.120626
Epoch [1/2], Iter [1527/3125], train_loss:0.118179
Epoch [1/2], Iter [1528/3125], train_loss:0.129668
Epoch [1/2], Iter [1529/3125], train_loss:0.103961
Epoch [1/2], Iter [1530/3125], train_loss:0.096230
Epoch [1/2], Iter [1531/3125], train_loss:0.155981
Epoch [1/2], Iter [1532/3125], train_loss:0.112469
Epoch [1/2], Iter [1533/3125], train_loss:0.116868
Epoch [1/2], Iter [1534/3125], train_loss:0.137747
Epoch [1/2], Iter [1535/3125], train_loss:0.098376
Epoch [1/2], Iter [1536/3125], train_loss:0.104237
Epoch [1/2], Iter [1537/3125], train_loss:0.135685
Epoch [1/2], Iter [1538/3125], train_loss:0.077748
Epoch [1/2], Iter [1539/3125], train_loss:0.110037
Epoch [1/2], Iter [1540/3125], train_loss:0.091916
Epoch [1/2], Iter [1541/3125], train_loss:0.094626
Epoch [1/2], Iter [1542/3125], train_loss:0.103348
Epoch [1/2], Iter [1543/3125], train_loss:0.086694
Epoch [1/2], Iter [1544/3125], train_loss:0.106981
Epoch [1/2], Iter [1545/3125], train_loss:0.105662
Epoch [1/2], Iter [1546/3125], train_loss:0.117666
Epoch [1/2], Iter [1547/3125], train_loss:0.085815
Epoch [1/2], Iter [1548/3125], train_loss:0.127396
Epoch [1/2], Iter [1549/3125], train_loss:0.126074
Epoch [1/2], Iter [1550/3125], train_loss:0.095834
Epoch [1/2], Iter [1551/3125], train_loss:0.107446
Epoch [1/2], Iter [1552/3125], train_loss:0.114715
Epoch [1/2], Iter [1553/3125], train_loss:0.098569
Epoch [1/2], Iter [1554/3125], train_loss:0.110418
Epoch [1/2], Iter [1555/3125], train_loss:0.134563
Epoch [1/2], Iter [1556/3125], train_loss:0.108616
Epoch [1/2], Iter [1557/3125], train_loss:0.100360
Epoch [1/2], Iter [1558/3125], train_loss:0.117380
Epoch [1/2], Iter [1559/3125], train_loss:0.117120
Epoch [1/2], Iter [1560/3125], train_loss:0.136910
Epoch [1/2], Iter [1561/3125], train_loss:0.107711
Epoch [1/2], Iter [1562/3125], train_loss:0.117605
Epoch [1/2], Iter [1563/3125], train_loss:0.102154
Epoch [1/2], Iter [1564/3125], train_loss:0.108402
Epoch [1/2], Iter [1565/3125], train_loss:0.093580
Epoch [1/2], Iter [1566/3125], train_loss:0.135590
Epoch [1/2], Iter [1567/3125], train_loss:0.099009
Epoch [1/2], Iter [1568/3125], train_loss:0.121854
Epoch [1/2], Iter [1569/3125], train_loss:0.109978
Epoch [1/2], Iter [1570/3125], train_loss:0.122701
Epoch [1/2], Iter [1571/3125], train_loss:0.114001
Epoch [1/2], Iter [1572/3125], train_loss:0.130748
Epoch [1/2], Iter [1573/3125], train_loss:0.114292
Epoch [1/2], Iter [1574/3125], train_loss:0.124781
Epoch [1/2], Iter [1575/3125], train_loss:0.138773
Epoch [1/2], Iter [1576/3125], train_loss:0.131097
Epoch [1/2], Iter [1577/3125], train_loss:0.105329
Epoch [1/2], Iter [1578/3125], train_loss:0.114761
Epoch [1/2], Iter [1579/3125], train_loss:0.094465
Epoch [1/2], Iter [1580/3125], train_loss:0.111704
Epoch [1/2], Iter [1581/3125], train_loss:0.140406
Epoch [1/2], Iter [1582/3125], train_loss:0.102851
Epoch [1/2], Iter [1583/3125], train_loss:0.106198
Epoch [1/2], Iter [1584/3125], train_loss:0.120307
Epoch [1/2], Iter [1585/3125], train_loss:0.126306
Epoch [1/2], Iter [1586/3125], train_loss:0.123201
Epoch [1/2], Iter [1587/3125], train_loss:0.100626
Epoch [1/2], Iter [1588/3125], train_loss:0.120522
Epoch [1/2], Iter [1589/3125], train_loss:0.109287
Epoch [1/2], Iter [1590/3125], train_loss:0.116193
Epoch [1/2], Iter [1591/3125], train_loss:0.100414
Epoch [1/2], Iter [1592/3125], train_loss:0.117426
Epoch [1/2], Iter [1593/3125], train_loss:0.090667
Epoch [1/2], Iter [1594/3125], train_loss:0.096649
Epoch [1/2], Iter [1595/3125], train_loss:0.124549
Epoch [1/2], Iter [1596/3125], train_loss:0.158632
Epoch [1/2], Iter [1597/3125], train_loss:0.126395
Epoch [1/2], Iter [1598/3125], train_loss:0.103779
Epoch [1/2], Iter [1599/3125], train_loss:0.114746
Epoch [1/2], Iter [1600/3125], train_loss:0.123276
Epoch [1/2], Iter [1601/3125], train_loss:0.097323
Epoch [1/2], Iter [1602/3125], train_loss:0.097028
Epoch [1/2], Iter [1603/3125], train_loss:0.136745
Epoch [1/2], Iter [1604/3125], train_loss:0.115201
Epoch [1/2], Iter [1605/3125], train_loss:0.107482
Epoch [1/2], Iter [1606/3125], train_loss:0.085949
Epoch [1/2], Iter [1607/3125], train_loss:0.130795
Epoch [1/2], Iter [1608/3125], train_loss:0.122182
Epoch [1/2], Iter [1609/3125], train_loss:0.122975
Epoch [1/2], Iter [1610/3125], train_loss:0.123023
Epoch [1/2], Iter [1611/3125], train_loss:0.143675
Epoch [1/2], Iter [1612/3125], train_loss:0.108047
Epoch [1/2], Iter [1613/3125], train_loss:0.114930
Epoch [1/2], Iter [1614/3125], train_loss:0.105145
Epoch [1/2], Iter [1615/3125], train_loss:0.141871
Epoch [1/2], Iter [1616/3125], train_loss:0.109234
Epoch [1/2], Iter [1617/3125], train_loss:0.115216
Epoch [1/2], Iter [1618/3125], train_loss:0.081389
Epoch [1/2], Iter [1619/3125], train_loss:0.099080
Epoch [1/2], Iter [1620/3125], train_loss:0.102463
Epoch [1/2], Iter [1621/3125], train_loss:0.108137
Epoch [1/2], Iter [1622/3125], train_loss:0.098112
Epoch [1/2], Iter [1623/3125], train_loss:0.114499
Epoch [1/2], Iter [1624/3125], train_loss:0.102529
Epoch [1/2], Iter [1625/3125], train_loss:0.128080
Epoch [1/2], Iter [1626/3125], train_loss:0.109938
Epoch [1/2], Iter [1627/3125], train_loss:0.097465
Epoch [1/2], Iter [1628/3125], train_loss:0.112853
Epoch [1/2], Iter [1629/3125], train_loss:0.087902
Epoch [1/2], Iter [1630/3125], train_loss:0.111491
Epoch [1/2], Iter [1631/3125], train_loss:0.107459
Epoch [1/2], Iter [1632/3125], train_loss:0.101524
Epoch [1/2], Iter [1633/3125], train_loss:0.117303
Epoch [1/2], Iter [1634/3125], train_loss:0.136640
Epoch [1/2], Iter [1635/3125], train_loss:0.104045
Epoch [1/2], Iter [1636/3125], train_loss:0.098606
Epoch [1/2], Iter [1637/3125], train_loss:0.109633
Epoch [1/2], Iter [1638/3125], train_loss:0.120075
Epoch [1/2], Iter [1639/3125], train_loss:0.140995
Epoch [1/2], Iter [1640/3125], train_loss:0.105396
Epoch [1/2], Iter [1641/3125], train_loss:0.114681
Epoch [1/2], Iter [1642/3125], train_loss:0.093426
Epoch [1/2], Iter [1643/3125], train_loss:0.108103
Epoch [1/2], Iter [1644/3125], train_loss:0.131016
Epoch [1/2], Iter [1645/3125], train_loss:0.133334
Epoch [1/2], Iter [1646/3125], train_loss:0.076322
Epoch [1/2], Iter [1647/3125], train_loss:0.104391
Epoch [1/2], Iter [1648/3125], train_loss:0.133650
Epoch [1/2], Iter [1649/3125], train_loss:0.117201
Epoch [1/2], Iter [1650/3125], train_loss:0.095546
Epoch [1/2], Iter [1651/3125], train_loss:0.112587
Epoch [1/2], Iter [1652/3125], train_loss:0.106575
Epoch [1/2], Iter [1653/3125], train_loss:0.079604
Epoch [1/2], Iter [1654/3125], train_loss:0.098822
Epoch [1/2], Iter [1655/3125], train_loss:0.094789
Epoch [1/2], Iter [1656/3125], train_loss:0.148320
Epoch [1/2], Iter [1657/3125], train_loss:0.123790
Epoch [1/2], Iter [1658/3125], train_loss:0.106912
Epoch [1/2], Iter [1659/3125], train_loss:0.109952
Epoch [1/2], Iter [1660/3125], train_loss:0.131533
Epoch [1/2], Iter [1661/3125], train_loss:0.123524
Epoch [1/2], Iter [1662/3125], train_loss:0.134478
Epoch [1/2], Iter [1663/3125], train_loss:0.127586
Epoch [1/2], Iter [1664/3125], train_loss:0.121722
Epoch [1/2], Iter [1665/3125], train_loss:0.139394
Epoch [1/2], Iter [1666/3125], train_loss:0.096734
Epoch [1/2], Iter [1667/3125], train_loss:0.092901
Epoch [1/2], Iter [1668/3125], train_loss:0.111155
Epoch [1/2], Iter [1669/3125], train_loss:0.118388
Epoch [1/2], Iter [1670/3125], train_loss:0.100780
Epoch [1/2], Iter [1671/3125], train_loss:0.109779
Epoch [1/2], Iter [1672/3125], train_loss:0.131700
Epoch [1/2], Iter [1673/3125], train_loss:0.141507
Epoch [1/2], Iter [1674/3125], train_loss:0.109175
Epoch [1/2], Iter [1675/3125], train_loss:0.092189
Epoch [1/2], Iter [1676/3125], train_loss:0.101953
Epoch [1/2], Iter [1677/3125], train_loss:0.133398
Epoch [1/2], Iter [1678/3125], train_loss:0.141626
Epoch [1/2], Iter [1679/3125], train_loss:0.106853
Epoch [1/2], Iter [1680/3125], train_loss:0.111855
Epoch [1/2], Iter [1681/3125], train_loss:0.113937
Epoch [1/2], Iter [1682/3125], train_loss:0.105170
Epoch [1/2], Iter [1683/3125], train_loss:0.100870
Epoch [1/2], Iter [1684/3125], train_loss:0.117880
Epoch [1/2], Iter [1685/3125], train_loss:0.092299
Epoch [1/2], Iter [1686/3125], train_loss:0.108514
Epoch [1/2], Iter [1687/3125], train_loss:0.091988
Epoch [1/2], Iter [1688/3125], train_loss:0.142538
Epoch [1/2], Iter [1689/3125], train_loss:0.109092
Epoch [1/2], Iter [1690/3125], train_loss:0.119447
Epoch [1/2], Iter [1691/3125], train_loss:0.091529
Epoch [1/2], Iter [1692/3125], train_loss:0.113592
Epoch [1/2], Iter [1693/3125], train_loss:0.138641
Epoch [1/2], Iter [1694/3125], train_loss:0.081737
Epoch [1/2], Iter [1695/3125], train_loss:0.104201
Epoch [1/2], Iter [1696/3125], train_loss:0.130549
Epoch [1/2], Iter [1697/3125], train_loss:0.108230
Epoch [1/2], Iter [1698/3125], train_loss:0.123517
Epoch [1/2], Iter [1699/3125], train_loss:0.105155
Epoch [1/2], Iter [1700/3125], train_loss:0.099825
Epoch [1/2], Iter [1701/3125], train_loss:0.119471
Epoch [1/2], Iter [1702/3125], train_loss:0.102020
Epoch [1/2], Iter [1703/3125], train_loss:0.125723
Epoch [1/2], Iter [1704/3125], train_loss:0.117470
Epoch [1/2], Iter [1705/3125], train_loss:0.171311
Epoch [1/2], Iter [1706/3125], train_loss:0.113500
Epoch [1/2], Iter [1707/3125], train_loss:0.101780
Epoch [1/2], Iter [1708/3125], train_loss:0.097162
Epoch [1/2], Iter [1709/3125], train_loss:0.113087
Epoch [1/2], Iter [1710/3125], train_loss:0.121180
Epoch [1/2], Iter [1711/3125], train_loss:0.140923
Epoch [1/2], Iter [1712/3125], train_loss:0.130363
Epoch [1/2], Iter [1713/3125], train_loss:0.120499
Epoch [1/2], Iter [1714/3125], train_loss:0.129576
Epoch [1/2], Iter [1715/3125], train_loss:0.122925
Epoch [1/2], Iter [1716/3125], train_loss:0.107934
Epoch [1/2], Iter [1717/3125], train_loss:0.137756
Epoch [1/2], Iter [1718/3125], train_loss:0.118472
Epoch [1/2], Iter [1719/3125], train_loss:0.102445
Epoch [1/2], Iter [1720/3125], train_loss:0.102518
Epoch [1/2], Iter [1721/3125], train_loss:0.139594
Epoch [1/2], Iter [1722/3125], train_loss:0.097171
Epoch [1/2], Iter [1723/3125], train_loss:0.096021
Epoch [1/2], Iter [1724/3125], train_loss:0.111021
Epoch [1/2], Iter [1725/3125], train_loss:0.109239
Epoch [1/2], Iter [1726/3125], train_loss:0.095762
Epoch [1/2], Iter [1727/3125], train_loss:0.098066
Epoch [1/2], Iter [1728/3125], train_loss:0.116896
Epoch [1/2], Iter [1729/3125], train_loss:0.115975
Epoch [1/2], Iter [1730/3125], train_loss:0.124496
Epoch [1/2], Iter [1731/3125], train_loss:0.123490
Epoch [1/2], Iter [1732/3125], train_loss:0.104479
Epoch [1/2], Iter [1733/3125], train_loss:0.113522
Epoch [1/2], Iter [1734/3125], train_loss:0.103710
Epoch [1/2], Iter [1735/3125], train_loss:0.102665
Epoch [1/2], Iter [1736/3125], train_loss:0.085018
Epoch [1/2], Iter [1737/3125], train_loss:0.100424
Epoch [1/2], Iter [1738/3125], train_loss:0.127958
Epoch [1/2], Iter [1739/3125], train_loss:0.116772
Epoch [1/2], Iter [1740/3125], train_loss:0.112261
Epoch [1/2], Iter [1741/3125], train_loss:0.098929
Epoch [1/2], Iter [1742/3125], train_loss:0.128234
Epoch [1/2], Iter [1743/3125], train_loss:0.090779
Epoch [1/2], Iter [1744/3125], train_loss:0.122256
Epoch [1/2], Iter [1745/3125], train_loss:0.120534
Epoch [1/2], Iter [1746/3125], train_loss:0.097334
Epoch [1/2], Iter [1747/3125], train_loss:0.123642
Epoch [1/2], Iter [1748/3125], train_loss:0.123044
Epoch [1/2], Iter [1749/3125], train_loss:0.106322
Epoch [1/2], Iter [1750/3125], train_loss:0.097880
Epoch [1/2], Iter [1751/3125], train_loss:0.166705
Epoch [1/2], Iter [1752/3125], train_loss:0.134495
Epoch [1/2], Iter [1753/3125], train_loss:0.145566
Epoch [1/2], Iter [1754/3125], train_loss:0.121603
Epoch [1/2], Iter [1755/3125], train_loss:0.130360
Epoch [1/2], Iter [1756/3125], train_loss:0.111613
Epoch [1/2], Iter [1757/3125], train_loss:0.115353
Epoch [1/2], Iter [1758/3125], train_loss:0.118099
Epoch [1/2], Iter [1759/3125], train_loss:0.132821
Epoch [1/2], Iter [1760/3125], train_loss:0.121610
Epoch [1/2], Iter [1761/3125], train_loss:0.108354
Epoch [1/2], Iter [1762/3125], train_loss:0.109593
Epoch [1/2], Iter [1763/3125], train_loss:0.087590
Epoch [1/2], Iter [1764/3125], train_loss:0.122846
Epoch [1/2], Iter [1765/3125], train_loss:0.121044
Epoch [1/2], Iter [1766/3125], train_loss:0.117222
Epoch [1/2], Iter [1767/3125], train_loss:0.105632
Epoch [1/2], Iter [1768/3125], train_loss:0.082365
Epoch [1/2], Iter [1769/3125], train_loss:0.125430
Epoch [1/2], Iter [1770/3125], train_loss:0.122826
Epoch [1/2], Iter [1771/3125], train_loss:0.116514
Epoch [1/2], Iter [1772/3125], train_loss:0.119358
Epoch [1/2], Iter [1773/3125], train_loss:0.116099
Epoch [1/2], Iter [1774/3125], train_loss:0.136565
Epoch [1/2], Iter [1775/3125], train_loss:0.105898
Epoch [1/2], Iter [1776/3125], train_loss:0.090921
Epoch [1/2], Iter [1777/3125], train_loss:0.117271
Epoch [1/2], Iter [1778/3125], train_loss:0.098961
Epoch [1/2], Iter [1779/3125], train_loss:0.080819
Epoch [1/2], Iter [1780/3125], train_loss:0.081426
Epoch [1/2], Iter [1781/3125], train_loss:0.093929
Epoch [1/2], Iter [1782/3125], train_loss:0.117402
Epoch [1/2], Iter [1783/3125], train_loss:0.095223
Epoch [1/2], Iter [1784/3125], train_loss:0.120733
Epoch [1/2], Iter [1785/3125], train_loss:0.098692
Epoch [1/2], Iter [1786/3125], train_loss:0.115689
Epoch [1/2], Iter [1787/3125], train_loss:0.113889
Epoch [1/2], Iter [1788/3125], train_loss:0.089751
Epoch [1/2], Iter [1789/3125], train_loss:0.109842
Epoch [1/2], Iter [1790/3125], train_loss:0.089839
Epoch [1/2], Iter [1791/3125], train_loss:0.143017
Epoch [1/2], Iter [1792/3125], train_loss:0.122177
Epoch [1/2], Iter [1793/3125], train_loss:0.088301
Epoch [1/2], Iter [1794/3125], train_loss:0.116527
Epoch [1/2], Iter [1795/3125], train_loss:0.089206
Epoch [1/2], Iter [1796/3125], train_loss:0.108409
Epoch [1/2], Iter [1797/3125], train_loss:0.095537
Epoch [1/2], Iter [1798/3125], train_loss:0.100983
Epoch [1/2], Iter [1799/3125], train_loss:0.112310
Epoch [1/2], Iter [1800/3125], train_loss:0.105625
Epoch [1/2], Iter [1801/3125], train_loss:0.106045
Epoch [1/2], Iter [1802/3125], train_loss:0.118067
Epoch [1/2], Iter [1803/3125], train_loss:0.103582
Epoch [1/2], Iter [1804/3125], train_loss:0.083729
Epoch [1/2], Iter [1805/3125], train_loss:0.133233
Epoch [1/2], Iter [1806/3125], train_loss:0.100614
Epoch [1/2], Iter [1807/3125], train_loss:0.102098
Epoch [1/2], Iter [1808/3125], train_loss:0.094543
Epoch [1/2], Iter [1809/3125], train_loss:0.120425
Epoch [1/2], Iter [1810/3125], train_loss:0.121749
Epoch [1/2], Iter [1811/3125], train_loss:0.094081
Epoch [1/2], Iter [1812/3125], train_loss:0.125282
Epoch [1/2], Iter [1813/3125], train_loss:0.092221
Epoch [1/2], Iter [1814/3125], train_loss:0.120117
Epoch [1/2], Iter [1815/3125], train_loss:0.111955
Epoch [1/2], Iter [1816/3125], train_loss:0.108735
Epoch [1/2], Iter [1817/3125], train_loss:0.123501
Epoch [1/2], Iter [1818/3125], train_loss:0.087921
Epoch [1/2], Iter [1819/3125], train_loss:0.121578
Epoch [1/2], Iter [1820/3125], train_loss:0.111834
Epoch [1/2], Iter [1821/3125], train_loss:0.128368
Epoch [1/2], Iter [1822/3125], train_loss:0.111813
Epoch [1/2], Iter [1823/3125], train_loss:0.141893
Epoch [1/2], Iter [1824/3125], train_loss:0.097122
Epoch [1/2], Iter [1825/3125], train_loss:0.104660
Epoch [1/2], Iter [1826/3125], train_loss:0.151332
Epoch [1/2], Iter [1827/3125], train_loss:0.100946
Epoch [1/2], Iter [1828/3125], train_loss:0.121244
Epoch [1/2], Iter [1829/3125], train_loss:0.104100
Epoch [1/2], Iter [1830/3125], train_loss:0.087686
Epoch [1/2], Iter [1831/3125], train_loss:0.111758
Epoch [1/2], Iter [1832/3125], train_loss:0.084322
Epoch [1/2], Iter [1833/3125], train_loss:0.099852
Epoch [1/2], Iter [1834/3125], train_loss:0.107632
Epoch [1/2], Iter [1835/3125], train_loss:0.134178
Epoch [1/2], Iter [1836/3125], train_loss:0.084126
Epoch [1/2], Iter [1837/3125], train_loss:0.118831
Epoch [1/2], Iter [1838/3125], train_loss:0.118193
Epoch [1/2], Iter [1839/3125], train_loss:0.102403
Epoch [1/2], Iter [1840/3125], train_loss:0.119499
Epoch [1/2], Iter [1841/3125], train_loss:0.089647
Epoch [1/2], Iter [1842/3125], train_loss:0.123974
Epoch [1/2], Iter [1843/3125], train_loss:0.103928
Epoch [1/2], Iter [1844/3125], train_loss:0.085205
Epoch [1/2], Iter [1845/3125], train_loss:0.098993
Epoch [1/2], Iter [1846/3125], train_loss:0.088542
Epoch [1/2], Iter [1847/3125], train_loss:0.090588
Epoch [1/2], Iter [1848/3125], train_loss:0.129216
Epoch [1/2], Iter [1849/3125], train_loss:0.124849
Epoch [1/2], Iter [1850/3125], train_loss:0.115883
Epoch [1/2], Iter [1851/3125], train_loss:0.100992
Epoch [1/2], Iter [1852/3125], train_loss:0.099127
Epoch [1/2], Iter [1853/3125], train_loss:0.108038
Epoch [1/2], Iter [1854/3125], train_loss:0.106039
Epoch [1/2], Iter [1855/3125], train_loss:0.107693
Epoch [1/2], Iter [1856/3125], train_loss:0.122102
Epoch [1/2], Iter [1857/3125], train_loss:0.065592
Epoch [1/2], Iter [1858/3125], train_loss:0.089284
Epoch [1/2], Iter [1859/3125], train_loss:0.128695
Epoch [1/2], Iter [1860/3125], train_loss:0.106631
Epoch [1/2], Iter [1861/3125], train_loss:0.093396
Epoch [1/2], Iter [1862/3125], train_loss:0.102988
Epoch [1/2], Iter [1863/3125], train_loss:0.107683
Epoch [1/2], Iter [1864/3125], train_loss:0.099660
Epoch [1/2], Iter [1865/3125], train_loss:0.116378
Epoch [1/2], Iter [1866/3125], train_loss:0.116871
Epoch [1/2], Iter [1867/3125], train_loss:0.127018
Epoch [1/2], Iter [1868/3125], train_loss:0.110150
Epoch [1/2], Iter [1869/3125], train_loss:0.138162
Epoch [1/2], Iter [1870/3125], train_loss:0.120097
Epoch [1/2], Iter [1871/3125], train_loss:0.089983
Epoch [1/2], Iter [1872/3125], train_loss:0.115508
Epoch [1/2], Iter [1873/3125], train_loss:0.110952
Epoch [1/2], Iter [1874/3125], train_loss:0.102631
Epoch [1/2], Iter [1875/3125], train_loss:0.117026
Epoch [1/2], Iter [1876/3125], train_loss:0.095122
Epoch [1/2], Iter [1877/3125], train_loss:0.121551
Epoch [1/2], Iter [1878/3125], train_loss:0.124627
Epoch [1/2], Iter [1879/3125], train_loss:0.108700
Epoch [1/2], Iter [1880/3125], train_loss:0.106096
Epoch [1/2], Iter [1881/3125], train_loss:0.073590
Epoch [1/2], Iter [1882/3125], train_loss:0.105583
Epoch [1/2], Iter [1883/3125], train_loss:0.105383
Epoch [1/2], Iter [1884/3125], train_loss:0.143912
Epoch [1/2], Iter [1885/3125], train_loss:0.116281
Epoch [1/2], Iter [1886/3125], train_loss:0.127088
Epoch [1/2], Iter [1887/3125], train_loss:0.110158
Epoch [1/2], Iter [1888/3125], train_loss:0.098516
Epoch [1/2], Iter [1889/3125], train_loss:0.099668
Epoch [1/2], Iter [1890/3125], train_loss:0.096417
Epoch [1/2], Iter [1891/3125], train_loss:0.119125
Epoch [1/2], Iter [1892/3125], train_loss:0.104781
Epoch [1/2], Iter [1893/3125], train_loss:0.101876
Epoch [1/2], Iter [1894/3125], train_loss:0.106831
Epoch [1/2], Iter [1895/3125], train_loss:0.107553
Epoch [1/2], Iter [1896/3125], train_loss:0.109665
Epoch [1/2], Iter [1897/3125], train_loss:0.110008
Epoch [1/2], Iter [1898/3125], train_loss:0.108660
Epoch [1/2], Iter [1899/3125], train_loss:0.110264
Epoch [1/2], Iter [1900/3125], train_loss:0.152644
Epoch [1/2], Iter [1901/3125], train_loss:0.117720
Epoch [1/2], Iter [1902/3125], train_loss:0.146421
Epoch [1/2], Iter [1903/3125], train_loss:0.123149
Epoch [1/2], Iter [1904/3125], train_loss:0.095981
Epoch [1/2], Iter [1905/3125], train_loss:0.085133
Epoch [1/2], Iter [1906/3125], train_loss:0.089243
Epoch [1/2], Iter [1907/3125], train_loss:0.093153
Epoch [1/2], Iter [1908/3125], train_loss:0.106806
Epoch [1/2], Iter [1909/3125], train_loss:0.089167
Epoch [1/2], Iter [1910/3125], train_loss:0.130021
Epoch [1/2], Iter [1911/3125], train_loss:0.085724
Epoch [1/2], Iter [1912/3125], train_loss:0.085494
Epoch [1/2], Iter [1913/3125], train_loss:0.109272
Epoch [1/2], Iter [1914/3125], train_loss:0.102889
Epoch [1/2], Iter [1915/3125], train_loss:0.101257
Epoch [1/2], Iter [1916/3125], train_loss:0.122897
Epoch [1/2], Iter [1917/3125], train_loss:0.094979
Epoch [1/2], Iter [1918/3125], train_loss:0.087800
Epoch [1/2], Iter [1919/3125], train_loss:0.113957
Epoch [1/2], Iter [1920/3125], train_loss:0.120947
Epoch [1/2], Iter [1921/3125], train_loss:0.134248
Epoch [1/2], Iter [1922/3125], train_loss:0.120839
Epoch [1/2], Iter [1923/3125], train_loss:0.097012
Epoch [1/2], Iter [1924/3125], train_loss:0.095889
Epoch [1/2], Iter [1925/3125], train_loss:0.122343
Epoch [1/2], Iter [1926/3125], train_loss:0.110138
Epoch [1/2], Iter [1927/3125], train_loss:0.117822
Epoch [1/2], Iter [1928/3125], train_loss:0.149388
Epoch [1/2], Iter [1929/3125], train_loss:0.126594
Epoch [1/2], Iter [1930/3125], train_loss:0.119148
Epoch [1/2], Iter [1931/3125], train_loss:0.131302
Epoch [1/2], Iter [1932/3125], train_loss:0.113817
Epoch [1/2], Iter [1933/3125], train_loss:0.104843
Epoch [1/2], Iter [1934/3125], train_loss:0.102203
Epoch [1/2], Iter [1935/3125], train_loss:0.104758
Epoch [1/2], Iter [1936/3125], train_loss:0.084807
Epoch [1/2], Iter [1937/3125], train_loss:0.103213
Epoch [1/2], Iter [1938/3125], train_loss:0.118753
Epoch [1/2], Iter [1939/3125], train_loss:0.085714
Epoch [1/2], Iter [1940/3125], train_loss:0.075405
Epoch [1/2], Iter [1941/3125], train_loss:0.111731
Epoch [1/2], Iter [1942/3125], train_loss:0.137009
Epoch [1/2], Iter [1943/3125], train_loss:0.106555
Epoch [1/2], Iter [1944/3125], train_loss:0.137298
Epoch [1/2], Iter [1945/3125], train_loss:0.130962
Epoch [1/2], Iter [1946/3125], train_loss:0.128386
Epoch [1/2], Iter [1947/3125], train_loss:0.118504
Epoch [1/2], Iter [1948/3125], train_loss:0.072771
Epoch [1/2], Iter [1949/3125], train_loss:0.086928
Epoch [1/2], Iter [1950/3125], train_loss:0.123281
Epoch [1/2], Iter [1951/3125], train_loss:0.099254
Epoch [1/2], Iter [1952/3125], train_loss:0.127203
Epoch [1/2], Iter [1953/3125], train_loss:0.135958
Epoch [1/2], Iter [1954/3125], train_loss:0.105019
Epoch [1/2], Iter [1955/3125], train_loss:0.141218
Epoch [1/2], Iter [1956/3125], train_loss:0.086414
Epoch [1/2], Iter [1957/3125], train_loss:0.122000
Epoch [1/2], Iter [1958/3125], train_loss:0.108958
Epoch [1/2], Iter [1959/3125], train_loss:0.109269
Epoch [1/2], Iter [1960/3125], train_loss:0.106017
Epoch [1/2], Iter [1961/3125], train_loss:0.107679
Epoch [1/2], Iter [1962/3125], train_loss:0.114157
Epoch [1/2], Iter [1963/3125], train_loss:0.088606
Epoch [1/2], Iter [1964/3125], train_loss:0.104400
Epoch [1/2], Iter [1965/3125], train_loss:0.084936
Epoch [1/2], Iter [1966/3125], train_loss:0.112303
Epoch [1/2], Iter [1967/3125], train_loss:0.101845
Epoch [1/2], Iter [1968/3125], train_loss:0.118825
Epoch [1/2], Iter [1969/3125], train_loss:0.121779
Epoch [1/2], Iter [1970/3125], train_loss:0.074884
Epoch [1/2], Iter [1971/3125], train_loss:0.117793
Epoch [1/2], Iter [1972/3125], train_loss:0.090739
Epoch [1/2], Iter [1973/3125], train_loss:0.110963
Epoch [1/2], Iter [1974/3125], train_loss:0.139955
Epoch [1/2], Iter [1975/3125], train_loss:0.117716
Epoch [1/2], Iter [1976/3125], train_loss:0.111063
Epoch [1/2], Iter [1977/3125], train_loss:0.089905
Epoch [1/2], Iter [1978/3125], train_loss:0.091710
Epoch [1/2], Iter [1979/3125], train_loss:0.113500
Epoch [1/2], Iter [1980/3125], train_loss:0.085731
Epoch [1/2], Iter [1981/3125], train_loss:0.089114
Epoch [1/2], Iter [1982/3125], train_loss:0.073216
Epoch [1/2], Iter [1983/3125], train_loss:0.078870
Epoch [1/2], Iter [1984/3125], train_loss:0.117588
Epoch [1/2], Iter [1985/3125], train_loss:0.104458
Epoch [1/2], Iter [1986/3125], train_loss:0.108113
Epoch [1/2], Iter [1987/3125], train_loss:0.120712
Epoch [1/2], Iter [1988/3125], train_loss:0.108525
Epoch [1/2], Iter [1989/3125], train_loss:0.086377
Epoch [1/2], Iter [1990/3125], train_loss:0.094650
Epoch [1/2], Iter [1991/3125], train_loss:0.074587
Epoch [1/2], Iter [1992/3125], train_loss:0.099681
Epoch [1/2], Iter [1993/3125], train_loss:0.092766
Epoch [1/2], Iter [1994/3125], train_loss:0.112165
Epoch [1/2], Iter [1995/3125], train_loss:0.107683
Epoch [1/2], Iter [1996/3125], train_loss:0.103036
Epoch [1/2], Iter [1997/3125], train_loss:0.153432
Epoch [1/2], Iter [1998/3125], train_loss:0.096860
Epoch [1/2], Iter [1999/3125], train_loss:0.142768
Epoch [1/2], Iter [2000/3125], train_loss:0.081604
Epoch [1/2], Iter [2001/3125], train_loss:0.102904
Epoch [1/2], Iter [2002/3125], train_loss:0.147187
Epoch [1/2], Iter [2003/3125], train_loss:0.084077
Epoch [1/2], Iter [2004/3125], train_loss:0.120355
Epoch [1/2], Iter [2005/3125], train_loss:0.146324
Epoch [1/2], Iter [2006/3125], train_loss:0.086058
Epoch [1/2], Iter [2007/3125], train_loss:0.099165
Epoch [1/2], Iter [2008/3125], train_loss:0.129830
Epoch [1/2], Iter [2009/3125], train_loss:0.086155
Epoch [1/2], Iter [2010/3125], train_loss:0.100047
Epoch [1/2], Iter [2011/3125], train_loss:0.106366
Epoch [1/2], Iter [2012/3125], train_loss:0.135484
Epoch [1/2], Iter [2013/3125], train_loss:0.132166
Epoch [1/2], Iter [2014/3125], train_loss:0.130440
Epoch [1/2], Iter [2015/3125], train_loss:0.098773
Epoch [1/2], Iter [2016/3125], train_loss:0.126730
Epoch [1/2], Iter [2017/3125], train_loss:0.085111
Epoch [1/2], Iter [2018/3125], train_loss:0.129992
Epoch [1/2], Iter [2019/3125], train_loss:0.111593
Epoch [1/2], Iter [2020/3125], train_loss:0.091401
Epoch [1/2], Iter [2021/3125], train_loss:0.119698
Epoch [1/2], Iter [2022/3125], train_loss:0.122655
Epoch [1/2], Iter [2023/3125], train_loss:0.120993
Epoch [1/2], Iter [2024/3125], train_loss:0.094078
Epoch [1/2], Iter [2025/3125], train_loss:0.080260
Epoch [1/2], Iter [2026/3125], train_loss:0.076512
Epoch [1/2], Iter [2027/3125], train_loss:0.089733
Epoch [1/2], Iter [2028/3125], train_loss:0.109131
Epoch [1/2], Iter [2029/3125], train_loss:0.101117
Epoch [1/2], Iter [2030/3125], train_loss:0.135421
Epoch [1/2], Iter [2031/3125], train_loss:0.078282
Epoch [1/2], Iter [2032/3125], train_loss:0.120359
Epoch [1/2], Iter [2033/3125], train_loss:0.139398
Epoch [1/2], Iter [2034/3125], train_loss:0.131844
Epoch [1/2], Iter [2035/3125], train_loss:0.081854
Epoch [1/2], Iter [2036/3125], train_loss:0.105653
Epoch [1/2], Iter [2037/3125], train_loss:0.101963
Epoch [1/2], Iter [2038/3125], train_loss:0.093379
Epoch [1/2], Iter [2039/3125], train_loss:0.140933
Epoch [1/2], Iter [2040/3125], train_loss:0.096073
Epoch [1/2], Iter [2041/3125], train_loss:0.124154
Epoch [1/2], Iter [2042/3125], train_loss:0.118376
Epoch [1/2], Iter [2043/3125], train_loss:0.121481
Epoch [1/2], Iter [2044/3125], train_loss:0.106825
Epoch [1/2], Iter [2045/3125], train_loss:0.110553
Epoch [1/2], Iter [2046/3125], train_loss:0.104090
Epoch [1/2], Iter [2047/3125], train_loss:0.093030
Epoch [1/2], Iter [2048/3125], train_loss:0.156042
Epoch [1/2], Iter [2049/3125], train_loss:0.116730
Epoch [1/2], Iter [2050/3125], train_loss:0.115696
Epoch [1/2], Iter [2051/3125], train_loss:0.132308
Epoch [1/2], Iter [2052/3125], train_loss:0.120332
Epoch [1/2], Iter [2053/3125], train_loss:0.126321
Epoch [1/2], Iter [2054/3125], train_loss:0.096678
Epoch [1/2], Iter [2055/3125], train_loss:0.155123
Epoch [1/2], Iter [2056/3125], train_loss:0.114222
Epoch [1/2], Iter [2057/3125], train_loss:0.098100
Epoch [1/2], Iter [2058/3125], train_loss:0.106661
Epoch [1/2], Iter [2059/3125], train_loss:0.105753
Epoch [1/2], Iter [2060/3125], train_loss:0.103096
Epoch [1/2], Iter [2061/3125], train_loss:0.133311
Epoch [1/2], Iter [2062/3125], train_loss:0.092937
Epoch [1/2], Iter [2063/3125], train_loss:0.132458
Epoch [1/2], Iter [2064/3125], train_loss:0.129511
Epoch [1/2], Iter [2065/3125], train_loss:0.120730
Epoch [1/2], Iter [2066/3125], train_loss:0.134831
Epoch [1/2], Iter [2067/3125], train_loss:0.101766
Epoch [1/2], Iter [2068/3125], train_loss:0.128740
Epoch [1/2], Iter [2069/3125], train_loss:0.122405
Epoch [1/2], Iter [2070/3125], train_loss:0.128550
Epoch [1/2], Iter [2071/3125], train_loss:0.101930
Epoch [1/2], Iter [2072/3125], train_loss:0.102552
Epoch [1/2], Iter [2073/3125], train_loss:0.076610
Epoch [1/2], Iter [2074/3125], train_loss:0.112972
Epoch [1/2], Iter [2075/3125], train_loss:0.103952
Epoch [1/2], Iter [2076/3125], train_loss:0.109852
Epoch [1/2], Iter [2077/3125], train_loss:0.113322
Epoch [1/2], Iter [2078/3125], train_loss:0.102785
Epoch [1/2], Iter [2079/3125], train_loss:0.090778
Epoch [1/2], Iter [2080/3125], train_loss:0.095918
Epoch [1/2], Iter [2081/3125], train_loss:0.116575
Epoch [1/2], Iter [2082/3125], train_loss:0.100046
Epoch [1/2], Iter [2083/3125], train_loss:0.089715
Epoch [1/2], Iter [2084/3125], train_loss:0.122666
Epoch [1/2], Iter [2085/3125], train_loss:0.129613
Epoch [1/2], Iter [2086/3125], train_loss:0.076697
Epoch [1/2], Iter [2087/3125], train_loss:0.093357
Epoch [1/2], Iter [2088/3125], train_loss:0.142714
Epoch [1/2], Iter [2089/3125], train_loss:0.124514
Epoch [1/2], Iter [2090/3125], train_loss:0.087637
Epoch [1/2], Iter [2091/3125], train_loss:0.102257
Epoch [1/2], Iter [2092/3125], train_loss:0.086186
Epoch [1/2], Iter [2093/3125], train_loss:0.093041
Epoch [1/2], Iter [2094/3125], train_loss:0.106152
Epoch [1/2], Iter [2095/3125], train_loss:0.140916
Epoch [1/2], Iter [2096/3125], train_loss:0.102147
Epoch [1/2], Iter [2097/3125], train_loss:0.126739
Epoch [1/2], Iter [2098/3125], train_loss:0.112947
Epoch [1/2], Iter [2099/3125], train_loss:0.118916
Epoch [1/2], Iter [2100/3125], train_loss:0.092814
Epoch [1/2], Iter [2101/3125], train_loss:0.119752
Epoch [1/2], Iter [2102/3125], train_loss:0.076538
Epoch [1/2], Iter [2103/3125], train_loss:0.096270
Epoch [1/2], Iter [2104/3125], train_loss:0.091702
Epoch [1/2], Iter [2105/3125], train_loss:0.143978
Epoch [1/2], Iter [2106/3125], train_loss:0.111897
Epoch [1/2], Iter [2107/3125], train_loss:0.089556
Epoch [1/2], Iter [2108/3125], train_loss:0.109824
Epoch [1/2], Iter [2109/3125], train_loss:0.099092
Epoch [1/2], Iter [2110/3125], train_loss:0.097747
Epoch [1/2], Iter [2111/3125], train_loss:0.146931
Epoch [1/2], Iter [2112/3125], train_loss:0.127117
Epoch [1/2], Iter [2113/3125], train_loss:0.108730
Epoch [1/2], Iter [2114/3125], train_loss:0.095239
Epoch [1/2], Iter [2115/3125], train_loss:0.083379
Epoch [1/2], Iter [2116/3125], train_loss:0.090572
Epoch [1/2], Iter [2117/3125], train_loss:0.096028
Epoch [1/2], Iter [2118/3125], train_loss:0.096893
Epoch [1/2], Iter [2119/3125], train_loss:0.114034
Epoch [1/2], Iter [2120/3125], train_loss:0.124006
Epoch [1/2], Iter [2121/3125], train_loss:0.125319
Epoch [1/2], Iter [2122/3125], train_loss:0.093370
Epoch [1/2], Iter [2123/3125], train_loss:0.094484
Epoch [1/2], Iter [2124/3125], train_loss:0.117593
Epoch [1/2], Iter [2125/3125], train_loss:0.088641
Epoch [1/2], Iter [2126/3125], train_loss:0.100637
Epoch [1/2], Iter [2127/3125], train_loss:0.125044
Epoch [1/2], Iter [2128/3125], train_loss:0.102803
Epoch [1/2], Iter [2129/3125], train_loss:0.100716
Epoch [1/2], Iter [2130/3125], train_loss:0.100396
Epoch [1/2], Iter [2131/3125], train_loss:0.110038
Epoch [1/2], Iter [2132/3125], train_loss:0.085658
Epoch [1/2], Iter [2133/3125], train_loss:0.111865
Epoch [1/2], Iter [2134/3125], train_loss:0.098088
Epoch [1/2], Iter [2135/3125], train_loss:0.075679
Epoch [1/2], Iter [2136/3125], train_loss:0.132928
Epoch [1/2], Iter [2137/3125], train_loss:0.116856
Epoch [1/2], Iter [2138/3125], train_loss:0.135806
Epoch [1/2], Iter [2139/3125], train_loss:0.133636
Epoch [1/2], Iter [2140/3125], train_loss:0.112448
Epoch [1/2], Iter [2141/3125], train_loss:0.118290
Epoch [1/2], Iter [2142/3125], train_loss:0.098431
Epoch [1/2], Iter [2143/3125], train_loss:0.071897
Epoch [1/2], Iter [2144/3125], train_loss:0.112979
Epoch [1/2], Iter [2145/3125], train_loss:0.085164
Epoch [1/2], Iter [2146/3125], train_loss:0.128800
Epoch [1/2], Iter [2147/3125], train_loss:0.081725
Epoch [1/2], Iter [2148/3125], train_loss:0.082943
Epoch [1/2], Iter [2149/3125], train_loss:0.111667
Epoch [1/2], Iter [2150/3125], train_loss:0.115756
Epoch [1/2], Iter [2151/3125], train_loss:0.079601
Epoch [1/2], Iter [2152/3125], train_loss:0.116097
Epoch [1/2], Iter [2153/3125], train_loss:0.116687
Epoch [1/2], Iter [2154/3125], train_loss:0.097041
Epoch [1/2], Iter [2155/3125], train_loss:0.089170
Epoch [1/2], Iter [2156/3125], train_loss:0.100973
Epoch [1/2], Iter [2157/3125], train_loss:0.097850
Epoch [1/2], Iter [2158/3125], train_loss:0.103660
Epoch [1/2], Iter [2159/3125], train_loss:0.108651
Epoch [1/2], Iter [2160/3125], train_loss:0.113844
Epoch [1/2], Iter [2161/3125], train_loss:0.093893
Epoch [1/2], Iter [2162/3125], train_loss:0.082955
Epoch [1/2], Iter [2163/3125], train_loss:0.128567
Epoch [1/2], Iter [2164/3125], train_loss:0.138731
Epoch [1/2], Iter [2165/3125], train_loss:0.133058
Epoch [1/2], Iter [2166/3125], train_loss:0.130054
Epoch [1/2], Iter [2167/3125], train_loss:0.108367
Epoch [1/2], Iter [2168/3125], train_loss:0.085435
Epoch [1/2], Iter [2169/3125], train_loss:0.118808
Epoch [1/2], Iter [2170/3125], train_loss:0.109687
Epoch [1/2], Iter [2171/3125], train_loss:0.104637
Epoch [1/2], Iter [2172/3125], train_loss:0.098688
Epoch [1/2], Iter [2173/3125], train_loss:0.100545
Epoch [1/2], Iter [2174/3125], train_loss:0.103489
Epoch [1/2], Iter [2175/3125], train_loss:0.110800
Epoch [1/2], Iter [2176/3125], train_loss:0.070940
Epoch [1/2], Iter [2177/3125], train_loss:0.085411
Epoch [1/2], Iter [2178/3125], train_loss:0.120935
Epoch [1/2], Iter [2179/3125], train_loss:0.089794
Epoch [1/2], Iter [2180/3125], train_loss:0.117729
Epoch [1/2], Iter [2181/3125], train_loss:0.142787
Epoch [1/2], Iter [2182/3125], train_loss:0.093391
Epoch [1/2], Iter [2183/3125], train_loss:0.116859
Epoch [1/2], Iter [2184/3125], train_loss:0.093596
Epoch [1/2], Iter [2185/3125], train_loss:0.083295
Epoch [1/2], Iter [2186/3125], train_loss:0.091943
Epoch [1/2], Iter [2187/3125], train_loss:0.126068
Epoch [1/2], Iter [2188/3125], train_loss:0.134602
Epoch [1/2], Iter [2189/3125], train_loss:0.114153
Epoch [1/2], Iter [2190/3125], train_loss:0.085646
Epoch [1/2], Iter [2191/3125], train_loss:0.087608
Epoch [1/2], Iter [2192/3125], train_loss:0.132938
Epoch [1/2], Iter [2193/3125], train_loss:0.093311
Epoch [1/2], Iter [2194/3125], train_loss:0.112723
Epoch [1/2], Iter [2195/3125], train_loss:0.107061
Epoch [1/2], Iter [2196/3125], train_loss:0.098063
Epoch [1/2], Iter [2197/3125], train_loss:0.105161
Epoch [1/2], Iter [2198/3125], train_loss:0.112891
Epoch [1/2], Iter [2199/3125], train_loss:0.087156
Epoch [1/2], Iter [2200/3125], train_loss:0.088423
Epoch [1/2], Iter [2201/3125], train_loss:0.113163
Epoch [1/2], Iter [2202/3125], train_loss:0.128250
Epoch [1/2], Iter [2203/3125], train_loss:0.113817
Epoch [1/2], Iter [2204/3125], train_loss:0.090483
Epoch [1/2], Iter [2205/3125], train_loss:0.082780
Epoch [1/2], Iter [2206/3125], train_loss:0.105257
Epoch [1/2], Iter [2207/3125], train_loss:0.102088
Epoch [1/2], Iter [2208/3125], train_loss:0.094012
Epoch [1/2], Iter [2209/3125], train_loss:0.135268
Epoch [1/2], Iter [2210/3125], train_loss:0.091043
Epoch [1/2], Iter [2211/3125], train_loss:0.086837
Epoch [1/2], Iter [2212/3125], train_loss:0.100739
Epoch [1/2], Iter [2213/3125], train_loss:0.089260
Epoch [1/2], Iter [2214/3125], train_loss:0.104809
Epoch [1/2], Iter [2215/3125], train_loss:0.111087
Epoch [1/2], Iter [2216/3125], train_loss:0.109913
Epoch [1/2], Iter [2217/3125], train_loss:0.144326
Epoch [1/2], Iter [2218/3125], train_loss:0.094746
Epoch [1/2], Iter [2219/3125], train_loss:0.127343
Epoch [1/2], Iter [2220/3125], train_loss:0.087044
Epoch [1/2], Iter [2221/3125], train_loss:0.123630
Epoch [1/2], Iter [2222/3125], train_loss:0.104947
Epoch [1/2], Iter [2223/3125], train_loss:0.110232
Epoch [1/2], Iter [2224/3125], train_loss:0.076661
Epoch [1/2], Iter [2225/3125], train_loss:0.134165
Epoch [1/2], Iter [2226/3125], train_loss:0.157577
Epoch [1/2], Iter [2227/3125], train_loss:0.094721
Epoch [1/2], Iter [2228/3125], train_loss:0.101042
Epoch [1/2], Iter [2229/3125], train_loss:0.096628
Epoch [1/2], Iter [2230/3125], train_loss:0.101660
Epoch [1/2], Iter [2231/3125], train_loss:0.087218
Epoch [1/2], Iter [2232/3125], train_loss:0.083415
Epoch [1/2], Iter [2233/3125], train_loss:0.100924
Epoch [1/2], Iter [2234/3125], train_loss:0.092865
Epoch [1/2], Iter [2235/3125], train_loss:0.118373
Epoch [1/2], Iter [2236/3125], train_loss:0.101207
Epoch [1/2], Iter [2237/3125], train_loss:0.084761
Epoch [1/2], Iter [2238/3125], train_loss:0.106357
Epoch [1/2], Iter [2239/3125], train_loss:0.118842
Epoch [1/2], Iter [2240/3125], train_loss:0.103979
Epoch [1/2], Iter [2241/3125], train_loss:0.125138
Epoch [1/2], Iter [2242/3125], train_loss:0.085798
Epoch [1/2], Iter [2243/3125], train_loss:0.102032
Epoch [1/2], Iter [2244/3125], train_loss:0.131359
Epoch [1/2], Iter [2245/3125], train_loss:0.099374
Epoch [1/2], Iter [2246/3125], train_loss:0.098269
Epoch [1/2], Iter [2247/3125], train_loss:0.091754
Epoch [1/2], Iter [2248/3125], train_loss:0.096370
Epoch [1/2], Iter [2249/3125], train_loss:0.126300
Epoch [1/2], Iter [2250/3125], train_loss:0.132058
Epoch [1/2], Iter [2251/3125], train_loss:0.084470
Epoch [1/2], Iter [2252/3125], train_loss:0.147128
Epoch [1/2], Iter [2253/3125], train_loss:0.069462
Epoch [1/2], Iter [2254/3125], train_loss:0.102953
Epoch [1/2], Iter [2255/3125], train_loss:0.123367
Epoch [1/2], Iter [2256/3125], train_loss:0.106619
Epoch [1/2], Iter [2257/3125], train_loss:0.088664
Epoch [1/2], Iter [2258/3125], train_loss:0.081543
Epoch [1/2], Iter [2259/3125], train_loss:0.120953
Epoch [1/2], Iter [2260/3125], train_loss:0.103699
Epoch [1/2], Iter [2261/3125], train_loss:0.099706
Epoch [1/2], Iter [2262/3125], train_loss:0.079738
Epoch [1/2], Iter [2263/3125], train_loss:0.100194
Epoch [1/2], Iter [2264/3125], train_loss:0.128680
Epoch [1/2], Iter [2265/3125], train_loss:0.131533
Epoch [1/2], Iter [2266/3125], train_loss:0.118202
Epoch [1/2], Iter [2267/3125], train_loss:0.094496
Epoch [1/2], Iter [2268/3125], train_loss:0.074186
Epoch [1/2], Iter [2269/3125], train_loss:0.095828
Epoch [1/2], Iter [2270/3125], train_loss:0.086729
Epoch [1/2], Iter [2271/3125], train_loss:0.079519
Epoch [1/2], Iter [2272/3125], train_loss:0.098425
Epoch [1/2], Iter [2273/3125], train_loss:0.093892
Epoch [1/2], Iter [2274/3125], train_loss:0.141978
Epoch [1/2], Iter [2275/3125], train_loss:0.118443
Epoch [1/2], Iter [2276/3125], train_loss:0.094937
Epoch [1/2], Iter [2277/3125], train_loss:0.119222
Epoch [1/2], Iter [2278/3125], train_loss:0.097568
Epoch [1/2], Iter [2279/3125], train_loss:0.102922
Epoch [1/2], Iter [2280/3125], train_loss:0.111276
Epoch [1/2], Iter [2281/3125], train_loss:0.089530
Epoch [1/2], Iter [2282/3125], train_loss:0.118905
Epoch [1/2], Iter [2283/3125], train_loss:0.086163
Epoch [1/2], Iter [2284/3125], train_loss:0.110971
Epoch [1/2], Iter [2285/3125], train_loss:0.112254
Epoch [1/2], Iter [2286/3125], train_loss:0.092250
Epoch [1/2], Iter [2287/3125], train_loss:0.106539
Epoch [1/2], Iter [2288/3125], train_loss:0.098029
Epoch [1/2], Iter [2289/3125], train_loss:0.103773
Epoch [1/2], Iter [2290/3125], train_loss:0.129419
Epoch [1/2], Iter [2291/3125], train_loss:0.098723
Epoch [1/2], Iter [2292/3125], train_loss:0.108025
Epoch [1/2], Iter [2293/3125], train_loss:0.124437
Epoch [1/2], Iter [2294/3125], train_loss:0.077301
Epoch [1/2], Iter [2295/3125], train_loss:0.114347
Epoch [1/2], Iter [2296/3125], train_loss:0.081775
Epoch [1/2], Iter [2297/3125], train_loss:0.115150
Epoch [1/2], Iter [2298/3125], train_loss:0.117596
Epoch [1/2], Iter [2299/3125], train_loss:0.105581
Epoch [1/2], Iter [2300/3125], train_loss:0.089893
Epoch [1/2], Iter [2301/3125], train_loss:0.131399
Epoch [1/2], Iter [2302/3125], train_loss:0.086729
Epoch [1/2], Iter [2303/3125], train_loss:0.101321
Epoch [1/2], Iter [2304/3125], train_loss:0.124556
Epoch [1/2], Iter [2305/3125], train_loss:0.108444
Epoch [1/2], Iter [2306/3125], train_loss:0.113353
Epoch [1/2], Iter [2307/3125], train_loss:0.104680
Epoch [1/2], Iter [2308/3125], train_loss:0.122059
Epoch [1/2], Iter [2309/3125], train_loss:0.090855
Epoch [1/2], Iter [2310/3125], train_loss:0.094236
Epoch [1/2], Iter [2311/3125], train_loss:0.108596
Epoch [1/2], Iter [2312/3125], train_loss:0.093419
Epoch [1/2], Iter [2313/3125], train_loss:0.083965
Epoch [1/2], Iter [2314/3125], train_loss:0.129653
Epoch [1/2], Iter [2315/3125], train_loss:0.100340
Epoch [1/2], Iter [2316/3125], train_loss:0.105309
Epoch [1/2], Iter [2317/3125], train_loss:0.104400
Epoch [1/2], Iter [2318/3125], train_loss:0.098583
Epoch [1/2], Iter [2319/3125], train_loss:0.111805
Epoch [1/2], Iter [2320/3125], train_loss:0.101948
Epoch [1/2], Iter [2321/3125], train_loss:0.105128
Epoch [1/2], Iter [2322/3125], train_loss:0.096615
Epoch [1/2], Iter [2323/3125], train_loss:0.126877
Epoch [1/2], Iter [2324/3125], train_loss:0.121535
Epoch [1/2], Iter [2325/3125], train_loss:0.098379
Epoch [1/2], Iter [2326/3125], train_loss:0.110792
Epoch [1/2], Iter [2327/3125], train_loss:0.097031
Epoch [1/2], Iter [2328/3125], train_loss:0.104541
Epoch [1/2], Iter [2329/3125], train_loss:0.084440
Epoch [1/2], Iter [2330/3125], train_loss:0.096462
Epoch [1/2], Iter [2331/3125], train_loss:0.097686
Epoch [1/2], Iter [2332/3125], train_loss:0.094296
Epoch [1/2], Iter [2333/3125], train_loss:0.119200
Epoch [1/2], Iter [2334/3125], train_loss:0.096054
Epoch [1/2], Iter [2335/3125], train_loss:0.114878
Epoch [1/2], Iter [2336/3125], train_loss:0.110496
Epoch [1/2], Iter [2337/3125], train_loss:0.099256
Epoch [1/2], Iter [2338/3125], train_loss:0.100970
Epoch [1/2], Iter [2339/3125], train_loss:0.128923
Epoch [1/2], Iter [2340/3125], train_loss:0.123876
Epoch [1/2], Iter [2341/3125], train_loss:0.125885
Epoch [1/2], Iter [2342/3125], train_loss:0.117648
Epoch [1/2], Iter [2343/3125], train_loss:0.118099
Epoch [1/2], Iter [2344/3125], train_loss:0.079732
Epoch [1/2], Iter [2345/3125], train_loss:0.086750
Epoch [1/2], Iter [2346/3125], train_loss:0.078172
Epoch [1/2], Iter [2347/3125], train_loss:0.163049
Epoch [1/2], Iter [2348/3125], train_loss:0.099812
Epoch [1/2], Iter [2349/3125], train_loss:0.094974
Epoch [1/2], Iter [2350/3125], train_loss:0.106246
Epoch [1/2], Iter [2351/3125], train_loss:0.095683
Epoch [1/2], Iter [2352/3125], train_loss:0.125036
Epoch [1/2], Iter [2353/3125], train_loss:0.105502
Epoch [1/2], Iter [2354/3125], train_loss:0.096412
Epoch [1/2], Iter [2355/3125], train_loss:0.121308
Epoch [1/2], Iter [2356/3125], train_loss:0.109995
Epoch [1/2], Iter [2357/3125], train_loss:0.082690
Epoch [1/2], Iter [2358/3125], train_loss:0.091900
Epoch [1/2], Iter [2359/3125], train_loss:0.117589
Epoch [1/2], Iter [2360/3125], train_loss:0.102684
Epoch [1/2], Iter [2361/3125], train_loss:0.086352
Epoch [1/2], Iter [2362/3125], train_loss:0.093263
Epoch [1/2], Iter [2363/3125], train_loss:0.119629
Epoch [1/2], Iter [2364/3125], train_loss:0.067344
Epoch [1/2], Iter [2365/3125], train_loss:0.141182
Epoch [1/2], Iter [2366/3125], train_loss:0.097096
Epoch [1/2], Iter [2367/3125], train_loss:0.107365
Epoch [1/2], Iter [2368/3125], train_loss:0.103708
Epoch [1/2], Iter [2369/3125], train_loss:0.115419
Epoch [1/2], Iter [2370/3125], train_loss:0.100928
Epoch [1/2], Iter [2371/3125], train_loss:0.123152
Epoch [1/2], Iter [2372/3125], train_loss:0.093848
Epoch [1/2], Iter [2373/3125], train_loss:0.084897
Epoch [1/2], Iter [2374/3125], train_loss:0.094672
Epoch [1/2], Iter [2375/3125], train_loss:0.114151
Epoch [1/2], Iter [2376/3125], train_loss:0.071165
Epoch [1/2], Iter [2377/3125], train_loss:0.113670
Epoch [1/2], Iter [2378/3125], train_loss:0.085005
Epoch [1/2], Iter [2379/3125], train_loss:0.131933
Epoch [1/2], Iter [2380/3125], train_loss:0.110527
Epoch [1/2], Iter [2381/3125], train_loss:0.086547
Epoch [1/2], Iter [2382/3125], train_loss:0.125244
Epoch [1/2], Iter [2383/3125], train_loss:0.087366
Epoch [1/2], Iter [2384/3125], train_loss:0.096163
Epoch [1/2], Iter [2385/3125], train_loss:0.076568
Epoch [1/2], Iter [2386/3125], train_loss:0.089735
Epoch [1/2], Iter [2387/3125], train_loss:0.088792
Epoch [1/2], Iter [2388/3125], train_loss:0.099147
Epoch [1/2], Iter [2389/3125], train_loss:0.083492
Epoch [1/2], Iter [2390/3125], train_loss:0.100325
Epoch [1/2], Iter [2391/3125], train_loss:0.086110
Epoch [1/2], Iter [2392/3125], train_loss:0.102520
Epoch [1/2], Iter [2393/3125], train_loss:0.099782
Epoch [1/2], Iter [2394/3125], train_loss:0.095551
Epoch [1/2], Iter [2395/3125], train_loss:0.092597
Epoch [1/2], Iter [2396/3125], train_loss:0.102948
Epoch [1/2], Iter [2397/3125], train_loss:0.090320
Epoch [1/2], Iter [2398/3125], train_loss:0.105069
Epoch [1/2], Iter [2399/3125], train_loss:0.133147
Epoch [1/2], Iter [2400/3125], train_loss:0.121134
Epoch [1/2], Iter [2401/3125], train_loss:0.126426
Epoch [1/2], Iter [2402/3125], train_loss:0.112873
Epoch [1/2], Iter [2403/3125], train_loss:0.095190
Epoch [1/2], Iter [2404/3125], train_loss:0.111614
Epoch [1/2], Iter [2405/3125], train_loss:0.134615
Epoch [1/2], Iter [2406/3125], train_loss:0.079283
Epoch [1/2], Iter [2407/3125], train_loss:0.099310
Epoch [1/2], Iter [2408/3125], train_loss:0.100244
Epoch [1/2], Iter [2409/3125], train_loss:0.111877
Epoch [1/2], Iter [2410/3125], train_loss:0.108714
Epoch [1/2], Iter [2411/3125], train_loss:0.078524
Epoch [1/2], Iter [2412/3125], train_loss:0.091149
Epoch [1/2], Iter [2413/3125], train_loss:0.105475
Epoch [1/2], Iter [2414/3125], train_loss:0.122295
Epoch [1/2], Iter [2415/3125], train_loss:0.144343
Epoch [1/2], Iter [2416/3125], train_loss:0.104529
Epoch [1/2], Iter [2417/3125], train_loss:0.124823
Epoch [1/2], Iter [2418/3125], train_loss:0.106808
Epoch [1/2], Iter [2419/3125], train_loss:0.117653
Epoch [1/2], Iter [2420/3125], train_loss:0.123505
Epoch [1/2], Iter [2421/3125], train_loss:0.114044
Epoch [1/2], Iter [2422/3125], train_loss:0.109120
Epoch [1/2], Iter [2423/3125], train_loss:0.111892
Epoch [1/2], Iter [2424/3125], train_loss:0.137719
Epoch [1/2], Iter [2425/3125], train_loss:0.117109
Epoch [1/2], Iter [2426/3125], train_loss:0.093619
Epoch [1/2], Iter [2427/3125], train_loss:0.073259
Epoch [1/2], Iter [2428/3125], train_loss:0.135654
Epoch [1/2], Iter [2429/3125], train_loss:0.103028
Epoch [1/2], Iter [2430/3125], train_loss:0.097963
Epoch [1/2], Iter [2431/3125], train_loss:0.105301
Epoch [1/2], Iter [2432/3125], train_loss:0.125698
Epoch [1/2], Iter [2433/3125], train_loss:0.097532
Epoch [1/2], Iter [2434/3125], train_loss:0.103793
Epoch [1/2], Iter [2435/3125], train_loss:0.112252
Epoch [1/2], Iter [2436/3125], train_loss:0.118567
Epoch [1/2], Iter [2437/3125], train_loss:0.095079
Epoch [1/2], Iter [2438/3125], train_loss:0.089631
Epoch [1/2], Iter [2439/3125], train_loss:0.095069
Epoch [1/2], Iter [2440/3125], train_loss:0.108419
Epoch [1/2], Iter [2441/3125], train_loss:0.112826
Epoch [1/2], Iter [2442/3125], train_loss:0.111640
Epoch [1/2], Iter [2443/3125], train_loss:0.113391
Epoch [1/2], Iter [2444/3125], train_loss:0.131918
Epoch [1/2], Iter [2445/3125], train_loss:0.076390
Epoch [1/2], Iter [2446/3125], train_loss:0.101470
Epoch [1/2], Iter [2447/3125], train_loss:0.085170
Epoch [1/2], Iter [2448/3125], train_loss:0.089206
Epoch [1/2], Iter [2449/3125], train_loss:0.099683
Epoch [1/2], Iter [2450/3125], train_loss:0.086865
Epoch [1/2], Iter [2451/3125], train_loss:0.128651
Epoch [1/2], Iter [2452/3125], train_loss:0.090884
Epoch [1/2], Iter [2453/3125], train_loss:0.106414
Epoch [1/2], Iter [2454/3125], train_loss:0.127482
Epoch [1/2], Iter [2455/3125], train_loss:0.076910
Epoch [1/2], Iter [2456/3125], train_loss:0.107479
Epoch [1/2], Iter [2457/3125], train_loss:0.079879
Epoch [1/2], Iter [2458/3125], train_loss:0.075093
Epoch [1/2], Iter [2459/3125], train_loss:0.080941
Epoch [1/2], Iter [2460/3125], train_loss:0.105018
Epoch [1/2], Iter [2461/3125], train_loss:0.090048
Epoch [1/2], Iter [2462/3125], train_loss:0.082398
Epoch [1/2], Iter [2463/3125], train_loss:0.117726
Epoch [1/2], Iter [2464/3125], train_loss:0.107102
Epoch [1/2], Iter [2465/3125], train_loss:0.141708
Epoch [1/2], Iter [2466/3125], train_loss:0.123104
Epoch [1/2], Iter [2467/3125], train_loss:0.099922
Epoch [1/2], Iter [2468/3125], train_loss:0.133417
Epoch [1/2], Iter [2469/3125], train_loss:0.110525
Epoch [1/2], Iter [2470/3125], train_loss:0.110006
Epoch [1/2], Iter [2471/3125], train_loss:0.090452
Epoch [1/2], Iter [2472/3125], train_loss:0.119548
Epoch [1/2], Iter [2473/3125], train_loss:0.132476
Epoch [1/2], Iter [2474/3125], train_loss:0.097383
Epoch [1/2], Iter [2475/3125], train_loss:0.110065
Epoch [1/2], Iter [2476/3125], train_loss:0.104751
Epoch [1/2], Iter [2477/3125], train_loss:0.085099
Epoch [1/2], Iter [2478/3125], train_loss:0.101220
Epoch [1/2], Iter [2479/3125], train_loss:0.088360
Epoch [1/2], Iter [2480/3125], train_loss:0.072771
Epoch [1/2], Iter [2481/3125], train_loss:0.087658
Epoch [1/2], Iter [2482/3125], train_loss:0.095933
Epoch [1/2], Iter [2483/3125], train_loss:0.108177
Epoch [1/2], Iter [2484/3125], train_loss:0.115885
Epoch [1/2], Iter [2485/3125], train_loss:0.101371
Epoch [1/2], Iter [2486/3125], train_loss:0.115408
Epoch [1/2], Iter [2487/3125], train_loss:0.084674
Epoch [1/2], Iter [2488/3125], train_loss:0.102107
Epoch [1/2], Iter [2489/3125], train_loss:0.076870
Epoch [1/2], Iter [2490/3125], train_loss:0.134582
Epoch [1/2], Iter [2491/3125], train_loss:0.111436
Epoch [1/2], Iter [2492/3125], train_loss:0.125878
Epoch [1/2], Iter [2493/3125], train_loss:0.129740
Epoch [1/2], Iter [2494/3125], train_loss:0.080612
Epoch [1/2], Iter [2495/3125], train_loss:0.130665
Epoch [1/2], Iter [2496/3125], train_loss:0.074256
Epoch [1/2], Iter [2497/3125], train_loss:0.139712
Epoch [1/2], Iter [2498/3125], train_loss:0.117353
Epoch [1/2], Iter [2499/3125], train_loss:0.119585
Epoch [1/2], Iter [2500/3125], train_loss:0.102869
Epoch [1/2], Iter [2501/3125], train_loss:0.095046
Epoch [1/2], Iter [2502/3125], train_loss:0.117398
Epoch [1/2], Iter [2503/3125], train_loss:0.111420
Epoch [1/2], Iter [2504/3125], train_loss:0.167339
Epoch [1/2], Iter [2505/3125], train_loss:0.113016
Epoch [1/2], Iter [2506/3125], train_loss:0.094196
Epoch [1/2], Iter [2507/3125], train_loss:0.096952
Epoch [1/2], Iter [2508/3125], train_loss:0.111106
Epoch [1/2], Iter [2509/3125], train_loss:0.089056
Epoch [1/2], Iter [2510/3125], train_loss:0.109800
Epoch [1/2], Iter [2511/3125], train_loss:0.086686
Epoch [1/2], Iter [2512/3125], train_loss:0.092258
Epoch [1/2], Iter [2513/3125], train_loss:0.076557
Epoch [1/2], Iter [2514/3125], train_loss:0.091248
Epoch [1/2], Iter [2515/3125], train_loss:0.093275
Epoch [1/2], Iter [2516/3125], train_loss:0.106473
Epoch [1/2], Iter [2517/3125], train_loss:0.094642
Epoch [1/2], Iter [2518/3125], train_loss:0.138280
Epoch [1/2], Iter [2519/3125], train_loss:0.098989
Epoch [1/2], Iter [2520/3125], train_loss:0.095182
Epoch [1/2], Iter [2521/3125], train_loss:0.107335
Epoch [1/2], Iter [2522/3125], train_loss:0.079086
Epoch [1/2], Iter [2523/3125], train_loss:0.086730
Epoch [1/2], Iter [2524/3125], train_loss:0.124144
Epoch [1/2], Iter [2525/3125], train_loss:0.094952
Epoch [1/2], Iter [2526/3125], train_loss:0.117466
Epoch [1/2], Iter [2527/3125], train_loss:0.109298
Epoch [1/2], Iter [2528/3125], train_loss:0.116636
Epoch [1/2], Iter [2529/3125], train_loss:0.096603
Epoch [1/2], Iter [2530/3125], train_loss:0.089863
Epoch [1/2], Iter [2531/3125], train_loss:0.090743
Epoch [1/2], Iter [2532/3125], train_loss:0.104793
Epoch [1/2], Iter [2533/3125], train_loss:0.114171
Epoch [1/2], Iter [2534/3125], train_loss:0.078191
Epoch [1/2], Iter [2535/3125], train_loss:0.075855
Epoch [1/2], Iter [2536/3125], train_loss:0.092886
Epoch [1/2], Iter [2537/3125], train_loss:0.084237
Epoch [1/2], Iter [2538/3125], train_loss:0.076853
Epoch [1/2], Iter [2539/3125], train_loss:0.099303
Epoch [1/2], Iter [2540/3125], train_loss:0.104977
Epoch [1/2], Iter [2541/3125], train_loss:0.122457
Epoch [1/2], Iter [2542/3125], train_loss:0.109284
Epoch [1/2], Iter [2543/3125], train_loss:0.099003
Epoch [1/2], Iter [2544/3125], train_loss:0.136708
Epoch [1/2], Iter [2545/3125], train_loss:0.097537
Epoch [1/2], Iter [2546/3125], train_loss:0.092581
Epoch [1/2], Iter [2547/3125], train_loss:0.084615
Epoch [1/2], Iter [2548/3125], train_loss:0.109484
Epoch [1/2], Iter [2549/3125], train_loss:0.065603
Epoch [1/2], Iter [2550/3125], train_loss:0.088243
Epoch [1/2], Iter [2551/3125], train_loss:0.091456
Epoch [1/2], Iter [2552/3125], train_loss:0.123616
Epoch [1/2], Iter [2553/3125], train_loss:0.094322
Epoch [1/2], Iter [2554/3125], train_loss:0.110907
Epoch [1/2], Iter [2555/3125], train_loss:0.112595
Epoch [1/2], Iter [2556/3125], train_loss:0.086224
Epoch [1/2], Iter [2557/3125], train_loss:0.137138
Epoch [1/2], Iter [2558/3125], train_loss:0.143773
Epoch [1/2], Iter [2559/3125], train_loss:0.127415
Epoch [1/2], Iter [2560/3125], train_loss:0.083331
Epoch [1/2], Iter [2561/3125], train_loss:0.117575
Epoch [1/2], Iter [2562/3125], train_loss:0.079147
Epoch [1/2], Iter [2563/3125], train_loss:0.094432
Epoch [1/2], Iter [2564/3125], train_loss:0.087761
Epoch [1/2], Iter [2565/3125], train_loss:0.081774
Epoch [1/2], Iter [2566/3125], train_loss:0.102274
Epoch [1/2], Iter [2567/3125], train_loss:0.089861
Epoch [1/2], Iter [2568/3125], train_loss:0.088501
Epoch [1/2], Iter [2569/3125], train_loss:0.104001
Epoch [1/2], Iter [2570/3125], train_loss:0.066133
Epoch [1/2], Iter [2571/3125], train_loss:0.080712
Epoch [1/2], Iter [2572/3125], train_loss:0.099983
Epoch [1/2], Iter [2573/3125], train_loss:0.106127
Epoch [1/2], Iter [2574/3125], train_loss:0.094524
Epoch [1/2], Iter [2575/3125], train_loss:0.084537
Epoch [1/2], Iter [2576/3125], train_loss:0.102242
Epoch [1/2], Iter [2577/3125], train_loss:0.111313
Epoch [1/2], Iter [2578/3125], train_loss:0.106967
Epoch [1/2], Iter [2579/3125], train_loss:0.084365
Epoch [1/2], Iter [2580/3125], train_loss:0.122814
Epoch [1/2], Iter [2581/3125], train_loss:0.097958
Epoch [1/2], Iter [2582/3125], train_loss:0.116795
Epoch [1/2], Iter [2583/3125], train_loss:0.103559
Epoch [1/2], Iter [2584/3125], train_loss:0.109728
Epoch [1/2], Iter [2585/3125], train_loss:0.108031
Epoch [1/2], Iter [2586/3125], train_loss:0.107263
Epoch [1/2], Iter [2587/3125], train_loss:0.076199
Epoch [1/2], Iter [2588/3125], train_loss:0.124672
Epoch [1/2], Iter [2589/3125], train_loss:0.089102
Epoch [1/2], Iter [2590/3125], train_loss:0.105508
Epoch [1/2], Iter [2591/3125], train_loss:0.117493
Epoch [1/2], Iter [2592/3125], train_loss:0.095886
Epoch [1/2], Iter [2593/3125], train_loss:0.113637
Epoch [1/2], Iter [2594/3125], train_loss:0.112449
Epoch [1/2], Iter [2595/3125], train_loss:0.089482
Epoch [1/2], Iter [2596/3125], train_loss:0.087168
Epoch [1/2], Iter [2597/3125], train_loss:0.090498
Epoch [1/2], Iter [2598/3125], train_loss:0.085577
Epoch [1/2], Iter [2599/3125], train_loss:0.097302
Epoch [1/2], Iter [2600/3125], train_loss:0.088938
Epoch [1/2], Iter [2601/3125], train_loss:0.115304
Epoch [1/2], Iter [2602/3125], train_loss:0.133274
Epoch [1/2], Iter [2603/3125], train_loss:0.145121
Epoch [1/2], Iter [2604/3125], train_loss:0.084187
Epoch [1/2], Iter [2605/3125], train_loss:0.129197
Epoch [1/2], Iter [2606/3125], train_loss:0.093822
Epoch [1/2], Iter [2607/3125], train_loss:0.101598
Epoch [1/2], Iter [2608/3125], train_loss:0.140341
Epoch [1/2], Iter [2609/3125], train_loss:0.115032
Epoch [1/2], Iter [2610/3125], train_loss:0.120124
Epoch [1/2], Iter [2611/3125], train_loss:0.110905
Epoch [1/2], Iter [2612/3125], train_loss:0.089199
Epoch [1/2], Iter [2613/3125], train_loss:0.104073
Epoch [1/2], Iter [2614/3125], train_loss:0.100672
Epoch [1/2], Iter [2615/3125], train_loss:0.111184
Epoch [1/2], Iter [2616/3125], train_loss:0.109902
Epoch [1/2], Iter [2617/3125], train_loss:0.098068
Epoch [1/2], Iter [2618/3125], train_loss:0.097632
Epoch [1/2], Iter [2619/3125], train_loss:0.085194
Epoch [1/2], Iter [2620/3125], train_loss:0.111314
Epoch [1/2], Iter [2621/3125], train_loss:0.097633
Epoch [1/2], Iter [2622/3125], train_loss:0.101432
Epoch [1/2], Iter [2623/3125], train_loss:0.084576
Epoch [1/2], Iter [2624/3125], train_loss:0.113484
Epoch [1/2], Iter [2625/3125], train_loss:0.089233
Epoch [1/2], Iter [2626/3125], train_loss:0.117646
Epoch [1/2], Iter [2627/3125], train_loss:0.092150
Epoch [1/2], Iter [2628/3125], train_loss:0.104805
Epoch [1/2], Iter [2629/3125], train_loss:0.110383
Epoch [1/2], Iter [2630/3125], train_loss:0.109359
Epoch [1/2], Iter [2631/3125], train_loss:0.093776
Epoch [1/2], Iter [2632/3125], train_loss:0.085401
Epoch [1/2], Iter [2633/3125], train_loss:0.083766
Epoch [1/2], Iter [2634/3125], train_loss:0.108508
Epoch [1/2], Iter [2635/3125], train_loss:0.093779
Epoch [1/2], Iter [2636/3125], train_loss:0.087341
Epoch [1/2], Iter [2637/3125], train_loss:0.123160
Epoch [1/2], Iter [2638/3125], train_loss:0.098978
Epoch [1/2], Iter [2639/3125], train_loss:0.146915
Epoch [1/2], Iter [2640/3125], train_loss:0.119571
Epoch [1/2], Iter [2641/3125], train_loss:0.106984
Epoch [1/2], Iter [2642/3125], train_loss:0.103030
Epoch [1/2], Iter [2643/3125], train_loss:0.117886
Epoch [1/2], Iter [2644/3125], train_loss:0.106485
Epoch [1/2], Iter [2645/3125], train_loss:0.127798
Epoch [1/2], Iter [2646/3125], train_loss:0.136132
Epoch [1/2], Iter [2647/3125], train_loss:0.111808
Epoch [1/2], Iter [2648/3125], train_loss:0.135164
Epoch [1/2], Iter [2649/3125], train_loss:0.081889
Epoch [1/2], Iter [2650/3125], train_loss:0.097605
Epoch [1/2], Iter [2651/3125], train_loss:0.114722
Epoch [1/2], Iter [2652/3125], train_loss:0.108491
Epoch [1/2], Iter [2653/3125], train_loss:0.100734
Epoch [1/2], Iter [2654/3125], train_loss:0.123039
Epoch [1/2], Iter [2655/3125], train_loss:0.111583
Epoch [1/2], Iter [2656/3125], train_loss:0.107290
Epoch [1/2], Iter [2657/3125], train_loss:0.108501
Epoch [1/2], Iter [2658/3125], train_loss:0.078135
Epoch [1/2], Iter [2659/3125], train_loss:0.085771
Epoch [1/2], Iter [2660/3125], train_loss:0.107128
Epoch [1/2], Iter [2661/3125], train_loss:0.095131
Epoch [1/2], Iter [2662/3125], train_loss:0.085456
Epoch [1/2], Iter [2663/3125], train_loss:0.112023
Epoch [1/2], Iter [2664/3125], train_loss:0.074527
Epoch [1/2], Iter [2665/3125], train_loss:0.098176
Epoch [1/2], Iter [2666/3125], train_loss:0.134337
Epoch [1/2], Iter [2667/3125], train_loss:0.079310
Epoch [1/2], Iter [2668/3125], train_loss:0.128383
Epoch [1/2], Iter [2669/3125], train_loss:0.063737
Epoch [1/2], Iter [2670/3125], train_loss:0.116620
Epoch [1/2], Iter [2671/3125], train_loss:0.109515
Epoch [1/2], Iter [2672/3125], train_loss:0.105551
Epoch [1/2], Iter [2673/3125], train_loss:0.106442
Epoch [1/2], Iter [2674/3125], train_loss:0.108208
Epoch [1/2], Iter [2675/3125], train_loss:0.092038
Epoch [1/2], Iter [2676/3125], train_loss:0.067518
Epoch [1/2], Iter [2677/3125], train_loss:0.108449
Epoch [1/2], Iter [2678/3125], train_loss:0.063891
Epoch [1/2], Iter [2679/3125], train_loss:0.097295
Epoch [1/2], Iter [2680/3125], train_loss:0.100544
Epoch [1/2], Iter [2681/3125], train_loss:0.059329
Epoch [1/2], Iter [2682/3125], train_loss:0.109202
Epoch [1/2], Iter [2683/3125], train_loss:0.099770
Epoch [1/2], Iter [2684/3125], train_loss:0.104589
Epoch [1/2], Iter [2685/3125], train_loss:0.080295
Epoch [1/2], Iter [2686/3125], train_loss:0.120223
Epoch [1/2], Iter [2687/3125], train_loss:0.078997
Epoch [1/2], Iter [2688/3125], train_loss:0.089128
Epoch [1/2], Iter [2689/3125], train_loss:0.112341
Epoch [1/2], Iter [2690/3125], train_loss:0.122444
Epoch [1/2], Iter [2691/3125], train_loss:0.092515
Epoch [1/2], Iter [2692/3125], train_loss:0.088293
Epoch [1/2], Iter [2693/3125], train_loss:0.091151
Epoch [1/2], Iter [2694/3125], train_loss:0.095652
Epoch [1/2], Iter [2695/3125], train_loss:0.100625
Epoch [1/2], Iter [2696/3125], train_loss:0.124390
Epoch [1/2], Iter [2697/3125], train_loss:0.108469
Epoch [1/2], Iter [2698/3125], train_loss:0.092776
Epoch [1/2], Iter [2699/3125], train_loss:0.115473
Epoch [1/2], Iter [2700/3125], train_loss:0.118285
Epoch [1/2], Iter [2701/3125], train_loss:0.070639
Epoch [1/2], Iter [2702/3125], train_loss:0.099144
Epoch [1/2], Iter [2703/3125], train_loss:0.071117
Epoch [1/2], Iter [2704/3125], train_loss:0.085093
Epoch [1/2], Iter [2705/3125], train_loss:0.087064
Epoch [1/2], Iter [2706/3125], train_loss:0.089685
Epoch [1/2], Iter [2707/3125], train_loss:0.105608
Epoch [1/2], Iter [2708/3125], train_loss:0.116224
Epoch [1/2], Iter [2709/3125], train_loss:0.092343
Epoch [1/2], Iter [2710/3125], train_loss:0.084557
Epoch [1/2], Iter [2711/3125], train_loss:0.092652
Epoch [1/2], Iter [2712/3125], train_loss:0.083277
Epoch [1/2], Iter [2713/3125], train_loss:0.113801
Epoch [1/2], Iter [2714/3125], train_loss:0.110867
Epoch [1/2], Iter [2715/3125], train_loss:0.118209
Epoch [1/2], Iter [2716/3125], train_loss:0.104623
Epoch [1/2], Iter [2717/3125], train_loss:0.095704
Epoch [1/2], Iter [2718/3125], train_loss:0.104851
Epoch [1/2], Iter [2719/3125], train_loss:0.118780
Epoch [1/2], Iter [2720/3125], train_loss:0.090578
Epoch [1/2], Iter [2721/3125], train_loss:0.141892
Epoch [1/2], Iter [2722/3125], train_loss:0.110100
Epoch [1/2], Iter [2723/3125], train_loss:0.119053
Epoch [1/2], Iter [2724/3125], train_loss:0.087268
Epoch [1/2], Iter [2725/3125], train_loss:0.122059
Epoch [1/2], Iter [2726/3125], train_loss:0.148750
Epoch [1/2], Iter [2727/3125], train_loss:0.123954
Epoch [1/2], Iter [2728/3125], train_loss:0.124976
Epoch [1/2], Iter [2729/3125], train_loss:0.089132
Epoch [1/2], Iter [2730/3125], train_loss:0.089235
Epoch [1/2], Iter [2731/3125], train_loss:0.123030
Epoch [1/2], Iter [2732/3125], train_loss:0.105519
Epoch [1/2], Iter [2733/3125], train_loss:0.106100
Epoch [1/2], Iter [2734/3125], train_loss:0.106303
Epoch [1/2], Iter [2735/3125], train_loss:0.094615
Epoch [1/2], Iter [2736/3125], train_loss:0.133672
Epoch [1/2], Iter [2737/3125], train_loss:0.103516
Epoch [1/2], Iter [2738/3125], train_loss:0.150776
Epoch [1/2], Iter [2739/3125], train_loss:0.087098
Epoch [1/2], Iter [2740/3125], train_loss:0.116379
Epoch [1/2], Iter [2741/3125], train_loss:0.102303
Epoch [1/2], Iter [2742/3125], train_loss:0.094834
Epoch [1/2], Iter [2743/3125], train_loss:0.089663
Epoch [1/2], Iter [2744/3125], train_loss:0.092802
Epoch [1/2], Iter [2745/3125], train_loss:0.110069
Epoch [1/2], Iter [2746/3125], train_loss:0.110816
Epoch [1/2], Iter [2747/3125], train_loss:0.127739
Epoch [1/2], Iter [2748/3125], train_loss:0.084715
Epoch [1/2], Iter [2749/3125], train_loss:0.101412
Epoch [1/2], Iter [2750/3125], train_loss:0.081077
Epoch [1/2], Iter [2751/3125], train_loss:0.111492
Epoch [1/2], Iter [2752/3125], train_loss:0.100451
Epoch [1/2], Iter [2753/3125], train_loss:0.087303
Epoch [1/2], Iter [2754/3125], train_loss:0.093413
Epoch [1/2], Iter [2755/3125], train_loss:0.112628
Epoch [1/2], Iter [2756/3125], train_loss:0.111557
Epoch [1/2], Iter [2757/3125], train_loss:0.109847
Epoch [1/2], Iter [2758/3125], train_loss:0.101618
Epoch [1/2], Iter [2759/3125], train_loss:0.089157
Epoch [1/2], Iter [2760/3125], train_loss:0.113698
Epoch [1/2], Iter [2761/3125], train_loss:0.091779
Epoch [1/2], Iter [2762/3125], train_loss:0.079673
Epoch [1/2], Iter [2763/3125], train_loss:0.103621
Epoch [1/2], Iter [2764/3125], train_loss:0.082735
Epoch [1/2], Iter [2765/3125], train_loss:0.105204
Epoch [1/2], Iter [2766/3125], train_loss:0.086259
Epoch [1/2], Iter [2767/3125], train_loss:0.123802
Epoch [1/2], Iter [2768/3125], train_loss:0.099351
Epoch [1/2], Iter [2769/3125], train_loss:0.109434
Epoch [1/2], Iter [2770/3125], train_loss:0.090484
Epoch [1/2], Iter [2771/3125], train_loss:0.121009
Epoch [1/2], Iter [2772/3125], train_loss:0.112087
Epoch [1/2], Iter [2773/3125], train_loss:0.107433
Epoch [1/2], Iter [2774/3125], train_loss:0.105113
Epoch [1/2], Iter [2775/3125], train_loss:0.118956
Epoch [1/2], Iter [2776/3125], train_loss:0.112925
Epoch [1/2], Iter [2777/3125], train_loss:0.100105
Epoch [1/2], Iter [2778/3125], train_loss:0.092404
Epoch [1/2], Iter [2779/3125], train_loss:0.098456
Epoch [1/2], Iter [2780/3125], train_loss:0.122263
Epoch [1/2], Iter [2781/3125], train_loss:0.107635
Epoch [1/2], Iter [2782/3125], train_loss:0.080257
Epoch [1/2], Iter [2783/3125], train_loss:0.093349
Epoch [1/2], Iter [2784/3125], train_loss:0.083886
Epoch [1/2], Iter [2785/3125], train_loss:0.103770
Epoch [1/2], Iter [2786/3125], train_loss:0.099667
Epoch [1/2], Iter [2787/3125], train_loss:0.114459
Epoch [1/2], Iter [2788/3125], train_loss:0.133057
Epoch [1/2], Iter [2789/3125], train_loss:0.086533
Epoch [1/2], Iter [2790/3125], train_loss:0.110268
Epoch [1/2], Iter [2791/3125], train_loss:0.101292
Epoch [1/2], Iter [2792/3125], train_loss:0.091083
Epoch [1/2], Iter [2793/3125], train_loss:0.092543
Epoch [1/2], Iter [2794/3125], train_loss:0.108981
Epoch [1/2], Iter [2795/3125], train_loss:0.096629
Epoch [1/2], Iter [2796/3125], train_loss:0.111024
Epoch [1/2], Iter [2797/3125], train_loss:0.103886
Epoch [1/2], Iter [2798/3125], train_loss:0.061455
Epoch [1/2], Iter [2799/3125], train_loss:0.094047
Epoch [1/2], Iter [2800/3125], train_loss:0.090577
Epoch [1/2], Iter [2801/3125], train_loss:0.089855
Epoch [1/2], Iter [2802/3125], train_loss:0.113875
Epoch [1/2], Iter [2803/3125], train_loss:0.107555
Epoch [1/2], Iter [2804/3125], train_loss:0.091442
Epoch [1/2], Iter [2805/3125], train_loss:0.121512
Epoch [1/2], Iter [2806/3125], train_loss:0.102267
Epoch [1/2], Iter [2807/3125], train_loss:0.113485
Epoch [1/2], Iter [2808/3125], train_loss:0.085101
Epoch [1/2], Iter [2809/3125], train_loss:0.123058
Epoch [1/2], Iter [2810/3125], train_loss:0.106300
Epoch [1/2], Iter [2811/3125], train_loss:0.100239
Epoch [1/2], Iter [2812/3125], train_loss:0.084932
Epoch [1/2], Iter [2813/3125], train_loss:0.121454
Epoch [1/2], Iter [2814/3125], train_loss:0.103186
Epoch [1/2], Iter [2815/3125], train_loss:0.116744
Epoch [1/2], Iter [2816/3125], train_loss:0.078205
Epoch [1/2], Iter [2817/3125], train_loss:0.118746
Epoch [1/2], Iter [2818/3125], train_loss:0.099491
Epoch [1/2], Iter [2819/3125], train_loss:0.084959
Epoch [1/2], Iter [2820/3125], train_loss:0.098084
Epoch [1/2], Iter [2821/3125], train_loss:0.076564
Epoch [1/2], Iter [2822/3125], train_loss:0.108699
Epoch [1/2], Iter [2823/3125], train_loss:0.092791
Epoch [1/2], Iter [2824/3125], train_loss:0.111765
Epoch [1/2], Iter [2825/3125], train_loss:0.082965
Epoch [1/2], Iter [2826/3125], train_loss:0.090465
Epoch [1/2], Iter [2827/3125], train_loss:0.115320
Epoch [1/2], Iter [2828/3125], train_loss:0.120692
Epoch [1/2], Iter [2829/3125], train_loss:0.123300
Epoch [1/2], Iter [2830/3125], train_loss:0.105747
Epoch [1/2], Iter [2831/3125], train_loss:0.109113
Epoch [1/2], Iter [2832/3125], train_loss:0.107350
Epoch [1/2], Iter [2833/3125], train_loss:0.106788
Epoch [1/2], Iter [2834/3125], train_loss:0.099931
Epoch [1/2], Iter [2835/3125], train_loss:0.098998
Epoch [1/2], Iter [2836/3125], train_loss:0.122916
Epoch [1/2], Iter [2837/3125], train_loss:0.121324
Epoch [1/2], Iter [2838/3125], train_loss:0.080634
Epoch [1/2], Iter [2839/3125], train_loss:0.116525
Epoch [1/2], Iter [2840/3125], train_loss:0.095675
Epoch [1/2], Iter [2841/3125], train_loss:0.084981
Epoch [1/2], Iter [2842/3125], train_loss:0.087895
Epoch [1/2], Iter [2843/3125], train_loss:0.115805
Epoch [1/2], Iter [2844/3125], train_loss:0.112233
Epoch [1/2], Iter [2845/3125], train_loss:0.094116
Epoch [1/2], Iter [2846/3125], train_loss:0.126081
Epoch [1/2], Iter [2847/3125], train_loss:0.090484
Epoch [1/2], Iter [2848/3125], train_loss:0.135703
Epoch [1/2], Iter [2849/3125], train_loss:0.096301
Epoch [1/2], Iter [2850/3125], train_loss:0.108214
Epoch [1/2], Iter [2851/3125], train_loss:0.073158
Epoch [1/2], Iter [2852/3125], train_loss:0.139311
Epoch [1/2], Iter [2853/3125], train_loss:0.087241
Epoch [1/2], Iter [2854/3125], train_loss:0.107322
Epoch [1/2], Iter [2855/3125], train_loss:0.114805
Epoch [1/2], Iter [2856/3125], train_loss:0.070172
Epoch [1/2], Iter [2857/3125], train_loss:0.111630
Epoch [1/2], Iter [2858/3125], train_loss:0.113782
Epoch [1/2], Iter [2859/3125], train_loss:0.120545
Epoch [1/2], Iter [2860/3125], train_loss:0.119155
Epoch [1/2], Iter [2861/3125], train_loss:0.097770
Epoch [1/2], Iter [2862/3125], train_loss:0.100840
Epoch [1/2], Iter [2863/3125], train_loss:0.114033
Epoch [1/2], Iter [2864/3125], train_loss:0.114624
Epoch [1/2], Iter [2865/3125], train_loss:0.107871
Epoch [1/2], Iter [2866/3125], train_loss:0.100698
Epoch [1/2], Iter [2867/3125], train_loss:0.091538
Epoch [1/2], Iter [2868/3125], train_loss:0.108914
Epoch [1/2], Iter [2869/3125], train_loss:0.115105
Epoch [1/2], Iter [2870/3125], train_loss:0.108430
Epoch [1/2], Iter [2871/3125], train_loss:0.094847
Epoch [1/2], Iter [2872/3125], train_loss:0.103506
Epoch [1/2], Iter [2873/3125], train_loss:0.129978
Epoch [1/2], Iter [2874/3125], train_loss:0.120356
Epoch [1/2], Iter [2875/3125], train_loss:0.091264
Epoch [1/2], Iter [2876/3125], train_loss:0.105926
Epoch [1/2], Iter [2877/3125], train_loss:0.085569
Epoch [1/2], Iter [2878/3125], train_loss:0.098659
Epoch [1/2], Iter [2879/3125], train_loss:0.116561
Epoch [1/2], Iter [2880/3125], train_loss:0.076713
Epoch [1/2], Iter [2881/3125], train_loss:0.115081
Epoch [1/2], Iter [2882/3125], train_loss:0.118629
Epoch [1/2], Iter [2883/3125], train_loss:0.083068
Epoch [1/2], Iter [2884/3125], train_loss:0.098757
Epoch [1/2], Iter [2885/3125], train_loss:0.090734
Epoch [1/2], Iter [2886/3125], train_loss:0.105137
Epoch [1/2], Iter [2887/3125], train_loss:0.100578
Epoch [1/2], Iter [2888/3125], train_loss:0.102933
Epoch [1/2], Iter [2889/3125], train_loss:0.111093
Epoch [1/2], Iter [2890/3125], train_loss:0.107033
Epoch [1/2], Iter [2891/3125], train_loss:0.094879
Epoch [1/2], Iter [2892/3125], train_loss:0.085116
Epoch [1/2], Iter [2893/3125], train_loss:0.098241
Epoch [1/2], Iter [2894/3125], train_loss:0.108890
Epoch [1/2], Iter [2895/3125], train_loss:0.102243
Epoch [1/2], Iter [2896/3125], train_loss:0.093309
Epoch [1/2], Iter [2897/3125], train_loss:0.084491
Epoch [1/2], Iter [2898/3125], train_loss:0.092920
Epoch [1/2], Iter [2899/3125], train_loss:0.100787
Epoch [1/2], Iter [2900/3125], train_loss:0.101562
Epoch [1/2], Iter [2901/3125], train_loss:0.142026
Epoch [1/2], Iter [2902/3125], train_loss:0.104432
Epoch [1/2], Iter [2903/3125], train_loss:0.127287
Epoch [1/2], Iter [2904/3125], train_loss:0.120489
Epoch [1/2], Iter [2905/3125], train_loss:0.136113
Epoch [1/2], Iter [2906/3125], train_loss:0.113443
Epoch [1/2], Iter [2907/3125], train_loss:0.118766
Epoch [1/2], Iter [2908/3125], train_loss:0.104068
Epoch [1/2], Iter [2909/3125], train_loss:0.107036
Epoch [1/2], Iter [2910/3125], train_loss:0.134377
Epoch [1/2], Iter [2911/3125], train_loss:0.103202
Epoch [1/2], Iter [2912/3125], train_loss:0.122124
Epoch [1/2], Iter [2913/3125], train_loss:0.085734
Epoch [1/2], Iter [2914/3125], train_loss:0.078548
Epoch [1/2], Iter [2915/3125], train_loss:0.080658
Epoch [1/2], Iter [2916/3125], train_loss:0.091166
Epoch [1/2], Iter [2917/3125], train_loss:0.113348
Epoch [1/2], Iter [2918/3125], train_loss:0.092998
Epoch [1/2], Iter [2919/3125], train_loss:0.098290
Epoch [1/2], Iter [2920/3125], train_loss:0.134570
Epoch [1/2], Iter [2921/3125], train_loss:0.088210
Epoch [1/2], Iter [2922/3125], train_loss:0.088639
Epoch [1/2], Iter [2923/3125], train_loss:0.101916
Epoch [1/2], Iter [2924/3125], train_loss:0.121580
Epoch [1/2], Iter [2925/3125], train_loss:0.092680
Epoch [1/2], Iter [2926/3125], train_loss:0.092212
Epoch [1/2], Iter [2927/3125], train_loss:0.110600
Epoch [1/2], Iter [2928/3125], train_loss:0.076853
Epoch [1/2], Iter [2929/3125], train_loss:0.085440
Epoch [1/2], Iter [2930/3125], train_loss:0.103700
Epoch [1/2], Iter [2931/3125], train_loss:0.112204
Epoch [1/2], Iter [2932/3125], train_loss:0.100517
Epoch [1/2], Iter [2933/3125], train_loss:0.090385
Epoch [1/2], Iter [2934/3125], train_loss:0.100238
Epoch [1/2], Iter [2935/3125], train_loss:0.095390
Epoch [1/2], Iter [2936/3125], train_loss:0.102841
Epoch [1/2], Iter [2937/3125], train_loss:0.153473
Epoch [1/2], Iter [2938/3125], train_loss:0.150890
Epoch [1/2], Iter [2939/3125], train_loss:0.106806
Epoch [1/2], Iter [2940/3125], train_loss:0.116421
Epoch [1/2], Iter [2941/3125], train_loss:0.084526
Epoch [1/2], Iter [2942/3125], train_loss:0.084673
Epoch [1/2], Iter [2943/3125], train_loss:0.087922
Epoch [1/2], Iter [2944/3125], train_loss:0.093839
Epoch [1/2], Iter [2945/3125], train_loss:0.077009
Epoch [1/2], Iter [2946/3125], train_loss:0.118196
Epoch [1/2], Iter [2947/3125], train_loss:0.082792
Epoch [1/2], Iter [2948/3125], train_loss:0.092917
Epoch [1/2], Iter [2949/3125], train_loss:0.090361
Epoch [1/2], Iter [2950/3125], train_loss:0.087079
Epoch [1/2], Iter [2951/3125], train_loss:0.093458
Epoch [1/2], Iter [2952/3125], train_loss:0.072833
Epoch [1/2], Iter [2953/3125], train_loss:0.090879
Epoch [1/2], Iter [2954/3125], train_loss:0.097900
Epoch [1/2], Iter [2955/3125], train_loss:0.136696
Epoch [1/2], Iter [2956/3125], train_loss:0.064742
Epoch [1/2], Iter [2957/3125], train_loss:0.109027
Epoch [1/2], Iter [2958/3125], train_loss:0.100904
Epoch [1/2], Iter [2959/3125], train_loss:0.075821
Epoch [1/2], Iter [2960/3125], train_loss:0.096203
Epoch [1/2], Iter [2961/3125], train_loss:0.121050
Epoch [1/2], Iter [2962/3125], train_loss:0.091576
Epoch [1/2], Iter [2963/3125], train_loss:0.121059
Epoch [1/2], Iter [2964/3125], train_loss:0.081418
Epoch [1/2], Iter [2965/3125], train_loss:0.070045
Epoch [1/2], Iter [2966/3125], train_loss:0.115191
Epoch [1/2], Iter [2967/3125], train_loss:0.102221
Epoch [1/2], Iter [2968/3125], train_loss:0.089548
Epoch [1/2], Iter [2969/3125], train_loss:0.089752
Epoch [1/2], Iter [2970/3125], train_loss:0.086713
Epoch [1/2], Iter [2971/3125], train_loss:0.094410
Epoch [1/2], Iter [2972/3125], train_loss:0.109021
Epoch [1/2], Iter [2973/3125], train_loss:0.109131
Epoch [1/2], Iter [2974/3125], train_loss:0.078996
Epoch [1/2], Iter [2975/3125], train_loss:0.092639
Epoch [1/2], Iter [2976/3125], train_loss:0.083834
Epoch [1/2], Iter [2977/3125], train_loss:0.087725
Epoch [1/2], Iter [2978/3125], train_loss:0.108193
Epoch [1/2], Iter [2979/3125], train_loss:0.125889
Epoch [1/2], Iter [2980/3125], train_loss:0.098979
Epoch [1/2], Iter [2981/3125], train_loss:0.072635
Epoch [1/2], Iter [2982/3125], train_loss:0.105319
Epoch [1/2], Iter [2983/3125], train_loss:0.079485
Epoch [1/2], Iter [2984/3125], train_loss:0.083783
Epoch [1/2], Iter [2985/3125], train_loss:0.082029
Epoch [1/2], Iter [2986/3125], train_loss:0.106792
Epoch [1/2], Iter [2987/3125], train_loss:0.092535
Epoch [1/2], Iter [2988/3125], train_loss:0.101078
Epoch [1/2], Iter [2989/3125], train_loss:0.074717
Epoch [1/2], Iter [2990/3125], train_loss:0.110788
Epoch [1/2], Iter [2991/3125], train_loss:0.110753
Epoch [1/2], Iter [2992/3125], train_loss:0.077376
Epoch [1/2], Iter [2993/3125], train_loss:0.105112
Epoch [1/2], Iter [2994/3125], train_loss:0.098503
Epoch [1/2], Iter [2995/3125], train_loss:0.098274
Epoch [1/2], Iter [2996/3125], train_loss:0.111971
Epoch [1/2], Iter [2997/3125], train_loss:0.080266
Epoch [1/2], Iter [2998/3125], train_loss:0.087455
Epoch [1/2], Iter [2999/3125], train_loss:0.069537
Epoch [1/2], Iter [3000/3125], train_loss:0.114505
Epoch [1/2], Iter [3001/3125], train_loss:0.103018
Epoch [1/2], Iter [3002/3125], train_loss:0.134254
Epoch [1/2], Iter [3003/3125], train_loss:0.137939
Epoch [1/2], Iter [3004/3125], train_loss:0.096483
Epoch [1/2], Iter [3005/3125], train_loss:0.090615
Epoch [1/2], Iter [3006/3125], train_loss:0.116887
Epoch [1/2], Iter [3007/3125], train_loss:0.126273
Epoch [1/2], Iter [3008/3125], train_loss:0.100232
Epoch [1/2], Iter [3009/3125], train_loss:0.085712
Epoch [1/2], Iter [3010/3125], train_loss:0.110663
Epoch [1/2], Iter [3011/3125], train_loss:0.123572
Epoch [1/2], Iter [3012/3125], train_loss:0.112289
Epoch [1/2], Iter [3013/3125], train_loss:0.129155
Epoch [1/2], Iter [3014/3125], train_loss:0.095497
Epoch [1/2], Iter [3015/3125], train_loss:0.091534
Epoch [1/2], Iter [3016/3125], train_loss:0.087610
Epoch [1/2], Iter [3017/3125], train_loss:0.129653
Epoch [1/2], Iter [3018/3125], train_loss:0.099838
Epoch [1/2], Iter [3019/3125], train_loss:0.085400
Epoch [1/2], Iter [3020/3125], train_loss:0.100788
Epoch [1/2], Iter [3021/3125], train_loss:0.091098
Epoch [1/2], Iter [3022/3125], train_loss:0.095280
Epoch [1/2], Iter [3023/3125], train_loss:0.109820
Epoch [1/2], Iter [3024/3125], train_loss:0.092585
Epoch [1/2], Iter [3025/3125], train_loss:0.098658
Epoch [1/2], Iter [3026/3125], train_loss:0.119363
Epoch [1/2], Iter [3027/3125], train_loss:0.077531
Epoch [1/2], Iter [3028/3125], train_loss:0.118906
Epoch [1/2], Iter [3029/3125], train_loss:0.123934
Epoch [1/2], Iter [3030/3125], train_loss:0.070096
Epoch [1/2], Iter [3031/3125], train_loss:0.082617
Epoch [1/2], Iter [3032/3125], train_loss:0.124859
Epoch [1/2], Iter [3033/3125], train_loss:0.099028
Epoch [1/2], Iter [3034/3125], train_loss:0.087440
Epoch [1/2], Iter [3035/3125], train_loss:0.089839
Epoch [1/2], Iter [3036/3125], train_loss:0.088460
Epoch [1/2], Iter [3037/3125], train_loss:0.070800
Epoch [1/2], Iter [3038/3125], train_loss:0.112089
Epoch [1/2], Iter [3039/3125], train_loss:0.126554
Epoch [1/2], Iter [3040/3125], train_loss:0.131252
Epoch [1/2], Iter [3041/3125], train_loss:0.112579
Epoch [1/2], Iter [3042/3125], train_loss:0.115775
Epoch [1/2], Iter [3043/3125], train_loss:0.072862
Epoch [1/2], Iter [3044/3125], train_loss:0.096823
Epoch [1/2], Iter [3045/3125], train_loss:0.118967
Epoch [1/2], Iter [3046/3125], train_loss:0.094897
Epoch [1/2], Iter [3047/3125], train_loss:0.122852
Epoch [1/2], Iter [3048/3125], train_loss:0.138975
Epoch [1/2], Iter [3049/3125], train_loss:0.085603
Epoch [1/2], Iter [3050/3125], train_loss:0.103826
Epoch [1/2], Iter [3051/3125], train_loss:0.117173
Epoch [1/2], Iter [3052/3125], train_loss:0.108605
Epoch [1/2], Iter [3053/3125], train_loss:0.083909
Epoch [1/2], Iter [3054/3125], train_loss:0.111517
Epoch [1/2], Iter [3055/3125], train_loss:0.108840
Epoch [1/2], Iter [3056/3125], train_loss:0.084882
Epoch [1/2], Iter [3057/3125], train_loss:0.112076
Epoch [1/2], Iter [3058/3125], train_loss:0.099775
Epoch [1/2], Iter [3059/3125], train_loss:0.091122
Epoch [1/2], Iter [3060/3125], train_loss:0.079662
Epoch [1/2], Iter [3061/3125], train_loss:0.112417
Epoch [1/2], Iter [3062/3125], train_loss:0.112178
Epoch [1/2], Iter [3063/3125], train_loss:0.088234
Epoch [1/2], Iter [3064/3125], train_loss:0.107721
Epoch [1/2], Iter [3065/3125], train_loss:0.104738
Epoch [1/2], Iter [3066/3125], train_loss:0.107277
Epoch [1/2], Iter [3067/3125], train_loss:0.103006
Epoch [1/2], Iter [3068/3125], train_loss:0.135107
Epoch [1/2], Iter [3069/3125], train_loss:0.091834
Epoch [1/2], Iter [3070/3125], train_loss:0.110295
Epoch [1/2], Iter [3071/3125], train_loss:0.088163
Epoch [1/2], Iter [3072/3125], train_loss:0.128199
Epoch [1/2], Iter [3073/3125], train_loss:0.113416
Epoch [1/2], Iter [3074/3125], train_loss:0.083675
Epoch [1/2], Iter [3075/3125], train_loss:0.129645
Epoch [1/2], Iter [3076/3125], train_loss:0.084569
Epoch [1/2], Iter [3077/3125], train_loss:0.121163
Epoch [1/2], Iter [3078/3125], train_loss:0.092450
Epoch [1/2], Iter [3079/3125], train_loss:0.109824
Epoch [1/2], Iter [3080/3125], train_loss:0.085952
Epoch [1/2], Iter [3081/3125], train_loss:0.111499
Epoch [1/2], Iter [3082/3125], train_loss:0.091926
Epoch [1/2], Iter [3083/3125], train_loss:0.126225
Epoch [1/2], Iter [3084/3125], train_loss:0.085915
Epoch [1/2], Iter [3085/3125], train_loss:0.086037
Epoch [1/2], Iter [3086/3125], train_loss:0.072105
Epoch [1/2], Iter [3087/3125], train_loss:0.124601
Epoch [1/2], Iter [3088/3125], train_loss:0.131993
Epoch [1/2], Iter [3089/3125], train_loss:0.113655
Epoch [1/2], Iter [3090/3125], train_loss:0.110477
Epoch [1/2], Iter [3091/3125], train_loss:0.083211
Epoch [1/2], Iter [3092/3125], train_loss:0.093807
Epoch [1/2], Iter [3093/3125], train_loss:0.104893
Epoch [1/2], Iter [3094/3125], train_loss:0.099689
Epoch [1/2], Iter [3095/3125], train_loss:0.104535
Epoch [1/2], Iter [3096/3125], train_loss:0.098604
Epoch [1/2], Iter [3097/3125], train_loss:0.099393
Epoch [1/2], Iter [3098/3125], train_loss:0.122256
Epoch [1/2], Iter [3099/3125], train_loss:0.119814
Epoch [1/2], Iter [3100/3125], train_loss:0.112334
Epoch [1/2], Iter [3101/3125], train_loss:0.123617
Epoch [1/2], Iter [3102/3125], train_loss:0.077105
Epoch [1/2], Iter [3103/3125], train_loss:0.085895
Epoch [1/2], Iter [3104/3125], train_loss:0.114813
Epoch [1/2], Iter [3105/3125], train_loss:0.062794
Epoch [1/2], Iter [3106/3125], train_loss:0.116139
Epoch [1/2], Iter [3107/3125], train_loss:0.091801
Epoch [1/2], Iter [3108/3125], train_loss:0.097065
Epoch [1/2], Iter [3109/3125], train_loss:0.102375
Epoch [1/2], Iter [3110/3125], train_loss:0.107358
Epoch [1/2], Iter [3111/3125], train_loss:0.087032
Epoch [1/2], Iter [3112/3125], train_loss:0.088426
Epoch [1/2], Iter [3113/3125], train_loss:0.096127
Epoch [1/2], Iter [3114/3125], train_loss:0.112896
Epoch [1/2], Iter [3115/3125], train_loss:0.101730
Epoch [1/2], Iter [3116/3125], train_loss:0.103965
Epoch [1/2], Iter [3117/3125], train_loss:0.091098
Epoch [1/2], Iter [3118/3125], train_loss:0.088157
Epoch [1/2], Iter [3119/3125], train_loss:0.131079
Epoch [1/2], Iter [3120/3125], train_loss:0.105961
Epoch [1/2], Iter [3121/3125], train_loss:0.102978
Epoch [1/2], Iter [3122/3125], train_loss:0.097545
Epoch [1/2], Iter [3123/3125], train_loss:0.104406
Epoch [1/2], Iter [3124/3125], train_loss:0.080545
Epoch [1/2], Iter [3125/3125], train_loss:0.093825
Epoch [1/2], train_loss:0.1192, train_acc:32.1560%, test_loss:0.0972, test_acc:43.6000%
Epoch [2/2], Iter [1/3125], train_loss:0.091757
Epoch [2/2], Iter [2/3125], train_loss:0.089473
Epoch [2/2], Iter [3/3125], train_loss:0.098204
Epoch [2/2], Iter [4/3125], train_loss:0.086418
Epoch [2/2], Iter [5/3125], train_loss:0.089171
Epoch [2/2], Iter [6/3125], train_loss:0.086089
Epoch [2/2], Iter [7/3125], train_loss:0.091749
Epoch [2/2], Iter [8/3125], train_loss:0.115198
Epoch [2/2], Iter [9/3125], train_loss:0.114494
Epoch [2/2], Iter [10/3125], train_loss:0.099484
Epoch [2/2], Iter [11/3125], train_loss:0.111296
Epoch [2/2], Iter [12/3125], train_loss:0.100909
Epoch [2/2], Iter [13/3125], train_loss:0.102753
Epoch [2/2], Iter [14/3125], train_loss:0.074273
Epoch [2/2], Iter [15/3125], train_loss:0.079903
Epoch [2/2], Iter [16/3125], train_loss:0.089634
Epoch [2/2], Iter [17/3125], train_loss:0.092798
Epoch [2/2], Iter [18/3125], train_loss:0.107158
Epoch [2/2], Iter [19/3125], train_loss:0.093421
Epoch [2/2], Iter [20/3125], train_loss:0.110199
Epoch [2/2], Iter [21/3125], train_loss:0.104135
Epoch [2/2], Iter [22/3125], train_loss:0.093904
Epoch [2/2], Iter [23/3125], train_loss:0.094700
Epoch [2/2], Iter [24/3125], train_loss:0.101966
Epoch [2/2], Iter [25/3125], train_loss:0.075299
Epoch [2/2], Iter [26/3125], train_loss:0.081989
Epoch [2/2], Iter [27/3125], train_loss:0.096282
Epoch [2/2], Iter [28/3125], train_loss:0.099643
Epoch [2/2], Iter [29/3125], train_loss:0.094669
Epoch [2/2], Iter [30/3125], train_loss:0.102238
Epoch [2/2], Iter [31/3125], train_loss:0.075263
Epoch [2/2], Iter [32/3125], train_loss:0.096392
Epoch [2/2], Iter [33/3125], train_loss:0.160338
Epoch [2/2], Iter [34/3125], train_loss:0.110569
Epoch [2/2], Iter [35/3125], train_loss:0.084512
Epoch [2/2], Iter [36/3125], train_loss:0.106845
Epoch [2/2], Iter [37/3125], train_loss:0.092132
Epoch [2/2], Iter [38/3125], train_loss:0.086252
Epoch [2/2], Iter [39/3125], train_loss:0.073865
Epoch [2/2], Iter [40/3125], train_loss:0.100261
Epoch [2/2], Iter [41/3125], train_loss:0.077646
Epoch [2/2], Iter [42/3125], train_loss:0.076397
Epoch [2/2], Iter [43/3125], train_loss:0.125353
Epoch [2/2], Iter [44/3125], train_loss:0.100842
Epoch [2/2], Iter [45/3125], train_loss:0.120180
Epoch [2/2], Iter [46/3125], train_loss:0.087662
Epoch [2/2], Iter [47/3125], train_loss:0.091548
Epoch [2/2], Iter [48/3125], train_loss:0.088386
Epoch [2/2], Iter [49/3125], train_loss:0.094382
Epoch [2/2], Iter [50/3125], train_loss:0.098175
Epoch [2/2], Iter [51/3125], train_loss:0.087562
Epoch [2/2], Iter [52/3125], train_loss:0.108904
Epoch [2/2], Iter [53/3125], train_loss:0.080077
Epoch [2/2], Iter [54/3125], train_loss:0.108177
Epoch [2/2], Iter [55/3125], train_loss:0.131528
Epoch [2/2], Iter [56/3125], train_loss:0.080193
Epoch [2/2], Iter [57/3125], train_loss:0.091726
Epoch [2/2], Iter [58/3125], train_loss:0.071657
Epoch [2/2], Iter [59/3125], train_loss:0.084843
Epoch [2/2], Iter [60/3125], train_loss:0.104012
Epoch [2/2], Iter [61/3125], train_loss:0.102972
Epoch [2/2], Iter [62/3125], train_loss:0.111907
Epoch [2/2], Iter [63/3125], train_loss:0.111772
Epoch [2/2], Iter [64/3125], train_loss:0.078473
Epoch [2/2], Iter [65/3125], train_loss:0.080929
Epoch [2/2], Iter [66/3125], train_loss:0.091896
Epoch [2/2], Iter [67/3125], train_loss:0.070422
Epoch [2/2], Iter [68/3125], train_loss:0.106302
Epoch [2/2], Iter [69/3125], train_loss:0.106870
Epoch [2/2], Iter [70/3125], train_loss:0.090615
Epoch [2/2], Iter [71/3125], train_loss:0.098871
Epoch [2/2], Iter [72/3125], train_loss:0.096799
Epoch [2/2], Iter [73/3125], train_loss:0.088088
Epoch [2/2], Iter [74/3125], train_loss:0.079056
Epoch [2/2], Iter [75/3125], train_loss:0.092176
Epoch [2/2], Iter [76/3125], train_loss:0.093906
Epoch [2/2], Iter [77/3125], train_loss:0.107949
Epoch [2/2], Iter [78/3125], train_loss:0.094350
Epoch [2/2], Iter [79/3125], train_loss:0.088256
Epoch [2/2], Iter [80/3125], train_loss:0.113880
Epoch [2/2], Iter [81/3125], train_loss:0.098161
Epoch [2/2], Iter [82/3125], train_loss:0.110207
Epoch [2/2], Iter [83/3125], train_loss:0.064564
Epoch [2/2], Iter [84/3125], train_loss:0.106611
Epoch [2/2], Iter [85/3125], train_loss:0.105607
Epoch [2/2], Iter [86/3125], train_loss:0.102875
Epoch [2/2], Iter [87/3125], train_loss:0.107462
Epoch [2/2], Iter [88/3125], train_loss:0.105881
Epoch [2/2], Iter [89/3125], train_loss:0.121599
Epoch [2/2], Iter [90/3125], train_loss:0.103243
Epoch [2/2], Iter [91/3125], train_loss:0.080412
Epoch [2/2], Iter [92/3125], train_loss:0.098595
Epoch [2/2], Iter [93/3125], train_loss:0.083234
Epoch [2/2], Iter [94/3125], train_loss:0.075366
Epoch [2/2], Iter [95/3125], train_loss:0.115272
Epoch [2/2], Iter [96/3125], train_loss:0.114001
Epoch [2/2], Iter [97/3125], train_loss:0.086412
Epoch [2/2], Iter [98/3125], train_loss:0.071845
Epoch [2/2], Iter [99/3125], train_loss:0.110689
Epoch [2/2], Iter [100/3125], train_loss:0.081002
Epoch [2/2], Iter [101/3125], train_loss:0.087105
Epoch [2/2], Iter [102/3125], train_loss:0.077709
Epoch [2/2], Iter [103/3125], train_loss:0.087919
Epoch [2/2], Iter [104/3125], train_loss:0.092537
Epoch [2/2], Iter [105/3125], train_loss:0.097107
Epoch [2/2], Iter [106/3125], train_loss:0.066073
Epoch [2/2], Iter [107/3125], train_loss:0.111412
Epoch [2/2], Iter [108/3125], train_loss:0.075312
Epoch [2/2], Iter [109/3125], train_loss:0.102170
Epoch [2/2], Iter [110/3125], train_loss:0.098995
Epoch [2/2], Iter [111/3125], train_loss:0.075082
Epoch [2/2], Iter [112/3125], train_loss:0.086781
Epoch [2/2], Iter [113/3125], train_loss:0.106837
Epoch [2/2], Iter [114/3125], train_loss:0.087139
Epoch [2/2], Iter [115/3125], train_loss:0.099929
Epoch [2/2], Iter [116/3125], train_loss:0.096372
Epoch [2/2], Iter [117/3125], train_loss:0.101554
Epoch [2/2], Iter [118/3125], train_loss:0.087905
Epoch [2/2], Iter [119/3125], train_loss:0.070996
Epoch [2/2], Iter [120/3125], train_loss:0.105514
Epoch [2/2], Iter [121/3125], train_loss:0.136283
Epoch [2/2], Iter [122/3125], train_loss:0.103037
Epoch [2/2], Iter [123/3125], train_loss:0.097136
Epoch [2/2], Iter [124/3125], train_loss:0.101025
Epoch [2/2], Iter [125/3125], train_loss:0.094171
Epoch [2/2], Iter [126/3125], train_loss:0.121665
Epoch [2/2], Iter [127/3125], train_loss:0.092011
Epoch [2/2], Iter [128/3125], train_loss:0.078880
Epoch [2/2], Iter [129/3125], train_loss:0.128020
Epoch [2/2], Iter [130/3125], train_loss:0.072524
Epoch [2/2], Iter [131/3125], train_loss:0.071616
Epoch [2/2], Iter [132/3125], train_loss:0.096712
Epoch [2/2], Iter [133/3125], train_loss:0.093573
Epoch [2/2], Iter [134/3125], train_loss:0.121898
Epoch [2/2], Iter [135/3125], train_loss:0.093081
Epoch [2/2], Iter [136/3125], train_loss:0.105385
Epoch [2/2], Iter [137/3125], train_loss:0.103334
Epoch [2/2], Iter [138/3125], train_loss:0.083012
Epoch [2/2], Iter [139/3125], train_loss:0.123903
Epoch [2/2], Iter [140/3125], train_loss:0.066876
Epoch [2/2], Iter [141/3125], train_loss:0.117363
Epoch [2/2], Iter [142/3125], train_loss:0.065998
Epoch [2/2], Iter [143/3125], train_loss:0.125768
Epoch [2/2], Iter [144/3125], train_loss:0.098320
Epoch [2/2], Iter [145/3125], train_loss:0.126339
Epoch [2/2], Iter [146/3125], train_loss:0.106683
Epoch [2/2], Iter [147/3125], train_loss:0.108368
Epoch [2/2], Iter [148/3125], train_loss:0.068419
Epoch [2/2], Iter [149/3125], train_loss:0.116456
Epoch [2/2], Iter [150/3125], train_loss:0.089318
Epoch [2/2], Iter [151/3125], train_loss:0.088393
Epoch [2/2], Iter [152/3125], train_loss:0.078417
Epoch [2/2], Iter [153/3125], train_loss:0.090817
Epoch [2/2], Iter [154/3125], train_loss:0.141944
Epoch [2/2], Iter [155/3125], train_loss:0.112521
Epoch [2/2], Iter [156/3125], train_loss:0.105626
Epoch [2/2], Iter [157/3125], train_loss:0.101277
Epoch [2/2], Iter [158/3125], train_loss:0.108937
Epoch [2/2], Iter [159/3125], train_loss:0.114222
Epoch [2/2], Iter [160/3125], train_loss:0.109539
Epoch [2/2], Iter [161/3125], train_loss:0.095470
Epoch [2/2], Iter [162/3125], train_loss:0.083049
Epoch [2/2], Iter [163/3125], train_loss:0.130008
Epoch [2/2], Iter [164/3125], train_loss:0.108459
Epoch [2/2], Iter [165/3125], train_loss:0.103473
Epoch [2/2], Iter [166/3125], train_loss:0.102036
Epoch [2/2], Iter [167/3125], train_loss:0.071162
Epoch [2/2], Iter [168/3125], train_loss:0.107491
Epoch [2/2], Iter [169/3125], train_loss:0.098133
Epoch [2/2], Iter [170/3125], train_loss:0.096314
Epoch [2/2], Iter [171/3125], train_loss:0.073051
Epoch [2/2], Iter [172/3125], train_loss:0.155295
Epoch [2/2], Iter [173/3125], train_loss:0.134648
Epoch [2/2], Iter [174/3125], train_loss:0.096655
Epoch [2/2], Iter [175/3125], train_loss:0.123550
Epoch [2/2], Iter [176/3125], train_loss:0.089275
Epoch [2/2], Iter [177/3125], train_loss:0.100006
Epoch [2/2], Iter [178/3125], train_loss:0.111296
Epoch [2/2], Iter [179/3125], train_loss:0.097870
Epoch [2/2], Iter [180/3125], train_loss:0.091881
Epoch [2/2], Iter [181/3125], train_loss:0.099951
Epoch [2/2], Iter [182/3125], train_loss:0.069839
Epoch [2/2], Iter [183/3125], train_loss:0.137639
Epoch [2/2], Iter [184/3125], train_loss:0.108209
Epoch [2/2], Iter [185/3125], train_loss:0.070030
Epoch [2/2], Iter [186/3125], train_loss:0.086371
Epoch [2/2], Iter [187/3125], train_loss:0.107254
Epoch [2/2], Iter [188/3125], train_loss:0.130238
Epoch [2/2], Iter [189/3125], train_loss:0.073020
Epoch [2/2], Iter [190/3125], train_loss:0.115974
Epoch [2/2], Iter [191/3125], train_loss:0.089619
Epoch [2/2], Iter [192/3125], train_loss:0.070109
Epoch [2/2], Iter [193/3125], train_loss:0.104231
Epoch [2/2], Iter [194/3125], train_loss:0.108008
Epoch [2/2], Iter [195/3125], train_loss:0.094901
Epoch [2/2], Iter [196/3125], train_loss:0.075482
Epoch [2/2], Iter [197/3125], train_loss:0.103212
Epoch [2/2], Iter [198/3125], train_loss:0.122867
Epoch [2/2], Iter [199/3125], train_loss:0.103050
Epoch [2/2], Iter [200/3125], train_loss:0.105274
Epoch [2/2], Iter [201/3125], train_loss:0.091650
Epoch [2/2], Iter [202/3125], train_loss:0.096133
Epoch [2/2], Iter [203/3125], train_loss:0.087355
Epoch [2/2], Iter [204/3125], train_loss:0.158490
Epoch [2/2], Iter [205/3125], train_loss:0.108093
Epoch [2/2], Iter [206/3125], train_loss:0.096270
Epoch [2/2], Iter [207/3125], train_loss:0.084289
Epoch [2/2], Iter [208/3125], train_loss:0.132000
Epoch [2/2], Iter [209/3125], train_loss:0.121158
Epoch [2/2], Iter [210/3125], train_loss:0.094302
Epoch [2/2], Iter [211/3125], train_loss:0.124608
Epoch [2/2], Iter [212/3125], train_loss:0.089951
Epoch [2/2], Iter [213/3125], train_loss:0.088527
Epoch [2/2], Iter [214/3125], train_loss:0.083720
Epoch [2/2], Iter [215/3125], train_loss:0.101040
Epoch [2/2], Iter [216/3125], train_loss:0.083461
Epoch [2/2], Iter [217/3125], train_loss:0.081839
Epoch [2/2], Iter [218/3125], train_loss:0.076899
Epoch [2/2], Iter [219/3125], train_loss:0.094987
Epoch [2/2], Iter [220/3125], train_loss:0.071407
Epoch [2/2], Iter [221/3125], train_loss:0.062804
Epoch [2/2], Iter [222/3125], train_loss:0.086016
Epoch [2/2], Iter [223/3125], train_loss:0.148447
Epoch [2/2], Iter [224/3125], train_loss:0.092807
Epoch [2/2], Iter [225/3125], train_loss:0.098514
Epoch [2/2], Iter [226/3125], train_loss:0.079975
Epoch [2/2], Iter [227/3125], train_loss:0.103644
Epoch [2/2], Iter [228/3125], train_loss:0.097163
Epoch [2/2], Iter [229/3125], train_loss:0.116863
Epoch [2/2], Iter [230/3125], train_loss:0.115765
Epoch [2/2], Iter [231/3125], train_loss:0.083744
Epoch [2/2], Iter [232/3125], train_loss:0.092721
Epoch [2/2], Iter [233/3125], train_loss:0.083652
Epoch [2/2], Iter [234/3125], train_loss:0.107158
Epoch [2/2], Iter [235/3125], train_loss:0.097513
Epoch [2/2], Iter [236/3125], train_loss:0.098046
Epoch [2/2], Iter [237/3125], train_loss:0.102731
Epoch [2/2], Iter [238/3125], train_loss:0.108409
Epoch [2/2], Iter [239/3125], train_loss:0.084167
Epoch [2/2], Iter [240/3125], train_loss:0.115512
Epoch [2/2], Iter [241/3125], train_loss:0.090877
Epoch [2/2], Iter [242/3125], train_loss:0.111257
Epoch [2/2], Iter [243/3125], train_loss:0.099199
Epoch [2/2], Iter [244/3125], train_loss:0.127728
Epoch [2/2], Iter [245/3125], train_loss:0.092669
Epoch [2/2], Iter [246/3125], train_loss:0.091938
Epoch [2/2], Iter [247/3125], train_loss:0.117714
Epoch [2/2], Iter [248/3125], train_loss:0.103751
Epoch [2/2], Iter [249/3125], train_loss:0.086651
Epoch [2/2], Iter [250/3125], train_loss:0.107576
Epoch [2/2], Iter [251/3125], train_loss:0.106203
Epoch [2/2], Iter [252/3125], train_loss:0.087378
Epoch [2/2], Iter [253/3125], train_loss:0.115751
Epoch [2/2], Iter [254/3125], train_loss:0.132171
Epoch [2/2], Iter [255/3125], train_loss:0.078951
Epoch [2/2], Iter [256/3125], train_loss:0.080014
Epoch [2/2], Iter [257/3125], train_loss:0.134510
Epoch [2/2], Iter [258/3125], train_loss:0.137065
Epoch [2/2], Iter [259/3125], train_loss:0.104432
Epoch [2/2], Iter [260/3125], train_loss:0.106487
Epoch [2/2], Iter [261/3125], train_loss:0.069835
Epoch [2/2], Iter [262/3125], train_loss:0.094925
Epoch [2/2], Iter [263/3125], train_loss:0.094040
Epoch [2/2], Iter [264/3125], train_loss:0.093047
Epoch [2/2], Iter [265/3125], train_loss:0.079012
Epoch [2/2], Iter [266/3125], train_loss:0.098503
Epoch [2/2], Iter [267/3125], train_loss:0.105267
Epoch [2/2], Iter [268/3125], train_loss:0.087947
Epoch [2/2], Iter [269/3125], train_loss:0.078582
Epoch [2/2], Iter [270/3125], train_loss:0.090194
Epoch [2/2], Iter [271/3125], train_loss:0.077623
Epoch [2/2], Iter [272/3125], train_loss:0.072486
Epoch [2/2], Iter [273/3125], train_loss:0.106877
Epoch [2/2], Iter [274/3125], train_loss:0.093605
Epoch [2/2], Iter [275/3125], train_loss:0.095765
Epoch [2/2], Iter [276/3125], train_loss:0.073483
Epoch [2/2], Iter [277/3125], train_loss:0.105748
Epoch [2/2], Iter [278/3125], train_loss:0.115098
Epoch [2/2], Iter [279/3125], train_loss:0.101363
Epoch [2/2], Iter [280/3125], train_loss:0.094877
Epoch [2/2], Iter [281/3125], train_loss:0.077018
Epoch [2/2], Iter [282/3125], train_loss:0.142760
Epoch [2/2], Iter [283/3125], train_loss:0.083268
Epoch [2/2], Iter [284/3125], train_loss:0.091778
Epoch [2/2], Iter [285/3125], train_loss:0.100697
Epoch [2/2], Iter [286/3125], train_loss:0.061429
Epoch [2/2], Iter [287/3125], train_loss:0.103810
Epoch [2/2], Iter [288/3125], train_loss:0.074329
Epoch [2/2], Iter [289/3125], train_loss:0.086135
Epoch [2/2], Iter [290/3125], train_loss:0.052865
Epoch [2/2], Iter [291/3125], train_loss:0.064886
Epoch [2/2], Iter [292/3125], train_loss:0.083900
Epoch [2/2], Iter [293/3125], train_loss:0.109142
Epoch [2/2], Iter [294/3125], train_loss:0.092724
Epoch [2/2], Iter [295/3125], train_loss:0.120955
Epoch [2/2], Iter [296/3125], train_loss:0.083090
Epoch [2/2], Iter [297/3125], train_loss:0.086837
Epoch [2/2], Iter [298/3125], train_loss:0.080210
Epoch [2/2], Iter [299/3125], train_loss:0.091169
Epoch [2/2], Iter [300/3125], train_loss:0.096427
Epoch [2/2], Iter [301/3125], train_loss:0.120840
Epoch [2/2], Iter [302/3125], train_loss:0.068802
Epoch [2/2], Iter [303/3125], train_loss:0.083719
Epoch [2/2], Iter [304/3125], train_loss:0.115758
Epoch [2/2], Iter [305/3125], train_loss:0.100274
Epoch [2/2], Iter [306/3125], train_loss:0.110705
Epoch [2/2], Iter [307/3125], train_loss:0.106541
Epoch [2/2], Iter [308/3125], train_loss:0.088817
Epoch [2/2], Iter [309/3125], train_loss:0.102153
Epoch [2/2], Iter [310/3125], train_loss:0.097295
Epoch [2/2], Iter [311/3125], train_loss:0.081824
Epoch [2/2], Iter [312/3125], train_loss:0.068557
Epoch [2/2], Iter [313/3125], train_loss:0.117271
Epoch [2/2], Iter [314/3125], train_loss:0.060042
Epoch [2/2], Iter [315/3125], train_loss:0.080024
Epoch [2/2], Iter [316/3125], train_loss:0.065119
Epoch [2/2], Iter [317/3125], train_loss:0.083336
Epoch [2/2], Iter [318/3125], train_loss:0.111622
Epoch [2/2], Iter [319/3125], train_loss:0.093300
Epoch [2/2], Iter [320/3125], train_loss:0.092461
Epoch [2/2], Iter [321/3125], train_loss:0.081172
Epoch [2/2], Iter [322/3125], train_loss:0.095288
Epoch [2/2], Iter [323/3125], train_loss:0.076318
Epoch [2/2], Iter [324/3125], train_loss:0.079046
Epoch [2/2], Iter [325/3125], train_loss:0.116439
Epoch [2/2], Iter [326/3125], train_loss:0.081611
Epoch [2/2], Iter [327/3125], train_loss:0.089972
Epoch [2/2], Iter [328/3125], train_loss:0.078858
Epoch [2/2], Iter [329/3125], train_loss:0.087226
Epoch [2/2], Iter [330/3125], train_loss:0.091323
Epoch [2/2], Iter [331/3125], train_loss:0.076480
Epoch [2/2], Iter [332/3125], train_loss:0.104710
Epoch [2/2], Iter [333/3125], train_loss:0.127592
Epoch [2/2], Iter [334/3125], train_loss:0.091593
Epoch [2/2], Iter [335/3125], train_loss:0.079174
Epoch [2/2], Iter [336/3125], train_loss:0.103978
Epoch [2/2], Iter [337/3125], train_loss:0.096624
Epoch [2/2], Iter [338/3125], train_loss:0.103828
Epoch [2/2], Iter [339/3125], train_loss:0.120061
Epoch [2/2], Iter [340/3125], train_loss:0.137862
Epoch [2/2], Iter [341/3125], train_loss:0.083696
Epoch [2/2], Iter [342/3125], train_loss:0.120275
Epoch [2/2], Iter [343/3125], train_loss:0.070729
Epoch [2/2], Iter [344/3125], train_loss:0.071022
Epoch [2/2], Iter [345/3125], train_loss:0.113541
Epoch [2/2], Iter [346/3125], train_loss:0.155338
Epoch [2/2], Iter [347/3125], train_loss:0.089502
Epoch [2/2], Iter [348/3125], train_loss:0.102329
Epoch [2/2], Iter [349/3125], train_loss:0.088657
Epoch [2/2], Iter [350/3125], train_loss:0.099869
Epoch [2/2], Iter [351/3125], train_loss:0.100885
Epoch [2/2], Iter [352/3125], train_loss:0.076961
Epoch [2/2], Iter [353/3125], train_loss:0.093844
Epoch [2/2], Iter [354/3125], train_loss:0.091456
Epoch [2/2], Iter [355/3125], train_loss:0.083950
Epoch [2/2], Iter [356/3125], train_loss:0.083916
Epoch [2/2], Iter [357/3125], train_loss:0.106248
Epoch [2/2], Iter [358/3125], train_loss:0.096157
Epoch [2/2], Iter [359/3125], train_loss:0.064131
Epoch [2/2], Iter [360/3125], train_loss:0.084503
Epoch [2/2], Iter [361/3125], train_loss:0.103175
Epoch [2/2], Iter [362/3125], train_loss:0.102201
Epoch [2/2], Iter [363/3125], train_loss:0.078447
Epoch [2/2], Iter [364/3125], train_loss:0.106766
Epoch [2/2], Iter [365/3125], train_loss:0.072494
Epoch [2/2], Iter [366/3125], train_loss:0.097293
Epoch [2/2], Iter [367/3125], train_loss:0.080583
Epoch [2/2], Iter [368/3125], train_loss:0.084713
Epoch [2/2], Iter [369/3125], train_loss:0.098219
Epoch [2/2], Iter [370/3125], train_loss:0.099100
Epoch [2/2], Iter [371/3125], train_loss:0.071372
Epoch [2/2], Iter [372/3125], train_loss:0.120350
Epoch [2/2], Iter [373/3125], train_loss:0.088132
Epoch [2/2], Iter [374/3125], train_loss:0.092861
Epoch [2/2], Iter [375/3125], train_loss:0.077495
Epoch [2/2], Iter [376/3125], train_loss:0.081694
Epoch [2/2], Iter [377/3125], train_loss:0.100780
Epoch [2/2], Iter [378/3125], train_loss:0.093676
Epoch [2/2], Iter [379/3125], train_loss:0.095345
Epoch [2/2], Iter [380/3125], train_loss:0.118269
Epoch [2/2], Iter [381/3125], train_loss:0.088016
Epoch [2/2], Iter [382/3125], train_loss:0.069193
Epoch [2/2], Iter [383/3125], train_loss:0.080610
Epoch [2/2], Iter [384/3125], train_loss:0.086855
Epoch [2/2], Iter [385/3125], train_loss:0.107553
Epoch [2/2], Iter [386/3125], train_loss:0.099464
Epoch [2/2], Iter [387/3125], train_loss:0.133794
Epoch [2/2], Iter [388/3125], train_loss:0.070247
Epoch [2/2], Iter [389/3125], train_loss:0.105525
Epoch [2/2], Iter [390/3125], train_loss:0.090175
Epoch [2/2], Iter [391/3125], train_loss:0.086423
Epoch [2/2], Iter [392/3125], train_loss:0.114725
Epoch [2/2], Iter [393/3125], train_loss:0.097274
Epoch [2/2], Iter [394/3125], train_loss:0.074110
Epoch [2/2], Iter [395/3125], train_loss:0.083200
Epoch [2/2], Iter [396/3125], train_loss:0.097351
Epoch [2/2], Iter [397/3125], train_loss:0.074531
Epoch [2/2], Iter [398/3125], train_loss:0.074331
Epoch [2/2], Iter [399/3125], train_loss:0.103247
Epoch [2/2], Iter [400/3125], train_loss:0.098521
Epoch [2/2], Iter [401/3125], train_loss:0.090921
Epoch [2/2], Iter [402/3125], train_loss:0.112154
Epoch [2/2], Iter [403/3125], train_loss:0.070519
Epoch [2/2], Iter [404/3125], train_loss:0.089387
Epoch [2/2], Iter [405/3125], train_loss:0.107348
Epoch [2/2], Iter [406/3125], train_loss:0.105891
Epoch [2/2], Iter [407/3125], train_loss:0.144931
Epoch [2/2], Iter [408/3125], train_loss:0.082374
Epoch [2/2], Iter [409/3125], train_loss:0.129991
Epoch [2/2], Iter [410/3125], train_loss:0.103775
Epoch [2/2], Iter [411/3125], train_loss:0.108515
Epoch [2/2], Iter [412/3125], train_loss:0.100185
Epoch [2/2], Iter [413/3125], train_loss:0.084244
Epoch [2/2], Iter [414/3125], train_loss:0.109758
Epoch [2/2], Iter [415/3125], train_loss:0.076957
Epoch [2/2], Iter [416/3125], train_loss:0.092599
Epoch [2/2], Iter [417/3125], train_loss:0.078856
Epoch [2/2], Iter [418/3125], train_loss:0.069968
Epoch [2/2], Iter [419/3125], train_loss:0.090139
Epoch [2/2], Iter [420/3125], train_loss:0.062768
Epoch [2/2], Iter [421/3125], train_loss:0.079735
Epoch [2/2], Iter [422/3125], train_loss:0.107121
Epoch [2/2], Iter [423/3125], train_loss:0.145370
Epoch [2/2], Iter [424/3125], train_loss:0.079752
Epoch [2/2], Iter [425/3125], train_loss:0.132153
Epoch [2/2], Iter [426/3125], train_loss:0.083394
Epoch [2/2], Iter [427/3125], train_loss:0.081933
Epoch [2/2], Iter [428/3125], train_loss:0.098451
Epoch [2/2], Iter [429/3125], train_loss:0.106545
Epoch [2/2], Iter [430/3125], train_loss:0.110436
Epoch [2/2], Iter [431/3125], train_loss:0.079092
Epoch [2/2], Iter [432/3125], train_loss:0.090842
Epoch [2/2], Iter [433/3125], train_loss:0.094455
Epoch [2/2], Iter [434/3125], train_loss:0.083076
Epoch [2/2], Iter [435/3125], train_loss:0.098882
Epoch [2/2], Iter [436/3125], train_loss:0.124126
Epoch [2/2], Iter [437/3125], train_loss:0.099700
Epoch [2/2], Iter [438/3125], train_loss:0.092618
Epoch [2/2], Iter [439/3125], train_loss:0.092783
Epoch [2/2], Iter [440/3125], train_loss:0.134112
Epoch [2/2], Iter [441/3125], train_loss:0.084922
Epoch [2/2], Iter [442/3125], train_loss:0.118824
Epoch [2/2], Iter [443/3125], train_loss:0.103315
Epoch [2/2], Iter [444/3125], train_loss:0.125121
Epoch [2/2], Iter [445/3125], train_loss:0.133544
Epoch [2/2], Iter [446/3125], train_loss:0.110311
Epoch [2/2], Iter [447/3125], train_loss:0.083451
Epoch [2/2], Iter [448/3125], train_loss:0.110809
Epoch [2/2], Iter [449/3125], train_loss:0.097352
Epoch [2/2], Iter [450/3125], train_loss:0.094873
Epoch [2/2], Iter [451/3125], train_loss:0.109798
Epoch [2/2], Iter [452/3125], train_loss:0.108717
Epoch [2/2], Iter [453/3125], train_loss:0.091716
Epoch [2/2], Iter [454/3125], train_loss:0.090690
Epoch [2/2], Iter [455/3125], train_loss:0.094762
Epoch [2/2], Iter [456/3125], train_loss:0.111473
Epoch [2/2], Iter [457/3125], train_loss:0.103065
Epoch [2/2], Iter [458/3125], train_loss:0.108422
Epoch [2/2], Iter [459/3125], train_loss:0.102061
Epoch [2/2], Iter [460/3125], train_loss:0.102083
Epoch [2/2], Iter [461/3125], train_loss:0.109224
Epoch [2/2], Iter [462/3125], train_loss:0.104043
Epoch [2/2], Iter [463/3125], train_loss:0.065878
Epoch [2/2], Iter [464/3125], train_loss:0.091389
Epoch [2/2], Iter [465/3125], train_loss:0.115812
Epoch [2/2], Iter [466/3125], train_loss:0.118369
Epoch [2/2], Iter [467/3125], train_loss:0.068617
Epoch [2/2], Iter [468/3125], train_loss:0.088816
Epoch [2/2], Iter [469/3125], train_loss:0.150452
Epoch [2/2], Iter [470/3125], train_loss:0.059345
Epoch [2/2], Iter [471/3125], train_loss:0.066618
Epoch [2/2], Iter [472/3125], train_loss:0.102710
Epoch [2/2], Iter [473/3125], train_loss:0.075018
Epoch [2/2], Iter [474/3125], train_loss:0.122839
Epoch [2/2], Iter [475/3125], train_loss:0.114021
Epoch [2/2], Iter [476/3125], train_loss:0.086777
Epoch [2/2], Iter [477/3125], train_loss:0.102576
Epoch [2/2], Iter [478/3125], train_loss:0.078979
Epoch [2/2], Iter [479/3125], train_loss:0.097320
Epoch [2/2], Iter [480/3125], train_loss:0.112049
Epoch [2/2], Iter [481/3125], train_loss:0.097673
Epoch [2/2], Iter [482/3125], train_loss:0.103756
Epoch [2/2], Iter [483/3125], train_loss:0.085546
Epoch [2/2], Iter [484/3125], train_loss:0.134447
Epoch [2/2], Iter [485/3125], train_loss:0.081610
Epoch [2/2], Iter [486/3125], train_loss:0.113824
Epoch [2/2], Iter [487/3125], train_loss:0.079254
Epoch [2/2], Iter [488/3125], train_loss:0.098650
Epoch [2/2], Iter [489/3125], train_loss:0.108382
Epoch [2/2], Iter [490/3125], train_loss:0.076616
Epoch [2/2], Iter [491/3125], train_loss:0.085238
Epoch [2/2], Iter [492/3125], train_loss:0.135156
Epoch [2/2], Iter [493/3125], train_loss:0.090402
Epoch [2/2], Iter [494/3125], train_loss:0.106814
Epoch [2/2], Iter [495/3125], train_loss:0.088576
Epoch [2/2], Iter [496/3125], train_loss:0.104555
Epoch [2/2], Iter [497/3125], train_loss:0.088838
Epoch [2/2], Iter [498/3125], train_loss:0.103274
Epoch [2/2], Iter [499/3125], train_loss:0.104177
Epoch [2/2], Iter [500/3125], train_loss:0.077060
Epoch [2/2], Iter [501/3125], train_loss:0.071030
Epoch [2/2], Iter [502/3125], train_loss:0.105627
Epoch [2/2], Iter [503/3125], train_loss:0.068488
Epoch [2/2], Iter [504/3125], train_loss:0.067340
Epoch [2/2], Iter [505/3125], train_loss:0.101247
Epoch [2/2], Iter [506/3125], train_loss:0.120195
Epoch [2/2], Iter [507/3125], train_loss:0.096677
Epoch [2/2], Iter [508/3125], train_loss:0.093882
Epoch [2/2], Iter [509/3125], train_loss:0.097796
Epoch [2/2], Iter [510/3125], train_loss:0.109570
Epoch [2/2], Iter [511/3125], train_loss:0.117683
Epoch [2/2], Iter [512/3125], train_loss:0.152239
Epoch [2/2], Iter [513/3125], train_loss:0.110212
Epoch [2/2], Iter [514/3125], train_loss:0.112285
Epoch [2/2], Iter [515/3125], train_loss:0.114113
Epoch [2/2], Iter [516/3125], train_loss:0.114004
Epoch [2/2], Iter [517/3125], train_loss:0.102815
Epoch [2/2], Iter [518/3125], train_loss:0.143307
Epoch [2/2], Iter [519/3125], train_loss:0.093839
Epoch [2/2], Iter [520/3125], train_loss:0.082347
Epoch [2/2], Iter [521/3125], train_loss:0.065753
Epoch [2/2], Iter [522/3125], train_loss:0.070755
Epoch [2/2], Iter [523/3125], train_loss:0.083399
Epoch [2/2], Iter [524/3125], train_loss:0.107254
Epoch [2/2], Iter [525/3125], train_loss:0.107849
Epoch [2/2], Iter [526/3125], train_loss:0.109029
Epoch [2/2], Iter [527/3125], train_loss:0.073447
Epoch [2/2], Iter [528/3125], train_loss:0.121817
Epoch [2/2], Iter [529/3125], train_loss:0.104663
Epoch [2/2], Iter [530/3125], train_loss:0.094757
Epoch [2/2], Iter [531/3125], train_loss:0.116653
Epoch [2/2], Iter [532/3125], train_loss:0.086909
Epoch [2/2], Iter [533/3125], train_loss:0.111515
Epoch [2/2], Iter [534/3125], train_loss:0.075181
Epoch [2/2], Iter [535/3125], train_loss:0.084049
Epoch [2/2], Iter [536/3125], train_loss:0.156880
Epoch [2/2], Iter [537/3125], train_loss:0.090378
Epoch [2/2], Iter [538/3125], train_loss:0.120230
Epoch [2/2], Iter [539/3125], train_loss:0.064216
Epoch [2/2], Iter [540/3125], train_loss:0.132715
Epoch [2/2], Iter [541/3125], train_loss:0.131452
Epoch [2/2], Iter [542/3125], train_loss:0.101619
Epoch [2/2], Iter [543/3125], train_loss:0.100493
Epoch [2/2], Iter [544/3125], train_loss:0.092979
Epoch [2/2], Iter [545/3125], train_loss:0.080726
Epoch [2/2], Iter [546/3125], train_loss:0.091035
Epoch [2/2], Iter [547/3125], train_loss:0.100602
Epoch [2/2], Iter [548/3125], train_loss:0.094904
Epoch [2/2], Iter [549/3125], train_loss:0.121439
Epoch [2/2], Iter [550/3125], train_loss:0.121370
Epoch [2/2], Iter [551/3125], train_loss:0.083671
Epoch [2/2], Iter [552/3125], train_loss:0.110429
Epoch [2/2], Iter [553/3125], train_loss:0.105332
Epoch [2/2], Iter [554/3125], train_loss:0.087366
Epoch [2/2], Iter [555/3125], train_loss:0.082316
Epoch [2/2], Iter [556/3125], train_loss:0.099882
Epoch [2/2], Iter [557/3125], train_loss:0.111021
Epoch [2/2], Iter [558/3125], train_loss:0.077680
Epoch [2/2], Iter [559/3125], train_loss:0.109377
Epoch [2/2], Iter [560/3125], train_loss:0.080302
Epoch [2/2], Iter [561/3125], train_loss:0.105432
Epoch [2/2], Iter [562/3125], train_loss:0.113619
Epoch [2/2], Iter [563/3125], train_loss:0.094069
Epoch [2/2], Iter [564/3125], train_loss:0.096262
Epoch [2/2], Iter [565/3125], train_loss:0.084621
Epoch [2/2], Iter [566/3125], train_loss:0.124599
Epoch [2/2], Iter [567/3125], train_loss:0.124162
Epoch [2/2], Iter [568/3125], train_loss:0.106849
Epoch [2/2], Iter [569/3125], train_loss:0.100125
Epoch [2/2], Iter [570/3125], train_loss:0.097650
Epoch [2/2], Iter [571/3125], train_loss:0.079950
Epoch [2/2], Iter [572/3125], train_loss:0.124764
Epoch [2/2], Iter [573/3125], train_loss:0.087671
Epoch [2/2], Iter [574/3125], train_loss:0.106283
Epoch [2/2], Iter [575/3125], train_loss:0.063669
Epoch [2/2], Iter [576/3125], train_loss:0.109756
Epoch [2/2], Iter [577/3125], train_loss:0.076927
Epoch [2/2], Iter [578/3125], train_loss:0.089796
Epoch [2/2], Iter [579/3125], train_loss:0.091205
Epoch [2/2], Iter [580/3125], train_loss:0.083034
Epoch [2/2], Iter [581/3125], train_loss:0.084445
Epoch [2/2], Iter [582/3125], train_loss:0.101539
Epoch [2/2], Iter [583/3125], train_loss:0.098867
Epoch [2/2], Iter [584/3125], train_loss:0.113716
Epoch [2/2], Iter [585/3125], train_loss:0.071058
Epoch [2/2], Iter [586/3125], train_loss:0.098496
Epoch [2/2], Iter [587/3125], train_loss:0.108242
Epoch [2/2], Iter [588/3125], train_loss:0.092561
Epoch [2/2], Iter [589/3125], train_loss:0.074094
Epoch [2/2], Iter [590/3125], train_loss:0.097281
Epoch [2/2], Iter [591/3125], train_loss:0.087513
Epoch [2/2], Iter [592/3125], train_loss:0.086917
Epoch [2/2], Iter [593/3125], train_loss:0.126143
Epoch [2/2], Iter [594/3125], train_loss:0.104166
Epoch [2/2], Iter [595/3125], train_loss:0.095785
Epoch [2/2], Iter [596/3125], train_loss:0.096451
Epoch [2/2], Iter [597/3125], train_loss:0.112868
Epoch [2/2], Iter [598/3125], train_loss:0.091374
Epoch [2/2], Iter [599/3125], train_loss:0.111677
Epoch [2/2], Iter [600/3125], train_loss:0.096349
Epoch [2/2], Iter [601/3125], train_loss:0.076007
Epoch [2/2], Iter [602/3125], train_loss:0.100855
Epoch [2/2], Iter [603/3125], train_loss:0.081808
Epoch [2/2], Iter [604/3125], train_loss:0.087975
Epoch [2/2], Iter [605/3125], train_loss:0.074303
Epoch [2/2], Iter [606/3125], train_loss:0.119068
Epoch [2/2], Iter [607/3125], train_loss:0.069057
Epoch [2/2], Iter [608/3125], train_loss:0.081503
Epoch [2/2], Iter [609/3125], train_loss:0.113753
Epoch [2/2], Iter [610/3125], train_loss:0.091258
Epoch [2/2], Iter [611/3125], train_loss:0.075648
Epoch [2/2], Iter [612/3125], train_loss:0.086869
Epoch [2/2], Iter [613/3125], train_loss:0.085953
Epoch [2/2], Iter [614/3125], train_loss:0.083555
Epoch [2/2], Iter [615/3125], train_loss:0.068392
Epoch [2/2], Iter [616/3125], train_loss:0.082908
Epoch [2/2], Iter [617/3125], train_loss:0.097139
Epoch [2/2], Iter [618/3125], train_loss:0.088849
Epoch [2/2], Iter [619/3125], train_loss:0.114269
Epoch [2/2], Iter [620/3125], train_loss:0.096929
Epoch [2/2], Iter [621/3125], train_loss:0.099542
Epoch [2/2], Iter [622/3125], train_loss:0.089321
Epoch [2/2], Iter [623/3125], train_loss:0.123321
Epoch [2/2], Iter [624/3125], train_loss:0.079554
Epoch [2/2], Iter [625/3125], train_loss:0.082541
Epoch [2/2], Iter [626/3125], train_loss:0.085805
Epoch [2/2], Iter [627/3125], train_loss:0.116099
Epoch [2/2], Iter [628/3125], train_loss:0.062045
Epoch [2/2], Iter [629/3125], train_loss:0.093665
Epoch [2/2], Iter [630/3125], train_loss:0.096117
Epoch [2/2], Iter [631/3125], train_loss:0.120881
Epoch [2/2], Iter [632/3125], train_loss:0.086188
Epoch [2/2], Iter [633/3125], train_loss:0.090466
Epoch [2/2], Iter [634/3125], train_loss:0.109846
Epoch [2/2], Iter [635/3125], train_loss:0.098191
Epoch [2/2], Iter [636/3125], train_loss:0.101009
Epoch [2/2], Iter [637/3125], train_loss:0.072900
Epoch [2/2], Iter [638/3125], train_loss:0.122198
Epoch [2/2], Iter [639/3125], train_loss:0.110124
Epoch [2/2], Iter [640/3125], train_loss:0.085853
Epoch [2/2], Iter [641/3125], train_loss:0.110393
Epoch [2/2], Iter [642/3125], train_loss:0.105882
Epoch [2/2], Iter [643/3125], train_loss:0.099858
Epoch [2/2], Iter [644/3125], train_loss:0.106550
Epoch [2/2], Iter [645/3125], train_loss:0.093056
Epoch [2/2], Iter [646/3125], train_loss:0.108176
Epoch [2/2], Iter [647/3125], train_loss:0.107052
Epoch [2/2], Iter [648/3125], train_loss:0.083282
Epoch [2/2], Iter [649/3125], train_loss:0.069446
Epoch [2/2], Iter [650/3125], train_loss:0.101450
Epoch [2/2], Iter [651/3125], train_loss:0.086882
Epoch [2/2], Iter [652/3125], train_loss:0.084529
Epoch [2/2], Iter [653/3125], train_loss:0.091770
Epoch [2/2], Iter [654/3125], train_loss:0.079131
Epoch [2/2], Iter [655/3125], train_loss:0.120871
Epoch [2/2], Iter [656/3125], train_loss:0.091773
Epoch [2/2], Iter [657/3125], train_loss:0.104853
Epoch [2/2], Iter [658/3125], train_loss:0.095033
Epoch [2/2], Iter [659/3125], train_loss:0.095691
Epoch [2/2], Iter [660/3125], train_loss:0.108144
Epoch [2/2], Iter [661/3125], train_loss:0.092027
Epoch [2/2], Iter [662/3125], train_loss:0.071688
Epoch [2/2], Iter [663/3125], train_loss:0.099780
Epoch [2/2], Iter [664/3125], train_loss:0.084860
Epoch [2/2], Iter [665/3125], train_loss:0.081114
Epoch [2/2], Iter [666/3125], train_loss:0.086606
Epoch [2/2], Iter [667/3125], train_loss:0.099935
Epoch [2/2], Iter [668/3125], train_loss:0.108894
Epoch [2/2], Iter [669/3125], train_loss:0.080974
Epoch [2/2], Iter [670/3125], train_loss:0.087669
Epoch [2/2], Iter [671/3125], train_loss:0.104220
Epoch [2/2], Iter [672/3125], train_loss:0.098142
Epoch [2/2], Iter [673/3125], train_loss:0.118249
Epoch [2/2], Iter [674/3125], train_loss:0.104474
Epoch [2/2], Iter [675/3125], train_loss:0.105180
Epoch [2/2], Iter [676/3125], train_loss:0.116002
Epoch [2/2], Iter [677/3125], train_loss:0.086220
Epoch [2/2], Iter [678/3125], train_loss:0.119653
Epoch [2/2], Iter [679/3125], train_loss:0.119114
Epoch [2/2], Iter [680/3125], train_loss:0.093704
Epoch [2/2], Iter [681/3125], train_loss:0.082358
Epoch [2/2], Iter [682/3125], train_loss:0.068221
Epoch [2/2], Iter [683/3125], train_loss:0.073008
Epoch [2/2], Iter [684/3125], train_loss:0.062461
Epoch [2/2], Iter [685/3125], train_loss:0.053876
Epoch [2/2], Iter [686/3125], train_loss:0.112768
Epoch [2/2], Iter [687/3125], train_loss:0.087811
Epoch [2/2], Iter [688/3125], train_loss:0.087970
Epoch [2/2], Iter [689/3125], train_loss:0.126323
Epoch [2/2], Iter [690/3125], train_loss:0.060084
Epoch [2/2], Iter [691/3125], train_loss:0.071475
Epoch [2/2], Iter [692/3125], train_loss:0.077108
Epoch [2/2], Iter [693/3125], train_loss:0.090324
Epoch [2/2], Iter [694/3125], train_loss:0.115778
Epoch [2/2], Iter [695/3125], train_loss:0.091183
Epoch [2/2], Iter [696/3125], train_loss:0.105349
Epoch [2/2], Iter [697/3125], train_loss:0.110092
Epoch [2/2], Iter [698/3125], train_loss:0.107705
Epoch [2/2], Iter [699/3125], train_loss:0.086618
Epoch [2/2], Iter [700/3125], train_loss:0.133944
Epoch [2/2], Iter [701/3125], train_loss:0.080485
Epoch [2/2], Iter [702/3125], train_loss:0.094014
Epoch [2/2], Iter [703/3125], train_loss:0.101598
Epoch [2/2], Iter [704/3125], train_loss:0.102957
Epoch [2/2], Iter [705/3125], train_loss:0.075928
Epoch [2/2], Iter [706/3125], train_loss:0.120276
Epoch [2/2], Iter [707/3125], train_loss:0.063608
Epoch [2/2], Iter [708/3125], train_loss:0.111435
Epoch [2/2], Iter [709/3125], train_loss:0.087704
Epoch [2/2], Iter [710/3125], train_loss:0.104987
Epoch [2/2], Iter [711/3125], train_loss:0.113673
Epoch [2/2], Iter [712/3125], train_loss:0.110319
Epoch [2/2], Iter [713/3125], train_loss:0.109937
Epoch [2/2], Iter [714/3125], train_loss:0.113730
Epoch [2/2], Iter [715/3125], train_loss:0.054402
Epoch [2/2], Iter [716/3125], train_loss:0.159296
Epoch [2/2], Iter [717/3125], train_loss:0.099721
Epoch [2/2], Iter [718/3125], train_loss:0.079371
Epoch [2/2], Iter [719/3125], train_loss:0.073157
Epoch [2/2], Iter [720/3125], train_loss:0.089477
Epoch [2/2], Iter [721/3125], train_loss:0.096350
Epoch [2/2], Iter [722/3125], train_loss:0.076988
Epoch [2/2], Iter [723/3125], train_loss:0.091401
Epoch [2/2], Iter [724/3125], train_loss:0.094071
Epoch [2/2], Iter [725/3125], train_loss:0.099668
Epoch [2/2], Iter [726/3125], train_loss:0.077234
Epoch [2/2], Iter [727/3125], train_loss:0.069081
Epoch [2/2], Iter [728/3125], train_loss:0.070330
Epoch [2/2], Iter [729/3125], train_loss:0.104584
Epoch [2/2], Iter [730/3125], train_loss:0.079599
Epoch [2/2], Iter [731/3125], train_loss:0.091007
Epoch [2/2], Iter [732/3125], train_loss:0.129703
Epoch [2/2], Iter [733/3125], train_loss:0.053601
Epoch [2/2], Iter [734/3125], train_loss:0.100923
Epoch [2/2], Iter [735/3125], train_loss:0.118555
Epoch [2/2], Iter [736/3125], train_loss:0.088056
Epoch [2/2], Iter [737/3125], train_loss:0.129550
Epoch [2/2], Iter [738/3125], train_loss:0.089502
Epoch [2/2], Iter [739/3125], train_loss:0.068963
Epoch [2/2], Iter [740/3125], train_loss:0.095034
Epoch [2/2], Iter [741/3125], train_loss:0.123924
Epoch [2/2], Iter [742/3125], train_loss:0.062268
Epoch [2/2], Iter [743/3125], train_loss:0.105786
Epoch [2/2], Iter [744/3125], train_loss:0.093041
Epoch [2/2], Iter [745/3125], train_loss:0.128233
Epoch [2/2], Iter [746/3125], train_loss:0.085857
Epoch [2/2], Iter [747/3125], train_loss:0.093941
Epoch [2/2], Iter [748/3125], train_loss:0.093465
Epoch [2/2], Iter [749/3125], train_loss:0.073185
Epoch [2/2], Iter [750/3125], train_loss:0.079939
Epoch [2/2], Iter [751/3125], train_loss:0.086033
Epoch [2/2], Iter [752/3125], train_loss:0.110138
Epoch [2/2], Iter [753/3125], train_loss:0.087187
Epoch [2/2], Iter [754/3125], train_loss:0.119257
Epoch [2/2], Iter [755/3125], train_loss:0.101958
Epoch [2/2], Iter [756/3125], train_loss:0.067140
Epoch [2/2], Iter [757/3125], train_loss:0.079778
Epoch [2/2], Iter [758/3125], train_loss:0.098867
Epoch [2/2], Iter [759/3125], train_loss:0.066322
Epoch [2/2], Iter [760/3125], train_loss:0.089248
Epoch [2/2], Iter [761/3125], train_loss:0.097678
Epoch [2/2], Iter [762/3125], train_loss:0.120523
Epoch [2/2], Iter [763/3125], train_loss:0.104695
Epoch [2/2], Iter [764/3125], train_loss:0.107009
Epoch [2/2], Iter [765/3125], train_loss:0.103234
Epoch [2/2], Iter [766/3125], train_loss:0.100424
Epoch [2/2], Iter [767/3125], train_loss:0.084092
Epoch [2/2], Iter [768/3125], train_loss:0.083450
Epoch [2/2], Iter [769/3125], train_loss:0.086751
Epoch [2/2], Iter [770/3125], train_loss:0.073314
Epoch [2/2], Iter [771/3125], train_loss:0.087713
Epoch [2/2], Iter [772/3125], train_loss:0.091291
Epoch [2/2], Iter [773/3125], train_loss:0.096915
Epoch [2/2], Iter [774/3125], train_loss:0.100215
Epoch [2/2], Iter [775/3125], train_loss:0.104935
Epoch [2/2], Iter [776/3125], train_loss:0.118939
Epoch [2/2], Iter [777/3125], train_loss:0.116502
Epoch [2/2], Iter [778/3125], train_loss:0.100367
Epoch [2/2], Iter [779/3125], train_loss:0.101167
Epoch [2/2], Iter [780/3125], train_loss:0.102839
Epoch [2/2], Iter [781/3125], train_loss:0.066892
Epoch [2/2], Iter [782/3125], train_loss:0.087467
Epoch [2/2], Iter [783/3125], train_loss:0.101108
Epoch [2/2], Iter [784/3125], train_loss:0.096700
Epoch [2/2], Iter [785/3125], train_loss:0.087809
Epoch [2/2], Iter [786/3125], train_loss:0.095772
Epoch [2/2], Iter [787/3125], train_loss:0.064771
Epoch [2/2], Iter [788/3125], train_loss:0.103117
Epoch [2/2], Iter [789/3125], train_loss:0.074872
Epoch [2/2], Iter [790/3125], train_loss:0.136279
Epoch [2/2], Iter [791/3125], train_loss:0.069266
Epoch [2/2], Iter [792/3125], train_loss:0.076346
Epoch [2/2], Iter [793/3125], train_loss:0.077704
Epoch [2/2], Iter [794/3125], train_loss:0.099309
Epoch [2/2], Iter [795/3125], train_loss:0.093810
Epoch [2/2], Iter [796/3125], train_loss:0.092663
Epoch [2/2], Iter [797/3125], train_loss:0.070305
Epoch [2/2], Iter [798/3125], train_loss:0.106723
Epoch [2/2], Iter [799/3125], train_loss:0.081737
Epoch [2/2], Iter [800/3125], train_loss:0.110722
Epoch [2/2], Iter [801/3125], train_loss:0.098199
Epoch [2/2], Iter [802/3125], train_loss:0.105921
Epoch [2/2], Iter [803/3125], train_loss:0.074993
Epoch [2/2], Iter [804/3125], train_loss:0.082455
Epoch [2/2], Iter [805/3125], train_loss:0.084609
Epoch [2/2], Iter [806/3125], train_loss:0.081505
Epoch [2/2], Iter [807/3125], train_loss:0.114460
Epoch [2/2], Iter [808/3125], train_loss:0.089111
Epoch [2/2], Iter [809/3125], train_loss:0.078759
Epoch [2/2], Iter [810/3125], train_loss:0.093516
Epoch [2/2], Iter [811/3125], train_loss:0.093906
Epoch [2/2], Iter [812/3125], train_loss:0.095975
Epoch [2/2], Iter [813/3125], train_loss:0.103670
Epoch [2/2], Iter [814/3125], train_loss:0.096167
Epoch [2/2], Iter [815/3125], train_loss:0.085567
Epoch [2/2], Iter [816/3125], train_loss:0.095904
Epoch [2/2], Iter [817/3125], train_loss:0.095014
Epoch [2/2], Iter [818/3125], train_loss:0.095261
Epoch [2/2], Iter [819/3125], train_loss:0.105797
Epoch [2/2], Iter [820/3125], train_loss:0.076541
Epoch [2/2], Iter [821/3125], train_loss:0.076522
Epoch [2/2], Iter [822/3125], train_loss:0.104505
Epoch [2/2], Iter [823/3125], train_loss:0.106988
Epoch [2/2], Iter [824/3125], train_loss:0.103925
Epoch [2/2], Iter [825/3125], train_loss:0.109792
Epoch [2/2], Iter [826/3125], train_loss:0.091824
Epoch [2/2], Iter [827/3125], train_loss:0.101664
Epoch [2/2], Iter [828/3125], train_loss:0.135664
Epoch [2/2], Iter [829/3125], train_loss:0.062098
Epoch [2/2], Iter [830/3125], train_loss:0.096688
Epoch [2/2], Iter [831/3125], train_loss:0.083266
Epoch [2/2], Iter [832/3125], train_loss:0.074664
Epoch [2/2], Iter [833/3125], train_loss:0.136668
Epoch [2/2], Iter [834/3125], train_loss:0.117845
Epoch [2/2], Iter [835/3125], train_loss:0.109683
Epoch [2/2], Iter [836/3125], train_loss:0.080236
Epoch [2/2], Iter [837/3125], train_loss:0.063216
Epoch [2/2], Iter [838/3125], train_loss:0.128305
Epoch [2/2], Iter [839/3125], train_loss:0.062488
Epoch [2/2], Iter [840/3125], train_loss:0.144444
Epoch [2/2], Iter [841/3125], train_loss:0.119419
Epoch [2/2], Iter [842/3125], train_loss:0.077271
Epoch [2/2], Iter [843/3125], train_loss:0.108360
Epoch [2/2], Iter [844/3125], train_loss:0.093583
Epoch [2/2], Iter [845/3125], train_loss:0.103373
Epoch [2/2], Iter [846/3125], train_loss:0.105248
Epoch [2/2], Iter [847/3125], train_loss:0.071489
Epoch [2/2], Iter [848/3125], train_loss:0.091004
Epoch [2/2], Iter [849/3125], train_loss:0.104574
Epoch [2/2], Iter [850/3125], train_loss:0.066352
Epoch [2/2], Iter [851/3125], train_loss:0.075491
Epoch [2/2], Iter [852/3125], train_loss:0.090248
Epoch [2/2], Iter [853/3125], train_loss:0.141754
Epoch [2/2], Iter [854/3125], train_loss:0.111203
Epoch [2/2], Iter [855/3125], train_loss:0.101882
Epoch [2/2], Iter [856/3125], train_loss:0.080121
Epoch [2/2], Iter [857/3125], train_loss:0.109338
Epoch [2/2], Iter [858/3125], train_loss:0.074698
Epoch [2/2], Iter [859/3125], train_loss:0.078181
Epoch [2/2], Iter [860/3125], train_loss:0.080490
Epoch [2/2], Iter [861/3125], train_loss:0.070324
Epoch [2/2], Iter [862/3125], train_loss:0.091529
Epoch [2/2], Iter [863/3125], train_loss:0.099398
Epoch [2/2], Iter [864/3125], train_loss:0.116627
Epoch [2/2], Iter [865/3125], train_loss:0.114276
Epoch [2/2], Iter [866/3125], train_loss:0.093187
Epoch [2/2], Iter [867/3125], train_loss:0.069209
Epoch [2/2], Iter [868/3125], train_loss:0.104793
Epoch [2/2], Iter [869/3125], train_loss:0.073808
Epoch [2/2], Iter [870/3125], train_loss:0.092707
Epoch [2/2], Iter [871/3125], train_loss:0.085808
Epoch [2/2], Iter [872/3125], train_loss:0.099127
Epoch [2/2], Iter [873/3125], train_loss:0.094822
Epoch [2/2], Iter [874/3125], train_loss:0.081842
Epoch [2/2], Iter [875/3125], train_loss:0.083346
Epoch [2/2], Iter [876/3125], train_loss:0.109375
Epoch [2/2], Iter [877/3125], train_loss:0.069028
Epoch [2/2], Iter [878/3125], train_loss:0.088529
Epoch [2/2], Iter [879/3125], train_loss:0.082911
Epoch [2/2], Iter [880/3125], train_loss:0.064488
Epoch [2/2], Iter [881/3125], train_loss:0.109088
Epoch [2/2], Iter [882/3125], train_loss:0.086650
Epoch [2/2], Iter [883/3125], train_loss:0.069423
Epoch [2/2], Iter [884/3125], train_loss:0.082668
Epoch [2/2], Iter [885/3125], train_loss:0.101943
Epoch [2/2], Iter [886/3125], train_loss:0.062625
Epoch [2/2], Iter [887/3125], train_loss:0.067995
Epoch [2/2], Iter [888/3125], train_loss:0.085687
Epoch [2/2], Iter [889/3125], train_loss:0.065357
Epoch [2/2], Iter [890/3125], train_loss:0.071787
Epoch [2/2], Iter [891/3125], train_loss:0.081613
Epoch [2/2], Iter [892/3125], train_loss:0.072062
Epoch [2/2], Iter [893/3125], train_loss:0.104661
Epoch [2/2], Iter [894/3125], train_loss:0.087902
Epoch [2/2], Iter [895/3125], train_loss:0.130290
Epoch [2/2], Iter [896/3125], train_loss:0.075751
Epoch [2/2], Iter [897/3125], train_loss:0.083584
Epoch [2/2], Iter [898/3125], train_loss:0.088319
Epoch [2/2], Iter [899/3125], train_loss:0.107320
Epoch [2/2], Iter [900/3125], train_loss:0.069297
Epoch [2/2], Iter [901/3125], train_loss:0.059855
Epoch [2/2], Iter [902/3125], train_loss:0.090469
Epoch [2/2], Iter [903/3125], train_loss:0.083430
Epoch [2/2], Iter [904/3125], train_loss:0.060752
Epoch [2/2], Iter [905/3125], train_loss:0.088156
Epoch [2/2], Iter [906/3125], train_loss:0.089071
Epoch [2/2], Iter [907/3125], train_loss:0.084885
Epoch [2/2], Iter [908/3125], train_loss:0.048224
Epoch [2/2], Iter [909/3125], train_loss:0.113041
Epoch [2/2], Iter [910/3125], train_loss:0.116053
Epoch [2/2], Iter [911/3125], train_loss:0.074417
Epoch [2/2], Iter [912/3125], train_loss:0.091008
Epoch [2/2], Iter [913/3125], train_loss:0.092575
Epoch [2/2], Iter [914/3125], train_loss:0.113760
Epoch [2/2], Iter [915/3125], train_loss:0.120776
Epoch [2/2], Iter [916/3125], train_loss:0.139293
Epoch [2/2], Iter [917/3125], train_loss:0.069343
Epoch [2/2], Iter [918/3125], train_loss:0.098188
Epoch [2/2], Iter [919/3125], train_loss:0.061732
Epoch [2/2], Iter [920/3125], train_loss:0.138873
Epoch [2/2], Iter [921/3125], train_loss:0.108592
Epoch [2/2], Iter [922/3125], train_loss:0.108380
Epoch [2/2], Iter [923/3125], train_loss:0.089235
Epoch [2/2], Iter [924/3125], train_loss:0.098835
Epoch [2/2], Iter [925/3125], train_loss:0.084797
Epoch [2/2], Iter [926/3125], train_loss:0.086078
Epoch [2/2], Iter [927/3125], train_loss:0.096045
Epoch [2/2], Iter [928/3125], train_loss:0.103381
Epoch [2/2], Iter [929/3125], train_loss:0.064686
Epoch [2/2], Iter [930/3125], train_loss:0.101205
Epoch [2/2], Iter [931/3125], train_loss:0.083386
Epoch [2/2], Iter [932/3125], train_loss:0.124332
Epoch [2/2], Iter [933/3125], train_loss:0.071771
Epoch [2/2], Iter [934/3125], train_loss:0.068327
Epoch [2/2], Iter [935/3125], train_loss:0.069932
Epoch [2/2], Iter [936/3125], train_loss:0.088089
Epoch [2/2], Iter [937/3125], train_loss:0.088597
Epoch [2/2], Iter [938/3125], train_loss:0.104114
Epoch [2/2], Iter [939/3125], train_loss:0.083072
Epoch [2/2], Iter [940/3125], train_loss:0.101029
Epoch [2/2], Iter [941/3125], train_loss:0.108483
Epoch [2/2], Iter [942/3125], train_loss:0.100051
Epoch [2/2], Iter [943/3125], train_loss:0.106296
Epoch [2/2], Iter [944/3125], train_loss:0.072279
Epoch [2/2], Iter [945/3125], train_loss:0.143448
Epoch [2/2], Iter [946/3125], train_loss:0.084587
Epoch [2/2], Iter [947/3125], train_loss:0.073256
Epoch [2/2], Iter [948/3125], train_loss:0.083115
Epoch [2/2], Iter [949/3125], train_loss:0.076965
Epoch [2/2], Iter [950/3125], train_loss:0.083379
Epoch [2/2], Iter [951/3125], train_loss:0.078656
Epoch [2/2], Iter [952/3125], train_loss:0.080206
Epoch [2/2], Iter [953/3125], train_loss:0.088033
Epoch [2/2], Iter [954/3125], train_loss:0.094281
Epoch [2/2], Iter [955/3125], train_loss:0.109771
Epoch [2/2], Iter [956/3125], train_loss:0.098340
Epoch [2/2], Iter [957/3125], train_loss:0.103174
Epoch [2/2], Iter [958/3125], train_loss:0.070675
Epoch [2/2], Iter [959/3125], train_loss:0.092117
Epoch [2/2], Iter [960/3125], train_loss:0.093642
Epoch [2/2], Iter [961/3125], train_loss:0.128867
Epoch [2/2], Iter [962/3125], train_loss:0.072056
Epoch [2/2], Iter [963/3125], train_loss:0.094215
Epoch [2/2], Iter [964/3125], train_loss:0.091706
Epoch [2/2], Iter [965/3125], train_loss:0.076420
Epoch [2/2], Iter [966/3125], train_loss:0.110798
Epoch [2/2], Iter [967/3125], train_loss:0.066716
Epoch [2/2], Iter [968/3125], train_loss:0.104807
Epoch [2/2], Iter [969/3125], train_loss:0.086580
Epoch [2/2], Iter [970/3125], train_loss:0.105679
Epoch [2/2], Iter [971/3125], train_loss:0.084984
Epoch [2/2], Iter [972/3125], train_loss:0.093323
Epoch [2/2], Iter [973/3125], train_loss:0.088777
Epoch [2/2], Iter [974/3125], train_loss:0.090154
Epoch [2/2], Iter [975/3125], train_loss:0.096426
Epoch [2/2], Iter [976/3125], train_loss:0.107699
Epoch [2/2], Iter [977/3125], train_loss:0.110699
Epoch [2/2], Iter [978/3125], train_loss:0.072643
Epoch [2/2], Iter [979/3125], train_loss:0.078052
Epoch [2/2], Iter [980/3125], train_loss:0.090422
Epoch [2/2], Iter [981/3125], train_loss:0.071456
Epoch [2/2], Iter [982/3125], train_loss:0.095594
Epoch [2/2], Iter [983/3125], train_loss:0.092027
Epoch [2/2], Iter [984/3125], train_loss:0.116863
Epoch [2/2], Iter [985/3125], train_loss:0.114535
Epoch [2/2], Iter [986/3125], train_loss:0.079183
Epoch [2/2], Iter [987/3125], train_loss:0.090277
Epoch [2/2], Iter [988/3125], train_loss:0.124222
Epoch [2/2], Iter [989/3125], train_loss:0.115095
Epoch [2/2], Iter [990/3125], train_loss:0.114542
Epoch [2/2], Iter [991/3125], train_loss:0.106006
Epoch [2/2], Iter [992/3125], train_loss:0.095041
Epoch [2/2], Iter [993/3125], train_loss:0.076730
Epoch [2/2], Iter [994/3125], train_loss:0.109610
Epoch [2/2], Iter [995/3125], train_loss:0.107274
Epoch [2/2], Iter [996/3125], train_loss:0.066058
Epoch [2/2], Iter [997/3125], train_loss:0.065898
Epoch [2/2], Iter [998/3125], train_loss:0.117909
Epoch [2/2], Iter [999/3125], train_loss:0.069444
Epoch [2/2], Iter [1000/3125], train_loss:0.107684
Epoch [2/2], Iter [1001/3125], train_loss:0.094535
Epoch [2/2], Iter [1002/3125], train_loss:0.098872
Epoch [2/2], Iter [1003/3125], train_loss:0.097507
Epoch [2/2], Iter [1004/3125], train_loss:0.091864
Epoch [2/2], Iter [1005/3125], train_loss:0.078213
Epoch [2/2], Iter [1006/3125], train_loss:0.099576
Epoch [2/2], Iter [1007/3125], train_loss:0.100277
Epoch [2/2], Iter [1008/3125], train_loss:0.124750
Epoch [2/2], Iter [1009/3125], train_loss:0.104891
Epoch [2/2], Iter [1010/3125], train_loss:0.079731
Epoch [2/2], Iter [1011/3125], train_loss:0.085950
Epoch [2/2], Iter [1012/3125], train_loss:0.084804
Epoch [2/2], Iter [1013/3125], train_loss:0.075454
Epoch [2/2], Iter [1014/3125], train_loss:0.130603
Epoch [2/2], Iter [1015/3125], train_loss:0.096016
Epoch [2/2], Iter [1016/3125], train_loss:0.090073
Epoch [2/2], Iter [1017/3125], train_loss:0.074195
Epoch [2/2], Iter [1018/3125], train_loss:0.122536
Epoch [2/2], Iter [1019/3125], train_loss:0.112131
Epoch [2/2], Iter [1020/3125], train_loss:0.109132
Epoch [2/2], Iter [1021/3125], train_loss:0.115335
Epoch [2/2], Iter [1022/3125], train_loss:0.140687
Epoch [2/2], Iter [1023/3125], train_loss:0.083916
Epoch [2/2], Iter [1024/3125], train_loss:0.095654
Epoch [2/2], Iter [1025/3125], train_loss:0.084160
Epoch [2/2], Iter [1026/3125], train_loss:0.114870
Epoch [2/2], Iter [1027/3125], train_loss:0.101187
Epoch [2/2], Iter [1028/3125], train_loss:0.082069
Epoch [2/2], Iter [1029/3125], train_loss:0.072046
Epoch [2/2], Iter [1030/3125], train_loss:0.086769
Epoch [2/2], Iter [1031/3125], train_loss:0.089113
Epoch [2/2], Iter [1032/3125], train_loss:0.061093
Epoch [2/2], Iter [1033/3125], train_loss:0.090316
Epoch [2/2], Iter [1034/3125], train_loss:0.085117
Epoch [2/2], Iter [1035/3125], train_loss:0.104584
Epoch [2/2], Iter [1036/3125], train_loss:0.081303
Epoch [2/2], Iter [1037/3125], train_loss:0.091452
Epoch [2/2], Iter [1038/3125], train_loss:0.112761
Epoch [2/2], Iter [1039/3125], train_loss:0.088501
Epoch [2/2], Iter [1040/3125], train_loss:0.084058
Epoch [2/2], Iter [1041/3125], train_loss:0.078801
Epoch [2/2], Iter [1042/3125], train_loss:0.087638
Epoch [2/2], Iter [1043/3125], train_loss:0.106893
Epoch [2/2], Iter [1044/3125], train_loss:0.087472
Epoch [2/2], Iter [1045/3125], train_loss:0.130255
Epoch [2/2], Iter [1046/3125], train_loss:0.097685
Epoch [2/2], Iter [1047/3125], train_loss:0.095756
Epoch [2/2], Iter [1048/3125], train_loss:0.115433
Epoch [2/2], Iter [1049/3125], train_loss:0.079820
Epoch [2/2], Iter [1050/3125], train_loss:0.116015
Epoch [2/2], Iter [1051/3125], train_loss:0.146984
Epoch [2/2], Iter [1052/3125], train_loss:0.129607
Epoch [2/2], Iter [1053/3125], train_loss:0.098001
Epoch [2/2], Iter [1054/3125], train_loss:0.076012
Epoch [2/2], Iter [1055/3125], train_loss:0.098679
Epoch [2/2], Iter [1056/3125], train_loss:0.079336
Epoch [2/2], Iter [1057/3125], train_loss:0.127889
Epoch [2/2], Iter [1058/3125], train_loss:0.093738
Epoch [2/2], Iter [1059/3125], train_loss:0.096781
Epoch [2/2], Iter [1060/3125], train_loss:0.079172
Epoch [2/2], Iter [1061/3125], train_loss:0.074400
Epoch [2/2], Iter [1062/3125], train_loss:0.094194
Epoch [2/2], Iter [1063/3125], train_loss:0.085245
Epoch [2/2], Iter [1064/3125], train_loss:0.094455
Epoch [2/2], Iter [1065/3125], train_loss:0.081712
Epoch [2/2], Iter [1066/3125], train_loss:0.096517
Epoch [2/2], Iter [1067/3125], train_loss:0.140057
Epoch [2/2], Iter [1068/3125], train_loss:0.087830
Epoch [2/2], Iter [1069/3125], train_loss:0.083283
Epoch [2/2], Iter [1070/3125], train_loss:0.081132
Epoch [2/2], Iter [1071/3125], train_loss:0.083004
Epoch [2/2], Iter [1072/3125]