Addendum:
Two top-ranked GitHub solutions:
https://github.com/zhangboshen/Alibaba-Cloud-German-AI-Challenge-2018-Rank-17th-Solution
https://github.com/lhwcv/cloud_german_rank10
Competition write-up:
https://tianchi.aliyun.com/forum/postDetail?spm=5176.12282027.0.0.774d311fDpL98N&postId=46819
Good learning resources:
[Tianchi Live] The Guangdong algorithm competition champion team shares their approach (PPT updated)
[Tianchi Live] Semantic segmentation applied to remote-sensing imagery (PPT archived)
A blog worth reading:
https://qrfaction.github.io/categories/%E6%95%B0%E6%8D%AE%E7%AB%9E%E8%B5%9B/
My own code
Progress-bar utils module (utils1.py, imported by the training script below):
'''Some helper functions for PyTorch, including:
- get_mean_and_std: calculate the mean and std value of dataset.
- msr_init: net parameter initialization.
- progress_bar: progress bar mimic xlua.progress.
'''
import os
import sys
import time
import math

import torch
import torch.nn as nn
import torch.nn.init as init
def get_mean_and_std(dataset):
    '''Compute the mean and std value of dataset.'''
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=2)
    mean = torch.zeros(3)
    std = torch.zeros(3)
    print('==> Computing mean and std..')
    for inputs, targets in dataloader:
        for i in range(3):
            mean[i] += inputs[:,i,:,:].mean()
            std[i] += inputs[:,i,:,:].std()
    mean.div_(len(dataset))
    std.div_(len(dataset))
    return mean, std
def init_params(net):
    '''Init layer parameters.'''
    for m in net.modules():
        if isinstance(m, nn.Conv2d):
            init.kaiming_normal_(m.weight, mode='fan_out')
            if m.bias is not None:
                init.constant_(m.bias, 0)
        elif isinstance(m, nn.BatchNorm2d):
            init.constant_(m.weight, 1)
            init.constant_(m.bias, 0)
        elif isinstance(m, nn.Linear):
            init.normal_(m.weight, std=1e-3)
            if m.bias is not None:
                init.constant_(m.bias, 0)
# Query the terminal width; fall back to 80 columns when there is no TTY
# (e.g. when output is redirected and `stty size` returns nothing).
try:
    _, term_width = os.popen('stty size', 'r').read().split()
    term_width = int(term_width)
except ValueError:
    term_width = 80

TOTAL_BAR_LENGTH = 65.
last_time = time.time()
begin_time = last_time
def progress_bar(current, total, msg=None):
    global last_time, begin_time
    if current == 0:
        begin_time = time.time()  # Reset for new bar.

    cur_len = int(TOTAL_BAR_LENGTH*current/total)
    rest_len = int(TOTAL_BAR_LENGTH - cur_len) - 1

    sys.stdout.write(' [')
    for i in range(cur_len):
        sys.stdout.write('=')
    sys.stdout.write('>')
    for i in range(rest_len):
        sys.stdout.write('.')
    sys.stdout.write(']')

    cur_time = time.time()
    step_time = cur_time - last_time
    last_time = cur_time
    tot_time = cur_time - begin_time

    L = []
    L.append(' Step: %s' % format_time(step_time))
    L.append(' | Tot: %s' % format_time(tot_time))
    if msg:
        L.append(' | ' + msg)

    msg = ''.join(L)
    sys.stdout.write(msg)
    for i in range(term_width-int(TOTAL_BAR_LENGTH)-len(msg)-3):
        sys.stdout.write(' ')

    # Go back to the center of the bar.
    for i in range(term_width-int(TOTAL_BAR_LENGTH/2)+2):
        sys.stdout.write('\b')
    sys.stdout.write(' %d/%d ' % (current+1, total))

    if current < total-1:
        sys.stdout.write('\r')
    else:
        sys.stdout.write('\n')
    sys.stdout.flush()
def format_time(seconds):
    days = int(seconds / 3600/24)
    seconds = seconds - days*3600*24
    hours = int(seconds / 3600)
    seconds = seconds - hours*3600
    minutes = int(seconds / 60)
    seconds = seconds - minutes*60
    secondsf = int(seconds)
    seconds = seconds - secondsf
    millis = int(seconds*1000)

    f = ''
    i = 1
    if days > 0:
        f += str(days) + 'D'
        i += 1
    if hours > 0 and i <= 2:
        f += str(hours) + 'h'
        i += 1
    if minutes > 0 and i <= 2:
        f += str(minutes) + 'm'
        i += 1
    if secondsf > 0 and i <= 2:
        f += str(secondsf) + 's'
        i += 1
    if millis > 0 and i <= 2:
        f += str(millis) + 'ms'
        i += 1
    if f == '':
        f = '0ms'
    return f
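For reference, a minimal usage sketch of progress_bar: call it once per batch with the zero-based batch index, the total batch count, and an optional status string. The loop body and loss value here are placeholders, and it assumes this file is saved as utils1.py (the name the training script below imports):

from utils1 import progress_bar

num_batches = 100
for batch_idx in range(num_batches):
    loss = 0.5  # stand-in for a real training step
    progress_bar(batch_idx, num_batches, 'Loss: %.3f' % loss)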
Main classification training script for the Tianchi competition
Alibaba Cloud German AI Challenge 2018
AI For Earth Observation
# -*- coding: utf-8 -*-
from __future__ import division
"""
Created on Tue Dec 4 17:33:44 2018
I originally assumed, naively, that I could read everything into memory and
shuffle it there; unfortunately the h5 files are far too large for that.
@author: ygx
"""
import h5py
import numpy as np
#import matplotlib.pyplot as plt
import os
import time
import argparse
import torch
from torch import optim
from torch.autograd import Variable
#import torchvision
#from torchvision import transforms
#from torch.utils.data import Dataset
import torch.nn as nn
import math
from tqdm import tqdm
from utils1 import progress_bar
from models.SENet.se_resnet import se_resnet20_v2,se_resnet20
from utils.LabelSmooth import LabelSmoothing
from models.preact_resnet import *
from models.preact_resnet_sn import *
from models.senet import SENet18,SENet34,SENet101,SENet152
from models.senet_sn1 import SENet34_SN,SENet50_SN
from models.dpn import DPN92
# Multi-GPU training
# torchvision's transforms.ToTensor does not scale arrays with more than
# three channels into [0, 1], so the preprocessing and augmentation
# functions are re-implemented below on numpy arrays.
#
def AddFeatures(array):
    """
    Input: an NHWC batch of 18-band images (Sentinel-1 and Sentinel-2 concatenated).
    Appends three spectral indices as extra channels:
    NDVI (vegetation), SI (shadow index), and NDBI (built-up index).
    """
    ndvi = (array[:,:,:,14] - array[:,:,:,11]) / (array[:,:,:,14] + array[:,:,:,11])
    si = (array[:,:,:,14] + array[:,:,:,9] + array[:,:,:,11] + array[:,:,:,10]) / 4
    ndbi = (array[:,:,:,17] - array[:,:,:,14]) / (array[:,:,:,17] + array[:,:,:,14])
    output = np.concatenate((array, ndvi[:,:,:,np.newaxis], si[:,:,:,np.newaxis], ndbi[:,:,:,np.newaxis]), axis=3)
    return output
def RandomHorizontalFlip(array):
    """Flip left/right (reverse the W axis of NHWC) with probability 0.5."""
    a1 = np.random.choice(a=[0, 1], size=1)
    if a1 == 1:
        return array[:, :, ::-1, :]
    else:
        return array

def RandomVerticalFlip(array):
    """Flip up/down (reverse the H axis of NHWC) with probability 0.5."""
    a1 = np.random.choice(a=[0, 1], size=1)
    if a1 == 1:
        # The reversed view is non-contiguous; np.ascontiguousarray is applied
        # later, in the training loop, before converting to a tensor.
        return array[:, ::-1, :, :]
    else:
        return array

def RandomFlip(array):
    return RandomVerticalFlip(RandomHorizontalFlip(array))

def RandomRotate180(array):
    """Rotate 180 degrees (reverse both H and W) with probability 0.5."""
    a1 = np.random.choice(a=[0, 1], size=1)
    if a1 == 1:
        return array[:, ::-1, ::-1, :]
    else:
        return array

def RandomRotate90(array):
    """Swap H and W (a diagonal reflection; fine for square 32x32 tiles) with probability 0.5."""
    a1 = np.random.choice(a=[0, 1], size=1)
    if a1 == 1:
        return array.transpose(0, 2, 1, 3)
    else:
        return array

def RandomRotate(array):
    return RandomRotate180(RandomRotate90(array))

def RandomPre(array):
    return RandomRotate(RandomFlip(array))
def Padding(array, filters=0, nums_side=4):
    """Zero-pad every side of the images by nums_side pixels (NHWC)."""
    N, H, W, C = array.shape
    output = np.zeros((N, H + 2*nums_side, W + 2*nums_side, C))
    output[:, nums_side:H+nums_side, nums_side:W+nums_side, :] = array
    return output

def RandomPaddingCrop(array, filters=0, nums_side=4, size=(32, 32)):
    """With probability 0.5, pad and randomly crop back to the original size (NHWC)."""
    a1 = np.random.choice(a=[0, 1], size=1)
    if a1 == 1:
        PaddingArray = Padding(array, filters=0, nums_side=nums_side)
        # Draw two random offsets from [0, 2*nums_side].
        start_h, start_w = np.random.choice(2*nums_side + 1, 2)
        return PaddingArray[:, start_h:start_h+size[0], start_w:start_w+size[1], :]
    else:
        return array
def mean_std(array):
    """Per-channel normalization with precomputed statistics over the
    21 channels (18 raw bands + NDVI, SI, NDBI)."""
    mean = [-5.8652742e-05, 2.1940528e-05, 1.0698503e-05, -3.6694932e-05,
            2.8304448e-02, 1.8809982e-01, 7.6072977e-04, 1.0500627e-03,
            4.5042928e-02, 4.6203922e-02, 5.0576344e-02, 5.2854732e-02,
            7.6116115e-02, 8.3651222e-02, 8.3404467e-02, 8.4830634e-02,
            8.3397724e-02, 5.9738528e-02, 1.2501612e-01, 5.3771760e-02, -1.6543813e-01]
    std = [0.15613347, 0.15583488, 0.423213, 0.41939119, 2.4466505, 8.333362,
           2.254153, 1.3736475, 0.0687826, 0.06539123, 0.07428898, 0.07567783,
           0.09257834, 0.10948441, 0.10750295, 0.12227393, 0.10479108, 0.08892089,
           0.20687017, 0.07645338, 0.2877441]
    return (array - mean) / std
class Generator():
    def __init__(self,
                 filepath='/home/zj/senetial/data',
                 batch_size=256,
                 datatype='train',
                 split=0.1):
        train_file = h5py.File(os.path.join(filepath, 'training.h5'), 'r')
        val_file = h5py.File(os.path.join(filepath, 'validation.h5'), 'r')
        self.train_X1 = train_file['sen1']
        self.train_X2 = train_file['sen2']
        self.train_Y = train_file['label']
        self.val_X1 = val_file['sen1']
        self.val_X2 = val_file['sen2']
        self.val_Y = val_file['label']
        # Number of samples in each file.
        self.num_train = self.train_Y.shape[0]
        self.num_val = self.val_Y.shape[0]
        # Sample in groups of batch_size: self.indices holds the start index
        # of each group, i.e. [0, batch_size, 2*batch_size, ...].
        num_groups = int((self.num_train + self.num_val) / batch_size)
        # This drops the final partial batch, which conveniently avoids a
        # batch straddling the boundary between the two files.
        self.indices = np.arange(num_groups) * batch_size
        np.random.seed(1)
        np.random.shuffle(self.indices)
        # Use the last `split` fraction of the shuffled groups as the
        # validation set and the rest for training.
        split = int(num_groups * split)
        split = -split if split else None
        self.datatype = datatype
        if datatype == 'train':
            self.indices = self.indices[:split]
        else:
            self.indices = self.indices[split:]
        # count is the number of batches (groups) in this split.
        self.count = self.indices.size
        self.batch_size = batch_size
        self.index = 0
    def next_batch(self):
        idx = self.indices[self.index]
        # Start indices at or past the end of the training file map into the
        # validation file.
        if idx >= self.num_train:
            idx = idx - self.num_train
            X1 = self.val_X1
            X2 = self.val_X2
            Y = self.val_Y
        else:
            X1 = self.train_X1
            X2 = self.train_X2
            Y = self.train_Y
        images_1 = X1[idx:idx + self.batch_size]
        images_2 = X2[idx:idx + self.batch_size]
        labels = Y[idx:idx + self.batch_size]
        self.index += 1
        if self.index >= self.count:
            self.index = 0
            np.random.shuffle(self.indices)
        images_1 = np.asarray(images_1, dtype=np.float32)  # (N, 32, 32, 8)
        images_2 = np.asarray(images_2, dtype=np.float32)  # (N, 32, 32, 10)
        labels = np.asarray(labels, dtype=np.float32)
        # Concatenate sen1 and sen2 channels, add indices, then normalize.
        images = np.concatenate((images_1, images_2), axis=3)  # (N, 32, 32, 18)
        images = mean_std(AddFeatures(images))
        if self.datatype == 'train':
            images = RandomPaddingCrop(RandomPre(images))
        return images, labels

    def __next__(self):
        return self.next_batch()
def set_optimizer_lr(optimizer, lr):
    # Callback to set the learning rate in an optimizer, without rebuilding
    # the whole optimizer.
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return optimizer

def sgdr(period, batch_idx):
    # Returns the normalised anytime SGDR schedule given period and batch_idx.
    # Best performing settings reported in the paper are T_0 = 10, T_mult = 2,
    # so always use T_mult = 2.
    batch_idx = float(batch_idx)
    restart_period = period
    while batch_idx/restart_period > 1.:
        batch_idx = batch_idx - restart_period
        restart_period = restart_period * 2.
    radians = math.pi*(batch_idx/restart_period)
    return 0.5*(1.0 + math.cos(radians))
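# For example, with period=10 the multiplier starts at 1.0 at batch_idx=0,
# decays along a cosine to ~0 just before batch_idx=10, then jumps back to
# 1.0; each cycle is twice as long as the last, so restarts fall at batch
# indices 10, 30, 70, 150, ...  In train_model below the period is
# args.lr_period epochs (lr_period = args.lr_period * Iter batches), and the
# multiplier scales args.lr.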
def train_model(model, criterion, optimizer, num_epochs=25):
    since = time.time()
    for epoch in range(num_epochs):
        # Training resumes from the epoch-41 checkpoint loaded below, so the
        # printed and saved epoch index is offset by 42.
        print('Epoch {}/{}'.format(epoch + 42, num_epochs))
        print('-' * 10)
        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            # Select the data source for this phase.
            if phase == 'train':
                data = train_data
                model.train(True)   # Set model to training mode.
            else:
                data = val_data
                # Equivalent to model.eval(); only affects layers such as
                # Dropout and BatchNorm.
                model.train(False)
            running_loss = 0.0
            running_corrects = 0
            total = 0
            # Iterate over the data.
            # Iter = 1000  # quick sanity check on a subset
            Iter = int(data.count)
            start_batch_idx = Iter * epoch
            # Per-class error counters for the 17 LCZ classes.
            trainfalse = {x: 0 for x in np.arange(17)}
            valfalse = {x: 0 for x in np.arange(17)}
            lr_period = args.lr_period * Iter
            for i in range(Iter):
                # Get the inputs from the generator.
                inputs, labels = data.next_batch()
                # The augmentations return reversed views, so force a
                # contiguous array before converting to a tensor.
                inputs = np.ascontiguousarray(inputs, dtype=np.float32)
                # Wrap them in Variable: numpy -> FloatTensor -> Variable.
                if use_gpu:
                    # Inputs must be float, in N C H W order.
                    inputs = Variable((torch.from_numpy(inputs)).float().permute(0, 3, 1, 2).cuda())
                    # Labels must be long.
                    labels = Variable((torch.from_numpy(labels)).long().cuda())
                else:
                    inputs = Variable(torch.from_numpy(inputs).permute(0, 3, 1, 2))
                    labels = Variable(torch.from_numpy(labels).long())
                total += labels.size(0)
                global_step = i + start_batch_idx
                batch_lr = args.lr * sgdr(lr_period, global_step)
                lr_trace.append(batch_lr)
                optimizer = set_optimizer_lr(optimizer, batch_lr)
                # Zero the parameter gradients (they accumulate by default).
                optimizer.zero_grad()
                # Forward pass.
                outputs = model(inputs)
                # CrossEntropyLoss expects class indices, not one-hot labels.
                labels = labels.argmax(dim=1)
                _, preds = torch.max(outputs.data, 1)  # predicted class indices
                loss = criterion(outputs, labels)
                # Backward + optimize only in the training phase.
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                # Statistics.
                running_loss += loss.item()
                running_corrects += torch.sum(preds == labels.data)
                # Tally per-class counts of misclassified samples.
                wrong = labels.data[preds != labels.data].cpu().numpy()
                stats = trainfalse if phase == 'train' else valfalse
                for cls in wrong:
                    stats[cls] += 1
                if i > 0 and phase == 'train':
                    progress_bar(i, Iter, 'Loss: %.3f | Acc: %.3f%% (%d/%d) | LR: %.7f'
                                 % (running_loss / (i + 1), 100. * float(running_corrects) / total,
                                    running_corrects, total, batch_lr))
            # loss.item() is already a per-batch mean, so average over batches;
            # accuracy is taken over the samples actually seen.
            epoch_loss = float(running_loss) / Iter
            epoch_acc = float(running_corrects) / total
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            if phase == 'train':
                print('TrainFalse: {}'.format(trainfalse))
            if phase == 'val':
                print('ValFalse: {}'.format(valfalse))
        save_path = save_root + args.model
        if not os.path.isdir(save_path):
            os.mkdir(save_path)
        # Checkpoint index continues from the loaded epoch-41 weights.
        torch.save(model.state_dict(), save_path + '/' + args.model + '_{0}.pth'.format(epoch + 42))
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    return model
###############################################
parser = argparse.ArgumentParser(description='PyTorch LCZ Training')
parser.add_argument('--lr_period', default=8, type=float, help='learning rate schedule restart period (in epochs)')
parser.add_argument('--lr', default=0.0001, type=float, help='peak learning rate')  # previously 0.001
parser.add_argument('--model', '-s', default='SENet18', help='model name used for the per-epoch checkpoint directory')
save_root = '/home/zj/senetial/save_models/'
args = parser.parse_args()
device_ids = [0, 1]
use_gpu = torch.cuda.is_available()
lr_trace = []
# Define the network.
# model = PreActResNet18_SN()
model = SENet152()
if use_gpu and len(device_ids) > 1:    # multi-GPU training
    model = model.cuda(device_ids[0])
    model = nn.DataParallel(model, device_ids=device_ids)
    torch.backends.cudnn.benchmark = True
if use_gpu and len(device_ids) == 1:   # single-GPU training
    model = model.cuda()
    torch.backends.cudnn.benchmark = True
print(model)
# Resume from the epoch-41 checkpoint.
model.load_state_dict(torch.load('/home/zj/senetial/save_models/SEnet152/SEnet152_41.pth'))
# Define the loss function.
criterion = nn.CrossEntropyLoss()
# criterion = LabelSmoothing(size=17, smoothing=0.1)
criterion.cuda()
# Define the optimizer.
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
# Build the train and val data generators.
print('Preparing data..')
train_data = Generator(filepath='/home/zj/senetial/data', datatype='train', batch_size=32, split=0.1)
val_data = Generator(filepath='/home/zj/senetial/data', datatype='val', batch_size=64, split=0.1)
model_ft = train_model(model=model,
criterion=criterion,
optimizer=optimizer,
num_epochs=1000)
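With the argparse flags defined above, a typical launch looks like the following (the filename train.py is my own placeholder; the original does not name the script):

python train.py --lr 0.0001 --lr_period 8 --model SENet152

Note that --model only names the checkpoint directory and files under save_root; the architecture itself is hard-coded above (SENet152 here).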