代码仓地址:https://github.com/open-mmlab/mmsegmentation
文档库地址:[https://mmsegmentation.readthedocs.io/en/latest/overview.html](https://mmsegmentation.readthedocs.io/en/latest/overview.html)
# Install (or upgrade) OpenMIM; the `mim` commands below rely on it.
pip install -U openmim
# Install the MMEngine and MMCV (>= 2.0.0) dependencies through mim.
mim install mmengine
mim install "mmcv>=2.0.0"
# Clone the main branch of MMSegmentation and install it from source in
# editable mode (-e) with verbose output (-v).
git clone -b main https://github.com/open-mmlab/mmsegmentation.git
cd mmsegmentation
pip install -v -e .
下载config文件
# Download the files for the named PSPNet config into the current directory (--dest .).
mim download mmsegmentation --config pspnet_r50-d8_4xb2-40k_cityscapes-512x1024 --dest .
验证推理demo
# Positional arguments: input image, config file, checkpoint file.
# Runs on device cuda:0 and writes the output to result.jpg.
python demo/image_demo.py demo/demo.png configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth --device cuda:0 --out-file result.jpg
这部分参考官方文档:https://mmsegmentation.readthedocs.io/
# Base config files that this config is built on top of.
_base_ = [
    '../_base_/models/pspnet_r50-d8.py',
    '../_base_/datasets/cityscapes.py',
    '../_base_/default_runtime.py',
    '../_base_/schedules/schedule_40k.py',
]

# The crop size is passed to the data preprocessor as its ``size`` setting,
# and the resulting preprocessor config is set on the model config.
crop_size = (512, 1024)
data_preprocessor = dict(size=crop_size)
model = dict(data_preprocessor=data_preprocessor)
`_base_/models/pspnet_r50-d8.py`
# model settings

# Segmentation usually uses SyncBN.
norm_cfg = dict(type='SyncBN', requires_grad=True)

# Config of the data preprocessor; it usually covers image normalization
# and augmentation.
data_preprocessor = dict(
    # The type of data preprocessor.
    type='SegDataPreProcessor',
    # Mean values used for normalizing the input images.
    mean=[123.675, 116.28, 103.53],
    # Standard deviation values used for normalizing the input images.
    std=[58.395, 57.12, 57.375],
    # Whether to convert the image from BGR to RGB.
    bgr_to_rgb=True,
    # Padding value of the image.
    pad_val=0,
    # Padding value of the segmentation map.
    seg_pad_val=255,
)

model = dict(
    # Name of the segmentor.
    type='EncoderDecoder',
    data_preprocessor=data_preprocessor,
    # The ImageNet pretrained backbone to be loaded.
    pretrained='open-mmlab://resnet50_v1c',
    backbone=dict(
        # The type of backbone. See mmseg/models/backbones/resnet.py for
        # details.
        type='ResNetV1c',
        # Depth of the backbone. Normally 50 or 101 is used.
        depth=50,
        # Number of stages of the backbone.
        num_stages=4,
        # Indices of the output feature maps produced by each stage.
        out_indices=(0, 1, 2, 3),
        # The dilation rate of each stage.
        dilations=(1, 1, 2, 4),
        # The stride of each stage.
        strides=(1, 2, 1, 1),
        # The configuration of the norm layer.
        norm_cfg=norm_cfg,
        # Whether to freeze the statistics in BN.
        norm_eval=False,
        # The style of backbone: 'pytorch' means the stride-2 layers are in
        # the 3x3 conv, 'caffe' means the stride-2 layers are in the 1x1
        # convs.
        style='pytorch',
        # When dilation > 1, whether to contract the first layer of dilation.
        contract_dilation=True,
    ),
    decode_head=dict(
        # Type of decode head. See mmseg/models/decode_heads for the
        # available options.
        type='PSPHead',
        # Input channels of the decode head.
        in_channels=2048,
        # The index of the feature map to select.
        in_index=3,
        # The intermediate channels of the decode head.
        channels=512,
        # The avg pooling scales of PSPHead. See the paper for details.
        pool_scales=(1, 2, 3, 6),
        # The dropout ratio before the final classification layer.
        dropout_ratio=0.1,
        # Number of segmentation classes. Usually 19 for cityscapes, 21 for
        # VOC, 150 for ADE20k.
        num_classes=19,
        # The configuration of the norm layer.
        norm_cfg=norm_cfg,
        # The align_corners argument for resize in decoding.
        align_corners=False,
        # Config of the loss function for the decode_head.
        loss_decode=dict(
            # Type of loss used for segmentation.
            type='CrossEntropyLoss',
            # Whether to use sigmoid activation for segmentation.
            use_sigmoid=False,
            # Loss weight of the decode_head.
            loss_weight=1.0,
        ),
    ),
    auxiliary_head=dict(
        # Type of auxiliary head. See mmseg/models/decode_heads for the
        # available options.
        type='FCNHead',
        # Input channels of the auxiliary head.
        in_channels=1024,
        # The index of the feature map to select.
        in_index=2,
        # The intermediate channels of the auxiliary head.
        channels=256,
        # Number of convs in FCNHead. It is usually 1 in an auxiliary head.
        num_convs=1,
        # Whether to concat the output of the convs with the input before
        # the classification layer.
        concat_input=False,
        # The dropout ratio before the final classification layer.
        dropout_ratio=0.1,
        # Number of segmentation classes. Usually 19 for cityscapes, 21 for
        # VOC, 150 for ADE20k.
        num_classes=19,
        # The configuration of the norm layer.
        norm_cfg=norm_cfg,
        # The align_corners argument for resize in decoding.
        align_corners=False,
        # Config of the loss function for the auxiliary_head.
        loss_decode=dict(
            # Type of loss used for segmentation.
            type='CrossEntropyLoss',
            # Whether to use sigmoid activation for segmentation.
            use_sigmoid=False,
            # Loss weight of the auxiliary_head.
            loss_weight=0.4,
        ),
    ),
    # model training and testing settings
    # train_cfg is just a placeholder for now.
    train_cfg=dict(),
    # The test mode; options are 'whole' and 'slide'. 'whole': whole-image
    # fully-convolutional test. 'slide': sliding crop window on the image.
    test_cfg=dict(mode='whole'),
)
`_base_/datasets/cityscapes.py`
# dataset settings

# Dataset type; this is used to define the dataset.
dataset_type = 'CityscapesDataset'
# Root path of the data.
data_root = 'data/cityscapes/'
# The crop size during training.
crop_size = (512, 1024)

# Training pipeline.
train_pipeline = [
    # First step: load the image from its file path.
    dict(type='LoadImageFromFile'),
    # Second step: load the annotations for the current image.
    dict(type='LoadAnnotations'),
    # Augmentation that resizes the images and their annotations.
    dict(
        type='RandomResize',
        # The scale of the image.
        scale=(2048, 1024),
        # The augmented scale range, given as a ratio.
        ratio_range=(0.5, 2.0),
        # Whether to keep the aspect ratio when resizing the image.
        keep_ratio=True,
    ),
    # Augmentation that randomly crops a patch from the current image.
    dict(
        type='RandomCrop',
        # The crop size of the patch.
        crop_size=crop_size,
        # The max area ratio that may be occupied by a single category.
        cat_max_ratio=0.75,
    ),
    # Augmentation that flips the images and their annotations.
    dict(
        type='RandomFlip',
        # The ratio or probability to flip.
        prob=0.5,
    ),
    # Augmentation that distorts the current image with several photometric
    # methods.
    dict(type='PhotoMetricDistortion'),
    # Pack the input data for semantic segmentation.
    dict(type='PackSegInputs'),
]

test_pipeline = [
    # First step: load the image from its file path.
    dict(type='LoadImageFromFile'),
    # Use resize augmentation.
    dict(
        type='Resize',
        # Image scale for resizing.
        scale=(2048, 1024),
        # Whether to keep the aspect ratio when resizing the image.
        keep_ratio=True,
    ),
    # Annotations are loaded after ``Resize`` because the ground truth does
    # not need the resize data transform.
    dict(type='LoadAnnotations'),
    # Pack the input data for semantic segmentation.
    dict(type='PackSegInputs'),
]

# Train dataloader config.
train_dataloader = dict(
    # Batch size of a single GPU.
    batch_size=2,
    # Workers used to pre-fetch data for each single GPU.
    num_workers=2,
    # Keep the worker processes alive after an epoch ends instead of shutting
    # them down, which can accelerate training speed.
    persistent_workers=True,
    # Randomly shuffle during training.
    sampler=dict(type='InfiniteSampler', shuffle=True),
    # Train dataset config.
    dataset=dict(
        # Type of dataset; see mmseg/datasets/ for details.
        type=dataset_type,
        # The root of the dataset.
        data_root=data_root,
        # Prefix for the training data.
        data_prefix=dict(
            img_path='leftImg8bit/train',
            seg_map_path='gtFine/train',
        ),
        # Processing pipeline; this is the train_pipeline defined above.
        pipeline=train_pipeline,
    ),
)

val_dataloader = dict(
    # Batch size of a single GPU.
    batch_size=1,
    # Workers used to pre-fetch data for each single GPU.
    num_workers=4,
    # Keep the worker processes alive after an epoch ends instead of shutting
    # them down, which can accelerate testing speed.
    persistent_workers=True,
    # No shuffling during validation and testing.
    sampler=dict(type='DefaultSampler', shuffle=False),
    # Test dataset config.
    dataset=dict(
        # Type of dataset; see mmseg/datasets/ for details.
        type=dataset_type,
        # The root of the dataset.
        data_root=data_root,
        # Prefix for the testing data.
        data_prefix=dict(
            img_path='leftImg8bit/val',
            seg_map_path='gtFine/val',
        ),
        # Processing pipeline; this is the test_pipeline defined above.
        pipeline=test_pipeline,
    ),
)
# Testing reuses the validation dataloader.
test_dataloader = val_dataloader

# The metric used to measure accuracy. Here, IoUMetric is used.
val_evaluator = dict(type='IoUMetric', iou_metrics=['mIoU'])
# Testing reuses the validation evaluator.
test_evaluator = val_evaluator
`_base_/schedules/schedule_40k.py`
# optimizer
optimizer = dict(
    # Type of optimizer; see
    # https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/optimizer/default_constructor.py
    # for more details.
    type='SGD',
    # Learning rate of the optimizer; see the PyTorch documentation for the
    # detailed usage of the parameters.
    lr=0.01,
    # Momentum.
    momentum=0.9,
    # Weight decay of SGD.
    weight_decay=0.0005,
)
optim_wrapper = dict(
    # The optimizer wrapper provides a common interface for updating
    # parameters.
    type='OptimWrapper',
    # Optimizer used to update the model parameters.
    optimizer=optimizer,
    # If ``clip_grad`` is not None, it will be the arguments of
    # ``torch.nn.utils.clip_grad``.
    clip_grad=None,
)

# learning policy
param_scheduler = [
    dict(
        # The policy of the scheduler; Step, CosineAnnealing, Cyclic, etc.
        # are also supported. See the supported LrUpdater details at
        # https://github.com/open-mmlab/mmengine/blob/main/mmengine/optim/scheduler/lr_scheduler.py
        type='PolyLR',
        # Minimum learning rate at the end of scheduling.
        eta_min=1e-4,
        # The power of the polynomial decay.
        power=0.9,
        # Step at which to start updating the parameters.
        begin=0,
        # Step at which to stop updating the parameters.
        end=40000,
        # Whether to count by epoch or not.
        by_epoch=False,
    ),
]

# training schedule for 40k iterations
train_cfg = dict(
    type='IterBasedTrainLoop',
    max_iters=40000,
    val_interval=4000,
)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# default hooks
default_hooks = dict(
    # Log the time spent during iteration.
    timer=dict(type='IterTimerHook'),
    # Collect and write logs from different components of ``Runner``.
    logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
    # Update some hyper-parameters in the optimizer, e.g., learning rate.
    param_scheduler=dict(type='ParamSchedulerHook'),
    # Save checkpoints periodically.
    checkpoint=dict(type='CheckpointHook', by_epoch=False, interval=4000),
    # Data-loading sampler for distributed training.
    sampler_seed=dict(type='DistSamplerSeedHook'),
)
`_base_/default_runtime.py`
# The default scope of the registry is set to mmseg.
default_scope = 'mmseg'

# environment
env_cfg = dict(
    cudnn_benchmark=True,
    # Multi-processing settings: start method and OpenCV thread count.
    mp_cfg=dict(
        mp_start_method='fork',
        opencv_num_threads=0,
    ),
    # Distributed settings: communication backend.
    dist_cfg=dict(backend='nccl'),
)

# Logging settings.
log_level = 'INFO'
log_processor = dict(by_epoch=False)

# Checkpoint file to load from; None means no checkpoint is given here.
load_from = None
# Whether to resume from an existing model.
resume = False
使用mmengine调用config文件
from mmengine.config import Config

# Load the PSPNet config file and print its train dataloader settings.
config_path = 'configs/pspnet/pspnet_r50-d8_4xb2-40k_cityscapes-512x1024.py'
cfg = Config.fromfile(config_path)
print(cfg.train_dataloader)