树莓派c语言运行_使用TVM在树莓派上运行INT8量化SSD模型

我们使用mmdetection训练了一个以mobilenet-v2为backbone的SSD模型,将其从Pytorch转化为onnx,并使用TVM运行。当前TVM的文档中仅有一个基于mxnet的SSD模型的tutorial,与我们期望的不符。因此我们自己实现了从onnx到TVM的程序代码,并使用了TVM中的INT-8量化,将其部署到树莓派4B上。

完整代码

import time

import cv2
import numpy as np
import onnx
import tvm
import tvm.contrib.graph_runtime as runtime
import tvm.relay as relay

# preprocess
image_path = '9331584514251_.pic_hd.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports an unreadable file by returning None instead of raising.
    raise FileNotFoundError('cannot read image: {}'.format(image_path))
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

resize_shape = (300, 300)
img = cv2.resize(img, resize_shape, interpolation=cv2.INTER_LINEAR)
# Per-channel mean subtraction; std is all ones, so the multiply leaves values unchanged.
mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1)
std = np.array([1., 1., 1.]).reshape(1, -1)
img = img.astype(np.float32)
img = cv2.subtract(img, mean)
img = cv2.multiply(img, std)
img = img.transpose(2, 0, 1)  # HWC -> CHW

# load onnx model and build tvm runtime
target = 'llvm'
ctx = tvm.context(target)
mssd = onnx.load('mssd.onnx')
# The first graph input provides both the input name and its static shape.
input_blob = mssd.graph.input[0]
input_shape = tuple(dim.dim_value for dim in input_blob.type.tensor_type.shape.dim)
shape_dict = {input_blob.name: input_shape}
mod, params = relay.frontend.from_onnx(mssd, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

module = runtime.create(graph, lib, ctx)

# run
module.set_input(**params)
module.set_input(input_blob.name, tvm.nd.array(img))

module.run()

# postprocess
# generate anchor
from anchor import gen_anchors

mlvl_anchors = gen_anchors()

img_shape = image.shape
scale_factor = [img_shape[1] / resize_shape[1], img_shape[0] / resize_shape[0]]  # x_scale, y_scale

from bbox_utils import get_bboxes_single
from easydict import EasyDict

cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.6,
    max_per_img=200
)

cfg = EasyDict(cfg)

# get output
# Outputs 0-5 are read as class scores and outputs 6-11 as box deltas;
# [0] drops the leading (batch) axis of each output.
cls_score_list = [module.get_output(i).asnumpy()[0] for i in range(6)]
bbox_pred_list = [module.get_output(i + 6).asnumpy()[0] for i in range(6)]

# recover bbox
proposals = get_bboxes_single(cls_score_list, bbox_pred_list, mlvl_anchors, resize_shape, scale_factor, cfg,
                              rescale=True)

from vis_bbox import imshow_det_bboxes

bboxes = proposals[0]
labels = proposals[1]
imshow_det_bboxes(image, bboxes, labels, score_thr=0.9, out_file='out.png')

后处理部分

mmdetection自带有目标检测的后处理代码,但是由于mmdetection是用Pytorch实现的,而Pytorch在树莓派上的安装十分麻烦,因此我们使用Numpy,基于mmdetection的后处理代码,重新实现了SSD后处理。

# generate anchor
from anchor import gen_anchors

mlvl_anchors = gen_anchors()

img_shape = image.shape
scale_factor = [img_shape[1] / resize_shape[1], img_shape[0] / resize_shape[0]]  # x_scale, y_scale

from bbox_utils import get_bboxes_single
from easydict import EasyDict

cfg = dict(
    nms=dict(type='nms', iou_thr=0.45),
    min_bbox_size=0,
    score_thr=0.6,
    max_per_img=200
)

cfg = EasyDict(cfg)

# get output
# post_start / post_end bracket the post-processing so its duration is
# post_end - post_start (seconds).
post_start = time.time()
# Outputs 0-5 are read as class scores and outputs 6-11 as box deltas;
# [0] drops the leading (batch) axis of each output.
cls_score_list = [module.get_output(i).asnumpy()[0] for i in range(6)]
bbox_pred_list = [module.get_output(i + 6).asnumpy()[0] for i in range(6)]

# recover bbox
proposals = get_bboxes_single(cls_score_list, bbox_pred_list, mlvl_anchors, resize_shape, scale_factor, cfg,
                              rescale=True)
post_end = time.time()

from vis_bbox import imshow_det_bboxes

bboxes = proposals[0]
labels = proposals[1]
imshow_det_bboxes(image, bboxes, labels, score_thr=0.9, out_file='out.png')

后处理需要的文件如下:

# ./anchor.py
import numpy as np
from anchor_generator import AnchorGenerator


# generate anchor
def gen_anchors():
    """Generate the anchors of every feature level for the 300x300 SSD model.

    All settings (strides, aspect ratios, feature-map sizes, size range) are
    fixed inside the function.

    Returns:
        list[np.ndarray]: one ``(num_anchors, 4)`` array per feature level,
            as produced by ``AnchorGenerator.grid_anchors``.
    """
    basesize_ratio_range = (0.2, 0.9)
    in_channels = (32, 96, 320, 512, 256, 256)
    input_size = 300
    anchor_strides = (8, 16, 32, 64, 100, 300)
    anchor_ratios = ([2], [2, 3], [2, 3], [2, 3], [2], [2])
    feature_size = ((38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1))

    # Anchor base sizes are spread over the ratio range, in percent of input_size.
    min_ratio, max_ratio = basesize_ratio_range
    min_ratio = int(min_ratio * 100)
    max_ratio = int(max_ratio * 100)
    step = int(np.floor(max_ratio - min_ratio) / (len(in_channels) - 2))
    min_sizes = []
    max_sizes = []

    for r in range(min_ratio, max_ratio + 1, step):
        min_sizes.append(int(input_size * r / 100))
        max_sizes.append(int(input_size * (r + step) / 100))

    # The first level uses a fixed 10%-20% size range.
    min_sizes.insert(0, int(input_size * 10 / 100))
    max_sizes.insert(0, int(input_size * 20 / 100))

    mlvl_anchors = []
    levels = zip(min_sizes, max_sizes, anchor_strides, anchor_ratios, feature_size)
    for base_size, max_size, stride, level_ratios, feat_size in levels:
        ctr = ((stride - 1) / 2., (stride - 1) / 2.)
        scales = [1., np.sqrt(max_size / base_size)]
        ratios = [1.]
        for r in level_ratios:
            ratios += [1 / r, r]  # 4 or 6 ratio
        anchor_generator = AnchorGenerator(
            base_size, scales, ratios, scale_major=False, ctr=ctr)
        # Keep every anchor of the first scale and only the first anchor of
        # the second scale, which is moved to position 1.
        indices = list(range(len(ratios)))
        indices.insert(1, len(indices))
        anchor_generator.base_anchors = anchor_generator.base_anchors[indices]
        mlvl_anchors.append(anchor_generator.grid_anchors(feat_size, stride))

    return mlvl_anchors

# ./anchor_generator.py
import numpy as np


class AnchorGenerator(object):
    """Generate base anchors and tile them over a feature map (NumPy only).

    Args:
        base_size (int or float): side length of the square base box.
        scales (sequence of float): multipliers applied to ``base_size``.
        ratios (sequence of float): height/width aspect ratios.
        scale_major (bool): if True the flattened anchors are ordered
            ratio-first (all scales of ratio 0, then ratio 1, ...);
            otherwise scale-first.
        ctr (tuple of float or None): ``(x, y)`` centre of the base anchors;
            defaults to the centre of the base box.
    """

    def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
        self.base_size = base_size
        self.scales = np.array(scales)
        self.ratios = np.array(ratios)
        self.scale_major = scale_major
        self.ctr = ctr
        self.base_anchors = self.gen_base_anchors()

    @property
    def num_base_anchors(self):
        """Number of base anchors (rows of ``base_anchors``)."""
        return self.base_anchors.shape[0]

    def gen_base_anchors(self):
        """Return the ``(A, 4)`` array of rounded ``[x1, y1, x2, y2]`` base anchors."""
        w = self.base_size
        h = self.base_size
        if self.ctr is None:
            x_ctr = 0.5 * (w - 1)
            y_ctr = 0.5 * (h - 1)
        else:
            x_ctr, y_ctr = self.ctr

        # ratio = h / w, so heights scale by sqrt(ratio) and widths by its inverse.
        h_ratios = np.sqrt(self.ratios)
        w_ratios = 1 / h_ratios
        if self.scale_major:
            ws = (w * w_ratios[:, None] * self.scales[None, :]).flatten()
            hs = (h * h_ratios[:, None] * self.scales[None, :]).flatten()
        else:
            ws = (w * self.scales[:, None] * w_ratios[None, :]).flatten()
            hs = (h * self.scales[:, None] * h_ratios[None, :]).flatten()

        # yapf: disable
        base_anchors = np.stack(
            [
                x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
                x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
            ],
            axis=-1).round()
        # yapf: enable

        return base_anchors

    @staticmethod
    def _meshgrid(x, y, row_major=True):
        """Return flattened grid coordinates built from 1-D arrays ``x`` and ``y``.

        With ``row_major`` the result is ``(xx, yy)``, otherwise ``(yy, xx)``.
        """
        xx = np.tile(x, len(y))
        yy = np.tile(y.reshape(-1, 1), (1, len(x))).flatten()
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def grid_anchors(self, featmap_size, stride=16):
        """Shift the base anchors to every cell of a feature map.

        Args:
            featmap_size (tuple of int): ``(height, width)`` of the feature map.
            stride (int): distance between neighbouring cells, in input pixels.

        Returns:
            np.ndarray: ``(height * width * A, 4)`` anchors.
        """
        base_anchors = self.base_anchors

        feat_h, feat_w = featmap_size
        shift_x = np.arange(0, feat_w) * stride
        shift_y = np.arange(0, feat_h) * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1)
        shifts = shifts.astype(base_anchors.dtype)
        # first feat_w elements correspond to the first row of shifts
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4), reshape to (K*A, 4)

        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
        all_anchors = all_anchors.reshape(-1, 4)
        # first A rows correspond to A anchors of (0, 0) in feature map,
        # then (0, 1), (0, 2), ...
        return all_anchors
# ./bbox_utils.py
import numpy as np
from utils import sigmoid, softmax, addcmul, topk
from bbox_nms import multiclass_nms


def delta2bbox(rois,
               deltas,
               means=None,
               stds=None,
               max_shape=None,
               wh_ratio_clip=16 / 1000):
    """Decode predicted deltas into boxes relative to the given rois/anchors.

    Args:
        rois (np.ndarray): ``(N, 4)`` reference boxes ``[x1, y1, x2, y2]``.
        deltas (np.ndarray): ``(N, 4 * k)`` encoded ``(dx, dy, dw, dh)`` groups.
        means (sequence of float or None): offsets added after scaling by
            ``stds``; defaults to zeros.
        stds (sequence of float or None): scales applied to the deltas;
            defaults to ones.
        max_shape (tuple or None): ``(height, width)``; when given, the
            decoded coordinates are clipped to ``[0, size - 1]``.
        wh_ratio_clip (float): bounds ``|dw|`` and ``|dh|`` by
            ``|log(wh_ratio_clip)|``.

    Returns:
        np.ndarray: decoded boxes with the same shape as ``deltas``.
    """
    means = [0, 0, 0, 0] if means is None else means
    stds = [1, 1, 1, 1] if stds is None else stds

    # Repeat the 4 normalisation values once per box group in a row.
    reps = (1, deltas.shape[1] // 4)
    means = np.tile(np.array(means, dtype=deltas.dtype), reps)
    stds = np.tile(np.array(stds, dtype=deltas.dtype), reps)
    denorm = deltas * stds + means
    dx, dy, dw, dh = (denorm[:, offset::4] for offset in range(4))

    limit = np.abs(np.log(wh_ratio_clip))
    dw = np.clip(dw, -limit, limit)
    dh = np.clip(dh, -limit, limit)

    # Centre and size of every reference box, as column vectors.
    left, top, right, bottom = rois[:, 0], rois[:, 1], rois[:, 2], rois[:, 3]
    px = ((left + right) * 0.5)[:, np.newaxis]
    py = ((top + bottom) * 0.5)[:, np.newaxis]
    pw = (right - left + 1.0)[:, np.newaxis]
    ph = (bottom - top + 1.0)[:, np.newaxis]

    # Size deltas act exponentially, centre deltas linearly in box units.
    gw = pw * np.exp(dw)
    gh = ph * np.exp(dh)
    gx = addcmul(px, pw, dx)  # gx = px + pw * dx
    gy = addcmul(py, ph, dy)  # gy = py + ph * dy

    # Back to corner representation.
    x1 = gx - gw * 0.5 + 0.5
    y1 = gy - gh * 0.5 + 0.5
    x2 = gx + gw * 0.5 - 0.5
    y2 = gy + gh * 0.5 - 0.5
    if max_shape is not None:
        x_max = max_shape[1] - 1
        y_max = max_shape[0] - 1
        x1 = np.clip(x1, 0, x_max)
        y1 = np.clip(y1, 0, y_max)
        x2 = np.clip(x2, 0, x_max)
        y2 = np.clip(y2, 0, y_max)
    return np.stack([x1, y1, x2, y2], axis=-1).reshape(deltas.shape)


def get_bboxes_single(cls_score_list,
                      bbox_pred_list,
                      mlvl_anchors,
                      img_shape,
                      scale_factor,
                      cfg,
                      rescale=False):
    """Turn the raw SSD head outputs of one image into final detections.

    Args:
        cls_score_list (list[np.ndarray]): per-level class scores, each
            ``(C, H, W)`` (transposed to ``(H, W, C)`` and flattened here).
        bbox_pred_list (list[np.ndarray]): per-level box deltas, each
            ``(C, H, W)``.
        mlvl_anchors (list[np.ndarray]): per-level ``(N, 4)`` anchors.
        img_shape (tuple): ``(height, width)`` used to clip decoded boxes.
        scale_factor (sequence of float): ``(x_scale, y_scale)`` applied to
            the boxes when ``rescale`` is True.
        cfg: mapping with attribute access providing ``score_thr``, ``nms``,
            ``max_per_img`` and optionally ``nms_pre``.
        rescale (bool): whether to multiply the boxes by ``scale_factor``.

    Returns:
        tuple: ``(det_bboxes, det_labels)`` as returned by ``multiclass_nms``.
    """
    assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)
    mlvl_bboxes = []
    mlvl_scores = []
    # ############# add #############
    use_sigmoid_cls = False
    cls_out_channels = 2
    target_means = (.0, .0, .0, .0)
    target_stds = (0.1, 0.1, 0.2, 0.2)
    # ############# add #############
    for cls_score, bbox_pred, anchors in zip(cls_score_list,
                                             bbox_pred_list, mlvl_anchors):
        assert cls_score.shape[-2:] == bbox_pred.shape[-2:]
        cls_score = np.transpose(cls_score, (1, 2, 0)).reshape(-1, cls_out_channels)
        if use_sigmoid_cls:
            scores = sigmoid(cls_score)
        else:
            scores = softmax(cls_score)
        bbox_pred = np.transpose(bbox_pred, (1, 2, 0)).reshape(-1, 4)
        nms_pre = cfg.get('nms_pre', -1)
        if 0 < nms_pre < scores.shape[0]:
            # Get maximum scores for foreground classes.
            # ndarray.max returns only the values (no (values, indices) pair).
            if use_sigmoid_cls:
                max_scores = scores.max(axis=1)
            else:
                max_scores = scores[:, 1:].max(axis=1)
            # Indices of the nms_pre highest-scoring anchors, best first.
            topk_inds = np.argsort(-max_scores)[:nms_pre]
            anchors = anchors[topk_inds, :]
            bbox_pred = bbox_pred[topk_inds, :]
            scores = scores[topk_inds, :]

        bboxes = delta2bbox(anchors, bbox_pred, target_means,
                            target_stds, img_shape)

        mlvl_bboxes.append(bboxes)
        mlvl_scores.append(scores)
    mlvl_bboxes = np.concatenate(mlvl_bboxes)
    mlvl_scores = np.concatenate(mlvl_scores)
    if use_sigmoid_cls:
        # Add a dummy background class to the front when using sigmoid
        padding = np.zeros((mlvl_scores.shape[0], 1), dtype=mlvl_scores.dtype)
        mlvl_scores = np.concatenate([padding, mlvl_scores], axis=1)
    det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                            cfg.score_thr, cfg.nms,
                                            cfg.max_per_img)
    if rescale:
        # Columns 0/2 are x coordinates, 1/3 are y coordinates.
        det_bboxes[:, 0] *= scale_factor[0]
        det_bboxes[:, 1] *= scale_factor[1]
        det_bboxes[:, 2] *= scale_factor[0]
        det_bboxes[:, 3] *= scale_factor[1]

    return det_bboxes, det_labels
# ./bbox_nms.py
import numpy as np
from utils import nms


def multiclass_nms(multi_bboxes,
                   multi_scores,
                   score_thr,
                   nms_cfg,
                   max_num=-1,
                   score_factors=None):
    """NMS for multi-class bboxes.

    Args:
        multi_bboxes (np.ndarray): shape (n, #class*4) or (n, 4)
        multi_scores (np.ndarray): shape (n, #class), where the 0th column
            contains scores of the background class, but this will be ignored.
        score_thr (float): bbox threshold, bboxes with scores lower than it
            will not be considered.
        nms_cfg (dict): NMS settings; ``iou_thr`` is the IoU threshold.
        max_num (int): if there are more than max_num bboxes after NMS,
            only top max_num will be kept. A non-positive value disables
            the cap.
        score_factors (np.ndarray): The factors multiplied to scores before
            applying NMS

    Returns:
        tuple: (bboxes, labels), arrays of shape (k, 5) and (k, ). Labels
            are 0-based.
    """
    num_classes = multi_scores.shape[1] - 1
    # exclude background category
    if multi_bboxes.shape[1] > 4:
        bboxes = multi_bboxes.reshape(multi_scores.shape[0], -1, 4)[:, 1:]
    else:
        # Class-agnostic boxes: share each box across all foreground classes
        # so the boolean mask below has a matching (n, #class) layout.
        bboxes = np.broadcast_to(
            multi_bboxes[:, None],
            (multi_bboxes.shape[0], num_classes, 4))
    scores = multi_scores[:, 1:]

    # filter out boxes with low scores
    valid_mask = scores > score_thr
    bboxes = bboxes[valid_mask]
    if score_factors is not None:
        scores = scores * score_factors[:, None]
    scores = scores[valid_mask]
    labels = valid_mask.nonzero()[1]

    if bboxes.size == 0:
        bboxes = np.zeros((0, 5), dtype=multi_bboxes.dtype)
        # np.long does not exist in NumPy 1.24-1.26; use an explicit width.
        labels = np.zeros((0, ), dtype=np.int64)
        return bboxes, labels

    # Modified from https://github.com/pytorch/vision/blob
    # /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39.
    # strategy: in order to perform NMS independently per class.
    # we add an offset to all the boxes. The offset is dependent
    # only on the class idx, and is large enough so that boxes
    # from different classes do not overlap
    max_coordinate = bboxes.max()
    offsets = labels.astype(bboxes.dtype) * (max_coordinate + 1)
    bboxes_for_nms = bboxes + offsets[:, None]

    scores = scores.astype(np.float64)
    bboxes_for_nms = bboxes_for_nms.astype(np.float64)

    keep = nms(bboxes_for_nms, scores, nms_cfg.get('iou_thr', None))
    bboxes = bboxes[keep]
    scores = scores[keep]
    labels = labels[keep]

    # Only cap when a positive limit is given; with the default -1 a plain
    # ``len(keep) > max_num`` test would slice [:-1] and drop a detection.
    if 0 < max_num < len(keep):
        inds = scores.argsort()[::-1]
        inds = inds[:max_num]
        bboxes = bboxes[inds]
        scores = scores[inds]
        labels = labels[inds]

    return np.concatenate([bboxes, scores[:, None]], 1), labels

# ./utils.py
import numpy as np


def sigmoid(x: np.ndarray):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def softmax(x: np.ndarray):
    """Compute a numerically stable softmax over axis 1 of a 2-D array.

    The input is left untouched (an in-place ``x -= max`` would silently
    modify the caller's array).

    Args:
        x (np.ndarray): ``(N, C)`` scores.

    Returns:
        np.ndarray: ``(N, C)`` probabilities; every row sums to 1.
    """
    # Subtracting the row maximum does not change the result but avoids overflow.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp = np.exp(shifted)
    return exp / np.sum(exp, axis=1, keepdims=True)


def addcmul(px: np.ndarray, pw: np.ndarray, dx: np.ndarray):
    """Return ``px + pw * dx`` element-wise."""
    scaled = pw * dx
    return px + scaled


def topk(x: np.ndarray, k: int, axis=1):
    """Return the indices of the ``k`` largest entries along ``axis``.

    The previous implementation sliced ``[k + 1:]`` out of an
    ``argpartition`` result, which yields the ``n - k - 1`` largest entries
    rather than ``k`` of them, and could not handle 1-D input.

    Args:
        x (np.ndarray): values to rank.
        k (int): number of indices to return (fewer if the axis is shorter).
        axis (int): axis to rank along. A 1-D input is always ranked along
            its only axis.

    Returns:
        np.ndarray: integer indices into ``x`` along ``axis``, ordered from
            the largest value to the smallest.
    """
    x = np.asarray(x)
    if x.ndim == 1:
        axis = 0
    # Ascending stable sort, then reverse to get descending order.
    order = np.flip(np.argsort(x, axis=axis, kind='stable'), axis=axis)
    selector = [slice(None)] * x.ndim
    selector[axis] = slice(0, max(k, 0))
    return order[tuple(selector)]


def nms(dets, scores, prob_threshold):
    """Greedy non-maximum suppression.

    Args:
        dets (np.ndarray): ``(N, 4)`` boxes as ``[x1, y1, x2, y2]``.
        scores (np.ndarray): ``(N,)`` box scores.
        prob_threshold (float): despite its name this is the IoU threshold;
            a box is dropped when its IoU with an already kept box
            exceeds it.

    Returns:
        list: indices of the kept boxes, ordered by decreasing score.
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]

    # The +1 treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    score_index = scores.argsort()[::-1]
    keep = []

    while score_index.size > 0:
        best = score_index[0]
        rest = score_index[1:]
        keep.append(best)

        # Intersection rectangle of the best box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])

        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[best] + areas[rest] - inter)

        # Survivors are the remaining boxes that overlap little enough.
        survivors = np.where(iou <= prob_threshold)[0]
        score_index = rest[survivors]

    return keep

# ./vis_bbox.py
from enum import Enum
import numpy as np
import cv2


class Color(Enum):
    """An enum that defines common colors.
    Contains red, green, blue, cyan, yellow, magenta, white and black.
    """
    # Each value is a 3-tuple of 0-255 channel intensities in (B, G, R)
    # order: red stores 255 in the last slot and blue in the first.
    red = (0, 0, 255)
    green = (0, 255, 0)
    blue = (255, 0, 0)
    cyan = (255, 255, 0)
    yellow = (0, 255, 255)
    magenta = (255, 0, 255)
    white = (255, 255, 255)
    black = (0, 0, 0)


def _color_val(color):
    """Convert a color name, :obj:`Color` member or BGR sequence to a BGR tuple."""
    if isinstance(color, Color):
        return color.value
    if isinstance(color, str):
        return Color[color].value
    return tuple(int(channel) for channel in color)


def imshow_det_bboxes(img,
                      bboxes,
                      labels,
                      class_names=None,
                      score_thr=0,
                      bbox_color='green',
                      text_color='green',
                      thickness=1,
                      font_scale=0.5,
                      show=False,
                      win_name='',
                      wait_time=0,
                      out_file=None):
    """Draw bboxes and class labels (with scores) on an image.

    The boxes are drawn directly onto ``img`` when it is an ndarray.

    Args:
        img (str or ndarray): The image to be displayed; a str is read
            with ``cv2.imread``.
        bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or
            (n, 5).
        labels (ndarray): Labels of bboxes.
        class_names (list[str]): Names of each classes.
        score_thr (float): Minimum score of bboxes to be shown.
        bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
        text_color (str or tuple or :obj:`Color`): Color of texts.
        thickness (int): Thickness of lines.
        font_scale (float): Font scales of texts.
        show (bool): Whether to show the image.
        win_name (str): The window name.
        wait_time (int): Value of waitKey param.
        out_file (str or None): The filename to write the image.
    """
    assert bboxes.ndim == 2
    assert labels.ndim == 1
    assert bboxes.shape[0] == labels.shape[0]
    assert bboxes.shape[1] == 4 or bboxes.shape[1] == 5

    if isinstance(img, str):
        img = cv2.imread(img)

    if score_thr > 0:
        assert bboxes.shape[1] == 5
        scores = bboxes[:, -1]
        inds = scores > score_thr
        bboxes = bboxes[inds, :]
        labels = labels[inds]

    bbox_color = _color_val(bbox_color)
    text_color = _color_val(text_color)

    for bbox, label in zip(bboxes, labels):
        bbox_int = bbox.astype(np.int32)
        # Plain Python ints are accepted as point coordinates by every
        # OpenCV build, unlike NumPy scalar types.
        left_top = (int(bbox_int[0]), int(bbox_int[1]))
        right_bottom = (int(bbox_int[2]), int(bbox_int[3]))
        cv2.rectangle(
            img, left_top, right_bottom, bbox_color, thickness)
        label_text = class_names[
            label] if class_names is not None else 'cls {}'.format(label)
        if len(bbox) > 4:
            label_text += '|{:.02f}'.format(bbox[-1])
        cv2.putText(img, label_text, (left_top[0], left_top[1] - 2),
                    cv2.FONT_HERSHEY_COMPLEX, font_scale, text_color)

    if show:
        # cv2.imshow takes the window name first; waitKey lets the window
        # render and honours wait_time.
        cv2.imshow(win_name, img)
        cv2.waitKey(wait_time)
    if out_file is not None:
        cv2.imwrite(out_file, img)

预处理部分

预处理使用numpy和opencv-python。

# preprocess
image_path = '9331584514251_.pic_hd.jpg'
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports an unreadable file by returning None instead of raising.
    raise FileNotFoundError('cannot read image: {}'.format(image_path))
img = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

resize_shape = (300, 300)
img = cv2.resize(img, resize_shape, interpolation=cv2.INTER_LINEAR)
# Per-channel mean subtraction; std is all ones, so the multiply leaves values unchanged.
mean = np.array([123.675, 116.28, 103.53]).reshape(1, -1)
std = np.array([1., 1., 1.]).reshape(1, -1)
img = img.astype(np.float32)
img = cv2.subtract(img, mean)
img = cv2.multiply(img, std)
img = img.transpose(2, 0, 1)  # HWC -> CHW

模型载入编译及运行

# load onnx model and build tvm runtime
target = 'llvm'
ctx = tvm.context(target)
mssd = onnx.load('mssd.onnx')
# get input blob name and shape
input_blob = mssd.graph.input[0]
input_shape = tuple(dim.dim_value for dim in input_blob.type.tensor_type.shape.dim)
shape_dict = {input_blob.name: input_shape}
mod, params = relay.frontend.from_onnx(mssd, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

module = runtime.create(graph, lib, ctx)

# run
module.set_input(**params)
module.set_input(input_blob.name, tvm.nd.array(img))

module.run()

模型量化

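使用TVM自带的quantize模块(relay.quantize),可简单实现模型的INT-8离线量化。量化过程需要一个校准数据集,用来统计各层激活值的数值范围:在 `with relay.quantize.qconfig(calibrate_mode='kl_divergence', weight_scale='max'):` 的上下文中调用 `mod = relay.quantize.quantize(mod, params, dataset=calibrate_dataset())`(其中 calibrate_dataset 逐个产生 `{输入名: 预处理后的图像}` 形式的字典),再对量化后的 mod 按前文的方式调用 relay.build 编译即可。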

树莓派上运行

受运算能力的限制,我们在树莓派上仅编译了runtime,因此在树莓派上运行代码有两种方式:

  1. TVM自带的RPC
  2. 将模型交叉编译后在树莓派上运行

我们在这里使用第二种方式。此时需要对模型载入编译部分做改动,并删除电脑上的模型运行部分。

# load onnx model and build tvm runtime
import os

target = 'llvm -target=aarch64-linux-gnu -mattr=+neon'
ctx = tvm.context(target)
mssd = onnx.load('mssd.onnx')
# get input blob name and shape
input_blob = mssd.graph.input[0]
input_shape = tuple(dim.dim_value for dim in input_blob.type.tensor_type.shape.dim)
shape_dict = {input_blob.name: input_shape}
mod, params = relay.frontend.from_onnx(mssd, shape_dict)

with relay.build_config(opt_level=3):
    graph, lib, params = relay.build(mod, target, params=params)

######## export lib ########
path = 'model/'
# Make sure the output directory exists before writing the three artifacts.
os.makedirs(path, exist_ok=True)
path_lib = path + "deploy_lib.tar"
path_graph = path + "deploy_graph.json"
path_params = path + "deploy_param.params"
lib.export_library(path_lib)
with open(path_graph, "w") as fo:
    fo.write(graph)
with open(path_params, "wb") as fo:
    fo.write(relay.save_param_dict(params))

将编译后的三个文件(deploy_lib.tar、deploy_graph.json、deploy_param.params)发送到树莓派上,并在树莓派上运行该模型。

树莓派上的模型无上述载入编译部分。

######## load lib ########
# load the module back.
path = 'model/'
path_lib = path + "deploy_lib.tar"
path_graph = path + "deploy_graph.json"
path_params = path + "deploy_param.params"
with open(path_graph) as fi:
    graph = fi.read()
lib = tvm.runtime.load_module(path_lib)
with open(path_params, "rb") as fi:
    params = bytearray(fi.read())

# The compile section that defined ctx is not executed on the Pi, so the
# CPU context has to be created here.
ctx = tvm.cpu(0)
module = runtime.create(graph, lib, ctx)

# run
module.load_params(params)  # pay attention to the difference
module.set_input('input.1', tvm.nd.array(img))  # hardcode the input blob name

module.run()

其他部分与原代码相同。

运行结果

树莓派c语言运行_使用TVM在树莓派上运行INT8量化SSD模型_第1张图片
使用量化后的模型进行人脸检测https://www.zhihu.com/video/1237878206147485696
树莓派c语言运行_使用TVM在树莓派上运行INT8量化SSD模型_第2张图片
使用全精度模型进行人脸检测https://www.zhihu.com/video/1237879039152390144

树莓派c语言运行_使用TVM在树莓派上运行INT8量化SSD模型_第3张图片

可以看到,量化后的模型精度有所下降,而运算速度有一定提升。

下表是不同平台下模型运行的时间测试。

树莓派c语言运行_使用TVM在树莓派上运行INT8量化SSD模型_第4张图片

你可能感兴趣的:(树莓派c语言运行)