raspberry pi4B ncnn cpu vulkan benchmark

env: MANJARO ARM aarch64
commit id: fff16a025d21feb11ae51e86365abd8bfd86e900
2022 Apr 28

写在前面的话:

在64位的Raspberry Pi OS上同样也做了测试,历经坎坷,但是收效甚微,而且vulkan驱动不好找。在MANJARO系统上使用vulkan更容易一些,这里放出NCNN在raspberry pi4上的benchmark。先说结论:在分配512MB显存的情况下,树莓派GPU+vulkan的表现明显弱于CPU。应该是树莓派的Vulkan支持尚不完善的原因。另外,GPU测试中各int8模型的耗时与CPU测试基本一致,推测这些模型实际上仍在CPU上运行(待确认),因此它们不能代表GPU的性能。
另一个在树莓派上安装vulkan的教程:https://qengineering.eu/install-vulkan-on-raspberry-pi.html

thread 1 cpu:

loop_count = 10
num_threads = 1
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   87.23  max =   88.53  avg =   87.95
     squeezenet_int8  min =   76.35  max =   77.11  avg =   76.71
           mobilenet  min =  140.10  max =  142.05  avg =  140.97
      mobilenet_int8  min =   95.47  max =   95.79  avg =   95.63
        mobilenet_v2  min =  101.54  max =  102.40  avg =  101.99
        mobilenet_v3  min =   82.33  max =   83.70  avg =   82.99
          shufflenet  min =   50.17  max =   51.85  avg =   50.94
       shufflenet_v2  min =   48.87  max =   49.48  avg =   49.10
             mnasnet  min =   92.76  max =   93.17  avg =   92.96
     proxylessnasnet  min =  111.36  max =  112.04  avg =  111.67
     efficientnet_b0  min =  178.04  max =  178.42  avg =  178.24
   efficientnetv2_b0  min =  202.40  max =  203.09  avg =  202.80
        regnety_400m  min =  122.74  max =  123.09  avg =  122.91
           blazeface  min =   15.64  max =   15.91  avg =   15.79
           googlenet  min =  271.19  max =  272.28  avg =  271.75
      googlenet_int8  min =  239.69  max =  241.40  avg =  240.20
            resnet18  min =  216.32  max =  217.22  avg =  216.87
       resnet18_int8  min =  179.47  max =  179.86  avg =  179.68
             alexnet  min =  202.26  max =  202.81  avg =  202.54
               vgg16  min = 1286.14  max = 1291.54  avg = 1287.90
          vgg16_int8  min =  994.59  max = 1002.22  avg =  999.48
            resnet50  min =  613.59  max =  628.67  avg =  618.64
       resnet50_int8  min =  487.12  max =  489.30  avg =  488.22
      squeezenet_ssd  min =  201.68  max =  202.58  avg =  202.05
 squeezenet_ssd_int8  min =  174.25  max =  176.63  avg =  175.01
       mobilenet_ssd  min =  280.41  max =  281.18  avg =  280.76
  mobilenet_ssd_int8  min =  192.00  max =  192.72  avg =  192.36
      mobilenet_yolo  min =  631.44  max =  642.08  avg =  635.85
  mobilenetv2_yolov3  min =  346.23  max =  347.11  avg =  346.83
         yolov4-tiny  min =  430.36  max =  432.57  avg =  431.51
           nanodet_m  min =  118.20  max =  118.70  avg =  118.47
    yolo-fastest-1.1  min =   59.48  max =   60.90  avg =   60.00
      yolo-fastestv2  min =   49.94  max =   50.71  avg =   50.22

thread 1 gpu with vulkan:

[0 V3D 4.2]  queueC=0[1]  queueG=0[1]  queueT=0[1]
[0 V3D 4.2]  bugsbn1=0  bugbilz=0  bugcopc=0  bugihfa=0
[0 V3D 4.2]  fp16-p/s/a=1/1/0  int8-p/s/a=1/1/0
[0 V3D 4.2]  subgroup=16  basic=1  vote=0  ballot=0  shuffle=0
loop_count = 10
num_threads = 1
powersave = 0
gpu_device = 0
cooling_down = 1
          squeezenet  min =  308.47  max =  309.27  avg =  308.87
     squeezenet_int8  min =   83.22  max =   83.79  avg =   83.47
           mobilenet  min =  345.16  max =  345.46  avg =  345.27
      mobilenet_int8  min =   99.83  max =  101.45  avg =  100.58
        mobilenet_v2  min =  244.97  max =  245.23  avg =  245.08
        mobilenet_v3  min =  231.20  max =  231.39  avg =  231.28
          shufflenet  min =  143.88  max =  144.13  avg =  144.01
       shufflenet_v2  min =  192.31  max =  192.94  avg =  192.42
             mnasnet  min =  249.60  max =  249.76  avg =  249.70
     proxylessnasnet  min =  265.37  max =  265.52  avg =  265.44
     efficientnet_b0  min =  374.82  max =  375.16  avg =  374.99
   efficientnetv2_b0  min =  625.12  max =  626.87  avg =  625.78
        regnety_400m  min =  318.96  max =  319.88  avg =  319.25
           blazeface  min =   52.95  max =   53.52  avg =   53.13
           googlenet  min =  803.00  max =  803.40  avg =  803.20
      googlenet_int8  min =  245.09  max =  247.60  avg =  246.22
            resnet18  min =  895.02  max =  896.33  avg =  895.97
       resnet18_int8  min =  181.80  max =  183.03  avg =  182.34
             alexnet  min =  499.71  max =  500.70  avg =  500.26
               vgg16  min = 4311.12  max = 4312.92  avg = 4312.02
          vgg16_int8  min =  998.62  max = 1003.33  avg = 1001.12
            resnet50  min = 2022.53  max = 2023.51  avg = 2023.05
       resnet50_int8  min =  490.57  max =  494.13  avg =  492.33
      squeezenet_ssd  min = 1143.39  max = 1144.38  avg = 1143.78
 squeezenet_ssd_int8  min =  177.60  max =  180.83  avg =  179.01
       mobilenet_ssd  min =  816.07  max =  816.63  avg =  816.43
  mobilenet_ssd_int8  min =  195.79  max =  196.89  avg =  196.37
      mobilenet_yolo  min = 1622.98  max = 1623.30  avg = 1623.19
  mobilenetv2_yolov3  min =  808.26  max =  808.45  avg =  808.37
         yolov4-tiny  min = 1704.12  max = 1704.79  avg = 1704.52
           nanodet_m  min =  389.94  max =  390.18  avg =  390.07
    yolo-fastest-1.1  min =  200.23  max =  200.50  avg =  200.36
      yolo-fastestv2  min =  164.09  max =  164.36  avg =  164.19

thread 4 cpu:

loop_count = 10
num_threads = 4
powersave = 0
gpu_device = -1
cooling_down = 1
          squeezenet  min =   51.93  max =   52.47  avg =   52.14
     squeezenet_int8  min =   42.81  max =   43.33  avg =   43.07
           mobilenet  min =   62.97  max =   66.78  avg =   63.72
      mobilenet_int8  min =   36.24  max =   39.39  avg =   36.69
        mobilenet_v2  min =   61.20  max =   62.30  avg =   61.62
        mobilenet_v3  min =   48.56  max =   75.32  avg =   51.63
          shufflenet  min =   34.52  max =   54.34  avg =   36.62
       shufflenet_v2  min =   27.39  max =   27.79  avg =   27.52
             mnasnet  min =   52.07  max =   54.51  avg =   52.62
     proxylessnasnet  min =   54.93  max =   56.66  avg =   55.43
     efficientnet_b0  min =   81.97  max =   82.88  avg =   82.32
   efficientnetv2_b0  min =   89.38  max =   90.46  avg =   89.88
        regnety_400m  min =   75.65  max =   76.17  avg =   75.81
           blazeface  min =   10.88  max =   11.08  avg =   10.98
           googlenet  min =  129.04  max =  131.39  avg =  129.72
      googlenet_int8  min =  106.56  max =  107.41  avg =  106.93
            resnet18  min =  152.15  max =  166.36  avg =  158.47
       resnet18_int8  min =   85.29  max =   86.35  avg =   85.82
             alexnet  min =  130.07  max =  132.20  avg =  130.82
               vgg16  min =  812.36  max = 1004.54  avg =  903.46
          vgg16_int8  min =  437.49  max = 1657.19  avg =  726.93
            resnet50  min =  315.49  max =  391.08  avg =  348.88
       resnet50_int8  min =  258.68  max =  396.38  avg =  286.31
      squeezenet_ssd  min =  177.35  max =  242.16  avg =  199.35
 squeezenet_ssd_int8  min =  119.77  max =  123.66  avg =  122.09
       mobilenet_ssd  min =  151.96  max =  176.89  avg =  162.62
  mobilenet_ssd_int8  min =   82.95  max =   98.27  avg =   87.34
      mobilenet_yolo  min =  336.06  max =  364.58  avg =  347.83
  mobilenetv2_yolov3  min =  194.39  max =  254.23  avg =  208.75
         yolov4-tiny  min =  250.72  max =  263.13  avg =  254.51
           nanodet_m  min =   71.37  max =   72.80  avg =   71.86
    yolo-fastest-1.1  min =   47.95  max =   57.21  avg =   49.25
      yolo-fastestv2  min =   38.46  max =   38.71  avg =   38.57

thread 4 gpu with vulkan:

[0 V3D 4.2]  queueC=0[1]  queueG=0[1]  queueT=0[1]
[0 V3D 4.2]  bugsbn1=0  bugbilz=0  bugcopc=0  bugihfa=0
[0 V3D 4.2]  fp16-p/s/a=1/1/0  int8-p/s/a=1/1/0
[0 V3D 4.2]  subgroup=16  basic=1  vote=0  ballot=0  shuffle=0
loop_count = 10
num_threads = 4
powersave = 0
gpu_device = 0
cooling_down = 1
          squeezenet  min =  305.52  max =  306.38  avg =  305.92
     squeezenet_int8  min =   44.80  max =   54.47  avg =   46.48
           mobilenet  min =  342.60  max =  342.87  avg =  342.69
      mobilenet_int8  min =   37.23  max =   37.81  avg =   37.42
        mobilenet_v2  min =  245.07  max =  247.07  avg =  245.34
        mobilenet_v3  min =  230.95  max =  231.27  avg =  231.07
          shufflenet  min =  143.72  max =  144.89  avg =  144.03
       shufflenet_v2  min =  192.21  max =  192.48  avg =  192.31
             mnasnet  min =  249.26  max =  250.15  avg =  249.53
     proxylessnasnet  min =  265.09  max =  265.51  avg =  265.26
     efficientnet_b0  min =  374.56  max =  376.18  avg =  374.95
   efficientnetv2_b0  min =  624.94  max =  637.84  avg =  627.64
        regnety_400m  min =  318.95  max =  319.99  avg =  319.19
           blazeface  min =   53.02  max =   53.14  avg =   53.06
           googlenet  min =  803.82  max =  804.77  avg =  804.07
      googlenet_int8  min =  107.19  max =  119.33  avg =  109.47
            resnet18  min =  895.64  max =  897.14  avg =  896.53
       resnet18_int8  min =   86.94  max =   87.82  avg =   87.40
             alexnet  min =  499.26  max =  501.15  avg =  500.33
               vgg16  min = 4315.99  max = 4317.85  avg = 4316.88
          vgg16_int8  min =  412.25  max =  438.12  avg =  418.59
            resnet50  min = 2024.29  max = 2025.05  avg = 2024.64
       resnet50_int8  min =  223.42  max =  272.70  avg =  230.76
      squeezenet_ssd  min = 1144.16  max = 1144.95  avg = 1144.46
 squeezenet_ssd_int8  min =  112.33  max =  122.58  avg =  114.04
       mobilenet_ssd  min =  816.72  max =  817.11  avg =  816.89
  mobilenet_ssd_int8  min =   77.19  max =   77.76  avg =   77.53
      mobilenet_yolo  min = 1623.28  max = 1623.88  avg = 1623.53
  mobilenetv2_yolov3  min =  808.65  max =  808.88  avg =  808.77
         yolov4-tiny  min = 1704.79  max = 1706.01  avg = 1705.30
           nanodet_m  min =  389.68  max =  390.61  avg =  389.88
    yolo-fastest-1.1  min =  199.75  max =  200.16  avg =  199.87
      yolo-fastestv2  min =  163.96  max =  164.44  avg =  164.05

你可能感兴趣的:(杂七杂八,学习随笔记,人工智能,深度学习)