Xavier NX实现硬解码H264&H265

提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档

文章目录

  • 前言
  • 一、NX的硬解码?
  • 二、测试代码
  • 三、软硬解对比
  • 四、总结


前言

最近在做一个项目,使用opencv的VideoCapture拉流并把帧送进模型推理,结果发现VideoCapture本身使用的是cpu的软解码,占用了大量的资源,后来就去研究使用设备本身的硬解码资源去分摊cpu的压力。


一、NX的硬解码?

Xavier NX 使用OpenCV+GStreamer实现硬解码

NX的硬解码叫NVDEC,硬编码交NVENC,这两个都是专门的硬件去做的,效率比cpu高很多。这里要多说一句,NX是共享内存,就是gpu,vpu和cpu都是一个内存,不像台式机有专用显存,显存与内存数据格式不一样,需要转换(实测0-2ms),不能拿来直接用。NX就没有这个问题,但是这里我们不讨论这么深的问题,感兴趣的可以私下里去研究下。先上个图,这里需要使用jtop看

Xavier NX实现硬解码H264&H265_第1张图片
注意看红框里面的,如果解码器没有用这个就是OFF,旁边的就是编码器,不在这次讨论范围内。

二、测试代码

首先你要安装了opencv+gstreamer,具体安装方法我会在另一篇文章里面介绍,这里不多说,直接上代码。这里我们使用官方提供的代码

# --------------------------------------------------------
# Camera sample code for Tegra X2/X1
#
# This program could capture and display video from
# IP CAM, USB webcam, or the Tegra onboard camera.
# Refer to the following blog post for how to set up
# and run the code:
#   https://jkjung-avt.github.io/tx2-camera-with-python/
#
# Written by JK Jung 
# --------------------------------------------------------


import sys
import argparse
import subprocess

import cv2


WINDOW_NAME = 'CameraDemo'


def parse_args():
    # Parse input arguments
    desc = 'Capture and display live camera video on Jetson TX2/TX1'
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--rtsp', dest='use_rtsp',
                        help='use IP CAM (remember to also set --uri)',
                        action='store_true')
    parser.add_argument('--uri', dest='rtsp_uri',
                        help='RTSP URI, e.g. rtsp://192.168.1.64:554',
                        default=None, type=str)
    parser.add_argument('--latency', dest='rtsp_latency',
                        help='latency in ms for RTSP [200]',
                        default=200, type=int)
    parser.add_argument('--usb', dest='use_usb',
                        help='use USB webcam (remember to also set --vid)',
                        action='store_true')
    parser.add_argument('--vid', dest='video_dev',
                        help='device # of USB webcam (/dev/video?) [1]',
                        default=1, type=int)
    parser.add_argument('--width', dest='image_width',
                        help='image width [1920]',
                        default=1920, type=int)
    parser.add_argument('--height', dest='image_height',
                        help='image height [1080]',
                        default=1080, type=int)
    args = parser.parse_args()
    return args


def open_cam_rtsp(uri, width, height, latency):
	#H264的写法
    #gst_str = ('rtspsrc location={} latency={} ! '
    #           'rtph264depay ! h264parse ! omxh264dec ! '
    #           'nvvidconv ! '
    #           'video/x-raw, width=(int){}, height=(int){}, '
    #           'format=(string)BGRx ! '
    #           'videoconvert ! appsink').format(uri, latency, width, height)
	#H265的写法
    gst_str = ('rtspsrc location={} latency={} ! '
               'rtph265depay ! h265parse ! omxh265dec ! '
               'nvvidconv ! '
               'video/x-raw, width=(int){}, height=(int){}, '
               'format=(string)BGRx ! '
               'videoconvert ! appsink').format(uri, latency, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)


def open_cam_usb(dev, width, height):
    # We want to set width and height here, otherwise we could just do:
    #     return cv2.VideoCapture(dev)
    gst_str = ('v4l2src device=/dev/video{} ! '
               'video/x-raw, width=(int){}, height=(int){} ! '
               'videoconvert ! appsink').format(dev, width, height)
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)


def open_cam_onboard(width, height):
    gst_elements = str(subprocess.check_output('gst-inspect-1.0'))
    if 'nvcamerasrc' in gst_elements:
        # On versions of L4T prior to 28.1, add 'flip-method=2' into gst_str
        gst_str = ('nvcamerasrc ! '
                   'video/x-raw(memory:NVMM), '
                   'width=(int)2592, height=(int)1458, '
                   'format=(string)I420, framerate=(fraction)30/1 ! '
                   'nvvidconv ! '
                   'video/x-raw, width=(int){}, height=(int){}, '
                   'format=(string)BGRx ! '
                   'videoconvert ! appsink').format(width, height)
    elif 'nvarguscamerasrc' in gst_elements:
        gst_str = ('nvarguscamerasrc ! '
                   'video/x-raw(memory:NVMM), '
                   'width=(int)1920, height=(int)1080, '
                   'format=(string)NV12, framerate=(fraction)30/1 ! '
                   'nvvidconv flip-method=2 ! '
                   'video/x-raw, width=(int){}, height=(int){}, '
                   'format=(string)BGRx ! '
                   'videoconvert ! appsink').format(width, height)
    else:
        raise RuntimeError('onboard camera source not found!')
    return cv2.VideoCapture(gst_str, cv2.CAP_GSTREAMER)


def open_window(width, height):
    cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(WINDOW_NAME, width, height)
    cv2.moveWindow(WINDOW_NAME, 0, 0)
    cv2.setWindowTitle(WINDOW_NAME, 'Camera Demo for Jetson TX2/TX1')


def read_cam(cap):
    show_help = True
    full_scrn = False
    help_text = '"Esc" to Quit, "H" for Help, "F" to Toggle Fullscreen'
    font = cv2.FONT_HERSHEY_PLAIN
    while True:
        #if cv2.getWindowProperty(WINDOW_NAME, 0) < 0:
            # Check to see if the user has closed the window
            # If yes, terminate the program
        #    break
        _, img = cap.read() # grab the next image frame from camera
#        if show_help:
#            cv2.putText(img, help_text, (11, 20), font,
 #                       1.0, (32, 32, 32), 4, cv2.LINE_AA)
  #          cv2.putText(img, help_text, (10, 20), font,
   #                     1.0, (240, 240, 240), 1, cv2.LINE_AA)
    #    cv2.imshow(WINDOW_NAME, img)
        key = cv2.waitKey(10)
        if key == 27: # ESC key: quit program
            break
        elif key == ord('H') or key == ord('h'): # toggle help message
            show_help = not show_help
        elif key == ord('F') or key == ord('f'): # toggle fullscreen
            full_scrn = not full_scrn
            if full_scrn:
                cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_FULLSCREEN)
            else:
                cv2.setWindowProperty(WINDOW_NAME, cv2.WND_PROP_FULLSCREEN,
                                      cv2.WINDOW_NORMAL)


def main():
    args = parse_args()
    print('Called with args:')
    print(args)
    print('OpenCV version: {}'.format(cv2.__version__))

    if args.use_rtsp:
        cap = open_cam_rtsp(args.rtsp_uri,
                            args.image_width,
                            args.image_height,
                            args.rtsp_latency)
    elif args.use_usb:
        cap = open_cam_usb(args.video_dev,
                           args.image_width,
                           args.image_height)
    else: # by default, use the Jetson onboard camera
        cap = open_cam_onboard(args.image_width,
                               args.image_height)

    if not cap.isOpened():
        sys.exit('Failed to open camera!')

    #open_window(args.image_width, args.image_height)
    read_cam(cap)

    cap.release()
    #cv2.destroyAllWindows()


if __name__ == '__main__':
    main()

这里我不需要显示,所以我把显示代码注释掉了,需要的可自行打开,前提是你需要一个显示页面。再看看jtop
Xavier NX实现硬解码H264&H265_第2张图片
再看看红框里面的,如果有数值,说明硬解码成功了,如果没有那就是你本身的环境装的不对。

补充:这里多说一句,关于怎么提前知道流的信息,我们需要借用ffprobe的指令

 ffprobe -show_streams -v quiet -show_format -print_format json -i <url>
{
    "streams": [
        {
            "index": 0,
            "codec_name": "hevc",
            "codec_long_name": "H.265 / HEVC (High Efficiency Video Coding)",
            "profile": "Main",
            "codec_type": "video",
            "codec_tag_string": "[0][0][0][0]",
            "codec_tag": "0x0000",
            "width": 1920,
            "height": 1080,
            "coded_width": 1920,
            "coded_height": 1080,
            "closed_captions": 0,
            "has_b_frames": 2,
            "pix_fmt": "yuv420p",
            "level": 153,
            "color_range": "tv",
            "chroma_location": "left",
            "refs": 1,
            "r_frame_rate": "25/1",
            "avg_frame_rate": "0/0",
            "time_base": "1/90000",
            "start_pts": 3600,
            "start_time": "0.040000",
            "disposition": {
                "default": 0,
                "dub": 0,
                "original": 0,
                "comment": 0,
                "lyrics": 0,
                "karaoke": 0,
                "forced": 0,
                "hearing_impaired": 0,
                "visual_impaired": 0,
                "clean_effects": 0,
                "attached_pic": 0,
                "timed_thumbnails": 0
            }
        }
    ],
    "format": {
        "filename": "rtsp://10.0.42.30/record_hevc_0",
        "nb_streams": 1,
        "nb_programs": 0,
        "format_name": "rtsp",
        "format_long_name": "RTSP input",
        "start_time": "0.040000",
        "probe_score": 100,
        "tags": {
            "title": "No Name"
        }
    }
}

index 就是音视频的索引,我用的这个流是不带声音的,所以只有一个index,一般0是视频,1是音频。codec_name 就是编码格式,你可以先用这个方法获取编码格式再决定选用哪种编码方式,由于H264和H265是最普遍的两种格式,几乎所有的设备都是支持的。所以这里我不介绍其他的了,有兴趣的可自行研究。
其他的参数如帧率,比特率都是可以通过这个命令获取的,rtsp是没有duration的,这个选项不可用,因为流是一直推的,视频文件这个参数是有效的。其它参数请大家自行研究。

三、软硬解对比

Xavier NX实现硬解码H264&H265_第3张图片

由于我需要看硬解码状态所以使用了jtop,jtop对cpu占用还是比较大的,大家可以使用top来看,top对资源占用很少。不过就算这样硬解码的优势还是很明显的。

四、总结

总体来讲是比较简单的,H264和H265两兄弟也基本上能满足日常绝大多数场景的需求了。

你可能感兴趣的:(编解码,ubuntu,nvcodec,python,开发语言,linux,视频编解码,bash)