Android 仿B站实现视频换脸

最近逛B站总是看到一些鬼畜视频，于是也模仿写了一个，效果如下：

原视频	换脸后

所需的技术介绍：
1. 视频编解码（用于将视频解码为裸数据，再编码成视频，解码使用MediaCodec，编码同时介绍使用MediaCodec和ffmpeg）
2. 人脸检测，人脸识别（使用虹软人脸识别SDK，虹软人脸识别 - ArcFace SDK介绍及使用注意事项）
3. 帧数据图像处理（格式转换、裁剪、绘制），如果对格式转换和图像裁剪不太了解可以看下我以前写的介绍。图像实战 - RGB、YUV图像格式转换、图像实战 - 裁剪RGB、YUV图像。

视频换脸流程：
1. mp4等视频文件帧数据解析，将编码数据解码为YUV数据
2. 对YUV数据进行格式转换，转换为人脸识别SDK支持的图像格式（NV21）
3. 对NV21图像数据进行人脸追踪
4. 若是第一次检测到的人脸，则对人脸区域进行绘制
5. 将绘制好的数据进行格式转换，用于视频录制
6. 裸数据编码视频流

一、视频编码与解码

视频编码
视频图像数据有极强的相关性，也就是说有大量的冗余信息。其中冗余信息可分为空域冗余信息和时域冗余信息。压缩技术就是将数据中的冗余信息去掉（去除数据之间的相关性），压缩技术包含帧内图像数据压缩技术、帧间图像数据压缩技术和熵编码压缩技术。

这样说可能还是有点抽象，来点具体的：
假如不进行任何压缩，也不考虑其他信息，仅考虑图像内容，那么一个帧数据格式是24位深的RGB格式，fps为30，时长60s，分辨率为1920x1080的视频文件大小将是：1920x1080x3x30x60 (Byte)-> 10935000 (KB)->10678.7 (MB)->10.43 (GB)。而一般情况下，一个时长60s的视频文件是不会有这么大的。
视频解码
视频解码就是视频编码的逆过程，将编码数据解析还原成像素数据。
硬编解码与软编解码
简单来说，硬编解码就是使用非CPU的其他硬件进行编解码，效率较高，Android平台可使用MediaCodec；
而软编解码就是使用CPU进行编解码，速度没硬编解码快，对于Android平台，我们可选择使用ffmpeg。
本文介绍使用MediaCodec解码，同时介绍MediaCodec / ffmpeg 编码

二、MediaCodec解码

1. API介绍

queueInputBuffer
输入流入队列
dequeueInputBuffer
返回待被填充数据ByteBuffer数组下标
getInputBuffer
获取需要被编解码的输入流数据
dequeueOutputBuffer
返回被编解码后的数据的下标
releaseOutputBuffer
释放ByteBuffer，返还给MediaCodec

2. 解码步骤

自定义一个DecodeCallback，便于数据处理

 /**
* Callback interface for video decoding events.
*/
public interface DecodeCallback {
 /**
  * Called when decoding starts.
  *
  * @param width     video width
  * @param height    video height
  * @param frameRate video frame rate
  */
 void onDecodeStart(int width, int height, int frameRate);

 /**
  * Per-frame decode callback, used below Android 5.0.
  *
  * @param data   raw frame data, in the format specified by {@link Mp4Decoder#decodeColorFormat}
  * @param width  width
  * @param height height
  * @param time   timestamp in microseconds
  */
 void onFrameAvailable(byte[] data, int width, int height, long time);

 /**
  * Per-frame decode callback, used on Android 5.0 and above. Prefer this variant: frame data is
  * usually byte-aligned, so the width is not necessarily equal to the stride.
  *
  * @param image decoded frame image, carrying width/height, stride and the raw data
  * @param time  timestamp in microseconds
  */
 void onFrameAvailable(Image image, long time);

 /**
  * Called when decoding has finished.
  */
 void onDecodeFinished();
}

初始化解码器

1. 创建MediaExtractor，并绑定数据
2. 选择视频轨道
3. 获取视频轨道的MediaFormat，并获取视频的宽高、帧率等信息
4. 根据视频的mimeType创建解码器
5. 为解码器设置MediaFormat，用于确认可输出的裸数据格式

/**
 * Opens the given video file, selects its first video track, and creates, configures and starts
 * a decoder for that track. Notifies {@code decodeCallback} (if set) once the decoder is started.
 *
 * @param mp4Path path of the video file to decode
 * @throws IOException              if the data source cannot be read or the decoder cannot be created
 * @throws RuntimeException         if the file contains no video track
 * @throws IllegalArgumentException if the decoder does not support {@code decodeColorFormat}
 */
public void init(String mp4Path) throws IOException {
    // Create the MediaExtractor and bind it to the data source
    extractor = new MediaExtractor();
    extractor.setDataSource(mp4Path);
    // Locate the video track
    int trackIndex = selectTrack(extractor);
    if (trackIndex < 0) {
        throw new RuntimeException("decode failed for file " + mp4Path);
    }
    // Select the video track and read its format
    extractor.selectTrack(trackIndex);
    MediaFormat mediaFormat = extractor.getTrackFormat(trackIndex);

    // Read width, height, MIME type and frame rate of the video
    width = mediaFormat.getInteger(MediaFormat.KEY_WIDTH);
    height = mediaFormat.getInteger(MediaFormat.KEY_HEIGHT);
    String mime = mediaFormat.getString(MediaFormat.KEY_MIME);
    // KEY_FRAME_RATE is optional in a track format; reading a missing key throws.
    // NOTE(review): 30 fps is an assumed fallback for files that do not declare a frame rate.
    int frameRate = mediaFormat.containsKey(MediaFormat.KEY_FRAME_RATE)
            ? mediaFormat.getInteger(MediaFormat.KEY_FRAME_RATE)
            : 30;
    Log.i(TAG, "init: " + frameRate  + " " + width + " " + height);
    // Create a decoder matching the MIME type
    decoder = MediaCodec.createDecoderByType(mime);
    showSupportedColorFormat(decoder.getCodecInfo().getCapabilitiesForType(mime));
    // Request the desired raw output color format, failing fast if it is unsupported
    if (isColorFormatSupported(decodeColorFormat, decoder.getCodecInfo().getCapabilitiesForType(mime))) {
        mediaFormat.setInteger(MediaFormat.KEY_COLOR_FORMAT, decodeColorFormat);
    } else {
        throw new IllegalArgumentException("unable to set decode color format");
    }
    // Configure and start the decoder (no output surface, no crypto)
    decoder.configure(mediaFormat, null, null, 0);
    decoder.start();
    if (decodeCallback != null) {
        decodeCallback.onDecodeStart(width, height, frameRate);
    }
}

/**
 * Scans the tracks of a media file and picks the first one whose MIME type is a video type.
 *
 * @param extractor media extractor already bound to a data source
 * @return index of the first video track, or -1 when the file has none
 */
private int selectTrack(MediaExtractor extractor) {
    final int trackCount = extractor.getTrackCount();
    int index = 0;
    while (index < trackCount) {
        String mimeType = extractor.getTrackFormat(index).getString(MediaFormat.KEY_MIME);
        if (mimeType.startsWith("video/")) {
            return index;
        }
        index++;
    }
    return -1;
}

循环解码
循环步骤如下：
1. 取待输入视频数据的buffer下标
2. 取buffer
3. 通过MediaExtractor读入帧数据
4. 帧数据入输入队列
5. 取出解码后的数据
6. 释放buffer

/**
 * Runs the decode loop: feeds samples from the extractor into the decoder and hands every decoded
 * frame to {@code decodeCallback} until the decoder signals end of stream.
 *
 * <p>Each frame is reported with the presentation timestamp the decoder attached to that output
 * buffer. Every dequeued output buffer is returned to the codec, including empty ones.
 *
 * @param decoder   a configured and started decoder
 * @param extractor extractor with the video track selected
 */
private void decodeFramesToYUV(MediaCodec decoder, MediaExtractor extractor) {
    MediaCodec.BufferInfo info = new MediaCodec.BufferInfo();
    boolean inputFinished = false;
    boolean outputFinished = false;
    while (!outputFinished) {
        if (!inputFinished) {
            // Ask for the index of an input buffer that can be filled
            int inputBufferId = decoder.dequeueInputBuffer(DEFAULT_TIMEOUT_US);
            if (inputBufferId >= 0) {
                ByteBuffer inputBuffer;
                if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.LOLLIPOP) {
                    inputBuffer = decoder.getInputBuffer(inputBufferId);
                } else {
                    inputBuffer = decoder.getInputBuffers()[inputBufferId];
                }
                // Read one encoded sample from the extractor
                int sampleSize = extractor.readSampleData(inputBuffer, 0);
                if (sampleSize < 0) {
                    // No more samples: queue an empty buffer flagged as end of stream
                    decoder.queueInputBuffer(inputBufferId, 0, 0, 0L, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
                    inputFinished = true;
                } else {
                    long presentationTimeUs = extractor.getSampleTime();
                    decoder.queueInputBuffer(inputBufferId, 0, sampleSize, presentationTimeUs, 0);
                    extractor.advance();
                }
            }
        }
        // Fetch decoded output, if any is ready
        int outputBufferId = decoder.dequeueOutputBuffer(info, DEFAULT_TIMEOUT_US);
        if (outputBufferId < 0) {
            continue;
        }
        if ((info.flags & MediaCodec.BUFFER_FLAG_END_OF_STREAM) != 0) {
            outputFinished = true;
        }
        try {
            if (info.size > 0 && decodeCallback != null) {
                if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.LOLLIPOP) {
                    // getOutputImage may return null when the buffer is not a raw video frame
                    Image image = decoder.getOutputImage(outputBufferId);
                    if (image != null) {
                        try {
                            // The extractor has usually advanced past this frame, so its sample
                            // time is not this frame's time; use the decoder-reported timestamp.
                            decodeCallback.onFrameAvailable(image, info.presentationTimeUs);
                        } finally {
                            image.close();
                        }
                    }
                } else {
                    ByteBuffer outputBuffer = decoder.getOutputBuffers()[outputBufferId];
                    // Restrict the view to the valid region described by BufferInfo
                    outputBuffer.position(info.offset);
                    outputBuffer.limit(info.offset + info.size);
                    byte[] data = new byte[width * height * 3 / 2];
                    outputBuffer.get(data, 0, Math.min(data.length, info.size));
                    decodeCallback.onFrameAvailable(data, width, height, info.presentationTimeUs);
                }
            }
        } finally {
            // Always hand the buffer back, even for empty buffers or when the callback throws
            decoder.releaseOutputBuffer(outputBufferId, false);
        }
    }
    if (decodeCallback != null) {
        decodeCallback.onDecodeFinished();
    }
}

三、帧数据格式转换、裁剪

使用 MediaCodec#getOutputImage拿到的数据实际上是YUV422P格式，而虹软人脸识别SDK支持的是NV21（属于YUV420SP）和BGR24格式，这里我们选择将YUV422P转换为NV21格式，转换过程就是将Y数据拷贝，UV数据间隔获取。

还有一个比较坑的是自动内存对齐，经测试发现视频帧取出来的数据的宽高和原始视频的宽高不一致，因此需要再做一次裁剪，否则输出的视频会有明显的绿边。

以下是将帧数据YUV422P转换为NV21并裁剪的代码：

    /**
     * Converts a decoded {@link Image} into NV21 and crops it to the real video size.
     *
     * <p>The three planes are read out in full, merged into an NV21 buffer of
     * {@code frameWidth x frameHeight}, and then the region spanning columns
     * {@code [0, videoWidth)} and rows {@code [frameHeight - videoHeight, frameHeight)} is copied out.
     *
     * @param image       decoded frame
     * @param frameWidth  width of the (padded) decoded frame
     * @param frameHeight height of the (padded) decoded frame
     * @return NV21 data of size {@code videoWidth * videoHeight * 3 / 2}
     */
    private byte[] imageToTargetNv21(Image image, int frameWidth, int frameHeight) {
        Image.Plane[] planes = image.getPlanes();
        byte[] paddedNv21 = new byte[frameWidth * frameHeight * 3 / 2];

        // Size each channel array from its plane buffer first, then drain the buffers in Y, U, V order
        byte[][] channels = new byte[3][];
        for (int p = 0; p < 3; p++) {
            channels[p] = new byte[planes[p].getBuffer().limit()];
        }
        for (int p = 0; p < 3; p++) {
            planes[p].getBuffer().get(channels[p]);
        }

        // Merge the planes into NV21, using the luma row stride as the line width
        int lumaStride = planes[0].getRowStride();
        ImageUtil.yuv422ToYuv420sp(channels[0], channels[1], channels[2], paddedNv21, lumaStride, image.getHeight());

        // Decoded frames are usually padded for alignment; the padding is zero-filled and shows up
        // as green, so cut it away.
        byte[] croppedNv21 = new byte[videoWidth * videoHeight * 3 / 2];
        int cropTop = frameHeight - videoHeight;
        ImageUtil.cropYuv420sp(paddedNv21, croppedNv21, frameWidth, frameHeight, 0, cropTop, videoWidth, frameHeight);
        return croppedNv21;
    }

    /**
     * Merges separate Y, U and V buffers into an NV21 buffer.
     *
     * <p>The Y data is copied verbatim. Chroma is taken from every other byte of {@code u} and
     * {@code v}, starting at index 0, and written as interleaved V/U pairs beginning at
     * {@code stride * height}. Copying stops when either chroma source or the destination is
     * exhausted.
     *
     * <p>NOTE(review): sampling every other byte assumes the chroma buffers hold one wanted sample
     * at each even index — confirm against the plane layout delivered by the decoder.
     *
     * @param y      Y data
     * @param u      U data
     * @param v      V data
     * @param nv21   destination NV21 buffer, must be allocated by the caller
     * @param stride row stride of the Y data
     * @param height image height
     */
    public static void yuv422ToYuv420sp(byte[] y, byte[] u, byte[] v, byte[] nv21, int stride, int height) {
        System.arraycopy(y, 0, nv21, 0, y.length);
        int uvStart = stride * height;
        // Samples sit at the even indices 0, 2, 4, ... of each chroma buffer
        int availableSamples = Math.min((u.length + 1) / 2, (v.length + 1) / 2);
        int destinationPairs = (nv21.length - uvStart) / 2;
        int pairCount = Math.min(availableSamples, destinationPairs);
        for (int pair = 0, dst = uvStart; pair < pairCount; pair++, dst += 2) {
            int src = pair * 2;
            // NV21 stores V before U
            nv21[dst] = v[src];
            nv21[dst + 1] = u[src];
        }
    }

    /**
     * Copies a rectangular region out of a YUV420SP (NV21/NV12) image.
     *
     * @param yuv420sp     source image data
     * @param cropYuv420sp destination for the cropped image, must be allocated by the caller
     * @param width        source width
     * @param height       source height
     * @param left         left edge of the crop region in the source
     * @param top          top edge of the crop region in the source
     * @param right        right edge of the crop region in the source
     * @param bottom       bottom edge of the crop region in the source
     */
    public static void cropYuv420sp(byte[] yuv420sp, byte[] cropYuv420sp, int width, int height, int left, int top,
                                    int right, int bottom) {
        final int regionWidth = right - left;
        final int regionHeight = bottom - top;

        // Read/write cursors for the luma plane
        int srcLuma = top * width + left;
        int dstLuma = 0;
        // Read/write cursors for the interleaved chroma plane
        int srcChroma = width * height + top * (width / 2) + left;
        int dstChroma = regionWidth * regionHeight;

        for (int row = top; row < bottom; row++) {
            System.arraycopy(yuv420sp, srcLuma, cropYuv420sp, dstLuma, regionWidth);
            srcLuma += width;
            dstLuma += regionWidth;

            // One chroma row is copied for every even-numbered source row
            boolean evenRow = (row & 1) == 0;
            if (evenRow) {
                System.arraycopy(yuv420sp, srcChroma, cropYuv420sp, dstChroma, regionWidth);
                srcChroma += width;
                dstChroma += regionWidth;
            }
        }
    }

四、帧数据绘制

人脸目标确定方案
1. 首次检测到人脸时，提取人脸特征，记录faceId
2. 持续人脸检测，当faceId不变，进行绘制
3. 在faceId变更时，重新人脸特征提取并比对，若是同一个人脸，绘制并记录新faceId
4. 循环 2、3 步骤绘制人脸帧数据

在NV21数据上进行绘制

将Bitmap转换为NV21数据
对于Bitmap.Config.ARGB_8888的Bitmap对象，其图像的实际内存排布顺序其实是RGBA，以下是格式为ARGB_8888的Bitmap转换为NV21的方法（其中Bitmap已事先4字节对齐）：

  /**
   * Converts a bitmap into NV21 data by copying its pixels into a byte array and running
   * {@code rgba32ToNv21} over them.
   *
   * @param bitmap source bitmap; the pixel bytes are interpreted as 4-byte RGBA
   * @return NV21 data of size {@code width * height * 3 / 2}
   */
  public static byte[] getNv21FromBitmap(Bitmap bitmap) {
      // Pull the raw pixel bytes out of the bitmap
      byte[] rgba = new byte[bitmap.getAllocationByteCount()];
      bitmap.copyPixelsToBuffer(ByteBuffer.wrap(rgba));

      final int bitmapWidth = bitmap.getWidth();
      final int bitmapHeight = bitmap.getHeight();
      byte[] converted = new byte[bitmapWidth * bitmapHeight * 3 / 2];
      rgba32ToNv21(rgba, converted, bitmapWidth, bitmapHeight);
      return converted;
  }
  /**
   * Converts 4-byte-per-pixel RGBA data to NV21.
   *
   * <p>A Y value is written for every pixel. A V/U pair is written only on even rows, for every
   * second pixel, as long as the pair still fits into the NV21 buffer.
   *
   * @param rgba32 source pixels, 4 bytes per pixel in R, G, B, A order; the alpha byte is skipped
   * @param nv21   destination buffer, must hold at least {@code width * height * 3 / 2} bytes
   * @param width  image width in pixels
   * @param height image height in pixels
   */
  private static void rgba32ToNv21(byte[] rgba32, byte[] nv21, int width, int height) {
      int yIndex = 0;
      int uvIndex = width * height;
      int rgbaIndex = 0;
      int nv21Length = width * height * 3 / 2;
      for (int j = 0; j < height; ++j) {
          for (int i = 0; i < width; ++i) {
              int r = rgba32[rgbaIndex++];
              int g = rgba32[rgbaIndex++];
              int b = rgba32[rgbaIndex++];
              // Skip the alpha byte
              rgbaIndex++;
              // presumably maps the sign-extended byte back into 0..255 — verify against alignIntToByte
              b = alignIntToByte(b);
              g = alignIntToByte(g);
              r = alignIntToByte(r);
              int y = rgbToY(r, g, b);
              nv21[yIndex++] = (byte) alignIntToByte(y);
              // The pair occupies uvIndex and uvIndex + 1, so it fits while uvIndex + 1 is in
              // range. The previous bound (uvIndex < nv21Length - 2) left the final pair unwritten.
              if ((j & 1) == 0 && ((rgbaIndex >> 2) & 1) == 0 && uvIndex + 1 < nv21Length) {
                  int u = rgbToU(r, g, b);
                  int v = rgbToV(r, g, b);
                  // NV21 stores V before U
                  nv21[uvIndex++] = (byte) alignIntToByte(v);
                  nv21[uvIndex++] = (byte) alignIntToByte(u);
              }
          }
      }
  }

将一个小的NV21数据绘制到大NV21数据的指定区域，代码如下：

  /**
   * Draws a small NV21 image onto a region of a larger NV21 image, overwriting the pixels there.
   *
   * <p>{@code left} and {@code top} are rounded down to even values so the chroma pairs of both
   * images stay aligned. No clipping is performed: the small image must fit entirely inside the
   * large one at the requested position.
   *
   * @param nv21            NV21 data of the large image (modified in place)
   * @param width           width of the large image
   * @param height          height of the large image
   * @param left            left edge of the target region in the large image
   * @param top             top edge of the target region in the large image
   * @param waterMarkNv21   NV21 data of the small image
   * @param waterMarkWidth  width of the small image
   * @param waterMarkHeight height of the small image
   */
  public static void drawNv21OnNv21(byte[] nv21, int width, int height, int left, int top, byte[] waterMarkNv21,
                                    int waterMarkWidth,
                                    int waterMarkHeight) {
      // Force even coordinates
      left &= ~1;
      top &= ~1;

      // Luma: copy one row of the small image per row of the target region
      int dstLuma = width * top + left;
      int srcLuma = 0;
      for (int i = 0; i < waterMarkHeight; ++i) {
          System.arraycopy(waterMarkNv21, srcLuma, nv21, dstLuma, waterMarkWidth);
          dstLuma += width;
          srcLuma += waterMarkWidth;
      }

      // Chroma: the interleaved VU plane starts after width * height luma bytes and has one row
      // of `width` bytes per two image rows, so the target row is top / 2.
      int dstChroma = width * height + width * (top / 2) + left;
      int srcChroma = waterMarkWidth * waterMarkHeight;
      for (int i = 0; i < waterMarkHeight; i += 2) {
          System.arraycopy(waterMarkNv21, srcChroma, nv21, dstChroma, waterMarkWidth);
          srcChroma += waterMarkWidth;
          dstChroma += width;
      }
  }

至此，我们完成了帧数据的绘制操作，但是若需要将NV21数据用于编码，还需要进行转换，这里使用MediaCodec和ffmpeg进行编码，分别使用NV12格式数据和YV12格式数据

以下是格式转换代码：

NV21和NV12互转

/**
 * Converts between NV21 and NV12 by copying the Y plane and swapping the two bytes of every
 * chroma pair. Because the operation is a plain swap it works in either direction, and it is
 * safe to pass the same array as both source and destination.
 *
 * @param nv21 source data
 * @param nv12 destination data, must be allocated by the caller
 */
public static void nv21ToNv12(byte[] nv21, byte[] nv12) {
    // Y plane is identical in both layouts
    System.arraycopy(nv21, 0, nv12, 0, nv21.length * 2 / 3);

    int length = Math.min(nv12.length, nv21.length);
    int uvStart = length * 2 / 3;
    for (int i = uvStart; i < length; i += 2) {
        // Read both bytes before writing so an in-place conversion does not clobber its input
        byte first = nv21[i];
        byte second = nv21[i + 1];
        nv12[i] = second;
        nv12[i + 1] = first;
    }
}

NV21转YV12

  /**
   * Converts NV21 data to YV12: the Y plane is copied as is, and the interleaved chroma pairs are
   * split into two consecutive planes. The first byte of each pair goes to the plane directly
   * after Y, the second byte to the plane after that.
   *
   * @param nv21 source NV21 data
   * @param yv12 destination buffer, must be allocated by the caller
   */
  public static void nv21ToYv12(byte[] nv21, byte[] yv12) {
      final int lumaSize = nv21.length * 2 / 3;
      // Luma is identical in both layouts
      System.arraycopy(nv21, 0, yv12, 0, lumaSize);

      // De-interleave the chroma pairs into the two planes
      int firstPlaneCursor = lumaSize;
      int secondPlaneCursor = lumaSize * 5 / 4;
      int source = lumaSize;
      while (source < nv21.length) {
          yv12[firstPlaneCursor++] = nv21[source];
          yv12[secondPlaneCursor++] = nv21[source + 1];
          source += 2;
      }
  }

五、对裸数据编码

MediaCodec方式
流程如下：
1. 创建MediaFormat并设置视频参数
2. 创建MediaCodec Encoder
3. 配置Encoder,start
4. 推流编码，通过MediaMuxer写入编码数据
5. 释放资源

初始化操作，创建Encoder

/**
 * Creates, configures and starts the H.264 (AVC) encoder and marks recording as active.
 *
 * @throws RuntimeException if the encoder cannot be created, configured or started; the codec
 *                          is released before the exception propagates
 */
public void startRecord() {
    // Build the MediaFormat from the MIME type and the video size
    MediaFormat mediaFormat = MediaFormat.createVideoFormat(MediaFormat.MIMETYPE_VIDEO_AVC, videoWidth, videoHeight);
    // Input color format is YUV420SP, i.e. NV12 here
    mediaFormat.setInteger(MediaFormat.KEY_COLOR_FORMAT, MediaCodecInfo.CodecCapabilities.COLOR_FormatYUV420SemiPlanar);
    // Bit rate: higher means better quality
    mediaFormat.setInteger(MediaFormat.KEY_BIT_RATE, 3000000);
    // Frame rate
    mediaFormat.setInteger(MediaFormat.KEY_FRAME_RATE, frameRate);
    // Key-frame interval, in seconds
    mediaFormat.setInteger(MediaFormat.KEY_I_FRAME_INTERVAL, 1);

    try {
        // Create an encoder for the MIME type
        videoMediaCodec = MediaCodec.createEncoderByType(MediaFormat.MIMETYPE_VIDEO_AVC);
    } catch (IOException e) {
        // Keep the original exception as the cause so the stack trace is not lost
        throw new RuntimeException("createEncoderByType failed: " + e.getMessage(), e);
    }
    try {
        // Configure and start the encoder; failures surface as runtime exceptions
        videoMediaCodec.configure(mediaFormat, null, null, MediaCodec.CONFIGURE_FLAG_ENCODE);
        videoMediaCodec.start();
    } catch (RuntimeException e) {
        // Do not leak the codec instance when it cannot be brought up
        videoMediaCodec.release();
        throw e;
    }

    recording = true;
}

推流，对NV12数据进行编码

/**
 * Encodes one frame and writes all encoder output that is currently available to the muxer.
 *
 * <p>If no input buffer becomes available within the timeout, the frame is dropped. The muxer
 * track is added and the muxer started as soon as the encoder reports its output format, either
 * through {@code INFO_OUTPUT_FORMAT_CHANGED} or through a codec-config buffer.
 *
 * @param nv12 raw frame data in NV12 format
 * @param time timestamp in microseconds, or -1 to continue from the previous timestamp using the
 *             configured frame rate
 */
private void encodeVideo(byte[] nv12, long time) {
    // Ask the encoder for the index of a free input buffer
    int inputIndex = videoMediaCodec.dequeueInputBuffer(TIMEOUT_USEC);
    if (inputIndex >= 0) {
        ByteBuffer inputBuffer;
        // Fetch the input buffer in an API-level compatible way
        if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.LOLLIPOP) {
            inputBuffer = videoMediaCodec.getInputBuffer(inputIndex);
        } else {
            inputBuffer = videoMediaCodec.getInputBuffers()[inputIndex];
        }
        inputBuffer.clear();
        // Copy in the data to encode
        inputBuffer.put(nv12);
        // Queue the buffer for encoding
        if (time == -1) {
            videoMediaCodec.queueInputBuffer(inputIndex, 0, nv12.length, lastPresentationTime += (1000 * 1000 / frameRate), 0);
        } else {
            videoMediaCodec.queueInputBuffer(inputIndex, 0, nv12.length, time, 0);
            lastPresentationTime = time;
        }
    }

    MediaCodec.BufferInfo bufferInfo = new MediaCodec.BufferInfo();
    // Drain whatever the encoder has produced so far
    while (true) {
        int outputIndex = videoMediaCodec.dequeueOutputBuffer(bufferInfo, TIMEOUT_USEC);
        if (outputIndex == MediaCodec.INFO_OUTPUT_FORMAT_CHANGED) {
            // The output format is now final; this is where the muxer track can be created
            if (videoTrack < 0) {
                videoTrack = mediaMuxer.addTrack(videoMediaCodec.getOutputFormat());
                mediaMuxer.start();
            }
            continue;
        }
        if (outputIndex < 0) {
            // Nothing more available right now
            break;
        }
        ByteBuffer outputBuffer;
        // Fetch the output buffer (encoded data) in an API-level compatible way
        if (android.os.Build.VERSION.SDK_INT >= android.os.Build.VERSION_CODES.LOLLIPOP) {
            outputBuffer = videoMediaCodec.getOutputBuffer(outputIndex);
        } else {
            outputBuffer = videoMediaCodec.getOutputBuffers()[outputIndex];
        }
        // flags is a bit mask, so test the bit rather than comparing for equality
        if ((bufferInfo.flags & MediaCodec.BUFFER_FLAG_CODEC_CONFIG) != 0) {
            // Codec-specific data travels in the track format, not as a sample
            bufferInfo.size = 0;
            if (videoTrack < 0) {
                videoTrack = mediaMuxer.addTrack(videoMediaCodec.getOutputFormat());
                mediaMuxer.start();
            }
        }
        // Write encoded data, but only once the muxer has a started track to receive it
        if (bufferInfo.size != 0 && videoTrack >= 0) {
            outputBuffer.position(bufferInfo.offset);
            outputBuffer.limit(bufferInfo.offset + bufferInfo.size);
            mediaMuxer.writeSampleData(videoTrack, outputBuffer, bufferInfo);
        }
        // Hand the buffer back to the codec
        videoMediaCodec.releaseOutputBuffer(outputIndex, false);
    }
}

最后释放资源

/**
 * Stops recording and releases the muxer and the encoder.
 *
 * <p>The two resources are torn down independently, and each is released even if stopping it
 * fails, so an error in one does not leak the other. Failures are printed and otherwise ignored.
 */
public void stopRecord() {
    recording = false;
    try {
        try {
            mediaMuxer.stop();
        } finally {
            mediaMuxer.release();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    try {
        try {
            videoMediaCodec.stop();
        } finally {
            videoMediaCodec.release();
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

ffmpeg方式

封装一个C++类，定义如下

/**
 * Encodes raw YV12 frames into a video file using FFmpeg.
 *
 * Usage: startRecord() once, pushFrame() per frame, stopRecord() once.
 * All pointer members start out null so that an instance is in a defined state no matter how it
 * was created.
 */
class Recorder {
  private:
    // Output container context
    AVFormatContext *pFormatCtx = nullptr;
    // Output format guessed from the file name
    AVOutputFormat *outfmt = nullptr;
    // The single video stream of the output file
    AVStream *video_st = nullptr;
    // Encoder context and the encoder itself
    AVCodecContext *pCodecCtx = nullptr;
    AVCodec *pCodec = nullptr;
    // Packet receiving the encoded data
    AVPacket pkt;
    // Buffer pointer passed to av_image_fill_arrays and released in stopRecord
    uint8_t *picture_buf = nullptr;
    // Frame whose data pointers are aimed at the caller's YV12 buffer on each push
    AVFrame *pFrame = nullptr;
    int fps = 0;
    // Number of frames pushed so far, used to derive the presentation timestamp
    int frameIndex = 0;
    // Heap copy of the output path
    char *path = nullptr;
    // Encoder options (key/value pairs)
    AVDictionary *dictionary = nullptr;

  public:
    long System_currentTimeMillis() ;

    // Opens the output file and the encoder; returns 0 on success, -1 on failure.
    int startRecord(const  char *mp4Path, int width, int height, int jFps);

    // Encodes one YV12 frame and writes the result; returns 0 on success, -1 on failure.
    int pushFrame(char *yv12, int width, int height);

    // Writes the file trailer and releases resources.
    int stopRecord();
};

初始化操作
1. 初始化组件
2. 创建AVFormatContext对象，并配置属性
3. 打开输出文件
4. 创建AVStream对象，并配置属性
5. 创建帧数据对象和压缩后的数据对象
6. 写文件头

/**
 * Opens the output file, creates the video stream and encoder, and writes the file header.
 *
 * @param mp4Path output file path; the container format is guessed from its name
 * @param width   frame width in pixels
 * @param height  frame height in pixels
 * @param fps     frame rate
 * @return 0 on success, -1 on any failure
 */
int Recorder::startRecord(const char *mp4Path, int width, int height, int fps) {
  // Register all components
  av_register_all();
  // Keep a private copy of the path; +1 leaves room for the terminating NUL written by strcpy
  path = static_cast<char *>(malloc(strlen(mp4Path) + 1));
  if (path == NULL) {
      return -1;
  }
  strcpy(path, mp4Path);
  // AVFormatContext describes the structure and basic information of the media file or stream
  pFormatCtx = avformat_alloc_context();
  if (pFormatCtx == NULL) {
      return -1;
  }
  // Guess an output format from the file name
  outfmt = av_guess_format(NULL, path, NULL);
  if (outfmt == NULL) {
      return -1;
  }
  pFormatCtx->oformat = outfmt;
  // Open the output file
  if (avio_open(&pFormatCtx->pb, path, AVIO_FLAG_READ_WRITE) < 0) {
      printf("Failed to open output file! \n");
      return -1;
  }
  // Create the video stream
  video_st = avformat_new_stream(pFormatCtx, 0);

  if (video_st == NULL) {
      return -1;
  }
  // Configure the encoder parameters of the video stream
  pCodecCtx = video_st->codec;
  pCodecCtx->codec_id = outfmt->video_codec;
  pCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;
  pCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
  pCodecCtx->width = width;
  pCodecCtx->height = height;
  pCodecCtx->bit_rate = 3000000;
  pCodecCtx->gop_size = 250;


  pCodecCtx->time_base.num = 1;
  pCodecCtx->time_base.den = fps;
  this->fps = fps;

  pCodecCtx->qmin = 10;
  pCodecCtx->qmax = 51;


  // Encoder options; AVDictionary stores key/value pairs
  //H.264
  if (pCodecCtx->codec_id == AV_CODEC_ID_H264) {
      av_dict_set(&dictionary, "preset", "slow", 0);
      av_dict_set(&dictionary, "tune", "zerolatency", 0);
  }
  //H.265
  if (pCodecCtx->codec_id == AV_CODEC_ID_H265) {
      av_dict_set(&dictionary, "preset", "ultrafast", 0);
      av_dict_set(&dictionary, "tune", "zero-latency", 0);
  }
  // Look up the encoder for codec_id
  pCodec = avcodec_find_encoder(pCodecCtx->codec_id);
  if (!pCodec) {
      printf("Can not find encoder! \n");
      return -1;
  }
  if (avcodec_open2(pCodecCtx, pCodec, &dictionary) < 0) {
      printf("Failed to open encoder! \n");
      return -1;
  }

  // Allocate the frame that carries raw data to the encoder
  pFrame = av_frame_alloc();
  if (pFrame == NULL) {
      return -1;
  }
  // The send/receive encode API validates these fields on every frame
  pFrame->format = pCodecCtx->pix_fmt;
  pFrame->width = width;
  pFrame->height = height;
  // No staging buffer is allocated: pushFrame points the frame at the caller's data, so only
  // the line sizes are needed here. Keep the pointer null so the later av_free is a no-op.
  picture_buf = NULL;
  av_image_fill_arrays(pFrame->data, pFrame->linesize,
                       picture_buf, pCodecCtx->pix_fmt, width, height, 4);
  // Write the file header
  if (avformat_write_header(pFormatCtx, NULL) < 0) {
      return -1;
  }
  // Allocate the AVPacket that receives the encoded data
  if (av_new_packet(&pkt,
                    av_image_get_buffer_size(pCodecCtx->pix_fmt, pCodecCtx->width, pCodecCtx->height,
                                             4)) < 0) {
      return -1;
  }
  frameIndex = 0;
  return 0;
}

推流，对YV12数据进行编码
1. 将YV12数据的Y U V分别分配给AVFrame
2. 将帧数据传递给avcodec编码，并获取编码数据
3. 将编码数据写入文件

 int Recorder::pushFrame(char *yv12, int width, int height) {
   /**
    * YV12的数据为3个plane，分别是：
    * 0:大小为width * height的Y:
    * 1:大小为width * height / 4 的V
    * 2:大小为width * height / 4 的U
    *
    * 因此 pFrame->data[i]分别指向不同的内存地址
    */
   pFrame->data[0] = reinterpret_cast(yv12);             // Y
   pFrame->data[1] = reinterpret_cast(yv12 + width * height * 5 / 4);      // U
   pFrame->data[2] = reinterpret_cast(yv12 + width * height);  // V

   pFrame->pts = frameIndex++ * (video_st->time_base.den) / ((video_st->time_base.num) * fps);
   // 编码，avcodec_send_frame发送裸数据后使用avcodec_receive_packet接收编码后内容
   long start = System_currentTimeMillis();
   int ret = avcodec_send_frame(pCodecCtx, pFrame);
   if (ret < 0) {
       printf("avcodec_send_frame failed! \n");
       return -1;
   }
   ret = avcodec_receive_packet(pCodecCtx, &pkt);
   long end = System_currentTimeMillis();
   LOGI("avcodec_encode_video2 cost is %ld", end - start);

   if (ret < 0) {
       printf("avcodec_encode_video2 failed! \n");
       return -1;
   }
   // 写入帧数据
   pkt.stream_index = video_st->index;
   start = System_currentTimeMillis();
   ret = av_write_frame(pFormatCtx, &pkt);
   end = System_currentTimeMillis();
   LOGI("av_write_frame cost is %ld", end - start);
   // 释放AVPacket
   av_packet_unref(&pkt);
   return 0;
 }

结束录制
1. 写文件尾
2. 释放资源

 int Recorder::stopRecord() {
  free(path);
   frameIndex = 0;

   // 写文件尾
   av_write_trailer(pFormatCtx);

   // 释放数据
   if (video_st) {
       avcodec_close(pCodecCtx);
       av_free(pFrame);
       av_free(picture_buf);
   }
   avio_close(pFormatCtx->pb);
   avformat_free_context(pFormatCtx);

   return 0;
 }

再封一层JNI

JNI部分

/**
 * Creates a Recorder and starts recording to the given path.
 *
 * @return an opaque handle to the Recorder, or 0 if the path could not be read or the recorder
 *         failed to start
 */
JNIEXPORT jlong JNICALL
Java_com_wsy_faceswap_ffmpeg_RecordUtil_nativeStartRecord(
        JNIEnv *env,
        jobject /* this */,
        jstring mp4Path,
        jint width, jint height, jint jFps) {
    // Second argument is the optional isCopy out-parameter, which is not needed here
    const char *path = env->GetStringUTFChars(mp4Path, NULL);
    if (path == NULL) {
        return 0;
    }
    Recorder *recorder = new Recorder();
    int ret = recorder->startRecord(path, width, height, jFps);
    env->ReleaseStringUTFChars(mp4Path, path);
    if (ret != 0) {
        // Do not hand a half-initialized recorder to Java; 0 signals failure
        delete recorder;
        return 0;
    }
    return reinterpret_cast<jlong>(recorder);
}
/**
 * Stops the recorder behind the handle and destroys it. The handle must not be used again
 * after this call.
 *
 * @return the result of Recorder::stopRecord, or -1 for a zero handle
 */
JNIEXPORT jint JNICALL
Java_com_wsy_faceswap_ffmpeg_RecordUtil_stopRecord(
        JNIEnv *env,
        jobject /* this */,jlong handle) {
    Recorder *recorder = reinterpret_cast<Recorder *>(handle);
    if (recorder == NULL) {
        return -1;
    }
    jint ret = recorder->stopRecord();
    // The Recorder was allocated with new in nativeStartRecord; free it here
    delete recorder;
    return ret;
}

/**
 * Passes one YV12 frame from Java to the recorder behind the handle.
 *
 * @return the result of Recorder::pushFrame, or -1 if the handle is zero or the array could not
 *         be accessed
 */
JNIEXPORT jint JNICALL
Java_com_wsy_faceswap_ffmpeg_RecordUtil_pushFrame(JNIEnv *env, jobject instance,jlong handle,
                                         jbyteArray yv12_,
                                         jint width, jint height) {
    Recorder *recorder = reinterpret_cast<Recorder *>(handle);
    if (recorder == NULL) {
        return -1;
    }
    jbyte *yv12 = env->GetByteArrayElements(yv12_, NULL);
    if (yv12 == NULL) {
        return -1;
    }
    jint ret = recorder->pushFrame(reinterpret_cast<char *>(yv12), width, height);
    // The frame is only read, so release without copying the contents back
    env->ReleaseByteArrayElements(yv12_, yv12, JNI_ABORT);
    return ret;
}

Java部分

package com.wsy.faceswap.ffmpeg;

/**
 * Java wrapper around the native FFmpeg recorder.
 *
 * <p>Holds the native recorder as an opaque handle; a handle of 0 means no recording is active.
 */
public class RecordUtil {
    static {
        System.loadLibrary("recorder_jni");
    }

    /** Opaque native handle; 0 when no recorder is active. */
    private long handle = 0;

    private native long nativeStartRecord(String path, int width, int height, int fps);

    public native int pushFrame(long handle, byte[] yv12, int width, int height);

    public native int stopRecord(long handle);

    /**
     * Starts recording.
     *
     * @param path   output file path
     * @param width  frame width
     * @param height frame height
     * @param fps    frame rate
     * @return {@code true} if a native recorder handle was obtained
     */
    public boolean startRecord(String path, int width, int height, int fps) {
        handle = nativeStartRecord(path, width, height, fps);
        // The handle is a native pointer stored in a long and may be negative when read as a
        // signed value, so only 0 means failure.
        return handle != 0;
    }


    /**
     * Pushes one YV12 frame to the active recorder.
     *
     * @param yv12   frame data in YV12 format
     * @param width  frame width
     * @param height frame height
     * @return the native result code, or -1 if no recorder is active
     */
    public int pushFrame(byte[] yv12, int width, int height) {
        if (handle == 0) {
            return -1;
        }
        return pushFrame(handle, yv12, width, height);
    }


    /**
     * Stops the active recorder and clears the handle.
     *
     * @return the native result code, or -1 if no recorder is active
     */
    public int stopRecord() {
        if (handle == 0) {
            return -1;
        }
        int code = stopRecord(handle);
        handle = 0;
        return code;
    }

}

六、示例代码

https://github.com/wangshengyang1996/FaceSwap

七、参考博客

最简单的基于FFMPEG的视频编码器（YUV编码为H.264）