RGBA 编码为 YUV420SP【NEON】

RGBA >> YUV420SP 

  • C/C++ 朴素实现版
/*
 * Scalar reference encoder: converts 4-byte-per-pixel input into a planar
 * luma block followed by interleaved V/U chroma (the NV21 arrangement).
 *
 * yuv420sp  destination; receives width*height Y bytes, then one V,U pair
 *           for every even-column pixel of every even row.
 * argb      source pixels, 4 bytes each; byte 0 is read as blue, byte 1 as
 *           green, byte 2 as red, and byte 3 is ignored.
 * width     pixels per row.
 * height    number of rows.
 */
void encodeYUV420SP_CPU(unsigned char *__restrict__ yuv420sp,
                        unsigned char *__restrict__ argb, int width, int height) {
    int lumaPos = 0;
    int chromaPos = width * height; /* chroma starts right after the Y plane */

    for (int row = 0; row < height; ++row) {
        /* Chroma is sampled on even rows only (2x vertical subsampling). */
        const int rowHasChroma = (row % 2 == 0);

        for (int col = 0; col < width; ++col, argb += 4) {
            const int blue = argb[0];
            const int green = argb[1];
            const int red = argb[2];

            /* Fixed-point RGB -> Y; the store truncates to 8 bits. */
            yuv420sp[lumaPos++] =
                    (uint8_t) (((66 * red + 129 * green + 25 * blue + 128) >> 8) + 16);

            /* One V,U pair per 2x2 block, taken from its top-left pixel. */
            if (rowHasChroma && col % 2 == 0) {
                yuv420sp[chromaPos++] =
                        (uint8_t) (((112 * red - 94 * green - 18 * blue + 128) >> 8) + 128);
                yuv420sp[chromaPos++] =
                        (uint8_t) (((-38 * red - 74 * green + 112 * blue + 128) >> 8) + 128);
            }
        }
    }
}
  • Neon 实现版(Github上找的)
void encodeYUV420SP_NEON_Intrinsics(unsigned char *__restrict__ yuv420sp,
                                    unsigned char *__restrict__ argb, int width, int height) {
    const uint16x8_t u16_rounding = vdupq_n_u16(128);
    const int16x8_t s16_rounding = vdupq_n_s16(128); // +128, u/v 中内层的 +128
    const int8x8_t s8_rounding = vdup_n_s8(
            128); // -128,即 0x80,最高成了符号位,实际只有 7 位用来表示数字,用来处理符号位, u/v 中外层的 +128
    const uint8x16_t offset = vdupq_n_u8(16);
    const uint16x8_t mask = vdupq_n_s16(255);

//    测试
//    int16x8_t test = vaddl_s8 (s8_rounding, s8_rounding);// -256
//    int8x8_t test_0 = vdup_n_s8(127); // 正常为 127
//    int8x8_t test_1 = vadd_s8(test_0, test_0); // -2,因为计算溢出到符号位

    int frameSize = width * height;

    int yIndex = 0;
    int uvIndex = frameSize;

    int i;
    int j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width >> 4; i++) {
            // Load rgb
            uint8x16x4_t pixel_argb = vld4q_u8(argb);
            argb += 4 * 16;

            uint8x8x2_t uint8_r;
            uint8x8x2_t uint8_g;
            uint8x8x2_t uint8_b;
            uint8_r.val[0] = vget_low_u8(pixel_argb.val[2]);
            uint8_r.val[1] = vget_high_u8(pixel_argb.val[2]);
            uint8_g.val[0] = vget_low_u8(pixel_argb.val[1]);
            uint8_g.val[1] = vget_high_u8(pixel_argb.val[1]);
            uint8_b.val[0] = vget_low_u8(pixel_argb.val[0]);
            uint8_b.val[1] = vget_high_u8(pixel_argb.val[0]);

            // Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint16x8x2_t uint16_y;

            uint8x8_t scalar = vdup_n_u8(66);
            uint8x16_t y;

            uint16_y.val[0] = vmull_u8(uint8_r.val[0], scalar);
            uint16_y.val[1] = vmull_u8(uint8_r.val[1], scalar);
            scalar = vdup_n_u8(129);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_g.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_g.val[1], scalar);
            scalar = vdup_n_u8(25);
            uint16_y.val[0] = vmlal_u8(uint16_y.val[0], uint8_b.val[0], scalar);
            uint16_y.val[1] = vmlal_u8(uint16_y.val[1], uint8_b.val[1], scalar);

            uint16_y.val[0] = vaddq_u16(uint16_y.val[0], u16_rounding);
            uint16_y.val[1] = vaddq_u16(uint16_y.val[1], u16_rounding);

            y = vcombine_u8(vqshrn_n_u16(uint16_y.val[0], 8), vqshrn_n_u16(uint16_y.val[1], 8));
            y = vaddq_u8(y, offset);

            vst1q_u8(yuv420sp + yIndex, y);
            yIndex += 16;

            // 在偶数行中计算 U 和 V
            if (j % 2 == 0) {

//                uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
//                uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;

                int16x8_t u_scalar = vdupq_n_s16(-38);
                int16x8_t v_scalar = vdupq_n_s16(112);

#if 1
                // 因为 u,v 的值只有 y 的一半,所以只取高位计算
                int16x8_t r = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), mask));

//                 测试
//                uint16x8_t test_0 = vreinterpretq_u16_u8(pixel_argb.val[2]);
//                uint16x8_t test_1 = vandq_u16(test_0, mask);
//                int16x8_t  test_2 = vreinterpretq_s16_u16(test_1);

                int16x8_t g = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), mask));
                int16x8_t b = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), mask));
#else // 两者效果是一样的,但是没第一种快
                int16x8_t r = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), 8), 8)));
                int16x8_t g = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), 8), 8)));
                int16x8_t b = vreinterpretq_s16_u16(vmovl_u8(
                        vqshrn_n_u16(vshlq_n_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), 8), 8)));

                // vshlq_n_u16,结果是 uint16x8_t
                // vqshrn_n_u16,结果是 uint8x8_t
                // vmovl_u8,结果是 uint16x8_t
#endif
                int16x8_t u;
                int16x8_t v;
                uint8x8x2_t uv;

                u = vmulq_s16(r, u_scalar);
                v = vmulq_s16(r, v_scalar);

                u_scalar = vdupq_n_s16(-74);
                v_scalar = vdupq_n_s16(-94);
                u = vmlaq_s16(u, g, u_scalar);
                v = vmlaq_s16(v, g, v_scalar);

                u_scalar = vdupq_n_s16(112);
                v_scalar = vdupq_n_s16(-18);
                u = vmlaq_s16(u, b, u_scalar);
                v = vmlaq_s16(v, b, v_scalar);

                u = vaddq_s16(u, s16_rounding);
                v = vaddq_s16(v, s16_rounding);

                uv.val[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(u, 8), s8_rounding));
//                 测试
//                int8x8_t test_3 = vqshrn_n_s16(u, 8);
//                int8x8_t test_4 = vadd_s8(test_3, s8_rounding); //
//                uint8x8_t test_5 = vreinterpret_u8_s8(test_4);

                uv.val[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(v, 8), s8_rounding));

                vst2_u8(yuv420sp + uvIndex, uv);

                uvIndex += 2 * 8;
            }
        }

        // 处理余数的好办法
        for (i = ((width >> 4) << 4); i < width; i++) {
            uint8_t R = argb[2];
            uint8_t G = argb[1];
            uint8_t B = argb[0];
            argb += 4;

            // well known RGB to YUV algorithm
            uint8_t Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;

            // NV21有一个 Y 平面和 V-U 交叉平面,每一个平面的采样值都是 2
            // 意思是每4个 Y 像素(上下左右,不是横向连续的四个)对应1个 V 和1个 U
            // 像素和其他扫描线。
            yuv420sp[yIndex++] = Y;
            if (j % 2 == 0 && i % 2 == 0) {
                yuv420sp[uvIndex++] = V;
                yuv420sp[uvIndex++] = U;
            }
        }
    }
}

通过 objdump 生成 so 库的反汇编

 命令行如下:

>J:\Programs\Android\sdk\ndk-bundle\toolchains\aarch64-linux-android-4.9\prebuilt\windows-x86_64\bin\aarch64-linux-android-objdump.exe -d libnative-lib.so > objdump_d.txt

 

一.   arm-linux-objdump

常用来显示二进制文件信息,常用来查看反汇编代码

二.   常用选项:

1.-b bfdname 指定目标码格式

2.--disassemble或者-d 反汇编可执行段

3.--disassemble-all或者-D 反汇编所有段

4.-EB,-EL指定字节序

5.--file-headers或者-f 显示文件的整体头部摘要信息

6.--section-headers,--headers或者-h 显示目标文件中各个段的头部摘要信息

7.--info 或者-i 显示支持的目标文件格式和CPU架构

8.--section=name或者-j name显示指定section 的信息

9.--architecture=machine或者-m machine 指定反汇编目标文件时使用的架构

 

三.   示例

arm-linux-objdump -D elf_file > dis_file 或者

arm-linux-objdump -D -b binary -m arm bin_file > dis_file

  •  C/C++  实现版:
000410dc <_Z18encodeYUV420SP_CPUPhS_ii>:
   410dc:	b5b0      	push	{r4, r5, r7, lr}
   410de:	af02      	add	r7, sp, #8
   410e0:	b08f      	sub	sp, #60	; 0x3c
   410e2:	469c      	mov	ip, r3
   410e4:	4696      	mov	lr, r2
   410e6:	460c      	mov	r4, r1
   410e8:	4605      	mov	r5, r0
   410ea:	900e      	str	r0, [sp, #56]	; 0x38
   410ec:	910d      	str	r1, [sp, #52]	; 0x34
   410ee:	920c      	str	r2, [sp, #48]	; 0x30
   410f0:	930b      	str	r3, [sp, #44]	; 0x2c
   410f2:	980c      	ldr	r0, [sp, #48]	; 0x30
   410f4:	990b      	ldr	r1, [sp, #44]	; 0x2c
   410f6:	4348      	muls	r0, r1
   410f8:	900a      	str	r0, [sp, #40]	; 0x28
   410fa:	2000      	movs	r0, #0
   410fc:	9009      	str	r0, [sp, #36]	; 0x24
   410fe:	990a      	ldr	r1, [sp, #40]	; 0x28
   41100:	9108      	str	r1, [sp, #32]
   41102:	9007      	str	r0, [sp, #28]
   41104:	f8cd c00c 	str.w	ip, [sp, #12]
   41108:	f8cd e008 	str.w	lr, [sp, #8]
   4110c:	9401      	str	r4, [sp, #4]
   4110e:	9500      	str	r5, [sp, #0]
   41110:	e7ff      	b.n	41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41112:	9807      	ldr	r0, [sp, #28]
   41114:	990b      	ldr	r1, [sp, #44]	; 0x2c
   41116:	4288      	cmp	r0, r1
   41118:	f280 808c 	bge.w	41234 <_Z18encodeYUV420SP_CPUPhS_ii+0x158>
   4111c:	e7ff      	b.n	4111e <_Z18encodeYUV420SP_CPUPhS_ii+0x42>
   4111e:	2000      	movs	r0, #0
   41120:	9006      	str	r0, [sp, #24]
   41122:	e7ff      	b.n	41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   41124:	9806      	ldr	r0, [sp, #24]
   41126:	990c      	ldr	r1, [sp, #48]	; 0x30
   41128:	4288      	cmp	r0, r1
   4112a:	da7e      	bge.n	4122a <_Z18encodeYUV420SP_CPUPhS_ii+0x14e>
   4112c:	e7ff      	b.n	4112e <_Z18encodeYUV420SP_CPUPhS_ii+0x52>
   4112e:	980d      	ldr	r0, [sp, #52]	; 0x34
   41130:	7880      	ldrb	r0, [r0, #2]
   41132:	f807 0c2d 	strb.w	r0, [r7, #-45]
   41136:	980d      	ldr	r0, [sp, #52]	; 0x34
   41138:	7840      	ldrb	r0, [r0, #1]
   4113a:	f807 0c2e 	strb.w	r0, [r7, #-46]
   4113e:	980d      	ldr	r0, [sp, #52]	; 0x34
   41140:	7800      	ldrb	r0, [r0, #0]
   41142:	f807 0c2f 	strb.w	r0, [r7, #-47]
   41146:	980d      	ldr	r0, [sp, #52]	; 0x34
   41148:	3004      	adds	r0, #4
   4114a:	900d      	str	r0, [sp, #52]	; 0x34
   4114c:	f817 0c2d 	ldrb.w	r0, [r7, #-45]
   41150:	eb00 1040 	add.w	r0, r0, r0, lsl #5
   41154:	f817 1c2e 	ldrb.w	r1, [r7, #-46]
   41158:	eb01 11c1 	add.w	r1, r1, r1, lsl #7
   4115c:	eb01 0040 	add.w	r0, r1, r0, lsl #1
   41160:	f817 1c2f 	ldrb.w	r1, [r7, #-47]
   41164:	2219      	movs	r2, #25
   41166:	fb11 0002 	smlabb	r0, r1, r2, r0
   4116a:	3080      	adds	r0, #128	; 0x80
   4116c:	2110      	movs	r1, #16
   4116e:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   41172:	f88d 0014 	strb.w	r0, [sp, #20]
   41176:	f817 0c2d 	ldrb.w	r0, [r7, #-45]
   4117a:	f06f 0125 	mvn.w	r1, #37	; 0x25
   4117e:	fb10 f001 	smulbb	r0, r0, r1
   41182:	f817 1c2e 	ldrb.w	r1, [r7, #-46]
   41186:	224a      	movs	r2, #74	; 0x4a
   41188:	fb01 0012 	mls	r0, r1, r2, r0
   4118c:	f817 1c2f 	ldrb.w	r1, [r7, #-47]
   41190:	ebc1 01c1 	rsb	r1, r1, r1, lsl #3
   41194:	eb00 1001 	add.w	r0, r0, r1, lsl #4
   41198:	3080      	adds	r0, #128	; 0x80
   4119a:	2180      	movs	r1, #128	; 0x80
   4119c:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   411a0:	f807 0c31 	strb.w	r0, [r7, #-49]
   411a4:	f817 0c2d 	ldrb.w	r0, [r7, #-45]
   411a8:	ebc0 00c0 	rsb	r0, r0, r0, lsl #3
   411ac:	f817 2c2e 	ldrb.w	r2, [r7, #-46]
   411b0:	235e      	movs	r3, #94	; 0x5e
   411b2:	fb12 f203 	smulbb	r2, r2, r3
   411b6:	ebc2 1000 	rsb	r0, r2, r0, lsl #4
   411ba:	f817 2c2f 	ldrb.w	r2, [r7, #-47]
   411be:	eb02 02c2 	add.w	r2, r2, r2, lsl #3
   411c2:	eba0 0042 	sub.w	r0, r0, r2, lsl #1
   411c6:	3080      	adds	r0, #128	; 0x80
   411c8:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   411cc:	f807 0c32 	strb.w	r0, [r7, #-50]
   411d0:	f89d 0014 	ldrb.w	r0, [sp, #20]
   411d4:	990e      	ldr	r1, [sp, #56]	; 0x38
   411d6:	9a09      	ldr	r2, [sp, #36]	; 0x24
   411d8:	1c53      	adds	r3, r2, #1
   411da:	9309      	str	r3, [sp, #36]	; 0x24
   411dc:	5488      	strb	r0, [r1, r2]
   411de:	9807      	ldr	r0, [sp, #28]
   411e0:	eb00 71d0 	add.w	r1, r0, r0, lsr #31
   411e4:	f021 0101 	bic.w	r1, r1, #1
   411e8:	1a40      	subs	r0, r0, r1
   411ea:	2800      	cmp	r0, #0
   411ec:	d118      	bne.n	41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   411ee:	e7ff      	b.n	411f0 <_Z18encodeYUV420SP_CPUPhS_ii+0x114>
   411f0:	9806      	ldr	r0, [sp, #24]
   411f2:	eb00 71d0 	add.w	r1, r0, r0, lsr #31
   411f6:	f021 0101 	bic.w	r1, r1, #1
   411fa:	1a40      	subs	r0, r0, r1
   411fc:	2800      	cmp	r0, #0
   411fe:	d10f      	bne.n	41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41200:	e7ff      	b.n	41202 <_Z18encodeYUV420SP_CPUPhS_ii+0x126>
   41202:	f817 0c32 	ldrb.w	r0, [r7, #-50]
   41206:	990e      	ldr	r1, [sp, #56]	; 0x38
   41208:	9a08      	ldr	r2, [sp, #32]
   4120a:	1c53      	adds	r3, r2, #1
   4120c:	9308      	str	r3, [sp, #32]
   4120e:	5488      	strb	r0, [r1, r2]
   41210:	f817 0c31 	ldrb.w	r0, [r7, #-49]
   41214:	990e      	ldr	r1, [sp, #56]	; 0x38
   41216:	9a08      	ldr	r2, [sp, #32]
   41218:	1c53      	adds	r3, r2, #1
   4121a:	9308      	str	r3, [sp, #32]
   4121c:	5488      	strb	r0, [r1, r2]
   4121e:	e7ff      	b.n	41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
   41220:	e7ff      	b.n	41222 <_Z18encodeYUV420SP_CPUPhS_ii+0x146>
   41222:	9806      	ldr	r0, [sp, #24]
   41224:	3001      	adds	r0, #1
   41226:	9006      	str	r0, [sp, #24]
   41228:	e77c      	b.n	41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
   4122a:	e7ff      	b.n	4122c <_Z18encodeYUV420SP_CPUPhS_ii+0x150>
   4122c:	9807      	ldr	r0, [sp, #28]
   4122e:	3001      	adds	r0, #1
   41230:	9007      	str	r0, [sp, #28]
   41232:	e76e      	b.n	41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
   41234:	b00f      	add	sp, #60	; 0x3c
   41236:	bdb0      	pop	{r4, r5, r7, pc}
  • Neon 实现版:
00041238 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii>:
   41238:	b5f0      	push	{r4, r5, r6, r7, lr}
   4123a:	af03      	add	r7, sp, #12
   4123c:	e92d 0b00 	stmdb	sp!, {r8, r9, fp}
   41240:	f5ad 6d35 	sub.w	sp, sp, #2896	; 0xb50
   41244:	466c      	mov	r4, sp
   41246:	f36f 0403 	bfc	r4, #0, #4
   4124a:	46a5      	mov	sp, r4
   4124c:	f50d 6cd6 	add.w	ip, sp, #1712	; 0x6b0
   41250:	f10d 0e30 	add.w	lr, sp, #48	; 0x30
   41254:	461c      	mov	r4, r3
   41256:	4615      	mov	r5, r2
   41258:	460e      	mov	r6, r1
   4125a:	4680      	mov	r8, r0
   4125c:	f8df 9c0c 	ldr.w	r9, [pc, #3084]	; 41e6c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc34>
   41260:	44f9      	add	r9, pc
   41262:	f8d9 9000 	ldr.w	r9, [r9]
   41266:	f8d9 9000 	ldr.w	r9, [r9]
   4126a:	f8cd 9024 	str.w	r9, [sp, #36]	; 0x24
   4126e:	906f      	str	r0, [sp, #444]	; 0x1bc
   41270:	916e      	str	r1, [sp, #440]	; 0x1b8
   41272:	926d      	str	r2, [sp, #436]	; 0x1b4
   41274:	936c      	str	r3, [sp, #432]	; 0x1b0
   41276:	2080      	movs	r0, #128	; 0x80
   41278:	f8ad 01ee 	strh.w	r0, [sp, #494]	; 0x1ee
   4127c:	f50d 71f7 	add.w	r1, sp, #494	; 0x1ee
   41280:	f9e1 0c7f 	vld1.16	{d16[]-d17[]}, [r1 :16]
   41284:	a970      	add	r1, sp, #448	; 0x1c0
   41286:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   4128a:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   4128e:	a974      	add	r1, sp, #464	; 0x1d0
   41290:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41294:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41298:	a968      	add	r1, sp, #416	; 0x1a0
   4129a:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   4129e:	f8ad 0a4e 	strh.w	r0, [sp, #2638]	; 0xa4e
   412a2:	f60d 214e 	addw	r1, sp, #2638	; 0xa4e
   412a6:	f9e1 0c7f 	vld1.16	{d16[]-d17[]}, [r1 :16]
   412aa:	f50d 6122 	add.w	r1, sp, #2592	; 0xa20
   412ae:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   412b2:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   412b6:	f50d 6123 	add.w	r1, sp, #2608	; 0xa30
   412ba:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   412be:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   412c2:	a964      	add	r1, sp, #400	; 0x190
   412c4:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   412c8:	f88d 0a1f 	strb.w	r0, [sp, #2591]	; 0xa1f
   412cc:	f60d 201f 	addw	r0, sp, #2591	; 0xa1f
   412d0:	f9e0 2c0f 	vld1.8	{d18[]}, [r0]
   412d4:	edcc 2bd6 	vstr	d18, [ip, #856]	; 0x358
   412d8:	eddc 2bd6 	vldr	d18, [ip, #856]	; 0x358
   412dc:	edcc 2bd8 	vstr	d18, [ip, #864]	; 0x360
   412e0:	eddc 2bd8 	vldr	d18, [ip, #864]	; 0x360
   412e4:	edce 2b56 	vstr	d18, [lr, #344]	; 0x158
   412e8:	2010      	movs	r0, #16
   412ea:	f88d 0a07 	strb.w	r0, [sp, #2567]	; 0xa07
   412ee:	f60d 2007 	addw	r0, sp, #2567	; 0xa07
   412f2:	f9e0 0c2f 	vld1.8	{d16[]-d17[]}, [r0]
   412f6:	f50d 601e 	add.w	r0, sp, #2528	; 0x9e0
   412fa:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   412fe:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41302:	f50d 601f 	add.w	r0, sp, #2544	; 0x9f0
   41306:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   4130a:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   4130e:	a85c      	add	r0, sp, #368	; 0x170
   41310:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41314:	20ff      	movs	r0, #255	; 0xff
   41316:	f8ad 09de 	strh.w	r0, [sp, #2526]	; 0x9de
   4131a:	f60d 10de 	addw	r0, sp, #2526	; 0x9de
   4131e:	f9e0 0c7f 	vld1.16	{d16[]-d17[]}, [r0 :16]
   41322:	f50d 601b 	add.w	r0, sp, #2480	; 0x9b0
   41326:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   4132a:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   4132e:	f50d 601c 	add.w	r0, sp, #2496	; 0x9c0
   41332:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41336:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   4133a:	a858      	add	r0, sp, #352	; 0x160
   4133c:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41340:	986d      	ldr	r0, [sp, #436]	; 0x1b4
   41342:	996c      	ldr	r1, [sp, #432]	; 0x1b0
   41344:	4348      	muls	r0, r1
   41346:	9057      	str	r0, [sp, #348]	; 0x15c
   41348:	2000      	movs	r0, #0
   4134a:	9056      	str	r0, [sp, #344]	; 0x158
   4134c:	9957      	ldr	r1, [sp, #348]	; 0x15c
   4134e:	9155      	str	r1, [sp, #340]	; 0x154
   41350:	9053      	str	r0, [sp, #332]	; 0x14c
   41352:	f8cd c020 	str.w	ip, [sp, #32]
   41356:	f8cd e01c 	str.w	lr, [sp, #28]
   4135a:	9406      	str	r4, [sp, #24]
   4135c:	9505      	str	r5, [sp, #20]
   4135e:	9604      	str	r6, [sp, #16]
   41360:	f8cd 800c 	str.w	r8, [sp, #12]
   41364:	e7ff      	b.n	41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41366:	9853      	ldr	r0, [sp, #332]	; 0x14c
   41368:	996c      	ldr	r1, [sp, #432]	; 0x1b0
   4136a:	4288      	cmp	r0, r1
   4136c:	f280 856d 	bge.w	41e4a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc12>
   41370:	e7ff      	b.n	41372 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x13a>
   41372:	2000      	movs	r0, #0
   41374:	9054      	str	r0, [sp, #336]	; 0x150
   41376:	e7ff      	b.n	41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41378:	9854      	ldr	r0, [sp, #336]	; 0x150
   4137a:	996d      	ldr	r1, [sp, #436]	; 0x1b4
   4137c:	ebb0 1f21 	cmp.w	r0, r1, asr #4
   41380:	f280 84d5 	bge.w	41d2e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaf6>
   41384:	e7ff      	b.n	41386 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x14e>
   41386:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   41388:	f960 010d 	vld4.8	{d16,d18,d20,d22}, [r0]!
   4138c:	f960 110f 	vld4.8	{d17,d19,d21,d23}, [r0]
   41390:	ef66 81f6 	vorr	q12, q11, q11
   41394:	ef62 a1f2 	vorr	q13, q9, q9
   41398:	ef64 c1f4 	vorr	q14, q10, q10
   4139c:	ef60 e1f0 	vorr	q15, q8, q8
   413a0:	f50d 602c 	add.w	r0, sp, #2752	; 0xac0
   413a4:	f940 eacf 	vst1.64	{d30-d31}, [r0]
   413a8:	f100 0120 	add.w	r1, r0, #32
   413ac:	f941 cacf 	vst1.64	{d28-d29}, [r1]
   413b0:	4602      	mov	r2, r0
   413b2:	f962 caed 	vld1.64	{d28-d29}, [r2 :128]!
   413b6:	f942 aacf 	vst1.64	{d26-d27}, [r2]
   413ba:	3030      	adds	r0, #48	; 0x30
   413bc:	f940 8acf 	vst1.64	{d24-d25}, [r0]
   413c0:	f50d 6330 	add.w	r3, sp, #2816	; 0xb00
   413c4:	f103 0c20 	add.w	ip, r3, #32
   413c8:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   413cc:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   413d0:	4619      	mov	r1, r3
   413d2:	f941 caed 	vst1.64	{d28-d29}, [r1 :128]!
   413d6:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   413da:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   413de:	f103 0230 	add.w	r2, r3, #48	; 0x30
   413e2:	f960 8aef 	vld1.64	{d24-d25}, [r0 :128]
   413e6:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   413ea:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   413ec:	3040      	adds	r0, #64	; 0x40
   413ee:	906e      	str	r0, [sp, #440]	; 0x1b8
   413f0:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   413f4:	f50d 601a 	add.w	r0, sp, #2464	; 0x9a0
   413f8:	f940 8aef 	vst1.64	{d24-d25}, [r0 :128]
   413fc:	f960 8aef 	vld1.64	{d24-d25}, [r0 :128]
   41400:	eeb0 0b68 	vmov.f64	d0, d24
   41404:	9808      	ldr	r0, [sp, #32]
   41406:	ed80 0bba 	vstr	d0, [r0, #744]	; 0x2e8
   4140a:	ed90 0bba 	vldr	d0, [r0, #744]	; 0x2e8
   4140e:	f50d 622b 	add.w	r2, sp, #2736	; 0xab0
   41412:	f902 071d 	vst1.8	{d0}, [r2 :64]!
   41416:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   4141a:	f50d 6c18 	add.w	ip, sp, #2432	; 0x980
   4141e:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41422:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   41426:	eeb0 0b69 	vmov.f64	d0, d25
   4142a:	ed80 0bb2 	vstr	d0, [r0, #712]	; 0x2c8
   4142e:	ed90 0bb2 	vldr	d0, [r0, #712]	; 0x2c8
   41432:	ed82 0b00 	vstr	d0, [r2]
   41436:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4143a:	f50d 6c16 	add.w	ip, sp, #2400	; 0x960
   4143e:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41442:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   41446:	eeb0 0b68 	vmov.f64	d0, d24
   4144a:	ed80 0baa 	vstr	d0, [r0, #680]	; 0x2a8
   4144e:	ed90 0baa 	vldr	d0, [r0, #680]	; 0x2a8
   41452:	f50d 6c2a 	add.w	ip, sp, #2720	; 0xaa0
   41456:	f90c 071d 	vst1.8	{d0}, [ip :64]!
   4145a:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4145e:	f50d 6114 	add.w	r1, sp, #2368	; 0x940
   41462:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41466:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4146a:	eeb0 0b69 	vmov.f64	d0, d25
   4146e:	ed80 0ba2 	vstr	d0, [r0, #648]	; 0x288
   41472:	ed90 0ba2 	vldr	d0, [r0, #648]	; 0x288
   41476:	ed8c 0b00 	vstr	d0, [ip]
   4147a:	f963 8aef 	vld1.64	{d24-d25}, [r3 :128]
   4147e:	f50d 6112 	add.w	r1, sp, #2336	; 0x920
   41482:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41486:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4148a:	eeb0 0b68 	vmov.f64	d0, d24
   4148e:	ed80 0b9a 	vstr	d0, [r0, #616]	; 0x268
   41492:	ed90 0b9a 	vldr	d0, [r0, #616]	; 0x268
   41496:	f50d 6129 	add.w	r1, sp, #2704	; 0xa90
   4149a:	f901 071d 	vst1.8	{d0}, [r1 :64]!
   4149e:	f963 8aef 	vld1.64	{d24-d25}, [r3 :128]
   414a2:	f50d 6310 	add.w	r3, sp, #2304	; 0x900
   414a6:	f943 8aef 	vst1.64	{d24-d25}, [r3 :128]
   414aa:	f963 8aef 	vld1.64	{d24-d25}, [r3 :128]
   414ae:	eeb0 0b69 	vmov.f64	d0, d25
   414b2:	ed80 0b92 	vstr	d0, [r0, #584]	; 0x248
   414b6:	ed90 0b92 	vldr	d0, [r0, #584]	; 0x248
   414ba:	ed81 0b00 	vstr	d0, [r1]
   414be:	2342      	movs	r3, #66	; 0x42
   414c0:	f88d 38f7 	strb.w	r3, [sp, #2295]	; 0x8f7
   414c4:	f60d 03f7 	addw	r3, sp, #2295	; 0x8f7
   414c8:	f9a3 0c0f 	vld1.8	{d0[]}, [r3]
   414cc:	ed80 0b8c 	vstr	d0, [r0, #560]	; 0x230
   414d0:	ed90 0b8c 	vldr	d0, [r0, #560]	; 0x230
   414d4:	ed80 0b8e 	vstr	d0, [r0, #568]	; 0x238
   414d8:	ed90 0b8e 	vldr	d0, [r0, #568]	; 0x238
   414dc:	9b07      	ldr	r3, [sp, #28]
   414de:	ed83 0b44 	vstr	d0, [r3, #272]	; 0x110
   414e2:	f50d 6400 	add.w	r4, sp, #2048	; 0x800
   414e6:	ed94 0bac 	vldr	d0, [r4, #688]	; 0x2b0
   414ea:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   414ee:	ed80 0b8a 	vstr	d0, [r0, #552]	; 0x228
   414f2:	ed80 1b88 	vstr	d1, [r0, #544]	; 0x220
   414f6:	ed90 0b8a 	vldr	d0, [r0, #552]	; 0x228
   414fa:	ed90 1b88 	vldr	d1, [r0, #544]	; 0x220
   414fe:	ffc0 8c01 	vmull.u8	q12, d0, d1
   41502:	f50d 6e0c 	add.w	lr, sp, #2240	; 0x8c0
   41506:	f94e 8aef 	vst1.64	{d24-d25}, [lr :128]
   4150a:	f96e 8aef 	vld1.64	{d24-d25}, [lr :128]
   4150e:	f50d 6e27 	add.w	lr, sp, #2672	; 0xa70
   41512:	f94e 8aef 	vst1.64	{d24-d25}, [lr :128]
   41516:	ed92 0b00 	vldr	d0, [r2]
   4151a:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   4151e:	ed80 0b82 	vstr	d0, [r0, #520]	; 0x208
   41522:	ed80 1b80 	vstr	d1, [r0, #512]	; 0x200
   41526:	ed90 0b82 	vldr	d0, [r0, #520]	; 0x208
   4152a:	ed90 1b80 	vldr	d1, [r0, #512]	; 0x200
   4152e:	ffc0 8c01 	vmull.u8	q12, d0, d1
   41532:	f50d 620a 	add.w	r2, sp, #2208	; 0x8a0
   41536:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   4153a:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   4153e:	2281      	movs	r2, #129	; 0x81
   41540:	f88d 289f 	strb.w	r2, [sp, #2207]	; 0x89f
   41544:	f60d 029f 	addw	r2, sp, #2207	; 0x89f
   41548:	f9a2 0c0f 	vld1.8	{d0[]}, [r2]
   4154c:	ed80 0b76 	vstr	d0, [r0, #472]	; 0x1d8
   41550:	ed90 0b76 	vldr	d0, [r0, #472]	; 0x1d8
   41554:	ed80 0b78 	vstr	d0, [r0, #480]	; 0x1e0
   41558:	ed90 0b78 	vldr	d0, [r0, #480]	; 0x1e0
   4155c:	ed83 0b44 	vstr	d0, [r3, #272]	; 0x110
   41560:	4672      	mov	r2, lr
   41562:	f962 aa6d 	vld1.16	{d26-d27}, [r2 :128]!
   41566:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   4156a:	ed90 0bfc 	vldr	d0, [r0, #1008]	; 0x3f0
   4156e:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   41572:	f50d 6405 	add.w	r4, sp, #2128	; 0x850
   41576:	f944 aaef 	vst1.64	{d26-d27}, [r4 :128]
   4157a:	ed80 0b66 	vstr	d0, [r0, #408]	; 0x198
   4157e:	ed80 1b64 	vstr	d1, [r0, #400]	; 0x190
   41582:	f964 8aef 	vld1.64	{d24-d25}, [r4 :128]
   41586:	ed90 0b66 	vldr	d0, [r0, #408]	; 0x198
   4158a:	ed90 1b64 	vldr	d1, [r0, #400]	; 0x190
   4158e:	ed80 0b74 	vstr	d0, [r0, #464]	; 0x1d0
   41592:	ed80 1b72 	vstr	d1, [r0, #456]	; 0x1c8
   41596:	ed90 0b74 	vldr	d0, [r0, #464]	; 0x1d0
   4159a:	ed90 1b72 	vldr	d1, [r0, #456]	; 0x1c8
   4159e:	ffc0 ac01 	vmull.u8	q13, d0, d1
   415a2:	f50d 6406 	add.w	r4, sp, #2144	; 0x860
   415a6:	f944 aaef 	vst1.64	{d26-d27}, [r4 :128]
   415aa:	f964 aaef 	vld1.64	{d26-d27}, [r4 :128]
   415ae:	ef58 88ea 	vadd.i16	q12, q12, q13
   415b2:	f50d 6403 	add.w	r4, sp, #2096	; 0x830
   415b6:	f944 8aef 	vst1.64	{d24-d25}, [r4 :128]
   415ba:	f964 8aef 	vld1.64	{d24-d25}, [r4 :128]
   415be:	f94e 8aef 	vst1.64	{d24-d25}, [lr :128]
   415c2:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   415c6:	ed9c 0b00 	vldr	d0, [ip]
   415ca:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   415ce:	f50d 6c00 	add.w	ip, sp, #2048	; 0x800
   415d2:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   415d6:	ed80 0b52 	vstr	d0, [r0, #328]	; 0x148
   415da:	ed80 1b50 	vstr	d1, [r0, #320]	; 0x140
   415de:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   415e2:	ed90 0b52 	vldr	d0, [r0, #328]	; 0x148
   415e6:	ed90 1b50 	vldr	d1, [r0, #320]	; 0x140
   415ea:	ed80 0b5e 	vstr	d0, [r0, #376]	; 0x178
   415ee:	ed80 1b5c 	vstr	d1, [r0, #368]	; 0x170
   415f2:	ed90 0b5e 	vldr	d0, [r0, #376]	; 0x178
   415f6:	ed90 1b5c 	vldr	d1, [r0, #368]	; 0x170
   415fa:	ffc0 ac01 	vmull.u8	q13, d0, d1
   415fe:	f50d 6c01 	add.w	ip, sp, #2064	; 0x810
   41602:	f94c aaef 	vst1.64	{d26-d27}, [ip :128]
   41606:	f96c aaef 	vld1.64	{d26-d27}, [ip :128]
   4160a:	ef58 88ea 	vadd.i16	q12, q12, q13
   4160e:	f50d 6cfc 	add.w	ip, sp, #2016	; 0x7e0
   41612:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41616:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   4161a:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   4161e:	f04f 0c19 	mov.w	ip, #25
   41622:	f88d c7df 	strb.w	ip, [sp, #2015]	; 0x7df
   41626:	f20d 7cdf 	addw	ip, sp, #2015	; 0x7df
   4162a:	f9ac 0c0f 	vld1.8	{d0[]}, [ip]
   4162e:	ed80 0b46 	vstr	d0, [r0, #280]	; 0x118
   41632:	ed90 0b46 	vldr	d0, [r0, #280]	; 0x118
   41636:	ed80 0b48 	vstr	d0, [r0, #288]	; 0x120
   4163a:	ed90 0b48 	vldr	d0, [r0, #288]	; 0x120
   4163e:	ed83 0b44 	vstr	d0, [r3, #272]	; 0x110
   41642:	f96e 8aef 	vld1.64	{d24-d25}, [lr :128]
   41646:	ed90 0bf8 	vldr	d0, [r0, #992]	; 0x3e0
   4164a:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   4164e:	f50d 6cf2 	add.w	ip, sp, #1936	; 0x790
   41652:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41656:	ed80 0b36 	vstr	d0, [r0, #216]	; 0xd8
   4165a:	ed80 1b34 	vstr	d1, [r0, #208]	; 0xd0
   4165e:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   41662:	ed90 0b36 	vldr	d0, [r0, #216]	; 0xd8
   41666:	ed90 1b34 	vldr	d1, [r0, #208]	; 0xd0
   4166a:	ed80 0b44 	vstr	d0, [r0, #272]	; 0x110
   4166e:	ed80 1b42 	vstr	d1, [r0, #264]	; 0x108
   41672:	ed90 0b44 	vldr	d0, [r0, #272]	; 0x110
   41676:	ed90 1b42 	vldr	d1, [r0, #264]	; 0x108
   4167a:	ffc0 ac01 	vmull.u8	q13, d0, d1
   4167e:	f50d 6cf4 	add.w	ip, sp, #1952	; 0x7a0
   41682:	f94c aaef 	vst1.64	{d26-d27}, [ip :128]
   41686:	f96c aaef 	vld1.64	{d26-d27}, [ip :128]
   4168a:	ef58 88ea 	vadd.i16	q12, q12, q13
   4168e:	f50d 6cee 	add.w	ip, sp, #1904	; 0x770
   41692:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41696:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   4169a:	f94e 8aef 	vst1.64	{d24-d25}, [lr :128]
   4169e:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   416a2:	ed91 0b00 	vldr	d0, [r1]
   416a6:	ed93 1b44 	vldr	d1, [r3, #272]	; 0x110
   416aa:	f50d 61e8 	add.w	r1, sp, #1856	; 0x740
   416ae:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   416b2:	ed80 0b22 	vstr	d0, [r0, #136]	; 0x88
   416b6:	ed80 1b20 	vstr	d1, [r0, #128]	; 0x80
   416ba:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   416be:	ed90 0b22 	vldr	d0, [r0, #136]	; 0x88
   416c2:	ed90 1b20 	vldr	d1, [r0, #128]	; 0x80
   416c6:	ed80 0b2e 	vstr	d0, [r0, #184]	; 0xb8
   416ca:	ed80 1b2c 	vstr	d1, [r0, #176]	; 0xb0
   416ce:	ed90 0b2e 	vldr	d0, [r0, #184]	; 0xb8
   416d2:	ed90 1b2c 	vldr	d1, [r0, #176]	; 0xb0
   416d6:	ffc0 ac01 	vmull.u8	q13, d0, d1
   416da:	f50d 61ea 	add.w	r1, sp, #1872	; 0x750
   416de:	f941 aaef 	vst1.64	{d26-d27}, [r1 :128]
   416e2:	f961 aaef 	vld1.64	{d26-d27}, [r1 :128]
   416e6:	ef58 88ea 	vadd.i16	q12, q12, q13
   416ea:	f50d 61e4 	add.w	r1, sp, #1824	; 0x720
   416ee:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   416f2:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   416f6:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   416fa:	f96e 8aef 	vld1.64	{d24-d25}, [lr :128]
   416fe:	a968      	add	r1, sp, #416	; 0x1a0
   41700:	f961 aaef 	vld1.64	{d26-d27}, [r1 :128]
   41704:	f50d 6ce2 	add.w	ip, sp, #1808	; 0x710
   41708:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   4170c:	f50d 64e0 	add.w	r4, sp, #1792	; 0x700
   41710:	f944 aaef 	vst1.64	{d26-d27}, [r4 :128]
   41714:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   41718:	f964 aaef 	vld1.64	{d26-d27}, [r4 :128]
   4171c:	ef58 88ea 	vadd.i16	q12, q12, q13
   41720:	f50d 6cde 	add.w	ip, sp, #1776	; 0x6f0
   41724:	f94c 8aef 	vst1.64	{d24-d25}, [ip :128]
   41728:	f96c 8aef 	vld1.64	{d24-d25}, [ip :128]
   4172c:	f94e 8aef 	vst1.64	{d24-d25}, [lr :128]
   41730:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   41734:	f961 aaef 	vld1.64	{d26-d27}, [r1 :128]
   41738:	f50d 61dc 	add.w	r1, sp, #1760	; 0x6e0
   4173c:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41740:	f50d 6cda 	add.w	ip, sp, #1744	; 0x6d0
   41744:	f94c aaef 	vst1.64	{d26-d27}, [ip :128]
   41748:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4174c:	f96c aaef 	vld1.64	{d26-d27}, [ip :128]
   41750:	ef58 88ea 	vadd.i16	q12, q12, q13
   41754:	f50d 61d8 	add.w	r1, sp, #1728	; 0x6c0
   41758:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   4175c:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   41760:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   41764:	f96e 8aef 	vld1.64	{d24-d25}, [lr :128]
   41768:	a948      	add	r1, sp, #288	; 0x120
   4176a:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   4176e:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   41772:	ff88 0938 	vqshrn.u16	d0, q12, #8
   41776:	ed83 0b3a 	vstr	d0, [r3, #232]	; 0xe8
   4177a:	ed93 0b3a 	vldr	d0, [r3, #232]	; 0xe8
   4177e:	ed83 0b38 	vstr	d0, [r3, #224]	; 0xe0
   41782:	ed93 0b38 	vldr	d0, [r3, #224]	; 0xe0
   41786:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   4178a:	a940      	add	r1, sp, #256	; 0x100
   4178c:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41790:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   41794:	ff88 1938 	vqshrn.u16	d1, q12, #8
   41798:	ed83 1b32 	vstr	d1, [r3, #200]	; 0xc8
   4179c:	ed93 1b32 	vldr	d1, [r3, #200]	; 0xc8
   417a0:	ed83 1b30 	vstr	d1, [r3, #192]	; 0xc0
   417a4:	ed93 1b30 	vldr	d1, [r3, #192]	; 0xc0
   417a8:	ed80 0b02 	vstr	d0, [r0, #8]
   417ac:	ed80 1b00 	vstr	d1, [r0]
   417b0:	ed90 0b02 	vldr	d0, [r0, #8]
   417b4:	ed90 1b00 	vldr	d1, [r0]
   417b8:	eef0 8b40 	vmov.f64	d24, d0
   417bc:	eef0 9b41 	vmov.f64	d25, d1
   417c0:	f50d 61d4 	add.w	r1, sp, #1696	; 0x6a0
   417c4:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   417c8:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   417cc:	a94c      	add	r1, sp, #304	; 0x130
   417ce:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   417d2:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   417d6:	aa5c      	add	r2, sp, #368	; 0x170
   417d8:	f962 aaef 	vld1.64	{d26-d27}, [r2 :128]
   417dc:	f50d 62d2 	add.w	r2, sp, #1680	; 0x690
   417e0:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   417e4:	f50d 6cd0 	add.w	ip, sp, #1664	; 0x680
   417e8:	f94c aaef 	vst1.64	{d26-d27}, [ip :128]
   417ec:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   417f0:	f96c aaef 	vld1.64	{d26-d27}, [ip :128]
   417f4:	ef48 88ea 	vadd.i8	q12, q12, q13
   417f8:	f50d 62ce 	add.w	r2, sp, #1648	; 0x670
   417fc:	f942 8aef 	vst1.64	{d24-d25}, [r2 :128]
   41800:	f962 8aef 	vld1.64	{d24-d25}, [r2 :128]
   41804:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41808:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4180c:	a938      	add	r1, sp, #224	; 0xe0
   4180e:	f941 8aef 	vst1.64	{d24-d25}, [r1 :128]
   41812:	9a6f      	ldr	r2, [sp, #444]	; 0x1bc
   41814:	f8dd c158 	ldr.w	ip, [sp, #344]	; 0x158
   41818:	4462      	add	r2, ip
   4181a:	f961 8aef 	vld1.64	{d24-d25}, [r1 :128]
   4181e:	f942 8a0f 	vst1.8	{d24-d25}, [r2]
   41822:	9956      	ldr	r1, [sp, #344]	; 0x158
   41824:	3110      	adds	r1, #16
   41826:	9156      	str	r1, [sp, #344]	; 0x158
   41828:	9953      	ldr	r1, [sp, #332]	; 0x14c
   4182a:	eb01 72d1 	add.w	r2, r1, r1, lsr #31
   4182e:	f022 0201 	bic.w	r2, r2, #1
   41832:	1a89      	subs	r1, r1, r2
   41834:	2900      	cmp	r1, #0
   41836:	f040 8274 	bne.w	41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   4183a:	e7ff      	b.n	4183c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x604>
   4183c:	f64f 70da 	movw	r0, #65498	; 0xffda
   41840:	f8ad 066e 	strh.w	r0, [sp, #1646]	; 0x66e
   41844:	f20d 606e 	addw	r0, sp, #1646	; 0x66e
   41848:	f9e0 0c7f 	vld1.16	{d16[]-d17[]}, [r0 :16]
   4184c:	f50d 60c8 	add.w	r0, sp, #1600	; 0x640
   41850:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41854:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41858:	f50d 60ca 	add.w	r0, sp, #1616	; 0x650
   4185c:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41860:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41864:	a834      	add	r0, sp, #208	; 0xd0
   41866:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   4186a:	2170      	movs	r1, #112	; 0x70
   4186c:	f8ad 163e 	strh.w	r1, [sp, #1598]	; 0x63e
   41870:	f20d 623e 	addw	r2, sp, #1598	; 0x63e
   41874:	f9e2 0c7f 	vld1.16	{d16[]-d17[]}, [r2 :16]
   41878:	f50d 62c2 	add.w	r2, sp, #1552	; 0x610
   4187c:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   41880:	f962 0aef 	vld1.64	{d16-d17}, [r2 :128]
   41884:	f50d 62c4 	add.w	r2, sp, #1568	; 0x620
   41888:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   4188c:	f962 0aef 	vld1.64	{d16-d17}, [r2 :128]
   41890:	aa30      	add	r2, sp, #192	; 0xc0
   41892:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   41896:	f50d 6330 	add.w	r3, sp, #2816	; 0xb00
   4189a:	f103 0c20 	add.w	ip, r3, #32
   4189e:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   418a2:	f50d 6cc0 	add.w	ip, sp, #1536	; 0x600
   418a6:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   418aa:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   418ae:	f50d 6cbe 	add.w	ip, sp, #1520	; 0x5f0
   418b2:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   418b6:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   418ba:	f50d 7cb0 	add.w	ip, sp, #352	; 0x160
   418be:	f96c 2aef 	vld1.64	{d18-d19}, [ip :128]
   418c2:	f50d 6ebc 	add.w	lr, sp, #1504	; 0x5e0
   418c6:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   418ca:	f50d 64ba 	add.w	r4, sp, #1488	; 0x5d0
   418ce:	f944 2aef 	vst1.64	{d18-d19}, [r4 :128]
   418d2:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   418d6:	f964 2aef 	vld1.64	{d18-d19}, [r4 :128]
   418da:	ef40 01f2 	vand	q8, q8, q9
   418de:	f50d 6eb8 	add.w	lr, sp, #1472	; 0x5c0
   418e2:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   418e6:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   418ea:	f50d 6eb6 	add.w	lr, sp, #1456	; 0x5b0
   418ee:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   418f2:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   418f6:	f50d 6eb4 	add.w	lr, sp, #1440	; 0x5a0
   418fa:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   418fe:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41902:	f10d 0eb0 	add.w	lr, sp, #176	; 0xb0
   41906:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   4190a:	f103 0410 	add.w	r4, r3, #16
   4190e:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41912:	f50d 64b2 	add.w	r4, sp, #1424	; 0x590
   41916:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   4191a:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   4191e:	f50d 64b0 	add.w	r4, sp, #1408	; 0x580
   41922:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41926:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   4192a:	f96c 2aef 	vld1.64	{d18-d19}, [ip :128]
   4192e:	f50d 64ae 	add.w	r4, sp, #1392	; 0x570
   41932:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41936:	f50d 65ac 	add.w	r5, sp, #1376	; 0x560
   4193a:	f945 2aef 	vst1.64	{d18-d19}, [r5 :128]
   4193e:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41942:	f965 2aef 	vld1.64	{d18-d19}, [r5 :128]
   41946:	ef40 01f2 	vand	q8, q8, q9
   4194a:	f50d 64aa 	add.w	r4, sp, #1360	; 0x550
   4194e:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41952:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41956:	f50d 64a8 	add.w	r4, sp, #1344	; 0x540
   4195a:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   4195e:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41962:	f50d 64a6 	add.w	r4, sp, #1328	; 0x530
   41966:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   4196a:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   4196e:	ac28      	add	r4, sp, #160	; 0xa0
   41970:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41974:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   41978:	f50d 63a4 	add.w	r3, sp, #1312	; 0x520
   4197c:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   41980:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   41984:	f50d 63a2 	add.w	r3, sp, #1296	; 0x510
   41988:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   4198c:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   41990:	f96c 2aef 	vld1.64	{d18-d19}, [ip :128]
   41994:	f50d 63a0 	add.w	r3, sp, #1280	; 0x500
   41998:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   4199c:	f50d 6c9e 	add.w	ip, sp, #1264	; 0x4f0
   419a0:	f94c 2aef 	vst1.64	{d18-d19}, [ip :128]
   419a4:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   419a8:	f96c 2aef 	vld1.64	{d18-d19}, [ip :128]
   419ac:	ef40 01f2 	vand	q8, q8, q9
   419b0:	f50d 639c 	add.w	r3, sp, #1248	; 0x4e0
   419b4:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   419b8:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   419bc:	f50d 639a 	add.w	r3, sp, #1232	; 0x4d0
   419c0:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   419c4:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   419c8:	f50d 6398 	add.w	r3, sp, #1216	; 0x4c0
   419cc:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   419d0:	f963 0aef 	vld1.64	{d16-d17}, [r3 :128]
   419d4:	ab24      	add	r3, sp, #144	; 0x90
   419d6:	f943 0aef 	vst1.64	{d16-d17}, [r3 :128]
   419da:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   419de:	f960 2aef 	vld1.64	{d18-d19}, [r0 :128]
   419e2:	f50d 6c96 	add.w	ip, sp, #1200	; 0x4b0
   419e6:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   419ea:	f50d 6594 	add.w	r5, sp, #1184	; 0x4a0
   419ee:	f945 2aef 	vst1.64	{d18-d19}, [r5 :128]
   419f2:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   419f6:	f965 2aef 	vld1.64	{d18-d19}, [r5 :128]
   419fa:	ef50 09f2 	vmul.i16	q8, q8, q9
   419fe:	f50d 6c92 	add.w	ip, sp, #1168	; 0x490
   41a02:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   41a06:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   41a0a:	f10d 0c80 	add.w	ip, sp, #128	; 0x80
   41a0e:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   41a12:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41a16:	f962 2aef 	vld1.64	{d18-d19}, [r2 :128]
   41a1a:	f50d 6e90 	add.w	lr, sp, #1152	; 0x480
   41a1e:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41a22:	f50d 658e 	add.w	r5, sp, #1136	; 0x470
   41a26:	f945 2aef 	vst1.64	{d18-d19}, [r5 :128]
   41a2a:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41a2e:	f965 2aef 	vld1.64	{d18-d19}, [r5 :128]
   41a32:	ef50 09f2 	vmul.i16	q8, q8, q9
   41a36:	f50d 6e8c 	add.w	lr, sp, #1120	; 0x460
   41a3a:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41a3e:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41a42:	f10d 0e70 	add.w	lr, sp, #112	; 0x70
   41a46:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41a4a:	f64f 75b6 	movw	r5, #65462	; 0xffb6
   41a4e:	f8ad 545e 	strh.w	r5, [sp, #1118]	; 0x45e
   41a52:	f20d 455e 	addw	r5, sp, #1118	; 0x45e
   41a56:	f9e5 0c7f 	vld1.16	{d16[]-d17[]}, [r5 :16]
   41a5a:	f50d 6586 	add.w	r5, sp, #1072	; 0x430
   41a5e:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41a62:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41a66:	f50d 6588 	add.w	r5, sp, #1088	; 0x440
   41a6a:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41a6e:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41a72:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41a76:	f64f 75a2 	movw	r5, #65442	; 0xffa2
   41a7a:	f8ad 542e 	strh.w	r5, [sp, #1070]	; 0x42e
   41a7e:	f20d 452e 	addw	r5, sp, #1070	; 0x42e
   41a82:	f9e5 0c7f 	vld1.16	{d16[]-d17[]}, [r5 :16]
   41a86:	f50d 6580 	add.w	r5, sp, #1024	; 0x400
   41a8a:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41a8e:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41a92:	f50d 6582 	add.w	r5, sp, #1040	; 0x410
   41a96:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41a9a:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41a9e:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   41aa2:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   41aa6:	f964 2aef 	vld1.64	{d18-d19}, [r4 :128]
   41aaa:	f960 4aef 	vld1.64	{d20-d21}, [r0 :128]
   41aae:	adfc      	add	r5, sp, #1008	; 0x3f0
   41ab0:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41ab4:	aef8      	add	r6, sp, #992	; 0x3e0
   41ab6:	f946 2aef 	vst1.64	{d18-d19}, [r6 :128]
   41aba:	f50d 7874 	add.w	r8, sp, #976	; 0x3d0
   41abe:	f948 4aef 	vst1.64	{d20-d21}, [r8 :128]
   41ac2:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41ac6:	f966 2aef 	vld1.64	{d18-d19}, [r6 :128]
   41aca:	f968 4aef 	vld1.64	{d20-d21}, [r8 :128]
   41ace:	ef52 09e4 	vmla.i16	q8, q9, q10
   41ad2:	adf0      	add	r5, sp, #960	; 0x3c0
   41ad4:	f945 0aef 	vst1.64	{d16-d17}, [r5 :128]
   41ad8:	f965 0aef 	vld1.64	{d16-d17}, [r5 :128]
   41adc:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   41ae0:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41ae4:	f964 2aef 	vld1.64	{d18-d19}, [r4 :128]
   41ae8:	f962 4aef 	vld1.64	{d20-d21}, [r2 :128]
   41aec:	acec      	add	r4, sp, #944	; 0x3b0
   41aee:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41af2:	ade8      	add	r5, sp, #928	; 0x3a0
   41af4:	f945 2aef 	vst1.64	{d18-d19}, [r5 :128]
   41af8:	aee4      	add	r6, sp, #912	; 0x390
   41afa:	f946 4aef 	vst1.64	{d20-d21}, [r6 :128]
   41afe:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41b02:	f965 2aef 	vld1.64	{d18-d19}, [r5 :128]
   41b06:	f966 4aef 	vld1.64	{d20-d21}, [r6 :128]
   41b0a:	ef52 09e4 	vmla.i16	q8, q9, q10
   41b0e:	ace0      	add	r4, sp, #896	; 0x380
   41b10:	f944 0aef 	vst1.64	{d16-d17}, [r4 :128]
   41b14:	f964 0aef 	vld1.64	{d16-d17}, [r4 :128]
   41b18:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41b1c:	f8ad 137e 	strh.w	r1, [sp, #894]	; 0x37e
   41b20:	f20d 317e 	addw	r1, sp, #894	; 0x37e
   41b24:	f9e1 0c7f 	vld1.16	{d16[]-d17[]}, [r1 :16]
   41b28:	a9d4      	add	r1, sp, #848	; 0x350
   41b2a:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41b2e:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41b32:	a9d8      	add	r1, sp, #864	; 0x360
   41b34:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41b38:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41b3c:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41b40:	f64f 71ee 	movw	r1, #65518	; 0xffee
   41b44:	f8ad 134e 	strh.w	r1, [sp, #846]	; 0x34e
   41b48:	f20d 314e 	addw	r1, sp, #846	; 0x34e
   41b4c:	f9e1 0c7f 	vld1.16	{d16[]-d17[]}, [r1 :16]
   41b50:	a9c8      	add	r1, sp, #800	; 0x320
   41b52:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41b56:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41b5a:	a9cc      	add	r1, sp, #816	; 0x330
   41b5c:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41b60:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41b64:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   41b68:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   41b6c:	f963 2aef 	vld1.64	{d18-d19}, [r3 :128]
   41b70:	f960 4aef 	vld1.64	{d20-d21}, [r0 :128]
   41b74:	a8c4      	add	r0, sp, #784	; 0x310
   41b76:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41b7a:	a9c0      	add	r1, sp, #768	; 0x300
   41b7c:	f941 2aef 	vst1.64	{d18-d19}, [r1 :128]
   41b80:	acbc      	add	r4, sp, #752	; 0x2f0
   41b82:	f944 4aef 	vst1.64	{d20-d21}, [r4 :128]
   41b86:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41b8a:	f961 2aef 	vld1.64	{d18-d19}, [r1 :128]
   41b8e:	f964 4aef 	vld1.64	{d20-d21}, [r4 :128]
   41b92:	ef52 09e4 	vmla.i16	q8, q9, q10
   41b96:	a8b8      	add	r0, sp, #736	; 0x2e0
   41b98:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41b9c:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41ba0:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   41ba4:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41ba8:	f963 2aef 	vld1.64	{d18-d19}, [r3 :128]
   41bac:	f962 4aef 	vld1.64	{d20-d21}, [r2 :128]
   41bb0:	a8b4      	add	r0, sp, #720	; 0x2d0
   41bb2:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41bb6:	a9b0      	add	r1, sp, #704	; 0x2c0
   41bb8:	f941 2aef 	vst1.64	{d18-d19}, [r1 :128]
   41bbc:	aaac      	add	r2, sp, #688	; 0x2b0
   41bbe:	f942 4aef 	vst1.64	{d20-d21}, [r2 :128]
   41bc2:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41bc6:	f961 2aef 	vld1.64	{d18-d19}, [r1 :128]
   41bca:	f962 4aef 	vld1.64	{d20-d21}, [r2 :128]
   41bce:	ef52 09e4 	vmla.i16	q8, q9, q10
   41bd2:	a8a8      	add	r0, sp, #672	; 0x2a0
   41bd4:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41bd8:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41bdc:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41be0:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   41be4:	a864      	add	r0, sp, #400	; 0x190
   41be6:	f960 2aef 	vld1.64	{d18-d19}, [r0 :128]
   41bea:	a9a4      	add	r1, sp, #656	; 0x290
   41bec:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41bf0:	aaa0      	add	r2, sp, #640	; 0x280
   41bf2:	f942 2aef 	vst1.64	{d18-d19}, [r2 :128]
   41bf6:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41bfa:	f962 2aef 	vld1.64	{d18-d19}, [r2 :128]
   41bfe:	ef50 08e2 	vadd.i16	q8, q8, q9
   41c02:	a99c      	add	r1, sp, #624	; 0x270
   41c04:	f941 0aef 	vst1.64	{d16-d17}, [r1 :128]
   41c08:	f961 0aef 	vld1.64	{d16-d17}, [r1 :128]
   41c0c:	f94c 0aef 	vst1.64	{d16-d17}, [ip :128]
   41c10:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41c14:	f960 2aef 	vld1.64	{d18-d19}, [r0 :128]
   41c18:	a898      	add	r0, sp, #608	; 0x260
   41c1a:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41c1e:	a994      	add	r1, sp, #592	; 0x250
   41c20:	f941 2aef 	vst1.64	{d18-d19}, [r1 :128]
   41c24:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41c28:	f961 2aef 	vld1.64	{d18-d19}, [r1 :128]
   41c2c:	ef50 08e2 	vadd.i16	q8, q8, q9
   41c30:	a890      	add	r0, sp, #576	; 0x240
   41c32:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41c36:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41c3a:	f94e 0aef 	vst1.64	{d16-d17}, [lr :128]
   41c3e:	f96c 0aef 	vld1.64	{d16-d17}, [ip :128]
   41c42:	a818      	add	r0, sp, #96	; 0x60
   41c44:	f940 0aef 	vst1.64	{d16-d17}, [r0 :128]
   41c48:	f960 0aef 	vld1.64	{d16-d17}, [r0 :128]
   41c4c:	efc8 6930 	vqshrn.s16	d22, q8, #8
   41c50:	9807      	ldr	r0, [sp, #28]
   41c52:	edc0 6b0a 	vstr	d22, [r0, #40]	; 0x28
   41c56:	edd0 6b0a 	vldr	d22, [r0, #40]	; 0x28
   41c5a:	edc0 6b08 	vstr	d22, [r0, #32]
   41c5e:	edd0 6b08 	vldr	d22, [r0, #32]
   41c62:	edd0 7b56 	vldr	d23, [r0, #344]	; 0x158
   41c66:	edc0 6b82 	vstr	d22, [r0, #520]	; 0x208
   41c6a:	edc0 7b80 	vstr	d23, [r0, #512]	; 0x200
   41c6e:	edd0 6b82 	vldr	d22, [r0, #520]	; 0x208
   41c72:	edd0 7b80 	vldr	d23, [r0, #512]	; 0x200
   41c76:	ef46 68a7 	vadd.i8	d22, d22, d23
   41c7a:	edc0 6b7e 	vstr	d22, [r0, #504]	; 0x1f8
   41c7e:	edd0 6b7e 	vldr	d22, [r0, #504]	; 0x1f8
   41c82:	edc0 6b7c 	vstr	d22, [r0, #496]	; 0x1f0
   41c86:	edd0 6b7c 	vldr	d22, [r0, #496]	; 0x1f0
   41c8a:	edc0 6b7a 	vstr	d22, [r0, #488]	; 0x1e8
   41c8e:	edd0 6b7a 	vldr	d22, [r0, #488]	; 0x1e8
   41c92:	9908      	ldr	r1, [sp, #32]
   41c94:	edc1 6bee 	vstr	d22, [r1, #952]	; 0x3b8
   41c98:	f96e 0aef 	vld1.64	{d16-d17}, [lr :128]
   41c9c:	aa10      	add	r2, sp, #64	; 0x40
   41c9e:	f942 0aef 	vst1.64	{d16-d17}, [r2 :128]
   41ca2:	f962 0aef 	vld1.64	{d16-d17}, [r2 :128]
   41ca6:	efc8 6930 	vqshrn.s16	d22, q8, #8
   41caa:	edc0 6b02 	vstr	d22, [r0, #8]
   41cae:	edd0 6b02 	vldr	d22, [r0, #8]
   41cb2:	edc0 6b00 	vstr	d22, [r0]
   41cb6:	edd0 6b00 	vldr	d22, [r0]
   41cba:	edd0 7b56 	vldr	d23, [r0, #344]	; 0x158
   41cbe:	edc0 6b78 	vstr	d22, [r0, #480]	; 0x1e0
   41cc2:	edc0 7b76 	vstr	d23, [r0, #472]	; 0x1d8
   41cc6:	edd0 6b78 	vldr	d22, [r0, #480]	; 0x1e0
   41cca:	edd0 7b76 	vldr	d23, [r0, #472]	; 0x1d8
   41cce:	ef46 68a7 	vadd.i8	d22, d22, d23
   41cd2:	edc0 6b74 	vstr	d22, [r0, #464]	; 0x1d0
   41cd6:	edd0 6b74 	vldr	d22, [r0, #464]	; 0x1d0
   41cda:	edc0 6b72 	vstr	d22, [r0, #456]	; 0x1c8
   41cde:	edd0 6b72 	vldr	d22, [r0, #456]	; 0x1c8
   41ce2:	edc0 6b70 	vstr	d22, [r0, #448]	; 0x1c0
   41ce6:	edd0 6b70 	vldr	d22, [r0, #448]	; 0x1c0
   41cea:	edc1 6bec 	vstr	d22, [r1, #944]	; 0x3b0
   41cee:	f50d 6226 	add.w	r2, sp, #2656	; 0xa60
   41cf2:	f962 0acf 	vld1.64	{d16-d17}, [r2]
   41cf6:	f50d 6225 	add.w	r2, sp, #2640	; 0xa50
   41cfa:	f942 0acf 	vst1.64	{d16-d17}, [r2]
   41cfe:	9b6f      	ldr	r3, [sp, #444]	; 0x1bc
   41d00:	f8dd c154 	ldr.w	ip, [sp, #340]	; 0x154
   41d04:	4463      	add	r3, ip
   41d06:	f962 671d 	vld1.8	{d22}, [r2 :64]!
   41d0a:	edd2 7b00 	vldr	d23, [r2]
   41d0e:	eef0 0b66 	vmov.f64	d16, d22
   41d12:	eef0 1b67 	vmov.f64	d17, d23
   41d16:	f943 080f 	vst2.8	{d16-d17}, [r3]
   41d1a:	9a55      	ldr	r2, [sp, #340]	; 0x154
   41d1c:	3210      	adds	r2, #16
   41d1e:	9255      	str	r2, [sp, #340]	; 0x154
   41d20:	e7ff      	b.n	41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
   41d22:	e7ff      	b.n	41d24 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaec>
   41d24:	9854      	ldr	r0, [sp, #336]	; 0x150
   41d26:	3001      	adds	r0, #1
   41d28:	9054      	str	r0, [sp, #336]	; 0x150
   41d2a:	f7ff bb25 	b.w	41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
   41d2e:	986d      	ldr	r0, [sp, #436]	; 0x1b4
   41d30:	f020 000f 	bic.w	r0, r0, #15
   41d34:	9054      	str	r0, [sp, #336]	; 0x150
   41d36:	e7ff      	b.n	41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41d38:	9854      	ldr	r0, [sp, #336]	; 0x150
   41d3a:	996d      	ldr	r1, [sp, #436]	; 0x1b4
   41d3c:	4288      	cmp	r0, r1
   41d3e:	da7e      	bge.n	41e3e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc06>
   41d40:	e7ff      	b.n	41d42 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb0a>
   41d42:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   41d44:	7880      	ldrb	r0, [r0, #2]
   41d46:	f88d 002f 	strb.w	r0, [sp, #47]	; 0x2f
   41d4a:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   41d4c:	7840      	ldrb	r0, [r0, #1]
   41d4e:	f88d 002e 	strb.w	r0, [sp, #46]	; 0x2e
   41d52:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   41d54:	7800      	ldrb	r0, [r0, #0]
   41d56:	f88d 002d 	strb.w	r0, [sp, #45]	; 0x2d
   41d5a:	986e      	ldr	r0, [sp, #440]	; 0x1b8
   41d5c:	3004      	adds	r0, #4
   41d5e:	906e      	str	r0, [sp, #440]	; 0x1b8
   41d60:	f89d 002f 	ldrb.w	r0, [sp, #47]	; 0x2f
   41d64:	eb00 1040 	add.w	r0, r0, r0, lsl #5
   41d68:	f89d 102e 	ldrb.w	r1, [sp, #46]	; 0x2e
   41d6c:	eb01 11c1 	add.w	r1, r1, r1, lsl #7
   41d70:	eb01 0040 	add.w	r0, r1, r0, lsl #1
   41d74:	f89d 102d 	ldrb.w	r1, [sp, #45]	; 0x2d
   41d78:	2219      	movs	r2, #25
   41d7a:	fb11 0002 	smlabb	r0, r1, r2, r0
   41d7e:	3080      	adds	r0, #128	; 0x80
   41d80:	2110      	movs	r1, #16
   41d82:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   41d86:	f88d 002c 	strb.w	r0, [sp, #44]	; 0x2c
   41d8a:	f89d 002f 	ldrb.w	r0, [sp, #47]	; 0x2f
   41d8e:	f06f 0125 	mvn.w	r1, #37	; 0x25
   41d92:	fb10 f001 	smulbb	r0, r0, r1
   41d96:	f89d 102e 	ldrb.w	r1, [sp, #46]	; 0x2e
   41d9a:	224a      	movs	r2, #74	; 0x4a
   41d9c:	fb01 0012 	mls	r0, r1, r2, r0
   41da0:	f89d 102d 	ldrb.w	r1, [sp, #45]	; 0x2d
   41da4:	ebc1 01c1 	rsb	r1, r1, r1, lsl #3
   41da8:	eb00 1001 	add.w	r0, r0, r1, lsl #4
   41dac:	3080      	adds	r0, #128	; 0x80
   41dae:	2180      	movs	r1, #128	; 0x80
   41db0:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   41db4:	f88d 002b 	strb.w	r0, [sp, #43]	; 0x2b
   41db8:	f89d 002f 	ldrb.w	r0, [sp, #47]	; 0x2f
   41dbc:	ebc0 00c0 	rsb	r0, r0, r0, lsl #3
   41dc0:	f89d 202e 	ldrb.w	r2, [sp, #46]	; 0x2e
   41dc4:	235e      	movs	r3, #94	; 0x5e
   41dc6:	fb12 f203 	smulbb	r2, r2, r3
   41dca:	ebc2 1000 	rsb	r0, r2, r0, lsl #4
   41dce:	f89d 202d 	ldrb.w	r2, [sp, #45]	; 0x2d
   41dd2:	eb02 02c2 	add.w	r2, r2, r2, lsl #3
   41dd6:	eba0 0042 	sub.w	r0, r0, r2, lsl #1
   41dda:	3080      	adds	r0, #128	; 0x80
   41ddc:	eb01 2010 	add.w	r0, r1, r0, lsr #8
   41de0:	f88d 002a 	strb.w	r0, [sp, #42]	; 0x2a
   41de4:	f89d 002c 	ldrb.w	r0, [sp, #44]	; 0x2c
   41de8:	996f      	ldr	r1, [sp, #444]	; 0x1bc
   41dea:	9a56      	ldr	r2, [sp, #344]	; 0x158
   41dec:	1c53      	adds	r3, r2, #1
   41dee:	9356      	str	r3, [sp, #344]	; 0x158
   41df0:	5488      	strb	r0, [r1, r2]
   41df2:	9853      	ldr	r0, [sp, #332]	; 0x14c
   41df4:	eb00 71d0 	add.w	r1, r0, r0, lsr #31
   41df8:	f021 0101 	bic.w	r1, r1, #1
   41dfc:	1a40      	subs	r0, r0, r1
   41dfe:	2800      	cmp	r0, #0
   41e00:	d118      	bne.n	41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e02:	e7ff      	b.n	41e04 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbcc>
   41e04:	9854      	ldr	r0, [sp, #336]	; 0x150
   41e06:	eb00 71d0 	add.w	r1, r0, r0, lsr #31
   41e0a:	f021 0101 	bic.w	r1, r1, #1
   41e0e:	1a40      	subs	r0, r0, r1
   41e10:	2800      	cmp	r0, #0
   41e12:	d10f      	bne.n	41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e14:	e7ff      	b.n	41e16 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbde>
   41e16:	f89d 002a 	ldrb.w	r0, [sp, #42]	; 0x2a
   41e1a:	996f      	ldr	r1, [sp, #444]	; 0x1bc
   41e1c:	9a55      	ldr	r2, [sp, #340]	; 0x154
   41e1e:	1c53      	adds	r3, r2, #1
   41e20:	9355      	str	r3, [sp, #340]	; 0x154
   41e22:	5488      	strb	r0, [r1, r2]
   41e24:	f89d 002b 	ldrb.w	r0, [sp, #43]	; 0x2b
   41e28:	996f      	ldr	r1, [sp, #444]	; 0x1bc
   41e2a:	9a55      	ldr	r2, [sp, #340]	; 0x154
   41e2c:	1c53      	adds	r3, r2, #1
   41e2e:	9355      	str	r3, [sp, #340]	; 0x154
   41e30:	5488      	strb	r0, [r1, r2]
   41e32:	e7ff      	b.n	41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
   41e34:	e7ff      	b.n	41e36 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfe>
   41e36:	9854      	ldr	r0, [sp, #336]	; 0x150
   41e38:	3001      	adds	r0, #1
   41e3a:	9054      	str	r0, [sp, #336]	; 0x150
   41e3c:	e77c      	b.n	41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
   41e3e:	e7ff      	b.n	41e40 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc08>
   41e40:	9853      	ldr	r0, [sp, #332]	; 0x14c
   41e42:	3001      	adds	r0, #1
   41e44:	9053      	str	r0, [sp, #332]	; 0x14c
   41e46:	f7ff ba8e 	b.w	41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
   41e4a:	4809      	ldr	r0, [pc, #36]	; (41e70 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc38>)
   41e4c:	4478      	add	r0, pc
   41e4e:	6800      	ldr	r0, [r0, #0]
   41e50:	6800      	ldr	r0, [r0, #0]
   41e52:	9909      	ldr	r1, [sp, #36]	; 0x24
   41e54:	4288      	cmp	r0, r1
   41e56:	d106      	bne.n	41e66 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc2e>
   41e58:	e7ff      	b.n	41e5a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc22>
   41e5a:	f1a7 0418 	sub.w	r4, r7, #24
   41e5e:	46a5      	mov	sp, r4
   41e60:	e8bd 0b00 	ldmia.w	sp!, {r8, r9, fp}
   41e64:	bdf0      	pop	{r4, r5, r6, r7, pc}
   41e66:	f7f8 ec6e 	blx	3a744 <__stack_chk_fail@plt>
   41e6a:	bf00      	nop
   41e6c:	001ef0f8 	.word	0x001ef0f8
   41e70:	001ee50c 	.word	0x001ee50c

 

NEON 版在我的手机上是“负优化”……跑得比朴素的 CPU 版还慢……从上面的反汇编可以看到,几乎每条 NEON 运算指令前后都伴随着对栈的 vst1/vld1 存取,这很可能与编译时未开启优化有关(待确认)。看来网上给的 NEON 代码也未必靠谱,还是得亲自实践对比!

RGBA 编码为 YUV420SP【NEON】_第1张图片

经过我的优化后,NEON 版耗时约 53 ms,循环展开(一次)版耗时约 51 ms;原图大小为 1600×1873。

RGBA 编码为 YUV420SP【NEON】_第2张图片

 

 

你可能感兴趣的:(NEON)