/*
 * Scalar reference conversion from packed 4-byte pixels to NV21
 * (a full-resolution Y plane followed by an interleaved V/U plane).
 *
 * yuv420sp  destination buffer; receives width*height luma bytes, then one
 *           V,U byte pair for every pixel whose row and column are both even.
 * argb      source pixels, 4 bytes each; byte 0 is read as blue, byte 1 as
 *           green, byte 2 as red, byte 3 is ignored.
 * width     pixels per row.
 * height    number of rows.
 *
 * The source is assumed to be tightly packed (no row padding): the read
 * position simply advances 4 bytes per pixel.
 */
void encodeYUV420SP_CPU(unsigned char *__restrict__ yuv420sp,
                        unsigned char *__restrict__ argb, int width, int height) {
    int luma_pos = 0;
    int chroma_pos = width * height; /* chroma plane starts right after luma */
    const unsigned char *px = argb;

    for (int row = 0; row < height; ++row) {
        for (int col = 0; col < width; ++col, px += 4) {
            const uint8_t blue = px[0];
            const uint8_t green = px[1];
            const uint8_t red = px[2];

            /* Fixed-point RGB -> Y with rounding (+128) and a +16 offset. */
            const uint8_t luma = ((66 * red + 129 * green + 25 * blue + 128) >> 8) + 16;
            yuv420sp[luma_pos] = luma;
            ++luma_pos;

            /* Chroma is subsampled 2x2: only the top-left pixel of each
             * 2x2 cell contributes a V/U pair. */
            if (row % 2 != 0 || col % 2 != 0) {
                continue;
            }

            const uint8_t cb = ((-38 * red - 74 * green + 112 * blue + 128) >> 8) + 128;
            const uint8_t cr = ((112 * red - 94 * green - 18 * blue + 128) >> 8) + 128;

            /* NV21 order: V first, then U. */
            yuv420sp[chroma_pos] = cr;
            yuv420sp[chroma_pos + 1] = cb;
            chroma_pos += 2;
        }
    }
}
/*
 * NEON-accelerated conversion from packed 4-byte pixels to NV21
 * (a full-resolution Y plane followed by an interleaved V/U plane).
 *
 * Each row is processed 16 pixels at a time with NEON; the remaining
 * (width % 16) pixels of the row are handled by a scalar tail that uses
 * the same fixed-point formulas:
 *
 *   Y = (( 66*R + 129*G +  25*B + 128) >> 8) + 16
 *   U = ((-38*R -  74*G + 112*B + 128) >> 8) + 128
 *   V = ((112*R -  94*G -  18*B + 128) >> 8) + 128
 *
 * yuv420sp  destination buffer; receives width*height luma bytes, then one
 *           V,U byte pair for every pixel whose row and column are both even.
 * argb      source pixels, 4 bytes each; byte 0 is read as blue, byte 1 as
 *           green, byte 2 as red, byte 3 is ignored. Rows are assumed to be
 *           tightly packed (the read pointer just advances 4 bytes/pixel).
 * width     pixels per row.
 * height    number of rows.
 */
void encodeYUV420SP_NEON_Intrinsics(unsigned char *__restrict__ yuv420sp,
                                    unsigned char *__restrict__ argb, int width, int height) {
    /* Rounding term (+128) added before the >> 8 of the luma sum. */
    const uint16x8_t y_rounding = vdupq_n_u16(128);
    /* Rounding term (+128) added before the >> 8 of the chroma sums. */
    const int16x8_t uv_rounding = vdupq_n_s16(128);
    /* Outer "+128" of U/V. After narrowing, the values are signed 8-bit, so
     * the offset is applied as a wrapping add of -128 (bit pattern 0x80),
     * which is the same as +128 modulo 256. */
    const int8x8_t uv_bias = vdup_n_s8(-128);
    /* Outer "+16" of Y. */
    const uint8x16_t y_offset = vdupq_n_u8(16);
    /* Keeps the low byte of every 16-bit lane. Unsigned dup so the
     * initialiser matches the declared vector type. */
    const uint16x8_t low_byte_mask = vdupq_n_u16(255);

    /* Loop-invariant conversion coefficients. */
    const uint8x8_t y_coef_r = vdup_n_u8(66);
    const uint8x8_t y_coef_g = vdup_n_u8(129);
    const uint8x8_t y_coef_b = vdup_n_u8(25);
    const int16x8_t u_coef_r = vdupq_n_s16(-38);
    const int16x8_t u_coef_g = vdupq_n_s16(-74);
    const int16x8_t u_coef_b = vdupq_n_s16(112);
    const int16x8_t v_coef_r = vdupq_n_s16(112);
    const int16x8_t v_coef_g = vdupq_n_s16(-94);
    const int16x8_t v_coef_b = vdupq_n_s16(-18);

    const int vector_blocks = width >> 4; /* full 16-pixel groups per row */
    int yIndex = 0;
    int uvIndex = width * height; /* chroma plane starts right after luma */
    int i;
    int j;

    for (j = 0; j < height; j++) {
        for (i = 0; i < vector_blocks; i++) {
            /* De-interleaving load: val[0]=B, val[1]=G, val[2]=R for 16 pixels. */
            uint8x16x4_t pixel_argb = vld4q_u8(argb);
            argb += 4 * 16;

            /* Y: widen 8x8 -> 16 bit products, low and high halves separately. */
            uint16x8_t y_lo = vmull_u8(vget_low_u8(pixel_argb.val[2]), y_coef_r);
            uint16x8_t y_hi = vmull_u8(vget_high_u8(pixel_argb.val[2]), y_coef_r);
            y_lo = vmlal_u8(y_lo, vget_low_u8(pixel_argb.val[1]), y_coef_g);
            y_hi = vmlal_u8(y_hi, vget_high_u8(pixel_argb.val[1]), y_coef_g);
            y_lo = vmlal_u8(y_lo, vget_low_u8(pixel_argb.val[0]), y_coef_b);
            y_hi = vmlal_u8(y_hi, vget_high_u8(pixel_argb.val[0]), y_coef_b);
            y_lo = vaddq_u16(y_lo, y_rounding);
            y_hi = vaddq_u16(y_hi, y_rounding);

            uint8x16_t y = vcombine_u8(vqshrn_n_u16(y_lo, 8), vqshrn_n_u16(y_hi, 8));
            y = vaddq_u8(y, y_offset);
            vst1q_u8(yuv420sp + yIndex, y);
            yIndex += 16;

            /* Chroma is only produced on even rows. */
            if (j % 2 == 0) {
                /* Chroma is also subsampled horizontally: view the 16 bytes
                 * as 8 16-bit lanes and keep only the low byte of each lane.
                 * On a little-endian target that byte is the even-indexed
                 * pixel of each pair, already zero-extended to 16 bits. */
                int16x8_t r = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[2]), low_byte_mask));
                int16x8_t g = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[1]), low_byte_mask));
                int16x8_t b = vreinterpretq_s16_u16(
                        vandq_u16(vreinterpretq_u16_u8(pixel_argb.val[0]), low_byte_mask));

                int16x8_t u = vmulq_s16(r, u_coef_r);
                int16x8_t v = vmulq_s16(r, v_coef_r);
                u = vmlaq_s16(u, g, u_coef_g);
                v = vmlaq_s16(v, g, v_coef_g);
                u = vmlaq_s16(u, b, u_coef_b);
                v = vmlaq_s16(v, b, v_coef_b);
                u = vaddq_s16(u, uv_rounding);
                v = vaddq_s16(v, uv_rounding);

                /* NV21 order: V in val[0], U in val[1]; vst2 interleaves them. */
                uint8x8x2_t vu;
                vu.val[0] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(v, 8), uv_bias));
                vu.val[1] = vreinterpret_u8_s8(vadd_s8(vqshrn_n_s16(u, 8), uv_bias));
                vst2_u8(yuv420sp + uvIndex, vu);
                uvIndex += 2 * 8;
            }
        }

        /* Scalar tail for the pixels of this row that do not fill a
         * 16-pixel group. */
        for (i = vector_blocks * 16; i < width; i++) {
            uint8_t R = argb[2];
            uint8_t G = argb[1];
            uint8_t B = argb[0];
            argb += 4;

            uint8_t Y = ((66 * R + 129 * G + 25 * B + 128) >> 8) + 16;
            uint8_t U = ((-38 * R - 74 * G + 112 * B + 128) >> 8) + 128;
            uint8_t V = ((112 * R - 94 * G - 18 * B + 128) >> 8) + 128;

            yuv420sp[yIndex++] = Y;
            /* One V/U pair per 2x2 cell, taken from its top-left pixel. */
            if (j % 2 == 0 && i % 2 == 0) {
                yuv420sp[uvIndex++] = V;
                yuv420sp[uvIndex++] = U;
            }
        }
    }
}
命令行如下:
>J:\Programs\Android\sdk\ndk-bundle\toolchains\aarch64-linux-android-4.9\prebuilt\windows-x86_64\bin\aarch64-linux-android-objdump.exe -d libnative-lib.so > objdump_d.txt
一. arm-linux-objdump
常用来显示二进制文件信息,常用来查看反汇编代码
二. 常用选项:
1.-b bfdname 指定目标码格式
2.--disassemble 或者 -d 反汇编可执行段
3.--disassemble-all 或者 -D 反汇编所有段
4.-EB,-EL 指定字节序
5.--file-headers 或者 -f 显示文件的整体头部摘要信息
6.--section-headers,--headers 或者 -h 显示目标文件中各个段的头部摘要信息
7.--info 或者 -i 显示支持的目标文件格式和CPU架构
8.--section=name 或者 -j name 显示指定 section 的信息
9.--architecture=machine 或者 -m machine 指定反汇编目标文件时使用的架构
三. 示例
arm-linux-objdump -D elf_file > dis_file 或者
arm-linux-objdump -D -b binary -m arm bin_file > dis_file
000410dc <_Z18encodeYUV420SP_CPUPhS_ii>:
410dc: b5b0 push {r4, r5, r7, lr}
410de: af02 add r7, sp, #8
410e0: b08f sub sp, #60 ; 0x3c
410e2: 469c mov ip, r3
410e4: 4696 mov lr, r2
410e6: 460c mov r4, r1
410e8: 4605 mov r5, r0
410ea: 900e str r0, [sp, #56] ; 0x38
410ec: 910d str r1, [sp, #52] ; 0x34
410ee: 920c str r2, [sp, #48] ; 0x30
410f0: 930b str r3, [sp, #44] ; 0x2c
410f2: 980c ldr r0, [sp, #48] ; 0x30
410f4: 990b ldr r1, [sp, #44] ; 0x2c
410f6: 4348 muls r0, r1
410f8: 900a str r0, [sp, #40] ; 0x28
410fa: 2000 movs r0, #0
410fc: 9009 str r0, [sp, #36] ; 0x24
410fe: 990a ldr r1, [sp, #40] ; 0x28
41100: 9108 str r1, [sp, #32]
41102: 9007 str r0, [sp, #28]
41104: f8cd c00c str.w ip, [sp, #12]
41108: f8cd e008 str.w lr, [sp, #8]
4110c: 9401 str r4, [sp, #4]
4110e: 9500 str r5, [sp, #0]
41110: e7ff b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
41112: 9807 ldr r0, [sp, #28]
41114: 990b ldr r1, [sp, #44] ; 0x2c
41116: 4288 cmp r0, r1
41118: f280 808c bge.w 41234 <_Z18encodeYUV420SP_CPUPhS_ii+0x158>
4111c: e7ff b.n 4111e <_Z18encodeYUV420SP_CPUPhS_ii+0x42>
4111e: 2000 movs r0, #0
41120: 9006 str r0, [sp, #24]
41122: e7ff b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
41124: 9806 ldr r0, [sp, #24]
41126: 990c ldr r1, [sp, #48] ; 0x30
41128: 4288 cmp r0, r1
4112a: da7e bge.n 4122a <_Z18encodeYUV420SP_CPUPhS_ii+0x14e>
4112c: e7ff b.n 4112e <_Z18encodeYUV420SP_CPUPhS_ii+0x52>
4112e: 980d ldr r0, [sp, #52] ; 0x34
41130: 7880 ldrb r0, [r0, #2]
41132: f807 0c2d strb.w r0, [r7, #-45]
41136: 980d ldr r0, [sp, #52] ; 0x34
41138: 7840 ldrb r0, [r0, #1]
4113a: f807 0c2e strb.w r0, [r7, #-46]
4113e: 980d ldr r0, [sp, #52] ; 0x34
41140: 7800 ldrb r0, [r0, #0]
41142: f807 0c2f strb.w r0, [r7, #-47]
41146: 980d ldr r0, [sp, #52] ; 0x34
41148: 3004 adds r0, #4
4114a: 900d str r0, [sp, #52] ; 0x34
4114c: f817 0c2d ldrb.w r0, [r7, #-45]
41150: eb00 1040 add.w r0, r0, r0, lsl #5
41154: f817 1c2e ldrb.w r1, [r7, #-46]
41158: eb01 11c1 add.w r1, r1, r1, lsl #7
4115c: eb01 0040 add.w r0, r1, r0, lsl #1
41160: f817 1c2f ldrb.w r1, [r7, #-47]
41164: 2219 movs r2, #25
41166: fb11 0002 smlabb r0, r1, r2, r0
4116a: 3080 adds r0, #128 ; 0x80
4116c: 2110 movs r1, #16
4116e: eb01 2010 add.w r0, r1, r0, lsr #8
41172: f88d 0014 strb.w r0, [sp, #20]
41176: f817 0c2d ldrb.w r0, [r7, #-45]
4117a: f06f 0125 mvn.w r1, #37 ; 0x25
4117e: fb10 f001 smulbb r0, r0, r1
41182: f817 1c2e ldrb.w r1, [r7, #-46]
41186: 224a movs r2, #74 ; 0x4a
41188: fb01 0012 mls r0, r1, r2, r0
4118c: f817 1c2f ldrb.w r1, [r7, #-47]
41190: ebc1 01c1 rsb r1, r1, r1, lsl #3
41194: eb00 1001 add.w r0, r0, r1, lsl #4
41198: 3080 adds r0, #128 ; 0x80
4119a: 2180 movs r1, #128 ; 0x80
4119c: eb01 2010 add.w r0, r1, r0, lsr #8
411a0: f807 0c31 strb.w r0, [r7, #-49]
411a4: f817 0c2d ldrb.w r0, [r7, #-45]
411a8: ebc0 00c0 rsb r0, r0, r0, lsl #3
411ac: f817 2c2e ldrb.w r2, [r7, #-46]
411b0: 235e movs r3, #94 ; 0x5e
411b2: fb12 f203 smulbb r2, r2, r3
411b6: ebc2 1000 rsb r0, r2, r0, lsl #4
411ba: f817 2c2f ldrb.w r2, [r7, #-47]
411be: eb02 02c2 add.w r2, r2, r2, lsl #3
411c2: eba0 0042 sub.w r0, r0, r2, lsl #1
411c6: 3080 adds r0, #128 ; 0x80
411c8: eb01 2010 add.w r0, r1, r0, lsr #8
411cc: f807 0c32 strb.w r0, [r7, #-50]
411d0: f89d 0014 ldrb.w r0, [sp, #20]
411d4: 990e ldr r1, [sp, #56] ; 0x38
411d6: 9a09 ldr r2, [sp, #36] ; 0x24
411d8: 1c53 adds r3, r2, #1
411da: 9309 str r3, [sp, #36] ; 0x24
411dc: 5488 strb r0, [r1, r2]
411de: 9807 ldr r0, [sp, #28]
411e0: eb00 71d0 add.w r1, r0, r0, lsr #31
411e4: f021 0101 bic.w r1, r1, #1
411e8: 1a40 subs r0, r0, r1
411ea: 2800 cmp r0, #0
411ec: d118 bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
411ee: e7ff b.n 411f0 <_Z18encodeYUV420SP_CPUPhS_ii+0x114>
411f0: 9806 ldr r0, [sp, #24]
411f2: eb00 71d0 add.w r1, r0, r0, lsr #31
411f6: f021 0101 bic.w r1, r1, #1
411fa: 1a40 subs r0, r0, r1
411fc: 2800 cmp r0, #0
411fe: d10f bne.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
41200: e7ff b.n 41202 <_Z18encodeYUV420SP_CPUPhS_ii+0x126>
41202: f817 0c32 ldrb.w r0, [r7, #-50]
41206: 990e ldr r1, [sp, #56] ; 0x38
41208: 9a08 ldr r2, [sp, #32]
4120a: 1c53 adds r3, r2, #1
4120c: 9308 str r3, [sp, #32]
4120e: 5488 strb r0, [r1, r2]
41210: f817 0c31 ldrb.w r0, [r7, #-49]
41214: 990e ldr r1, [sp, #56] ; 0x38
41216: 9a08 ldr r2, [sp, #32]
41218: 1c53 adds r3, r2, #1
4121a: 9308 str r3, [sp, #32]
4121c: 5488 strb r0, [r1, r2]
4121e: e7ff b.n 41220 <_Z18encodeYUV420SP_CPUPhS_ii+0x144>
41220: e7ff b.n 41222 <_Z18encodeYUV420SP_CPUPhS_ii+0x146>
41222: 9806 ldr r0, [sp, #24]
41224: 3001 adds r0, #1
41226: 9006 str r0, [sp, #24]
41228: e77c b.n 41124 <_Z18encodeYUV420SP_CPUPhS_ii+0x48>
4122a: e7ff b.n 4122c <_Z18encodeYUV420SP_CPUPhS_ii+0x150>
4122c: 9807 ldr r0, [sp, #28]
4122e: 3001 adds r0, #1
41230: 9007 str r0, [sp, #28]
41232: e76e b.n 41112 <_Z18encodeYUV420SP_CPUPhS_ii+0x36>
41234: b00f add sp, #60 ; 0x3c
41236: bdb0 pop {r4, r5, r7, pc}
00041238 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii>:
41238: b5f0 push {r4, r5, r6, r7, lr}
4123a: af03 add r7, sp, #12
4123c: e92d 0b00 stmdb sp!, {r8, r9, fp}
41240: f5ad 6d35 sub.w sp, sp, #2896 ; 0xb50
41244: 466c mov r4, sp
41246: f36f 0403 bfc r4, #0, #4
4124a: 46a5 mov sp, r4
4124c: f50d 6cd6 add.w ip, sp, #1712 ; 0x6b0
41250: f10d 0e30 add.w lr, sp, #48 ; 0x30
41254: 461c mov r4, r3
41256: 4615 mov r5, r2
41258: 460e mov r6, r1
4125a: 4680 mov r8, r0
4125c: f8df 9c0c ldr.w r9, [pc, #3084] ; 41e6c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc34>
41260: 44f9 add r9, pc
41262: f8d9 9000 ldr.w r9, [r9]
41266: f8d9 9000 ldr.w r9, [r9]
4126a: f8cd 9024 str.w r9, [sp, #36] ; 0x24
4126e: 906f str r0, [sp, #444] ; 0x1bc
41270: 916e str r1, [sp, #440] ; 0x1b8
41272: 926d str r2, [sp, #436] ; 0x1b4
41274: 936c str r3, [sp, #432] ; 0x1b0
41276: 2080 movs r0, #128 ; 0x80
41278: f8ad 01ee strh.w r0, [sp, #494] ; 0x1ee
4127c: f50d 71f7 add.w r1, sp, #494 ; 0x1ee
41280: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16]
41284: a970 add r1, sp, #448 ; 0x1c0
41286: f941 0aef vst1.64 {d16-d17}, [r1 :128]
4128a: f961 0aef vld1.64 {d16-d17}, [r1 :128]
4128e: a974 add r1, sp, #464 ; 0x1d0
41290: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41294: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41298: a968 add r1, sp, #416 ; 0x1a0
4129a: f941 0aef vst1.64 {d16-d17}, [r1 :128]
4129e: f8ad 0a4e strh.w r0, [sp, #2638] ; 0xa4e
412a2: f60d 214e addw r1, sp, #2638 ; 0xa4e
412a6: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16]
412aa: f50d 6122 add.w r1, sp, #2592 ; 0xa20
412ae: f941 0aef vst1.64 {d16-d17}, [r1 :128]
412b2: f961 0aef vld1.64 {d16-d17}, [r1 :128]
412b6: f50d 6123 add.w r1, sp, #2608 ; 0xa30
412ba: f941 0aef vst1.64 {d16-d17}, [r1 :128]
412be: f961 0aef vld1.64 {d16-d17}, [r1 :128]
412c2: a964 add r1, sp, #400 ; 0x190
412c4: f941 0aef vst1.64 {d16-d17}, [r1 :128]
412c8: f88d 0a1f strb.w r0, [sp, #2591] ; 0xa1f
412cc: f60d 201f addw r0, sp, #2591 ; 0xa1f
412d0: f9e0 2c0f vld1.8 {d18[]}, [r0]
412d4: edcc 2bd6 vstr d18, [ip, #856] ; 0x358
412d8: eddc 2bd6 vldr d18, [ip, #856] ; 0x358
412dc: edcc 2bd8 vstr d18, [ip, #864] ; 0x360
412e0: eddc 2bd8 vldr d18, [ip, #864] ; 0x360
412e4: edce 2b56 vstr d18, [lr, #344] ; 0x158
412e8: 2010 movs r0, #16
412ea: f88d 0a07 strb.w r0, [sp, #2567] ; 0xa07
412ee: f60d 2007 addw r0, sp, #2567 ; 0xa07
412f2: f9e0 0c2f vld1.8 {d16[]-d17[]}, [r0]
412f6: f50d 601e add.w r0, sp, #2528 ; 0x9e0
412fa: f940 0aef vst1.64 {d16-d17}, [r0 :128]
412fe: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41302: f50d 601f add.w r0, sp, #2544 ; 0x9f0
41306: f940 0aef vst1.64 {d16-d17}, [r0 :128]
4130a: f960 0aef vld1.64 {d16-d17}, [r0 :128]
4130e: a85c add r0, sp, #368 ; 0x170
41310: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41314: 20ff movs r0, #255 ; 0xff
41316: f8ad 09de strh.w r0, [sp, #2526] ; 0x9de
4131a: f60d 10de addw r0, sp, #2526 ; 0x9de
4131e: f9e0 0c7f vld1.16 {d16[]-d17[]}, [r0 :16]
41322: f50d 601b add.w r0, sp, #2480 ; 0x9b0
41326: f940 0aef vst1.64 {d16-d17}, [r0 :128]
4132a: f960 0aef vld1.64 {d16-d17}, [r0 :128]
4132e: f50d 601c add.w r0, sp, #2496 ; 0x9c0
41332: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41336: f960 0aef vld1.64 {d16-d17}, [r0 :128]
4133a: a858 add r0, sp, #352 ; 0x160
4133c: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41340: 986d ldr r0, [sp, #436] ; 0x1b4
41342: 996c ldr r1, [sp, #432] ; 0x1b0
41344: 4348 muls r0, r1
41346: 9057 str r0, [sp, #348] ; 0x15c
41348: 2000 movs r0, #0
4134a: 9056 str r0, [sp, #344] ; 0x158
4134c: 9957 ldr r1, [sp, #348] ; 0x15c
4134e: 9155 str r1, [sp, #340] ; 0x154
41350: 9053 str r0, [sp, #332] ; 0x14c
41352: f8cd c020 str.w ip, [sp, #32]
41356: f8cd e01c str.w lr, [sp, #28]
4135a: 9406 str r4, [sp, #24]
4135c: 9505 str r5, [sp, #20]
4135e: 9604 str r6, [sp, #16]
41360: f8cd 800c str.w r8, [sp, #12]
41364: e7ff b.n 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
41366: 9853 ldr r0, [sp, #332] ; 0x14c
41368: 996c ldr r1, [sp, #432] ; 0x1b0
4136a: 4288 cmp r0, r1
4136c: f280 856d bge.w 41e4a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc12>
41370: e7ff b.n 41372 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x13a>
41372: 2000 movs r0, #0
41374: 9054 str r0, [sp, #336] ; 0x150
41376: e7ff b.n 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
41378: 9854 ldr r0, [sp, #336] ; 0x150
4137a: 996d ldr r1, [sp, #436] ; 0x1b4
4137c: ebb0 1f21 cmp.w r0, r1, asr #4
41380: f280 84d5 bge.w 41d2e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaf6>
41384: e7ff b.n 41386 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x14e>
41386: 986e ldr r0, [sp, #440] ; 0x1b8
41388: f960 010d vld4.8 {d16,d18,d20,d22}, [r0]!
4138c: f960 110f vld4.8 {d17,d19,d21,d23}, [r0]
41390: ef66 81f6 vorr q12, q11, q11
41394: ef62 a1f2 vorr q13, q9, q9
41398: ef64 c1f4 vorr q14, q10, q10
4139c: ef60 e1f0 vorr q15, q8, q8
413a0: f50d 602c add.w r0, sp, #2752 ; 0xac0
413a4: f940 eacf vst1.64 {d30-d31}, [r0]
413a8: f100 0120 add.w r1, r0, #32
413ac: f941 cacf vst1.64 {d28-d29}, [r1]
413b0: 4602 mov r2, r0
413b2: f962 caed vld1.64 {d28-d29}, [r2 :128]!
413b6: f942 aacf vst1.64 {d26-d27}, [r2]
413ba: 3030 adds r0, #48 ; 0x30
413bc: f940 8acf vst1.64 {d24-d25}, [r0]
413c0: f50d 6330 add.w r3, sp, #2816 ; 0xb00
413c4: f103 0c20 add.w ip, r3, #32
413c8: f961 8aef vld1.64 {d24-d25}, [r1 :128]
413cc: f94c 8aef vst1.64 {d24-d25}, [ip :128]
413d0: 4619 mov r1, r3
413d2: f941 caed vst1.64 {d28-d29}, [r1 :128]!
413d6: f962 8aef vld1.64 {d24-d25}, [r2 :128]
413da: f941 8aef vst1.64 {d24-d25}, [r1 :128]
413de: f103 0230 add.w r2, r3, #48 ; 0x30
413e2: f960 8aef vld1.64 {d24-d25}, [r0 :128]
413e6: f942 8aef vst1.64 {d24-d25}, [r2 :128]
413ea: 986e ldr r0, [sp, #440] ; 0x1b8
413ec: 3040 adds r0, #64 ; 0x40
413ee: 906e str r0, [sp, #440] ; 0x1b8
413f0: f96c 8aef vld1.64 {d24-d25}, [ip :128]
413f4: f50d 601a add.w r0, sp, #2464 ; 0x9a0
413f8: f940 8aef vst1.64 {d24-d25}, [r0 :128]
413fc: f960 8aef vld1.64 {d24-d25}, [r0 :128]
41400: eeb0 0b68 vmov.f64 d0, d24
41404: 9808 ldr r0, [sp, #32]
41406: ed80 0bba vstr d0, [r0, #744] ; 0x2e8
4140a: ed90 0bba vldr d0, [r0, #744] ; 0x2e8
4140e: f50d 622b add.w r2, sp, #2736 ; 0xab0
41412: f902 071d vst1.8 {d0}, [r2 :64]!
41416: f96c 8aef vld1.64 {d24-d25}, [ip :128]
4141a: f50d 6c18 add.w ip, sp, #2432 ; 0x980
4141e: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41422: f96c 8aef vld1.64 {d24-d25}, [ip :128]
41426: eeb0 0b69 vmov.f64 d0, d25
4142a: ed80 0bb2 vstr d0, [r0, #712] ; 0x2c8
4142e: ed90 0bb2 vldr d0, [r0, #712] ; 0x2c8
41432: ed82 0b00 vstr d0, [r2]
41436: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4143a: f50d 6c16 add.w ip, sp, #2400 ; 0x960
4143e: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41442: f96c 8aef vld1.64 {d24-d25}, [ip :128]
41446: eeb0 0b68 vmov.f64 d0, d24
4144a: ed80 0baa vstr d0, [r0, #680] ; 0x2a8
4144e: ed90 0baa vldr d0, [r0, #680] ; 0x2a8
41452: f50d 6c2a add.w ip, sp, #2720 ; 0xaa0
41456: f90c 071d vst1.8 {d0}, [ip :64]!
4145a: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4145e: f50d 6114 add.w r1, sp, #2368 ; 0x940
41462: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41466: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4146a: eeb0 0b69 vmov.f64 d0, d25
4146e: ed80 0ba2 vstr d0, [r0, #648] ; 0x288
41472: ed90 0ba2 vldr d0, [r0, #648] ; 0x288
41476: ed8c 0b00 vstr d0, [ip]
4147a: f963 8aef vld1.64 {d24-d25}, [r3 :128]
4147e: f50d 6112 add.w r1, sp, #2336 ; 0x920
41482: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41486: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4148a: eeb0 0b68 vmov.f64 d0, d24
4148e: ed80 0b9a vstr d0, [r0, #616] ; 0x268
41492: ed90 0b9a vldr d0, [r0, #616] ; 0x268
41496: f50d 6129 add.w r1, sp, #2704 ; 0xa90
4149a: f901 071d vst1.8 {d0}, [r1 :64]!
4149e: f963 8aef vld1.64 {d24-d25}, [r3 :128]
414a2: f50d 6310 add.w r3, sp, #2304 ; 0x900
414a6: f943 8aef vst1.64 {d24-d25}, [r3 :128]
414aa: f963 8aef vld1.64 {d24-d25}, [r3 :128]
414ae: eeb0 0b69 vmov.f64 d0, d25
414b2: ed80 0b92 vstr d0, [r0, #584] ; 0x248
414b6: ed90 0b92 vldr d0, [r0, #584] ; 0x248
414ba: ed81 0b00 vstr d0, [r1]
414be: 2342 movs r3, #66 ; 0x42
414c0: f88d 38f7 strb.w r3, [sp, #2295] ; 0x8f7
414c4: f60d 03f7 addw r3, sp, #2295 ; 0x8f7
414c8: f9a3 0c0f vld1.8 {d0[]}, [r3]
414cc: ed80 0b8c vstr d0, [r0, #560] ; 0x230
414d0: ed90 0b8c vldr d0, [r0, #560] ; 0x230
414d4: ed80 0b8e vstr d0, [r0, #568] ; 0x238
414d8: ed90 0b8e vldr d0, [r0, #568] ; 0x238
414dc: 9b07 ldr r3, [sp, #28]
414de: ed83 0b44 vstr d0, [r3, #272] ; 0x110
414e2: f50d 6400 add.w r4, sp, #2048 ; 0x800
414e6: ed94 0bac vldr d0, [r4, #688] ; 0x2b0
414ea: ed93 1b44 vldr d1, [r3, #272] ; 0x110
414ee: ed80 0b8a vstr d0, [r0, #552] ; 0x228
414f2: ed80 1b88 vstr d1, [r0, #544] ; 0x220
414f6: ed90 0b8a vldr d0, [r0, #552] ; 0x228
414fa: ed90 1b88 vldr d1, [r0, #544] ; 0x220
414fe: ffc0 8c01 vmull.u8 q12, d0, d1
41502: f50d 6e0c add.w lr, sp, #2240 ; 0x8c0
41506: f94e 8aef vst1.64 {d24-d25}, [lr :128]
4150a: f96e 8aef vld1.64 {d24-d25}, [lr :128]
4150e: f50d 6e27 add.w lr, sp, #2672 ; 0xa70
41512: f94e 8aef vst1.64 {d24-d25}, [lr :128]
41516: ed92 0b00 vldr d0, [r2]
4151a: ed93 1b44 vldr d1, [r3, #272] ; 0x110
4151e: ed80 0b82 vstr d0, [r0, #520] ; 0x208
41522: ed80 1b80 vstr d1, [r0, #512] ; 0x200
41526: ed90 0b82 vldr d0, [r0, #520] ; 0x208
4152a: ed90 1b80 vldr d1, [r0, #512] ; 0x200
4152e: ffc0 8c01 vmull.u8 q12, d0, d1
41532: f50d 620a add.w r2, sp, #2208 ; 0x8a0
41536: f942 8aef vst1.64 {d24-d25}, [r2 :128]
4153a: f962 8aef vld1.64 {d24-d25}, [r2 :128]
4153e: 2281 movs r2, #129 ; 0x81
41540: f88d 289f strb.w r2, [sp, #2207] ; 0x89f
41544: f60d 029f addw r2, sp, #2207 ; 0x89f
41548: f9a2 0c0f vld1.8 {d0[]}, [r2]
4154c: ed80 0b76 vstr d0, [r0, #472] ; 0x1d8
41550: ed90 0b76 vldr d0, [r0, #472] ; 0x1d8
41554: ed80 0b78 vstr d0, [r0, #480] ; 0x1e0
41558: ed90 0b78 vldr d0, [r0, #480] ; 0x1e0
4155c: ed83 0b44 vstr d0, [r3, #272] ; 0x110
41560: 4672 mov r2, lr
41562: f962 aa6d vld1.16 {d26-d27}, [r2 :128]!
41566: f942 8aef vst1.64 {d24-d25}, [r2 :128]
4156a: ed90 0bfc vldr d0, [r0, #1008] ; 0x3f0
4156e: ed93 1b44 vldr d1, [r3, #272] ; 0x110
41572: f50d 6405 add.w r4, sp, #2128 ; 0x850
41576: f944 aaef vst1.64 {d26-d27}, [r4 :128]
4157a: ed80 0b66 vstr d0, [r0, #408] ; 0x198
4157e: ed80 1b64 vstr d1, [r0, #400] ; 0x190
41582: f964 8aef vld1.64 {d24-d25}, [r4 :128]
41586: ed90 0b66 vldr d0, [r0, #408] ; 0x198
4158a: ed90 1b64 vldr d1, [r0, #400] ; 0x190
4158e: ed80 0b74 vstr d0, [r0, #464] ; 0x1d0
41592: ed80 1b72 vstr d1, [r0, #456] ; 0x1c8
41596: ed90 0b74 vldr d0, [r0, #464] ; 0x1d0
4159a: ed90 1b72 vldr d1, [r0, #456] ; 0x1c8
4159e: ffc0 ac01 vmull.u8 q13, d0, d1
415a2: f50d 6406 add.w r4, sp, #2144 ; 0x860
415a6: f944 aaef vst1.64 {d26-d27}, [r4 :128]
415aa: f964 aaef vld1.64 {d26-d27}, [r4 :128]
415ae: ef58 88ea vadd.i16 q12, q12, q13
415b2: f50d 6403 add.w r4, sp, #2096 ; 0x830
415b6: f944 8aef vst1.64 {d24-d25}, [r4 :128]
415ba: f964 8aef vld1.64 {d24-d25}, [r4 :128]
415be: f94e 8aef vst1.64 {d24-d25}, [lr :128]
415c2: f962 8aef vld1.64 {d24-d25}, [r2 :128]
415c6: ed9c 0b00 vldr d0, [ip]
415ca: ed93 1b44 vldr d1, [r3, #272] ; 0x110
415ce: f50d 6c00 add.w ip, sp, #2048 ; 0x800
415d2: f94c 8aef vst1.64 {d24-d25}, [ip :128]
415d6: ed80 0b52 vstr d0, [r0, #328] ; 0x148
415da: ed80 1b50 vstr d1, [r0, #320] ; 0x140
415de: f96c 8aef vld1.64 {d24-d25}, [ip :128]
415e2: ed90 0b52 vldr d0, [r0, #328] ; 0x148
415e6: ed90 1b50 vldr d1, [r0, #320] ; 0x140
415ea: ed80 0b5e vstr d0, [r0, #376] ; 0x178
415ee: ed80 1b5c vstr d1, [r0, #368] ; 0x170
415f2: ed90 0b5e vldr d0, [r0, #376] ; 0x178
415f6: ed90 1b5c vldr d1, [r0, #368] ; 0x170
415fa: ffc0 ac01 vmull.u8 q13, d0, d1
415fe: f50d 6c01 add.w ip, sp, #2064 ; 0x810
41602: f94c aaef vst1.64 {d26-d27}, [ip :128]
41606: f96c aaef vld1.64 {d26-d27}, [ip :128]
4160a: ef58 88ea vadd.i16 q12, q12, q13
4160e: f50d 6cfc add.w ip, sp, #2016 ; 0x7e0
41612: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41616: f96c 8aef vld1.64 {d24-d25}, [ip :128]
4161a: f942 8aef vst1.64 {d24-d25}, [r2 :128]
4161e: f04f 0c19 mov.w ip, #25
41622: f88d c7df strb.w ip, [sp, #2015] ; 0x7df
41626: f20d 7cdf addw ip, sp, #2015 ; 0x7df
4162a: f9ac 0c0f vld1.8 {d0[]}, [ip]
4162e: ed80 0b46 vstr d0, [r0, #280] ; 0x118
41632: ed90 0b46 vldr d0, [r0, #280] ; 0x118
41636: ed80 0b48 vstr d0, [r0, #288] ; 0x120
4163a: ed90 0b48 vldr d0, [r0, #288] ; 0x120
4163e: ed83 0b44 vstr d0, [r3, #272] ; 0x110
41642: f96e 8aef vld1.64 {d24-d25}, [lr :128]
41646: ed90 0bf8 vldr d0, [r0, #992] ; 0x3e0
4164a: ed93 1b44 vldr d1, [r3, #272] ; 0x110
4164e: f50d 6cf2 add.w ip, sp, #1936 ; 0x790
41652: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41656: ed80 0b36 vstr d0, [r0, #216] ; 0xd8
4165a: ed80 1b34 vstr d1, [r0, #208] ; 0xd0
4165e: f96c 8aef vld1.64 {d24-d25}, [ip :128]
41662: ed90 0b36 vldr d0, [r0, #216] ; 0xd8
41666: ed90 1b34 vldr d1, [r0, #208] ; 0xd0
4166a: ed80 0b44 vstr d0, [r0, #272] ; 0x110
4166e: ed80 1b42 vstr d1, [r0, #264] ; 0x108
41672: ed90 0b44 vldr d0, [r0, #272] ; 0x110
41676: ed90 1b42 vldr d1, [r0, #264] ; 0x108
4167a: ffc0 ac01 vmull.u8 q13, d0, d1
4167e: f50d 6cf4 add.w ip, sp, #1952 ; 0x7a0
41682: f94c aaef vst1.64 {d26-d27}, [ip :128]
41686: f96c aaef vld1.64 {d26-d27}, [ip :128]
4168a: ef58 88ea vadd.i16 q12, q12, q13
4168e: f50d 6cee add.w ip, sp, #1904 ; 0x770
41692: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41696: f96c 8aef vld1.64 {d24-d25}, [ip :128]
4169a: f94e 8aef vst1.64 {d24-d25}, [lr :128]
4169e: f962 8aef vld1.64 {d24-d25}, [r2 :128]
416a2: ed91 0b00 vldr d0, [r1]
416a6: ed93 1b44 vldr d1, [r3, #272] ; 0x110
416aa: f50d 61e8 add.w r1, sp, #1856 ; 0x740
416ae: f941 8aef vst1.64 {d24-d25}, [r1 :128]
416b2: ed80 0b22 vstr d0, [r0, #136] ; 0x88
416b6: ed80 1b20 vstr d1, [r0, #128] ; 0x80
416ba: f961 8aef vld1.64 {d24-d25}, [r1 :128]
416be: ed90 0b22 vldr d0, [r0, #136] ; 0x88
416c2: ed90 1b20 vldr d1, [r0, #128] ; 0x80
416c6: ed80 0b2e vstr d0, [r0, #184] ; 0xb8
416ca: ed80 1b2c vstr d1, [r0, #176] ; 0xb0
416ce: ed90 0b2e vldr d0, [r0, #184] ; 0xb8
416d2: ed90 1b2c vldr d1, [r0, #176] ; 0xb0
416d6: ffc0 ac01 vmull.u8 q13, d0, d1
416da: f50d 61ea add.w r1, sp, #1872 ; 0x750
416de: f941 aaef vst1.64 {d26-d27}, [r1 :128]
416e2: f961 aaef vld1.64 {d26-d27}, [r1 :128]
416e6: ef58 88ea vadd.i16 q12, q12, q13
416ea: f50d 61e4 add.w r1, sp, #1824 ; 0x720
416ee: f941 8aef vst1.64 {d24-d25}, [r1 :128]
416f2: f961 8aef vld1.64 {d24-d25}, [r1 :128]
416f6: f942 8aef vst1.64 {d24-d25}, [r2 :128]
416fa: f96e 8aef vld1.64 {d24-d25}, [lr :128]
416fe: a968 add r1, sp, #416 ; 0x1a0
41700: f961 aaef vld1.64 {d26-d27}, [r1 :128]
41704: f50d 6ce2 add.w ip, sp, #1808 ; 0x710
41708: f94c 8aef vst1.64 {d24-d25}, [ip :128]
4170c: f50d 64e0 add.w r4, sp, #1792 ; 0x700
41710: f944 aaef vst1.64 {d26-d27}, [r4 :128]
41714: f96c 8aef vld1.64 {d24-d25}, [ip :128]
41718: f964 aaef vld1.64 {d26-d27}, [r4 :128]
4171c: ef58 88ea vadd.i16 q12, q12, q13
41720: f50d 6cde add.w ip, sp, #1776 ; 0x6f0
41724: f94c 8aef vst1.64 {d24-d25}, [ip :128]
41728: f96c 8aef vld1.64 {d24-d25}, [ip :128]
4172c: f94e 8aef vst1.64 {d24-d25}, [lr :128]
41730: f962 8aef vld1.64 {d24-d25}, [r2 :128]
41734: f961 aaef vld1.64 {d26-d27}, [r1 :128]
41738: f50d 61dc add.w r1, sp, #1760 ; 0x6e0
4173c: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41740: f50d 6cda add.w ip, sp, #1744 ; 0x6d0
41744: f94c aaef vst1.64 {d26-d27}, [ip :128]
41748: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4174c: f96c aaef vld1.64 {d26-d27}, [ip :128]
41750: ef58 88ea vadd.i16 q12, q12, q13
41754: f50d 61d8 add.w r1, sp, #1728 ; 0x6c0
41758: f941 8aef vst1.64 {d24-d25}, [r1 :128]
4175c: f961 8aef vld1.64 {d24-d25}, [r1 :128]
41760: f942 8aef vst1.64 {d24-d25}, [r2 :128]
41764: f96e 8aef vld1.64 {d24-d25}, [lr :128]
41768: a948 add r1, sp, #288 ; 0x120
4176a: f941 8aef vst1.64 {d24-d25}, [r1 :128]
4176e: f961 8aef vld1.64 {d24-d25}, [r1 :128]
41772: ff88 0938 vqshrn.u16 d0, q12, #8
41776: ed83 0b3a vstr d0, [r3, #232] ; 0xe8
4177a: ed93 0b3a vldr d0, [r3, #232] ; 0xe8
4177e: ed83 0b38 vstr d0, [r3, #224] ; 0xe0
41782: ed93 0b38 vldr d0, [r3, #224] ; 0xe0
41786: f962 8aef vld1.64 {d24-d25}, [r2 :128]
4178a: a940 add r1, sp, #256 ; 0x100
4178c: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41790: f961 8aef vld1.64 {d24-d25}, [r1 :128]
41794: ff88 1938 vqshrn.u16 d1, q12, #8
41798: ed83 1b32 vstr d1, [r3, #200] ; 0xc8
4179c: ed93 1b32 vldr d1, [r3, #200] ; 0xc8
417a0: ed83 1b30 vstr d1, [r3, #192] ; 0xc0
417a4: ed93 1b30 vldr d1, [r3, #192] ; 0xc0
417a8: ed80 0b02 vstr d0, [r0, #8]
417ac: ed80 1b00 vstr d1, [r0]
417b0: ed90 0b02 vldr d0, [r0, #8]
417b4: ed90 1b00 vldr d1, [r0]
417b8: eef0 8b40 vmov.f64 d24, d0
417bc: eef0 9b41 vmov.f64 d25, d1
417c0: f50d 61d4 add.w r1, sp, #1696 ; 0x6a0
417c4: f941 8aef vst1.64 {d24-d25}, [r1 :128]
417c8: f961 8aef vld1.64 {d24-d25}, [r1 :128]
417cc: a94c add r1, sp, #304 ; 0x130
417ce: f941 8aef vst1.64 {d24-d25}, [r1 :128]
417d2: f961 8aef vld1.64 {d24-d25}, [r1 :128]
417d6: aa5c add r2, sp, #368 ; 0x170
417d8: f962 aaef vld1.64 {d26-d27}, [r2 :128]
417dc: f50d 62d2 add.w r2, sp, #1680 ; 0x690
417e0: f942 8aef vst1.64 {d24-d25}, [r2 :128]
417e4: f50d 6cd0 add.w ip, sp, #1664 ; 0x680
417e8: f94c aaef vst1.64 {d26-d27}, [ip :128]
417ec: f962 8aef vld1.64 {d24-d25}, [r2 :128]
417f0: f96c aaef vld1.64 {d26-d27}, [ip :128]
417f4: ef48 88ea vadd.i8 q12, q12, q13
417f8: f50d 62ce add.w r2, sp, #1648 ; 0x670
417fc: f942 8aef vst1.64 {d24-d25}, [r2 :128]
41800: f962 8aef vld1.64 {d24-d25}, [r2 :128]
41804: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41808: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4180c: a938 add r1, sp, #224 ; 0xe0
4180e: f941 8aef vst1.64 {d24-d25}, [r1 :128]
41812: 9a6f ldr r2, [sp, #444] ; 0x1bc
41814: f8dd c158 ldr.w ip, [sp, #344] ; 0x158
41818: 4462 add r2, ip
4181a: f961 8aef vld1.64 {d24-d25}, [r1 :128]
4181e: f942 8a0f vst1.8 {d24-d25}, [r2]
41822: 9956 ldr r1, [sp, #344] ; 0x158
41824: 3110 adds r1, #16
41826: 9156 str r1, [sp, #344] ; 0x158
41828: 9953 ldr r1, [sp, #332] ; 0x14c
4182a: eb01 72d1 add.w r2, r1, r1, lsr #31
4182e: f022 0201 bic.w r2, r2, #1
41832: 1a89 subs r1, r1, r2
41834: 2900 cmp r1, #0
41836: f040 8274 bne.w 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
4183a: e7ff b.n 4183c <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x604>
4183c: f64f 70da movw r0, #65498 ; 0xffda
41840: f8ad 066e strh.w r0, [sp, #1646] ; 0x66e
41844: f20d 606e addw r0, sp, #1646 ; 0x66e
41848: f9e0 0c7f vld1.16 {d16[]-d17[]}, [r0 :16]
4184c: f50d 60c8 add.w r0, sp, #1600 ; 0x640
41850: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41854: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41858: f50d 60ca add.w r0, sp, #1616 ; 0x650
4185c: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41860: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41864: a834 add r0, sp, #208 ; 0xd0
41866: f940 0aef vst1.64 {d16-d17}, [r0 :128]
4186a: 2170 movs r1, #112 ; 0x70
4186c: f8ad 163e strh.w r1, [sp, #1598] ; 0x63e
41870: f20d 623e addw r2, sp, #1598 ; 0x63e
41874: f9e2 0c7f vld1.16 {d16[]-d17[]}, [r2 :16]
41878: f50d 62c2 add.w r2, sp, #1552 ; 0x610
4187c: f942 0aef vst1.64 {d16-d17}, [r2 :128]
41880: f962 0aef vld1.64 {d16-d17}, [r2 :128]
41884: f50d 62c4 add.w r2, sp, #1568 ; 0x620
41888: f942 0aef vst1.64 {d16-d17}, [r2 :128]
4188c: f962 0aef vld1.64 {d16-d17}, [r2 :128]
41890: aa30 add r2, sp, #192 ; 0xc0
41892: f942 0aef vst1.64 {d16-d17}, [r2 :128]
41896: f50d 6330 add.w r3, sp, #2816 ; 0xb00
4189a: f103 0c20 add.w ip, r3, #32
4189e: f96c 0aef vld1.64 {d16-d17}, [ip :128]
418a2: f50d 6cc0 add.w ip, sp, #1536 ; 0x600
418a6: f94c 0aef vst1.64 {d16-d17}, [ip :128]
418aa: f96c 0aef vld1.64 {d16-d17}, [ip :128]
418ae: f50d 6cbe add.w ip, sp, #1520 ; 0x5f0
418b2: f94c 0aef vst1.64 {d16-d17}, [ip :128]
418b6: f96c 0aef vld1.64 {d16-d17}, [ip :128]
418ba: f50d 7cb0 add.w ip, sp, #352 ; 0x160
418be: f96c 2aef vld1.64 {d18-d19}, [ip :128]
418c2: f50d 6ebc add.w lr, sp, #1504 ; 0x5e0
418c6: f94e 0aef vst1.64 {d16-d17}, [lr :128]
418ca: f50d 64ba add.w r4, sp, #1488 ; 0x5d0
418ce: f944 2aef vst1.64 {d18-d19}, [r4 :128]
418d2: f96e 0aef vld1.64 {d16-d17}, [lr :128]
418d6: f964 2aef vld1.64 {d18-d19}, [r4 :128]
418da: ef40 01f2 vand q8, q8, q9
418de: f50d 6eb8 add.w lr, sp, #1472 ; 0x5c0
418e2: f94e 0aef vst1.64 {d16-d17}, [lr :128]
418e6: f96e 0aef vld1.64 {d16-d17}, [lr :128]
418ea: f50d 6eb6 add.w lr, sp, #1456 ; 0x5b0
418ee: f94e 0aef vst1.64 {d16-d17}, [lr :128]
418f2: f96e 0aef vld1.64 {d16-d17}, [lr :128]
418f6: f50d 6eb4 add.w lr, sp, #1440 ; 0x5a0
418fa: f94e 0aef vst1.64 {d16-d17}, [lr :128]
418fe: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41902: f10d 0eb0 add.w lr, sp, #176 ; 0xb0
41906: f94e 0aef vst1.64 {d16-d17}, [lr :128]
4190a: f103 0410 add.w r4, r3, #16
4190e: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41912: f50d 64b2 add.w r4, sp, #1424 ; 0x590
41916: f944 0aef vst1.64 {d16-d17}, [r4 :128]
4191a: f964 0aef vld1.64 {d16-d17}, [r4 :128]
4191e: f50d 64b0 add.w r4, sp, #1408 ; 0x580
41922: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41926: f964 0aef vld1.64 {d16-d17}, [r4 :128]
4192a: f96c 2aef vld1.64 {d18-d19}, [ip :128]
4192e: f50d 64ae add.w r4, sp, #1392 ; 0x570
41932: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41936: f50d 65ac add.w r5, sp, #1376 ; 0x560
4193a: f945 2aef vst1.64 {d18-d19}, [r5 :128]
4193e: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41942: f965 2aef vld1.64 {d18-d19}, [r5 :128]
41946: ef40 01f2 vand q8, q8, q9
4194a: f50d 64aa add.w r4, sp, #1360 ; 0x550
4194e: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41952: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41956: f50d 64a8 add.w r4, sp, #1344 ; 0x540
4195a: f944 0aef vst1.64 {d16-d17}, [r4 :128]
4195e: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41962: f50d 64a6 add.w r4, sp, #1328 ; 0x530
41966: f944 0aef vst1.64 {d16-d17}, [r4 :128]
4196a: f964 0aef vld1.64 {d16-d17}, [r4 :128]
4196e: ac28 add r4, sp, #160 ; 0xa0
41970: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41974: f963 0aef vld1.64 {d16-d17}, [r3 :128]
41978: f50d 63a4 add.w r3, sp, #1312 ; 0x520
4197c: f943 0aef vst1.64 {d16-d17}, [r3 :128]
41980: f963 0aef vld1.64 {d16-d17}, [r3 :128]
41984: f50d 63a2 add.w r3, sp, #1296 ; 0x510
41988: f943 0aef vst1.64 {d16-d17}, [r3 :128]
4198c: f963 0aef vld1.64 {d16-d17}, [r3 :128]
41990: f96c 2aef vld1.64 {d18-d19}, [ip :128]
41994: f50d 63a0 add.w r3, sp, #1280 ; 0x500
41998: f943 0aef vst1.64 {d16-d17}, [r3 :128]
4199c: f50d 6c9e add.w ip, sp, #1264 ; 0x4f0
419a0: f94c 2aef vst1.64 {d18-d19}, [ip :128]
419a4: f963 0aef vld1.64 {d16-d17}, [r3 :128]
419a8: f96c 2aef vld1.64 {d18-d19}, [ip :128]
419ac: ef40 01f2 vand q8, q8, q9
419b0: f50d 639c add.w r3, sp, #1248 ; 0x4e0
419b4: f943 0aef vst1.64 {d16-d17}, [r3 :128]
419b8: f963 0aef vld1.64 {d16-d17}, [r3 :128]
419bc: f50d 639a add.w r3, sp, #1232 ; 0x4d0
419c0: f943 0aef vst1.64 {d16-d17}, [r3 :128]
419c4: f963 0aef vld1.64 {d16-d17}, [r3 :128]
419c8: f50d 6398 add.w r3, sp, #1216 ; 0x4c0
419cc: f943 0aef vst1.64 {d16-d17}, [r3 :128]
419d0: f963 0aef vld1.64 {d16-d17}, [r3 :128]
419d4: ab24 add r3, sp, #144 ; 0x90
419d6: f943 0aef vst1.64 {d16-d17}, [r3 :128]
419da: f96e 0aef vld1.64 {d16-d17}, [lr :128]
419de: f960 2aef vld1.64 {d18-d19}, [r0 :128]
419e2: f50d 6c96 add.w ip, sp, #1200 ; 0x4b0
419e6: f94c 0aef vst1.64 {d16-d17}, [ip :128]
419ea: f50d 6594 add.w r5, sp, #1184 ; 0x4a0
419ee: f945 2aef vst1.64 {d18-d19}, [r5 :128]
419f2: f96c 0aef vld1.64 {d16-d17}, [ip :128]
419f6: f965 2aef vld1.64 {d18-d19}, [r5 :128]
419fa: ef50 09f2 vmul.i16 q8, q8, q9
419fe: f50d 6c92 add.w ip, sp, #1168 ; 0x490
41a02: f94c 0aef vst1.64 {d16-d17}, [ip :128]
41a06: f96c 0aef vld1.64 {d16-d17}, [ip :128]
41a0a: f10d 0c80 add.w ip, sp, #128 ; 0x80
41a0e: f94c 0aef vst1.64 {d16-d17}, [ip :128]
41a12: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41a16: f962 2aef vld1.64 {d18-d19}, [r2 :128]
41a1a: f50d 6e90 add.w lr, sp, #1152 ; 0x480
41a1e: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41a22: f50d 658e add.w r5, sp, #1136 ; 0x470
41a26: f945 2aef vst1.64 {d18-d19}, [r5 :128]
41a2a: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41a2e: f965 2aef vld1.64 {d18-d19}, [r5 :128]
41a32: ef50 09f2 vmul.i16 q8, q8, q9
41a36: f50d 6e8c add.w lr, sp, #1120 ; 0x460
41a3a: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41a3e: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41a42: f10d 0e70 add.w lr, sp, #112 ; 0x70
41a46: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41a4a: f64f 75b6 movw r5, #65462 ; 0xffb6
41a4e: f8ad 545e strh.w r5, [sp, #1118] ; 0x45e
41a52: f20d 455e addw r5, sp, #1118 ; 0x45e
41a56: f9e5 0c7f vld1.16 {d16[]-d17[]}, [r5 :16]
41a5a: f50d 6586 add.w r5, sp, #1072 ; 0x430
41a5e: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41a62: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41a66: f50d 6588 add.w r5, sp, #1088 ; 0x440
41a6a: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41a6e: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41a72: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41a76: f64f 75a2 movw r5, #65442 ; 0xffa2
41a7a: f8ad 542e strh.w r5, [sp, #1070] ; 0x42e
41a7e: f20d 452e addw r5, sp, #1070 ; 0x42e
41a82: f9e5 0c7f vld1.16 {d16[]-d17[]}, [r5 :16]
41a86: f50d 6580 add.w r5, sp, #1024 ; 0x400
41a8a: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41a8e: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41a92: f50d 6582 add.w r5, sp, #1040 ; 0x410
41a96: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41a9a: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41a9e: f942 0aef vst1.64 {d16-d17}, [r2 :128]
41aa2: f96c 0aef vld1.64 {d16-d17}, [ip :128]
41aa6: f964 2aef vld1.64 {d18-d19}, [r4 :128]
41aaa: f960 4aef vld1.64 {d20-d21}, [r0 :128]
41aae: adfc add r5, sp, #1008 ; 0x3f0
41ab0: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41ab4: aef8 add r6, sp, #992 ; 0x3e0
41ab6: f946 2aef vst1.64 {d18-d19}, [r6 :128]
41aba: f50d 7874 add.w r8, sp, #976 ; 0x3d0
41abe: f948 4aef vst1.64 {d20-d21}, [r8 :128]
41ac2: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41ac6: f966 2aef vld1.64 {d18-d19}, [r6 :128]
41aca: f968 4aef vld1.64 {d20-d21}, [r8 :128]
41ace: ef52 09e4 vmla.i16 q8, q9, q10
41ad2: adf0 add r5, sp, #960 ; 0x3c0
41ad4: f945 0aef vst1.64 {d16-d17}, [r5 :128]
41ad8: f965 0aef vld1.64 {d16-d17}, [r5 :128]
41adc: f94c 0aef vst1.64 {d16-d17}, [ip :128]
41ae0: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41ae4: f964 2aef vld1.64 {d18-d19}, [r4 :128]
41ae8: f962 4aef vld1.64 {d20-d21}, [r2 :128]
41aec: acec add r4, sp, #944 ; 0x3b0
41aee: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41af2: ade8 add r5, sp, #928 ; 0x3a0
41af4: f945 2aef vst1.64 {d18-d19}, [r5 :128]
41af8: aee4 add r6, sp, #912 ; 0x390
41afa: f946 4aef vst1.64 {d20-d21}, [r6 :128]
41afe: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41b02: f965 2aef vld1.64 {d18-d19}, [r5 :128]
41b06: f966 4aef vld1.64 {d20-d21}, [r6 :128]
41b0a: ef52 09e4 vmla.i16 q8, q9, q10
41b0e: ace0 add r4, sp, #896 ; 0x380
41b10: f944 0aef vst1.64 {d16-d17}, [r4 :128]
41b14: f964 0aef vld1.64 {d16-d17}, [r4 :128]
41b18: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41b1c: f8ad 137e strh.w r1, [sp, #894] ; 0x37e
41b20: f20d 317e addw r1, sp, #894 ; 0x37e
41b24: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16]
41b28: a9d4 add r1, sp, #848 ; 0x350
41b2a: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41b2e: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41b32: a9d8 add r1, sp, #864 ; 0x360
41b34: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41b38: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41b3c: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41b40: f64f 71ee movw r1, #65518 ; 0xffee
41b44: f8ad 134e strh.w r1, [sp, #846] ; 0x34e
41b48: f20d 314e addw r1, sp, #846 ; 0x34e
41b4c: f9e1 0c7f vld1.16 {d16[]-d17[]}, [r1 :16]
41b50: a9c8 add r1, sp, #800 ; 0x320
41b52: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41b56: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41b5a: a9cc add r1, sp, #816 ; 0x330
41b5c: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41b60: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41b64: f942 0aef vst1.64 {d16-d17}, [r2 :128]
41b68: f96c 0aef vld1.64 {d16-d17}, [ip :128]
41b6c: f963 2aef vld1.64 {d18-d19}, [r3 :128]
41b70: f960 4aef vld1.64 {d20-d21}, [r0 :128]
41b74: a8c4 add r0, sp, #784 ; 0x310
41b76: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41b7a: a9c0 add r1, sp, #768 ; 0x300
41b7c: f941 2aef vst1.64 {d18-d19}, [r1 :128]
41b80: acbc add r4, sp, #752 ; 0x2f0
41b82: f944 4aef vst1.64 {d20-d21}, [r4 :128]
41b86: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41b8a: f961 2aef vld1.64 {d18-d19}, [r1 :128]
41b8e: f964 4aef vld1.64 {d20-d21}, [r4 :128]
41b92: ef52 09e4 vmla.i16 q8, q9, q10
41b96: a8b8 add r0, sp, #736 ; 0x2e0
41b98: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41b9c: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41ba0: f94c 0aef vst1.64 {d16-d17}, [ip :128]
41ba4: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41ba8: f963 2aef vld1.64 {d18-d19}, [r3 :128]
41bac: f962 4aef vld1.64 {d20-d21}, [r2 :128]
41bb0: a8b4 add r0, sp, #720 ; 0x2d0
41bb2: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41bb6: a9b0 add r1, sp, #704 ; 0x2c0
41bb8: f941 2aef vst1.64 {d18-d19}, [r1 :128]
41bbc: aaac add r2, sp, #688 ; 0x2b0
41bbe: f942 4aef vst1.64 {d20-d21}, [r2 :128]
41bc2: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41bc6: f961 2aef vld1.64 {d18-d19}, [r1 :128]
41bca: f962 4aef vld1.64 {d20-d21}, [r2 :128]
41bce: ef52 09e4 vmla.i16 q8, q9, q10
41bd2: a8a8 add r0, sp, #672 ; 0x2a0
41bd4: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41bd8: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41bdc: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41be0: f96c 0aef vld1.64 {d16-d17}, [ip :128]
41be4: a864 add r0, sp, #400 ; 0x190
41be6: f960 2aef vld1.64 {d18-d19}, [r0 :128]
41bea: a9a4 add r1, sp, #656 ; 0x290
41bec: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41bf0: aaa0 add r2, sp, #640 ; 0x280
41bf2: f942 2aef vst1.64 {d18-d19}, [r2 :128]
41bf6: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41bfa: f962 2aef vld1.64 {d18-d19}, [r2 :128]
41bfe: ef50 08e2 vadd.i16 q8, q8, q9
41c02: a99c add r1, sp, #624 ; 0x270
41c04: f941 0aef vst1.64 {d16-d17}, [r1 :128]
41c08: f961 0aef vld1.64 {d16-d17}, [r1 :128]
41c0c: f94c 0aef vst1.64 {d16-d17}, [ip :128]
41c10: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41c14: f960 2aef vld1.64 {d18-d19}, [r0 :128]
41c18: a898 add r0, sp, #608 ; 0x260
41c1a: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41c1e: a994 add r1, sp, #592 ; 0x250
41c20: f941 2aef vst1.64 {d18-d19}, [r1 :128]
41c24: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41c28: f961 2aef vld1.64 {d18-d19}, [r1 :128]
41c2c: ef50 08e2 vadd.i16 q8, q8, q9
41c30: a890 add r0, sp, #576 ; 0x240
41c32: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41c36: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41c3a: f94e 0aef vst1.64 {d16-d17}, [lr :128]
41c3e: f96c 0aef vld1.64 {d16-d17}, [ip :128]
41c42: a818 add r0, sp, #96 ; 0x60
41c44: f940 0aef vst1.64 {d16-d17}, [r0 :128]
41c48: f960 0aef vld1.64 {d16-d17}, [r0 :128]
41c4c: efc8 6930 vqshrn.s16 d22, q8, #8
41c50: 9807 ldr r0, [sp, #28]
41c52: edc0 6b0a vstr d22, [r0, #40] ; 0x28
41c56: edd0 6b0a vldr d22, [r0, #40] ; 0x28
41c5a: edc0 6b08 vstr d22, [r0, #32]
41c5e: edd0 6b08 vldr d22, [r0, #32]
41c62: edd0 7b56 vldr d23, [r0, #344] ; 0x158
41c66: edc0 6b82 vstr d22, [r0, #520] ; 0x208
41c6a: edc0 7b80 vstr d23, [r0, #512] ; 0x200
41c6e: edd0 6b82 vldr d22, [r0, #520] ; 0x208
41c72: edd0 7b80 vldr d23, [r0, #512] ; 0x200
41c76: ef46 68a7 vadd.i8 d22, d22, d23
41c7a: edc0 6b7e vstr d22, [r0, #504] ; 0x1f8
41c7e: edd0 6b7e vldr d22, [r0, #504] ; 0x1f8
41c82: edc0 6b7c vstr d22, [r0, #496] ; 0x1f0
41c86: edd0 6b7c vldr d22, [r0, #496] ; 0x1f0
41c8a: edc0 6b7a vstr d22, [r0, #488] ; 0x1e8
41c8e: edd0 6b7a vldr d22, [r0, #488] ; 0x1e8
41c92: 9908 ldr r1, [sp, #32]
41c94: edc1 6bee vstr d22, [r1, #952] ; 0x3b8
41c98: f96e 0aef vld1.64 {d16-d17}, [lr :128]
41c9c: aa10 add r2, sp, #64 ; 0x40
41c9e: f942 0aef vst1.64 {d16-d17}, [r2 :128]
41ca2: f962 0aef vld1.64 {d16-d17}, [r2 :128]
41ca6: efc8 6930 vqshrn.s16 d22, q8, #8
41caa: edc0 6b02 vstr d22, [r0, #8]
41cae: edd0 6b02 vldr d22, [r0, #8]
41cb2: edc0 6b00 vstr d22, [r0]
41cb6: edd0 6b00 vldr d22, [r0]
41cba: edd0 7b56 vldr d23, [r0, #344] ; 0x158
41cbe: edc0 6b78 vstr d22, [r0, #480] ; 0x1e0
41cc2: edc0 7b76 vstr d23, [r0, #472] ; 0x1d8
41cc6: edd0 6b78 vldr d22, [r0, #480] ; 0x1e0
41cca: edd0 7b76 vldr d23, [r0, #472] ; 0x1d8
41cce: ef46 68a7 vadd.i8 d22, d22, d23
41cd2: edc0 6b74 vstr d22, [r0, #464] ; 0x1d0
41cd6: edd0 6b74 vldr d22, [r0, #464] ; 0x1d0
41cda: edc0 6b72 vstr d22, [r0, #456] ; 0x1c8
41cde: edd0 6b72 vldr d22, [r0, #456] ; 0x1c8
41ce2: edc0 6b70 vstr d22, [r0, #448] ; 0x1c0
41ce6: edd0 6b70 vldr d22, [r0, #448] ; 0x1c0
41cea: edc1 6bec vstr d22, [r1, #944] ; 0x3b0
41cee: f50d 6226 add.w r2, sp, #2656 ; 0xa60
41cf2: f962 0acf vld1.64 {d16-d17}, [r2]
41cf6: f50d 6225 add.w r2, sp, #2640 ; 0xa50
41cfa: f942 0acf vst1.64 {d16-d17}, [r2]
41cfe: 9b6f ldr r3, [sp, #444] ; 0x1bc
41d00: f8dd c154 ldr.w ip, [sp, #340] ; 0x154
41d04: 4463 add r3, ip
41d06: f962 671d vld1.8 {d22}, [r2 :64]!
41d0a: edd2 7b00 vldr d23, [r2]
41d0e: eef0 0b66 vmov.f64 d16, d22
41d12: eef0 1b67 vmov.f64 d17, d23
41d16: f943 080f vst2.8 {d16-d17}, [r3]
41d1a: 9a55 ldr r2, [sp, #340] ; 0x154
41d1c: 3210 adds r2, #16
41d1e: 9255 str r2, [sp, #340] ; 0x154
41d20: e7ff b.n 41d22 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaea>
41d22: e7ff b.n 41d24 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xaec>
41d24: 9854 ldr r0, [sp, #336] ; 0x150
41d26: 3001 adds r0, #1
41d28: 9054 str r0, [sp, #336] ; 0x150
41d2a: f7ff bb25 b.w 41378 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x140>
41d2e: 986d ldr r0, [sp, #436] ; 0x1b4
41d30: f020 000f bic.w r0, r0, #15
41d34: 9054 str r0, [sp, #336] ; 0x150
41d36: e7ff b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
41d38: 9854 ldr r0, [sp, #336] ; 0x150
41d3a: 996d ldr r1, [sp, #436] ; 0x1b4
41d3c: 4288 cmp r0, r1
41d3e: da7e bge.n 41e3e <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc06>
41d40: e7ff b.n 41d42 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb0a>
41d42: 986e ldr r0, [sp, #440] ; 0x1b8
41d44: 7880 ldrb r0, [r0, #2]
41d46: f88d 002f strb.w r0, [sp, #47] ; 0x2f
41d4a: 986e ldr r0, [sp, #440] ; 0x1b8
41d4c: 7840 ldrb r0, [r0, #1]
41d4e: f88d 002e strb.w r0, [sp, #46] ; 0x2e
41d52: 986e ldr r0, [sp, #440] ; 0x1b8
41d54: 7800 ldrb r0, [r0, #0]
41d56: f88d 002d strb.w r0, [sp, #45] ; 0x2d
41d5a: 986e ldr r0, [sp, #440] ; 0x1b8
41d5c: 3004 adds r0, #4
41d5e: 906e str r0, [sp, #440] ; 0x1b8
41d60: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f
41d64: eb00 1040 add.w r0, r0, r0, lsl #5
41d68: f89d 102e ldrb.w r1, [sp, #46] ; 0x2e
41d6c: eb01 11c1 add.w r1, r1, r1, lsl #7
41d70: eb01 0040 add.w r0, r1, r0, lsl #1
41d74: f89d 102d ldrb.w r1, [sp, #45] ; 0x2d
41d78: 2219 movs r2, #25
41d7a: fb11 0002 smlabb r0, r1, r2, r0
41d7e: 3080 adds r0, #128 ; 0x80
41d80: 2110 movs r1, #16
41d82: eb01 2010 add.w r0, r1, r0, lsr #8
41d86: f88d 002c strb.w r0, [sp, #44] ; 0x2c
41d8a: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f
41d8e: f06f 0125 mvn.w r1, #37 ; 0x25
41d92: fb10 f001 smulbb r0, r0, r1
41d96: f89d 102e ldrb.w r1, [sp, #46] ; 0x2e
41d9a: 224a movs r2, #74 ; 0x4a
41d9c: fb01 0012 mls r0, r1, r2, r0
41da0: f89d 102d ldrb.w r1, [sp, #45] ; 0x2d
41da4: ebc1 01c1 rsb r1, r1, r1, lsl #3
41da8: eb00 1001 add.w r0, r0, r1, lsl #4
41dac: 3080 adds r0, #128 ; 0x80
41dae: 2180 movs r1, #128 ; 0x80
41db0: eb01 2010 add.w r0, r1, r0, lsr #8
41db4: f88d 002b strb.w r0, [sp, #43] ; 0x2b
41db8: f89d 002f ldrb.w r0, [sp, #47] ; 0x2f
41dbc: ebc0 00c0 rsb r0, r0, r0, lsl #3
41dc0: f89d 202e ldrb.w r2, [sp, #46] ; 0x2e
41dc4: 235e movs r3, #94 ; 0x5e
41dc6: fb12 f203 smulbb r2, r2, r3
41dca: ebc2 1000 rsb r0, r2, r0, lsl #4
41dce: f89d 202d ldrb.w r2, [sp, #45] ; 0x2d
41dd2: eb02 02c2 add.w r2, r2, r2, lsl #3
41dd6: eba0 0042 sub.w r0, r0, r2, lsl #1
41dda: 3080 adds r0, #128 ; 0x80
41ddc: eb01 2010 add.w r0, r1, r0, lsr #8
41de0: f88d 002a strb.w r0, [sp, #42] ; 0x2a
41de4: f89d 002c ldrb.w r0, [sp, #44] ; 0x2c
41de8: 996f ldr r1, [sp, #444] ; 0x1bc
41dea: 9a56 ldr r2, [sp, #344] ; 0x158
41dec: 1c53 adds r3, r2, #1
41dee: 9356 str r3, [sp, #344] ; 0x158
41df0: 5488 strb r0, [r1, r2]
41df2: 9853 ldr r0, [sp, #332] ; 0x14c
41df4: eb00 71d0 add.w r1, r0, r0, lsr #31
41df8: f021 0101 bic.w r1, r1, #1
41dfc: 1a40 subs r0, r0, r1
41dfe: 2800 cmp r0, #0
41e00: d118 bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
41e02: e7ff b.n 41e04 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbcc>
41e04: 9854 ldr r0, [sp, #336] ; 0x150
41e06: eb00 71d0 add.w r1, r0, r0, lsr #31
41e0a: f021 0101 bic.w r1, r1, #1
41e0e: 1a40 subs r0, r0, r1
41e10: 2800 cmp r0, #0
41e12: d10f bne.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
41e14: e7ff b.n 41e16 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbde>
41e16: f89d 002a ldrb.w r0, [sp, #42] ; 0x2a
41e1a: 996f ldr r1, [sp, #444] ; 0x1bc
41e1c: 9a55 ldr r2, [sp, #340] ; 0x154
41e1e: 1c53 adds r3, r2, #1
41e20: 9355 str r3, [sp, #340] ; 0x154
41e22: 5488 strb r0, [r1, r2]
41e24: f89d 002b ldrb.w r0, [sp, #43] ; 0x2b
41e28: 996f ldr r1, [sp, #444] ; 0x1bc
41e2a: 9a55 ldr r2, [sp, #340] ; 0x154
41e2c: 1c53 adds r3, r2, #1
41e2e: 9355 str r3, [sp, #340] ; 0x154
41e30: 5488 strb r0, [r1, r2]
41e32: e7ff b.n 41e34 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfc>
41e34: e7ff b.n 41e36 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xbfe>
41e36: 9854 ldr r0, [sp, #336] ; 0x150
41e38: 3001 adds r0, #1
41e3a: 9054 str r0, [sp, #336] ; 0x150
41e3c: e77c b.n 41d38 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xb00>
41e3e: e7ff b.n 41e40 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc08>
41e40: 9853 ldr r0, [sp, #332] ; 0x14c
41e42: 3001 adds r0, #1
41e44: 9053 str r0, [sp, #332] ; 0x14c
41e46: f7ff ba8e b.w 41366 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0x12e>
41e4a: 4809 ldr r0, [pc, #36] ; (41e70 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc38>)
41e4c: 4478 add r0, pc
41e4e: 6800 ldr r0, [r0, #0]
41e50: 6800 ldr r0, [r0, #0]
41e52: 9909 ldr r1, [sp, #36] ; 0x24
41e54: 4288 cmp r0, r1
41e56: d106 bne.n 41e66 <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc2e>
41e58: e7ff b.n 41e5a <_Z30encodeYUV420SP_NEON_IntrinsicsPhS_ii+0xc22>
41e5a: f1a7 0418 sub.w r4, r7, #24
41e5e: 46a5 mov sp, r4
41e60: e8bd 0b00 ldmia.w sp!, {r8, r9, fp}
41e64: bdf0 pop {r4, r5, r6, r7, pc}
41e66: f7f8 ec6e blx 3a744 <__stack_chk_fail@plt>
41e6a: bf00 nop
41e6c: 001ef0f8 .word 0x001ef0f8
41e70: 001ee50c .word 0x001ee50c
NEON 版在我的手机上是“负优化”……跑得比朴素的 CPU 版还慢……看来网上给的 NEON 代码也未必靠谱,还是得亲自实践对比!
经过我的优化后,NEON 版达到了 53 ms 左右,展开(一次)版是 51 ms 左右,原图大小为 1600×1873。