NEON在Android中的使用举例

1、  打开Eclipse,File-->New-->Android Application Project-->Application Name:Hello-Neon, Project Name: Hello-Neon,Package Name:com.hello_neon.android, Minimum Required SDK:API 9:Android 2.3(Gingerbread),Next-->去掉Create custom launcher icon的勾选,Next-->Next-->Activity Name:Hello_NeonProjectActivity,Finish.

2、  打开Hello-Neon工程下的src-->com.hello_neon.android-->Hello_NeonProjectActivity.java,将其内容改为:

package com.hello_neon.android;

import android.os.Bundle;
import android.app.Activity;
import android.widget.TextView;

public class Hello_NeonProjectActivity extends Activity {

    /*
     * Load the 'helloneon' native library as soon as this class is
     * initialised, so that the native method declared below can be resolved.
     */
    static {
        System.loadLibrary("helloneon");
    }

    /**
     * Implemented in native code by the 'helloneon' library that is
     * packaged with this application.
     *
     * @return the text produced by the native side
     */
    public native String  stringFromJNI();

    /**
     * Called when the activity is first created: puts the string returned by
     * {@link #stringFromJNI()} into a TextView and uses that view as the
     * activity's content view.
     */
    @Override
    public void onCreate(Bundle savedInstanceState)
    {
        super.onCreate(savedInstanceState);

        final TextView resultView = new TextView(this);
        final String nativeText = stringFromJNI();
        resultView.setText(nativeText);
        setContentView(resultView);
    }

}

3、 保存Hello_NeonProjectActivity.java文件,打开命令行窗口,将其定位到\bin\classes目录下,输入命令:javah -classpath D:\ProgramFiles\Android\android-sdk\platforms\android-9\android.jar;. com.hello_neon.android.Hello_NeonProjectActivity ,会在\bin\classes目录下生成com_hello_neon_android_Hello_NeonProjectActivity.h文件(说明:*.jar也可以是其它版本);

4、  选中Hello-Neon工程,点击右键-->New-->Folder新建一个jni文件夹,在此文件夹下添加Android.mk、Application.mk、helloneon.c、helloneon-intrinsics.c、helloneon-intrinsics.h五个文件,其中内容分别为:

Android.mk:

# Build description for the 'helloneon' shared library (C sources).
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE    := helloneon

# Source files to compile
LOCAL_SRC_FILES := helloneon.c helloneon-intrinsics.c

# Header search paths: this directory and its parent
LOCAL_C_INCLUDES := \
$(LOCAL_PATH) \
$(LOCAL_PATH)/..

# The flags after -g are what make the arm_neon.h header usable;
# -mfloat-abi=softfp -mfpu=neon are required by arm_neon.h.
# These flags apply to every source file listed above.
LOCAL_CFLAGS := -g -mfloat-abi=softfp -mfpu=neon -march=armv7-a -mtune=cortex-a8

# NOTE(review): nothing in the sources shown appears to use zlib, so -lz looks
# unnecessary -- confirm before removing.
LOCAL_LDLIBS := -lz -llog
# NOTE(review): TARGET_ARCH_ABI is normally provided by ndk-build for the ABI
# currently being built; assigning it here makes the ifeq below independent of
# that ABI -- confirm this override is intended.
TARGET_ARCH_ABI := armeabi-v7a 
LOCAL_ARM_MODE := arm

ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
# Enable NEON for this module
    LOCAL_ARM_NEON := true
    #LOCAL_CFLAGS := -DHAVE_NEON=1
endif

# Provides <cpu-features.h> (android_getCpuFamily / android_getCpuFeatures)
LOCAL_STATIC_LIBRARIES := cpufeatures

# Build a shared library
include $(BUILD_SHARED_LIBRARY)

$(call import-module,cpufeatures)

Application.mk:

# Project root: the parent of the directory returned by my-dir.
APP_PROJECT_PATH := $(call my-dir)/..
# Native API level. Kept equal to the application's minimum SDK (API 9) so the
# library is not built against a platform newer than the oldest supported one.
APP_PLATFORM := android-9
# C++ runtime to link against
APP_STL := stlport_static
# APP_ABI selects the target ABIs to build for (e.g. x86 or armeabi-v7a),
# which allows per-platform optimisation.
# Build both ARMv5TE and ARMv7-A machine code.
APP_ABI := armeabi armeabi-v7a
APP_CPPFLAGS += -fexceptions
# C++ features (exceptions, RTTI) have to be enabled explicitly
APP_CPP_FEATURES += exceptions rtti

helloneon.c:

#include <jni.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <cpu-features.h>
#include "helloneon-intrinsics.h"

/* Set to 1 to send D() messages to the Android log. */
#define DEBUG 0
/* Enables the NEON benchmark path in this file. */
#define HAVE_NEON

/* D(fmt, ...): debug logging; expands to an empty statement unless DEBUG is set,
 * in which case its arguments are not evaluated at all. */
#if !DEBUG
#  define  D(...)  do {} while (0)
#else
#include <android/log.h>
#  define  D(...)  __android_log_print(ANDROID_LOG_INFO,"helloneon",__VA_ARGS__)
#endif

/*
 * Return a timestamp in milliseconds for measuring elapsed time.
 *
 * CLOCK_MONOTONIC is used so that the difference between two calls is not
 * disturbed by wall-clock adjustments made while a benchmark is running.
 * The value is only meaningful relative to another now_ms() result.
 * Returns 0.0 if the clock cannot be read.
 */
static double
now_ms(void)
{
    struct timespec res;

    if (clock_gettime(CLOCK_MONOTONIC, &res) != 0) {
        return 0.0;
    }
    return 1000.0*res.tv_sec + (double)res.tv_nsec/1e6;
}


/*
 * Reference FIR filter in plain C.
 *
 * For every output sample in [0, width) the kernelSize taps are applied to the
 * input window that starts at input[sample - kernelSize/2]; the int sum is
 * rounded (+0x8000) and shifted right by 16 bits before it is stored.
 * The caller must therefore supply readable samples before input[0] and past
 * input[width - 1], as required by that window.
 */
static void
fir_filter_c(short *output, const short* input, const short* kernel, int width, int kernelSize)
{
    const int  first_tap = -kernelSize/2;
    int  out_idx = 0;

    while (out_idx < width) {
        const int base = out_idx + first_tap;   /* index of the window start */
        int acc = 0;
        int tap;

        for (tap = 0; tap < kernelSize; tap++) {
            acc += kernel[tap]*input[base + tap];
        }
        output[out_idx] = (short)((acc + 0x8000) >> 16);
        out_idx++;
    }
}

/* Benchmark dimensions: number of taps, number of output samples, number of
 * input samples (output plus one kernel's worth of padding) and repetitions. */
#define  FIR_KERNEL_SIZE   32
#define  FIR_OUTPUT_SIZE   2560
#define  FIR_INPUT_SIZE    (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
#define  FIR_ITERATIONS    600

/* Filter taps: one 16-entry rising/falling shape, repeated twice.
 * All entries are hexadecimal; entries 12 and 28 are 0x70 to match entries 3
 * and 19 (a literal written as 070 would be octal, i.e. 56). */
static const short  fir_kernel[FIR_KERNEL_SIZE] = {
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10,
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10 };

static short        fir_output[FIR_OUTPUT_SIZE];
static short        fir_input_0[FIR_INPUT_SIZE];
/* The filters read kernelSize/2 samples before their input pointer, so the
 * pointer handed to them starts half a kernel into the backing array. */
static const short* fir_input = fir_input_0 + (FIR_KERNEL_SIZE/2);
static short        fir_output_expected[FIR_OUTPUT_SIZE];

/*
 * JNI entry point for Hello_NeonProjectActivity.stringFromJNI().
 *
 * Runs the FIR filter benchmark with the plain C implementation and, when the
 * CPU reports ARMv7 and NEON support at run time, with the NEON intrinsics
 * implementation as well.  Returns a new Java string holding the timing
 * report, or the reason why the NEON run was skipped.
 *
 * The report is formatted with snprintf() directly into a fixed-size stack
 * buffer: every write is bounded by the buffer size and there is no heap
 * allocation whose failure would have to be handled.
 */
JNIEXPORT jstring JNICALL Java_com_hello_1neon_android_Hello_1NeonProjectActivity_stringFromJNI(JNIEnv *env, jobject thiz)
{
    uint64_t features;
    char buffer[512];
    size_t used;      /* number of report characters already in buffer */
    int written;
    double  t0, t1, time_c, time_neon;

    (void)thiz;       /* the receiver object is not needed */

    /* setup FIR input - whatever */
    {
        int  nn;
        for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
            fir_input_0[nn] = (5*nn) & 255;
        }
        /* reference result used to check the NEON output further down */
        fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    }

    /* Benchmark small FIR filter loop - C version */
    t0 = now_ms();
    {
        int  count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_c = t1 - t0;

    /* Start the report; whatever follows is appended at buffer + used. */
    written = snprintf(buffer, sizeof buffer,
                       "FIR Filter benchmark:\nC version          : %g ms\nNeon version   : ",
                       time_c);
    if (written < 0) {
        /* formatting failed: fall back to an empty, terminated report */
        buffer[0] = '\0';
        used = 0;
    } else if ((size_t)written >= sizeof buffer) {
        /* output was truncated; the buffer is full but still terminated */
        used = sizeof buffer - 1;
    } else {
        used = (size_t)written;
    }

    if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
        snprintf(buffer + used, sizeof buffer - used, "Not an ARM CPU !\n");
        goto EXIT;
    }

    features = android_getCpuFeatures();
    if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) {
        snprintf(buffer + used, sizeof buffer - used, "Not an ARMv7 CPU !\n");
        goto EXIT;
    }

    /* HAVE_NEON is defined near the top of this file. */
#ifdef HAVE_NEON
    if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) {
        snprintf(buffer + used, sizeof buffer - used, "CPU doesn't support NEON !\n");
        goto EXIT;
    }

    /* Benchmark small FIR filter loop - Neon version */
    t0 = now_ms();
    {
        int  count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_neon = t1 - t0;
    /* guard the speed-up ratio against a (near) zero NEON time */
    snprintf(buffer + used, sizeof buffer - used, "%g ms (x%g faster)\n",
             time_neon, time_c / (time_neon < 1e-6 ? 1. : time_neon));

    /* check the result, just in case; mismatches are only reported through
     * D(), i.e. only when DEBUG is enabled */
    {
        int  nn, fails = 0;
        for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
            if (fir_output[nn] != fir_output_expected[nn]) {
                if (++fails < 16)
                    D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
            }
        }
        D("%d fails\n", fails);
    }
#else /* !HAVE_NEON */
    snprintf(buffer + used, sizeof buffer - used, "Program not compiled with ARMv7 support !\n");
#endif /* !HAVE_NEON */
EXIT:
    return (*env)->NewStringUTF(env, buffer);
}

helloneon-intrinsics.h:

#ifndef HELLONEON_INTRINSICS_H
#define HELLONEON_INTRINSICS_H

/*
 * FIR filter implemented with NEON intrinsics.
 *
 * Writes `width` samples to `output`.  Sample nn is the sum of
 * kernel[mm] * input[nn - kernelSize/2 + mm] over all `kernelSize` taps,
 * rounded by adding 0x8000 and then shifted right by 16 bits.
 * `input` must therefore be readable at the indices before 0 and beyond
 * width - 1 that this window reaches.
 */
void fir_filter_neon_intrinsics(short *output, const short* input, const short* kernel, int width, int kernelSize);

#endif /* HELLONEON_INTRINSICS_H */

helloneon-intrinsics.c:

#include "helloneon-intrinsics.h"
#include <arm_neon.h>

/*
 * FIR filter using NEON intrinsics.
 *
 * This source file is meant to be compiled only for the armeabi-v7a ABI, with
 * NEON enabled.
 *
 * Taps are processed four at a time with a widening multiply-accumulate into
 * four 32-bit lanes; the lanes are then added together, any taps left over
 * when kernelSize is not a multiple of 4 are handled one by one, and the sum
 * is rounded (+0x8000) and shifted right by 16 bits.
 */
void
fir_filter_neon_intrinsics(short *output, const short* input, const short* kernel, int width, int kernelSize)
{
#if 1
    const int first_tap  = -kernelSize/2;
    const int quad_count = kernelSize/4;                   /* full groups of 4 taps */
    const int tail_start = kernelSize - (kernelSize & 3);  /* first leftover tap */
    int out_idx;

    for (out_idx = 0; out_idx < width; out_idx++)
    {
        const int base = out_idx + first_tap;
        int32x4_t lanes = vdupq_n_s32(0);
        int quad, tap, acc = 0;

        for (quad = 0; quad < quad_count; quad++)
        {
            int16x4_t  taps    = vld1_s16(kernel + quad*4);
            int16x4_t  samples = vld1_s16(input + (base + quad*4));
            lanes = vmlal_s16(lanes, taps, samples);
        }

        /* horizontal add of the four partial sums */
        acc += vgetq_lane_s32(lanes, 0);
        acc += vgetq_lane_s32(lanes, 1);
        acc += vgetq_lane_s32(lanes, 2);
        acc += vgetq_lane_s32(lanes, 3);

        /* scalar tail; empty when kernelSize is a multiple of 4 */
        for (tap = tail_start; tap < kernelSize; tap++)
            acc += kernel[tap] * input[base + tap];

        output[out_idx] = (short)((acc + 0x8000) >> 16);
    }
#else /* for comparison purposes only */
    const int first_tap = -kernelSize/2;
    int out_idx = 0;

    while (out_idx < width) {
        const int base = out_idx + first_tap;
        int acc = 0;
        int tap;

        for (tap = 0; tap < kernelSize; tap++) {
            acc += kernel[tap]*input[base + tap];
        }
        output[out_idx] = (short)((acc + 0x8000) >> 16);
        out_idx++;
    }
#endif
}

5、 利用NDK生成.so文件:选中工程-->Properties-->Builders-->New-->选中Program-->OK,Name:Hello_Neon_Builder,Location: D:\ProgramFiles\Android\android-sdk\android-ndk-r9\ndk-build.cmd,Working Directory: E:\NEON\Eclipse\Hello-Neon -->Apply,选中Refresh,勾选Refresh resources upon completion, 勾选Specific resources,点击Specify Resources…,勾选Hello-Neon工程下的libs文件夹,Finish-->Apply,选中Build Options,勾选Allocate Console(necessary for input), After a “Clean”, During manual builds, During auto builds, Specify working set of relevant resources,点击Specify Resources…,勾选Hello-Neon工程下的jni文件夹,Finish-->Apply-->OK-->OK,会在libs文件夹下生成libhelloneon.so文件;

6、  选中Hello-Neon,-->Run As-->Android Application,运行结果为:

FIR Filter benchmark:

C version       :282.84 ms

Neon version    :135.985 ms(x2.07994 faster)

以上是.c文件的操作步骤,若将.c文件改为.cpp文件,则需改动两个文件:

1、将Android.mk改为:

# Build description for the 'helloneon' shared library (C++ sources).
LOCAL_PATH := $(call my-dir)
include $(CLEAR_VARS)
LOCAL_MODULE    := helloneon

# Source files to compile
LOCAL_SRC_FILES := helloneon.cpp helloneon-intrinsics.cpp

# Header search paths: this directory and its parent
LOCAL_C_INCLUDES := \
$(LOCAL_PATH) \
$(LOCAL_PATH)/..

# The flags after -g are what make the arm_neon.h header usable;
# -mfloat-abi=softfp -mfpu=neon are required by arm_neon.h.
# These flags apply to every source file listed above.
LOCAL_CFLAGS := -g -mfloat-abi=softfp -mfpu=neon -march=armv7-a -mtune=cortex-a8

# NOTE(review): nothing in the sources shown appears to use zlib, so -lz looks
# unnecessary -- confirm before removing.
LOCAL_LDLIBS := -lz -llog
# NOTE(review): TARGET_ARCH_ABI is normally provided by ndk-build for the ABI
# currently being built; assigning it here makes the ifeq below independent of
# that ABI -- confirm this override is intended.
TARGET_ARCH_ABI := armeabi-v7a 
LOCAL_ARM_MODE := arm

ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
# Enable NEON for this module
    LOCAL_ARM_NEON := true
    #LOCAL_CFLAGS := -DHAVE_NEON=1
endif

# Provides <cpu-features.h> (android_getCpuFamily / android_getCpuFeatures)
LOCAL_STATIC_LIBRARIES := cpufeatures

# Build a shared library
include $(BUILD_SHARED_LIBRARY)

$(call import-module,cpufeatures)

2、将helloneon.c改名为helloneon.cpp(helloneon-intrinsics.c同样改名为helloneon-intrinsics.cpp),其内容改为:

#include <jni.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <cpu-features.h>
#include "helloneon-intrinsics.h"

#define DEBUG 0
#define HAVE_NEON

#ifdef __cplusplus
extern "C" {
#endif

/* D(fmt, ...): debug logging; expands to an empty statement unless DEBUG is set,
 * in which case its arguments are not evaluated at all. */
#if !DEBUG
#  define  D(...)  do {} while (0)
#else
#include <android/log.h>
#  define  D(...)  __android_log_print(ANDROID_LOG_INFO,"helloneon",__VA_ARGS__)
#endif

/*
 * Return a timestamp in milliseconds for measuring elapsed time.
 *
 * CLOCK_MONOTONIC is used so that the difference between two calls is not
 * disturbed by wall-clock adjustments made while a benchmark is running.
 * The value is only meaningful relative to another now_ms() result.
 * Returns 0.0 if the clock cannot be read.
 */
static double
now_ms(void)
{
    struct timespec res;

    if (clock_gettime(CLOCK_MONOTONIC, &res) != 0) {
        return 0.0;
    }
    return 1000.0*res.tv_sec + (double)res.tv_nsec/1e6;
}


/*
 * Reference FIR filter in plain C.
 *
 * For every output sample in [0, width) the kernelSize taps are applied to the
 * input window that starts at input[sample - kernelSize/2]; the int sum is
 * rounded (+0x8000) and shifted right by 16 bits before it is stored.
 * The caller must therefore supply readable samples before input[0] and past
 * input[width - 1], as required by that window.
 */
static void
fir_filter_c(short *output, const short* input, const short* kernel, int width, int kernelSize)
{
    const int  first_tap = -kernelSize/2;
    int  out_idx = 0;

    while (out_idx < width) {
        const int base = out_idx + first_tap;   /* index of the window start */
        int acc = 0;
        int tap;

        for (tap = 0; tap < kernelSize; tap++) {
            acc += kernel[tap]*input[base + tap];
        }
        output[out_idx] = (short)((acc + 0x8000) >> 16);
        out_idx++;
    }
}

/* Benchmark dimensions: number of taps, number of output samples, number of
 * input samples (output plus one kernel's worth of padding) and repetitions. */
#define  FIR_KERNEL_SIZE   32
#define  FIR_OUTPUT_SIZE   2560
#define  FIR_INPUT_SIZE    (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
#define  FIR_ITERATIONS    600

/* Filter taps: one 16-entry rising/falling shape, repeated twice.
 * All entries are hexadecimal; entries 12 and 28 are 0x70 to match entries 3
 * and 19 (a literal written as 070 would be octal, i.e. 56). */
static const short  fir_kernel[FIR_KERNEL_SIZE] = {
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10,
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10 };

static short        fir_output[FIR_OUTPUT_SIZE];
static short        fir_input_0[FIR_INPUT_SIZE];
/* The filters read kernelSize/2 samples before their input pointer, so the
 * pointer handed to them starts half a kernel into the backing array. */
static const short* fir_input = fir_input_0 + (FIR_KERNEL_SIZE/2);
static short        fir_output_expected[FIR_OUTPUT_SIZE];

/*
 * JNI entry point for Hello_NeonProjectActivity.stringFromJNI().
 *
 * Runs the FIR filter benchmark with the plain C implementation and, when the
 * CPU reports ARMv7 and NEON support at run time, with the NEON intrinsics
 * implementation as well.  Returns a new Java string holding the timing
 * report, or the reason why the NEON run was skipped.
 *
 * The report is formatted with snprintf() directly into a fixed-size stack
 * buffer, so every write is bounded by the buffer size and no intermediate
 * scratch string is needed.
 */
JNIEXPORT jstring JNICALL Java_com_hello_1neon_android_Hello_1NeonProjectActivity_stringFromJNI(JNIEnv *env, jobject thiz)
{
    /* All locals are declared without initialisers up front so that the
     * "goto EXIT" jumps below never cross an initialisation. */
    uint64_t features;
    char buffer[512];
    size_t used;      /* number of report characters already in buffer */
    int written;
    double  t0, t1, time_c, time_neon;

    (void)thiz;       /* the receiver object is not needed */

    /* setup FIR input - whatever */
    {
        int  nn;
        for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
            fir_input_0[nn] = (5*nn) & 255;
        }
        /* reference result used to check the NEON output further down */
        fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    }

    /* Benchmark small FIR filter loop - C version */
    t0 = now_ms();
    {
        int  count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_c = t1 - t0;

    /* Start the report; whatever follows is appended at buffer + used. */
    written = snprintf(buffer, sizeof buffer,
                       "FIR Filter benchmark:\nC version          : %g ms\nNeon version   : ",
                       time_c);
    if (written < 0) {
        /* formatting failed: fall back to an empty, terminated report */
        buffer[0] = '\0';
        used = 0;
    } else if ((size_t)written >= sizeof buffer) {
        /* output was truncated; the buffer is full but still terminated */
        used = sizeof buffer - 1;
    } else {
        used = (size_t)written;
    }

    if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
        snprintf(buffer + used, sizeof buffer - used, "Not an ARM CPU !\n");
        goto EXIT;
    }

    features = android_getCpuFeatures();
    if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) {
        snprintf(buffer + used, sizeof buffer - used, "Not an ARMv7 CPU !\n");
        goto EXIT;
    }

    /* HAVE_NEON is defined near the top of this file. */
#ifdef HAVE_NEON
    if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) {
        snprintf(buffer + used, sizeof buffer - used, "CPU doesn't support NEON !\n");
        goto EXIT;
    }

    /* Benchmark small FIR filter loop - Neon version */
    t0 = now_ms();
    {
        int  count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_neon = t1 - t0;
    /* guard the speed-up ratio against a (near) zero NEON time */
    snprintf(buffer + used, sizeof buffer - used, "%g ms (x%g faster)\n",
             time_neon, time_c / (time_neon < 1e-6 ? 1. : time_neon));

    /* check the result, just in case; mismatches are only reported through
     * D(), i.e. only when DEBUG is enabled */
    {
        int  nn, fails = 0;
        for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
            if (fir_output[nn] != fir_output_expected[nn]) {
                if (++fails < 16)
                    D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
            }
        }
        D("%d fails\n", fails);
    }
#else /* !HAVE_NEON */
    snprintf(buffer + used, sizeof buffer - used, "Program not compiled with ARMv7 support !\n");
#endif /* !HAVE_NEON */
EXIT:
    return env->NewStringUTF(buffer);
}

#ifdef __cplusplus
}
#endif


参考文献:

1、  http://blog.csdn.net/fengbingchun/article/details/11580983

2、  android-ndk-r9-windows-x86_64中的hello-neon例子代码


你可能感兴趣的:(NEON在Android中的使用举例)