1、 打开Eclipse,File-->New-->Android Application Project-->Application Name:Hello-Neon, Project Name: Hello-Neon,Package Name:com.hello_neon.android, Minimum Required SDK:API 9:Android 2.3(Gingerbread),Next-->去掉Create custom launcher icon的勾选,Next-->Next-->Activity Name:Hello_NeonProjectActivity,Finish.
2、 打开Hello-Neon工程下的src-->com.hello_neon.android-->Hello_NeonProjectActivity.java,将其内容改为:
package com.hello_neon.android;

import android.app.Activity;
import android.os.Bundle;
import android.widget.TextView;

/**
 * Single-screen activity that displays the text returned by the native
 * 'helloneon' library.
 */
public class Hello_NeonProjectActivity extends Activity {

    /*
     * Load the 'helloneon' native library when this class is initialized,
     * so that stringFromJNI() can be resolved before onCreate() calls it.
     * The library is packaged with this application.
     */
    static {
        System.loadLibrary("helloneon");
    }

    /**
     * Native method implemented by the 'helloneon' library.
     *
     * @return the text to show on screen
     */
    public native String stringFromJNI();

    /** Called when the activity is first created. */
    @Override
    public void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);

        /* Create the view first, then fill it with the string produced
         * by the native function, and make it the activity content. */
        final TextView resultView = new TextView(this);
        final String nativeText = stringFromJNI();
        resultView.setText(nativeText);
        setContentView(resultView);
    }
}
3、 保存Hello_NeonProjectActivity.java文件,打开命令行窗口,将其定位到\bin\classes目录下,输入命令:javah -classpath D:\ProgramFiles\Android\android-sdk\platforms\android-9\android.jar;. com.hello_neon.android.Hello_NeonProjectActivity ,会在\bin\classes目录下生成com_hello_neon_android_Hello_NeonProjectActivity.h文件(说明:*.jar也可以是其它版本);
4、 选中Hello-Neon工程,点击右键-->New-->Folder新建一个jni文件夹,在此文件夹下添加Android.mk、Application.mk、helloneon.c、helloneon-intrinsics.c、helloneon-intrinsics.h五个文件,其中内容分别为:
Android.mk:
# Android.mk -- ndk-build script for the 'helloneon' shared library.
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := helloneon

# Source files to compile.
LOCAL_SRC_FILES := helloneon.c helloneon-intrinsics.c

# Header search paths.
LOCAL_C_INCLUDES := \
    $(LOCAL_PATH) \
    $(LOCAL_PATH)/..

# -mfloat-abi=softfp and -mfpu=neon are required to use <arm_neon.h>.
# NOTE(review): these flags apply to every ABI listed in APP_ABI, so the
# armeabi build is also compiled as ARMv7-A/NEON code -- confirm this is
# intended, or restrict APP_ABI to armeabi-v7a.
LOCAL_CFLAGS := -g -mfloat-abi=softfp -mfpu=neon -march=armv7-a -mtune=cortex-a8

LOCAL_LDLIBS := -lz -llog

LOCAL_ARM_MODE := arm

# TARGET_ARCH_ABI is set by ndk-build for the ABI currently being built.
# It is only read here; assigning it would make this test always true.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
    # Enable NEON code generation.
    LOCAL_ARM_NEON := true
endif

LOCAL_STATIC_LIBRARIES := cpufeatures

# Build a shared library.
include $(BUILD_SHARED_LIBRARY)

$(call import-module,cpufeatures)
# Application.mk -- application-wide ndk-build settings.
APP_PROJECT_PATH := $(call my-dir)/..

# Kept in line with the project's Minimum Required SDK (API 9).
APP_PLATFORM := android-9

# C++ runtime library to link against.
APP_STL := stlport_static

# APP_ABI selects the target ABIs to build for (e.g. x86 or armeabi-v7a).
# Build both ARMv5TE and ARMv7-A machine code.
APP_ABI := armeabi armeabi-v7a

APP_CPPFLAGS += -fexceptions

# Request C++ exceptions and RTTI.
# NOTE(review): confirm ndk-build honours APP_CPP_FEATURES; the per-module
# switch is LOCAL_CPP_FEATURES in Android.mk.
APP_CPP_FEATURES += exceptions rtti
/* helloneon.c -- JNI entry point that benchmarks a small FIR filter, first
 * in plain C and then (when the CPU reports NEON) with NEON intrinsics. */
#include <jni.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cpu-features.h>
#include "helloneon-intrinsics.h"

#define DEBUG 0
/* Enables the NEON benchmark path in stringFromJNI(). */
#define HAVE_NEON

#if DEBUG
#include <android/log.h>
#  define D(...)  __android_log_print(ANDROID_LOG_INFO, "helloneon", __VA_ARGS__)
#else
#  define D(...)  do {} while (0)
#endif

/* Return a timestamp in milliseconds. The monotonic clock is used because
 * only differences between two readings are meaningful here, and it is not
 * affected by wall-clock adjustments. */
static double now_ms(void)
{
    struct timespec res;
    clock_gettime(CLOCK_MONOTONIC, &res);
    return 1000.0 * res.tv_sec + (double)res.tv_nsec / 1e6;
}

/* FIR filter implemented in plain C.
 *
 * For each nn in [0, width), accumulates kernel[mm] * input[nn - kernelSize/2 + mm]
 * over mm in [0, kernelSize) and stores the rounded high 16 bits in output[nn].
 * The caller must make input readable from index -kernelSize/2 onwards. */
static void fir_filter_c(short *output, const short *input, const short *kernel, int width, int kernelSize)
{
    int offset = -kernelSize / 2;
    int nn;

    for (nn = 0; nn < width; nn++) {
        int sum = 0;
        int mm;
        for (mm = 0; mm < kernelSize; mm++) {
            sum += kernel[mm] * input[nn + offset + mm];
        }
        /* Add 0x8000 to round before dropping the low 16 bits. */
        output[nn] = (short)((sum + 0x8000) >> 16);
    }
}

#define FIR_KERNEL_SIZE  32
#define FIR_OUTPUT_SIZE  2560
#define FIR_INPUT_SIZE   (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
#define FIR_ITERATIONS   600

/* Two copies of a 16-tap kernel. The 13th tap of each copy is 0x70, written
 * in hex like its neighbours (it was previously the octal literal 070). */
static const short fir_kernel[FIR_KERNEL_SIZE] = {
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10,
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10
};

static short fir_output[FIR_OUTPUT_SIZE];
static short fir_input_0[FIR_INPUT_SIZE];
/* Points half a kernel into the buffer so the filters may read before index 0. */
static const short *fir_input = fir_input_0 + (FIR_KERNEL_SIZE / 2);
static short fir_output_expected[FIR_OUTPUT_SIZE];

/* Native implementation of
 * com.hello_neon.android.Hello_NeonProjectActivity.stringFromJNI().
 *
 * Runs the C and NEON FIR benchmarks and returns a new Java string holding
 * the timing report, or an explanation of why the NEON run was skipped. */
JNIEXPORT jstring JNICALL
Java_com_hello_1neon_android_Hello_1NeonProjectActivity_stringFromJNI(JNIEnv *env, jobject thiz)
{
    char line[128];
    char buffer[512];
    uint64_t features;
    double t0, t1, time_c, time_neon;

    (void)thiz;

    /* Set up the FIR input (arbitrary data) and the reference output. */
    {
        int nn;
        for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
            fir_input_0[nn] = (5 * nn) & 255;
        }
        fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    }

    /* Benchmark small FIR filter loop - C version */
    t0 = now_ms();
    {
        int count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_c = t1 - t0;

    /* snprintf always NUL-terminates and cannot overrun the buffer. */
    snprintf(buffer, sizeof buffer, "FIR Filter benchmark:\nC version : %g ms\n", time_c);

    strlcat(buffer, "Neon version : ", sizeof buffer);

    if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
        strlcat(buffer, "Not an ARM CPU !\n", sizeof buffer);
        goto EXIT;
    }

    features = android_getCpuFeatures();
    if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) {
        strlcat(buffer, "Not an ARMv7 CPU !\n", sizeof buffer);
        goto EXIT;
    }

    /* HAVE_NEON is defined at the top of this file. */
#ifdef HAVE_NEON
    if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) {
        strlcat(buffer, "CPU doesn't support NEON !\n", sizeof buffer);
        goto EXIT;
    }

    /* Benchmark small FIR filter loop - Neon version */
    t0 = now_ms();
    {
        int count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_neon = t1 - t0;

    /* Guard the speed-up ratio against a (near) zero NEON time. */
    snprintf(line, sizeof line, "%g ms (x%g faster)\n", time_neon,
             time_c / (time_neon < 1e-6 ? 1. : time_neon));
    strlcat(buffer, line, sizeof buffer);

    /* Check the NEON result against the C reference; mismatches are only
     * reported through D(), i.e. when DEBUG is enabled. */
    {
        int nn, fails = 0;
        for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
            if (fir_output[nn] != fir_output_expected[nn]) {
                if (++fails < 16)
                    D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
            }
        }
        D("%d fails\n", fails);
    }
#else /* !HAVE_NEON */
    strlcat(buffer, "Program not compiled with ARMv7 support !\n", sizeof buffer);
#endif /* !HAVE_NEON */

EXIT:
    return (*env)->NewStringUTF(env, buffer);
}
helloneon-intrinsics.h:
#ifndef HELLONEON_INTRINSICS_H
#define HELLONEON_INTRINSICS_H

/* C linkage, so the same declaration can be shared by C and C++ sources. */
#ifdef __cplusplus
extern "C" {
#endif

/* FIR filter implemented with NEON intrinsics.
 *
 * output     - receives `width` filtered samples
 * input      - input samples; read at indices nn - kernelSize/2 + mm for
 *              nn in [0, width) and mm in [0, kernelSize)
 * kernel     - `kernelSize` filter taps
 * width      - number of output samples to produce
 * kernelSize - number of taps in `kernel`
 */
void fir_filter_neon_intrinsics(short *output, const short *input, const short *kernel, int width, int kernelSize);

#ifdef __cplusplus
}
#endif

#endif /* HELLONEON_INTRINSICS_H */
/* helloneon-intrinsics.c -- FIR filter implemented with NEON intrinsics.
 *
 * This source file should only be compiled by Android.mk when targeting
 * the armeabi-v7a ABI, and should be built in NEON mode. */
#include "helloneon-intrinsics.h"
#include <arm_neon.h>

/* For each nn in [0, width), accumulates kernel[mm] * input[nn - kernelSize/2 + mm]
 * over mm in [0, kernelSize) and stores the rounded high 16 bits in output[nn].
 *
 * Taps are processed four at a time with a widening multiply-accumulate;
 * the remaining (kernelSize % 4) taps are handled by a scalar loop. */
void fir_filter_neon_intrinsics(short *output, const short *input, const short *kernel, int width, int kernelSize)
{
    const int offset = -kernelSize / 2;
    const int vec_groups = kernelSize / 4;   /* number of complete 4-tap groups */
    int nn;

    for (nn = 0; nn < width; nn++) {
        const short *window = input + (nn + offset);
        int32x4_t sum_vec = vdupq_n_s32(0);
        int sum = 0;
        int mm;

        for (mm = 0; mm < vec_groups; mm++) {
            int16x4_t kernel_vec = vld1_s16(kernel + mm * 4);
            int16x4_t input_vec = vld1_s16(window + mm * 4);
            /* 16x16 -> 32-bit multiply, accumulated per lane. */
            sum_vec = vmlal_s16(sum_vec, kernel_vec, input_vec);
        }

        /* Fold the four lane accumulators into one scalar. */
        sum += vgetq_lane_s32(sum_vec, 0);
        sum += vgetq_lane_s32(sum_vec, 1);
        sum += vgetq_lane_s32(sum_vec, 2);
        sum += vgetq_lane_s32(sum_vec, 3);

        /* Scalar tail. Starting from the vectorised tap count means a
         * non-positive kernelSize runs no iterations here. */
        for (mm = vec_groups * 4; mm < kernelSize; mm++) {
            sum += kernel[mm] * window[mm];
        }

        /* Add 0x8000 to round before dropping the low 16 bits. */
        output[nn] = (short)((sum + 0x8000) >> 16);
    }
}
5、 利用NDK生成.so文件:选中工程-->Properties-->Builders-->New-->选中Program-->OK,Name:Hello_Neon_Builder,Location: D:\ProgramFiles\Android\android-sdk\android-ndk-r9\ndk-build.cmd,Working Directory: E:\NEON\Eclipse\Hello-Neon -->Apply,选中Refresh,勾选Refresh resources upon completion, 勾选Specific resources,点击Specify Resources…,勾选Hello-Neon工程下的libs文件夹,Finish-->Apply,选中Build Options,勾选Allocate Console(necessary for input), After a “Clean”, During manual builds, During auto builds, Specify working set of relevant resources,点击Specify Resources…,勾选Hello-Neon工程下的jni文件夹,Finish-->Apply-->OK-->OK,会在libs文件夹下生成libhelloneon.so文件;
6、 选中Hello-Neon,-->Run As-->Android Application,运行结果为:
FIR Filter benchmark:
C version : 282.84 ms
Neon version : 135.985 ms (x2.07994 faster)
以上是.c文件的操作步骤,若将.c文件改为.cpp文件,则需改动两个文件:
1、将Android.mk改为:
# Android.mk -- ndk-build script for the 'helloneon' shared library (C++ sources).
LOCAL_PATH := $(call my-dir)

include $(CLEAR_VARS)

LOCAL_MODULE := helloneon

# Source files to compile.
LOCAL_SRC_FILES := helloneon.cpp helloneon-intrinsics.cpp

# Header search paths.
LOCAL_C_INCLUDES := \
    $(LOCAL_PATH) \
    $(LOCAL_PATH)/..

# -mfloat-abi=softfp and -mfpu=neon are required to use <arm_neon.h>.
# NOTE(review): these flags apply to every ABI listed in APP_ABI, so the
# armeabi build is also compiled as ARMv7-A/NEON code -- confirm this is
# intended, or restrict APP_ABI to armeabi-v7a.
LOCAL_CFLAGS := -g -mfloat-abi=softfp -mfpu=neon -march=armv7-a -mtune=cortex-a8

LOCAL_LDLIBS := -lz -llog

LOCAL_ARM_MODE := arm

# TARGET_ARCH_ABI is set by ndk-build for the ABI currently being built.
# It is only read here; assigning it would make this test always true.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
    # Enable NEON code generation.
    LOCAL_ARM_NEON := true
endif

LOCAL_STATIC_LIBRARIES := cpufeatures

# Build a shared library.
include $(BUILD_SHARED_LIBRARY)

$(call import-module,cpufeatures)
/* helloneon.cpp -- C++ variant of helloneon.c: JNI entry point that
 * benchmarks a small FIR filter in plain C/C++ and with NEON intrinsics. */
#include <jni.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <cpu-features.h>
#include "helloneon-intrinsics.h"

#define DEBUG 0
/* Enables the NEON benchmark path in stringFromJNI(). */
#define HAVE_NEON

#if DEBUG
#include <android/log.h>
#  define D(...)  __android_log_print(ANDROID_LOG_INFO, "helloneon", __VA_ARGS__)
#else
#  define D(...)  do {} while (0)
#endif

/* Return a timestamp in milliseconds. The monotonic clock is used because
 * only differences between two readings are meaningful here, and it is not
 * affected by wall-clock adjustments. */
static double now_ms(void)
{
    struct timespec res;
    clock_gettime(CLOCK_MONOTONIC, &res);
    return 1000.0 * res.tv_sec + (double)res.tv_nsec / 1e6;
}

/* FIR filter implemented without intrinsics.
 *
 * For each nn in [0, width), accumulates kernel[mm] * input[nn - kernelSize/2 + mm]
 * over mm in [0, kernelSize) and stores the rounded high 16 bits in output[nn].
 * The caller must make input readable from index -kernelSize/2 onwards. */
static void fir_filter_c(short *output, const short *input, const short *kernel, int width, int kernelSize)
{
    int offset = -kernelSize / 2;
    int nn;

    for (nn = 0; nn < width; nn++) {
        int sum = 0;
        int mm;
        for (mm = 0; mm < kernelSize; mm++) {
            sum += kernel[mm] * input[nn + offset + mm];
        }
        /* Add 0x8000 to round before dropping the low 16 bits. */
        output[nn] = (short)((sum + 0x8000) >> 16);
    }
}

#define FIR_KERNEL_SIZE  32
#define FIR_OUTPUT_SIZE  2560
#define FIR_INPUT_SIZE   (FIR_OUTPUT_SIZE + FIR_KERNEL_SIZE)
#define FIR_ITERATIONS   600

/* Two copies of a 16-tap kernel. The 13th tap of each copy is 0x70, written
 * in hex like its neighbours (it was previously the octal literal 070). */
static const short fir_kernel[FIR_KERNEL_SIZE] = {
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10,
    0x10, 0x20, 0x40, 0x70, 0x8c, 0xa2, 0xce, 0xf0, 0xe9, 0xce, 0xa2, 0x8c, 0x70, 0x40, 0x20, 0x10
};

static short fir_output[FIR_OUTPUT_SIZE];
static short fir_input_0[FIR_INPUT_SIZE];
/* Points half a kernel into the buffer so the filters may read before index 0. */
static const short *fir_input = fir_input_0 + (FIR_KERNEL_SIZE / 2);
static short fir_output_expected[FIR_OUTPUT_SIZE];

/* The JNI entry point needs C linkage so its symbol name is not mangled. */
#ifdef __cplusplus
extern "C" {
#endif

/* Native implementation of
 * com.hello_neon.android.Hello_NeonProjectActivity.stringFromJNI().
 *
 * Runs the scalar and NEON FIR benchmarks and returns a new Java string
 * holding the timing report, or an explanation of why the NEON run was
 * skipped. */
JNIEXPORT jstring JNICALL
Java_com_hello_1neon_android_Hello_1NeonProjectActivity_stringFromJNI(JNIEnv *env, jobject thiz)
{
    char str[512] = {0};
    char buffer[512];
    uint64_t features;
    double t0, t1, time_c, time_neon;

    (void)thiz;

    /* Set up the FIR input (arbitrary data) and the reference output. */
    {
        int nn;
        for (nn = 0; nn < FIR_INPUT_SIZE; nn++) {
            fir_input_0[nn] = (5 * nn) & 255;
        }
        fir_filter_c(fir_output_expected, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
    }

    /* Benchmark small FIR filter loop - C version */
    t0 = now_ms();
    {
        int count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_c(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_c = t1 - t0;

    /* snprintf is bounded by the destination size, unlike sprintf. */
    snprintf(str, sizeof str, "FIR Filter benchmark:\nC version : %g ms\n", time_c);
    strlcpy(buffer, str, sizeof buffer);

    strlcat(buffer, "Neon version : ", sizeof buffer);

    if (android_getCpuFamily() != ANDROID_CPU_FAMILY_ARM) {
        strlcat(buffer, "Not an ARM CPU !\n", sizeof buffer);
        goto EXIT;
    }

    features = android_getCpuFeatures();
    if ((features & ANDROID_CPU_ARM_FEATURE_ARMv7) == 0) {
        strlcat(buffer, "Not an ARMv7 CPU !\n", sizeof buffer);
        goto EXIT;
    }

    /* HAVE_NEON is defined at the top of this file. */
#ifdef HAVE_NEON
    if ((features & ANDROID_CPU_ARM_FEATURE_NEON) == 0) {
        strlcat(buffer, "CPU doesn't support NEON !\n", sizeof buffer);
        goto EXIT;
    }

    /* Benchmark small FIR filter loop - Neon version */
    t0 = now_ms();
    {
        int count = FIR_ITERATIONS;
        for (; count > 0; count--) {
            fir_filter_neon_intrinsics(fir_output, fir_input, fir_kernel, FIR_OUTPUT_SIZE, FIR_KERNEL_SIZE);
        }
    }
    t1 = now_ms();
    time_neon = t1 - t0;

    /* Guard the speed-up ratio against a (near) zero NEON time. */
    snprintf(str, sizeof str, "%g ms (x%g faster)\n", time_neon,
             time_c / (time_neon < 1e-6 ? 1. : time_neon));
    strlcat(buffer, str, sizeof buffer);

    /* Check the NEON result against the scalar reference; mismatches are
     * only reported through D(), i.e. when DEBUG is enabled. */
    {
        int nn, fails = 0;
        for (nn = 0; nn < FIR_OUTPUT_SIZE; nn++) {
            if (fir_output[nn] != fir_output_expected[nn]) {
                if (++fails < 16)
                    D("neon[%d] = %d expected %d", nn, fir_output[nn], fir_output_expected[nn]);
            }
        }
        D("%d fails\n", fails);
    }
#else /* !HAVE_NEON */
    strlcat(buffer, "Program not compiled with ARMv7 support !\n", sizeof buffer);
#endif /* !HAVE_NEON */

EXIT:
    return env->NewStringUTF(buffer);
}

#ifdef __cplusplus
}
#endif
参考文献:
1、 http://blog.csdn.net/fengbingchun/article/details/11580983
2、 android-ndk-r9-windows-x86_64中的hello-neon例子代码