Neon ARM Quick Guide

Neon

For more detailed instructions, see this website.
When targeting ARMv7, all data addresses must be 16-byte aligned.

Sample


#include 

float[N] a;
//=======Use variable========
//base struct, a vector stores 4 float_t
float32x4_t float_vector;
//fetch a vector from an array
float_vector = vld1q_f32(&a[i+offset]);
//store all values to array, use them by using t[i]
float t[4];
vst1q_f32(t, v4_1);

//======Use pointer(good)=====
float32x4_t *v_p;
float32x4_t v_ans;
v_p = (float32x4_t*)(&a[i+offset]);
//v_ans += v_p * int_a
v_ans = vmlaq_n_f32(v_ans, *v_p, int_a); 
offset+=4;
# Build the sample with NEON enabled via GCC's -mfpu=neon option
g++ filename.cpp -mfpu=neon

Inline NEON assembly

// Round size up to the next multiple of 64 bytes. The loop below copies the
// rounded-up amount, so both buffers must be valid for that many bytes.
size = ((size - 1) & -64) + 64;
// Copy 16 bytes (one 4 x 32-bit vector) per iteration from src to dst.
// The body runs before the counter is tested, so at least 16 bytes are copied.
// dst, src and size are all modified by the asm and are therefore declared as
// read-write operands; they hold the advanced/consumed values afterwards.
// NOTE(review): %2 is emitted as a 64-bit register name; assumes size is a
// 64-bit integer type -- confirm against the declaration.
asm volatile (
    "1: \n"                          // local numeric label: safe if this asm is duplicated
    "   ld1 {v0.4s}, [%1]\n"
    "   st1 {v0.4s}, [%0]\n"
    "   add %1, %1, #0x10\n"
    "   add %0, %0, #0x10\n"
    "   subs %2, %2, #0x10\n"
    "   b.gt 1b\n"
    : "+r"(dst), "+r"(src), "+r"(size)
    :
    : "v0", "cc", "memory"           // scratch vector register, flags, and the copied memory
);

你可能感兴趣的:(Linux)