这里不做详细解释,网上有很多教程。
1、打开FPU,需要在编译器的Options中配置相关参数。因为STM32H7系列是双精度的FPU,所以这里选择Double Precision。
2、配置预处理符号:
为了方便各位大侠拷贝,这里贴出来具体的配置语句:ARM_MATH_CM7,__CC_ARM,ARM_MATH_MATRIX_CHECK,ARM_MATH_ROUNDING
3、将DSP库移植到工程中。我这里使用的是封装成lib的DSP库,各位也可以选择直接加入源码(xx.c)。
加入工程后的样子
注:DSP库 各位可以在st官网下载!
4、加入头文件的链接
到这里为止,所有的配置都弄好了,接下来准备撸代码!
关于如何加入固件库等更基础的知识,这里不做介绍,各位大侠可以在网上找到很多教程。
/**
 * Configure TIM5 as an up-counting time base and start it.
 *
 * Enables the TIM5 peripheral clock, de-initialises and re-initialises the
 * time base through the HAL with the given reload/prescaler values, then
 * starts the counter with HAL_TIM_Base_Start().
 *
 * The HAL status codes are not propagated because this function returns void.
 *
 * @param arr  Auto-reload value, stored in Init.Period.
 * @param psc  Prescaler value, stored in Init.Prescaler.
 */
void TIM5_Int_Init(uint32 arr,uint16 psc)
{
    /*
     * Zero-initialise the handle: it lives on the stack, and the HAL reads
     * fields that are never assigned below. The handle is deliberately not
     * volatile, so it can be passed to the HAL without casting a qualifier
     * away.
     */
    TIM_HandleTypeDef TIM5_Handler = {0};

    __HAL_RCC_TIM5_CLK_ENABLE();                                          /* enable the TIM5 clock */

    TIM5_Handler.Instance = TIM5;                                         /* general-purpose timer 5 */
    TIM5_Handler.Init.Prescaler = psc;                                    /* prescaler */
    TIM5_Handler.Init.Period = arr;                                       /* auto-reload value */
    TIM5_Handler.Init.CounterMode = TIM_COUNTERMODE_UP;                   /* count upwards */
    TIM5_Handler.Init.ClockDivision = TIM_CLOCKDIVISION_DIV1;             /* clock division factor */
    TIM5_Handler.Init.AutoReloadPreload = TIM_AUTORELOAD_PRELOAD_ENABLE;  /* enable auto-reload preload */

    HAL_TIM_Base_DeInit(&TIM5_Handler);
    HAL_TIM_Base_Init(&TIM5_Handler);
    HAL_TIM_Base_Start(&TIM5_Handler);
}
1、测试的功能函数
/*
 * Timing constants for the TIM5-based measurement.
 * Each expansion is wrapped in an outer pair of parentheses so the cast
 * cannot re-associate with neighbouring operators at the point of use.
 */

/* TIM5 counter clock in MHz; a tick count divided by this is stored in real_time_us. */
#define APB1_TIME5_CLK_MHZ ((uint8)(200))
/* Value added in the counter wrap-around correction of the measurement code. */
#define TIME5_ARR_VALUE ((uint32)(60000))
/* Number of entries in real_time_us (one per timed test case). */
#define TIME_BUFF_NUM ((uint8)(10))

/* Result of each timed test case: elapsed ticks divided by APB1_TIME5_CLK_MHZ. */
float real_time_us[TIME_BUFF_NUM];
void MathFunc_Machine_Cycle(void)
{
unsigned int cnt_buff[2];
unsigned int time = 0;
volatile float math_temp[10] ;
volatile unsigned int math_itemp[10] ;
for(uint8 ii=0;ii<TIME_BUFF_NUM;ii++){
TIM5->CNT = 0;
/* 此处为 switch(ii){ ... 各测试分支,完整代码见下文 */
}
if(cnt_buff[1] > cnt_buff[0]){
time = cnt_buff[1] - cnt_buff[0];
}
else{
time = cnt_buff[1] + TIME5_ARR_VALUE - cnt_buff[0];
}
real_time_us[ii] = (float)time/APB1_TIME5_CLK_MHZ;
}
}
若想测试FFT、sqrt等功能所消耗的机器周期,直接在上面的测试代码中添加即可!
若想要测试得更准确,可在统计时间内多次执行语句,之后取平均值。
2、main 函数
int main(void)
{
MPU_Memory_Protection();
Cache_Enable(); //打开L1-Cache
HAL_Init(); //初始化HAL库
Stm32_Clock_Init(160,5,2,2); //设置时钟,400MHz
__HAL_RCC_SYSCFG_CLK_ENABLE();
__HAL_RCC_VREF_CLK_ENABLE();
TIM5_Int_Init(60000,0); // 60000/200 us = 300us
while(1)
{
MathFunc_Machine_Cycle();
}
}
Optimization:level 0
从实验数据可知,加减乘除所耗费的时间基本在10个机器周期,sin和cos则消耗的时间比较多。
其中stm32提供的arm_sin_f32和arm_cos_f32比C语言标准库的sin、cos运行更快。
为什么arm_sin_f32比arm_cos_f32快呢?贴出源码对比分析一下,发现arm_sin_f32比arm_cos_f32多一个判断语句,少一个加法运算。这样就可以解释得通了!
/*
 * Select the operation to time from the test index ii.
 * Every case has the same shape: read TIM5->CNT into cnt_buff[0], perform
 * ten identical assignments, then read TIM5->CNT into cnt_buff[1].
 * math_itemp and math_temp are declared volatile in the enclosing function.
 *
 * NOTE(review): in cases 0-5 both operands are literals, so the compiler
 * presumably evaluates the expression at compile time and the timed region
 * may contain only the ten stores -- confirm in the disassembly.
 */
switch(ii){
/* Test 0: integer addition (1000+2) stored to math_itemp[0..9]. */
case 0:
cnt_buff[0] = TIM5->CNT;
math_itemp[0] = 1000+2;
math_itemp[1] = 1000+2;
math_itemp[2] = 1000+2;
math_itemp[3] = 1000+2;
math_itemp[4] = 1000+2;
math_itemp[5] = 1000+2;
math_itemp[6] = 1000+2;
math_itemp[7] = 1000+2;
math_itemp[8] = 1000+2;
math_itemp[9] = 1000+2;
cnt_buff[1] = TIM5->CNT;
break;
/* Test 1: integer subtraction (1000-2) stored to math_itemp[0..9]. */
case 1:
cnt_buff[0] = TIM5->CNT;
math_itemp[0] = 1000-2;
math_itemp[1] = 1000-2;
math_itemp[2] = 1000-2;
math_itemp[3] = 1000-2;
math_itemp[4] = 1000-2;
math_itemp[5] = 1000-2;
math_itemp[6] = 1000-2;
math_itemp[7] = 1000-2;
math_itemp[8] = 1000-2;
math_itemp[9] = 1000-2;
cnt_buff[1] = TIM5->CNT;
break;
/*
 * Test 2: floating-point addition stored to math_temp[0..9].
 * The literals have no 'f' suffix, so they are of type double; the result
 * is converted to float on assignment.
 */
case 2:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = 1.57097654+2.354;
math_temp[1] = 1.57097654+2.354;
math_temp[2] = 1.57097654+2.354;
math_temp[3] = 1.57097654+2.354;
math_temp[4] = 1.57097654+2.354;
math_temp[5] = 1.57097654+2.354;
math_temp[6] = 1.57097654+2.354;
math_temp[7] = 1.57097654+2.354;
math_temp[8] = 1.57097654+2.354;
math_temp[9] = 1.57097654+2.354;
cnt_buff[1] = TIM5->CNT;
break;
/* Test 3: floating-point subtraction (double literals) stored to math_temp[0..9]. */
case 3:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = 1.57097654-2.354;
math_temp[1] = 1.57097654-2.354;
math_temp[2] = 1.57097654-2.354;
math_temp[3] = 1.57097654-2.354;
math_temp[4] = 1.57097654-2.354;
math_temp[5] = 1.57097654-2.354;
math_temp[6] = 1.57097654-2.354;
math_temp[7] = 1.57097654-2.354;
math_temp[8] = 1.57097654-2.354;
math_temp[9] = 1.57097654-2.354;
cnt_buff[1] = TIM5->CNT;
break;
/* Test 4: floating-point multiplication (double literals) stored to math_temp[0..9]. */
case 4:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = 1.57097654*2.354;
math_temp[1] = 1.57097654*2.354;
math_temp[2] = 1.57097654*2.354;
math_temp[3] = 1.57097654*2.354;
math_temp[4] = 1.57097654*2.354;
math_temp[5] = 1.57097654*2.354;
math_temp[6] = 1.57097654*2.354;
math_temp[7] = 1.57097654*2.354;
math_temp[8] = 1.57097654*2.354;
math_temp[9] = 1.57097654*2.354;
cnt_buff[1] = TIM5->CNT;
break;
/* Test 5: floating-point division (double literals) stored to math_temp[0..9]. */
case 5:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = 1.57097654/2.354;
math_temp[1] = 1.57097654/2.354;
math_temp[2] = 1.57097654/2.354;
math_temp[3] = 1.57097654/2.354;
math_temp[4] = 1.57097654/2.354;
math_temp[5] = 1.57097654/2.354;
math_temp[6] = 1.57097654/2.354;
math_temp[7] = 1.57097654/2.354;
math_temp[8] = 1.57097654/2.354;
math_temp[9] = 1.57097654/2.354;
cnt_buff[1] = TIM5->CNT;
break;
/*
 * Test 6: ten calls to arm_cos_f32 with the same constant argument.
 * NOTE(review): presumably the CMSIS-DSP single-precision cosine; the
 * double literal would then be converted to float at the call -- confirm
 * against the arm_math.h prototype.
 */
case 6:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = arm_cos_f32(1.57097654);
math_temp[1] = arm_cos_f32(1.57097654);
math_temp[2] = arm_cos_f32(1.57097654);
math_temp[3] = arm_cos_f32(1.57097654);
math_temp[4] = arm_cos_f32(1.57097654);
math_temp[5] = arm_cos_f32(1.57097654);
math_temp[6] = arm_cos_f32(1.57097654);
math_temp[7] = arm_cos_f32(1.57097654);
math_temp[8] = arm_cos_f32(1.57097654);
math_temp[9] = arm_cos_f32(1.57097654);
cnt_buff[1] = TIM5->CNT;
break;
/* Test 7: ten calls to arm_sin_f32 with the same constant argument. */
case 7:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = arm_sin_f32(1.57097654);
math_temp[1] = arm_sin_f32(1.57097654);
math_temp[2] = arm_sin_f32(1.57097654);
math_temp[3] = arm_sin_f32(1.57097654);
math_temp[4] = arm_sin_f32(1.57097654);
math_temp[5] = arm_sin_f32(1.57097654);
math_temp[6] = arm_sin_f32(1.57097654);
math_temp[7] = arm_sin_f32(1.57097654);
math_temp[8] = arm_sin_f32(1.57097654);
math_temp[9] = arm_sin_f32(1.57097654);
cnt_buff[1] = TIM5->CNT;
break;
/*
 * Test 8: ten calls to cos with the same constant argument.
 * NOTE(review): presumably the double-precision cos() from <math.h>, with
 * the result narrowed to float on assignment -- confirm which header
 * declares it in this project.
 */
case 8:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = cos(1.57097654);
math_temp[1] = cos(1.57097654);
math_temp[2] = cos(1.57097654);
math_temp[3] = cos(1.57097654);
math_temp[4] = cos(1.57097654);
math_temp[5] = cos(1.57097654);
math_temp[6] = cos(1.57097654);
math_temp[7] = cos(1.57097654);
math_temp[8] = cos(1.57097654);
math_temp[9] = cos(1.57097654);
cnt_buff[1] = TIM5->CNT;
break;
/* Test 9: ten calls to sin with the same constant argument (see the note on test 8). */
case 9:
cnt_buff[0] = TIM5->CNT;
math_temp[0] = sin(1.57097654);
math_temp[1] = sin(1.57097654);
math_temp[2] = sin(1.57097654);
math_temp[3] = sin(1.57097654);
math_temp[4] = sin(1.57097654);
math_temp[5] = sin(1.57097654);
math_temp[6] = sin(1.57097654);
math_temp[7] = sin(1.57097654);
math_temp[8] = sin(1.57097654);
math_temp[9] = sin(1.57097654);
cnt_buff[1] = TIM5->CNT;
break;
/* Any other index: nothing is executed and cnt_buff is left untouched. */
default:
break;
五、实验数据
六、结论