c语言函数参数太多对性能是否有影响?

64位汇编(linux)
当整型或指针参数不超过 6 个时, 参数从左到右依次放入寄存器: rdi, rsi, rdx, rcx, r8, r9(浮点参数另外通过 xmm0–xmm7 传递)。
当整型或指针参数为 7 个及以上时, 前 6 个与前面一样, 后面的参数则按 “从右向左” 的顺序依次压入栈中, 即和 32 位汇编一样。
参数个数为 7 个及以上的时候(以 8 个参数为例):
H(a, b, c, d, e, f, g, h);
a->%rdi, b->%rsi, c->%rdx, d->%rcx, e->%r8, f->%r9
h->8(%rsp)
g->(%rsp)
call H


一般会考虑用结构体来打包逻辑上相关的变量, 再传递给函数, 从而减少参数个数。那么这种方式对性能提升有帮助吗? 下面写了一个测试程序用来分析:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Bundle of the logically related values that func() receives as separate
 * arguments; funcs() and funcs2() receive a pointer to this struct instead.
 */
typedef struct fun_p{
  char   c;  /* compared against 10; its square is added when it is larger */
  int    a;
  int    b;
  int    d;
  double f;  /* same type as func()'s `f` parameter, so a value such as 34.2
                is stored unchanged instead of being truncated to an integer */
}_funp;


/*
 * Baseline variant: every value is passed as its own argument.
 *
 * The terms a, b, d (plus c * c when c is greater than 10) are summed as
 * integers, f * 2 is added in double precision, and the sum is converted to
 * int. aa, bb, cc and dd are then added to that integer and returned.
 */
int func(int aa, int bb, int cc, int dd, char c, int a, int b, int d, double f)
{
  /* The square of c only contributes when c exceeds 10. */
  const int square = (c > 10) ? c * c : 0;
  /* Conversion back to int drops the fractional part of the double sum. */
  const int partial = (int)(square + a + b + d + f * 2);

  return partial + aa + bb + cc + dd;
}

/*
 * Struct variant that first copies every field of *inp into a local variable
 * and then computes on the copies only.
 *
 * The result is built the same way as in func(): a + b + d (plus c * c when
 * c is greater than 10) plus f * 2, converted to int, then aa + bb + cc + dd
 * added on top.
 */
int funcs(int aa, int bb, int cc, int dd, _funp * inp)
{
  /* Take a private copy of each field before doing any arithmetic. */
  const char   ch     = inp->c;
  const int    va     = inp->a;
  const int    vb     = inp->b;
  const int    vd     = inp->d;
  const double factor = inp->f;

  /* The square of c only contributes when c exceeds 10. */
  const int square  = (ch > 10) ? ch * ch : 0;
  const int partial = (int)(square + va + vb + vd + factor * 2);

  return partial + aa + bb + cc + dd;
}

/*
 * Struct variant that reads the fields through the pointer at the point of
 * use; only c is copied to a local because it is needed for the comparison
 * and the square.
 *
 * Returns inp->a + inp->b + inp->d + inp->f * 2 (plus c * c when c is greater
 * than 10) converted to int, with aa + bb + cc + dd added on top.
 */
int funcs2(int aa, int bb, int cc, int dd, _funp * inp)
{
  const char ch = inp->c;
  /* The square of c only contributes when c exceeds 10. */
  const int square = (ch > 10) ? ch * ch : 0;
  const int partial = (int)(square + inp->a + inp->b + inp->d + inp->f * 2);

  return partial + aa + bb + cc + dd;
}


int main(void)
{
  struct timespec time_start = {0, 0}, time_end = {0, 0};
  int aa = 9;
  int bb = 12;
  int cc = 39;
  int dd = 90;
  char c = 20;
  int a = 98;
  int b = 199;
  int d = 23;
  double f = 34.2;
  struct fun_p funp;
  int i, j;
  int run_num = 1000;
  long long total = 0;
  funp.c = c;
  funp.a = a;
  funp.b = b;
  funp.d = d;
  funp.f = f;

  clock_gettime(CLOCK_REALTIME, &time_start);
  for (i = 0; i < run_num; i++)
    for (j = 0; j < run_num; j++){
      c = c + 1;
      total += func(aa, bb, cc, dd, c, a, b, d, f);
    }
  clock_gettime(CLOCK_REALTIME, &time_end);
  printf("func   duration:%llus %lluns. total = %lld\n", time_end.tv_sec-time_start.tv_sec, time_end.tv_nsec-time_start.tv_nsec, total);


  total = 0;
  c = 20;
  clock_gettime(CLOCK_REALTIME, &time_start);
  for (i = 0; i < run_num; i++)
    for (j = 0; j < run_num; j++){
      funp.c = funp.c + 1;
      total += funcs(aa, bb, cc, dd, &funp);
    }
  clock_gettime(CLOCK_REALTIME, &time_end);
  printf("funcs  duration:%llus %lluns. total = %lld\n", time_end.tv_sec-time_start.tv_sec, time_end.tv_nsec-time_start.tv_nsec, total);

  total = 0;
  funp.c = 20;
  clock_gettime(CLOCK_REALTIME, &time_start);
  for (i = 0; i < run_num; i++)
    for (j = 0; j < run_num; j++){
      funp.c = funp.c + 1;
      total += funcs2(aa, bb, cc, dd, &funp);
    }
  clock_gettime(CLOCK_REALTIME, &time_end);
  printf("funcs2 duration:%llus %lluns. total = %lld\n", time_end.tv_sec-time_start.tv_sec, time_end.tv_nsec-time_start.tv_nsec, total);
  return 1;
}

编译命令: gcc -O3 test_func_parameter.c -o test_func_parameter -lrt

测试结果表明:
func duration:0s 1633997ns. total = 3235271710
funcs duration:0s 1524616ns. total = 3235271710
funcs2 duration:0s 1023802ns. total = 3235271710

结论

1、func 与 funcs 消耗的时间差不多, 说明如果在函数内部用临时变量把结构体成员转存一遍, 那么函数参数的多少对性能影响不大。
2、funcs2 比 funcs 少了把结构体成员拷贝到局部变量的过程, 因此节省了不少时间。说明将部分参数用结构体一次传入, 并且在函数内部直接通过结构体指针访问成员, 可以节省时间, 因为这样减少了函数参数过多导致的内存写入操作(参数压栈)。

3、如果函数本身计算量比较大,那可能参数的传输方式对性能影响就比较小了。



你可能感兴趣的:(c/c++)