lwip select函数分析和优化

我的设备有两个网卡,需要开两路socket:一路UDP,一路TCP,lwIP的版本是1.4.1。实际运行时发现,UDP通信运行一段时间以后就中断了,但线程仍在继续运行。调试发现select函数返回后read_fds.fd_bits始终等于0,导致UDP发回来的数据不能被读取,UDP线程像死了一样。

        FD_SET(sockfd,&read_fds);

        select(sockfd+1,&read_fds,NULL,NULL,&tv_out);    

        if(FD_ISSET(sockfd, &read_fds))      //read_fds异常,
        {        
              //读取udp数据
        }

后来分析了一下select函数的源码,发现每次调用时它都会从0遍历到maxfdp1-1,更新这个范围内所有socket的描述符。我的TCP线程用的socket编号是1,UDP用的编号是0,也就是说TCP线程更新socket描述符的时候顺便也更新了UDP线程的描述符,而UDP线程只会去更新自己的——编号越大,管得越宽。TCP线程抢占之后,同时更新了两个线程的描述符,但它只能处理自己的描述符,不能处理UDP的描述符;等到TCP处理完毕、UDP线程去读取描述符的时候,由于该描述符已经被更新过,早就被lwIP内核释放、清零了。因为单片机是单核,所有线程只能分时复用,所以UDP线程来不及处理自己的描述符,就表现为像死了一样。

/**
 * Wait until at least one of the requested sockets is ready, or the timeout expires.
 *
 * The caller's sets are used as input (which descriptors to watch) and are
 * overwritten on return with the subset that is ready.
 *
 * @param maxfdp1   highest descriptor to examine, plus one
 * @param readset   descriptors to watch for readability, may be NULL
 * @param writeset  descriptors to watch for writability, may be NULL
 * @param exceptset descriptors to watch for errors, may be NULL
 * @param timeout   NULL waits forever; a zero timeout only polls
 * @return number of ready conditions, 0 on timeout, or -1 with errno set to
 *         ENOMEM (no semaphore available) or EBADF (a watched descriptor is
 *         not an open socket). On -1 the caller's sets are left unmodified.
 */
int lwip_select(int maxfdp1, fd_set *readset, fd_set *writeset, fd_set *exceptset,struct timeval *timeout)
{
  u32_t waitres = 0;
  int nready;
  /* Upper bound for the select_waiting clean-up loop; lowered when the
     registration loop stops early on an invalid descriptor. */
  int maxfdp2 = maxfdp1;
  fd_set lreadset, lwriteset, lexceptset;
  u32_t msectimeout;
  struct lwip_select_cb select_cb;
  err_t err;
  int i;
  SYS_ARCH_DECL_PROTECT(lev);

  LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_select(%d, %p, %p, %p, tvsec=%"S32_F" tvusec=%"S32_F")\n",
                  maxfdp1, (void *)readset, (void *) writeset, (void *) exceptset,
                  timeout ? (s32_t)timeout->tv_sec : (s32_t)-1,
                  timeout ? (s32_t)timeout->tv_usec : (s32_t)-1));

  /* Go through each socket in each list to count number of sockets which
     currently match. The result goes into the local sets; the caller's sets
     are not touched until the very end. */
  nready = lwip_selscan(maxfdp1, readset, writeset, exceptset, &lreadset, &lwriteset, &lexceptset);

  /* If we don't have any current events, then suspend if we are supposed to */
  if (!nready)
  {
    if (timeout && timeout->tv_sec == 0 && timeout->tv_usec == 0)
    {
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_select: no timeout, returning 0\n"));
      /* This is OK as the local fdsets are empty and nready is zero,
         or we would have returned earlier. */
      goto return_copy_fdsets;
    }

    /* None ready: add our semaphore to list:
       We don't actually need any dynamic memory. Our entry on the
       list is only valid while we are in this function, so it's ok
       to use local variables. */

    select_cb.next = NULL;
    select_cb.prev = NULL;
    select_cb.readset = readset;
    select_cb.writeset = writeset;
    select_cb.exceptset = exceptset;
    select_cb.sem_signalled = 0;
    err = sys_sem_new(&select_cb.sem, 0);
    if (err != ERR_OK)
    {
      /* failed to create semaphore */
      set_errno(ENOMEM);
      return -1;
    }

    /* Protect the select_cb_list */
    SYS_ARCH_PROTECT(lev);

    /* Put this select_cb on top of list */
    select_cb.next = select_cb_list;
    if (select_cb_list != NULL)
    {
      select_cb_list->prev = &select_cb;
    }
    select_cb_list = &select_cb;
    /* Increasing this counter tells even_callback that the list has changed. */
    select_cb_ctr++;

    /* Now we can safely unprotect */
    SYS_ARCH_UNPROTECT(lev);

    /* Increase select_waiting for each socket we are interested in */
    for(i = 0; i < maxfdp1; i++)
    {
      if ((readset && FD_ISSET(i, readset)) ||
          (writeset && FD_ISSET(i, writeset)) ||
          (exceptset && FD_ISSET(i, exceptset)))
      {
        struct lwip_sock *sock;
        /* Look the socket up inside the critical section so it cannot be
           closed between the lookup and the increment. */
        SYS_ARCH_PROTECT(lev);
        sock = tryget_socket(i);
        if (sock != NULL)
        {
          sock->select_waiting++;
          LWIP_ASSERT("sock->select_waiting > 0", sock->select_waiting > 0);
        }
        SYS_ARCH_UNPROTECT(lev);
        if (sock == NULL)
        {
          /* Not an open socket: fail with EBADF instead of dereferencing NULL.
             Only descriptors below i were registered, so only those need to
             be unregistered again. */
          nready = -1;
          maxfdp2 = i;
          break;
        }
      }
    }

    if (nready >= 0)
    {
      /* Call lwip_selscan again: there could have been events between
         the last scan (whithout us on the list) and putting us on the list! */
      nready = lwip_selscan(maxfdp1, readset, writeset, exceptset, &lreadset, &lwriteset, &lexceptset);
      if (!nready)
      {
        /* Still none ready, just wait to be woken */
        if (timeout == NULL)
        {
          /* Wait forever */
          msectimeout = 0;
        }
        else
        {
          msectimeout =  ((timeout->tv_sec * 1000) + ((timeout->tv_usec + 500)/1000));
          if (msectimeout == 0)
          {
            /* Wait 1ms at least (0 means wait forever) */
            msectimeout = 1;
          }
        }

        waitres = sys_arch_sem_wait(&select_cb.sem, msectimeout);
      }
    }

    /* Decrease select_waiting for each socket we registered on above */
    for(i = 0; i < maxfdp2; i++)
    {
      if ((readset && FD_ISSET(i, readset)) ||
          (writeset && FD_ISSET(i, writeset)) ||
          (exceptset && FD_ISSET(i, exceptset)))
      {
        struct lwip_sock *sock;
        SYS_ARCH_PROTECT(lev);
        sock = tryget_socket(i);
        if (sock != NULL)
        {
          LWIP_ASSERT("sock->select_waiting > 0", sock->select_waiting > 0);
          /* Never let the counter go negative, even if the slot was closed
             and reused while we were waiting. */
          if (sock->select_waiting > 0)
          {
            sock->select_waiting--;
          }
        }
        else
        {
          /* The socket was closed while we were waiting on it */
          nready = -1;
        }
        SYS_ARCH_UNPROTECT(lev);
      }
    }
    /* Take us off the list */
    SYS_ARCH_PROTECT(lev);
    if (select_cb.next != NULL)
    {
      select_cb.next->prev = select_cb.prev;
    }

    if (select_cb_list == &select_cb)
    {
      LWIP_ASSERT("select_cb.prev == NULL\n", select_cb.prev == NULL);
      select_cb_list = select_cb.next;
    }
    else
    {
      LWIP_ASSERT("select_cb.prev != NULL\n", select_cb.prev != NULL);
      select_cb.prev->next = select_cb.next;
    }
    /* Increasing this counter tells even_callback that the list has changed. */
    select_cb_ctr++;
    SYS_ARCH_UNPROTECT(lev);

    sys_sem_free(&select_cb.sem);

    if (nready < 0)
    {
      /* One of the watched descriptors is not (or no longer) an open socket */
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_select: invalid socket in fdset\n"));
      set_errno(EBADF);
      return -1;
    }

    if (waitres == SYS_ARCH_TIMEOUT)
    {
      /* Timeout */
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_select: timeout expired\n"));
      /* This is OK as the local fdsets are empty and nready is zero,
         or we would have returned earlier. */
      goto return_copy_fdsets;
    }

    /* See what's set */
    nready = lwip_selscan(maxfdp1, readset, writeset, exceptset, &lreadset, &lwriteset, &lexceptset);
  }

  LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_select: nready=%d\n", nready));
return_copy_fdsets:
  set_errno(0);
  /* Hand the result back: each caller set is replaced by its ready subset */
  if (readset)
  {
    *readset = lreadset;
  }
  if (writeset)
  {
    *writeset = lwriteset;
  }
  if (exceptset)
  {
    *exceptset = lexceptset;
  }
  return nready;
}

而实际上select函数会调用lwip_selscan函数来更新各种描述符集合:lwip_selscan函数先通过tryget_socket函数取得每个socket当前记录的事件,然后根据这些事件来更新描述符。

/**
 * Poll sockets 0..maxfdp1-1 once and report which requested descriptors are ready.
 *
 * A descriptor is reported only when the caller asked for it in the matching
 * *_in set. Socket state is merely read here (a snapshot taken under
 * SYS_ARCH_PROTECT); nothing in the socket table is modified by a scan.
 *
 * @param maxfdp1       highest descriptor to examine, plus one
 * @param readset_in    descriptors to test for readability, may be NULL
 * @param writeset_in   descriptors to test for writability, may be NULL
 * @param exceptset_in  descriptors to test for a pending error, may be NULL
 * @param readset_out   receives the readable subset of readset_in
 * @param writeset_out  receives the writable subset of writeset_in
 * @param exceptset_out receives the subset of exceptset_in with an error
 * @return number of ready conditions found; a descriptor that is ready in
 *         more than one set is counted once per set
 */
static int lwip_selscan(int maxfdp1, fd_set *readset_in, fd_set *writeset_in, fd_set *exceptset_in, fd_set *readset_out, fd_set *writeset_out, fd_set *exceptset_out)
{
  int i, nready = 0;
  fd_set lreadset, lwriteset, lexceptset;
  struct lwip_sock *sock;
  SYS_ARCH_DECL_PROTECT(lev);

  FD_ZERO(&lreadset);
  FD_ZERO(&lwriteset);
  FD_ZERO(&lexceptset);

  /* Go through each socket in each list to count number of sockets which
     currently match */
  for(i = 0; i < maxfdp1; i++)
  {
    void* lastdata = NULL;
    s16_t rcvevent = 0;
    u16_t sendevent = 0;
    u16_t errevent = 0;
    /* First get the socket's status (protected)... */
    SYS_ARCH_PROTECT(lev);
    sock = tryget_socket(i);
    if (sock != NULL)
    {
      lastdata = sock->lastdata;
      rcvevent = sock->rcvevent;
      sendevent = sock->sendevent;
      errevent = sock->errevent;
    }
    /* An unused slot below maxfdp1 is a normal condition, not an error: the
       zeroed snapshot simply makes it "not ready". Nothing is logged here
       because console output must not be produced inside the critical
       section. */
    SYS_ARCH_UNPROTECT(lev);
    /* ... then examine it: */
    /* See if netconn of this socket is ready for read: either data is already
       buffered on the socket or a receive event is pending */
    if (readset_in && FD_ISSET(i, readset_in) && ((lastdata != NULL) || (rcvevent > 0)))
    {
      FD_SET(i, &lreadset);
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_selscan: fd=%d ready for reading\n", i));
      nready++;
    }
    /* See if netconn of this socket is ready for write */
    if (writeset_in && FD_ISSET(i, writeset_in) && (sendevent != 0))
    {
      FD_SET(i, &lwriteset);
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_selscan: fd=%d ready for writing\n", i));
      nready++;
    }
    /* See if netconn of this socket had an error */
    if (exceptset_in && FD_ISSET(i, exceptset_in) && (errevent != 0))
    {
      FD_SET(i, &lexceptset);
      LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_selscan: fd=%d ready for exception\n", i));
      nready++;
    }
  }
  /* copy local sets to the ones provided as arguments */
  *readset_out = lreadset;
  *writeset_out = lwriteset;
  *exceptset_out = lexceptset;

  LWIP_ASSERT("nready >= 0", nready >= 0);
  return nready;
}

下面是tryget_socket函数,它通过socket索引返回全局数组sockets[]中对应的表项(各种事件就记录在这个表项里);索引越界或者该socket没有打开时返回NULL。

/**
 * Map a socket descriptor to its entry in the sockets[] table.
 *
 * @param s  socket descriptor, used directly as the index into sockets[]
 * @return   pointer to sockets[s], or NULL when s lies outside
 *           [0, NUM_SOCKETS) or the slot has no netconn attached
 */
static struct lwip_sock * tryget_socket(int s)
{
  struct lwip_sock *entry = NULL;

  /* The range test comes first so sockets[] is never indexed out of bounds. */
  if ((s >= 0) && (s < NUM_SOCKETS) && sockets[s].conn) {
    entry = &sockets[s];
  }
  return entry;
}

而sockets[s]在event_callback内更新:只要发生相应的事件,就会由lwIP内核更新。

/**
 * Netconn event hook: records an event on the lwip_sock that belongs to 'conn'.
 *
 * @param conn  connection the event refers to; a NULL conn is ignored
 * @param evt   which event occurred (receive plus/minus, send plus/minus, error)
 * @param len   unused
 *
 * NOTE: this listing is an excerpt. Everything after the switch statement is
 * omitted, including the matching release of the protection taken just
 * before the switch.
 */
static void event_callback(struct netconn *conn, enum netconn_evt evt, u16_t len)
{
  int s;
  struct lwip_sock *sock;
  struct lwip_select_cb *scb;
  int last_select_cb_ctr;
  SYS_ARCH_DECL_PROTECT(lev);

  LWIP_UNUSED_ARG(len);

  /* Get socket */
  if (conn)
  {
    s = conn->socket;
    if (s < 0)
    {
      /* Data comes in right away after an accept, even though
       * the server task might not have created a new socket yet.
       * Just count down (or up) if that's the case and we
       * will use the data later. Note that only receive events
       * can happen before the new socket is set up. */
      SYS_ARCH_PROTECT(lev);
      /* Re-check under protection: the socket may have been assigned since
       * the unprotected read above. */
      if (conn->socket < 0)
      {
        if (evt == NETCONN_EVT_RCVPLUS)
        {
          conn->socket--;
        }
        SYS_ARCH_UNPROTECT(lev);
        return;
      }
      s = conn->socket;
      SYS_ARCH_UNPROTECT(lev);
    }

    sock = get_socket(s);
    if (!sock)
    {
    	return;
    }
  }
  else
  {
    return;
  }

  SYS_ARCH_PROTECT(lev);
  /* Set event as required */
  switch (evt)
  {
    case NETCONN_EVT_RCVPLUS:
      sock->rcvevent++;        // one more pending receive event
      break;
    case NETCONN_EVT_RCVMINUS:
      sock->rcvevent--;       // one pending receive event fewer
      break;
    case NETCONN_EVT_SENDPLUS:
      sock->sendevent = 1;
      break;
    case NETCONN_EVT_SENDMINUS:
      sock->sendevent = 0;
      break;
    case NETCONN_EVT_ERROR:
      sock->errevent = 1;
      break;
    default:
      LWIP_ASSERT("unknown event", 0);
      break;
  }
 /* ... the rest of the function is omitted from this excerpt ... */
}

event_callback是个回调函数,在创建socket通信时就会注册该回调函数。

 

/**
 * Create a socket: allocate a netconn of the requested kind with
 * event_callback registered on it, then bind it to a free socket slot.
 *
 * @param domain    only used for debug output
 * @param type      SOCK_RAW, SOCK_DGRAM or SOCK_STREAM
 * @param protocol  passed on for SOCK_RAW; for SOCK_DGRAM, IPPROTO_UDPLITE
 *                  selects UDP-Lite instead of plain UDP
 * @return the new socket descriptor, or -1 with errno set to EINVAL
 *         (unknown type), ENOBUFS (no netconn) or ENFILE (no free slot)
 */
int lwip_socket(int domain, int type, int protocol)
{
  struct netconn *conn;
  int fd;

  LWIP_UNUSED_ARG(domain);

  /* Pick the netconn flavour from the socket type; every flavour gets
     event_callback registered as its event hook. */
  switch (type) {
  case SOCK_RAW:
    conn = netconn_new_with_proto_and_callback(NETCONN_RAW, (u8_t)protocol, event_callback);
    LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_socket(%s, SOCK_RAW, %d) = ",
                                 domain == PF_INET ? "PF_INET" : "UNKNOWN", protocol));
    break;
  case SOCK_DGRAM:
    if (protocol == IPPROTO_UDPLITE) {
      conn = netconn_new_with_callback(NETCONN_UDPLITE, event_callback);
    } else {
      conn = netconn_new_with_callback(NETCONN_UDP, event_callback);
    }
    LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_socket(%s, SOCK_DGRAM, %d) = ",
                                 domain == PF_INET ? "PF_INET" : "UNKNOWN", protocol));
    break;
  case SOCK_STREAM:
    conn = netconn_new_with_callback(NETCONN_TCP, event_callback);
    LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_socket(%s, SOCK_STREAM, %d) = ",
                                 domain == PF_INET ? "PF_INET" : "UNKNOWN", protocol));
    if (conn != NULL) {
      /* Prevent automatic window updates, we do this on our own! */
      netconn_set_noautorecved(conn, 1);
    }
    break;
  default:
    LWIP_DEBUGF(SOCKETS_DEBUG, ("lwip_socket(%d, %d/UNKNOWN, %d) = -1\n",
                                 domain, type, protocol));
    set_errno(EINVAL);
    return -1;
  }

  if (conn == NULL) {
    LWIP_DEBUGF(SOCKETS_DEBUG, ("-1 / ENOBUFS (could not create netconn)\n"));
    set_errno(ENOBUFS);
    printf("conn is null\n");
    return -1;
  }

  fd = alloc_socket(conn, 0);
  if (fd != -1) {
    /* Success: let the netconn know which socket slot it belongs to. */
    conn->socket = fd;
    LWIP_DEBUGF(SOCKETS_DEBUG, ("%d\n", fd));
    set_errno(0);
    return fd;
  }

  /* No free socket slot: give the netconn back. */
  netconn_delete(conn);
  set_errno(ENFILE);
  printf("alloc socket failed\n");
  return -1;
}

/*******************************************************************************************************************************************/

/*******************************************************************************************************************************************/

最后谈谈优化吧

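其实优化很简单,将lwip_select函数和lwip_selscan函数内的所有 for(i = 0; i < maxfdp1; i++) 改为 for(i = maxfdp1-1; i < maxfdp1; i++)就好了,这样就会各自更新各自的描述符,不会发生编号高的更新了编号低的描述符,导致编号低的描述符读取错误而通信失败。这样改了以后,我的两个socket通信稳定了许多,UDP掉线了,也会很快恢复。需要注意的是,这样改了以后每次select只会检查编号为maxfdp1-1的那一个socket,因此只适用于每次select只监听一个socket的情况;如果一次要监听多个socket,编号较小的那些就不会被检查了。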

/**************************************************************************************************************************************/

ps:其实以上我分析的原因只是猜的,具体原因我也不太明白,反正这么干了就正常了。

lwip select函数分析和优化_第1张图片

你可能感兴趣的:(嵌入式,编程,单片机)