前面《linux0.99网络模块-网络设备初始化》的分析中,wd8003通过irqaction (dev->irq, &wd8003_sigaction)注册了中断处理结构wd8003_sigaction。当网络设备接收到数据报(或者发送完成、收发出错)时就会触发中断,然后调用该结构中登记的处理函数。
net/tcp/we.c:
639 static struct sigaction wd8003_sigaction =
640 {
641 wd8003_interrupt,
642 0,
643 0,
644 NULL
645 };
它的处理函数为wd8003_interrupt。
wd8003_interrupt中调用了几个函数,我们先来看一下:
net/tcp/dev.c
这个是用来向链路层传送数据报的
227 dev_rint(unsigned char *buff, long len, int flags,
228 struct device * dev)
229 {
230 struct sk_buff *skb=NULL;
231 unsigned char *to;
232 int amount;
234 if (dev == NULL || buff == NULL || len <= 0) return (1);
236 if (flags & IN_SKBUFF) //如果已经封装过(所有数据都需封装成sk_buff)
237 {
238 skb = (struct sk_buff *)buff; //直接取出即可
239 }
240 else
241 {
242 skb = kmalloc (sizeof (*skb) + len, GFP_ATOMIC); //否则,需要分配内存,长度就是sk_buff数据结构本身的长度加上后面数据的长度(网络栈中传递的数据都是这种形式:sk_buff结构体后面跟着数据)
243 if (skb == NULL)
244 {
245 printk ("dev_rint:dropping packet due to lack of memory.\n");
246 return (1);
247 }
下面开始设置sk_buff的字段
248 skb->lock = 0;
249 skb->mem_len = sizeof (*skb) + len; //数据结构+数据总长度
250 skb->mem_addr = skb; //数据结构内存地址
251 /* first we copy the packet into a buffer, and save it for later. */
253 to = (unsigned char *)(skb+1);
254 while (len > 0)
255 {
256 amount = min (len, (unsigned long) dev->rmem_end -
257 (unsigned long) buff);
258 memcpy (to, buff, amount);
把网卡驱动中接收的数据拷贝到内存中(也就是封装成sk_buff)
259 len -= amount;
260 buff += amount;
261 to += amount;
262 if ((unsigned long)buff == dev->rmem_end)//rmem_end是硬件接收缓冲区末端地址
263 buff = (unsigned char *)dev->rmem_start; //rmem_start是硬件接收缓冲区起始地址
// 如果已经到达设备数据报接收内存末端,就把它调整到起始位置,也就是说网卡的内存是以环的形式使用的
264 }
整个while循环就是拷贝buff开始的len字节(len不能超过最大可用数据量)的数据封装成sk_buff结构,保存在内存之中。
265 }//else
267 skb->len = len; //len是数据的长度,不包括结构体(区别于mem_len)
268 skb->dev = dev;
269 skb->sk = NULL;
//根据上面的具体信息设置skb字段
271 /* now add it to the backlog. */
272 cli();
273 if (backlog == NULL)
274 {
275 skb->prev = skb;
276 skb->next = skb;
277 backlog = skb;
278 }
279 else
280 {
281 skb ->prev = backlog->prev;
282 skb->next = backlog;
283 skb->next->prev = skb;
284 skb->prev->next = skb;
285 }
286 sti();
添加到backlog中,链路层会使用积压队列来暂时存储由驱动上传上来的数据包。之后可以在下半部处理,这也是为了提高响应速度。
288 if (backlog != NULL)
289 bh_active |= 1 << INET_BH;
设置下半部标志
291 return (0);
292 }
我们总结一下dev_rint的作用是把网卡中接收到的数据封装成sk_buff传送给链路层。首先它要判断是不是已经封装过该数据包,如果是就直接让skb指向buff即可;否则需要分配内存来封装该数据包。封装完成后,把该sk_buff挂到积压队列中,并设置下半部激活标记。
net/tcp/we.c
206 /* This routine just calls the ether rcv_int. */
207 static int
208 wdget(volatile struct wd_ring *ring, struct device *dev)
209 {
210 unsigned char *fptr;
211 long len;
212 fptr = (unsigned char *)(ring +1);
213 /* some people have bugs in their hardware which let
214 ring->count be 0. It shouldn't happen, but we
215 should check for it. */
216 len = ring->count-4;
217 if (len < 56)
218 printk ("we.c: Hardware problem, runt packet. ring->count = %d\n",
219 ring->count);
220 return (dev_rint(fptr, len, 0, dev));
221 }
上面的函数是网卡驱动程序中的,它根据ring的字段属性,来调用dev_rint。其中注意212行,ring+1指向的就是数据开始的地方(这里+1使得指针移动一个wd_ring结构体大小)。
360 /* This routine handles the packet recieved interrupt. */
361 /* Debug routines slow things down, but reveal bugs... */
362 /* Modified Boundry Page Register to follow Current Page */
这个函数是用来处理数据报接收中断的,而且会修改边界页寄存器来跟踪当前页
364 static void
365 wd_rcv( struct device *dev )
366 {
367
368 unsigned char pkt; /* Next packet page start */
369 unsigned char bnd; /* Last packet page end */
370 unsigned char cur; /* Future packet page start */
371 unsigned char cmd; /* Command register save */
372 volatile struct wd_ring *ring;
373 int done=0;
375 /* Calculate next packet location */
376 cur = wd_get_cur( dev );
下一个数据报应该存放的起始页面
377 bnd = wd_get_bnd( dev );
获得上一个数据报页面结束位置
378 if( (pkt = bnd + 1) == max_pages )
379 pkt = WD_TXBS;
net/tcp/wereg.h:119:#define WD_TXBS 6 /* size of transmit buffer in pages. */
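因为bnd是上一个数据报的结束页面,所以下一个数据报从第bnd+1个页面开始;如果bnd+1已经到达max_pages,就回绕到接收环的第一个页面WD_TXBS(网卡内存最前面的WD_TXBS个页面是发送缓冲区,接收环占用第WD_TXBS页到第max_pages-1页,这一点也可以从后面410、411行对bnd的回绕处理看出来)。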
381 while( done != 1)
382 {
383 if (pkt != cur) //pkt还没有追上cur,说明接收环中还有未取走的数据报
384 {
385
386 /* Position pointer to packet in card ring buffer */
387 ring = (volatile struct wd_ring *) (dev->mem_start + (pkt << 8));
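因为一个page是256字节(这个可以从net/tcp/we.c:716: max_pages = ( i - dev->mem_start )/256;推测出来),所以pkt << 8就是第pkt页相对于dev->mem_start的字节偏移。从387行可以看出,pkt是从dev->mem_start算起的页号,dev->mem_start是网卡内存的起始地址(后面wd_trs中可以看到,发送的数据也是拷贝到dev->mem_start处的)。现在ring指向该数据报开头的wd_ring结构,其后紧跟着数据,ring对后面的数据进行描述。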
389 /* Ensure a valid packet */
390 if( ring->status & 1 )
391 {
392 /* Too small and too big packets are
393 filtered by the board */
394 if( wd_debug )
395 printk("\nwd8013 - wdget: bnd = %d, pkt = %d, "
396 "cur = %d, status = %d, len = %d, next = %d",
397 bnd, pkt, cur, ring->status, ring->count,
398 ring->next);
399
400 stats.rx_packets++; /* count all receives */
增加接收数据报计数
401 done = wdget( ring, dev ); /* get the packet */
从ring指定的位置拷贝数据(封装成sk_buff),把封装后的sk_buff挂到链路层的backlog上,如果正常完成,返回0,否则返回1.
403 /* see if we need to process this packet again. */
404 if (done == -1) continue;
这里的-1从哪来?从前面dev_rint的代码可以看到,它只会返回0或1,而wdget直接返回dev_rint的返回值,所以这里done不可能等于-1,这个判断在当前版本中不会成立(可能是历史遗留的代码)。
406 /* Calculate next packet location */
407 pkt = ring->next;
409 /* Compute new boundry - tell the chip */
410 if( (bnd = pkt - 1) < WD_TXBS )
411 bnd = max_pages - 1;
412 wd_put_bnd(bnd, dev);
413 //把边界信息告诉芯片
414 /* update our copy of cur. */
415 cur = wd_get_cur(dev);
416 }
417 else
418 { /* Bad packet in ring buffer -
419 should not happen due to hardware filtering */
420 printk("wd8013 - bad packet: len = %d, status = x%x, "
421 "bnd = %d, pkt = %d, cur = %d\n"
422 "trashing receive buffer!",
423 ring->count, ring->status, bnd, pkt,
424 cur);
425 /* Reset bnd = cur-1 */
426 if( ( bnd = wd_get_cur( dev ) - 1 ) < WD_TXBS )
427 bnd = max_pages - 1;
428 wd_put_bnd( bnd, dev );
429 break; /* return */
430 }
432 }
433 else //pkt==cur
434 {
435 done = dev_rint(NULL, 0,0, dev);
436 }
437 }
439 /* reset to page 0 */
440 cmd = inb_p(WD_COMM);
441 if (cmd & 0x40)
442 {
443 outb_p(cmd & ~(CPAGE1), WD_COMM); /* select page 0 */
444 }
445 }
这个函数就是根据芯片中指明的数据包的位置调用dev_rint将其发送给链路层,并且维护芯片状态。
376 /* This routine is called when an device interface is ready to
377 transmit a packet. Buffer points to where the packet should
378 be put, and the routine returns the length of the packet. A
379 length of zero is interrpreted to mean the transmit buffers
380 are empty, and the transmitter should be shut down. */
382 unsigned long
383 dev_tint(unsigned char *buff, struct device *dev)
384 {
385 int i;
386 int tmp;
387 struct sk_buff *skb;
388 for (i=0; i < DEV_NUMBUFFS; i++)
389 {
390 while (dev->buffs[i]!=NULL)
391 {
392 cli();
393 skb=dev->buffs[i];
394 if (skb->magic != DEV_QUEUE_MAGIC)
395 {
396 printk ("dev.c skb with bad magic-%X: squashing queue\n",
397 skb->magic);
398 cli();
399 dev->buffs[i] = NULL;
400 sti();
401 continue;
402 }
404 skb->magic = 0;
405
406 if (skb->next == skb) //只有一个sk_buff,
407 {
408 dev->buffs[i] = NULL; //从缓冲区中删除它
409 }
410 else
411 {
412 /* extra consistancy check. */
413 if (skb->next == NULL
414 #ifdef CONFIG_MAX_16M
415 || (unsigned long)(skb->next) > 16*1024*1024
416 #endif
417 )
418
419 {
420 printk ("dev.c: *** bug bad skb->next, squashing queue \n");
421 cli();
422 dev->buffs[i] = NULL;
423 }
424 else
425 {
426 dev->buffs[i]= skb->next;
427 skb->prev->next = skb->next;
428 skb->next->prev = skb->prev;
// 从缓冲区中删除此sk_buff
429 }
430 } //410
431 skb->next = NULL;
432 skb->prev = NULL;
//切断与其他sk_buff的联系,使其孤立
434 if (!skb->arp) //如果没有进行arp解析
435 {
436 if (dev->rebuild_header (skb+1, dev))
437 {
438 skb->dev = dev;
439 sti();
440 arp_queue (skb);
441 continue; //跳过下面的处理,因为上面代码会处理该数据报的发送
442 }
443 }
到这里说明已经进行过arp解析了,或者rebuild_header返回了0(不需要交给arp_queue处理)
445 tmp = skb->len;
446 if (tmp <= dev->mtu) //不能超过MTU
447 {
448 if (dev->send_packet != NULL)
449 {
450 dev->send_packet(skb, dev); //发送该数据报
451 }
452 if (buff != NULL)
453 memcpy (buff, skb + 1, tmp); //拷贝数据到buff中,这里的buff一般就是dev->mem_start,它是wd8003的发送数据缓冲区,用于存储即将发送的数据
454
455 PRINTK (">>\n");
456 print_eth ((struct enet_header *)(skb+1));
457 }
458 else //如果超过了MTU
459 {
460 printk ("dev.c:**** bug len bigger than mtu, "
461 "squashing queue. \n");
462 cli();
463 dev->buffs[i] = NULL; //置为NULL
464 continue;
466 }
467 sti();
运行到这里说明没有超过MTU,并且已经调用了发送过程
468 if (skb->free)
469 {
470 kfree_skb(skb, FREE_WRITE);
471 }
释放skb所占内存
473 if (tmp != 0)
474 return (tmp); //返回发送数据长度
475 }
476 }
477 PRINTK ("dev_tint returning 0 \n");
478 return (0);
479 }
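总结:dev把待发送的数据包通过buffs数组来维护,不同优先级的数据包会放到不同的索引处,数组同一个位置处的sk_buff链接成双向循环链表。dev_tint按索引从小到大依次检查各个队列,每次从队头取出一个sk_buff,把它从链表中删除,然后判断是不是已经进行过arp解析:如果还没有,并且rebuild_header返回非0,就把它交给arp_queue处理,然后继续取下一个sk_buff;否则,只要长度没有超过MTU就调用发送函数进行发送,并把数据拷贝到buff中;如果超过了MTU则废弃该链。发送之后如果该sk_buff设置了free标志就释放它。只要处理了一个长度不为0的数据包,函数就立即返回其长度,因此一次调用最多发送一个数据包;所有队列都为空时返回0。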
242 void
243 kfree_skb (struct sk_buff *skb, int rw)
244 {
245 if (skb->lock)
246 {
247 skb->free = 1;
248 return;
249 }
如果skb处于锁定状态,这里并不释放它(也不会解锁),只是把free标志置为1后返回
250 skb->magic = 0;
251 if (skb->sk) //如果sk不为空
252 {
253 if (rw)
254 {
255 skb->sk->prot->rfree (skb->sk, skb->mem_addr, skb->mem_len);
调用具体协议的rfree方法
256 }
257 else
258 {
259 skb->sk->prot->wfree (skb->sk, skb->mem_addr, skb->mem_len);
260 }
261 }
262 else
263 {
264 kfree_s (skb->mem_addr, skb->mem_len);
释放内存
265 }
266 }
具体协议怎么处理即将被释放的包那就看协议自身的考虑了。后面文章的分析中就会看到具体是怎么操作的。
473 /*
474 * This get's the transmit interrupts. It assumes command page 0 is set, and
475 * returns with command page 0 set.
476 */
477
478 static void
479 wd_trs( struct device *dev)
480 {
481 unsigned char cmd, errors;
482 int len;
483
484 if( wd_debug )
485 printk("\nwd_trs() - TX complete, status = x%x", inb_p(TSR));
486
487 if( ( errors = inb_p( TSR ) & PTXOK ) || tx_aborted ){ //正常
488 if( (errors&~0x02) == 0 ){
489 stats.tx_packets++; //没有发生错误,更新发送包的数量
490 tx_aborted = 0;
491 }
493 /* attempt to start a new transmission. */
494 len = dev_tint( (unsigned char *)dev->mem_start, dev ); //
495 if( len != 0 ){
496 len=max(len, ETHER_MIN_LEN);
497 cmd=inb_p(WD_COMM);
498 outb_p(len&0xff,WD_TB0);
499 outb_p(len>>8,WD_TB1);
500 cmd |= CTRANS;
506 status &= ~TRS_BUSY;
507 interrupt_mask &= ~TRANS_MASK;
508 return;
509 }
510 }
511 else{ /* TX error occurred! - H/W will reschedule */
512 if( errors & CRS ){
513 stats.tx_carrier_errors++;
514 printk("\nwd8013 - network cable short!");
515 }
516 if (errors & COL )
517 stats.collisions += inb_p( NCR );
518 if (errors & CDH )
519 stats.tx_heartbeat_errors++;
520 if (errors & OWC )
521 stats.tx_window_errors++;
上面这几行是用来统计错误的
522 }
523 }
用来进行发送数据包的工作,并且统计发送信息,包括出错信息。
我们来分析一下这个函数:
525 void
526 wd8003_interrupt(int reg_ptr)
527 {
528 unsigned char cmd;
529 unsigned char errors;
530 unsigned char isr;
531 struct device *dev;
532 struct pt_regs *ptr;
533 int irq;
534 int count = 0;
535
536 ptr = (struct pt_regs *)reg_ptr; //
537 irq = -(ptr->orig_eax+2); //中断号
538 for (dev = dev_base; dev != NULL; dev = dev->next)
539 {
540 if (dev->irq == irq) break; //找到对应的设备
541 }
542 if (dev == NULL)
543 {
544 printk ("we.c: irq %d for unknown device\n", irq);
545 return;
546 }
547 sti(); /* this could take a long time, we should have interrupts on. */
//这个过程比较长,需要开启中断
549 cmd = inb_p( CR );/* Select page 0 */
控制寄存器命令
550 if( cmd & (PS0|PS1 ) ){
551 cmd &= ~(PS0|PS1);
552 outb_p(cmd, CR );
553 }
写出命令
555 if (wd_debug)
556 printk("\nwd8013 - interrupt isr = x%x", inb_p( ISR ) );
558 status |= IN_INT;
正在处理中断
560 do{ /* find out who called */
561 sti();
562 /* Check for overrunning receive buffer first */
563 if ( ( isr = inb_p( ISR ) ) & OVW ) { /* Receiver overwrite warning */
564 stats.rx_over_errors++;
565 if( wd_debug )
566 printk("\nwd8013 overrun bnd = %d, cur = %d", wd_get_bnd( dev ), wd_get_cur( dev ) );
567 wd_rx_over( dev ); /* performs wd_rcv() as well */
568 outb_p( OVW, ISR ); /* acknowledge interrupt */
569 }
570 else if ( isr & PRX ) { /* got a packet. */ //接收到一个数据报
571 wd_rcv( dev );
572 outb_p( PRX, ISR ); /* acknowledge interrupt */ //通知中断
573 }
574 /* This completes rx processing... whats next */
575
576 if ( inb_p( ISR ) & PTX ) { /* finished sending a packet. */
577 wd_trs( dev );
578 outb_p( PTX, ISR ); /* acknowledge interrupt */
579 }
580
581 if (inb_p( ISR ) & RXE ) { /* recieve error */
582 stats.rx_errors++; /* general errors */
583 errors = inb_p( RSR ); /* detailed errors */
584 if (errors & CRC )
585 stats.rx_crc_errors++;
586 if (errors & FAE )
587 stats.rx_frame_errors++;
588 if (errors & FO )
589 stats.rx_fifo_errors++;
590 if (errors & MPA )
591 stats.rx_missed_errors++;
592 outb_p( RXE, ISR ); /* acknowledge interrupt */
593 }
594
595 if (inb_p( ISR ) & TXE ) { /* transmit aborted! */
596 stats.tx_errors++; /* general errors */
597 errors = inb_p( TSR ); /* get detailed errors */
598 if (errors & ABT ){
599 stats.tx_aborted_errors++;
600 printk("\nwd8013 - network cable open!");
601 }
602 if (errors & FU )
603 {
604 stats.tx_fifo_errors++;
605 printk("\nwd8013 - TX FIFO underrun!");
606 }
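do循环的处理流程如下:首先检查是不是有接收缓冲区被覆盖的情况(OVW),如果有就调用wd_rx_over进行处理(其中也会执行wd_rcv),处理完成后向控制器应答该中断;否则再检查是不是接收到数据包(PRX),如果是就调用wd_rcv进行处理,完成后应答该中断;然后再检查是不是发送完成(PTX),如果是就调用wd_trs:发送完成并且没有出错时继续下一个发送,否则统计错误信息;如果有接收错误(RXE),更新接收错误统计信息;如果有发送错误(TXE),统计发送错误信息,并像下面的代码那样把tx_aborted置1后再调用一次wd_trs。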
608 /* Cannot do anymore - empty the bit bucket */
609 tx_aborted = 1;
610 wd_trs( dev );
611 tx_aborted = 0;
613 outb_p( TXE, ISR ); /* acknowledge interrupt */
614 }
615
616 if( inb_p( ISR ) & CNTE ){ /* Tally counters overflowing */
617 errors = inb_p( CNTR0 );
618 errors = inb_p( CNTR1 );
619 errors = inb_p( CNTR2 );
620 outb_p( CNTE, ISR ); /* acknowledge interrupt */
621 }
622 if( inb_p( ISR ) & RST ) /* Reset has been performed */
623 outb_p( RST, ISR ); /* acknowledge interrupt */
624
625 if( wd_debug ){
626 if( ( isr = inb_p( ISR ) ) != 0 )
627 printk("\nwd8013 - ISR not cleared = x%x", isr );
628 }
629 if( ++count > max_pages + 1 ){
630 printk("\nwd8013_interrupt - infinite loop detected, isr = x%x, count = %d", isr, count );
631 }
632 cli();
633 } while( inb_p( ISR ) != 0 );
635 status &= ~IN_INT;
设为处理完成状态
636 }
总结,中断处理函数就是通过检测芯片状态调用不同的过程进行处理,比如将数据包传给链路层或者将数据包从控制器发送出去,或者统计出错信息等等。