网卡收到一个数据包的时候,是如何传给应用层的

这里以3c501网卡为例。每个设备对应一个device结构体,下面的代码对3c501网卡的device结构体进行初始化,包括设置发送函数、注册中断回调、设置MAC头长度等。

/*
 * The actual probe.
 *
 * Reads the six-byte station address from the board at `ioaddr`, accepts
 * it only if it carries the 3Com (02:60:8c) or Sager NP943 (00:80:c8)
 * prefix, optionally auto-detects the IRQ line, and then fills in the
 * 3c501-specific entries of `dev` before handing over to ether_setup()
 * for the generic Ethernet fields.
 *
 * Returns 0 on success.  Failures are reported as POSITIVE error codes:
 *   ENODEV - station address prefix not recognised
 *   EAGAIN - IRQ auto-detection found no line
 *   ENOMEM - the private net_local area could not be allocated
 *
 * When MODULE is defined the whole body is compiled out and the function
 * simply returns 0.
 */
static int
el1_probe1(struct device *dev, int ioaddr)
{
#ifndef MODULE

    char *mname;		/* Vendor name */
    unsigned char station_addr[6];
    int autoirq = 0;
    int i;

    /* Read the station address PROM data from the special port.  */
    for (i = 0; i < 6; i++) {
	outw(i, ioaddr + EL1_DATAPTR);
	station_addr[i] = inb(ioaddr + EL1_SAPROM);
    }
    /* Check the first three octets of the S.A. for 3Com's prefix, or
       for the Sager NP943 prefix. */
    if (station_addr[0] == 0x02  &&  station_addr[1] == 0x60
	&& station_addr[2] == 0x8c) {
	mname = "3c501";
    } else if (station_addr[0] == 0x00  &&  station_addr[1] == 0x80
	&& station_addr[2] == 0xC8) {
	mname = "NP943";
    } else
	return ENODEV;

    /* Grab the region so we can find the another board if autoIRQ fails. */
    request_region(ioaddr, EL1_IO_EXTENT,"3c501");

    /* We auto-IRQ by shutting off the interrupt line and letting it float
       high. */
    if (dev->irq < 2) {

	autoirq_setup(2);

	inb(RX_STATUS);		/* Clear pending interrupts. */
	inb(TX_STATUS);
	outb(AX_LOOP + 1, AX_CMD);

	outb(0x00, AX_CMD);

	autoirq = autoirq_report(1);

	if (autoirq == 0) {
	    printk("%s probe at %#x failed to detect IRQ line.\n",
		   mname, ioaddr);
	    return EAGAIN;
	}
    }

    outb(AX_RESET+AX_LOOP, AX_CMD);			/* Loopback mode. */

    dev->base_addr = ioaddr;
    memcpy(dev->dev_addr, station_addr, ETH_ALEN);
    /* A non-zero low nibble in mem_start overrides the debug level. */
    if (dev->mem_start & 0xf)
	el_debug = dev->mem_start & 0x7;
    if (autoirq)
	dev->irq = autoirq;

    printk("%s: %s EtherLink at %#lx, using %sIRQ %d.\n",
	   dev->name, mname, dev->base_addr,
	   autoirq ? "auto":"assigned ", dev->irq);

#ifdef CONFIG_IP_MULTICAST
    printk("WARNING: Use of the 3c501 in a multicast kernel is NOT recommended.\n");
#endif

    if (el_debug)
	printk("%s", version);

    /*
     * Initialize the device structure.  The allocation can fail, so check
     * it before zeroing; the previous code passed a possible NULL straight
     * to memset().
     * NOTE(review): as on the EAGAIN path above, the I/O region claimed by
     * request_region() is left claimed on this failure - confirm that is
     * what the caller expects.
     */
    if (dev->priv == NULL) {
	dev->priv = kmalloc(sizeof(struct net_local), GFP_KERNEL);
	if (dev->priv == NULL)
	    return ENOMEM;
    }
    memset(dev->priv, 0, sizeof(struct net_local));

    /* The EL1-specific entries in the device structure. */
    dev->open = &el_open;
    /* Transmit entry point. */
    dev->hard_start_xmit = &el_start_xmit;
    dev->stop = &el1_close;
    dev->get_stats = &el1_get_stats;
    dev->set_multicast_list = &set_multicast_list;
    /* Setup the generic properties */
    ether_setup(dev);

#endif /* !MODULE */
    return 0;
}

/*
 * Fill in the fields of the device structure with ethernet-generic values.
 *
 * Initialises every per-device buffer queue, registers a device whose name
 * starts with "eth" in the ethdev_index[] table (slot taken from the
 * number following "eth"), and then installs the Ethernet header helpers,
 * link parameters, broadcast address and cleared protocol addresses.
 */
void ether_setup(struct device *dev)
{
	int n;

	n = 0;
	while (n < DEV_NUMBUFFS) {
		skb_queue_head_init(&dev->buffs[n]);
		n++;
	}

	/* register boot-defined "eth" devices */
	if (dev->name != NULL && strncmp(dev->name, "eth", 3) == 0) {
		/*
		 * NOTE(review): the parsed unit number is used as an index
		 * without a range check; the size of ethdev_index[] is not
		 * visible here - confirm it cannot be exceeded.
		 */
		n = simple_strtoul(dev->name + 3, NULL, 0);
		if (ethdev_index[n] == NULL)
			ethdev_index[n] = dev;
		else if (ethdev_index[n] != dev)
			/* Really shouldn't happen! */
			printk("ether_setup: Ouch! Someone else took %s\n",
				dev->name);
	}

	/* Link-layer header handling. */
	dev->hard_header = eth_header;
	dev->rebuild_header = eth_rebuild_header;
	dev->type_trans = eth_type_trans;

	/* Link parameters. */
	dev->type = ARPHRD_ETHER;
	dev->hard_header_len = ETH_HLEN;
	dev->mtu = 1500;	/* eth_mtu */
	dev->addr_len = ETH_ALEN;

	/* Broadcast address: all ones. */
	n = 0;
	while (n < ETH_ALEN) {
		dev->broadcast[n] = 0xff;
		n++;
	}

	/* New-style flags. */
	dev->flags = IFF_BROADCAST | IFF_MULTICAST;

	/* Protocol side: address family set, addresses zeroed for now. */
	dev->family = AF_INET;
	dev->pa_addr = 0;
	dev->pa_brdaddr = 0;
	dev->pa_mask = 0;
	dev->pa_alen = sizeof(unsigned long);
}

/*
 * Open/initialize the board.
 *
 * Installs el_interrupt() as the handler for the device's IRQ, records the
 * device in irq2dev_map[], resets the board, marks the device started and
 * switches the board to receive mode.
 *
 * Returns 0 on success, or -EAGAIN if the IRQ could not be obtained.
 */
static int el_open(struct device *dev)
{
    /* Presumably referenced by the AX_* register macros - keep the name. */
    int ioaddr;

    ioaddr = dev->base_addr;

    if (el_debug > 2) {
	printk("%s: Doing el_open()...", dev->name);
    }

    /* Hook the interrupt line; el_interrupt() services this device's IRQ. */
    if (request_irq(dev->irq, &el_interrupt, 0, "3c501") != 0)
	return -EAGAIN;

    irq2dev_map[dev->irq] = dev;
    el_reset(dev);
    dev->start = 1;

    /* Aux control, irq and receive enabled */
    outb(AX_RX, AX_CMD);
    MOD_INC_USE_COUNT;

    return 0;
}

设置完网卡对应的数据结构后,如果有数据包到达,由驱动程序中的这两个函数处理。

/*
 * The typical workload of the driver:
 * handle the ether interface interrupts.
 *
 * Looks up the device for `irq` in irq2dev_map[] and then, depending on
 * dev->tbusy, treats the interrupt as a transmit completion or as a
 * receive event.  Except for the collision-retry path, which returns
 * early, every path ends by putting the board back into receive mode and
 * clearing dev->interrupt.
 *
 * irq:  interrupt number being serviced.
 * regs: not used by this function.
 */
static void
el_interrupt(int irq, struct pt_regs *regs)
{
     
    struct device *dev = (struct device *)(irq2dev_map[irq]);
    struct net_local *lp;
    int ioaddr;
    int axsr;			/* Aux. status reg. */

    /* Ignore interrupts that do not map to a device using this line. */
    if (dev == NULL  ||  dev->irq != irq) {
     
	printk ("3c501 driver: irq %d for unknown device.\n", irq);
	return;
    }

    ioaddr = dev->base_addr;
    lp = (struct net_local *)dev->priv;
    axsr = inb(AX_STATUS);

    if (el_debug > 3)
      printk("%s: el_interrupt() aux=%#02x", dev->name, axsr);
    /* Re-entry is only reported, not prevented. */
    if (dev->interrupt)
	printk("%s: Reentering the interrupt driver!\n", dev->name);
    dev->interrupt = 1;

    if (dev->tbusy) {
     
    
    	/*
    	 *	Board in transmit mode.
    	 */
    	 
	int txsr = inb(TX_STATUS);

	if (el_debug > 6)
	    printk(" txsr=%02x gp=%04x rp=%04x", txsr, inw(GP_LOW),
		   inw(RX_LOW));

	if ((axsr & 0x80) && (txsr & TX_READY) == 0) {
     
	/*
	 *	FIXME: is there a logic to whether to keep on trying or
	 *	reset immediately ?
	 */
	    printk("%s: Unusual interrupt during Tx, txsr=%02x axsr=%02x"
		   " gp=%03x rp=%03x.\n", dev->name, txsr, axsr,
		   inw(ioaddr + EL1_DATAPTR), inw(ioaddr + EL1_RXPTR));
	    dev->tbusy = 0;
	    mark_bh(NET_BH);
	} else if (txsr & TX_16COLLISIONS) {
     
	/*
	 *	Timed out
	 *	NOTE(review): this branch does not clear dev->tbusy or
	 *	mark NET_BH, unlike its neighbours - confirm intended.
	 */
	    if (el_debug)
		printk("%s: Transmit failed 16 times, ethernet jammed?\n",
		       dev->name);
	    outb(AX_SYS, AX_CMD);
	    lp->stats.tx_aborted_errors++;
	} else if (txsr & TX_COLLISION) {
     	/* Retrigger xmit. */
	    if (el_debug > 6)
		printk(" retransmitting after a collision.\n");
	/*
	 *	Poor little chip can't reset its own start pointer
	 */
	    outb(AX_SYS, AX_CMD);
	    outw(lp->tx_pkt_start, GP_LOW);
	    outb(AX_XMIT, AX_CMD);
	    lp->stats.collisions++;
	    /* Early exit: skips the switch back to receive mode below. */
	    dev->interrupt = 0;
	    return;
	} else {
     
	/*
	 *	It worked.. we will now fall through and receive
	 */
	    lp->stats.tx_packets++;
	    if (el_debug > 6)
		printk(" Tx succeeded %s\n",
		       (txsr & TX_RDY) ? "." : "but tx is busy!");
	/*
	 *	This is safe the interrupt is atomic WRT itself.
	 */
	    dev->tbusy = 0;
	    mark_bh(NET_BH);	/* In case more to transmit */
	}
    } else {
     
    
    	/*
    	 *	In receive mode.
    	 */
    	 
	int rxsr = inb(RX_STATUS);
	if (el_debug > 5)
	    printk(" rxsr=%02x txsr=%02x rp=%04x", rxsr, inb(TX_STATUS),
		   inw(RX_LOW));

	/*
	 *	Just reading rx_status fixes most errors. 
	 */
	if (rxsr & RX_MISSED)
	    lp->stats.rx_missed_errors++;
	if (rxsr & RX_RUNT) {
     	/* Handled to avoid board lock-up. */
	    lp->stats.rx_length_errors++;
	    if (el_debug > 5) printk(" runt.\n");
	} else if (rxsr & RX_GOOD) {
     
	/*
	 *	Receive worked.
	 */
		/* A frame was received successfully: hand it to el_receive(). */
	    el_receive(dev);
	} else {
     			/* Nothing?  Something is broken! */
	    if (el_debug > 2)
		printk("%s: No packet seen, rxsr=%02x **resetting 3c501***\n",
		       dev->name, rxsr);
	    el_reset(dev);
	}
	if (el_debug > 3)
	    printk(".\n");
    }

    /*
     *	Move into receive mode 
     */
    outb(AX_RX, AX_CMD);
    outw(0x00, RX_BUF_CLR);
    inb(RX_STATUS);		/* Be certain that interrupts are cleared. */
    inb(TX_STATUS);
    dev->interrupt = 0;
    return;
}


/*
 * We have a good packet. Well, not really "good", just mostly not broken.
 * We must check everything to see if it is good.
 *
 * Reads the frame length from the board, rejects lengths outside
 * 60..1536 bytes, copies the frame out of the board into a freshly
 * allocated sk_buff and passes it up through netif_rx().  Each outcome is
 * counted in the device's private statistics.
 */
static void
el_receive(struct device *dev)
{
    struct net_local *lp = (struct net_local *)dev->priv;
    int ioaddr = dev->base_addr;	/* presumably used by the register macros */
    struct sk_buff *skb;
    int frame_len;

    /* Length of the received frame, as reported by the board. */
    frame_len = inw(RX_LOW);

    if (el_debug > 4)
	printk(" el_receive %d.\n", frame_len);

    /* Too short or too long: count it and give up. */
    if (frame_len < 60 || frame_len > 1536) {
	if (el_debug)
	    printk("%s: bogus packet, length=%d\n", dev->name, frame_len);
	lp->stats.rx_over_errors++;
	return;
    }

    /* Command mode so we can empty the buffer */
    outb(AX_SYS, AX_CMD);

    /* Buffer that will carry the frame up the stack. */
    skb = alloc_skb(frame_len, GFP_ATOMIC);

    /* Start of frame */
    outw(0x00, GP_LOW);

    if (skb == NULL) {
	printk("%s: Memory squeeze, dropping packet.\n", dev->name);
	lp->stats.rx_dropped++;
	return;
    }

    /* Record the frame length and the device it arrived on. */
    skb->len = frame_len;
    skb->dev = dev;

    /*
     * The read increments through the bytes. The interrupt handler will
     * fix the pointer when it returns to receive mode.
     */
    insb(DATAPORT, skb->data, frame_len);

    /* Queue the frame for the upper layers. */
    netif_rx(skb);
    lp->stats.rx_packets++;
}

驱动层处理生成一个skb结构体,然后通过netif_rx函数传给链路层。netif_rx直接把skb挂载到backlog队列中,然后结束中断处理,等下半部分再进行数据包的具体处理。由sock_init函数的代码我们知道,下半部分的处理函数是net_bh。

/*
 *	Receive a packet from a device driver and queue it for the upper
 *	(protocol) levels. This is the recommended interface to use.
 *
 *	The buffer is marked un-owned, time-stamped if it has no stamp yet,
 *	and appended to the global "backlog" queue; NET_BH is then marked so
 *	the queue is processed after the hardware interrupt returns.
 *
 *	Overload handling: once backlog_size exceeds 300 the function starts
 *	freeing incoming buffers instead of queueing them, and keeps doing so
 *	until it is next called with backlog_size at zero.
 */

void netif_rx(struct sk_buff *skb)
{
	/* Sticky overload flag, kept across calls. */
	static int shedding = 0;

	/*
	 *	Any received buffers are un-owned and should be discarded
	 *	when freed. These will be updated later as the frames get
	 *	owners.
	 */
	skb->sk = NULL;
	skb->free = 1;
	if (skb->stamp.tv_sec == 0)
		skb->stamp = xtime;

	/*
	 *	Check that we aren't overdoing things.
	 */
	if (backlog_size == 0)
		shedding = 0;
	else if (backlog_size > 300)
		shedding = 1;

	if (!shedding)
	{
		/*
		 *	Add it to the "backlog" queue.
		 */
#ifdef CONFIG_SKB_CHECK
		IS_SKB(skb);
#endif
		skb_queue_tail(&backlog, skb);
		backlog_size++;

		/*
		 *	If any packet arrived, mark it for processing after
		 *	the hardware interrupt returns.
		 */
		mark_bh(NET_BH);
		return;
	}

	/* Overloaded: drop the packet. */
	kfree_skb(skb, FREE_READ);
}
/*
 *	When we are called the queue is ready to grab, the interrupts are
 *	on and hardware can interrupt and queue to the receive queue a we
 *	run with no problems.
 *	This is run as a bottom half after an interrupt handler that does
 *	mark_bh(NET_BH);
 *
 *	Drains the "backlog" queue: for each buffer it steps past the MAC
 *	header, asks the device's type_trans() for the protocol ID, and
 *	delivers the buffer to every entry of the ptype_base list whose type
 *	and device match.  All but the last matching handler receive a
 *	clone; the last one receives the original.  Buffers nobody wants are
 *	freed.  dev_transmit() is called before, between and after packets.
 *
 *	tmp: not used by this function.
 */
 
void net_bh(void *tmp)
{
     
	struct sk_buff *skb;
	struct packet_type *ptype;
	struct packet_type *pt_prev;
	unsigned short type;

	/*
	 *	Atomically check and mark our BUSY state. 
	 */
	/* Guard against re-entry: leave if set_bit() reports non-zero. */
	if (set_bit(1, (void*)&in_bh))
		return;

	/*
	 *	Can we send anything now? We want to clear the
	 *	decks for any more sends that get done as we
	 *	process the input.
	 */
	/* Push out any packets waiting to be transmitted. */
	dev_transmit();
  
	/*
	 *	Any data left to process. This may occur because a
	 *	mark_bh() is done after we empty the queue including
	 *	that from the device which does a mark_bh() just after
	 */

	cli();
	
	/*
	 *	While the queue is not empty
	 */
	/* The backlog queue is filled by netif_rx() with received frames. */
	while((skb=skb_dequeue(&backlog))!=NULL)
	{
     
		/*
		 *	We have a packet. Therefore the queue has shrunk
		 */
  		backlog_size--;

		sti();
		
	       /*
		*	Bump the pointer to the next structure.
		*	This assumes that the basic 'skb' pointer points to
		*	the MAC header, if any (as indicated by its "length"
		*	field).  Take care now!
		*/
		/* Point h.raw just past the MAC header (the IP header for IP frames). */
		skb->h.raw = skb->data + skb->dev->hard_header_len;
		/* Exclude the MAC header from the length. */
		skb->len -= skb->dev->hard_header_len;

	       /*
		* 	Fetch the packet protocol ID.  This is also quite ugly, as
		* 	it depends on the protocol driver (the interface itself) to
		* 	know what the type is, or where to get it from.  The Ethernet
		* 	interfaces fetch the ID from the two bytes in the Ethernet MAC
		*	header (the h_proto field in struct ethhdr), but other drivers
		*	may either use the ethernet ID's or extra ones that do not
		*	clash (eg ETH_P_AX25). We could set this before we queue the
		*	frame. In fact I may change this when I have time.
		*/
		/* Determine the upper-layer protocol. */
		type = skb->dev->type_trans(skb, skb->dev);

		/*
		 *	We got a packet ID.  Now loop over the "known protocols"
		 *	table (which is actually a linked list, but this will
		 *	change soon if I get my way- FvK), and forward the packet
		 *	to anyone who wants it.
		 *
		 *	[FvK didn't get his way but he is right this ought to be
		 *	hashed so we typically get a single hit. The speed cost
		 *	here is minimal but no doubt adds up at the 4,000+ pkts/second
		 *	rate we can hit flat out]
		 */
		pt_prev = NULL;
		for (ptype = ptype_base; ptype != NULL; ptype = ptype->next) 
		{
     
			if ((ptype->type == type || ptype->type == htons(ETH_P_ALL)) && (!ptype->dev || ptype->dev==skb->dev))
			{
     
				/*
				 *	We already have a match queued. Deliver
				 *	to it and then remember the new match
				 */
				/* An earlier match exists: it gets its own clone of the skb. */
				if(pt_prev)
				{
     
					struct sk_buff *skb2;

					skb2=skb_clone(skb, GFP_ATOMIC);

					/*
					 *	Kick the protocol handler. This should be fast
					 *	and efficient code.
					 */

					/* If the clone failed, that handler is silently skipped. */
					if(skb2)
						pt_prev->func(skb2, skb->dev, pt_prev);
				}
				/* Remember the current last to do */
				pt_prev=ptype;
			}
		} /* End of protocol list loop */
		
		/*
		 *	Is there a last item to send to ?
		 */
		/* The last matching handler receives the original skb (no copy). */
		if(pt_prev)
			pt_prev->func(skb, skb->dev, pt_prev);
		/*
		 * 	Has an unknown packet has been received ?
		 */
	 
		/* No handler matched at all: free the buffer. */
		else
			kfree_skb(skb, FREE_WRITE);

		/*
		 *	Again, see if we can transmit anything now. 
		 *	[Ought to take this out judging by tests it slows
		 *	 us down not speeds us up]
		 */

		dev_transmit();
		cli();
  	}	/* End of queue loop */
  	
  	/*
  	 *	We have emptied the queue
  	 */
  	/* Done: clear the busy marker, then re-enable interrupts. */
  	in_bh = 0;
	sti();
	
	/*
	 *	One last output flush.
	 */
	 
	dev_transmit();
}

这里假设上层协议是ip,ip层处理函数是ip_rcv,代码如下

/*
 *	This function receives all incoming IP datagrams.
 *
 *	skb: buffer whose h.iph points at the IP header.
 *	dev: device the datagram arrived on.
 *	pt:  packet_type entry that dispatched here; not used by this function.
 *
 *	Steps, in order: validate the header, consult the firewall (if
 *	configured), trim the buffer to the IP total length, parse options,
 *	forward or drop datagrams not addressed to us, filter multicast
 *	groups (if configured), reassemble fragments, hand clones to
 *	matching raw sockets, and finally pass the datagram to every
 *	registered transport protocol whose number matches the header.
 *
 *	The buffer is consumed on every path - freed here or handed to a
 *	handler - and the function always returns 0.
 */

int ip_rcv(struct sk_buff *skb, struct device *dev, struct packet_type *pt)
{
	struct iphdr *iph = skb->h.iph;
	struct sock *raw_sk=NULL;
	unsigned char hash;
	unsigned char flag = 0;
	unsigned char opts_p = 0;	/* Set iff the packet has options. */
	struct inet_protocol *ipprot;
	static struct options opt; /* since we don't use these yet, and they
				take up stack space. */
	int brd=IS_MYADDR;
	int is_frag=0;
#ifdef CONFIG_IP_FIREWALL
	int err;
#endif	

	ip_statistics.IpInReceives++;

	/*
	 *	Tag the ip header of this packet so we can find it
	 */

	skb->ip_hdr = iph;

	/*
	 *	Is the datagram acceptable?
	 *
	 *	1.	Length at least the size of an ip header
	 *	2.	Version of 4
	 *	3.	Checksums correctly. [Speed optimisation for later, skip loopback checksums]
	 *	(4.	We ought to check for IP multicast addresses and undefined types.. does this matter ?)
	 */
	if (skb->len<sizeof(struct iphdr) || iph->ihl<5 || iph->version != 4 ||
		skb->len<ntohs(iph->tot_len) || ip_fast_csum((unsigned char *)iph, iph->ihl) !=0)
	{
		ip_statistics.IpInHdrErrors++;
		kfree_skb(skb, FREE_WRITE);
		return(0);
	}
	
	/*
	 *	See if the firewall wants to dispose of the packet. 
	 *	Anything other than a verdict of 1 drops the packet; a verdict
	 *	of -1 additionally sends an ICMP port-unreachable.
	 */
#ifdef	CONFIG_IP_FIREWALL
	
	if ((err=ip_fw_chk(iph,dev,ip_fw_blk_chain,ip_fw_blk_policy, 0))!=1)
	{
		if(err==-1)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
		return 0;	
	}

#endif
	
	/*
	 *	Our transport medium may have padded the buffer out. Now we know it
	 *	is IP we can trim to the true length of the frame.
	 */

	skb->len=ntohs(iph->tot_len);

	/*
	 *	Next analyse the packet for options. Studies show under one packet in
	 *	a thousand have options....
	 *
	 *	A header length other than 5 words (20 bytes) means options are
	 *	present; the common option-less packet skips this block.
	 */
	if (iph->ihl != 5)
	{
		memset((char *) &opt, 0, sizeof(opt));
		if (do_options(iph, &opt) != 0)
		{
			/*
			 *	do_options() is not given the skb, so it cannot
			 *	have released it: free it here rather than leak it.
			 */
			kfree_skb(skb, FREE_WRITE);
			return 0;
		}
		opts_p = 1;
	}

	/*
	 *	Remember if the frame is fragmented.
	 *	is_frag bit 0: More Fragments flag set.
	 *	is_frag bit 1: non-zero fragment offset (not the first fragment).
	 */
	if(iph->frag_off)
	{
		/*
		 *	More Fragments is 0x2000 of the host-order field. Convert
		 *	first so the test is independent of host byte order.
		 */
		if (ntohs(iph->frag_off) & 0x2000)
			is_frag|=1;
		/*
		 *	Non-zero offset ?
		 */
		if (ntohs(iph->frag_off) & 0x1fff)
			is_frag|=2;
	}
	
	/*
	 *	Do any IP forwarding required.  chk_addr() is expensive -- avoid it someday.
	 *
	 *	This is inefficient. While finding out if it is for us we could also compute
	 *	the routing table entry. This is where the great unified cache theory comes
	 *	in as and when someone implements it
	 *
	 *	For most hosts over 99% of packets match the first conditional
	 *	and don't go via ip_chk_addr. Note: brd is set to IS_MYADDR at
	 *	function entry.
	 */

	if ( iph->daddr != skb->dev->pa_addr && (brd = ip_chk_addr(iph->daddr)) == 0)
	{
		/*
		 *	Don't forward multicast or broadcast frames.
		 */

		if(skb->pkt_type!=PACKET_HOST || brd==IS_BROADCAST)
		{
			kfree_skb(skb,FREE_WRITE);
			return 0;
		}

		/*
		 *	The packet is for another target. Forward the frame
		 */

#ifdef CONFIG_IP_FORWARD
		ip_forward(skb, dev, is_frag);
#else
/*		printk("Machine %lx tried to use us as a forwarder to %lx but we have forwarding disabled!\n",
			iph->saddr,iph->daddr);*/
		ip_statistics.IpInAddrErrors++;
#endif
		/*
		 *	The forwarder is inefficient and copies the packet. We
		 *	free the original now.
		 */

		kfree_skb(skb, FREE_WRITE);
		return(0);
	}
	
#ifdef CONFIG_IP_MULTICAST	

	if(brd==IS_MULTICAST && iph->daddr!=IGMP_ALL_HOSTS && !(dev->flags&IFF_LOOPBACK))
	{
		/*
		 *	Check it is for one of our groups
		 */
		struct ip_mc_list *ip_mc=dev->ip_mc_list;
		do
		{
			/* Ran off the end of the list: not one of our groups. */
			if(ip_mc==NULL)
			{
				kfree_skb(skb, FREE_WRITE);
				return 0;
			}
			if(ip_mc->multiaddr==iph->daddr)
				break;
			ip_mc=ip_mc->next;
		}
		while(1);
	}
#endif
	/*
	 *	Account for the packet
	 */
	 
#ifdef CONFIG_IP_ACCT
	ip_acct_cnt(iph,dev, ip_acct_chain);
#endif	

	/*
	 * Reassemble IP fragments.
 	 */
	if(is_frag)
	{
		/* Defragment. Obtain the complete packet if there is one */
		skb=ip_defrag(iph,skb,dev);
		if(skb==NULL)
			return 0;
		skb->dev = dev;
		iph=skb->h.iph;
	}

	/*
	 *	Point into the IP datagram, just past the header.
	 */

	skb->ip_hdr = iph;
	/* Advance h.raw to the transport header before passing upwards. */
	skb->h.raw += iph->ihl*4;
	
	/*
	 *	Deliver to raw sockets. This is fun as to avoid copies we want to make no surplus copies.
	 */
	 
	hash = iph->protocol & (SOCK_ARRAY_SIZE-1);
	
	/* If there maybe a raw socket we must check - if not we don't care less */
	if((raw_sk=raw_prot.sock_array[hash])!=NULL)
	{
		struct sock *sknext=NULL;
		struct sk_buff *skb1;

		/* First raw socket on this chain that matches the datagram. */
		raw_sk=get_sock_raw(raw_sk, hash,  iph->saddr, iph->daddr);
		if(raw_sk)	/* Any raw sockets */
		{
			do
			{
				/* Find the next match, searching from the entry after raw_sk. */
				sknext=get_sock_raw(raw_sk->next, hash, iph->saddr, iph->daddr);
				/* Another match follows, so the current one gets a clone. */
				if(sknext)
					skb1=skb_clone(skb, GFP_ATOMIC);
				else
					break;	/* One pending raw socket left */
				if(skb1)
					raw_rcv(raw_sk, skb1, dev, iph->saddr,iph->daddr);
				/* Move on to the match just found. */
				raw_sk=sknext;
			}
			while(raw_sk!=NULL);
			/* Here either raw_sk is the last raw socket, or NULL if none */
			/* We deliver to the last raw socket AFTER the protocol checks as it avoids a surplus copy */
		}
	}
	
	/*
	 *	skb->h.raw now points at the protocol beyond the IP header.
	 *	Walk the inet_protos[] hash chain for this protocol number and
	 *	hand the datagram to each matching transport protocol.
	 */
	hash = iph->protocol & (MAX_INET_PROTOS -1);
	for (ipprot = (struct inet_protocol *)inet_protos[hash];ipprot != NULL;ipprot=(struct inet_protocol *)ipprot->next)
	{
		struct sk_buff *skb2;

		if (ipprot->protocol != iph->protocol)
			continue;
	       /*
		* 	See if we need to make a copy of it.  This will
		* 	only be set if more than one protocol wants it.
		* 	and then not for the last one. If there is a pending
		*	raw delivery wait for that
		*
		*	A clone is made when the entry's copy flag is set or
		*	when a raw socket (raw_sk, found above) still needs the
		*	original; otherwise the original is handed over.
		*/
		if (ipprot->copy || raw_sk)
		{
			skb2 = skb_clone(skb, GFP_ATOMIC);
			if(skb2==NULL)
				continue;
		}
		else
		{
			skb2 = skb;
		}
		/* A transport protocol accepted the datagram. */
		flag = 1;

	       /*
		* Pass on the datagram to each protocol that wants it,
		* based on the datagram protocol.  We should really
		* check the protocol handler's return values here...
		*/
		ipprot->handler(skb2, dev, opts_p ? &opt : 0, iph->daddr,
				(ntohs(iph->tot_len) - (iph->ihl * 4)),
				iph->saddr, 0, ipprot);

	}

	/*
	 * All protocols checked.
	 * If this packet was a broadcast, we may *not* reply to it, since that
	 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
	 * ICMP reply messages get queued up for transmission...)
	 */

	if(raw_sk!=NULL)	/* Shift to last raw user */
		raw_rcv(raw_sk, skb, dev, iph->saddr, iph->daddr);
	/* Nobody claimed the datagram: report the error and free it. */
	else if (!flag)		/* Free and report errors */
	{
		/* Only unicast datagrams trigger an ICMP protocol-unreachable. */
		if (brd != IS_BROADCAST && brd!=IS_MULTICAST)
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0, dev);
		kfree_skb(skb, FREE_WRITE);
	}

	return(0);
}

ip层根据ip头中的协议号在inet_protos哈希表中找到对应的链表并遍历,找到协议号相等的节点后,把数据包交给该节点的处理函数。比如tcp协议对应的处理函数是tcp_rcv,该函数把skb挂载到socket的接收队列等待读取,或者建立一个连接等。应用层使用read函数进行读取的时候,就从接收队列摘下一个skb。至此,一个数据包从网卡到应用层的过程就结束了。

你可能感兴趣的:(linux1.2.13源码分析)