TCP包的校验和
--
1) TCP包的错误检测使用16位加和校验. 除了TCP包本身,
TCP校验数据块还包括源IP地址,目的IP地址, TCP包长度, TCP协议号组成的12字节伪头标.
2) 校验和为16位字的反码(one's complement)和, 数据块长度为奇数时, 在数据块末尾补零处理.
校验和的计算与顺序无关, 可以从数据块开始计算, 也可以从末尾开始向前计算.
3) 为了提高计算效率, TCP包的校验和并不一次算出,
而是采用32位部分累加和(skb->csum)进行增量计算.
csum_partial()用来计算数据块的32位部分累加和, 累加和可以用csum_fold()折叠为16位校验和.
csum_partial_copy_nocheck()可在拷贝用户数据的同时计算出它的部分累加和.
4) 为了加快执行速度, csum_partial()将8个32位字分为一组用分立的指令进行32位累加,
这样可加长循环体中指令长度, 提高CPU指令流水线的效率.
5) 并不是所有的TCP包都必须校验, skb->ip_summed用来控制校验操作.
对于loopback设备的收发包, 其skb->ip_summed设为CHECKSUM_UNNECESSARY, 忽略校验过程.
--
static int tcp_v4_checksum_init(struct sk_buff *skb) TCP包接收校验的初始化
{
if (skb->ip_summed ==CHECKSUM_HW) { 如果TCP包本身的校验已经由硬件链路层完成
if(tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr, 附加伪头标进行校验
skb->nh.iph->daddr,skb->csum)) {
NETDEBUG(printk(KERN_DEBUG"hw tcp v4 csum failed\n"));
return -1;
}
skb->ip_summed =CHECKSUM_UNNECESSARY;
} else {
if (skb->len <= 76) { 如果TCP包全长不超过76字节
if(tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
skb->nh.iph->daddr,
csum_partial((char *)skb->h.th, skb->len,0))) 进行全包校验
return -1;
skb->ip_summed =CHECKSUM_UNNECESSARY;
} else { 首先生成伪头标的部分累加和
skb->csum =~tcp_v4_check(skb->h.th,skb->len,skb->nh.iph->saddr,
skb->nh.iph->daddr,0);
}
}
return 0;
}
static __inline__ int tcp_checksum_complete(struct sk_buff *skb)
基于伪头标累加和,完成全包校验
{
return skb->ip_summed !=CHECKSUM_UNNECESSARY &&
__tcp_checksum_complete(skb);
}
static __inline__ int __tcp_checksum_complete(struct sk_buff *skb)
{
return (unsignedshort)csum_fold(csum_partial(skb->h.raw, skb->len, skb->csum));
}
/* This routine computes an IPv4 TCP checksum. */
void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
struct sk_buff *skb) 基于TCP用户数据的中间累加和, 生成TCP包校验码
{
th->check = tcp_v4_check(th,len, sk->saddr, sk->daddr,
csum_partial((char *)th, th->doff<<2,skb->csum));
}
/*
 * Calculate(/check) TCP checksum
 *
 * Produces the 16-bit checksum that includes the 96-bit pseudo-header
 * (saddr, daddr, len, IPPROTO_TCP) on top of the partial sum `base`.
 * The `th` argument is not used by this function.
 */
static __inline__ u16 tcp_v4_check(struct tcphdr *th, int len,
				   unsigned long saddr, unsigned long daddr,
				   unsigned long base)
{
	return csum_tcpudp_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
/*
 * computes the checksum of the TCP/UDP pseudo-header
 * returns a 16-bit checksum, already complemented
 *
 * `sum` is the partial sum the pseudo-header is added to; the 32-bit result
 * of csum_tcpudp_nofold() is folded to 16 bits by csum_fold().
 */
static inline unsigned short int csum_tcpudp_magic(unsigned long saddr,
						   unsigned long daddr,
						   unsigned short len,
						   unsigned short proto,
						   unsigned int sum)
{
	return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
/*
 * Add the pseudo-header to the partial sum `sum` and return the 32-bit
 * result without folding it.
 *
 * Three values are added with carry propagation: daddr, saddr, and a word
 * built as (ntohs(len) << 16) + proto * 256.  The trailing "adcl $0" adds
 * the carry left by the last addition back into the sum.
 */
static inline unsigned long csum_tcpudp_nofold(unsigned long saddr,
					       unsigned long daddr,
					       unsigned short len,
					       unsigned short proto,
					       unsigned int sum)
{
	__asm__("addl %1, %0\n\t"
		"adcl %2, %0\n\t"
		"adcl %3, %0\n\t"
		"adcl $0, %0"
		: "=r" (sum)
		: "g" (daddr), "g" (saddr),
		  "g" ((ntohs(len) << 16) + proto * 256), "0" (sum));
	return sum;
}
/*
 * Fold a partial checksum
 *
 * Folds the 32-bit partial sum into a 16-bit checksum: the low half is
 * added to the high half in the upper 16 bits of the register, the
 * "adcl $0xffff" propagates the carry of that addition back into the upper
 * half, and the complement of the upper half is returned in the low 16 bits.
 */
static inline unsigned int csum_fold(unsigned int sum)
{
	__asm__("addl %1, %0\n\t"
		"adcl $0xffff, %0"
		: "=r" (sum)
		: "r" (sum << 16), "0" (sum & 0xffff0000)
	);
	return (~sum) >> 16;
}
/*
 * Copy len bytes from src to dst and compute the partial sum of the copied
 * data at the same time, starting from `sum`.  Both error pointers of
 * csum_partial_copy_generic() are passed as NULL.
 */
extern __inline__
unsigned int csum_partial_copy_nocheck(const char *src, char *dst,
				       int len, int sum)
{
	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
}
; arch/i386/lib/checksum.S:
/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */
/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
.text
.align 4
.globl csum_partial		# computes the 32-bit intermediate (partial) sum
/*
 * Experiments with Ethernet and SLIP connections show that buff
 * is aligned on either a 2-byte or 4-byte boundary. We get at
 * least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
 * Fortunately, it is easy to convert 2-byte alignment to 4-byte
 * alignment for the unrolled loop.
 */
/*
 * Register use: %eax = running sum, %ecx = length / loop counter,
 * %esi = current buffer pointer, %edx = saved length, %ebx = scratch.
 * The sum is left in %eax on return.
 */
csum_partial:
	pushl %esi
	pushl %ebx
	movl 20(%esp),%eax	# Function arg: unsigned int sum
	movl 16(%esp),%ecx	# Function arg: int len
	movl 12(%esp),%esi	# Function arg: unsigned char *buff
	testl $2, %esi		# Check alignment.
	jz 2f			# Jump if alignment is ok.
	subl $2, %ecx		# Alignment uses up two bytes.
	jae 1f			# Jump if we had at least two bytes.
	addl $2, %ecx		# ecx was < 2. Deal with it.
	jmp 4f
1:	movw (%esi), %bx
	addl $2, %esi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, %edx		# save the length
	shrl $5, %ecx		# divide by 32
	jz 2f			# length is less than 32
	testl %esi, %esi	# clear the carry flag
1:	movl (%esi), %ebx	# load a 32-bit word
	adcl %ebx, %eax		# add it to the running sum
	movl 4(%esi), %ebx	# load the next 32-bit word
	adcl %ebx, %eax		# keep accumulating
	movl 8(%esi), %ebx
	adcl %ebx, %eax
	movl 12(%esi), %ebx
	adcl %ebx, %eax
	movl 16(%esi), %ebx
	adcl %ebx, %eax
	movl 20(%esi), %ebx
	adcl %ebx, %eax
	movl 24(%esi), %ebx
	adcl %ebx, %eax
	movl 28(%esi), %ebx
	adcl %ebx, %eax
	lea 32(%esi), %esi	# eight 32-bit words summed in a row; advance
	dec %ecx
	jne 1b
	adcl $0, %eax		# add in the final carry flag
2:	movl %edx, %ecx
	andl $0x1c, %edx	# remaining byte count in whole 32-bit words
	je 4f
	shrl $2, %edx		# This clears CF; divide by 4: remaining word count
3:	adcl (%esi), %eax
	lea 4(%esi), %esi
	dec %edx
	jne 3b
	adcl $0, %eax		# remaining 32-bit words added; add in the carry
4:	andl $3, %ecx		# number of trailing bytes
	jz 7f
	cmpl $2, %ecx
	jb 5f			# only 1 byte is left
	movw (%esi),%cx		# fetch the 2 remaining bytes
	leal 2(%esi),%esi
	je 6f
	shll $16,%ecx		# move them to the upper 16 bits
5:	movb (%esi),%cl		# fetch the single last byte
6:	addl %ecx,%eax		# add the final word
	adcl $0, %eax		# add in the carry flag
7:
	popl %ebx
	popl %esi
	ret
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/
/*
 * Copy from ds while checksumming, otherwise like csum_partial
 *
 * The macros SRC and DST specify the type of access for the instruction.
 * thus we can call a custom exception handler for all access types.
 *
 * FIXME: could someone double-check whether I haven't mixed up some SRC and
 * DST definitions? It's damn hard to trigger all cases. I hope I got
 * them all but there's no guarantee.
 */
/*
 * SRC(insn): emit `insn` under the local label 9999 and add an __ex_table
 * entry pairing that instruction's address with the fixup label 6001
 * (the handler that reports the error through src_err_ptr).
 */
#define SRC(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6001f ; \
.previous
/*
 * DST(insn): same as SRC, but the __ex_table entry points at the fixup
 * label 6002 (the handler that reports the error through dst_err_ptr).
 */
#define DST(y...) \
9999: y; \
.section __ex_table, "a"; \
.long 9999b, 6002f ; \
.previous
.align 4
.globl csum_partial_copy_generic
/*
 * Stack layout after the prologue (subl $4 plus three pushes = 16 bytes):
 * ARGBASE(%esp) is the return address, so ARGBASE+4 is the first argument.
 * FP(%esp) is the 4-byte scratch slot reserved by "subl $4,%esp"; it holds
 * the saved length while %ecx is used as the loop counter.
 */
#define ARGBASE 16
#define FP 12
/*
 * Register use: %eax = running sum, %ecx = length / loop counter,
 * %esi = source pointer, %edi = destination pointer, %ebx/%edx = scratch.
 * Every load from the source is wrapped in SRC() and every store to the
 * destination in DST(), so a faulting access is routed to the fixup code
 * at 6001 / 6002 below.
 */
csum_partial_copy_generic:
	subl $4,%esp
	pushl %edi
	pushl %esi
	pushl %ebx
	movl ARGBASE+16(%esp),%eax	# sum
	movl ARGBASE+12(%esp),%ecx	# len
	movl ARGBASE+4(%esp),%esi	# src
	movl ARGBASE+8(%esp),%edi	# dst
	testl $2, %edi			# Check alignment.
	jz 2f				# Jump if alignment is ok.
	subl $2, %ecx			# Alignment uses up two bytes.
	jae 1f				# Jump if we had at least two bytes.
	addl $2, %ecx			# ecx was < 2. Deal with it.
	jmp 4f
SRC(1:	movw (%esi), %bx	)
	addl $2, %esi
DST(	movw %bx, (%edi)	)
	addl $2, %edi
	addw %bx, %ax
	adcl $0, %eax
2:
	movl %ecx, FP(%esp)		# save the length in the scratch slot
	shrl $5, %ecx			# number of 32-byte groups
	jz 2f
	testl %esi, %esi		# clears CF before the adcl chain
SRC(1:	movl (%esi), %ebx	)
SRC(	movl 4(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 4(%edi)	)
SRC(	movl 8(%esi), %ebx	)
SRC(	movl 12(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 8(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 12(%edi)	)
SRC(	movl 16(%esi), %ebx	)
SRC(	movl 20(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 16(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 20(%edi)	)
SRC(	movl 24(%esi), %ebx	)
SRC(	movl 28(%esi), %edx	)
	adcl %ebx, %eax
DST(	movl %ebx, 24(%edi)	)
	adcl %edx, %eax
DST(	movl %edx, 28(%edi)	)
	lea 32(%esi), %esi
	lea 32(%edi), %edi
	dec %ecx
	jne 1b
	adcl $0, %eax
2:	movl FP(%esp), %edx		# reload the saved length
	movl %edx, %ecx
	andl $0x1c, %edx
	je 4f
	shrl $2, %edx			# This clears CF
SRC(3:	movl (%esi), %ebx	)
	adcl %ebx, %eax
DST(	movl %ebx, (%edi)	)
	lea 4(%esi), %esi
	lea 4(%edi), %edi
	dec %edx
	jne 3b
	adcl $0, %eax
4:	andl $3, %ecx
	jz 7f
	cmpl $2, %ecx
	jb 5f
SRC(	movw (%esi), %cx	)
	leal 2(%esi), %esi
DST(	movw %cx, (%edi)	)
	leal 2(%edi), %edi
	je 6f
	shll $16,%ecx
SRC(5:	movb (%esi), %cl	)
DST(	movb %cl, (%edi)	)
6:	addl %ecx, %eax
	adcl $0, %eax
7:
5000:
# Exception handler:
.section .fixup, "ax"
6001:
	movl ARGBASE+20(%esp), %ebx	# src_err_ptr
	movl $-EFAULT, (%ebx)
	# zero the complete destination - computing the rest
	# is too much work
	movl ARGBASE+8(%esp), %edi	# dst
	movl ARGBASE+12(%esp), %ecx	# len
	xorl %eax,%eax			# zero byte for stosb; also resets the sum in %eax
	rep ; stosb
	jmp 5000b
6002:
	movl ARGBASE+24(%esp), %ebx	# dst_err_ptr
	movl $-EFAULT,(%ebx)
	jmp 5000b
.previous
	popl %ebx
	popl %esi
	popl %edi
	popl %ecx			# equivalent to addl $4,%esp
	ret