我们知道,字节序分为大端(big-endian)和小端(little-endian):采用大端的机器有IBM体系结构的主机,而Intel体系结构的主机则采用小端。另外需要知道的是,网络编程中使用的网络字节序为大端,所以我们要实现主机字节序到网络字节序的转换。这里需要说明一下为什么网络字节序采用大端:大端有一个特点,一个数的16进制书写形式(从左至右:高字节->低字节)与它在内存中的存储形式(从左至右:低地址->高地址)外观完全一致,所以网络字节序比较好理解(个人观点,呵呵),这一点从下面括号中的附注也可以看出。
(附:在网络传输中,采用big-endian序,对于0x0A0B0C0D ,传输顺序就是0A 0B 0C 0D(传输的时候从低地址开始) ,因此big-endian作为network byte order,little-endian作为host byte order。为什么X86存储会使用little-endian,起初我想对于位运算,尤其是位移运算,little-endian很方便,但转念一想,big-endian也方便啊,无非是左移和右移的区别而已,但little-endian的优势在于unsigned char/short/int/long类型转换时,存储位置无需改变。如short a = 0x1234;小字节序主机存储内存如下:
add add+1
34, 12
(2011-07-22更新)
int b = (int)a 后,b内存存储为:
add add+3
34 , 12 , 00 , 00
可以知道,类型转换非常简单,只在后面添加0x0000即可)
大端字节序即最重要的字节(高位字节,也称为 the most significant byte,MSB)存放在低地址(即靠前的地址),高位在前,也就是我们书写的形式。
比如:0x12345678 。
如果采用大端,在内存中存放如下:
add add+3
12 , 34 , 56 , 78
和我们书写形式是一样的。
如果采用小端,将存放如下:
add add+3
78 , 56 , 34 , 12
总结如下:大端即高位(字节)在前(低地址),小端即低位(字节)在前(低地址)
有了上面的讨论,本文具体讨论下glibc下面字节序的相关实现机制。本文主要涉及的库文件包括 :
/usr/include/bits/endian.h
/usr/include/endian.h
/usr/include/bits/byteswap.h
/usr/include/byteswap.h
////////////////////////////////////////////////////////////////////////////////////
/usr/include/bits/endian.h
/* This file defines `__BYTE_ORDER' for the particular machine. */
该文件主要定义了具体的机器的字节序,如对于Intel i386是小端字节序
1 /* i386 is little-endian. */
2
3 #ifndef _ENDIAN_H
4 # error "Never use <bits/endian.h> directly; include <endian.h> instead."
5 #endif
//以上的预编译指令用来阻止直接包含bits/endian.h头文件:宏 _ENDIAN_H 是在/usr/include/endian.h中定义的,如果没有先包含/usr/include/endian.h,则该宏未定义,#error 会产生预编译错误。也就是说,系统提供给外部的接口只能是
include/endian.h 头文件,它与机器无关;而bits/endian.h 文件是与特定机器相关的。这样就提高了程序的可移植性,因为系统提供给外部的头文件include/endian.h是与具体机器无关的。
6
7 #define __BYTE_ORDER __LITTLE_ENDIAN
////////////////////////////////////////////////////////////////////////////////////
///usr/include/endian.h文件内容如下:
19 #ifndef _ENDIAN_H
20 #define _ENDIAN_H 1
21
22 #include <features.h>
23
24 /* Definitions for byte order, according to significance of bytes(高位字节),
25 from low addresses to high addresses. The value is what you get by
26 putting '4' in the most significant byte, '3' in the second most
27 significant byte, '2' in the second least significant byte, and '1'
28 in the least significant byte, and then writing down one digit for
29 each byte, starting with the byte at the lowest address at the left,
30 and proceeding to the byte with the highest address at the right. */
31
32 #define __LITTLE_ENDIAN 1234
33 #define __BIG_ENDIAN 4321
34 #define __PDP_ENDIAN 3412
35
36 /* This file defines `__BYTE_ORDER' for the particular machine. */
37 #include <bits/endian.h>
38
39 /* Some machines may need to use a different endianness for floating point
40 values. */
41 #ifndef __FLOAT_WORD_ORDER
42 # define __FLOAT_WORD_ORDER __BYTE_ORDER
43 #endif
44
45 #ifdef __USE_BSD
46 # define LITTLE_ENDIAN __LITTLE_ENDIAN
47 # define BIG_ENDIAN __BIG_ENDIAN
48 # define PDP_ENDIAN __PDP_ENDIAN
49 # define BYTE_ORDER __BYTE_ORDER
50 #endif
51
52 #if __BYTE_ORDER == __LITTLE_ENDIAN
53 # define __LONG_LONG_PAIR(HI, LO) LO, HI
54 #elif __BYTE_ORDER == __BIG_ENDIAN
55 # define __LONG_LONG_PAIR(HI, LO) HI, LO
56 #endif
57
58
59 #ifdef __USE_BSD // 以下转换宏只有在启用了BSD特性(即定义了__USE_BSD,例如通过特性测试宏_BSD_SOURCE)时才会定义,否则没有这些定义
60 /* Conversion interfaces. */
61 # include <bits/byteswap.h>
62
63 # if __BYTE_ORDER == __LITTLE_ENDIAN
64 # define htobe16(x) __bswap_16 (x) // htobe16 it means host to big-endian 16bits
65 # define htole16(x) (x)
66 # define be16toh(x) __bswap_16 (x)
67 # define le16toh(x) (x)
68
69 # define htobe32(x) __bswap_32 (x)
70 # define htole32(x) (x)
71 # define be32toh(x) __bswap_32 (x)
72 # define le32toh(x) (x)
73
74 # define htobe64(x) __bswap_64 (x)
75 # define htole64(x) (x)
76 # define be64toh(x) __bswap_64 (x)
77 # define le64toh(x) (x)
78 # else
79 # define htobe16(x) (x)
80 # define htole16(x) __bswap_16 (x)
81 # define be16toh(x) (x)
82 # define le16toh(x) __bswap_16 (x)
83
84 # define htobe32(x) (x)
85 # define htole32(x) __bswap_32 (x)
86 # define be32toh(x) (x)
87 # define le32toh(x) __bswap_32 (x)
88
89 # define htobe64(x) (x)
90 # define htole64(x) __bswap_64 (x)
91 # define be64toh(x) (x)
92 # define le64toh(x) __bswap_64 (x)
93 # endif
94 #endif
95
96 #endif /* endian.h */
////////////////////////////////////////////////////////////////////////////////////
/usr/include/byteswap.h
19 #ifndef _BYTESWAP_H
20 #define _BYTESWAP_H 1
21
22 /* Get the machine specific, optimized definitions. */
23 #include <bits/byteswap.h>
24
25
26 /* The following definitions must all be macros since otherwise some
27 of the possible optimizations are not possible. */
28
29 /* Return a value with all bytes in the 16 bit argument swapped. */
30 #define bswap_16(x) __bswap_16 (x) //宏定义中引用了bits/byteswap中的宏
31
32 /* Return a value with all bytes in the 32 bit argument swapped. */
33 #define bswap_32(x) __bswap_32 (x)
34
35 #if defined __GNUC__ && __GNUC__ >= 2
36 /* Return a value with all bytes in the 64 bit argument swapped. */
37 # define bswap_64(x) __bswap_64 (x)
38 #endif
39
40 #endif /* byteswap.h */
////////////////////////////////////////////////////////////////////////////////////
/usr/include/bits/byteswap.h文件内容如下:
/* Macros to swap the order of bytes in integer values. */
20
21 #if !defined _BYTESWAP_H && !defined _NETINET_IN_H && !defined _ENDIAN_H
22 # error "Never use <bits/byteswap.h> directly; include <byteswap.h> instead."
23 #endif
以上语句表明,/usr/include/bits/byteswap.h文件只能被/usr/include/byteswap.h、/usr/include/netinet/in.h以及/usr/include/endian.h这三个公共对外接口头文件包含。注意宏的命名方式:_NETINET_IN_H 表示netinet文件夹下的in.h文件。
还要注意以上条件编译语句的形式:defined 运算符用来判断其后的宏是否已定义,多个条件可以通过 && 连接。
24
25 #ifndef _BITS_BYTESWAP_H
26 #define _BITS_BYTESWAP_H 1
27
28 /* Swap bytes in 16 bit value. */
29 #define __bswap_constant_16(x) \
30 ((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
/*
以上为宏 __bswap_constant_16(x)具体实现,即16位的字节交换
*/
31
32 #ifdef __GNUC__ //如果编译器是GNU gcc(下一行再判断其主版本号是否大于等于2)
33 # if __GNUC__ >= 2
34 # define __bswap_16(x) \
35 (__extension__ \
36 ({ register unsigned short int __v, __x = (x); \
37 if (__builtin_constant_p (__x)) \
38 __v = __bswap_constant_16 (__x); \
39 else \
40 __asm__ ("rorw $8, %w0" \
41 : "=r" (__v) \
42 : "0" (__x) \
43 : "cc"); \
44 __v; }))
45 # else
46 /* This is better than nothing. */
47 # define __bswap_16(x) \
48 (__extension__ \
49 ({ register unsigned short int __x = (x); __bswap_constant_16 (__x); }))
50 # endif
51 #else
52 static __inline unsigned short int
53 __bswap_16 (unsigned short int __bsx)
54 {
55 return __bswap_constant_16 (__bsx);
56 }
57 #endif
58
59 /* Swap bytes in 32 bit value. */
60 #define __bswap_constant_32(x) \
61 ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
62 (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
63
134 #endif
135
136 #endif /* _BITS_BYTESWAP_H */
以上文件内容,基本上实现了字节序转换的所有的宏(注意没有用函数实现,而是通过宏替代函数的作用),下面我们来看看具体的/usr/include/netinet/in.h文件:
/* Get system-specific definitions. */
356 #include <bits/in.h>
357
358 /* Functions to convert between host and network byte order.
359
360 Please note that these functions normally take `unsigned long int' or
361 `unsigned short int' values as arguments and also return them. But
362 this was a short-sighted decision since on different systems the types
363 may have different representations but the values are always the same. */
364
365 extern uint32_t ntohl (uint32_t __netlong) __THROW __attribute__ ((__const__));
366 extern uint16_t ntohs (uint16_t __netshort)
367 __THROW __attribute__ ((__const__));
368 extern uint32_t htonl (uint32_t __hostlong)
369 __THROW __attribute__ ((__const__));
370 extern uint16_t htons (uint16_t __hostshort)
371 __THROW __attribute__ ((__const__));
372
373 #include <endian.h>
374
375 /* Get machine dependent optimized versions of byte swapping functions. */
376 #include <bits/byteswap.h>
377
378 #ifdef __OPTIMIZE__
379 /* We can optimize calls to the conversion functions. Either nothing has
380 to be done or we are using directly the byte-swapping functions which
381 often can be inlined. */
382 # if __BYTE_ORDER == __BIG_ENDIAN
383 /* The host byte order is the same as network byte order,
384 so these functions are all just identity. */
385 # define ntohl(x) (x)
386 # define ntohs(x) (x)
387 # define htonl(x) (x)
388 # define htons(x) (x)
389 # else
390 # if __BYTE_ORDER == __LITTLE_ENDIAN
391 # define ntohl(x) __bswap_32 (x)
392 # define ntohs(x) __bswap_16 (x)
393 # define htonl(x) __bswap_32 (x)
394 # define htons(x) __bswap_16 (x)
395 # endif
396 # endif
397 #endif
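可以看到,在开启编译优化(定义了__OPTIMIZE__)时,主机字节序与网络字节序之间的转换全部由该文件中的宏完成:大端主机上这些宏原样返回参数,小端主机上则调用__bswap_16/__bswap_32;未开启优化时,使用的是上面声明的ntohl、ntohs、htonl、htons外部函数。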