首先,我们明确Linux TCP实现中的3个“事实”。之所以加引号,是因为这些论述颇为主观,它们只是我个人积累下来的经验。这些“事实”即:
1.Linux作为数据接收端的时候,默认不会Delay ACK,直到...
/*
 * Excerpt of packetdrill's new_tcp_packet(), patched for the experiments in
 * this article: every packet that carries payload has that payload filled
 * with a single repeated byte, and the fill byte advances by one for each
 * such packet, so data from different segments can be told apart when it is
 * printed. Lines consisting of "..." stand for code elided from the excerpt.
 */
struct packet *new_tcp_packet(int address_family,
			      enum direction_t direction,
			      enum ip_ecn_t ecn,
			      const char *flags,
			      u32 start_sequence,
			      u16 tcp_payload_bytes,
			      u32 ack_sequence,
			      s32 window,
			      const struct tcp_options *tcp_options,
			      char **error)
{
...
	const int ip_bytes =
		ip_header_bytes + tcp_header_bytes + tcp_payload_bytes;
	/*
	 * Fill byte for the next payload. It starts at 70 (a printable ASCII
	 * code) and is incremented once per packet that carries payload.
	 */
	static int pad = 70;
...
	packet = packet_new(ip_bytes);
	memset(packet->buffer, 0, ip_bytes);
	if (tcp_payload_bytes) {
		char *payload = (char *)packet->buffer +
				ip_header_bytes + tcp_header_bytes;

		memset(payload, pad++, tcp_payload_bytes);
		/*
		 * The payload occupies the last tcp_payload_bytes of the
		 * ip_bytes buffer and is not NUL-terminated, so bound the
		 * print by the payload length instead of relying on "%s"
		 * finding a terminator past the end of the buffer.
		 */
		printf("send:\n%.*s\n-end send-\n",
		       (int)tcp_payload_bytes, payload);
	}
...
}
static int syscall_read(struct state *state, struct syscall_spec *syscall,
struct expression_list *args, char **error)
{
...
result = read(live_fd, buf, count);
printf("payload:\n%s\n", buf);
...
}
修改后直接make之,很容易便生成了新的packetdrill。接下来我们将用这个新编译的packetdrill进行一切实验。首先我为事实1和事实2设计了以下的脚本,执行之并抓包:
// Passive open: create a listening socket, complete the three-way handshake
// with an injected SYN / ACK, and accept the connection as fd 4.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0
0.000 < S 0:0(0) win 32792
0.000 > S. 0:0(0) ack 1 <...>
0.000 < . 1:1(0) ack 1 win 257
0.000 accept(3, ..., ...) = 4
// The following sequence turns on Delay ACK
0.100 < . 1:11(10) ack 1 win 257
0.100 write(4, ..., 20) = 20
0.100 < . 11:11(0) ack 21 win 257
0.100 < . 11:21(10) ack 21 win 257
1.000 read(4, ..., 20) = 20
// Delay ACK has already fired once and its one-shot nature switched it off again; the sequence below triggers it once more
1.100 < . 21:31(10) ack 21 win 257
1.100 write(4, ..., 20) = 20
1.100 < . 31:31(0) ack 41 win 257
1.100 < . 26:41(15) ack 41 win 257 //this time send an overlapping segment: part of it overlaps old data, part of it carries new data
2.000 read(4, ..., 20) = 20
// Receiver ACKs all data.
// NOTE(review): only 40 bytes were written above, so "ack 6001" looks like a leftover from a template script - confirm.
10.000 < . 1:1(0) ack 6001 win 257
在分析抓包之前,我们先看一下打印输出:
这个打印输出基本上与事实2所描述的是一致的,数据段26:31之前已经被收到,已经被ACK过了,因此接收了其包含的部分新数据31:41,旧数据26:31直接丢弃!然后再看下抓包:
// Passive open: create a listening socket, complete the three-way handshake
// with an injected SYN / ACK, and accept the connection as fd 4.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0
0.000 < S 0:0(0) win 32792
0.000 > S. 0:0(0) ack 1 <...>
0.000 < . 1:1(0) ack 1 win 257
0.000 accept(3, ..., ...) = 4
0.100 < . 21:31(10) ack 1 win 257 // simulates 1:21 having been lost
0.100 < . 6:21(15) ack 1 win 257 // simulates a retransmission of 6:21; none of this data has been received before
0.100 < . 16:31(15) ack 1 win 257 // simulates a retransmission of 16:31; part of this data (21:31) has already been received
0.100 < . 1:11(10) ack 1 win 257 // simulates a retransmission of 1:11; part of this data (6:11) has already been received, part (1:6) is new
// The injected segments together cover 1:31, i.e. the 30 bytes read here.
1.000 read(4, ..., 30) = 30
// Receiver ACKs all data.
// NOTE(review): this script never writes, so "ack 6001" looks like a leftover from a template script - confirm.
10.000 < . 1:1(0) ack 6001 win 257
// Passive open: create a listening socket, complete the three-way handshake
// with an injected SYN / ACK, and accept the connection as fd 4.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0
0.000 < S 0:0(0) win 32792
0.000 > S. 0:0(0) ack 1 <...>
0.000 < . 1:1(0) ack 1 win 257
0.000 accept(3, ..., ...) = 4
// The next two lines start interactive ping-pong behavior and enter Delay ACK mode
0.100 < . 1:11(10) ack 1 win 257
0.100 write(4, ..., 20) = 20
0.100 < . 11:11(0) ack 21 win 257
// First transmit a 10-byte segment
0.100 < . 11:21(10) ack 21 win 257
// Then advance the received data with a partially overlapping segment
0.100 < . 16:26(10) ack 21 win 257
1.000 read(4, ..., 25) = 25
// Receiver ACKs all data.
// NOTE(review): only 20 bytes were written above, so "ack 6001" looks like a leftover from a template script - confirm.
10.000 < . 1:1(0) ack 6001 win 257
以下是抓包以及输出结果的分析,首先看抓包:
我构造了本节最后一个脚本来印证结论:
// Passive open: create a listening socket, complete the three-way handshake
// with an injected SYN / ACK, and accept the connection as fd 4.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0
0.000 < S 0:0(0) win 32792
0.000 > S. 0:0(0) ack 1 <...>
0.000 < . 1:1(0) ack 1 win 257
0.000 accept(3, ..., ...) = 4
// Inject mutually overlapping segments out of order; together they cover 1:31.
0.100 < . 21:31(10) ack 1 win 257
0.100 < . 26:31(5) ack 1 win 257
0.100 < . 21:26(5) ack 1 win 257
0.100 < . 1:16(15) ack 1 win 257
0.100 < . 11:26(15) ack 1 win 257
// Alternative final segment, left disabled:
//0.100 < . 11:21(10) ack 1 win 257
1.000 read(4, ..., 30) = 30
// Receiver ACKs all data.
// NOTE(review): this script never writes, so "ack 6001" looks like a leftover from a template script - confirm.
10.000 < . 1:1(0) ack 6001 win 257
// Passive open: create a listening socket, complete the three-way handshake
// with an injected SYN / ACK, and accept the connection as fd 4.
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0
0.000 < S 0:0(0) win 32792
0.000 > S. 0:0(0) ack 1 <...>
0.000 < . 1:1(0) ack 1 win 257
0.000 accept(3, ..., ...) = 4
// The following sequence turns on Delay ACK
0.100 < . 1:11(10) ack 1 win 257
0.100 write(4, ..., 20) = 20
0.100 < . 11:11(0) ack 21 win 257
// original segment lost: 0.100 < . 11:21(10) ack 21 win 257 // note the timestamp
// first retransmission lost: 0.080 < . 11:21(10) ack 21 win 257 // note the timestamp; the RTO is about 80 ms in this example
// ... the timestamps back off exponentially
1.100 < . 11:21(10) ack 21 win 257 // sent 1 second late: it looks as if the original segment was lost; this is the retransmission that finally got through after several exponential backoffs
// NOTE(review): only 20 bytes were written above, so "ack 6001" looks like a leftover from a template script - confirm.
10.000 < . 1:1(0) ack 6001 win 257