本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,
12. 数据包处理
pluto收到一个IKE数据包时, 需要先对其进行解码, 识别该数据包属于哪个连接、处于哪个状态; 如果该
包合法, 则进行状态迁移处理, 完成IKE协议定义的各种操作。相关的数据结构和函数都在
programs/pluto/demux.c中定义。
所有状态枚举类型:
/*
 * Every state kind used by pluto's state machine.
 * In the state names, a trailing R<n> marks a responder-side state and
 * I<n> an initiator-side state.
 */
enum state_kind {
STATE_UNDEFINED=0, /* 0 -- most likely accident */
/* Opportunism states: see "Opportunistic Encryption" 2.2 */
OPPO_ACQUIRE, /* got an ACQUIRE message for this pair */
OPPO_GW_DISCOVERED, /* got TXT specifying gateway */
/* IKE states */
// Main Mode states
STATE_MAIN_R0,
STATE_MAIN_I1,
STATE_MAIN_R1,
STATE_MAIN_I2,
STATE_MAIN_R2,
STATE_MAIN_I3,
STATE_MAIN_R3,
STATE_MAIN_I4,
// Aggressive Mode states
STATE_AGGR_R0,
STATE_AGGR_I1,
STATE_AGGR_R1,
STATE_AGGR_I2,
STATE_AGGR_R2,
// Quick Mode states
STATE_QUICK_R0,
STATE_QUICK_I1,
STATE_QUICK_R1,
STATE_QUICK_I2,
STATE_QUICK_R2,
STATE_INFO,
STATE_INFO_PROTECTED,
// XAUTH authentication and Mode Config states
/* Xauth states */
STATE_XAUTH_R0, /* server state has sent request, awaiting reply */
STATE_XAUTH_R1, /* server state has sent success/fail, awaiting reply */
STATE_MODE_CFG_R0, /* these states are used on the responder */
STATE_MODE_CFG_R1,
STATE_MODE_CFG_R2,
STATE_MODE_CFG_I1, /* this is used on the initiator */
STATE_XAUTH_I0, /* client state is awaiting request */
STATE_XAUTH_I1, /* client state is awaiting result code */
/* one past the last IKE state; used as the exclusive upper bound */
STATE_IKE_ROOF
};
12.1 状态微码数据结构
IKE状态变化处理过程是通过一个数组定义的, 这和netfilter中TCP连接跟踪的状态转换数组类似,
但比那个数组复杂得多: TCP状态转换数组的每个元素只是一个表示TCP状态类型的整数, 而IKE状
态迁移数组的每个元素是一个状态微码数据结构, 用于描述状态迁移的操作.
/*
 * One entry of the state transition table: describes how a message
 * received in a given state is validated and which function handles it.
 */
struct state_microcode {
// current state, and the state to move to after the transition
enum state_kind state, next_state;
// flag bits for this transition
lset_t flags;
// set of payloads that must be present
lset_t req_payloads; /* required payloads (allows just one) */
// set of payloads that may be present
lset_t opt_payloads; /* optional payloads (any number) */
/* if not ISAKMP_NEXT_NONE, process_packet will emit HDR with this as np */
// i.e. the type of the first payload of the reply, when it is fixed in advance
u_int8_t first_out_payload;
// kind of timeout event associated with this transition
enum event_type timeout_event;
// state transition function: the handler run for the received message
state_transition_fn *processor;
};
12.2 状态迁移数组
状态迁移数组是一个很大的数组, 数组内元素是按state值顺序排列的,此处不将其全部列出, 只列出
部分内容, 用于举例说明其数组元素定义, 其他内容类推:
// LELEM(opt): a set containing only bit number opt
#define LELEM(opt) (1ULL << (opt))
/*
 * State transition table (excerpt). Each entry lists, in struct
 * state_microcode field order: state, next_state, flags, req_payloads,
 * opt_payloads, first_out_payload, timeout_event, processor.
 */
static const struct state_microcode state_microcode_table[] = {
// PT(n): the payload type constant ISAKMP_NEXT_<n>
#define PT(n) ISAKMP_NEXT_##n
// P(n): the set bit corresponding to ISAKMP_NEXT_<n>
#define P(n) LELEM(PT(n))
/***** Phase 1 Main Mode *****/
/* No state for main_outI1: --> HDR, SA */
/* STATE_MAIN_R0: I1 --> R1
* HDR, SA --> HDR, SA
*/
// Responder side.
// First table entry: the responder receives the initiator's first message.
// Current state is STATE_MAIN_R0, next state is STATE_MAIN_R1 (R = responder).
{ STATE_MAIN_R0, STATE_MAIN_R1
// flags
, SMF_ALL_AUTH | SMF_REPLY
// required payload: ISAKMP_NEXT_SA
// optional payloads: ISAKMP_NEXT_VID and ISAKMP_NEXT_CR
// first_out_payload is ISAKMP_NEXT_NONE: no first reply payload is fixed here
, P(SA), P(VID) | P(CR), PT(NONE)
// timeout event: EVENT_RETRANSMIT
// handler: main_inI1_outR1 (responder takes in message I1 and sends out R1)
, EVENT_RETRANSMIT, main_inI1_outR1},
/* STATE_MAIN_I1: R1 --> I2
* HDR, SA --> auth dependent
* SMF_PSK_AUTH, SMF_DS_AUTH: --> HDR, KE, Ni
* SMF_PKE_AUTH:
* --> HDR, KE, [ HASH(1), ] <IDi1_b>PubKey_r, <Ni_b>PubKey_r
* SMF_RPKE_AUTH:
* --> HDR, [ HASH(1), ] <Ni_b>Pubkey_r, <KE_b>Ke_i, <IDi1_b>Ke_i [,<<Cert-I_b>Ke_i]
* Note: since we don't know auth at start, we cannot differentiate
* microcode entries based on it.
*/
// Initiator side.
// This entry covers the initiator receiving the responder's first reply.
// Current state is STATE_MAIN_I1, next state is STATE_MAIN_I2 (I = initiator).
{ STATE_MAIN_I1, STATE_MAIN_I2
// flags
, SMF_ALL_AUTH | SMF_INITIATOR | SMF_REPLY
// required payload: ISAKMP_NEXT_SA
// optional payloads: ISAKMP_NEXT_VID and ISAKMP_NEXT_CR
// first_out_payload is ISAKMP_NEXT_NONE: no first reply payload is fixed here
, P(SA), P(VID) | P(CR), PT(NONE) /* don't know yet */
// timeout event: EVENT_RETRANSMIT
// handler: main_inR1_outI2 (initiator takes in reply R1 and sends out I2)
, EVENT_RETRANSMIT, main_inR1_outI2 },
/* STATE_MAIN_R1: I2 --> R2
* SMF_PSK_AUTH, SMF_DS_AUTH: HDR, KE, Ni --> HDR, KE, Nr
* SMF_PKE_AUTH: HDR, KE, [ HASH(1), ] <IDi1_b>PubKey_r, <Ni_b>PubKey_r
* --> HDR, KE, <IDr1_b>PubKey_i, <Nr_b>PubKey_i
* SMF_RPKE_AUTH:
* HDR, [ HASH(1), ] <Ni_b>Pubkey_r, <KE_b>Ke_i, <IDi1_b>Ke_i [,<<Cert-I_b>Ke_i]
* --> HDR, <Nr_b>PubKey_i, <KE_b>Ke_r, <IDr1_b>Ke_r
*/
// Responder side.
// This entry covers the responder receiving the initiator's second message.
// Current state is STATE_MAIN_R1, next state is STATE_MAIN_R2 (R = responder).
{ STATE_MAIN_R1, STATE_MAIN_R2
// flags
, SMF_PSK_AUTH | SMF_DS_AUTH | SMF_REPLY
#ifdef NAT_TRAVERSAL
// required payloads: ISAKMP_NEXT_KE (key exchange) and ISAKMP_NEXT_NONCE
// optional payloads: ISAKMP_NEXT_VID and ISAKMP_NEXT_CR,
// plus ISAKMP_NEXT_NATD_RFC when NAT traversal is compiled in
// first payload of the reply is fixed to ISAKMP_NEXT_KE
, P(KE) | P(NONCE), P(VID) | P(CR) | P(NATD_RFC), PT(KE)
#else
, P(KE) | P(NONCE), P(VID) | P(CR), PT(KE)
#endif
// timeout event: EVENT_RETRANSMIT
// handler: main_inI2_outR2 (responder takes in message I2 and sends out R2)
, EVENT_RETRANSMIT, main_inI2_outR2 },
......以下数组元素分析略
12.3 索引数组
/*
 * Index into state_microcode_table, one slot per IKE state:
 * slot [s - STATE_IKE_FLOOR] points at a table entry for state s.
 */
static const struct state_microcode
*ike_microcode_index[STATE_IKE_ROOF - STATE_IKE_FLOOR];
其中:
#define STATE_IKE_FLOOR STATE_MAIN_R0
定义的范围是从STATE_MAIN_R0到STATE_XAUTH_I1
该索引数组在解码模块的初始化函数init_demux()中完成初始化:
void
init_demux(void)
{
/* fill ike_microcode_index:
* make ike_microcode_index[s] point to first entry in
* state_microcode_table for state s (backward scan makes this easier).
* Check that table is in order -- catch coding errors.
* For what it's worth, this routine is idempotent.
*/
const struct state_microcode *t;
从state_microcode_table数组的最后一个元素开始遍历数组
for (t = &state_microcode_table[elemsof(state_microcode_table) - 1];;)
{
// 确认每个状态微码结构的状态类型值在合适范围内
passert(STATE_IKE_FLOOR <= t->state && t->state < STATE_IKE_ROOF);
// 将索引数组元素指向该数组元素
ike_microcode_index[t->state - STATE_IKE_FLOOR] = t;
// 如果回到了数组的头元素, 中断循环
if (t == state_microcode_table)
break;
// 数组的前一项
t--;
passert(t[0].state <= t[1].state);
}
}
12.4 数据接收处理
12.4.1 入口函数
在server.c的call_server()函数中, 当各网卡的socket接收到数据包时, 将调用comm_handle函数:
/*
 * comm_handle - receive one packet on an interface and run it through
 * the state machine.
 *
 * This is a thin wrapper around read_packet and process_packet. Its job
 * is to keep the teardown code (releasing the msg_digest and resetting
 * the global "current" variables) in one place instead of at each of
 * process_packet's many return points.
 *
 * When processing of a packet is suspended (STF_SUSPEND), process_packet
 * sets the digest pointer to NULL so that it is not freed here; someone
 * else must then make sure the msg_digest is freed eventually.
 *
 * read_packet is a separate function to minimize the lifetime of the
 * enormous automatic input packet buffer.
 *
 * ifp: the interface/port on which the packet arrived.
 */
void
comm_handle(const struct iface_port *ifp)
{
    static struct msg_digest *digest;

#if defined(IP_RECVERR) && defined(MSG_ERRQUEUE)
    /* select(2) may report a message when there is only a MSG_ERRQUEUE
     * message, and at least sometimes that leads to a hanging recvfrom.
     * To avoid what appears to be a kernel bug, check_msg_errqueue uses
     * poll(2) and tells us whether there is anything for us to read.
     *
     * Nothing has been allocated yet, so no teardown is needed:
     * simply return on failure.
     */
    if (!check_msg_errqueue(ifp, POLLIN))
    {
	return;	/* no normal message to read */
    }
#endif /* defined(IP_RECVERR) && defined(MSG_ERRQUEUE) */

    /* allocate a message digest and record the receiving interface */
    digest = alloc_md();
    digest->iface = ifp;

    /* only a successfully read packet is handed on for processing */
    if (read_packet(digest))
    {
	process_packet(&digest);
    }

    /* process_packet may have set the pointer to NULL (see above) */
    if (digest != NULL)
    {
	release_md(digest);
    }

    /* reset the global "current" context */
    cur_state = NULL;
    reset_cur_connection();
    cur_from = NULL;
}
12.4.2 读数据包
/* read the message.
 * Since we don't know its size, we read it into
 * an overly large buffer and then copy it to a
 * new, properly sized buffer.
 *
 * md: message digest; md->iface must already identify the receiving
 *     interface. On success md->sender, md->sender_port and
 *     md->packet_pbs are filled in, and the globals cur_from and
 *     cur_from_port are set.
 *
 * Returns TRUE when a packet is ready for processing. Returns FALSE
 * (after logging) when the receive fails, when the source sockaddr cannot
 * be decoded, when a packet on a NAT-T (ike_float) port is too short or
 * lacks the Non-ESP marker, or when the packet is a 1-byte 0xff keep-alive.
 */
static bool
read_packet(struct msg_digest *md)
{
    /* interface the packet arrived on */
    const struct iface_port *ifp = md->iface;
    int packet_len;
    /* ??? this buffer seems *way* too big */
    u_int8_t bigbuffer[MAX_INPUT_UDP_SIZE];
#ifdef NAT_TRAVERSAL
    u_int8_t *_buffer = bigbuffer;
#endif
    /* source and destination addresses; the union covers IPv4 and IPv6 */
    union
    {
	struct sockaddr sa;
	struct sockaddr_in sa_in4;
	struct sockaddr_in6 sa_in6;
    } from,to;
    /* address lengths */
    int from_len = sizeof(from);
    int to_len = sizeof(to);
    err_t from_ugh = NULL;
    /* default diagnosis: the source address is unknown */
    static const char undisclosed[] = "unknown source";

    happy(anyaddr(addrtypeof(&ifp->ip_addr), &md->sender));
    /* clear the source address before receiving */
    zero(&from.sa);
    /* receive the datagram into bigbuffer */
    packet_len = recvfromto(ifp->fd, bigbuffer
	, sizeof(bigbuffer), /*flags*/0
	, &from.sa, &from_len
	, &to.sa, &to_len);

    /* we do not do anything with *to* addresses yet... we will */

    /* First: digest the from address.
     * We presume that nothing here disturbs errno.
     */
    if (packet_len == -1
    && from_len == sizeof(from)
    && all_zero((const void *)&from.sa, sizeof(from)))
    {
	/* receive failed and "from" is untouched -- not set by recvfrom */
	from_ugh = undisclosed;
    }
    else if (from_len
    < (int) (offsetof(struct sockaddr, sa_family) + sizeof(from.sa.sa_family)))
    {
	/* returned address is too short to even hold sa_family */
	from_ugh = "truncated";
    }
    else
    {
	/* a source address was returned: look up its address family */
	const struct af_info *afi = aftoinfo(from.sa.sa_family);

	if (afi == NULL)
	{
	    /* no info for this family */
	    from_ugh = "unexpected Address Family";
	}
	else if (from_len != (int)afi->sa_sz)
	{
	    /* length does not match the family's sockaddr size */
	    from_ugh = "wrong length";
	}
	else
	{
	    switch (from.sa.sa_family)
	    {
	    case AF_INET:
		/* record source address and port in md->sender */
		from_ugh = initaddr((void *) &from.sa_in4.sin_addr
		    , sizeof(from.sa_in4.sin_addr)
		    , AF_INET, &md->sender);
		setportof(from.sa_in4.sin_port, &md->sender);
		md->sender_port = ntohs(from.sa_in4.sin_port);
		break;
	    case AF_INET6:
		from_ugh = initaddr((void *) &from.sa_in6.sin6_addr
		    , sizeof(from.sa_in6.sin6_addr)
		    , AF_INET6, &md->sender);
		setportof(from.sa_in6.sin6_port, &md->sender);
		md->sender_port = ntohs(from.sa_in6.sin6_port);
		break;
	    }
	}
    }

    /* now we report any actual I/O error */
    if (packet_len == -1)
    {
	if (from_ugh == undisclosed
	&& errno == ECONNREFUSED)
	{
	    /* Tone down scary message for vague event:
	     * We get "connection refused" in response to some
	     * datagram we sent, but we cannot tell which one.
	     */
	    openswan_log("some IKE message we sent has been rejected with ECONNREFUSED (kernel supplied no details)");
	}
	else if (from_ugh != NULL)
	{
	    log_errno((e, "recvfrom on %s failed; Pluto cannot decode source sockaddr in rejection: %s"
		, ifp->ip_dev->id_rname, from_ugh));
	}
	else
	{
	    log_errno((e, "recvfrom on %s from %s:%u failed"
		, ifp->ip_dev->id_rname
		, ip_str(&md->sender), (unsigned)md->sender_port));
	}
	return FALSE;
    }
    else if (from_ugh != NULL)
    {
	/* data was received, but the source address could not be decoded:
	 * still treated as a failure
	 */
	openswan_log("recvfrom on %s returned misformed source sockaddr: %s"
	    , ifp->ip_dev->id_rname, from_ugh);
	return FALSE;
    }

    /* publish the source address and port as the current sender */
    cur_from = &md->sender;
    cur_from_port = md->sender_port;

#ifdef NAT_TRAVERSAL
    /* this interface port is flagged ike_float (NAT traversal) */
    if (ifp->ike_float == TRUE) {
	u_int32_t non_esp;

	/* too short to hold the 4-byte marker: reject */
	if (packet_len < (int)sizeof(u_int32_t)) {
	    openswan_log("recvfrom %s:%u too small packet (%d)"
		, ip_str(cur_from), (unsigned) cur_from_port, packet_len);
	    return FALSE;
	}
	/* _buffer still points at the start of bigbuffer;
	 * the first 4 bytes (the Non-ESP marker) must be zero
	 */
	memcpy(&non_esp, _buffer, sizeof(u_int32_t));
	if (non_esp != 0) {
	    openswan_log("recvfrom %s:%u has no Non-ESP marker"
		, ip_str(cur_from), (unsigned) cur_from_port);
	    return FALSE;
	}
	/* skip the marker: advance the pointer and shrink the length by 4 */
	_buffer += sizeof(u_int32_t);
	packet_len -= sizeof(u_int32_t);
    }
#endif

    /* Clone actual message contents
     * and set up md->packet_pbs to describe it.
     */
    init_pbs(&md->packet_pbs
#ifdef NAT_TRAVERSAL
	, clone_bytes(_buffer, packet_len, "message buffer in comm_handle()")
#else
	, clone_bytes(bigbuffer, packet_len, "message buffer in comm_handle()")
#endif
	, packet_len, "packet");

    /* debug log: size, sender and receiving interface */
    DBG(DBG_RAW | DBG_CRYPT | DBG_PARSING | DBG_CONTROL,
	{
	    DBG_log(BLANK_FORMAT);
	    DBG_log("*received %d bytes from %s:%u on %s (port=%d)"
		, (int) pbs_room(&md->packet_pbs)
		, ip_str(cur_from), (unsigned) cur_from_port
		, ifp->ip_dev->id_rname
		, ifp->port);
	});

    /* debug dump of the raw packet bytes */
    DBG(DBG_RAW,
	DBG_dump("", md->packet_pbs.start, pbs_room(&md->packet_pbs)));

#ifdef NAT_TRAVERSAL
    /* a single 0xff byte is treated as a NAT-T keep-alive and dropped */
    if ((pbs_room(&md->packet_pbs)==1) && (md->packet_pbs.start[0]==0xff)) {
	/**
	 * NAT-T Keep-alive packets should be discared by kernel ESPinUDP
	 * layer. But boggus keep-alive packets (sent with a non-esp marker)
	 * can reach this point. Complain and discard them.
	 */
	DBG(DBG_NATT,
	    DBG_log("NAT-T keep-alive (boggus ?) should not reach this point. "
		"Ignored. Sender: %s:%u", ip_str(cur_from),
		(unsigned) cur_from_port);
	    );
	return FALSE;
    }
#endif
    return TRUE;
}
...... 待续 ......