1459 static inline void lock_sock(struct sock *sk)
1460 {
1461 	lock_sock_nested(sk, 0);
1462 }

lock_sock is just a wrapper around lock_sock_nested:
2284 void lock_sock_nested(struct sock *sk, int subclass)
2285 {
2286 	might_sleep();                          //calling this function may sleep
2287 	spin_lock_bh(&sk->sk_lock.slock);       //take the spinlock and disable local softirqs
2288 	if (sk->sk_lock.owned)                  //a process already holds the lock
2289 		__lock_sock(sk);
2290 	sk->sk_lock.owned = 1;                  //mark the lock as owned by a process
2291 	spin_unlock(&sk->sk_lock.slock);        //drop the spinlock (note: softirqs are still disabled)
2292 	/*
2293 	 * The sk_lock has mutex_lock() semantics here:
2294 	 */
2295 	mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
2296 	local_bh_enable();                      //re-enable softirqs, allowing them to run again
2297 }

The lock is released with release_sock:
2300 void release_sock(struct sock *sk)
2301 {
2302 	/*
2303 	 * The sk_lock has mutex_unlock() semantics:
2304 	 */
2305 	mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
2306
2307 	spin_lock_bh(&sk->sk_lock.slock);
2308 	if (sk->sk_backlog.tail)                //the backlog queue contains skbs
2309 		__release_sock(sk);             //process the skbs queued on the backlog
2310
2311 	if (sk->sk_prot->release_cb)
2312 		sk->sk_prot->release_cb(sk);    //run softirq work that was deferred while the process had the socket locked
2313
2314 	sk->sk_lock.owned = 0;                  //mark the lock as released by the process
2315 	if (waitqueue_active(&sk->sk_lock.wq))  //there are processes on the wait queue
2316 		wake_up(&sk->sk_lock.wq);       //wake them up
2317 	spin_unlock_bh(&sk->sk_lock.slock);
2318 }

Softirqs take the spinlock with bh_lock_sock_nested and release it with bh_unlock_sock.
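For reference, all of this state lives in sk->sk_lock, whose type is socket_lock_t, and the softirq-side helpers are thin wrappers around its spinlock. Roughly (from include/net/sock.h of this kernel generation; the lockdep field is omitted):

typedef struct {
	spinlock_t        slock;  /* protects 'owned' and the backlog queue */
	int               owned;  /* 1 while a process context owns the socket */
	wait_queue_head_t wq;     /* processes sleeping in __lock_sock() */
} socket_lock_t;

#define bh_lock_sock(__sk)        spin_lock(&((__sk)->sk_lock.slock))
#define bh_lock_sock_nested(__sk) \
	spin_lock_nested(&((__sk)->sk_lock.slock), SINGLE_DEPTH_NESTING)
#define bh_unlock_sock(__sk)      spin_unlock(&((__sk)->sk_lock.slock))

A softirq therefore only ever spins on slock; it never sets owned and never sleeps on wq.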
Suppose process T1 calls lock_sock_nested first, sets sk->sk_lock.owned = 1 and starts accessing the socket. When process T2 then calls lock_sock_nested, it finds the lock owned and falls into __lock_sock:
1832 static void __lock_sock(struct sock *sk)
1833 	__releases(&sk->sk_lock.slock)
1834 	__acquires(&sk->sk_lock.slock)
1835 {
1836 	DEFINE_WAIT(wait);
1837
1838 	for (;;) {
1839 		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
1840 					TASK_UNINTERRUPTIBLE);  //set the task state to TASK_UNINTERRUPTIBLE; once it gives up the CPU it cannot be scheduled again until the state is changed
1841 		spin_unlock_bh(&sk->sk_lock.slock);
1842 		schedule();                     //give up the CPU
1843 		spin_lock_bh(&sk->sk_lock.slock);
1844 		if (!sock_owned_by_user(sk))
1845 			break;
1846 	}
1847 	finish_wait(&sk->sk_lock.wq, &wait);
1848 }

DEFINE_WAIT defines a wait queue entry:
889 #define DEFINE_WAIT_FUNC(name, function)                            \
890 	wait_queue_t name = {                                           \
891 		.private        = current,                              \
892 		.func           = function,                             \
893 		.task_list      = LIST_HEAD_INIT((name).task_list),     \
894 	}
895
896 #define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function)
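The .func callback installed here, autoremove_wake_function, is what the waker invokes later (see __wake_up_common below): it performs the default wake-up and, if that succeeds, removes the entry from the wait queue. Its definition is roughly (kernel/wait.c, approximate for this kernel generation):

int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key)
{
	int ret = default_wake_function(wait, mode, sync, key);  /* wake the task in wait->private */

	if (ret)
		list_del_init(&wait->task_list);  /* woken: drop the entry from the wait queue */
	return ret;
}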
T2 goes to sleep after the schedule call at line 1842, because prepare_to_wait_exclusive has already changed its task state:
81 void
82 prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state)
83 {
84 	unsigned long flags;
85
86 	wait->flags |= WQ_FLAG_EXCLUSIVE;
87 	spin_lock_irqsave(&q->lock, flags);
88 	if (list_empty(&wait->task_list))
89 		__add_wait_queue_tail(q, wait);     //append the task's wait entry to sk->sk_lock.wq.task_list
90 	set_current_state(state);                   //set the task state
91 	spin_unlock_irqrestore(&q->lock, flags);
92 }

When T1 calls release_sock to drop the lock, it calls wake_up to wake T2. wake_up is a macro wrapping __wake_up, which does the actual waking:
3159 static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
3160 			int nr_exclusive, int wake_flags, void *key)
3161 {
3162 	wait_queue_t *curr, *next;
3163
3164 	list_for_each_entry_safe(curr, next, &q->task_list, task_list) {   //wake entries starting from the first
3165 		unsigned flags = curr->flags;
3166
3167 		if (curr->func(curr, mode, wake_flags, key) &&     //curr->func is autoremove_wake_function, installed by the DEFINE_WAIT macro
3168 		    (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
3169 			break;
3170 	}
3171 }
...
3183 void __wake_up(wait_queue_head_t *q, unsigned int mode,
3184 			int nr_exclusive, void *key)
3185 {
3186 	unsigned long flags;
3187
3188 	spin_lock_irqsave(&q->lock, flags);
3189 	__wake_up_common(q, mode, nr_exclusive, 0, key);
3190 	spin_unlock_irqrestore(&q->lock, flags);
3191 }

autoremove_wake_function calls default_wake_function, which in turn calls try_to_wake_up to wake T2:
1484 static int
1485 try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
1486 {
...
1514 	p->state = TASK_WAKING;     //the task is on its way to becoming runnable again
...

In other words, when two processes access the same socket one after the other, the later one sleeps until the earlier one releases the lock, and only then is it woken and given its chance. Processes are woken in the order in which they queued.
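Putting the pieces together, the wake-up chain from release_sock down to try_to_wake_up is held together by two small definitions, roughly (approximate for this kernel generation):

#define wake_up(x)	__wake_up(x, TASK_NORMAL, 1, NULL)   /* wake at most one exclusive waiter */

int default_wake_function(wait_queue_t *curr, unsigned mode, int wake_flags,
			  void *key)
{
	return try_to_wake_up(curr->private, mode, wake_flags);  /* curr->private is the sleeping task */
}

So wake_up(&sk->sk_lock.wq) walks the wait queue and invokes autoremove_wake_function on the first entry, which wakes the task stored in .private (T2) and removes it from the queue; because the waiters are exclusive and nr_exclusive is 1, each release_sock wakes exactly one waiting process.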
A CPU can run only one softirq at a time, so softirqs can only race with each other across different CPUs. Softirqs protect the socket with a spinlock: when a second softirq asks for a lock that is already held, it busy-waits in a tight loop until the owner releases it. Since a CPU must not stay in softirq context for long (otherwise its other work is starved), a spinlock is the fastest way to obtain the lock, and once it is held the critical section must also be short and must never sleep. Situations where two or more softirqs access the same socket at once include: a receive softirq racing with a timer expiry, packets from the same NIC being processed on different CPUs when IRQ load balancing is enabled, and requests arriving on different NICs for the same listening socket.
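The timer case also shows how the softirq side copes when a process owns the lock: the handler checks sock_owned_by_user under the spinlock and, if the socket is busy, records the pending work so that sk->sk_prot->release_cb can run it when the process calls release_sock. A simplified sketch of this pattern, modeled on TCP's write timer (field and flag names vary between kernel versions):

static void tcp_write_timer(unsigned long data)
{
	struct sock *sk = (struct sock *)data;

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		tcp_write_timer_handler(sk);    /* socket is free: handle the timeout now */
	} else {
		/* a process owns the socket: mark the work as deferred so that
		 * tcp_release_cb() (sk->sk_prot->release_cb) runs it later */
		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
			sock_hold(sk);
	}
	bh_unlock_sock(sk);
	sock_put(sk);
}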
Softirqs run at a higher priority than processes: a running process can be interrupted by a softirq at any moment (unless softirqs are disabled). Depending on who reaches the socket first, there are two cases:
(1) The softirq accesses the socket first, then the process
Here the softirq already holds the spinlock, so the process waits when it tries to take the spinlock and only acquires it once the softirq releases it.
(2) The process accesses the socket first, then the softirq
The process takes the spinlock (with softirqs disabled so it cannot be interrupted), sets sk->sk_lock.owned to 1, then releases the spinlock, re-enables softirqs and goes on to access the socket. If a softirq fires at this point, the process is preempted and the softirq runs until it reaches the TCP entry function tcp_v4_rcv:
1961 int tcp_v4_rcv(struct sk_buff *skb)
1962 {
...
2024 	bh_lock_sock_nested(sk);            //take the spinlock
2025 	ret = 0;
2026 	if (!sock_owned_by_user(sk)) {      //false here, because sk->sk_lock.owned is 1
...
2039 	} else if (unlikely(sk_add_backlog(sk, skb,
2040 				sk->sk_rcvbuf + sk->sk_sndbuf))) {
2041 		bh_unlock_sock(sk);         //release the spinlock
2042 		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
2043 		goto discard_and_relse;
2044 	}
2045 	bh_unlock_sock(sk);                 //release the spinlock

With the socket locked by a process, the skb is handled by sk_add_backlog:
777 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
778 					unsigned int limit)
779 {
780 	if (sk_rcvqueues_full(sk, skb, limit))
781 		return -ENOBUFS;
782
783 	__sk_add_backlog(sk, skb);
784 	sk->sk_backlog.len += skb->truesize;
785 	return 0;
786 }

__sk_add_backlog places the skb on the backlog queue:
749 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
750 {
751 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
752 	skb_dst_force(skb);
753
754 	if (!sk->sk_backlog.tail)
755 		sk->sk_backlog.head = skb;
756 	else
757 		sk->sk_backlog.tail->next = skb;
758
759 	sk->sk_backlog.tail = skb;
760 	skb->next = NULL;
761 }

Once the skb is queued on the backlog, the softirq returns and the process gets to run again. Until the process releases the lock, every softirq will queue its skbs on the backlog in the same way. When the process finally calls release_sock and the backlog queue is not empty, __release_sock runs:
1850 static void __release_sock(struct sock *sk)
1851 	__releases(&sk->sk_lock.slock)
1852 	__acquires(&sk->sk_lock.slock)
1853 {
1854 	struct sk_buff *skb = sk->sk_backlog.head;
1855
1856 	do {
1857 		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
1858 		bh_unlock_sock(sk);             //release the spinlock, but keep softirqs disabled
1859
1860 		do {
1861 			struct sk_buff *next = skb->next;
1862
1863 			prefetch(next);
1864 			WARN_ON_ONCE(skb_dst_is_noref(skb));
1865 			skb->next = NULL;
1866 			sk_backlog_rcv(sk, skb);    //process one skb
1867
1868 			/*
1869 			 * We are in process context here with softirqs
1870 			 * disabled, use cond_resched_softirq() to preempt.
1871 			 * This is safe to do because we've taken the backlog
1872 			 * queue private:
1873 			 */
1874 			cond_resched_softirq();     //re-enable softirqs and yield the CPU if a reschedule is due; softirqs are disabled again before continuing
1875
1876 			skb = next;
1877 		} while (skb != NULL);
1878
1879 		bh_lock_sock(sk);
1880 	} while ((skb = sk->sk_backlog.head) != NULL);
1881
1882 	/*
1883 	 * Doing the zeroing here guarantee we can not loop forever
1884 	 * while a wild producer attempts to flood us.
1885 	 */
1886 	sk->sk_backlog.len = 0;
1887 }

__release_sock drains the backlog like this: it first detaches all skbs on the backlog into a private list (so they can be processed safely), releases the spinlock, and then, still with softirqs disabled, feeds each skb to sk_backlog_rcv. After every skb it calls cond_resched_softirq, so that a long queue does not keep softirqs disabled for too long. If softirqs run during this window, their skbs land on the original (now emptied) backlog queue and do not interfere with the list currently being processed. sk_backlog_rcv hands the skb to TCP:
790 static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
791 {
792 	if (sk_memalloc_socks() && skb_pfmemalloc(skb))
793 		return __sk_backlog_rcv(sk, skb);   //also ends up calling sk->sk_backlog_rcv
794
795 	return sk->sk_backlog_rcv(sk, skb);         //for TCP this points to tcp_v4_do_rcv
796 }

In the end, the skbs on the backlog queue are processed by tcp_v4_do_rcv.
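To summarize the interplay from the process side: a system call path brackets its socket work with lock_sock/release_sock, and everything the softirq queued in the meantime is drained on the way out. A minimal sketch (not literal kernel source; sock_op_example is a hypothetical name):

static int sock_op_example(struct sock *sk)
{
	lock_sock(sk);      /* may sleep in __lock_sock() if another process owns sk */

	/*
	 * From here until release_sock(), tcp_v4_rcv() running in softirq
	 * context sees sock_owned_by_user(sk) as true and can only append
	 * incoming skbs to sk->sk_backlog.
	 */

	release_sock(sk);   /* __release_sock() feeds the queued skbs to tcp_v4_do_rcv(),
			     * release_cb() runs deferred work, and the next waiter is woken */
	return 0;
}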