Earlier, when we covered reading and writing files, we saw the scsi_cmnd command structure being set up by sd_init_command. In fact we can also send a scsi command directly with the scsi_execute_req function, except that the command then has to change form twice.
Take a scsi disk as our example again. What the scsi side originally issues is a scsi command, but to travel through the block layer it must become a request, and once it reaches the disk it has to turn back into a scsi command. In other words, over the course of the whole trip the scsi command transforms twice.
Say, for instance, we want the disk's capacity. We can issue READ_CAPACITY directly, along the lines of sd_read_capacity() in drivers/scsi/sd.c; since READ CAPACITY is a data-in command, it needs an 8-byte response buffer and DMA_FROM_DEVICE:
cmd[0] = READ_CAPACITY;
memset(&cmd[1], 0, 9);          /* the rest of the 10-byte CDB is zero */
memset(buffer, 0, 8);           /* READ CAPACITY(10) returns 8 bytes */

res = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, buffer, 8, &sshdr,
                       SD_TIMEOUT, SD_MAX_RETRIES);
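If res comes back zero, the 8 bytes in buffer hold the address of the last logical block and the block size, both big-endian. A minimal sketch of decoding them (the variable names here are ours, not sd.c's):

/* READ CAPACITY(10) response: bytes 0-3 = last LBA, bytes 4-7 = block size */
unsigned long last_lba    = (buffer[0] << 24) | (buffer[1] << 16) |
                            (buffer[2] << 8)  |  buffer[3];
unsigned long sector_size = (buffer[4] << 24) | (buffer[5] << 16) |
                            (buffer[6] << 8)  |  buffer[7];
unsigned long long capacity = (unsigned long long)(last_lba + 1) * sector_size;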
So let us start from a function the scsi disk side uses all the time (in sd_ioctl, for example) and watch how, in broad daylight, a scsi command gets swapped for a request. That function is scsi_execute_req(), from drivers/scsi/scsi_lib.c in the SCSI midlayer:
216 int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
217                      int data_direction, void *buffer, unsigned bufflen,
218                      struct scsi_sense_hdr *sshdr, int timeout, int retries)
219 {
220         char *sense = NULL;
221         int result;
222
223         if (sshdr) {
224                 sense = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_NOIO);
225                 if (!sense)
226                         return DRIVER_ERROR << 24;
227         }
228         result = scsi_execute(sdev, cmd, data_direction, buffer, bufflen,
229                               sense, timeout, retries, 0);
230         if (sshdr)
231                 scsi_normalize_sense(sense, SCSI_SENSE_BUFFERSIZE, sshdr);
232
233         kfree(sense);
234         return result;
235 }
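Notice the division of labor here: if the caller wants sense data it passes an sshdr, scsi_execute_req() allocates a raw sense buffer, and scsi_normalize_sense() decodes that buffer into the structured scsi_sense_hdr. A hedged sketch of how a caller typically uses it afterwards (our own example, not code from scsi_lib.c; retry is a stand-in flag):

/* After a failure, see whether the device left us valid sense data. */
if (res && scsi_sense_valid(&sshdr)) {
        /* sense_key/asc/ascq say why the device complained */
        if (sshdr.sense_key == UNIT_ATTENTION)
                retry = 1;      /* e.g. media changed: worth retrying */
}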
The one function we really need to care about here is scsi_execute(), from the same file:
179 int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
180                  int data_direction, void *buffer, unsigned bufflen,
181                  unsigned char *sense, int timeout, int retries, int flags)
182 {
183         struct request *req;
184         int write = (data_direction == DMA_TO_DEVICE);
185         int ret = DRIVER_ERROR << 24;
186
187         req = blk_get_request(sdev->request_queue, write, __GFP_WAIT);
188
189         if (bufflen && blk_rq_map_kern(sdev->request_queue, req,
190                                        buffer, bufflen, __GFP_WAIT))
191                 goto out;
192
193         req->cmd_len = COMMAND_SIZE(cmd[0]);
194         memcpy(req->cmd, cmd, req->cmd_len);
195         req->sense = sense;
196         req->sense_len = 0;
197         req->retries = retries;
198         req->timeout = timeout;
199         req->cmd_type = REQ_TYPE_BLOCK_PC;
200         req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT;
201
202         /*
203          * head injection *required* here otherwise quiesce won't work
204          */
205         blk_execute_rq(req->q, NULL, req, 1);
206
207         ret = req->errors;
208  out:
209         blk_put_request(req);
210
211         return ret;
212 }
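Line 193 deserves a word. COMMAND_SIZE() figures out the CDB length from the opcode alone: the top three bits of a scsi opcode encode its group, and each group has a fixed command length. The table and macro come from include/scsi/scsi.h:

/* CDB length per opcode group; the group is the opcode's top 3 bits */
const unsigned char scsi_command_size[8] = {
        6, 10, 10, 12, 16, 12, 10, 10
};
#define COMMAND_SIZE(opcode) scsi_command_size[((opcode) >> 5) & 7]

For READ_CAPACITY (opcode 0x25) the group is 1, so req->cmd_len becomes 10. Line 199 is just as important: marking the request REQ_TYPE_BLOCK_PC tells the block layer it carries a ready-made scsi command rather than ordinary file-system data.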
The first function called is blk_get_request(), from block/ll_rw_blk.c:
2215 struct request *blk_get_request(request_queue_t *q, int rw, gfp_t gfp_mask)
2216 {
2217         struct request *rq;
2218
2219         BUG_ON(rw != READ && rw != WRITE);
2220
2221         spin_lock_irq(q->queue_lock);
2222         if (gfp_mask & __GFP_WAIT) {
2223                 rq = get_request_wait(q, rw, NULL);
2224         } else {
2225                 rq = get_request(q, rw, NULL, gfp_mask);
2226                 if (!rq)
2227                         spin_unlock_irq(q->queue_lock);
2228         }
2229         /* q->queue_lock is unlocked at this point */
2230
2231         return rq;
2232 }
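By the way, a caller that must not sleep would pass a mask without __GFP_WAIT and take the branch at line 2225, checking for failure itself. A hypothetical sketch:

struct request *rq;

rq = blk_get_request(q, READ, GFP_ATOMIC);   /* no __GFP_WAIT: may fail */
if (!rq)
        return -ENOMEM;                      /* queue full or out of memory */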
Notice that when scsi_execute() called this function, the gfp_mask argument really was __GFP_WAIT, so it is line 2223 that gets executed. get_request_wait() comes from the same file:
2173 static struct request *get_request_wait(request_queue_t *q, int rw_flags,
2174                                         struct bio *bio)
2175 {
2176         const int rw = rw_flags & 0x01;
2177         struct request *rq;
2178
2179         rq = get_request(q, rw_flags, bio, GFP_NOIO);
2180         while (!rq) {
2181                 DEFINE_WAIT(wait);
2182                 struct request_list *rl = &q->rq;
2183
2184                 prepare_to_wait_exclusive(&rl->wait[rw], &wait,
2185                                           TASK_UNINTERRUPTIBLE);
2186
2187                 rq = get_request(q, rw_flags, bio, GFP_NOIO);
2188
2189                 if (!rq) {
2190                         struct io_context *ioc;
2191
2192                         blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
2193
2194                         __generic_unplug_device(q);
2195                         spin_unlock_irq(q->queue_lock);
2196                         io_schedule();
2197
2198                         /*
2199                          * After sleeping, we become a "batching" process and
2200                          * will be able to allocate at least one request, and
2201                          * up to a big batch of them for a small period time.
2202                          * See ioc_batching, ioc_set_batching
2203                          */
2204                         ioc = current_io_context(GFP_NOIO, q->node);
2205                         ioc_set_batching(q, ioc);
2206
2207                         spin_lock_irq(q->queue_lock);
2208                 }
2209                 finish_wait(&rl->wait[rw], &wait);
2210         }
2211
2212         return rq;
2213 }
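There is nothing block-layer-specific about the loop from line 2180 to 2210: it is the kernel's standard wait-queue idiom, with one extra allocation attempt squeezed in between prepare and sleep, and with io_schedule() in place of schedule() so the sleep is accounted as I/O wait. Stripped of the details, the underlying pattern looks like this (a generic sketch; wq and condition are placeholders):

DEFINE_WAIT(wait);

for (;;) {
        prepare_to_wait_exclusive(&wq, &wait, TASK_UNINTERRUPTIBLE);
        if (condition)          /* here: we managed to allocate a request */
                break;
        schedule();             /* whoever frees a request will wake us */
}
finish_wait(&wq, &wait);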
And the function that does the real work is get_request(), still from the same file.
2068 static struct request *get_request(request_queue_t *q, int rw_flags,
2069                                    struct bio *bio, gfp_t gfp_mask)
2070 {
2071         struct request *rq = NULL;
2072         struct request_list *rl = &q->rq;
2073         struct io_context *ioc = NULL;
2074         const int rw = rw_flags & 0x01;
2075         int may_queue, priv;
2076
2077         may_queue = elv_may_queue(q, rw_flags);
2078         if (may_queue == ELV_MQUEUE_NO)
2079                 goto rq_starved;
2080
2081         if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) {
2082                 if (rl->count[rw]+1 >= q->nr_requests) {
2083                         ioc = current_io_context(GFP_ATOMIC, q->node);
2084                         /*
2085                          * The queue will fill after this allocation, so set
2086                          * it as full, and mark this process as "batching".
2087                          * This process will be allowed to complete a batch of
2088                          * requests, others will be blocked.
2089                          */
2090                         if (!blk_queue_full(q, rw)) {
2091                                 ioc_set_batching(q, ioc);
2092                                 blk_set_queue_full(q, rw);
2093                         } else {
2094                                 if (may_queue != ELV_MQUEUE_MUST
2095                                     && !ioc_batching(q, ioc)) {
2096                                         /*
2097                                          * The queue is full and the allocating
2098                                          * process is not a "batcher", and not
2099                                          * exempted by the IO scheduler
2100                                          */
2101                                         goto out;
2102                                 }
2103                         }
2104                 }
2105                 blk_set_queue_congested(q, rw);
2106         }
2107
2108         /*
2109          * Only allow batching queuers to allocate up to 50% over the defined
2110          * limit of requests, otherwise we could have thousands of requests
2111          * allocated with any setting of ->nr_requests
2112          */
2113         if (rl->count[rw] >= (3 * q->nr_requests / 2))
2114                 goto out;
2115
2116         rl->count[rw]++;
2117         rl->starved[rw] = 0;
2118
2119         priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
2120         if (priv)
2121                 rl->elvpriv++;
2122
2123         spin_unlock_irq(q->queue_lock);
2124
2125         rq = blk_alloc_request(q, rw_flags, priv, gfp_mask);
2126         if (unlikely(!rq)) {
2127                 /*
2128                  * Allocation failed presumably due to memory. Undo anything
2129                  * we might have messed up.
2130                  *
2131                  * Allocating task should really be put onto the front of the
2132                  * wait queue, but this is pretty rare.
2133                  */
2134                 spin_lock_irq(q->queue_lock);
2135                 freed_request(q, rw, priv);
2136
2137                 /*
2138                  * in the very unlikely event that allocation failed and no
2139                  * requests for this direction was pending, mark us starved
2140                  * so that freeing of a request in the other direction will
2141                  * notice us. another possible fix would be to split the
2142                  * rq mempool into READ and WRITE
2143                  */
2144 rq_starved:
2145                 if (unlikely(rl->count[rw] == 0))
2146                         rl->starved[rw] = 1;
2147
2148                 goto out;
2149         }
2150
2151         /*
2152          * ioc may be NULL here, and ioc_batching will be false. That's
2153          * OK, if the queue is under the request limit then requests need
2154          * not count toward the nr_batch_requests limit. There will always
2155          * be some limit enforced by BLK_BATCH_TIME.
2156          */
2157         if (ioc_batching(q, ioc))
2158                 ioc->nr_batch_requests--;
2159
2160         rq_init(q, rq);
2161
2162         blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ);
2163 out:
2164         return rq;
2165 }
The elv_may_queue() here comes from block/elevator.c:
848 int elv_may_queue(request_queue_t *q, int rw)
849 {
850         elevator_t *e = q->elevator;
851
852         if (e->ops->elevator_may_queue_fn)
853                 return e->ops->elevator_may_queue_fn(q, rw);
854
855         return ELV_MQUEUE_MAY;
856 }
The elevator_t variable that belongs to us was allocated back when elevator_init() called elevator_alloc(), and its ops obviously depend on which elevator we chose. For simplicity, let's pick "noop", the simplest and most primitive scheme of all. Here is its elevator_type once more:
87 static struct elevator_type elevator_noop = {
88         .ops = {
89                 .elevator_merge_req_fn   = noop_merged_requests,
90                 .elevator_dispatch_fn    = noop_dispatch,
91                 .elevator_add_req_fn     = noop_add_request,
92                 .elevator_queue_empty_fn = noop_queue_empty,
93                 .elevator_former_req_fn  = noop_former_request,
94                 .elevator_latter_req_fn  = noop_latter_request,
95                 .elevator_init_fn        = noop_init_queue,
96                 .elevator_exit_fn        = noop_exit_queue,
97         },
98         .elevator_name = "noop",
99         .elevator_owner = THIS_MODULE,
100 };
As we can see, our chosen noop elevator never defines elevator_may_queue_fn at all, so elv_may_queue() falls through to line 855, and we head back to get_request() carrying the return value ELV_MQUEUE_MAY.
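For contrast, an elevator that does care about this question implements the hook in its ops. cfq, for instance, wires in cfq_may_queue, which can answer ELV_MQUEUE_MUST, ELV_MQUEUE_MAY or ELV_MQUEUE_NO depending on the asking process's I/O context. Roughly (abridged from block/cfq-iosched.c):

static struct elevator_type iosched_cfq = {
        .ops = {
                .elevator_may_queue_fn = cfq_may_queue,
                /* ...the rest of cfq's many hooks... */
        },
        .elevator_name  = "cfq",
        .elevator_owner = THIS_MODULE,
};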
Back in get_request(), what is rl? Line 2072 pointed it at q->rq, the struct request_list embedded in the request queue, a structure we already met in the chapters on the I/O schedulers. We are not going to dwell on all of that now; what we want to look at are just a few functions. The first is blk_alloc_request() at line 2125, from ll_rw_blk.c:
1970 static struct request *
1971 blk_alloc_request(request_queue_t *q, int rw, int priv, gfp_t gfp_mask)
1972 {
1973         struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask);
1974
1975         if (!rq)
1976                 return NULL;
1977
1978         /*
1979          * first three bits are identical in rq->cmd_flags and bio->bi_rw,
1980          * see bio.h and blkdev.h
1981          */
1982         rq->cmd_flags = rw | REQ_ALLOCED;
1983
1984         if (priv) {
1985                 if (unlikely(elv_set_request(q, rq, gfp_mask))) {
1986                         mempool_free(rq, q->rq.rq_pool);
1987                         return NULL;
1988                 }
1989                 rq->cmd_flags |= REQ_ELVPRIV;
1990         }
1991
1992         return rq;
1993 }
Even if the rest is over our heads, line 1973 at least makes one thing clear: a struct request is allocated right here and rq points at it. In other words, up to this point we had a request queue but nothing of substance in it; from this moment on we have a genuine request. It has not joined the queue yet, but that is only a matter of time.
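The mempool_alloc() on that line deserves a parenthesis. q->rq.rq_pool is a memory pool set up when the queue was created: it keeps a minimum reserve of preallocated requests, so that I/O can still make progress when memory is tight. The general pattern looks like this (an illustrative sketch of the mempool API, not the block layer's actual setup code; request_cachep stands for the block layer's request slab cache):

mempool_t *pool;
struct request *rq;

/* keep at least BLKDEV_MIN_RQ objects in reserve */
pool = mempool_create_slab_pool(BLKDEV_MIN_RQ, request_cachep);

rq = mempool_alloc(pool, GFP_NOIO);     /* falls back on the reserve */
/* ... use rq ... */
mempool_free(rq, pool);                 /* the object returns to the pool */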
Next comes the rq_init() at line 2160 of get_request():
238 static void rq_init(request_queue_t *q, struct request *rq)
239 {
240         INIT_LIST_HEAD(&rq->queuelist);
241         INIT_LIST_HEAD(&rq->donelist);
242
243         rq->errors = 0;
244         rq->bio = rq->biotail = NULL;
245         INIT_HLIST_NODE(&rq->hash);
246         RB_CLEAR_NODE(&rq->rb_node);
247         rq->ioprio = 0;
248         rq->buffer = NULL;
249         rq->ref_count = 1;
250         rq->q = q;
251         rq->special = NULL;
252         rq->data_len = 0;
253         rq->data = NULL;
254         rq->nr_phys_segments = 0;
255         rq->sense = NULL;
256         rq->end_io = NULL;
257         rq->end_io_data = NULL;
258         rq->completion_data = NULL;
259 }
What is this function doing? Simple: it initializes the rq we just allocated.
After that, get_request() returns happily; under normal circumstances get_request_wait() follows suit, and then blk_get_request() returns as well. Carrying our freshly allocated, freshly initialized req, we come back to scsi_execute(), and the stretch of code that follows is exactly what we care about most: the real assignments to req. req->cmd_len, req->cmd and the rest get filled in right there, at lines 193 through 200. In other words, this is how our scsi command gets dragged into the water by the request. From then on the two no longer live out that wistful line, "the water cannot hold the drifting petals, and the petals cannot enter the water's world"; they rise together and fall together, partners in crime.
And with that, the first transformation is complete: from scsi command to request.