在大家都进入了kdb之后,in control的那个cpu就开始执行1566行的kdb_local()函数了.依然定义于kdb/kdbmain.c:
1134 /*
1135 * kdb_local
1136 *
1137 * The main code for kdb. This routine is invoked on a specific
1138 * processor, it is not global. The main kdb() routine ensures
1139 * that only one processor at a time is in this routine. This
1140 * code is called with the real reason code on the first entry
1141 * to a kdb session, thereafter it is called with reason SWITCH,
1142 * even if the user goes back to the original cpu.
1143 *
1144 * Inputs:
1145 * reason The reason KDB was invoked
1146 * error The hardware-defined error code
1147 * regs The exception frame at time of fault/breakpoint. NULL
1148 * for reason SILENT or CPU_UP, otherwise valid.
1149 * db_result Result code from the break or debug point.
1150 * Returns:
1151 * 0 KDB was invoked for an event which it wasn't responsible
1152 * 1 KDB handled the event for which it was invoked.
1153 * KDB_CMD_GO User typed 'go'.
1154 * KDB_CMD_CPU User switched to another cpu.
1155 * KDB_CMD_SS Single step.
1156 * KDB_CMD_SSB Single step until branch.
1157 * Locking:
1158 * none
1159 * Remarks:
1160 * none
1161 */
1162
1163 static int
1164 kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_dbtrap_t db_result)
1165 {
1166 char *cmdbuf;
1167 int diag;
1168 struct task_struct *kdb_current = kdb_curr_task(smp_processor_id());
1169
1170 /* If kdb has been entered for an event which has been/will be
1171 * recovered then silently return. We have to get this far into kdb in
1172 * order to synchronize all the cpus, typically only one cpu (monarch)
1173 * knows that the event is recoverable but the other cpus (slaves) may
1174 * also be driven into kdb before that decision is made by the monarch.
1175 *
1176 * To pause in kdb even for recoverable events, 'set RECOVERY_PAUSE 1'
1177 */
1178 KDB_DEBUG_STATE("kdb_local 1", reason);
1179 if (reason == KDB_REASON_ENTER
1180 && KDB_FLAG(RECOVERY)
1181 && !KDB_FLAG(CATASTROPHIC)) {
1182 int recovery_pause = 0;
1183 kdbgetintenv("RECOVERY_PAUSE", &recovery_pause);
1184 if (recovery_pause == 0)
1185 reason = KDB_REASON_SILENT;
1186 else
1187 kdb_printf("%s: Recoverable error detected but"
1188 " RECOVERY_PAUSE is set, staying in KDB\n",
1189 __FUNCTION__);
1190 }
1191
1192 KDB_DEBUG_STATE("kdb_local 2", reason);
1193 kdb_go_count = 0;
1194 if (kdb_quiet(reason)) {
1195 /* no message */
1196 } else if (reason == KDB_REASON_DEBUG) {
1197 /* special case below */
1198 } else {
1199 kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", kdb_current, kdb_current->pid);
1200 #if defined(CONFIG_SMP)
1201 kdb_printf("on processor %d ", smp_processor_id());
1202 #endif
1203 }
1204
1205 switch (reason) {
1206 case KDB_REASON_DEBUG:
1207 {
1208 /*
1209 * If re-entering kdb after a single step
1210 * command, don't print the message.
1211 */
1212 switch(db_result) {
1213 case KDB_DB_BPT:
1214 kdb_printf("\nEntering kdb (0x%p, pid %d) ", kdb_current, kdb_current->pid);
1215 #if defined(CONFIG_SMP)
1216 kdb_printf("on processor %d ", smp_processor_id());
1217 #endif
1218 kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
1219 break;
1220 case KDB_DB_SSB:
1221 /*
1222 * In the midst of ssb command. Just return.
1223 */
1224 KDB_DEBUG_STATE("kdb_local 3", reason);
1225 return KDB_CMD_SSB; /* Continue with SSB command */
1226
1227 break;
1228 case KDB_DB_SS:
1229 break;
1230 case KDB_DB_SSBPT:
1231 KDB_DEBUG_STATE("kdb_local 4", reason);
1232 return 1; /* kdba_db_trap did the work */
1233 default:
1234 kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
1235 db_result);
1236 break;
1237 }
1238
1239 }
1240 break;
1241 case KDB_REASON_ENTER:
1242 if (KDB_STATE(KEYBOARD))
1243 kdb_printf("due to Keyboard Entry\n");
1244 else
1245 kdb_printf("due to KDB_ENTER()\n");
1246 break;
1247 case KDB_REASON_KEYBOARD:
1248 KDB_STATE_SET(KEYBOARD);
1249 kdb_printf("due to Keyboard Entry\n");
1250 break;
1251 case KDB_REASON_ENTER_SLAVE: /* drop through, slaves only get released via cpu switch */
1252 case KDB_REASON_SWITCH:
1253 kdb_printf("due to cpu switch\n");
1254 if (KDB_STATE(GO_SWITCH)) {
1255 KDB_STATE_CLEAR(GO_SWITCH);
1256 KDB_DEBUG_STATE("kdb_local 5", reason);
1257 return KDB_CMD_GO;
1258 }
1259 break;
1260 case KDB_REASON_OOPS:
1261 kdb_printf("Oops: %s\n", kdb_diemsg);
1262 kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
1263 kdba_dumpregs(regs, NULL, NULL);
1264 break;
1265 case KDB_REASON_NMI:
1266 kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n",
1267 kdba_getpc(regs));
1268 kdba_dumpregs(regs, NULL, NULL);
1269 break;
1270 case KDB_REASON_BREAK:
1271 kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
1272 /*
1273 * Determine if this breakpoint is one that we
1274 * are interested in.
1275 */
1276 if (db_result != KDB_DB_BPT) {
1277 kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result);
1278 KDB_DEBUG_STATE("kdb_local 6", reason);
1279 return 0; /* Not for us, dismiss it */
1280 }
1281 break;
1282 case KDB_REASON_RECURSE:
1283 kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(regs));
1284 break;
1285 case KDB_REASON_CPU_UP:
1286 case KDB_REASON_SILENT:
1287 KDB_DEBUG_STATE("kdb_local 7", reason);
1288 if (reason == KDB_REASON_CPU_UP)
1289 kdba_cpu_up();
1290 return KDB_CMD_GO; /* Silent entry, silent exit */
1291 break;
1292 default:
1293 kdb_printf("kdb: unexpected reason code: %d\n", reason);
1294 KDB_DEBUG_STATE("kdb_local 8", reason);
1295 return 0; /* Not for us, dismiss it */
1296 }
1297
1298 kdba_local_arch_setup();
1299
1300 kdba_set_current_task(kdb_current);
1301
1302 while (1) {
1303 /*
1304 * Initialize pager context.
1305 */
1306 kdb_nextline = 1;
1307 KDB_STATE_CLEAR(SUPPRESS);
1308 #ifdef kdba_setjmp
1309 /*
1310 * Use kdba_setjmp/kdba_longjmp to break out of
1311 * the pager early and to attempt to recover from kdb errors.
1312 */
1313 KDB_STATE_CLEAR(LONGJMP);
1314 if (kdbjmpbuf) {
1315 if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) {
1316 /* Command aborted (usually in pager) */
1317 continue;
1318 }
1319 else
1320 KDB_STATE_SET(LONGJMP);
1321 }
1322 #endif /* kdba_setjmp */
1323
1324 cmdbuf = cmd_cur;
1325 *cmdbuf = '\0';
1326 *(cmd_hist[cmd_head])='\0';
1327
1328 if (KDB_FLAG(ONLY_DO_DUMP)) {
1329 /* kdb is off but a catastrophic error requires a dump.
1330 * Take the dump and reboot.
1331 * Turn on logging so the kdb output appears in the log
1332 * buffer in the dump.
1333 */
1334 const char *setargs[] = { "set", "LOGGING", "1" };
1335 kdb_set(2, setargs);
1336 kdb_do_dump();
1337 kdb_reboot(0, NULL);
1338 /*NOTREACHED*/
1339 }
1340
1341 do_full_getstr:
1342 #if defined(CONFIG_SMP)
1343 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), smp_processor_id());
1344 #else
1345 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
1346 #endif
1347 if (defcmd_in_progress)
1348 strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN);
1349
1350 /*
1351 * Fetch command from keyboard
1352 */
1353 cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
1354 if (*cmdbuf != '\n') {
1355 if (*cmdbuf < 32) {
1356 if(cmdptr == cmd_head) {
1357 strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
1358 *(cmd_hist[cmd_head]+strlen(cmd_hist[cmd_head])-1) = '\0';
1359 }
1360 if(!handle_ctrl_cmd(cmdbuf))
1361 *(cmd_cur+strlen(cmd_cur)-1) = '\0';
1362 cmdbuf = cmd_cur;
1363 goto do_full_getstr;
1364 }
1365 else
1366 strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);
1367
1368 cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
1369 if (cmd_head == cmd_tail) cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
1370
1371 }
1372
1373 cmdptr = cmd_head;
1374 diag = kdb_parse(cmdbuf);
1375 if (diag == KDB_NOTFOUND) {
1376 kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
1377 diag = 0;
1378 }
1379 if (diag == KDB_CMD_GO
1380 || diag == KDB_CMD_CPU
1381 || diag == KDB_CMD_SS
1382 || diag == KDB_CMD_SSB)
1383 break;
1384
1385 if (diag)
1386 kdb_cmderror(diag);
1387 }
1388
1389 kdba_local_arch_cleanup();
1390
1391 KDB_DEBUG_STATE("kdb_local 9", diag);
1392 return diag;
1393 }
我承认一开始我看到这些几百行的函数是有一些害怕,但是慢慢的我发现,其实Kernel中出现这种几百行的函数,就好比华为死一个人一样,再正常不过了.倒是假如哪天写代码的不写这种暴长的函数了,就好似妓女不卖淫了,又如同嫖客不嫖娼了,人们反而会说她们不正常,人们反而会说他们不务正业.
我们能做的只是让一切继续,继续往下看,由于我们的reason是KDB_REASON_KEYBOARD,所以我们会执行1199行,会执行1249行,从而在屏幕上我们总能看到类似下面这样的信息被打印出来:
Entering kdb (current=0xffff81022fcab7e0, pid 0) on processor 7 due to Keyboard Entry
[7]kdb>
而如果我们是从KDB_ENTER()进来的,那么1245行的信息就会打印出来,因为我们的reason是KDB_REASON_ENTER.
下一个需要关注的函数是kdba_set_current_task().来自arch/i386/kdb/kdbasupport.c
944 void
945 kdba_set_current_task(const struct task_struct *p)
946 {
947 kdb_current_task = p;
948 if (kdb_task_has_cpu(p)) {
949 struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);
950 kdb_current_regs = krp->regs;
951 return;
952 }
953 kdb_current_regs = NULL;
954 }
如果你看不明白这个函数那我也没话说,其实我相信湖北天门市的城管都能看懂的.无非就是为了保存一个作案现场,以便日后使用.用kdb_current_task保存了当前的这个进程,用kdb_current_regs保存了当前的寄存器.天门城管就是看明白了魏文华把他们的作案现场(粗暴执法)给保存(拍摄)了下来所以才会恼羞成怒,若是他们没进监狱,我倒是希望他们给个面子,来给大家讲解Linux Kernel代码.
从1302行开始,我们发现我们又将进入一个死循环了.先不用看具体的代码,想想也能知道,kdb提示符已经可以打印出来,接下来,kdb进入一个死循环的目的就是为了时时刻刻等待着我们输入kdb那些命令,一个命令执行完了之后,继续循环,继续等待.这种意境很美好,相当于kdb用玫瑰铺满道路,期待你输入的命令,如果你什么也不输入,那么她只会等到心碎的花瓣在寒风中哭泣.
而具体的代码也确实如我们所说的那样,1343行snprintf就是准备打印kdb提示符,默认的就是“[n]kdb> ”,n表示处理器的编号.然后1353行kdb_getstr()就是如注释说的那样,从键盘获取命令.(当然你会看到,实际上kdb提示符也是在kdb_getstr()中打印出来的.)假如你什么也没输入,那么cmdbuf就是空.
376 /*
377 * kdb_getstr
378 *
379 * Print the prompt string and read a command from the
380 * input device.
381 *
382 * Parameters:
383 * buffer Address of buffer to receive command
384 * bufsize Size of buffer in bytes
385 * prompt Pointer to string to use as prompt string
386 * Returns:
387 * Pointer to command buffer.
388 * Locking:
389 * None.
390 * Remarks:
391 * For SMP kernels, the processor number will be
392 * substituted for %d, %x or %o in the prompt.
393 */
394
395 char *
396 kdb_getstr(char *buffer, size_t bufsize, char *prompt)
397 {
398 if(prompt && kdb_prompt_str!=prompt)
399 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);
400 kdb_printf(kdb_prompt_str);
401 kdb_nextline = 1; /* Prompt and input resets line number */
402 return kdb_read(buffer, bufsize);
403 }
而kdb_read()中具体是怎么读的呢?其它不用说,还是凭男人的直觉就知道,一定是轮询.事实也的确是如此,不过我们暂且不去深究,先把这个函数跳过去,以尽快结束我们kdb()这个大函数.
甭管读到什么,总之当且仅当我们读到一些东西的时候,kdb_getstr()才会返回,从而cmdbuf里是一定有东西了,也就是说你一定是输入了一些东西了,而在1374行,kdb_parse(),就会执行命令.执行命令的返回值赋给了diag.当然有很多种情况,最后1392行,也就把diag给返回了.也就是说我们回到了kdb_main_loop().
在这里kdb_local的返回值赋给了result,下面的代码就是拿着result去分析来分析去,设置各种flag,然后最终又把result作为返回值给返回了.如果你想退出kdb,你可以输入“go”,这种情况diag以及之后的result就都是KDB_CMD_GO.
我们继续回溯,kdb_main_loop的结束引发了kdba_main_loop()也返回.返回值都是一样的.直到此刻,我们终于再一次回到了kdb().
这之后,kdba_adjust_ip()被调用,不过对x86来说,这是个空函数,飘过.
再之后,我们看到WAIT_IPI,HOLD_CPU给清掉了.与此同时,设置了LEAVING flag.
然后又出现了一个疑似死循环,while中调用了一个比较重要的函数,kdb_previous_event(),来自kdb/kdbmain.c:
1418 /*
1419 * kdb_previous_event
1420 *
1421 * Return a count of cpus that are leaving kdb, i.e. the number
1422 * of processors that are still handling the previous kdb event.
1423 *
1424 * Inputs:
1425 * None.
1426 * Returns:
1427 * Count of cpus in previous event.
1428 * Locking:
1429 * none
1430 * Remarks:
1431 * none
1432 */
1433
1434 static int
1435 kdb_previous_event(void)
1436 {
1437 int i, leaving = 0;
1438 for (i = 0; i < NR_CPUS; ++i) {
1439 if (KDB_STATE_CPU(LEAVING, i))
1440 ++leaving;
1441 }
1442 return leaving;
1443 }
这个函数统计的就是有几个cpu对应的LEAVING flag被设置了,实际情况是我们刚刚为每个CPU设置了LEAVING,因此对于多处理器的机器来说,这里的返回值肯定不是1.而while循环中说,如果kdb_previous_event返回值不为1,则while循环就像永不消逝的电波一样,它反反复复,它生生不息,它试图摆脱轮回的束缚,但是,它能摆脱吗?
为了看看这个循环是否真的是死循环,我们得先回去看一下那些spin in kdb_main_loop()中的cpu,要知道它们循环的条件是HOLD_CPU被设置了,而我们现在看到这个flag终于被清除掉了,这就意味着各个cpu都将结束该循环.
而回过去仔细看kdb_main_loop(),你会发现由于LEAVING flag的设置,使得各个cpu将结束kdb_main_loop(),返回值result为1,然后kdba_main_loop()也返回了,然后由于返回值为1,使得kdb()也将最终返回.但是在返回之前,2091行,LEAVING也将被清除.也就是说,对于那些spin的cpu,它们所对应的LEAVING flag是已经被清除掉了,只剩下这个in control的cpu还设置了这个flag,因此,这时候,kdb_previous_event()将等于1,于是,这个疑似死循环终于可以结束了.
最后作一些恢复工作之后,永垂不朽的kdb()函数也终于垂了朽了.到这一刻,所有的cpu都正式离开了kdb(),从此世界清静了,人民群众的生活再次回归宁静,各个进程又按着往日的习惯运转着,执行着,睡眠着.