graminit.c 中定义了一些 struct, 它们是理解和实现 DFA 的关键:
static dfa dfas[81] = { {256, /* d_type */ "single_input", /* char *d_name */ 0, /* d_initial */ 3, /* d_nstates */ states_0, /* state *d_state */ "\004\050\060\200\000\000\000\240\340\223\160\220\045\200\020\000\000\206\120\076\204" /* bitset d_first => char *d_first */ }, ..... }; typedef struct { int d_type; /* Non-terminal this represents */ char *d_name; /* For printing */ int d_initial; /* Initial state */ int d_nstates; state *d_state; /* Array of states */ bitset d_first; } dfa; typedef struct { int s_narcs; arc *s_arc; /* Array of arcs */ /* Optional accelerators */ int s_lower; /* Lowest label index */ int s_upper; /* Highest label index */ int *s_accel; /* Accelerator */ int s_accept; /* Nonzero for accepting state */ } state; static state states_0[3] = { {3, arcs_0_0}, {1, arcs_0_1}, {1, arcs_0_2}, }; static arc arcs_0_0[3] = { {2, 1}, {3, 1}, {4, 2}, }; /* An arc from one state to another */ typedef struct { short a_lbl; /* Label of this arc */ short a_arrow; /* State where this arc goes to */ } arc; /* A grammar */ typedef struct { int g_ndfas; dfa *g_dfa; /* Array of DFAs */ labellist g_ll; int g_start; /* Start symbol of the grammar */ int g_accel; /* Set if accelerators present */ } grammar; /* A list of labels */ typedef struct { int ll_nlabels; label *ll_label; } labellist; static label labels[168] = { {0, "EMPTY"}, ... {49, 0}, {1, "del"}, {326, 0}, {1, "pass"}, {277, 0}, {278, 0}, {279, 0}, {281, 0}, {280, 0}, {1, "break"}, {1, "continue"}, {1, "return"}, {1, "raise"}, {1, "from"}, {283, 0}, {284, 0}, {1, "import"}, .... }; /* A label of an arc */ typedef struct { int lb_type; char *lb_str; } label; /* Parser 的核心数据结构 */ grammar _PyParser_Grammar = { 81, /* int g_ndfas; */ dfas, /* dfa *g_dfa */ {168, labels}, /* labellist => (ll_nlabels, label *ll_label), labels 定义在前面 */ 256 /* int g_start */ }; Python 启动的时候,
会对 grammar 对象里面每个 dfa 对象的每个 state 对象的
int *s_accel; /* Accelerator */ 数组进行赋值。
以第一个 dfa object 为例, 其定义为 { 256, "single_input", 0, 3, states_0, "\004\050\060\200\000\000\000\240\340\223\160\220\045\200\020\000\000\206\120\076\204" } 它有三个 state: static state states_0[3] = { {3, arcs_0_0}, /* 3 个 arc */ {1, arcs_0_1}, {1, arcs_0_2}, }; static arc arcs_0_0[3] = { {2, 1}, /* {a_lbl, a_arrow}: a_lbl is the label of this arc, a_arrow is the state where this arc goes to */ {3, 1}, {4, 2}, }; static arc arcs_0_1[1] = { {0, 1}, }; static arc arcs_0_2[1] = { {2, 1}, }; 取第一个 state 的第一个 arc {2, 1} 的 a_lbl (2 in this case), 得到 static label labels[168] 中第 a_lbl (2) 个 label 对象 { 4 /* int lb_type */, 0 /* char *lb_str */ }。lb_type 是 4, 和 #define NT_OFFSET 256 比较, 小于 NT_OFFSET, 并且不为 0, 则 accel[2] = arc->a_arrow, 也就是 accel[2] = 1。接着取第二个 arc {3, 1}, 然后取 label 对象 labels[3], 即 {269, 0}。而 269 >= NT_OFFSET, 所以调用 dfa *d1 = PyGrammar_FindDFA(g, 269); 该函数是从 static dfa dfas[81] 中找到第 269 - NT_OFFSET 个 dfa object (No 13 in this case), 即 {269, "simple_stmt", 0, 4, states_13, "\000\040\040\200\000\000\000\240\340\223\160\000\000\200\020\000\000\206\120\076\200"}, /*comments: 从这点也可以看出为什么 dfas 从 256 开始编号, 依次递增*/ 然后对 "\000\040\040\200\000\000\000\240\340\223\160\000\000\200\020\000\000\206\120\076\200" 进行 testbit 测试, 作用见前一篇博客。这里首先得到 ibit 是 13, 于是 accel[13] = arc->a_arrow | (1 << 7) | ((269 - NT_OFFSET) << 8) = 二进制 1101,10000001 (即 0x0D81) /*comments: 从二进制角度看, 逗号前面的 1101 (= 13) 代表 dfa 的下标, 后面的 10000001 是标志位 (1 << 7) 加上 arrow (= 1) */ 继续 testbit 测试, 接着得到 ibit 是 21, accel[21] = 1101,10000001; 之后 ibit 依次为 31, 61, 63, 69, 70, 71.... 测试完毕后取第三个 arc (此处作者略去1000字)。最后 while (nl > 0 && accel[nl-1] == -1) nl--; for (k = 0; k < nl && accel[k] == -1;) k++; 找出第一个不为 -1 的下标 k, 以及最后一个不为 -1 的下标加一 nl, 最后得到该 state 的 s->s_lower = k; s->s_upper = nl; for (i = 0; k < nl; i++, k++) s->s_accel[i] = accel[k]; 即把 accel[k..nl-1] 集中赋给从 0 开始的 s->s_accel。终于处理完了一个 state....