scan_token()函数很重要,但ghostscript写得比较琐碎难懂,代码里只有部分地方带有英文注释。
我只对自己关注的部分加了些中文注释。当然并不是所有代码都理解了,但主要功能(例如某些函数接口)还是弄清楚了的。
如果让我来写,我一定比他写得更清晰。哈哈,当然,他的scan_token的基本框架
设计我还是很赞同的。另外一个很难懂的interp函数也给我同样的感觉:很琐碎,但思想比较好,比纯递归高明些。
/*
 * Read a token from a stream.  Return 0 if an ordinary token was read,
 * >0 for special situations (see iscan.h).
 * If the token required a terminating character (i.e., was a name or
 * number) and the next character was whitespace, read and discard
 * that character.  Note that the state is relevant for e_VMerror
 * as well as for scan_Refill.
 *
 * Parameters:
 *   i_ctx_p - interpreter context; supplies scanner_options and is passed
 *             on to the helper procedures called below.
 *   pref    - where the scanned token is stored when scanning at top level.
 *   pstate  - scanner state.  pstate->s_file designates the stream to read;
 *             when pstate->s_scan_type != scanning_none the scan resumes
 *             from the saved state.  The state is written back whenever
 *             the function pauses or suspends.
 *
 * Returns 0 for an ordinary token, scan_EOF at end of input, scan_Refill
 * when more input is needed, a non-zero code from scan_comment for a
 * processed comment, or a negative error code.
 */
int
scan_token(i_ctx_t *i_ctx_p, ref *pref, scanner_state *pstate)
{
    stream *const s = pstate->s_file.value.pfile;
    ref *myref = pref;
    int retcode = 0;
    int c;

    /*
     * For reference, the annotator quotes the inline stream macros as:
     *   s_declare_inline(s, cp, ep):  register const byte *cp; const byte *ep
     *   s_begin_inline(s, cp, ep):    cp = (s)->srptr, ep = (s)->srlimit
     *   s_end_inline(s, cp, ep):      (s)->srptr = cp
     *   sputback_inline(s, cp, ep):   --cp
     */
    s_declare_inline(s, sptr, endptr);
#define scan_begin_inline() s_begin_inline(s, sptr, endptr)
#define scan_getc() sgetc_inline(s, sptr, endptr)
#define scan_putback() sputback_inline(s, sptr, endptr)
#define scan_end_inline() s_end_inline(s, sptr, endptr)
    const byte *newptr;
    byte *daptr;

    /* Record an error code and leave through the common error exit. */
#define sreturn(code)\
  { retcode = gs_note_error(code); goto sret; }
    /*
     * Push one slot on the operand stack; the statement that follows the
     * macro invocation is the "else" arm, executed only if the push fails.
     */
#define if_not_spush1()\
  if ( osp < ostop ) osp++;\
  else if ( (retcode = ref_stack_push(&o_stack, 1)) >= 0 )\
    ;\
  else
    /* Pop one slot from the operand stack. */
#define spop1()\
  if ( osp >= osbot ) osp--;\
  else ref_stack_pop(&o_stack, 1)
    int max_name_ctype =
        (recognize_btokens()? ctype_name : ctype_btoken);

    /*
     * Used for numbers: consume an optional leading '+' or '-' at *ptr and
     * record it in sign (-1, 1, or 0 when there is no sign).
     */
#define scan_sign(sign, ptr)\
  switch ( *ptr ) {\
    case '-': sign = -1; ptr++; break;\
    case '+': sign = 1; ptr++; break;\
    default: sign = 0;\
  }
    /* Back up nback characters, remember the scan type, and pause. */
#define refill2_back(styp,nback)\
  BEGIN sptr -= nback; scan_type = styp; goto pause; END
#define ensure2_back(styp,nback)\
  if ( sptr >= endptr ) refill2_back(styp,nback)
#define ensure2(styp) ensure2_back(styp, 1)
#define refill2(styp) refill2_back(styp, 1)
    byte s1[2];
    const byte *const decoder = scan_char_decoder;
    int status;
    int sign;
    const bool check_only = (pstate->s_options & SCAN_CHECK_ONLY) != 0;
    const bool PDFScanRules = (i_ctx_p->scanner_options & SCAN_PDF_RULES) != 0;
    /*
     * The following is a hack so that ^D will be self-delimiting in PS files
     * (to compensate for bugs in some PostScript-generating applications)
     * but not in strings (to match CPSI on the CET) or PDF.
     */
    const int ctrld = (pstate->s_options & SCAN_FROM_STRING ||
                       PDFScanRules ? 0x04 : 0xffff);
    scanner_state sstate;

#define pstack sstate.s_pstack
#define pdepth sstate.s_pdepth
#define scan_type sstate.s_scan_type
#define da sstate.s_da
#define name_type sstate.s_ss.s_name.s_name_type
#define try_number sstate.s_ss.s_name.s_try_number

    sptr = endptr = NULL;       /* Quiet compiler */
    if (pstate->s_pstack != 0) {
        /* We are inside a procedure: reserve a stack slot for the token. */
        if_not_spush1()
            return retcode;
        myref = osp;
    }
    /* Check whether we are resuming after an interruption. */
    if (pstate->s_scan_type != scanning_none) {
        sstate = *pstate;
        if (!da.is_dynamic && da.base != da.buf) {
            /* The da contains some self-referencing pointers. */
            /* Fix them up now. */
            uint next = da.next - da.base;
            uint limit = da.limit - da.base;

            da.base = da.buf;
            da.next = da.buf + next;
            da.limit = da.buf + limit;
        }
        daptr = da.next;
        switch (scan_type) {
            case scanning_binary:
                retcode = (*sstate.s_ss.binary.cont)
                    (i_ctx_p, myref, &sstate);
                scan_begin_inline();
                if (retcode == scan_Refill)
                    goto pause;
                goto sret;
            case scanning_comment:
                scan_begin_inline();
                goto cont_comment;
            case scanning_name:
                goto cont_name;
            case scanning_string:
                goto cont_string;
            default:
                return_error(e_Fatal);
        }
    }
    /* Fetch any state variables that are relevant even if */
    /* scan_type == scanning_none. */
    pstack = pstate->s_pstack;
    pdepth = pstate->s_pdepth;
    ref_assign(&sstate.s_file, &pstate->s_file);
    sstate.s_options = pstate->s_options;
    SCAN_INIT_ERROR(&sstate);
    scan_begin_inline();
    /*
     * Loop invariants:
     *      If pstack != 0, myref = osp, and *osp is a valid slot.
     */
  top:c = scan_getc();
    if_debug1('S', (c >= 32 && c <= 126 ? "`%c'" :
                    c >= 0 ? "`\\%03o'" : "`%d'"), c);
    switch (c) {
        case ' ':
        case '\f':
        case '\t':
        case char_CR:
        case char_EOL:
        case char_NULL:
            /* The characters above are skipped. */
            goto top;
        case 0x04:              /* see ctrld above */
            if (c == ctrld)     /* treat as ordinary name char */
                goto begin_name;
            /* otherwise ^D is a one-character name: falls through */
        case '[':
        case ']':
            /*
             * Annotator's note: [ ... ] and { ... } are handled differently.
             * Each element inside [] is executed (i.e. goes through interp),
             * whereas elements inside {} are not executed but simply pushed
             * on the stack.  Here '[' and ']' just become one-character
             * executable names.
             * NOTE(review): the annotator states that ']' is carried out by
             * two operators, zcounttomark and array -- confirm elsewhere.
             */
            s1[0] = (byte) c;
            /*
             * NOTE(review): per the annotator, name_ref looks the string up
             * in the name table, enters it if absent, and makes myref refer
             * to it; the procedure bound to the name is run later by interp.
             */
            retcode = name_ref(imemory, s1, 1, myref, 1);       /* can't fail */
            r_set_attrs(myref, a_executable);
            break;
            /*
             * Strings:
             *   hexadecimal data, enclosed in < and >
             *   ASCII base-85 data, enclosed in <~ and ~>
             * Dictionaries:
             *   << key1 value1 key2 value2 ... keyn valuen >>
             */
        case '<':
            if (scan_enable_level2) {
                ensure2(scanning_none);
                c = scan_getc();
                switch (c) {
                    case '<':
                        /* Dictionary operator "<< ... >>". */
                        scan_putback();
                        name_type = 0;
                        try_number = false;
                        goto try_funny_name;
                    case '~':
                        /* ASCII85 string <~ ... ~>. */
                        s_A85D_init_inline(&sstate.s_ss.a85d);
                        sstate.s_ss.st.template = &s_A85D_template;
                        goto str;
                }
                scan_putback();
            }
            /* Plain hexadecimal string. */
            s_AXD_init_inline(&sstate.s_ss.axd);
            sstate.s_ss.st.template = &s_AXD_template;
          str:scan_end_inline();
            dynamic_init(&da, imemory);
          cont_string:for (;;) {
                stream_cursor_write w;

                w.ptr = da.next - 1;
                w.limit = da.limit - 1;
                /*
                 * Decode string data through the process procedure of the
                 * template selected above (A85D, AXD, or PSSD for '(').
                 */
                status = (*sstate.s_ss.st.template->process)
                    (&sstate.s_ss.st, &s->cursor.r, &w,
                     s->end_status == EOFC);
                if (!check_only)
                    da.next = w.ptr + 1;
                switch (status) {
                    case 0:
                        /* Input exhausted: look at the stream status. */
                        status = s->end_status;
                        if (status < 0) {
                            if (status == EOFC) {
                                if (check_only) {
                                    retcode = scan_Refill;
                                    scan_type = scanning_string;
                                    goto suspend;
                                } else
                                    sreturn(e_syntaxerror);
                            }
                            break;
                        }
                        s_process_read_buf(s);
                        continue;
                    case 1:
                        /* Output full: enlarge the dynamic area. */
                        if (!check_only) {
                            retcode = dynamic_grow(&da, da.next,
                                                   max_string_size);
                            if (retcode == e_VMerror) {
                                scan_type = scanning_string;
                                goto suspend;
                            } else if (retcode < 0)
                                sreturn(retcode);
                        }
                        continue;
                }
                break;
            }
            scan_begin_inline();
            switch (status) {
                default:
                    /*case ERRC: */
                    sreturn(e_syntaxerror);
                case INTC:
                case CALLC:
                    scan_type = scanning_string;
                    goto pause;
                case EOFC:
                    ;
            }
            retcode = dynamic_make_string(i_ctx_p, myref, &da, da.next);
            if (retcode < 0) {  /* VMerror */
                sputback(s);    /* rescan ) */
                scan_type = scanning_string;
                goto suspend;
            }
            break;
        case '(':
            sstate.s_ss.pssd.from_string =
                ((pstate->s_options & SCAN_FROM_STRING) != 0) &&
                !scan_enable_level2;
            s_PSSD_partially_init_inline(&sstate.s_ss.pssd);
            sstate.s_ss.st.template = &s_PSSD_template;
            goto str;
        case '{':
            /* Compare with '[': {} and [] are processed differently. */
            if (pstack == 0) {  /* outermost procedure */
                if_not_spush1() {
                    scan_putback();
                    scan_type = scanning_none;
                    goto pause_ret;
                }
                pdepth = ref_stack_count_inline(&o_stack);
            }
            /* Save the enclosing procedure's base on the stack. */
            make_int(osp, pstack);
            pstack = ref_stack_count_inline(&o_stack);
            if_debug3('S', "[S{]d=%d, s=%d->%d\n",
                      pdepth, (int)osp->value.intval, pstack);
            goto snext;
        case '>':
            if (scan_enable_level2) {
                ensure2(scanning_none);
                name_type = 0;
                try_number = false;
                goto try_funny_name;
            }
            /* falls through */
        case ')':
            /* A matching ')' is consumed while handling '('. */
            sreturn(e_syntaxerror);
        case '}':
            if (pstack == 0)
                sreturn(e_syntaxerror);
            osp--;
            {
                uint size = ref_stack_count_inline(&o_stack) - pstack;
                ref arr;

                if_debug4('S', "[S}]d=%d, s=%d->%d, c=%d\n",
                          pdepth, pstack,
                          (pstack == pdepth ? 0 :
                           ref_stack_index(&o_stack, size)->value.intval),
                          size + pstack);
                /* The outermost procedure goes to pref, inner ones to arr. */
                myref = (pstack == pdepth ? pref : &arr);
                if (check_only) {
                    make_empty_array(myref, 0);
                    ref_stack_pop(&o_stack, size);
                } else if (ref_array_packing.value.boolval) {
                    retcode = make_packed_array(myref, &o_stack, size,
                                                idmemory, "scanner(packed)");
                    if (retcode < 0) {  /* must be VMerror */
                        osp++;
                        scan_putback();
                        scan_type = scanning_none;
                        goto pause_ret;
                    }
                    r_set_attrs(myref, a_executable);
                } else {
                    /* Allocate an array for the procedure body. */
                    retcode = ialloc_ref_array(myref,
                                               a_executable + a_all, size,
                                               "scanner(proc)");
                    if (retcode < 0) {  /* must be VMerror */
                        osp++;
                        scan_putback();
                        scan_type = scanning_none;
                        goto pause_ret;
                    }
                    /*
                     * NOTE(review): per the annotator this is the key step,
                     * copying the top `size` objects of o_stack into myref.
                     */
                    retcode = ref_stack_store(&o_stack, myref, size, 0, 1,
                                              false, idmemory, "scanner");
                    if (retcode < 0) {
                        ifree_ref_array(myref, "scanner(proc)");
                        sreturn(retcode);
                    }
                    ref_stack_pop(&o_stack, size);
                }
                if (pstack == pdepth) {
                    /* This was the top-level procedure. */
                    spop1();
                    pstack = 0;
                } else {
                    if (osp < osbot)
                        ref_stack_pop_block(&o_stack);
                    /* Restore the enclosing procedure's base. */
                    pstack = osp->value.intval;
                    *osp = arr;
                    goto snext;
                }
            }
            break;
        case '/':
            /*
             * If the last thing in the input is a '/', don't try to read
             * any more data.
             */
            /* Literal names come in two forms: /name and //name. */
            if (sptr >= endptr && s->end_status != EOFC) {
                refill2(scanning_none);
            }
            c = scan_getc();
            if (!PDFScanRules && (c == '/')) {
                name_type = 2;
                c = scan_getc();
            } else
                name_type = 1;
            try_number = false;
            switch (decoder[c]) {
                case ctype_name:
                default:
                    goto do_name;
                case ctype_btoken:
                    if (!recognize_btokens())
                        goto do_name;
                    /* otherwise, an empty name */
                    /* falls through */
                case ctype_exception:
                case ctype_space:
                    /*
                     * Amazingly enough, the Adobe implementations don't
                     * accept a single or double slash followed by
                     * [, ], <<, or >>, so we do the same.
                     * (Older versions of our code had a ctype_other case
                     * here that handled these specially.)
                     */
                case ctype_other:
                    if (c == ctrld)     /* see above */
                        goto do_name;
                    /* Empty name. */
                    da.base = da.limit = daptr = 0;
                    da.is_dynamic = false;
                    goto nx;
            }
        case '%':
            /* Process a comment. */
            {                   /* Scan as much as possible within the buffer. */
                const byte *base = sptr;
                const byte *end;

                while (++sptr < endptr)         /* stop 1 char early */
                    switch (*sptr) {
                        case char_CR:
                            end = sptr;
                            if (sptr[1] == char_EOL)
                                sptr++;
                          cend: /* Check for externally processed comments. */
                            retcode = scan_comment(i_ctx_p, myref, &sstate,
                                                   base, end, false);
                            if (retcode != 0)
                                goto comment;
                            goto top;
                        case char_EOL:
                        case '\f':
                            end = sptr;
                            goto cend;
                    }
                /*
                 * We got to the end of the buffer while inside a comment.
                 * If there is a possibility that we must pass the comment
                 * to an external procedure, move what we have collected
                 * so far into a private buffer now.
                 */
#define comment_line da.buf
                --sptr;
                comment_line[1] = 0;
                {
                    /* Could be an externally processable comment. */
                    uint len = sptr + 1 - base;
                    if (len > sizeof(comment_line))
                        len = sizeof(comment_line);

                    memcpy(comment_line, base, len);
                    daptr = comment_line + len;
                }
                da.base = comment_line;
                da.is_dynamic = false;
            }
            /* Enter here to continue scanning a comment. */
            /* daptr must be set. */
          cont_comment:for (;;) {
                switch ((c = scan_getc())) {
                    default:
                        if (c < 0)
                            switch (c) {
                                case INTC:
                                case CALLC:
                                    da.next = daptr;
                                    scan_type = scanning_comment;
                                    goto pause;
                                case EOFC:
                                    /*
                                     * One would think that an EOF in a comment
                                     * should be a syntax error, but there are
                                     * quite a number of files that end that way.
                                     */
                                    goto end_comment;
                                default:
                                    sreturn(e_syntaxerror);
                            }
                        /* Characters beyond the line limit are dropped. */
                        if (daptr < comment_line + max_comment_line)
                            *daptr++ = c;
                        continue;
                    case char_CR:
                    case char_EOL:
                    case '\f':
                      end_comment:
                        retcode = scan_comment(i_ctx_p, myref, &sstate,
                                               comment_line, daptr, true);
                        if (retcode != 0)
                            goto comment;
                        goto top;
                }
            }
#undef comment_line
            /*NOTREACHED */
        case EOFC:
            if (pstack != 0) {
                /* EOF inside an unfinished procedure. */
                if (check_only)
                    goto pause;
                sreturn(e_syntaxerror);
            }
            retcode = scan_EOF;
            break;
        case ERRC:
            sreturn(e_ioerror);

            /* Check for a Level 2 funny name (<< or >>). */
            /* c is '<' or '>'.  We already did an ensure2. */
          try_funny_name:
            {
                int c1 = scan_getc();

                if (c1 == c) {
                    s1[0] = s1[1] = c;
                    name_ref(imemory, s1, 2, myref, 1);         /* can't fail */
                    goto have_name;
                }
                scan_putback();
            }
            sreturn(e_syntaxerror);

            /* Handle separately the names that might be a number. */
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
        case '.':
            sign = 0;
          nr:
            /*
             * Skip a leading sign, if any, by conditionally passing
             * sptr + 1 rather than sptr.  Also, if the last character
             * in the buffer is a CR, we must stop the scan 1 character
             * early, to be sure that we can test for CR+LF within the
             * buffer, by passing endptr rather than endptr + 1.
             */
            /* A token that starts like a number may still be a name. */
            retcode = scan_number(sptr + (sign & 1),
                    endptr /*(*endptr == char_CR ? endptr : endptr + 1) */ ,
                                  sign, myref, &newptr,
                                  i_ctx_p->scanner_options);
            /*
             * Only when scan_number returns 1 and the character it stopped
             * after is whitespace do we accept the number here; in every
             * other case the token is rescanned as a name (goto do_name).
             */
            if (retcode == 1 && decoder[newptr[-1]] == ctype_space) {
                /* Ordinary numbers take this path. */
                sptr = newptr - 1;
                if (*sptr == char_CR && sptr[1] == char_EOL)
                    sptr++;
                retcode = 0;
                ref_mark_new(myref);
                break;
            }
            /* E.g. "2222_": a name that begins with digits. */
            name_type = 0;
            try_number = true;
            goto do_name;
        case '+':
            sign = 1;
            goto nr;
        case '-':
            sign = -1;
            goto nr;

            /* Check for a binary object */
#define case4(c) case c: case c+1: case c+2: case c+3
          case4(128): case4(132): case4(136): case4(140):
          case4(144): case4(148): case4(152): case4(156):
#undef case4
            if (recognize_btokens()) {
                scan_end_inline();
                retcode = scan_binary_token(i_ctx_p, myref, &sstate);
                scan_begin_inline();
                if (retcode == scan_Refill)
                    goto pause;
                break;
            }
            /* Not a binary object, fall through. */

            /* The default is a name. */
        default:
            if (c < 0) {
                dynamic_init(&da, name_memory(imemory));        /* da state must be clean */
                scan_type = scanning_none;
                goto pause;
            }
            /* Populate the switch with enough cases to force */
            /* simple compilers to use a dispatch rather than tests. */
        case '!':
        case '"':
        case '#':
        case '$':
        case '&':
        case '\'':
        case '*':
        case ',':
        case '=':
        case ':':
        case ';':
        case '?':
        case '@':
        case 'A':
        case 'B':
        case 'C':
        case 'D':
        case 'E':
        case 'F':
        case 'G':
        case 'H':
        case 'I':
        case 'J':
        case 'K':
        case 'L':
        case 'M':
        case 'N':
        case 'O':
        case 'P':
        case 'Q':
        case 'R':
        case 'S':
        case 'T':
        case 'U':
        case 'V':
        case 'W':
        case 'X':
        case 'Y':
        case 'Z':
        case '\\':
        case '^':
        case '_':
        case '`':
        case 'a':
        case 'b':
        case 'c':
        case 'd':
        case 'e':
        case 'f':
        case 'g':
        case 'h':
        case 'i':
        case 'j':
        case 'k':
        case 'l':
        case 'm':
        case 'n':
        case 'o':
        case 'p':
        case 'q':
        case 'r':
        case 's':
        case 't':
        case 'u':
        case 'v':
        case 'w':
        case 'x':
        case 'y':
        case 'z':
        case '|':
        case '~':
          begin_name:
            /* Common code for scanning a name. */
            /* try_number and name_type are already set. */
            /* We know c has ctype_name (or maybe ctype_btoken, */
            /* or is ^D) or is a digit. */
            name_type = 0;
            try_number = false;
          do_name:
            /* Try to scan entirely within the stream buffer. */
            /* We stop 1 character early, so we don't switch buffers */
            /* looking ahead if the name is terminated by \r\n. */
            da.base = (byte *) sptr;
            da.is_dynamic = false;
            {
                const byte *endp1 = endptr - 1;

                do {
                    if (sptr >= endp1)  /* stop 1 early! */
                        goto dyn_name;
                }
                while (decoder[*++sptr] <= max_name_ctype ||
                       *sptr == ctrld);         /* digit or name */
            }
            /* Name ended within the buffer. */
            daptr = (byte *) sptr;
            c = *sptr;
            goto nx;
          dyn_name:             /* Name extended past end of buffer. */
            scan_end_inline();
            /* Initialize the dynamic area. */
            /* We have to do this before the next */
            /* sgetc, which will overwrite the buffer. */
            da.limit = (byte *)++sptr;
            da.memory = name_memory(imemory);
            retcode = dynamic_grow(&da, da.limit, name_max_string);
            if (retcode < 0) {
                dynamic_save(&da);
                if (retcode != e_VMerror)
                    sreturn(retcode);
                scan_type = scanning_name;
                goto pause_ret;
            }
            daptr = da.next;
            /* Enter here to continue scanning a name. */
            /* daptr must be set. */
          cont_name:scan_begin_inline();
            while (decoder[c = scan_getc()] <= max_name_ctype || c == ctrld) {
                if (daptr == da.limit) {
                    retcode = dynamic_grow(&da, daptr,
                                           name_max_string);
                    if (retcode < 0) {
                        dynamic_save(&da);
                        if (retcode != e_VMerror)
                            sreturn(retcode);
                        scan_putback();
                        scan_type = scanning_name;
                        goto pause_ret;
                    }
                    daptr = da.next;
                }
                *daptr++ = c;
            }
            /* Dispose of the character that terminated the name. */
          nx:switch (decoder[c]) {
                case ctype_other:
                    if (c == ctrld)     /* see above */
                        break;
                    /* falls through */
                case ctype_btoken:
                    scan_putback();
                    break;
                case ctype_space:
                    /* Check for \r\n */
                    if (c == char_CR) {
                        if (sptr >= endptr) {   /* ensure2 */
                            /* We have to check specially for */
                            /* the case where the very last */
                            /* character of a file is a CR. */
                            if (s->end_status != EOFC) {
                                sptr--;
                                goto pause_name;
                            }
                        } else if (sptr[1] == char_EOL)
                            sptr++;
                    }
                    break;
                case ctype_exception:
                    switch (c) {
                        case INTC:
                        case CALLC:
                            goto pause_name;
                        case ERRC:
                            sreturn(e_ioerror);
                        case EOFC:
                            break;
                    }
            }
            /* Check for a number */
            if (try_number) {
                const byte *base = da.base;

                scan_sign(sign, base);
                retcode = scan_number(base, daptr, sign, myref, &newptr,
                                      i_ctx_p->scanner_options);
                /*
                 * NOTE(review): presumably scan_number returns 0 when the
                 * whole token is a number and 1 when a numeric prefix is
                 * followed by other characters -- confirm against its
                 * definition.  With 1 or e_syntaxerror we carry on and
                 * treat the token as a name; any other code ends the scan.
                 */
                if (retcode == 1) {
                    ref_mark_new(myref);
                    retcode = 0;
                } else if (retcode != e_syntaxerror) {
                    dynamic_free(&da);
                    if (name_type == 2)
                        sreturn(e_syntaxerror);
                    break;      /* might be e_limitcheck */
                }
            }
            if (da.is_dynamic) {
                /* We've already allocated the string on the heap. */
                uint size = daptr - da.base;

                /*
                 * Annotator's note: a token such as "2222_" reaches this
                 * point and is entered as a name, not a number, even though
                 * scan_number was already tried on it above.
                 */
                retcode = name_ref(imemory, da.base, size, myref, -1);
                if (retcode >= 0) {
                    dynamic_free(&da);
                } else {
                    retcode = dynamic_resize(&da, size);
                    if (retcode < 0) {  /* VMerror */
                        if (c != EOFC)
                            scan_putback();
                        scan_type = scanning_name;
                        goto pause_ret;
                    }
                    retcode = name_ref(imemory, da.base, size, myref, 2);
                }
            } else {
                retcode = name_ref(imemory, da.base, (uint) (daptr - da.base),
                                   myref, !s->foreign);
            }
            /* Done scanning.  Check for preceding /'s. */
            if (retcode < 0) {
                if (retcode != e_VMerror)
                    sreturn(retcode);
                if (!da.is_dynamic) {
                    da.next = daptr;
                    dynamic_save(&da);
                }
                if (c != EOFC)
                    scan_putback();
                scan_type = scanning_name;
                goto pause_ret;
            }
          have_name:switch (name_type) {
                case 0: /* ordinary executable name (no leading slash) */
                    if (r_has_type(myref, t_name))      /* i.e., not a number */
                        r_set_attrs(myref, a_executable);
                    /* falls through */
                case 1: /* quoted name: the /name form */
                    break;
                case 2: /* immediate lookup: the //name form */
                    {
                        /* The name must be found in a dictionary right now. */
                        ref *pvalue;

                        if (!r_has_type(myref, t_name) ||
                            (pvalue = dict_find_name(myref)) == 0) {
                            ref_assign(&sstate.s_error.object, myref);
                            r_set_attrs(&sstate.s_error.object,
                                        a_executable);  /* Adobe compatibility */
                            sreturn(e_undefined);
                        }
                        if (pstack != 0 &&
                            r_space(pvalue) > ialloc_space(idmemory)
                            )
                            sreturn(e_invalidaccess);
                        ref_assign_new(myref, pvalue);
                    }
            }
    }
  sret:if (retcode < 0) {
        scan_end_inline();
        pstate->s_error = sstate.s_error;
        if (pstack != 0) {
            if (retcode == e_undefined)
                *pref = *osp;   /* return undefined name as error token */
            /* Discard everything pushed for the unfinished procedure(s). */
            ref_stack_pop(&o_stack,
                          ref_stack_count(&o_stack) - (pdepth - 1));
        }
        return retcode;
    }
    /* If we are at the top level, return the object, */
    /* otherwise keep going. */
    if (pstack == 0) {
        scan_end_inline();
        return retcode;
    }
  snext:if_not_spush1() {
        scan_end_inline();
        scan_type = scanning_none;
        goto save;
    }
    myref = osp;
    goto top;

    /* Pause for an interrupt or callout. */
  pause_name:
    /* If we're still scanning within the stream buffer, */
    /* move the characters to the private buffer (da.buf) now. */
    da.next = daptr;
    dynamic_save(&da);
    scan_type = scanning_name;
  pause:
    retcode = scan_Refill;
  pause_ret:
    scan_end_inline();
  suspend:
    if (pstack != 0)
        osp--;                  /* myref */
  save:
    /* Hand the working state back to the caller for later resumption. */
    *pstate = sstate;
    return retcode;

    /* Handle a scanned comment. */
  comment:
    if (retcode < 0)
        goto sret;
    scan_end_inline();
    scan_type = scanning_none;
    goto save;
}