编译原理项目

PLO编译器分析

PL0文法

  • Program → Block.
    program

  • Block → [ConstDecl][VarDecl][ProcDecl] Stmt
    程序块的基本结构是:常量定义、变量定义、过程定义、语句

  • ConstDecl → const ConstDef {, ConstDef} ;
    常量定义

  • ConstDef → ident = number

  • VarDecl → var ident {, ident} ;
    变量定义

  • ProcDecl → procedure ident ; Block ; {procedure ident ; Block ;}
    过程定义
    编译原理项目_第1张图片

  • Stmt → ident := Exp | call ident | begin Stmt {; Stmt} end | if Cond then Stmt | while Cond do Stmt | ε
    赋值语句、调用语句、begin-end块语句、判断语句、循环语句
    编译原理项目_第2张图片

  • Cond → odd Exp | Exp RelOp Exp
    条件表达式

  • RelOp → = | <> | < | > | <= | >=
    关系运算符
    编译原理项目_第3张图片

  • Exp → [+ | − ] Term {+ Term | − Term}
    表达式
    编译原理项目_第4张图片

  • Term → Factor {∗ Factor | / Factor}

    编译原理项目_第5张图片

  • Factor → ident | number | ( Exp )
    因子
    编译原理项目_第6张图片

  • ident
    字母开头的字母/数字串

  • numbers
    无符号整数

PL0指令集定义

PL/0的目标代码放在一个固定的存储数组中,而其中所需的数据组织成一个栈的形式存放。
它的中间语言是一种栈机器代码,其指令集结构如下。

指令/F 含义 L A
LIT 将常数置于栈顶 0 常量
LOD 将变量的值置于栈顶 层次差 数据地址
STO 将栈顶的值赋予某变量 层次差 数据地址
CAL 过程调用 层次差 程序地址
INT 在数据栈中分配空间,t寄存器增加A 0 常量
JPC,JMP 条件/无条件转移 0 程序地址
OPR 一组算术或逻辑运算指令 0 运算类别

PL0编译器执行过程

  1. 读文件
  2. 编译器初始化
    包括:对保留字表( word )、保留字表中每一个保留字对应的 symbol 类型( wsym )、部分符号对应的 symbol 类型表( ssym )、类 PCODE 指令助记符表( mnemonic )、声明开始集合( declbegsys )、表达式开始集合( statbegsys )、项开始符号集合( facbegsys )以及一些全局变量的初始化
  3. 首次调用getsym()进行词法分析
  4. 调用block()过程,包括词法分析和语法分析
  5. 判断当前单词是否为’.’,不是则错误
  6. 判断源程序是否存在错误,是则错误
  7. 没有出错则调用解释过程interpret()
  8. 程序结束

阶段一代码

{PL0编译程序}

program  PL0;
{带有代码生成的PL0编译程序}
{label  99;}
const
  norw = 11; {保留字的个数}
  txmax = 100; {标识符表长度}
  nmax = 14; {数字的最大位数}
  al = 10; {标识符的长度}
  amax = 2047; {最大地址}
  levmax = 3; {程序体嵌套的最大深度}
  cxmax = 200; {生成目标代码数组的大小}

type
  {枚举类型常用自然语言中含义清楚、明了的单词(看成代码)来表示“顺序关系”,
  是一种顺序类型,是根据说明中的排列先后顺序,才具有0,1,2…n的序号关系,
  可用来作循环变量初值和终值,也可用来作数组下标。
  但枚举类型不是数值常量或字符常量,不能进行算术运算,
  只能作为“序号关系”来使用。}
  symbol = (nul, ident, number, plus, minus, times, slash, oddsym,
  eql, neq, lss, leq, gtr, geq, lparen, rparen, comma, semicolon,
  period, becomes, beginsym, endsym, ifsym, thensym,
  whilesym, dosym, callsym, constsym, varsym, procsym );
  {在正常情况下,如果系统按4字节对齐,
  那么尽管前面的A只需要一个字节,
  但是随后的三个字节是空着的,B从下一个四字节的边界开始分配。}
  alfa = packed array [1..al] of char;
  objects = (constant, variable, procedures);

  {Pascal系统把具有共同特征的同一有序类型的对象汇集在一起,形成一个集合,
  可将集合类型的所有元素作为一个整体进行集合运算}

  symset = set of symbol;
  fct = (lit, opr, lod, sto, cal, int, jmp, jpc); {functions}

  {PASCAL系统定义了记录类型,可用来表示不同类型的数据。}
  instruction = packed record
    f : fct;  {功能码}
    l : 0..levmax; {相对层数}
    a : 0..amax; {相对地址}
  end;
  {LIT 0,a : 取常数a
  OPR 0,a : 执行运算a
  LOD l,a : 取层差为l的层﹑相对地址为a的变量
  STO l,a : 存到层差为l的层﹑相对地址为a的变量
  CAL l,a : 调用层差为l的过程
  INT 0,a : t寄存器增加a
  JMP 0,a : 转移到指令地址a处
  JPC 0,a : 条件转移到指令地址a处 }

{全局变量定义}
var
  output : text;{输出文件}
  input : text;{源代码文件}

  ch : char; {最近读到的字符}
  sym : symbol; {最近读到的符号}
  id : alfa; {最近读到的标识符}
  num : integer; {最近读到的数}
  cc : integer; {character count当前行的字符计数}
  ll : integer; {line lengt 当前行的长度}
  kk, err : integer;
  cx : integer; {code index 代码数组的当前下标}
  line : array [1..81] of char;{缓冲一行代码}
  a : alfa;
  code : array [0..cxmax] of instruction;{保存编译后的代码,要输出}
  word : array [1..norw] of alfa;
  wsym : array [1..norw] of symbol;
  ssym : array [char] of symbol;
  mnemonic : array [fct] of packed array [1..5] of char;
  declbegsys, statbegsys, facbegsys : symset;
  {声明开始,表达式形式,项开始的集合}
  table : array [0..txmax] of {符号表,相当于一个类,最多txmax(100)个符号}
         record
           name : alfa;{元素名}
           case kind : objects of
                constant : (val : integer);{如果是变量则保存常量的值}
                variable, procedures : (level, adr : integer)
                {如果是变量或者过程,保留层数和偏移地址}
end;

{定义错误程序,n是错误类型}
{character count}
procedure error (n : integer);
begin
  writeln('****', ' ' : cc - 1, '↑', n : 2);
  err := err + 1
end {error};

procedure getsym;
var  i, j, k : integer;
procedure  getch;
begin
  {cc 当前行的字符计数}
  {ll 当前行的长度}
  if cc = ll then{表示读完,一开始初始化为0}
  begin
    if eof(input) then
    begin
      write('PROGRAM INCOMPLETE');
      writeln;
      halt(0)
    end;
    ll := 0; cc := 0;
    write(cx : 5, ' ');{输出代码地址}
    while not eoln(input) do
    begin
      ll := ll + 1;{行缓冲区+1}
      read(input, ch);
      write(ch);
      line[ll] := ch
    end;
    writeln;
    readln(input);
    ll := ll + 1;
    line[ll] := ' '
    {read(line[ll])}{读下一行}
  end;
  cc := cc + 1;
  ch := line[cc]
end {getch};

begin {getsym}
  while ch = ' ' do getch;{空字符跳过}
  {如果是字母开头}
  if ch in ['a'..'z'] then
    begin {标识符或保留字} k := 0;
    repeat
      if k < al then
      begin k:= k + 1; a[k] := ch
      end;
      getch
    until not (ch in ['a'..'z', '0'..'9']);
    if k >= kk
      then kk := k {kk是上一个标识符的长度}
      else
        repeat a[kk] := ' '; kk := kk - 1;
        until kk = k;{删除上一个标识符在a中的字符}

    id := a;  i := 1;  j := norw;
    {二分查找保留字表}
    repeat  k := (i+j) div 2;
      if id <= word[k] then j := k - 1;
      if id >= word[k] then i := k + 1;
    until i > j;
    if i - 1 > j
    then sym := wsym[k] {把保留字的类型给sym}
    else sym := ident;{未找到,说明是标识符}
    end
  {如果是数字开头}
  else if ch in ['0'..'9'] then
  begin k := 0;  num := 0;  sym := number;
  repeat
    num := 10*num + (ord(ch)-ord('0'));
    k := k + 1;  getch;
  until not (ch in ['0'..'9']);
  if k > nmax then  error(30) {大于数字的最大位数 14}
  end

  {字符是:}
  else if ch = ':' then
  begin  getch;
    if ch = '=' then
    begin
      sym := becomes;{表示赋值}
      getch
    end
    else
      sym := nul;{非法}
  end

  {标点符号}
  else
  begin  sym := ssym[ch];  getch end

end {getsym};

{目标代码的生成}
{Fct, L, A}
procedure gen(x : fct; y, z : integer);
begin
  if cx > cxmax then
  begin write('PROGRAM TOO LONG');
    writeln;
    halt(0)
  end;
  with code[cx] do
    begin
      f := x;  l := y;  a := z
    end;
  cx := cx + 1
end {gen};

{测试字符的合法性,目的是跳过所有的非法字符,使得能继续工作}
procedure test(s1, s2 : symset; n : integer);
begin
  if not (sym in s1) then
    begin
      error(n);
      s1 := s1 + s2;
      while not (sym in s1) do getsym end
end {test};

{处理的主程序}
{fsys用来恢复错误的单词集合}
procedure  block(lev, tx : integer; fsys : symset);
var
  dx : integer; {本过程数据空间分配下标}
  tx0 : integer; {本过程标识表起始下标}
  cx0 : integer; {本过程代码起始下标}

procedure  enter(k : objects);
begin {把objects填入符号表中}
  tx := tx +1;
  with table[tx] do
    begin  name := id;  kind := k;
    case k of
      constant :
        begin
          {amx = 2047,最大地址}
          if num > amax then begin error(30); num := 0 end;
          val := num
        end;
      variable :
        begin
          level := lev;  adr := dx;  dx := dx +1;
        end;
      procedures : level := lev
    end
  end
end {enter};

function  position(id : alfa) : integer;
  var  i : integer;
  begin {在标识符表中查标识符id}
    table[0].name := id;  i := tx;
    while table[i].name <> id do i := i - 1;
    position := i
  end {position};

procedure constdeclaration;
begin
if sym = ident then
begin  getsym;
  if sym in [eql, becomes] then
  begin
    if sym = becomes then error(1);
    getsym;
    if sym = number then
    begin  enter(constant); getsym
    end
    else error(2)
  end else error(3)
end else error(4)
  end {constdeclaration};

procedure  vardeclaration;
begin
  if sym = ident then
  begin  enter(variable);  getsym
  end else error(4)
end {vardeclaration};

procedure  listcode;
var  i : integer;
begin  {列出本程序体(block)中生成的代码}
  for i := cx0 to cx-1 do
    with code[i] do
      writeln(i, mnemonic[f] : 10, l : 3, a : 5)
end {listcode};



procedure  statement(fsys : symset);
var  i, cx1, cx2 : integer;

procedure  expression(fsys : symset);
  var  addop : symbol;
  procedure  term(fsys : symset);
  var  mulop : symbol;
    procedure  factor(fsys : symset);
      var i : integer;
      begin  test(facbegsys, fsys, 24);
        while sym in facbegsys do
        begin
          if sym = ident then
          begin
            i := position(id);
            if i = 0 then error(11) else
              with table[i] do
                case kind of
                  constant : gen(lit, 0, val);
                  variable : gen(lod, lev-level, adr);
                  procedures : error(21)
                end;
            getsym
          end
          else if sym = number then
            begin
              if num > amax then begin error(30); num := 0 end;
              gen(lit, 0, num); getsym
            end
         else if sym = lparen then
            begin
              getsym;
              expression([rparen]+fsys);
              if sym = rparen then getsym else error(22)
            end;
         test(fsys, [lparen], 23)
       end
    end {factor};

  begin { procedure term( fsys : symset);}
    factor(fsys+[times, slash]);
    while sym in [times, slash] do
      begin
        mulop := sym;{保存当前的运算符}
        getsym;
        factor(fsys+[times, slash]);
        if mulop = times then gen(opr, 0, 4){乘法}
        else gen(opr, 0, 5){除法}
      end
  end {term};

begin {procedure expression( fsys: symset);}
  if sym in [plus, minus] then{如果有正负号}
    begin
      addop := sym;  getsym;
      term(fsys+[plus, minus]);
      if addop = minus then gen(opr, 0, 1)
    end
  else term(fsys+[plus, minus]);
  while sym in [plus, minus] do{如果有正负号}
    begin
      addop := sym;  getsym;
      term(fsys+[plus, minus]);
      if addop = plus then gen(opr, 0, 2)
      else gen(opr, 0, 3)
    end
end {expression};


procedure  condition(fsys : symset);
  var  relop : symbol;
begin
  if sym = oddsym then
  begin getsym;  expression(fsys);  gen(opr, 0, 6) end
  else
  begin
    expression([eql, neq, lss, gtr, leq, geq] + fsys);
    if not (sym in [eql, neq, lss, leq, gtr, geq]) then
      error(20)  else
    begin
      relop := sym;
      getsym;
      expression(fsys);
      case relop of
        eql : gen(opr, 0, 8);
        neq : gen(opr, 0, 9);
        lss : gen(opr, 0, 10);
        geq : gen(opr, 0, 11);
        gtr : gen(opr, 0, 12);
        leq : gen(opr, 0, 13);
      end
    end
  end
end {condition};

begin {procedure statement( fsys : symset )}
if sym = ident then
  begin
    i := position(id);{记录标识符的位置}
    if i = 0 then error(11){没找到}
    else if table[i].kind <> variable then{对非变量赋值}
      begin error(12); i := 0; end;

    getsym;
    if sym = becomes then getsym else error(13);
    expression(fsys);{赋值可以是表达式}
    if i <> 0 then
      with table[i] do gen(sto, lev-level, adr){将表达式的值赋给变量}
  end
else if sym = callsym then
  begin
    getsym;
    if sym <> ident then error(14)
    else
      begin
        i := position(id);
        if i = 0 then error(11)
        else
          with table[i] do
          if kind = procedures then gen(cal, lev-level, adr)
          else error(15);
        getsym
      end
end

else if sym = ifsym then
begin
  getsym;
  condition([thensym, dosym]+fsys);
  if sym = thensym then getsym else error(16);
  cx1 := cx;
  gen(jpc, 0, 0);{为else生成条件跳转指令,跳转位置暂填0}
  statement(fsys);
  code[cx1].a := cx{回填跳转指令的地址}
end

else if sym = beginsym then
begin
  getsym;
  statement([semicolon, endsym]+fsys);
  while sym in [semicolon]+statbegsys do
  begin
    if sym = semicolon then getsym else error(10);
    statement([semicolon, endsym]+fsys)
  end;
  if sym = endsym then getsym else error(17)
end

else if sym = whilesym then
begin
  cx1 := cx;
  getsym;
  condition([dosym]+fsys);
  cx2 := cx;
  gen(jpc, 0, 0);{为退出while建立跳转指令地址,cx2保存这条指令的位置}
  if sym = dosym then getsym else error(18);
  statement(fsys);
  gen(jmp, 0, cx1);{重新执行while语句的判断}
  code[cx2].a := cx{重填退出while的指令的地址}
end;


test(fsys, [ ], 19)
end {statement};


begin {procedure block( lev,tx : integer; fsys : symset )}

  dx := 3;{记录运行栈空间的栈顶位置, 要预留SL、DL、RA的空间}
  tx0 := tx;{记录当前符号表栈顶位置}
  table[tx].adr := cx;{符号表当前栈顶的偏移地址记录下一条生成代码的开始位置}
  gen(jmp, 0, 0);{生成一条从declation到stament的跳转指令,跳转的地址未知}
  if lev > levmax then error(32);

  repeat
  if sym = constsym then{const声明}
    begin
      getsym;
      repeat
        constdeclaration;
        while sym = comma do
          begin getsym; constdeclaration end;

        if sym = semicolon then getsym else error(5)
      until sym <> ident
  end;

  if sym = varsym then{变量声明}
  begin
    getsym;
    repeat
      vardeclaration;
      while sym = comma do
      begin  getsym;  vardeclaration  end;

      if sym = semicolon then getsym else error(5)
    until sym <> ident;
  end;

  while sym = procsym do{过程声明}
  begin
    getsym;
    if sym = ident then{如果第一个是标识符,则记录符号表}
      begin  enter(procedures);  getsym  end
    else error(4);
    if sym = semicolon then getsym else error(5);
    {执行分程序的分析过程}
    block(lev+1, tx, [semicolon]+fsys);
    {递归回来后应有分号}
    if sym = semicolon then
      begin
        getsym;
        test(statbegsys+[ident, procsym], fsys, 6) {测试sym是否合法}
      end
    else error(5)
  end;
  test(statbegsys+[ident], declbegsys, 7)
  until not (sym in declbegsys);

  code[table[tx0].adr].a := cx;{回填跳转指令}

  with table[tx0] do{符号表新加记录}
  begin  adr := cx; {代码开始地址} end;
  cx0 := cx;{当前代码的分配地址}
  gen(int, 0, dx);{分配dx个空间}
  statement([semicolon, endsym]+fsys); {语句}
  gen(opr, 0, 0); {生成返回指令}
  test(fsys, [ ], 8);
  listcode;

end  {block};


procedure writecode;
var  i : integer;
begin  {列出本程序体(block)中生成的代码}
  for i := 0 to cx-1 do
    with code[i] do
      begin
        write(output, i);
        write(output, mnemonic[f] : 10);
        write(output, l : 3);
        writeln(output, a : 5);

      end
end;

procedure  interpret;
const  stacksize = 500;
var  p, b, t : integer; {program程序地址寄存器, base基地址寄存器,topstack栈顶地址寄存器}
     i : instruction; {instruction指令寄存器}
     s : array [1..stacksize] of integer; {数据存储栈}

{计算基地址的函数}
function  base(l : integer) : integer;
  var  b1 : integer;
begin
  b1 := b; {顺静态链求层差为l的层的基地址}
  while l > 0 do{如果层数大于0,说明寻找的不是本层}
  begin
    b1 := s[b1];{记录当前层数据基址的内容}
    l := l-1
  end;
  base := b1{将找到的基地址保存起来}
end {base};

begin
  writeln('START PL/0');
  t := 0;  b := 1;  p := 0;
  s[1] := 0;  s[2] := 0;  s[3] := 0;
  {每个过程运行时的数据空间的前三个单元是
  SL:指向本过程静态直接外层过程的SL单元
  DL:指向调用本过程的过程的最新数据空间的第一个单元
  RA:返回地址
  }
  repeat
    i := code[p];  p := p+1;
    with i do
    case f of
      {lit 将常数置于栈顶}
      lit : begin
           t := t+1;  s[t] := a;
           {writeln('LIT: put', a ,'to the top of the stack')}
          end;

      opr : case a of {运算}
           0 : begin {返回}
                t := b-1;{所有数据栈清空}
                p := s[t+3];{获得return address}
                b := s[t+2];{获得return后的基址,因为被调用层次的DL指向调用层次的基址}
                {writeln('RETURN : to address', p)}
              end;
           1 : begin{取反}
              s[t] := -s[t];
              {writeln('Not operation: the top stack becomes', s[t])}
              end;
           2 : begin{求和}
                t := t-1;  s[t] := s[t] + s[t+1];
                {writeln('Plus operation: the top stack becomes', s[t])}
              end;
           3 : begin{相减}
                t := t-1;  s[t] := s[t]-s[t+1];
                {writeln('Subtract operation: the top stack becomes', s[t])}
              end;
           4 : begin{相乘}
                t := t-1;  s[t] := s[t] * s[t+1];
                {writeln('Multiply operation: the top stack becomes', s[t])}
              end;
           5 : begin{相除}
                t := t-1;  s[t] := s[t] div s[t+1];
                {writeln('Divide operation: the top stack becomes', s[t])}
              end;
           6 : begin
                s[t] := ord(odd(s[t]));
                {writeln('ODD: the top stack becomes', s[t])}
              end;
           8 : begin{相等}
                t := t-1;
                s[t] := ord(s[t] = s[t+1]);
                {writeln('Equal operation: the top stack becoms', s[t])}
              end;
           9: begin{不相等}
                t := t-1;
                s[t] := ord(s[t] <> s[t+1]);
                {writeln('Inqual operation: the top stack becoms', s[t])}
              end;
           10 : begin
                t := t-1;
                s[t] := ord(s[t] < s[t+1]);
                {writeln('LSS operation: the top stack becoms', s[t])}
              end;
           11: begin
                t := t-1;
                s[t] := ord(s[t] >= s[t+1]);
                {writeln('GEQ operation: the top stack becoms', s[t])}
              end;
           12 : begin
                t := t-1;
                s[t] := ord(s[t] > s[t+1]);
                {writeln('GTR operation: the top stack becoms', s[t])}
              end;
           13 : begin
                t := t-1;
                s[t] := ord(s[t] <= s[t+1]);
                {writeln('LEQ operation: the top stack becoms', s[t])}
              end;
           end;
      lod : {将变量的值置于栈顶}
            begin
            t := t + 1;  s[t] := s[base(l) + a];
            {writeln('LOD: put', s[t], 'to the top stack')}
           end;
      sto : begin
        {将栈顶的值赋给某变量}
            s[base(l) + a] := s[t];
            writeln(s[t]);
            t := t-1;
            {writeln('STO: s[', base(l) + a, ']becomes', s[t])}
          end;
      cal :
          begin {generate new block mark}
            s[t+1] := base( l );  s[t+2] := b;
            s[t+3] := p;
            b := t+1;  p := a;
            {writeln('CAL: generate new block at', a, '----------------------------------')}
          end;
      int : begin
            t := t + a;
            {writeln('INT:allocat space', a);}
            end;
      jmp : begin
            p := a;
            {writeln('JMP: jump to', a)}
            end;
      jpc : begin
            if s[t] = 0 then
              begin
                p := a;
                {writeln('JPC: jump to', a);}
              end;
            {else writeln('JPC: no jump');}
            t := t-1
          end;
    end {with, case}
  until p = 0;
  write('END PL/0');
end {interpret};


begin  {主程序}

  assign(output, 'output' );{用文件变量f与a驱磁盘上f1.dat文件对应}
  rewrite(output);

  assign(input, 'origin');
  reset(input);
  {ASCII码的顺序}
  for ch := 'A' to ';' do
    ssym[ch] := nul;{记号都设为不合法}

  word[1] := 'begin     '; word[2] := 'call      ';
  word[3] := 'const     '; word[4] := 'do        ';
  word[5] := 'end       '; word[6] := 'if        ';
  word[7] := 'odd       '; word[8] := 'procedure ';
  word[9] := 'then      '; word[10] := 'var       ';
  word[11] := 'while     ';
  {保留字}
  wsym[1] := beginsym;   wsym[2] := callsym;
  wsym[3] := constsym;   wsym[4] := dosym;
  wsym[5] := endsym;    wsym[6] := ifsym;
  wsym[7] := oddsym;    wsym[8] := procsym;
  wsym[9] := thensym;    wsym[10] := varsym;
  wsym[11] := whilesym;
  {保留字的记号}
  ssym['+'] := plus;      ssym['-'] := minus;
  ssym['*'] := times;     ssym['/'] := slash;
  ssym['('] := lparen;     ssym[')'] := rparen;
  ssym['='] := eql;       ssym[','] := comma;
  ssym['.'] := period;     ssym['!'] := neq;
  ssym['<'] := lss;       ssym['>'] := gtr;
  ssym['['] := leq;      ssym[']'] := geq;
  ssym[';'] := semicolon;
  {算符和标点符号的记号}
  mnemonic[lit] := 'LIT';     mnemonic[opr] := 'OPR';
  mnemonic[lod] := 'LOD';    mnemonic[sto] := 'STO';
  mnemonic[cal] := 'CAL';    mnemonic[int] := 'INT';
  mnemonic[jmp] := 'JMP';    mnemonic[jpc] := 'JPC';
  {中间代码指令的字符串}
  declbegsys := [constsym, varsym, procsym];
  {declation 说明语句的开始符号}
  statbegsys := [beginsym, callsym, ifsym, whilesym];
  {stament 语句的开始符号}
  facbegsys := [ident, number, lparen];
  {factors 因子的开始符号}
  {page(output);}
  err := 0;{出错的标识符个数}
  cc := 0;{当前行的字符计数}
  cx := 0; {代码数组的当前下标}
  ll := 0;
  ch := ' ';
  kk := al;
  getsym;
  block(0, 0, [period]+declbegsys+statbegsys);
  writecode;
  if sym <> period then error(9);

  if err = 0 then interpret
  else write('ERRORS IN PL/0 PROGRAM');

end.

阶段二的代码

{PL0编译程序}

program  PL0;
{带有代码生成的PL0编译程序}
{label  99;}
const
  norw = 13; {保留字的个数}
  txmax = 100; {标识符表长度}
  nmax = 14; {数字的最大位数}
  al = 10; {标识符的长度}
  amax = 2047; {最大地址}
  levmax = 3; {程序体嵌套的最大深度}
  cxmax = 200; {生成目标代码数组的大小}

type
  {枚举类型常用自然语言中含义清楚、明了的单词(看成代码)来表示“顺序关系”,
  是一种顺序类型,是根据说明中的排列先后顺序,才具有0,1,2…n的序号关系,
  可用来作循环变量初值和终值,也可用来作数组下标。
  但枚举类型不是数值常量或字符常量,不能进行算术运算,
  只能作为“序号关系”来使用。}
  symbol = (nul, ident, number, plus, minus, times, slash, oddsym,
  eql, neq, lss, leq, gtr, geq, lparen, rparen, comma, semicolon,
  period, becomes, beginsym, endsym, ifsym, thensym,
  whilesym, dosym, callsym, constsym, varsym, procsym, readsym, writesym );
  {在正常情况下,如果系统按4字节对齐,
  那么尽管前面的A只需要一个字节,
  但是随后的三个字节是空着的,B从下一个四字节的边界开始分配。}
  alfa = packed array [1..al] of char;
  objects = (constant, variable, procedures);

  {Pascal系统把具有共同特征的同一有序类型的对象汇集在一起,形成一个集合,
  可将集合类型的所有元素作为一个整体进行集合运算}

  symset = set of symbol;
  fct = (lit, opr, lod, sto, cal, int, jmp, jpc, red, wrt); {functions}

  {PASCAL系统定义了记录类型,可用来表示不同类型的数据。}
  instruction = packed record
    f : fct;  {功能码}
    l : 0..levmax; {相对层数}
    a : 0..amax; {相对地址}
  end;
  {LIT 0,a : 取常数a
  OPR 0,a : 执行运算a
  LOD l,a : 取层差为l的层﹑相对地址为a的变量
  STO l,a : 存到层差为l的层﹑相对地址为a的变量
  CAL l,a : 调用层差为l的过程
  INT 0,a : t寄存器增加a
  JMP 0,a : 转移到指令地址a处
  JPC 0,a : 条件转移到指令地址a处 }

{全局变量定义}
var
  output : text;{输出文件}
  input : text;{源代码文件}

  ch : char; {最近读到的字符}
  sym : symbol; {最近读到的符号}
  id : alfa; {最近读到的标识符}
  num : integer; {最近读到的数}
  cc : integer; {character count当前行的字符计数}
  ll : integer; {line lengt 当前行的长度}
  kk, err : integer;
  cx : integer; {code index 代码数组的当前下标}
  line : array [1..81] of char;{缓冲一行代码}
  a : alfa;
  code : array [0..cxmax] of instruction;{保存编译后的代码,要输出}
  word : array [1..norw] of alfa;
  wsym : array [1..norw] of symbol;
  ssym : array [char] of symbol;
  mnemonic : array [fct] of packed array [1..5] of char;
  declbegsys, statbegsys, facbegsys : symset;
  {声明开始,表达式形式,项开始的集合}
  table : array [0..txmax] of {符号表,相当于一个类,最多txmax(100)个符号}
         record
           name : alfa;{元素名}
           case kind : objects of
                constant : (val : integer);{如果是变量则保存常量的值}
                variable, procedures : (level, adr : integer)
                {如果是变量或者过程,保留层数和偏移地址}
end;

{定义错误程序,n是错误类型}
{character count}
procedure error (n : integer);
begin
  writeln('****', ' ' : cc - 1, '^', n : 2);
  err := err + 1
end {error};

procedure getsym;
var  i, j, k : integer;
procedure  getch;
begin
  {cc 当前行的字符计数}
  {ll 当前行的长度}
  if cc = ll then{表示读完,一开始初始化为0}
  begin
    if eof(input) then
    begin
      write('PROGRAM INCOMPLETE');
      writeln;
      halt(0)
    end;
    ll := 0; cc := 0;
    write(cx : 5, ' ');{输出代码地址}
    while not eoln(input) do
    begin
      ll := ll + 1;{行缓冲区+1}
      read(input, ch);
      write(ch);
      line[ll] := ch
    end;
    writeln;
    readln(input);
    ll := ll + 1;
    line[ll] := ' '
    {read(line[ll])}{读下一行}
  end;
  cc := cc + 1;
  ch := line[cc]
end {getch};

begin {getsym}
  while ch = ' ' do getch;{空字符跳过}
  {如果是字母开头}
  if ch in ['a'..'z'] then
    begin {标识符或保留字} k := 0;
    repeat
      if k < al then
      begin k:= k + 1; a[k] := ch
      end;
      getch
    until not (ch in ['a'..'z', '0'..'9']);
    if k >= kk
      then kk := k {kk是上一个标识符的长度}
      else
        repeat a[kk] := ' '; kk := kk - 1;
        until kk = k;{删除上一个标识符在a中的字符}

    id := a;  i := 1;  j := norw;
    {二分查找保留字表}
    repeat  k := (i+j) div 2;
      if id <= word[k] then j := k - 1;
      if id >= word[k] then i := k + 1;
    until i > j;
    if i - 1 > j
    then sym := wsym[k] {把保留字的类型给sym}
    else sym := ident;{未找到,说明是标识符}
    end
  {如果是数字开头}
  else if ch in ['0'..'9'] then
  begin k := 0;  num := 0;  sym := number;
  repeat
    num := 10*num + (ord(ch)-ord('0'));
    k := k + 1;  getch;
  until not (ch in ['0'..'9']);
  if k > nmax then  error(30) {大于数字的最大位数 14}
  end

  {字符是:}
  else if ch = ':' then
  begin  getch;
    if ch = '=' then
    begin
      sym := becomes;{表示赋值}
      getch
    end
    else
      sym := nul;{非法}
  end

  else if ch = '<' then
  begin getch;
    if ch = '=' then
    begin
      sym := leq;
      getch;
    end
    else if ch = '>' then
    begin
      sym := neq;
      getch;
    end
    else sym := lss;

  end

  else if ch = '>' then
  begin getch;
    if ch = '=' then
    begin
      sym := geq;
      getch;
    end
    else sym := gtr;

  end

  {其他标点符号}
  else
  begin  sym := ssym[ch];  getch end

end {getsym};

{目标代码的生成}
{Fct, L, A}
procedure gen(x : fct; y, z : integer);
begin
  if cx > cxmax then
  begin write('PROGRAM TOO LONG');
    writeln;
    halt(0)
  end;
  with code[cx] do
    begin
      f := x;  l := y;  a := z
    end;
  cx := cx + 1
end {gen};

{测试字符的合法性,目的是跳过所有的非法字符,使得能继续工作}
procedure test(s1, s2 : symset; n : integer);
begin
  if not (sym in s1) then
    begin
      error(n);
      s1 := s1 + s2;
      while not (sym in s1) do getsym end
end {test};

{处理的主程序}
{fsys用来恢复错误的单词集合}
procedure  block(lev, tx : integer; fsys : symset);
var
  dx : integer; {本过程数据空间分配下标}
  tx0 : integer; {本过程标识表起始下标}
  cx0 : integer; {本过程代码起始下标}

procedure  enter(k : objects);
begin {把objects填入符号表中}
  tx := tx +1;
  with table[tx] do
    begin  name := id;  kind := k;
    case k of
      constant :
        begin
          {amx = 2047,最大地址}
          if num > amax then begin error(30); num := 0 end;
          val := num
        end;
      variable :
        begin
          level := lev;  adr := dx;  dx := dx +1;
        end;
      procedures : level := lev
    end
  end
end {enter};

function  position(id : alfa) : integer;
  var  i : integer;
  begin {在标识符表中查标识符id}
    table[0].name := id;  i := tx;
    while table[i].name <> id do i := i - 1;
    position := i
  end {position};

procedure constdeclaration;
begin
if sym = ident then
begin  getsym;
  if sym in [eql, becomes] then
  begin
    if sym = becomes then error(1);
    getsym;
    if sym = number then
    begin  enter(constant); getsym
    end
    else error(2)
  end else error(3)
end else error(4)
  end {constdeclaration};

procedure  vardeclaration;
begin
  if sym = ident then
  begin  enter(variable);  getsym
  end else error(4)
end {vardeclaration};

procedure  listcode;
var  i : integer;
begin  {列出本程序体(block)中生成的代码}
  for i := cx0 to cx - 1 do
    with code[i] do
      writeln(i, mnemonic[f] : 10, l : 3, a : 5)
end {listcode};



procedure  statement(fsys : symset);
var  i, cx1, cx2 : integer;

procedure  expression(fsys : symset);
  var  addop : symbol;
  procedure  term(fsys : symset);
  var  mulop : symbol;
    procedure  factor(fsys : symset);
      var i : integer;
      begin  test(facbegsys, fsys, 24);
        while sym in facbegsys do
        begin
          if sym = ident then
          begin
            i := position(id);
            if i = 0 then error(11) else
              with table[i] do
                case kind of
                  constant : gen(lit, 0, val);
                  variable : gen(lod, lev-level, adr);
                  procedures : error(21)
                end;
            getsym
          end
          else if sym = number then
            begin
              if num > amax then begin error(30); num := 0 end;
              gen(lit, 0, num); getsym
            end
         else if sym = lparen then
            begin
              getsym;
              expression([rparen]+fsys);
              if sym = rparen then getsym else error(22)
            end;
         test(fsys, [lparen], 23)
       end
    end {factor};

  begin { procedure term( fsys : symset);}
    factor(fsys+[times, slash]);
    while sym in [times, slash] do
      begin
        mulop := sym;{保存当前的运算符}
        getsym;
        factor(fsys+[times, slash]);
        if mulop = times then gen(opr, 0, 4){乘法}
        else gen(opr, 0, 5){除法}
      end
  end {term};

begin {procedure expression( fsys: symset);}
  if sym in [plus, minus] then{如果有正负号}
    begin
      addop := sym;  getsym;
      term(fsys+[plus, minus]);
      if addop = minus then gen(opr, 0, 1)
    end
  else term(fsys+[plus, minus]);
  while sym in [plus, minus] do{如果有正负号}
    begin
      addop := sym;  getsym;
      term(fsys+[plus, minus]);
      if addop = plus then gen(opr, 0, 2)
      else gen(opr, 0, 3)
    end
end {expression};


procedure  condition(fsys : symset);
  var  relop : symbol;
begin
  if sym = oddsym then
  begin getsym;  expression(fsys);  gen(opr, 0, 6) end
  else
  begin
    expression([eql, neq, lss, gtr, leq, geq] + fsys);
    if not (sym in [eql, neq, lss, leq, gtr, geq]) then
      error(20)  else
    begin
      relop := sym;
      getsym;
      expression(fsys);
      case relop of
        eql : gen(opr, 0, 8);
        neq : gen(opr, 0, 9);
        lss : gen(opr, 0, 10);
        geq : gen(opr, 0, 11);
        gtr : gen(opr, 0, 12);
        leq : gen(opr, 0, 13);
      end
    end
  end
end {condition};

begin {procedure statement( fsys : symset )}
if sym = ident then
  begin
    i := position(id);{记录标识符的位置}
    if i = 0 then error(11){没找到}
    else if table[i].kind <> variable then{对非变量赋值}
      begin error(12); i := 0; end;

    getsym;
    if sym = becomes then getsym else error(13);
    expression(fsys);{赋值可以是表达式}
    if i <> 0 then
      with table[i] do gen(sto, lev-level, adr){将表达式的值赋给变量}
  end
else if sym = callsym then
  begin
    getsym;
    if sym <> ident then error(14)
    else
      begin
        i := position(id);
        if i = 0 then error(11)
        else
          with table[i] do
          if kind = procedures then gen(cal, lev-level, adr)
          else error(15);
        getsym
      end
end

else if sym = ifsym then
begin
  getsym;
  condition([thensym, dosym]+fsys);
  if sym = thensym then getsym else error(16);
  cx1 := cx;
  gen(jpc, 0, 0);{为else生成条件跳转指令,跳转位置暂填0}
  statement(fsys);
  code[cx1].a := cx{回填跳转指令的地址}
end

else if sym = beginsym then
begin
  getsym;
  statement([semicolon, endsym]+fsys);
  while sym in [semicolon]+statbegsys do
  begin
    if sym = semicolon then getsym else error(10);
    statement([semicolon, endsym]+fsys)
  end;
  if sym = endsym then getsym else error(17)
end

else if sym = whilesym then
begin
  cx1 := cx;
  getsym;
  condition([dosym]+fsys);
  cx2 := cx;
  gen(jpc, 0, 0);{为退出while建立跳转指令地址,cx2保存这条指令的位置}
  if sym = dosym then getsym else error(18);
  statement(fsys);
  gen(jmp, 0, cx1);{重新执行while语句的判断}
  code[cx2].a := cx{重填退出while的指令的地址}
end

else if sym = readsym then
begin
  getsym;
  if sym <> lparen then error(100);
  repeat
    getsym;
    if sym <> ident then error(101);
    i := position(id);
    if i = 0 then error(102);
    if table[i].kind <> variable then
    begin
      error(103);{只能对变量赋值}
      i := 0;
    end;
    with table[i] do
      gen(red, lev-level, adr);
      getsym;
  until (sym <> comma);
  if sym <> rparen then error(104);
  getsym;
end

else if sym = writesym then
begin
  getsym;
  if sym <> lparen then error(105);
  repeat
    getsym;
    expression([rparen, comma] + fsys);
    gen(wrt, 0, 0);
  until (sym <> comma);
  if sym <> rparen then error(106);
  getsym;
end;



test(fsys, [ ], 19)
end {statement};


begin {procedure block( lev,tx : integer; fsys : symset )}

  dx := 3;{记录运行栈空间的栈顶位置, 要预留SL、DL、RA的空间}
  tx0 := tx;{记录当前符号表栈顶位置}
  table[tx].adr := cx;{符号表当前栈顶的偏移地址记录下一条生成代码的开始位置}
  gen(jmp, 0, 0);{生成一条从declation到stament的跳转指令,跳转的地址未知}
  if lev > levmax then error(32);

  repeat
  if sym = constsym then{const声明}
    begin
      getsym;
      repeat
        constdeclaration;
        while sym = comma do
          begin getsym; constdeclaration end;

        if sym = semicolon then getsym else error(5)
      until sym <> ident
  end;

  if sym = varsym then{变量声明}
  begin
    getsym;
    repeat
      vardeclaration;
      while sym = comma do
      begin  getsym;  vardeclaration  end;

      if sym = semicolon then getsym else error(5)
    until sym <> ident;
  end;

  while sym = procsym do{过程声明}
  begin
    getsym;
    if sym = ident then{如果第一个是标识符,则记录符号表}
      begin  enter(procedures);  getsym  end
    else error(4);
    if sym = semicolon then getsym else error(5);
    {执行分程序的分析过程}
    block(lev+1, tx, [semicolon]+fsys);
    {递归回来后应有分号}
    if sym = semicolon then
      begin
        getsym;
        test(statbegsys+[ident, procsym], fsys, 6) {测试sym是否合法}
      end
    else error(5)
  end;
  test(statbegsys+[ident], declbegsys, 7)
  until not (sym in declbegsys);

  code[table[tx0].adr].a := cx;{回填跳转指令}

  with table[tx0] do{符号表新加记录}
  begin  adr := cx; {代码开始地址} end;
  cx0 := cx;{当前代码的分配地址}
  gen(int, 0, dx);{分配dx个空间}
  statement([semicolon, endsym]+fsys); {语句}
  gen(opr, 0, 0); {生成返回指令}
  test(fsys, [ ], 8);
  listcode;

end  {block};


procedure writecode;
var  i : integer;
begin  {列出本程序体(block)中生成的代码}
  for i := 0 to cx - 1 do
    with code[i] do
      begin
        writeln(i, mnemonic[f] : 10, l : 3, a : 5);
        writeln(output, i, mnemonic[f] : 10, l : 3, a : 5);

      end;

end;

procedure  interpret;
const  stacksize = 500;
var  p, b, t : integer; {program程序地址寄存器, base基地址寄存器,topstack栈顶地址寄存器}
     i : instruction; {instruction指令寄存器}
     s : array [1..stacksize] of integer; {数据存储栈}

{计算基地址的函数}
function  base(l : integer) : integer;
  var  b1 : integer;
begin
  b1 := b; {顺静态链求层差为l的层的基地址}
  while l > 0 do{如果层数大于0,说明寻找的不是本层}
  begin
    b1 := s[b1];{记录当前层数据基址的内容}
    l := l-1
  end;
  base := b1{将找到的基地址保存起来}
end {base};

begin
  writeln('START PL/0');
  t := 0;  b := 1;  p := 0;
  s[1] := 0;  s[2] := 0;  s[3] := 0;
  {每个过程运行时的数据空间的前三个单元是
  SL:指向本过程静态直接外层过程的SL单元
  DL:指向调用本过程的过程的最新数据空间的第一个单元
  RA:返回地址
  }
  repeat
    i := code[p];  p := p+1;
    with i do
    case f of
      {lit 将常数置于栈顶}
      lit : begin
           t := t+1;  s[t] := a;
          end;

      opr : case a of {运算}
           0 : begin {返回}
                t := b-1;{所有数据栈清空}
                p := s[t+3];{获得return address}
                b := s[t+2];{获得return后的基址,因为被调用层次的DL指向调用层次的基址}
              end;
           1 : begin{取反}
              s[t] := -s[t];
              end;
           2 : begin{求和}
                t := t-1;  s[t] := s[t] + s[t+1];
              end;
           3 : begin{相减}
                t := t-1;  s[t] := s[t]-s[t+1];
              end;
           4 : begin{相乘}
                t := t-1;  s[t] := s[t] * s[t+1];
              end;
           5 : begin{相除}
                t := t-1;  s[t] := s[t] div s[t+1];
              end;
           6 : begin
                s[t] := ord(odd(s[t]));
                {writeln('ODD: the top stack becomes', s[t])}
              end;
           8 : begin{相等}
                t := t-1;
                s[t] := ord(s[t] = s[t+1]);
              end;
           9: begin{不相等}
                t := t-1;
                s[t] := ord(s[t] <> s[t+1]);
              end;
           10 : begin
                t := t-1;
                s[t] := ord(s[t] < s[t+1]);
              end;
           11: begin
                t := t-1;
                s[t] := ord(s[t] >= s[t+1]);
              end;
           12 : begin
                t := t-1;
                s[t] := ord(s[t] > s[t+1]);
              end;
           13 : begin
                t := t-1;
                s[t] := ord(s[t] <= s[t+1]);
              end;
           end;
      lod : {将变量的值置于栈顶}
            begin
            t := t + 1;  s[t] := s[base(l) + a];
           end;
      sto : begin
        {将栈顶的值赋给某变量}
            s[base(l) + a] := s[t];
            {writeln(s[t]);}
            t := t-1;
          end;
      cal :
          begin {generate new block mark}
            s[t+1] := base( l );  s[t+2] := b;
            s[t+3] := p;
            b := t+1;  p := a;
          end;
      int : begin
            t := t + a;

            end;
      jmp : begin
            p := a;
            end;
      jpc : begin
            if s[t] = 0 then
              begin
                p := a;
              end;
            t := t-1
          end;
      red : begin
          write('$ ');
          readln(s[base(l) + a]);
      end;
      wrt : begin
          writeln(s[t]);
          t := t + 1;

      end;
    end {with, case}
  until p = 0;
  write('END PL/0');
end {interpret};


begin  {主程序}

  assign(output, 'output' );{用文件变量f与a驱磁盘上f1.dat文件对应}
  rewrite(output);

  assign(input, 'origin');
  reset(input);
  {ASCII码的顺序}
  for ch := 'A' to ';' do
    ssym[ch] := nul;{记号都设为不合法}

  word[1] := 'begin     '; word[2] := 'call      ';
  word[3] := 'const     '; word[4] := 'do        ';
  word[5] := 'end       '; word[6] := 'if        ';
  word[7] := 'odd       '; word[8] := 'procedure ';
  word[10] := 'then      '; word[11] := 'var       ';
  word[12] := 'while     ';word[9] := 'read      ';
  word[13] := 'write     ';
  {保留字}
  wsym[1] := beginsym;   wsym[2] := callsym;
  wsym[3] := constsym;   wsym[4] := dosym;
  wsym[5] := endsym;    wsym[6] := ifsym;
  wsym[7] := oddsym;    wsym[8] := procsym;
  wsym[10] := thensym;    wsym[11] := varsym;
  wsym[12] := whilesym;  wsym[9] := readsym;
  wsym[13] := writesym;
  {保留字的记号}
  ssym['+'] := plus;      ssym['-'] := minus;
  ssym['*'] := times;     ssym['/'] := slash;
  ssym['('] := lparen;     ssym[')'] := rparen;
  ssym['='] := eql;       ssym[','] := comma;
  ssym['.'] := period;     ssym['!'] := neq;
  ssym['<'] := lss;       ssym['>'] := gtr;
  ssym['['] := leq;      ssym[']'] := geq;
  ssym[';'] := semicolon;
  {算符和标点符号的记号}
  mnemonic[lit] := 'LIT';     mnemonic[opr] := 'OPR';
  mnemonic[lod] := 'LOD';    mnemonic[sto] := 'STO';
  mnemonic[cal] := 'CAL';    mnemonic[int] := 'INT';
  mnemonic[jmp] := 'JMP';    mnemonic[jpc] := 'JPC';
  mnemonic[red] := 'RED';    mnemonic[wrt] := 'WRT';
  {中间代码指令的字符串}
  declbegsys := [constsym, varsym, procsym];
  {declation 说明语句的开始符号}
  statbegsys := [beginsym, callsym, ifsym, whilesym];
  {stament 语句的开始符号}
  facbegsys := [ident, number, lparen];
  {factors 因子的开始符号}
  {page(output);}
  err := 0;{出错的标识符个数}
  cc := 0;{当前行的字符计数}
  cx := 0; {代码数组的当前下标}
  ll := 0;
  ch := ' ';
  kk := al;
  getsym;
  block(0, 0, [period]+declbegsys+statbegsys);
  writecode;
  if sym <> period then error(9);

  if err = 0 then interpret
  else write('ERRORS IN PL/0 PROGRAM');
  close(input);
  close(output);{这是必须要有的,不然输出文件异常}
end.


你可能感兴趣的:(编译原理项目)