Polygon zkEVM zkASM编译器——zkasmcom

1. 引言

Polygon zkEVM采用zkASM(zero-knowledge Assembly language)语言来解析EVM bytecode。

zkASM编译器代码见:

  • https://github.com/0xPolygonHermez/zkasmcom:负责将.zkasm编译为json文件供zkExecutor使用。
  • https://github.com/0xPolygonHermez/zkevm-storage-rom:为storage二级状态机的zkASM编译器。负责将storage zkasm文件编译为json文件。
  • https://github.com/0xPolygonHermez/zkevm-rom:包含了zkEVM中的zkasm源代码。

本文重点关注zkasmcom代码,其主要依赖3个库:

  • yargs:交互式命令行工具,负责参数解析。
  • ffjavascript:Finite Field Library in Javascript。
  • jison:一个用JavaScript语言实现的语法分析器生成器。

package.json中与构建相关的脚本为:

"build_parser_zkasm": "mkdir -p build; ./node_modules/.bin/jison src/zkasm_parser.jison -o build/zkasm_parser.js",
"build_parser_command": "mkdir -p build; ./node_modules/.bin/jison src/command_parser.jison -o build/command_parser.js",
"build": "npm run build_parser_zkasm && npm run build_parser_command"

npm run build会生成2个解析器文件:

  • 1)zkasm_parser.js:compile.js中调用const lines = zkasm_parser.parse(src); 对 *.zkasm文件进行编译。
  • 2)command_parser.js:当为main入口时,调用cmdList[i] = command_parser.parse(cmdList[i]); 对command进行解析。

zkasm中的常量参数有:【STEP和ROTL_C为只读寄存器。】

// Numeric bounds for the short constant form: 2^32 - 1 and -2^31.
// NOTE(review): presumably the accepted range of CONST values — the check itself is not shown here.
const maxConst = (1n << 32n) - 1n;
const minConst = -(1n << 31n);
// Numeric bounds for the long constant form: 2^256 - 1 and -2^255.
// NOTE(review): presumably the accepted range of CONSTL values — confirm against the range check.
const maxConstl = (1n << 256n) - 1n;
const minConstl = -(1n << 255n);
// Registers that processAssignmentOut refuses to accept as assignment destinations.
const readOnlyRegisters = ['STEP', 'ROTL_C'];

以arrays.zkasm为例:【冒号右侧为OPCODE,对应的相应常量多项式设置见zkasm_parser.jison中的op内相应操作码的设置,如ARITH操作码对应$$ = { arith: 1, arithEq0: 1},表示会设置arith和arithEq0常量多项式在该行的值为1。】

VAR GLOBAL a[100] # 以type、scope、name、count来描述。
VAR GLOBAL b
VAR GLOBAL c[300]
VAR GLOBAL d

start: # 对应type为“label”,identifier为“start”,line为所在代码行,此处为6。里面的每行type为“step”。
        STEP => A
        0   :ASSERT

        1   :MSTORE(a)
        2   :MSTORE(b)
        3   :MSTORE(c)
        4   :MSTORE(d)
        @a => A
        @b => A
        @c => A
        @d => A

end:
       0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR

finalWait:
        ${beforeLast()}  : JMPN(finalWait)

                         : JMP(start)
opINVALID:

执行node src/zkasm.js test/arrays.zkasm -o arrays.json进行编译。

其中,const lines = zkasm_parser.parse(src);解析后的结果为:

[
 { # VAR GLOBAL a[100]
  "type": "var",
  "scope": "GLOBAL",
  "name": "a",
  "count": 100
 },
 { # VAR GLOBAL b
  "type": "var",
  "scope": "GLOBAL",
  "name": "b",
  "count": 1
 },
 { # VAR GLOBAL c[300]
  "type": "var",
  "scope": "GLOBAL",
  "name": "c",
  "count": 300
 },
 { # VAR GLOBAL d
  "type": "var",
  "scope": "GLOBAL",
  "name": "d",
  "count": 1
 },
 { # start:
  "type": "label",
  "identifier": "start",
  "line": 6
 },
 { # STEP => A,将STEP寄存器的值直接赋值给A寄存器。
  "type": "step",
  "assignment": {
   "in": {
    "type": "REG",
    "reg": "STEP"
   },
   "out": [
    "A"
   ]
  },
  "ops": [],
  "line": 7
 },
 { # 0   :ASSERT,assert常量0。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 0
   },
   "out": []
  },
  "ops": [
   {
    "assert": 1
   }
  ],
  "line": 8
 },
 { # 1   :MSTORE(a),将常量1存入a数组中的第一位置。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 1
   },
   "out": []
  },
  "ops": [
   {
    "offset": "a",
    "mOp": 1,
    "mWR": 1
   }
  ],
  "line": 10
 },
 { # 2   :MSTORE(b),将常量2存入b中。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 2
   },
   "out": []
  },
  "ops": [
   {
    "offset": "b",
    "mOp": 1,
    "mWR": 1
   }
  ],
  "line": 11
 },
 { # 3   :MSTORE(c),将常量3存入数组c中。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 3
   },
   "out": []
  },
  "ops": [
   {
    "offset": "c",
    "mOp": 1,
    "mWR": 1
   }
  ],
  "line": 12
 },
 { # 4   :MSTORE(d),将常量4存入d中。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 4
   },
   "out": []
  },
  "ops": [
   {
    "offset": "d",
    "mOp": 1,
    "mWR": 1
   }
  ],
  "line": 13
 },
 { # @a => A,将a的索引赋值给A。
  "type": "step",
  "assignment": {
   "in": {
    "type": "reference",
    "identifier": "a"
   },
   "out": [
    "A"
   ]
  },
  "ops": [],
  "line": 14
 },
 { # @b => A,将b的索引赋值给A。
  "type": "step",
  "assignment": {
   "in": {
    "type": "reference",
    "identifier": "b"
   },
   "out": [
    "A"
   ]
  },
  "ops": [],
  "line": 15
 },
 { # @c => A,将c的索引赋值给A。
  "type": "step",
  "assignment": {
   "in": {
    "type": "reference",
    "identifier": "c"
   },
   "out": [
    "A"
   ]
  },
  "ops": [],
  "line": 16
 },
 { # @d => A,将d的索引赋值给A。
  "type": "step",
  "assignment": {
   "in": {
    "type": "reference",
    "identifier": "d"
   },
   "out": [
    "A"
   ]
  },
  "ops": [],
  "line": 17
 },
 { # end:
  "type": "label",
  "identifier": "end",
  "line": 19
 },
 { # 0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR,将这些寄存器清零。
  "type": "step",
  "assignment": {
   "in": {
    "type": "CONST",
    "const": 0
   },
   "out": [
    "A",
    "B",
    "C",
    "D",
    "E",
    "CTX",
    "SP",
    "PC",
    "GAS",
    "MAXMEM",
    "SR"
   ]
  },
  "ops": [],
  "line": 20
 },
 { # finalWait:
  "type": "label",
  "identifier": "finalWait",
  "line": 22
 },
 { # ${beforeLast()}  : JMPN(finalWait)
  "type": "step",
  "assignment": {
   "in": {
    "type": "TAG",
    "tag": "beforeLast()" # 为标签。
   },
   "out": []
  },
  "ops": [
   {
    "JMPC": 0,
    "JMPN": 1,
    "offset": "finalWait"
   }
  ],
  "line": 23
 },
 { # : JMP(start)
  "type": "step",
  "assignment": null,
  "ops": [
   {
    "JMP": 1,
    "JMPC": 0,
    "JMPN": 0,
    "offset": "start"
   }
  ],
  "line": 25
 },
 { # opINVALID:
  "type": "label",
  "identifier": "opINVALID",
  "line": 26
 }
]

然后对以上内容逐行处理:

	// Walk every parsed line and dispatch on its `type`: include, var,
	// constdef/constldef, step, label or command. Results accumulate in ctx
	// (ctx.vars, ctx.definedLabels, ctx.out).
	for (let i=0; i<lines.length; i++) {
        const l = lines[i];
        ctx.currentLine = l; // read back by processAssignmentOut when building its error message
        l.fileName = relativeFileName;
        if (l.type == "include") {
            // Compile the included file into the same ctx; the path is resolved against fileDir.
            const fullFileNameI = path.resolve(fileDir, l.file);
            await compile(fullFileNameI, ctx);
            if (pendingCommands.length>0) error(l, "command not allowed before include");
            lastLineAllowsCommand = false;
        } else if (l.type == "var") {
            // Variable names must be unique across both scopes.
            if (typeof ctx.vars[l.name] !== "undefined") error(l, `Variable ${l.name} already defined`);
            if (l.scope == "GLOBAL") { // Global variables are registered by name; duplicates are rejected above.
                ctx.vars[l.name] = {
                    scope: "GLOBAL",
                    offset: ctx.lastGlobalVarAssigned + 1
                }
                ctx.lastGlobalVarAssigned += l.count; // Advance by the element count so arrays reserve a contiguous range.
            } else if (l.scope == "CTX") {
                // Same allocation scheme, using the separate CTX counter.
                ctx.vars[l.name] = {
                    scope: "CTX",
                    offset: ctx.lastLocalVarCtxAssigned + 1
                }
                ctx.lastLocalVarCtxAssigned += l.count;
            } else {
                throw error(l, `Invalid scope ${l.scope}`);
            }
            if (pendingCommands.length>0) error(l, "command not allowed before var");
            lastLineAllowsCommand = false;
        } else if (l.type == 'constdef' || l.type == 'constldef' ) {
            // Evaluate the constant's expression and register it as CONST or CONSTL.
            const value = evaluateExpression(ctx, l.value);
            let ctype = l.type == 'constldef' ? 'CONSTL':'CONST';
            defineConstant(ctx, l.name, ctype, value);
        } else if (l.type == "step") { // An executable statement found under a label such as start/end.
            const traceStep = { // Flat map filled through appendOp: step[key] = op[key].
                // type: "step"
            };
            try {
                for (let j=0; j< l.ops.length; j++) { // Hoist an assignment carried by an op onto the line; two assignments on one line are rejected.
                    if (!l.ops[j].assignment) continue;
                    if (l.assignment) {
                        error(l, "not allowed assignments with this operation");
                    }
                    l.assignment = l.ops[j].assignment;
                    delete l.ops[j].assignment;
                }
				
				/* Reference copy of appendOp (defined elsewhere), kept by the article for readers:
				function appendOp(step, op) {
				    Object.keys(op).forEach(function(key) {
				        if (typeof step[key] !== "undefined") throw new Error(`Var ${key} already defined`);
				        step[key] = op[key];
				    });
				}*/
                if (l.assignment) { // Translate the `in` and `out` sides of the assignment into step keys.
                    appendOp(traceStep, processAssignmentIn(ctx, l.assignment.in, ctx.out.length));
                    appendOp(traceStep, processAssignmentOut(ctx, l.assignment.out));
                }
                for (let j=0; j< l.ops.length; j++) { // Merge every op's keys into the step map.
                    appendOp(traceStep, l.ops[j])
                }

                if (traceStep.JMPC && !traceStep.bin) {
                    error(l, "JMPC must go together with a binary op");
                }
            } catch (err) {
                error(l, err);
            }
            // traceStep.lineNum = ctx.out.length;
            traceStep.line = l;
            ctx.out.push(traceStep); // Append the finished step to the program output.
            if (pendingCommands.length>0) {
                // Commands queued before this step become its cmdBefore list.
                traceStep.cmdBefore = pendingCommands;
                pendingCommands = [];
            }
            // A command following a jump step is queued for the next step instead of being attached here.
            lastLineAllowsCommand = !(traceStep.JMP || traceStep.JMPC || traceStep.JMPN);
        } else if (l.type == "label") { // Identifiers such as start/end; duplicates are rejected.
            const id = l.identifier
            // NOTE(review): a label recorded at index 0 is falsy, so this truthiness test would not flag its redefinition — confirm intent.
            if (ctx.definedLabels[id]) error(l, `RedefinedLabel: ${id}` );
            ctx.definedLabels[id] = ctx.out.length; // The label points at the index of the next emitted step.
            if (pendingCommands.length>0) error(l, "command not allowed before label")
            lastLineAllowsCommand = false;
        } else if (l.type == "command") {
            if (lastLineAllowsCommand) {
                // Attach to the most recently emitted step as a cmdAfter entry.
                if (typeof ctx.out[ctx.out.length-1].cmdAfter === "undefined")
                    ctx.out[ctx.out.length-1].cmdAfter = [];
                ctx.out[ctx.out.length-1].cmdAfter.push(l.cmd);
            } else {
                // Otherwise hold it until the next step, which receives it as cmdBefore.
                pendingCommands.push(l.cmd);
            }
        } else {
            error(l, `Invalid line type: ${l.type}`);
        }
    }

assignment中的in内容的处理规则为:

/**
 * Translates the `in` side of an assignment into a flat map of step keys.
 *
 * Depending on the node type the result carries keys such as `in<REG>`,
 * `in<Counter>`, `CONST`, `CONSTL`, `inFREE`/`freeInTag` or `labelCONST`.
 * Composite nodes (`add`, `sub`, `neg`, `mul`) are evaluated recursively and
 * their operand maps are combined key by key.
 *
 * The input node is never modified, so the same node can be processed more
 * than once with identical results.
 *
 * @param {object} ctx - Compiler context; `definedLabels` and `vars` are read
 *   for `reference` nodes, and it is forwarded to `getConstant` for `CONSTID`.
 * @param {object} input - Parsed expression node (`type` plus type-specific fields).
 * @param {number} currentLine - Value substituted when the `zkPC` register is read.
 * @returns {object} Map of step keys to BigInt coefficients (plus tag/label metadata).
 * @throws {Error} On an unknown node type, an unresolved reference, two tags in
 *   one sum, CONST mixed with CONSTL, or a product of two non-constant operands.
 */
function processAssignmentIn(ctx, input, currentLine) {
    // An operand is a plain constant when CONST is the only key it carries.
    const isConstant = (o) => Object.keys(o).every((key) => key === "CONST");

    const res = {};
    const type = input.type;

    if (type === "TAG") {
        // e.g. `${beforeLast()} : JMPN(finalWait)`: the tag text is handed to command_parser.
        res.freeInTag = input.tag ? command_parser.parse(input.tag) : { op: ""};
        res.inFREE = 1n;
        return res;
    }
    if (type === "REG") {
        if (input.reg === "zkPC") {
            // zkPC is replaced by the caller-supplied line index instead of a register selector.
            res.CONST = BigInt(currentLine);
        } else {
            res["in" + input.reg] = 1n;
        }
        return res;
    }
    if (type === "COUNTER") {
        // Selector key is "in" + the counter name with its first letter upper-cased.
        res["in" + input.counter.charAt(0).toUpperCase() + input.counter.slice(1)] = 1n;
        return res;
    }
    if (type === "CONST") {
        res.CONST = BigInt(input.const);
        return res;
    }
    if (type === "CONSTL") {
        res.CONSTL = BigInt(input.const);
        return res;
    }
    if (type === "CONSTID") {
        const [value, ctype] = getConstant(ctx, input.identifier);
        res[ctype] = value;
        return res;
    }
    if (type === "exp") {
        res.CONST = BigInt(input.values[0])**BigInt(input.values[1]);
        return res;
    }

    const isBinary = type === "add" || type === "sub" || type === "mul";
    let E1, E2;
    if (isBinary || type === "neg") {
        E1 = processAssignmentIn(ctx, input.values[0], currentLine);
    }
    if (isBinary) {
        E2 = processAssignmentIn(ctx, input.values[1], currentLine);
    }

    if (type === "mul") {
        // Only constant * expression is supported: scale every key of the
        // non-constant side by the constant.
        if (isConstant(E1)) {
            if (typeof E2.CONSTL !== 'undefined') {
                throw new Error("Not allowed CONST and CONSTL in same operation");
            }
            for (const key of Object.keys(E2)) {
                E2[key] *= E1.CONST;
            }
            return E2;
        }
        if (isConstant(E2)) {
            if (typeof E1.CONSTL !== 'undefined') {
                throw new Error("Not allowed CONST and CONSTL in same operation");
            }
            for (const key of Object.keys(E1)) {
                E1[key] *= E2.CONST;
            }
            return E1;
        }
        throw new Error("Multiplication not allowed in input");
    }
    if (type === "neg") {
        for (const key of Object.keys(E1)) {
            E1[key] = -E1[key];
        }
        return E1;
    }
    if (type === "sub" || type === "add") {
        if (type === "sub") {
            // a - b is evaluated as a + (-b); the parsed tag object is left untouched.
            for (const key of Object.keys(E2)) {
                if (key !== "freeInTag") {
                    E2[key] = -E2[key];
                }
            }
        }
        if (E1.freeInTag && E2.freeInTag) throw new Error("Only one tag allowed");
        for (const key of Object.keys(E2)) {
            if (E1[key]) {
                E1[key] += E2[key];
            } else {
                E1[key] = E2[key];
            }
        }
        if (typeof E1.CONST !== 'undefined' && typeof E1.CONSTL !== 'undefined') {
            throw new Error("Not allowed CONST and CONSTL in same operation");
        }
        return E1;
    }
    if (type === "reference") {
        // `@name` resolves to a label's step index first, then to a variable's offset.
        res.labelCONST = input.identifier;
        if (typeof ctx.definedLabels[input.identifier] !== 'undefined') {
            res.CONST = BigInt(ctx.definedLabels[input.identifier]);
        } else if (typeof ctx.vars[input.identifier] !== 'undefined') {
            res.CONST = BigInt(ctx.vars[input.identifier].offset);
        } else {
            throw new Error(`Not found label/variable ${input.identifier}`)
        }
        return res;
    }
    throw new Error( `Invalid type: ${input.type}`);
}

assignment中out内容的处理规则为:

/**
 * Builds the `set<REG>` flags for the destination registers of an assignment.
 *
 * @param {object} ctx - Compiler context; `ctx.currentLine` is read only to
 *   build the read-only-register error message.
 * @param {string[]} outputs - Destination register names, in source order.
 * @returns {object} Map with one `set<REG>: 1` entry per destination.
 * @throws {Error} If a register is listed twice or is one of `readOnlyRegisters`.
 */
function processAssignmentOut(ctx, outputs) {
    const flags = {};
    for (const register of outputs) {
        const flagName = "set" + register;
        if (flags[flagName] !== undefined) {
            throw new Error(`Register ${register} added twice in asssignment output`);
        }
        // Reserved read-only registers can never be written, so they are rejected as destinations.
        if (readOnlyRegisters.includes(register)) {
            const where = ctx.currentLine;
            throw new Error(`Register ${register} is readonly register, could not be used as output destination. ${where.fileName}:${where.line}`);
        }
        flags[flagName] = 1;
    }
    return flags;
}

最后再进一步将ctx.out中的内容铺平展开:

	// Finalisation pass, run only when isMain is set: resolve symbolic offsets,
	// parse the attached commands and flatten each step's line information.
	if (isMain) {
        for (let i=0; i<ctx.out.length; i++) {
            // An offset that is defined but not numeric is still a symbolic name to resolve.
            if (
                    (typeof ctx.out[i].offset !== "undefined") &&
                    (isNaN(ctx.out[i].offset))
               ) {
                if (ctx.out[i].JMP || ctx.out[i].JMPC || ctx.out[i].JMPN) {
                    // Jump steps: the name is a label, replaced by the index stored in definedLabels.
                    if (typeof ctx.definedLabels[ctx.out[i].offset] === "undefined") {
                        error(ctx.out[i].line, `Label: ${ctx.out[i].offset} not defined.`);
                    }
                    ctx.out[i].offsetLabel = ctx.out[i].offset; // keep the symbolic name alongside the resolved value
                    ctx.out[i].offset = ctx.definedLabels[ctx.out[i].offset];
                } else {
                    // Every other step: the name is a variable, replaced by its allocated offset.
                    ctx.out[i].offsetLabel = ctx.out[i].offset;
                    if (typeof ctx.vars[ctx.out[i].offset] === "undefined") {
                        error(ctx.out[i].line, `Variable: ${ctx.out[i].offset} not defined.`);
                    }
                    // useCTX records the variable's scope: 1 for CTX, 0 for GLOBAL.
                    if (ctx.vars[ctx.out[i].offset].scope === 'CTX') {
                        ctx.out[i].useCTX = 1;
                    } else if (ctx.vars[ctx.out[i].offset].scope === 'GLOBAL') {
                        ctx.out[i].useCTX = 0;
                    } else {
                        error(ctx.out[i].line, `Invalid variable scpoe: ${ctx.out[i].offset} not defined.`);
                    }
                    ctx.out[i].offset = ctx.vars[ctx.out[i].offset].offset;
                }
            }
            try {
                // Parse the command lists attached before and after this step.
                parseCommands(ctx.out[i].cmdBefore);
                parseCommands(ctx.out[i].cmdAfter);
            } catch (err) {
                err.message = "Error parsing tag: " + err.message;
                error(ctx.out[i].line, err);
            }
            resolveDataOffset(i, ctx.out[i]);
            // Replace the parsed-line object with its file name, line number and source text.
            ctx.out[i].fileName = ctx.out[i].line.fileName;
            ctx.out[i].line = ctx.out[i].line.line;
            ctx.out[i].lineStr = ctx.srcLines[ctx.out[i].fileName][ctx.out[i].line - 1] ?? '';
        }

        // Result: the step list passed through stringifyBigInts, plus the label table.
        const res = {
            program:  stringifyBigInts(ctx.out),
            labels: ctx.definedLabels
        }

        return res;
    }

最终arrays.zkasm的编译结果为:

{
 "program": [
  {
   "inSTEP": "1",
   "setA": 1,
   "line": 7,
   "fileName": "arrays.zkasm",
   "lineStr": "        STEP => A"
  },
  {
   "CONST": "0",
   "assert": 1,
   "line": 8,
   "fileName": "arrays.zkasm",
   "lineStr": "        0   :ASSERT"
  },
  {
   "CONST": "1",
   "offset": 0,
   "mOp": 1,
   "mWR": 1,
   "line": 10,
   "offsetLabel": "a",
   "useCTX": 0,
   "fileName": "arrays.zkasm",
   "lineStr": "        1   :MSTORE(a)"
  },
  {
   "CONST": "2",
   "offset": 100,
   "mOp": 1,
   "mWR": 1,
   "line": 11,
   "offsetLabel": "b",
   "useCTX": 0,
   "fileName": "arrays.zkasm",
   "lineStr": "        2   :MSTORE(b)"
  },
  {
   "CONST": "3",
   "offset": 101,
   "mOp": 1,
   "mWR": 1,
   "line": 12,
   "offsetLabel": "c",
   "useCTX": 0,
   "fileName": "arrays.zkasm",
   "lineStr": "        3   :MSTORE(c)"
  },
  {
   "CONST": "4",
   "offset": 401,
   "mOp": 1,
   "mWR": 1,
   "line": 13,
   "offsetLabel": "d",
   "useCTX": 0,
   "fileName": "arrays.zkasm",
   "lineStr": "        4   :MSTORE(d)"
  },
  {
   "labelCONST": "a",
   "CONST": "0",
   "setA": 1,
   "line": 14,
   "fileName": "arrays.zkasm",
   "lineStr": "        @a => A"
  },
  {
   "labelCONST": "b",
   "CONST": "100",
   "setA": 1,
   "line": 15,
   "fileName": "arrays.zkasm",
   "lineStr": "        @b => A"
  },
  {
   "labelCONST": "c",
   "CONST": "101",
   "setA": 1,
   "line": 16,
   "fileName": "arrays.zkasm",
   "lineStr": "        @c => A"
  },
  {
   "labelCONST": "d",
   "CONST": "401",
   "setA": 1,
   "line": 17,
   "fileName": "arrays.zkasm",
   "lineStr": "        @d => A"
  },
  {
   "CONST": "0",
   "setA": 1,
   "setB": 1,
   "setC": 1,
   "setD": 1,
   "setE": 1,
   "setCTX": 1,
   "setSP": 1,
   "setPC": 1,
   "setGAS": 1,
   "setMAXMEM": 1,
   "setSR": 1,
   "line": 20,
   "fileName": "arrays.zkasm",
   "lineStr": "       0 => A,B,C,D,E,CTX, SP, PC, GAS, MAXMEM, SR"
  },
  {
   "freeInTag": {
    "op": "functionCall",
    "funcName": "beforeLast",
    "params": []
   },
   "inFREE": "1",
   "JMPC": 0,
   "JMPN": 1,
   "offset": 11,
   "line": 23,
   "offsetLabel": "finalWait",
   "fileName": "arrays.zkasm",
   "lineStr": "        ${beforeLast()}  : JMPN(finalWait)"
  },
  {
   "JMP": 1,
   "JMPC": 0,
   "JMPN": 0,
   "offset": 0,
   "line": 25,
   "offsetLabel": "start",
   "fileName": "arrays.zkasm",
   "lineStr": "                         : JMP(start)"
  }
 ],
 "labels": {
  "start": 0,
  "end": 10,
  "finalWait": 11,
  "opINVALID": 13
 }
}

参考资料

[1] zkASM基础语法

附录:Polygon Hermez 2.0 zkEVM系列博客

  • ZK-Rollups工作原理
  • Polygon zkEVM——Hermez 2.0简介
  • Polygon zkEVM网络节点
  • Polygon zkEVM 基本概念
  • Polygon zkEVM Prover
  • Polygon zkEVM工具——PIL和CIRCOM
  • Polygon zkEVM节点代码解析
  • Polygon zkEVM的pil-stark Fibonacci状态机初体验
  • Polygon zkEVM的pil-stark Fibonacci状态机代码解析
  • Polygon zkEVM PIL编译器——pilcom 代码解析
  • Polygon zkEVM Arithmetic状态机

你可能感兴趣的:(zkVM,零知识证明)