【Solidity】用于(内联)汇编的语言:Joyfully Universal Language(JULIA)

JULIA 是一种中间语言,可以编译到多种不同的后端(计划支持 EVM 1.0、EVM 1.5 和 eWASM)。它已经可以用于 Solidity 内部的“内联汇编”,未来版本的 Solidity 编译器甚至会使用 JULIA 作为中间语言。为 JULIA 构建高级优化器阶段也应该很容易。


JULIA 是类型化的,变量和字面量都必须用后缀表示法来指定类型。所支持的类型是 bool、u8、s8、u32、s32、u64、s64、u128、s128、u256 和 s256。

下面的示例程序假设 EVM 操作码 mul、div 和 mod 可用(原生提供或以函数形式提供),并计算幂运算。

    // Computes base raised to the power of exponent by recursive squaring.
    // Assumes the EVM opcodes mul, div and mod are available as functions.
    //   base     - the number to be raised
    //   exponent - the power to raise it to
    //   result   - base ** exponent
    function power(base:u256, exponent:u256) -> result:u256
    {
        switch exponent
        case 0:u256 { result := 1:u256 }
        case 1:u256 { result := base }
        default
        {
            // Square the base and halve the exponent ...
            result := power(mul(base, base), div(exponent, 2:u256))
            // ... then multiply in one more factor when the exponent is odd.
            switch mod(exponent, 2:u256)
            case 1:u256 { result := mul(base, result) }
        }
    }

也可以使用 for 循环而不是递归来实现相同的函数。这时需要 EVM 操作码 lt(小于)和 add 可用。

    // Computes base raised to the power of exponent by repeated multiplication.
    // Assumes the EVM opcodes lt, add and mul are available as functions.
    //   base     - the number to be raised
    //   exponent - the power to raise it to
    //   result   - base ** exponent
    function power(base:u256, exponent:u256) -> result:u256
    {
        result := 1:u256
        // Multiply result by base once per iteration, exponent times in total.
        for { let i := 0:u256 } lt(i, exponent) { i := add(i, 1:u256) }
        {
            result := mul(result, base)
        }
    }




Block = '{' Statement* '}'
Statement =
    Block |
    FunctionDefinition |
    VariableDeclaration |
    Assignment |
    If |
    Expression |
    Switch |
    ForLoop |
    BreakContinue
FunctionDefinition =
    'function' Identifier '(' TypedIdentifierList? ')'
    ( '->' TypedIdentifierList )? Block
VariableDeclaration =
    'let' TypedIdentifierList ( ':=' Expression )?
Assignment =
    IdentifierList ':=' Expression
Expression =
    FunctionCall | Identifier | Literal
If =
    'if' Expression Block
Switch =
    'switch' Expression Case* ( 'default' Block )?
Case =
    'case' Literal Block
ForLoop =
    'for' Block Expression Block Block
BreakContinue =
    'break' | 'continue'
FunctionCall =
    Identifier '(' ( Expression ( ',' Expression )* )? ')'
Identifier = [a-zA-Z_$] [a-zA-Z_0-9]*
IdentifierList = Identifier ( ',' Identifier)*
TypeName = Identifier | BuiltinTypeName
BuiltinTypeName = 'bool' | [us] ( '8' | '32' | '64' | '128' | '256' )
TypedIdentifierList = Identifier ':' TypeName ( ',' Identifier ':' TypeName )*
Literal =
    (NumberLiteral | StringLiteral | HexLiteral | TrueLiteral | FalseLiteral) ':' TypeName
NumberLiteral = HexNumber | DecimalNumber
HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'')
StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"'
TrueLiteral = 'true'
FalseLiteral = 'false'
HexNumber = '0x' [0-9a-fA-F]+
DecimalNumber = [0-9]+

语法限制(Restrictions on the Grammar)

Switches必须至少有一个case(包括default case)。如果覆盖表达式的所有可能值,则不应允许default case(即,具有布尔表达式的switch并且同时具有true和false情况不应允许default case)。







范围规则(Scoping Rules)

JULIA中的作用域绑定到块并且所有声明(FunctionDefinition, VariableDeclaration)引入新的标识符到这些作用域内。





形式规范(Formal Specification)





E(G, L, <{St1, ..., Stn}>: Block) =
    let G1, L1, mode = E(G, L, St1, ..., Stn)
    let L2 be a restriction of L1 to the identifiers of L
    G1, L2, mode
E(G, L, St1, ..., Stn: Statement) =
    if n is zero:
        G, L, regular
    else:
        let G1, L1, mode = E(G, L, St1)
        if mode is regular then
            E(G1, L1, St2, ..., Stn)
        otherwise
            G1, L1, mode
E(G, L, FunctionDefinition) =
    G, L, regular
E(G, L, <let var1, ..., varn := rhs>: VariableDeclaration) =
    E(G, L, <var1, ..., varn := rhs>: Assignment)
E(G, L, <let var1, ..., varn>: VariableDeclaration) =
    let L1 be a copy of L where L1[$vari] = 0 for i = 1, ..., n
    G, L1, regular
E(G, L, <var1, ..., varn := rhs>: Assignment) =
    let G1, L1, v1, ..., vn = E(G, L, rhs)
    let L2 be a copy of L1 where L2[$vari] = vi for i = 1, ..., n
    G, L2, regular
E(G, L, <for { i1, ..., in } condition post body>: ForLoop) =
    if n >= 1:
        let G1, L1, mode = E(G, L, i1, ..., in)
        // mode has to be regular due to the syntactic restrictions
        let G2, L2, mode = E(G1, L1, for {} condition post body)
        // mode has to be regular due to the syntactic restrictions
        let L3 be the restriction of L2 to only variables of L
        G2, L3, regular
    else:
        let G1, L1, v = E(G, L, condition)
        if v is false:
            G1, L1, regular
        else:
            let G2, L2, mode = E(G1, L, body)
            if mode is break:
                G2, L2, regular
            else:
                G3, L3, mode = E(G2, L2, post)
                E(G3, L3, for {} condition post body)
E(G, L, break: BreakContinue) =
    G, L, break
E(G, L, continue: BreakContinue) =
    G, L, continue
E(G, L, <if condition body>: If) =
    let G0, L0, v = E(G, L, condition)
    if v is true:
        E(G0, L0, body)
    else:
        G0, L0, regular
E(G, L, <switch condition case l1:t1 st1 ... case ln:tn stn>: Switch) =
    E(G, L, switch condition case l1:t1 st1 ... case ln:tn stn default {})
E(G, L, <switch condition case l1:t1 st1 ... case ln:tn stn default st'>: Switch) =
    let G0, L0, v = E(G, L, condition)
    // i = 1 .. n
    // Evaluate literals, context doesn't matter
    let _, _, v1 = E(G0, L0, l1)
    ...
    let _, _, vn = E(G0, L0, ln)
    if there exists smallest i such that vi = v:
        E(G0, L0, sti)
    else:
        E(G0, L0, st')

E(G, L, <name>: Identifier) =
    G, L, L[$name]
E(G, L, <fname(arg1, ..., argn)>: FunctionCall) =
    G1, L1, vn = E(G, L, argn)
    ...
    G(n-1), L(n-1), v2 = E(G(n-2), L(n-2), arg2)
    Gn, Ln, v1 = E(G(n-1), L(n-1), arg1)
    Let <function fname (param1, ..., paramn) -> ret1, ..., retm block>
    be the function of name $fname visible at the point of the call.
    Let L' be a new local state such that
    L'[$parami] = vi and L'[$reti] = 0 for all i.
    Let G'', L'', mode = E(Gn, L', block)
    G'', Ln, L''[$ret1], ..., L''[$retm]
E(G, L, l: HexLiteral) = G, L, hexString(l),
    where hexString decodes l from hex and left-aligns it into 32 bytes
E(G, L, l: StringLiteral) = G, L, utf8EncodeLeftAligned(l),
    where utf8EncodeLeftAligned performs a utf8 encoding of l
    and aligns it left into 32 bytes
E(G, L, n: HexNumber) = G, L, hex(n)
    where hex is the hexadecimal decoding function
E(G, L, n: DecimalNumber) = G, L, dec(n),
    where dec is the decimal decoding function

类型转换函数(Type Conversion Functions)


以下类型转换函数必须是可用的:

- u32tobool(x:u32) -> y:bool
- booltou32(x:bool) -> y:u32
- u32tou64(x:u32) -> y:u64
- u64tou32(x:u64) -> y:u32
- 等等(待定,TBD)

底层函数(Low-level Functions)


addu256(x:u256, y:u256) -> z:u256 | x + y
subu256(x:u256, y:u256) -> z:u256 | x - y
mulu256(x:u256, y:u256) -> z:u256 | x * y
divu256(x:u256, y:u256) -> z:u256 | x / y
divs256(x:s256, y:s256) -> z:s256 | x / y, for signed numbers in two’s complement
modu256(x:u256, y:u256) -> z:u256 | x % y
mods256(x:s256, y:s256) -> z:s256 | x % y, for signed numbers in two’s complement
signextendu256(i:u256, x:u256) -> z:u256 | sign extend from (i*8+7)th bit counting from least significant
expu256(x:u256, y:u256) -> z:u256 | x to the power of y
addmodu256(x:u256, y:u256, m:u256) -> z:u256| (x + y) % m with arbitrary precision arithmetics
mulmodu256(x:u256, y:u256, m:u256) -> z:u256| (x * y) % m with arbitrary precision arithmetics
ltu256(x:u256, y:u256) -> z:bool | 1 if x < y, 0 otherwise
gtu256(x:u256, y:u256) -> z:bool | 1 if x > y, 0 otherwise
sltu256(x:s256, y:s256) -> z:bool | 1 if x < y, 0 otherwise, for signed numbers in two’s complement
sgtu256(x:s256, y:s256) -> z:bool | 1 if x > y, 0 otherwise, for signed numbers in two’s complement
equ256(x:u256, y:u256) -> z:bool | 1 if x == y, 0 otherwise
notu256(x:u256) -> z:u256 | ~x, every bit of x is negated
andu256(x:u256, y:u256) -> z:u256 | bitwise and of x and y
oru256(x:u256, y:u256) -> z:u256 | bitwise or of x and y
xoru256(x:u256, y:u256) -> z:u256 | bitwise xor of x and y
shlu256(x:u256, y:u256) -> z:u256 | logical left shift of x by y
shru256(x:u256, y:u256) -> z:u256 | logical right shift of x by y
saru256(x:u256, y:u256) -> z:u256 | arithmetic right shift of x by y
byte(n:u256, x:u256) -> v:u256 | nth byte of x, where the most significant byte is the 0th byte Cannot this be just replaced by and256(shr256(n, x), 0xff) and let it be optimised out by the EVM backend?
Memory and storage
mload(p:u256) -> v:u256 | mem[p..(p+32))
mstore(p:u256, v:u256) | mem[p..(p+32)) := v
mstore8(p:u256, v:u256) | mem[p] := v & 0xff - only modifies a single byte
sload(p:u256) -> v:u256 | storage[p]
sstore(p:u256, v:u256) | storage[p] := v
msize() -> size:u256 | size of memory, i.e. largest accessed memory index, albeit due to the memory extension function, which extends by words, this will always be a multiple of 32 bytes
Execution control
create(v:u256, p:u256, s:u256) | create new contract with code mem[p..(p+s)) and send v wei and return the new address
call(g:u256, a:u256, v:u256, in:u256, insize:u256, out:u256, outsize:u256) -> r:u256 | call contract at address a with input mem[in..(in+insize)) providing g gas and v wei and output area mem[out..(out+outsize)) returning 0 on error (e.g. out of gas) and 1 on success
callcode(g:u256, a:u256, v:u256, in:u256, insize:u256, out:u256, outsize:u256) -> r:u256 | identical to call but only use the code from a and stay in the context of the current contract otherwise
delegatecall(g:u256, a:u256, in:u256, insize:u256, out:u256, outsize:u256) -> r:u256 | identical to callcode, but also keep caller and callvalue
stop() | stop execution, identical to return(0,0) Perhaps it would make sense retiring this as it equals to return(0,0). It can be an optimisation by the EVM backend.
abort() | abort (equals to invalid instruction on EVM)
return(p:u256, s:u256) | end execution, return data mem[p..(p+s))
revert(p:u256, s:u256) | end execution, revert state changes, return data mem[p..(p+s))
selfdestruct(a:u256) | end execution, destroy current contract and send funds to a
log0(p:u256, s:u256) | log without topics and data mem[p..(p+s))
log1(p:u256, s:u256, t1:u256) | log with topic t1 and data mem[p..(p+s))
log2(p:u256, s:u256, t1:u256, t2:u256) | log with topics t1, t2 and data mem[p..(p+s))
log3(p:u256, s:u256, t1:u256, t2:u256, t3:u256) | log with topics t1, t2, t3 and data mem[p..(p+s))
log4(p:u256, s:u256, t1:u256, t2:u256, t3:u256, t4:u256) | log with topics t1, t2, t3, t4 and data mem[p..(p+s))
State queries
blockcoinbase() -> address:u256 | current mining beneficiary
blockdifficulty() -> difficulty:u256 | difficulty of the current block
blockgaslimit() -> limit:u256 | block gas limit of the current block
blockhash(b:u256) -> hash:u256 | hash of block nr b - only for last 256 blocks excluding current
blocknumber() -> block:u256 | current block number
blocktimestamp() -> timestamp:u256 | timestamp of the current block in seconds since the epoch
txorigin() -> address:u256 | transaction sender
txgasprice() -> price:u256 | gas price of the transaction
gasleft() -> gas:u256 | gas still available to execution
balance(a:u256) -> v:u256 | wei balance at address a
this() -> address:u256 | address of the current contract / execution context
caller() -> address:u256 | call sender (excluding delegatecall)
callvalue() -> v:u256 | wei sent together with the current call
calldataload(p:u256) -> v:u256 | call data starting from position p (32 bytes)
calldatasize() -> v:u256 | size of call data in bytes
calldatacopy(t:u256, f:u256, s:u256) | copy s bytes from calldata at position f to mem at position t
codesize() -> size:u256 | size of the code of the current contract / execution context
codecopy(t:u256, f:u256, s:u256) | copy s bytes from code at position f to mem at position t
extcodesize(a:u256) -> size:u256 | size of the code at address a
extcodecopy(a:u256, t:u256, f:u256, s:u256) | like codecopy(t, f, s) but take code at address a
discardu256(unused:u256) | discard value
splitu256tou64(x:u256) -> (x1:u64, x2:u64, x3:u64, x4:u64) | split u256 to four u64’s
combineu64tou256(x1:u64, x2:u64, x3:u64, x4:u64) -> (x:u256) | combine four u64’s into a single u256
sha3(p:u256, s:u256) -> v:u256 | keccak(mem[p..(p+s)))



后端: EVM


EVM 1.5






TopLevelObject = 'object' '{' Code? ( Object | Data )* '}'
Object = 'object' StringLiteral '{' Code? ( Object | Data )* '}'
Code = 'code' Block
Data = 'data' StringLiteral HexLiteral
HexLiteral = 'hex' ('"' ([0-9a-fA-F]{2})* '"' | '\'' ([0-9a-fA-F]{2})* '\'')
StringLiteral = '"' ([^"\r\n\\] | '\\' .)* '"'




// Code consists of a single object. A single "code" node is the code of the object.
// Every (other) named object or data section is serialized and
// made accessible to the special built-in functions datacopy / dataoffset / datasize
// Top-level (unnamed) object: its "code" node is the constructor, and the
// nested object "runtime" is what the constructor returns.
object {
    code {
        let size := datasize("runtime")
        let offset := allocate(size)
        // This will turn into a memory->memory copy for eWASM and
        // a codecopy for EVM
        datacopy(dataoffset("runtime"), offset, size)
        // this is a constructor and the runtime code is returned
        return(offset, size)
    }

    data "Table2" hex"4123"

    object "runtime" {
        code {
            // runtime code

            let size := datasize("Contract2")
            let offset := allocate(size)
            // This will turn into a memory->memory copy for eWASM and
            // a codecopy for EVM
            datacopy(dataoffset("Contract2"), offset, size)
            // constructor parameter is a single number 0x1234
            mstore(add(offset, size), 0x1234)
            // NOTE(review): the low-level function table lists create(v, p, s);
            // this call passes only two arguments - confirm the intended value argument.
            create(offset, add(size, 32))
        }

        // Embedded object. Use case is that the outside is a factory contract,
        // and Contract2 is the code to be created by the factory
        object "Contract2" {
            code {
                // code here ...
            }

            object "runtime" {
                code {
                    // code here ...
                }
            }

            data "Table1" hex"4123"
        }
    }
}


