LCM源代码之Type Language(一)_转:


LCM是一套消息传递和数据编组(序列化,data marshalling)的库和工具,主要面向高带宽和低延时的近实时系统。它提供了publish/subscribe消息的机制,数据序列化和反序列化的代码自动生成工具。它出自MIT DARPA Urban Challenge Team。LCM轻量级且代码可读性极高,适合作为学习机器人运行架构的第一个架构。本文是LCM源代码分析的第一篇,分析LCM Type Language的自动生成工具。


LCM通过LCM Type Language来定义要传递的数据,然后通过代码自动生成来产生序列化和反序列化的代码,提供了很多主流编程语言的支持。它支持两种结构体:struct和enum,其语法类似C。struct的成员变量除了支持Primitive Types,还支持固定数组、可变长数组和常量。每个类型自带一个哈希值,由成员变量类型和名字的哈希来产生,主要被用在反序列化时的类型检查。它的自动生成工具本质上是一个C结构体的parser。对于一个lcm类定义文件,它首先分词,再解析语法,最后生成各编程语言的代码文件。

分词 tokenize.h/c


  • 字符常量,C语言格式,以'开头,可以有escaped char,以'结束。
  • 单字符token,有"();\",:\'[]"
  • C++字符串常量,以"开头,以"结尾。
  • C++格式注释,单个注释为一个分词,支持 /* */ 和 // 两种形式。
  • 连续的运算符,有"!~<>=&|^%*+="
  • 其他所有由whitespace分隔的 alpha-numeric字符串。注意.也算alpha-numeric字符。


  1. 忽略whitespace。
  2. 判断是否为字符常量,字符常量以'开头,可以有escaped char,以'结束。若是,创建分词。
  3. 判断是否为字符串常量,字符串常量以"开头,以"结尾。若是,创建分词。这部分有bug,如果运算符是文件的最后一个字符,将会陷入死循环,因为它没有判断下一个是不是EOF,无条件回滚一个字符。
  4. 判断是否是连续的运算符。若是,创建分词。
  5. 判断是否是注释。若是,创建分词。
  6. 判断是否是alpha-numeric字符串。若是,创建分词。

如果输入字符串为abc;'e' "string"/*comment*/ 3.14=+==123,它会被分词为:

tok#   line    col: token
   0      1      0: abc
   1      1      3: ;
   2      1      4: 'e'
   3      1      8: "string"
   4      1     16: comment
   5      1     28: 3.14
   6      1     32: =+==
   7      1     36: 123

语法解析 lcmgen.h/c

该文件可以看做是LCM Type Language的语法分析器(Parser),其实就是一个C语言的struct的语法解析器,采用了递归下降分析。函数主入口为lcmgen_handle_file,循环调用parse_entity函数直至没有可以解析的实体(entity)。parse_entity函数是真正的解析器,它会顺序尝试解析三种实体:package、struct、enum,任何一个实体被成功解析都会返回。PS:这种调用方式意味着同一个lcm文件中可以有多个package语句,该语句以下的定义直至下一个package语句都被安插在同一个package中。


package 实体


enum 实体

enum实例的解析也不复杂,其内部主要是循环地去解析枚举类型值lcm_enum_value_t,其实就是解析是否是val [= xxx],的格式,可以不赋值,lcm会自动地给未赋值的变量一个当前最大值+1,它也会查是否有重复的值。解析时同样不检查是否是合法变量名。它的哈希值计算仅仅考虑枚举类型的名字,每个值的名字不考虑,这是为了方便往里面添加新的值,比如ErrorCode是不断增长的。

// Enum涉及的结构体
// 值名和值
// lcm_enum_value_t: the symbolic name of an enum and its constant
//                   value.
typedef struct lcm_enum_value lcm_enum_value_t;

struct lcm_enum_value {
    char *valuename;
    int32_t value;
// 类型名、值、哈希值。
// lcm_enum_t: an enumeration, also a first-class LCM object.
typedef struct lcm_enum lcm_enum_t;

struct lcm_enum {
    lcm_typename_t *enumname;  // name of the enum

    GPtrArray *values;  // legal values for the enum
    char *lcmfile;      // file/path of function that declared it

    // hash values for enums are "weak". They only involve the name of the enum,
    // so that new enumerated values can be added without breaking the hash.
    int64_t hash;

struct 实体


常变量解析,常变量以const打头,解析类型token,类型仅支持"int8_t", "int16_t", "int32_t", "int64_t", "float", "double",期望下一个token是等号,接下来是常量值,最后是;



/// struct涉及的结构体 
/// 类型名,类型名中包含了其package,在tokenize提到,dot也算是alpha-numeric character。
// lcm_typename_t: represents the name of a type, including package
typedef struct lcm_typename lcm_typename_t;
struct lcm_typename {
    char *lctypename;  // fully-qualified name, e.g., ""
    char *package;     // package name, e.g., ""
    char *shortname;   // e.g., "laser_t"

// 数组分为可变长数组和定长数组。
// lcm_dimension_t: represents the size of a dimension of an
//                  array. The size can be either dynamic (a variable)
//                  or a constant.
typedef enum { LCM_CONST, LCM_VAR } lcm_dimension_mode_t;

typedef struct lcm_dimension lcm_dimension_t;

struct lcm_dimension {
    lcm_dimension_mode_t mode;
    char *size;  // a string containing either a member variable name or a constant

// 成员变量的定义:类型名、变量名、维度、注释
// lcm_member_t: represents one member of a struct, including (if its
//               an array), its dimensions.
typedef struct lcm_member lcm_member_t;

struct lcm_member {
    lcm_typename_t *type;
    char *membername;

    // an array of lcm_dimension_t. A scalar is a 1-dimensional array
    // of length 1.
    GPtrArray *dimensions;

    // Comments in the LCM type definition immediately before a member
    // declaration are attached to that member
    char *comment;

// 除了成员变量,还支持内嵌结构体、枚举类型和常量
// lcm_struct_t: a first-class LCM object declaration
typedef struct lcm_struct lcm_struct_t;

struct lcm_struct {
    lcm_typename_t *structname;  // name of the data type

    GPtrArray *members;  // lcm_member_t

    // recursive declaration of structs and enums
    GPtrArray *structs;    // lcm_struct_t
    GPtrArray *enums;      // locally-declared enums  DEPRECATED
    GPtrArray *constants;  // lcm_constant_t

    char *lcmfile;  // file/path of function that declared it
    int64_t hash;

    // Comments in the LCM type defition immediately before a struct is declared
    // are attached to that struct.
    char *comment;

// 类似于成员变量,多了个类型和值。
// lcm_constant_: the symbolic name of a constant and its value.
typedef struct lcm_constant lcm_constant_t;

struct lcm_constant {
    char *lctypename;  // int8_t / int16_t / int32_t / int64_t / float / double
    char *membername;
    union {
        int8_t i8;
        int16_t i16;
        int32_t i32;
        int64_t i64;
        float f;
        double d;
    } val;
    char *val_str;  // value as a string, as specified in the .lcm file

    // Comments in the LCM type definition immediately before a constant are
    // attached to the constant.
    char *comment;

C++代码生成 emit_cpp.c

该模块负责根据语法解析的结果生成C++代码,函数主入口为int emit_cpp(lcmgen_t *lcmgen)。C++代码生成不支持枚举类型,不过C代码生成支持,个人觉得把emit_c里面的代码拿过来就能用。该函数为每一个struct都生成单独的头文件。


它会根据每个结构体所在的package,在指定目录下生成对应的头文件。比如package为myspace.types,头文件的路径为 specified_folder/myspace/types/struct_name.hpp






  • 非数组 emit(2, "%-10s %s;", typename, membername);
  • 固定大小的数组,最终的声明是 typename membername[num1][num2]..;
  • 动态数组,即使有些维度是常量,一律当做可变长处理
// lcm声明
// int32_t size;
// double   degCelsius[size][2];

// 产生声明语句
// std::vector< std::vector< double > > degCelsius;



  • C++11: emit(2, "static constexpr %-8s %s = %s%s;", mapped_typename, lc->membername, lc->val_str, suffix);
  • C++98: emit(2, "static const %-8s %s = %s%s;", mapped_typename, lc->membername, lc->val_str, suffix);



  • inline int encode(void *buf, int offset, int maxlen) const;
  • inline int getEncodedSize() const;
  • inline int decode(const void *buf, int offset, int maxlen);
  • inline static int64_t getHash();
  • inline static const char* getTypeName();

此外,还有一些helper function也会被生成,这些函数被用来帮助public成员函数的实现。primitive类的相关函数定义提前写在lcm/lcm_coretypes.h中,这里阐述的是用户自定义的成员函数。




/** Encode a message into binary form.
 * @param buf The output buffer.
 * @param offset Encoding starts at thie byte offset into @p buf.
 * @param maxlen Maximum number of bytes to write.  This should generally be equal to getEncodedSize().
 * @return The number of bytes encoded, or <0 on error.
inline int encode(void *buf, int offset, int maxlen) const;









lcmgen结构清晰,代码可读性高,小巧玲珑,值得一看。LCM Type Language的优点是强类型(strongly typed),这一点比Protocol Buffers强,但是缺乏版本控制和向前、向后兼容性。个人比较推崇FlatBuffers,兼顾类型检查和前后兼容性。


  • LCM Overview - Describes the design principles of LCM, the best place to start for a quick understanding of LCM. 6 pages (PDF). IROS, Taipei, Taiwan, Oct 2010.
  • Technical Report - An expanded version of the overview, provides a more comprehensive description. 17 pages (PDF). Technical Report MIT-CSAIL-TR-2009-041, Massachusetts Institute of Technology, 2009



假设我们有如下LCM Type:

package myspace.types;

struct temperature_t
    const int64_t const_value = 999;

    string str;

    int64_t utime;

    int32_t size;
    foonamespace.Foo  foo[size][2];  
    float point[3];

    Bar  bar[2];

使用./lcm-gen -x temperature.lcm编译后,产生myspace/types/temperature_t.hpp

 * BY HAND!!
 * Generated by lcm-gen

#ifndef __myspace_types_temperature_t_hpp__
#define __myspace_types_temperature_t_hpp__


#include "foonamespace/Foo.hpp"
#include "myspace/types/Bar.hpp"

namespace myspace
namespace types

class temperature_t
        std::string str;

        int64_t    utime;

        int32_t    size;

        std::vector< std::vector< foonamespace::Foo > > foo;

        float      point[3];

        myspace::types::Bar bar[2];

        // If you're using C++11 and are getting compiler errors saying
        // things like ‘constexpr’ needed for in-class initialization of
        // static data member then re-run lcm-gen with '--cpp-std=c++11'
        // to generate code that is compliant with C++11
        static const int64_t  const_value = 999LL;

         * Encode a message into binary form.
         * @param buf The output buffer.
         * @param offset Encoding starts at thie byte offset into @p buf.
         * @param maxlen Maximum number of bytes to write.  This should generally be
         *  equal to getEncodedSize().
         * @return The number of bytes encoded, or <0 on error.
        inline int encode(void *buf, int offset, int maxlen) const;

         * Check how many bytes are required to encode this message.
        inline int getEncodedSize() const;

         * Decode a message from binary form into this instance.
         * @param buf The buffer containing the encoded message.
         * @param offset The byte offset into @p buf where the encoded message starts.
         * @param maxlen The maximum number of bytes to read while decoding.
         * @return The number of bytes decoded, or <0 if an error occured.
        inline int decode(const void *buf, int offset, int maxlen);

         * Retrieve the 64-bit fingerprint identifying the structure of the message.
         * Note that the fingerprint is the same for all instances of the same
         * message type, and is a fingerprint on the message type definition, not on
         * the message contents.
        inline static int64_t getHash();

         * Returns "temperature_t"
        inline static const char* getTypeName();

        // LCM support functions. Users should not call these
        inline int _encodeNoHash(void *buf, int offset, int maxlen) const;
        inline int _getEncodedSizeNoHash() const;
        inline int _decodeNoHash(const void *buf, int offset, int maxlen);
        inline static uint64_t _computeHash(const __lcm_hash_ptr *p);

int temperature_t::encode(void *buf, int offset, int maxlen) const
    int pos = 0, tlen;
    int64_t hash = getHash();

    tlen = __int64_t_encode_array(buf, offset + pos, maxlen - pos, &hash, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    tlen = this->_encodeNoHash(buf, offset + pos, maxlen - pos);
    if (tlen < 0) return tlen; else pos += tlen;

    return pos;

int temperature_t::decode(const void *buf, int offset, int maxlen)
    int pos = 0, thislen;

    int64_t msg_hash;
    thislen = __int64_t_decode_array(buf, offset + pos, maxlen - pos, &msg_hash, 1);
    if (thislen < 0) return thislen; else pos += thislen;
    if (msg_hash != getHash()) return -1;

    thislen = this->_decodeNoHash(buf, offset + pos, maxlen - pos);
    if (thislen < 0) return thislen; else pos += thislen;

    return pos;

int temperature_t::getEncodedSize() const
    return 8 + _getEncodedSizeNoHash();

int64_t temperature_t::getHash()
    static int64_t hash = static_cast<int64_t>(_computeHash(NULL));
    return hash;

const char* temperature_t::getTypeName()
    return "temperature_t";

int temperature_t::_encodeNoHash(void *buf, int offset, int maxlen) const
    int pos = 0, tlen;

    char* str_cstr = const_cast<char*>(this->str.c_str());
    tlen = __string_encode_array(
        buf, offset + pos, maxlen - pos, &str_cstr, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    tlen = __int64_t_encode_array(buf, offset + pos, maxlen - pos, &this->utime, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    tlen = __int32_t_encode_array(buf, offset + pos, maxlen - pos, &this->size, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    for (int a0 = 0; a0 < this->size; a0++) {
        for (int a1 = 0; a1 < 2; a1++) {
            tlen = this->foo[a0][a1]._encodeNoHash(buf, offset + pos, maxlen - pos);
            if(tlen < 0) return tlen; else pos += tlen;

    tlen = __float_encode_array(buf, offset + pos, maxlen - pos, &this->point[0], 3);
    if(tlen < 0) return tlen; else pos += tlen;

    for (int a0 = 0; a0 < 2; a0++) {
        tlen = this->bar[a0]._encodeNoHash(buf, offset + pos, maxlen - pos);
        if(tlen < 0) return tlen; else pos += tlen;

    return pos;

int temperature_t::_decodeNoHash(const void *buf, int offset, int maxlen)
    int pos = 0, tlen;

    int32_t __str_len__;
    tlen = __int32_t_decode_array(
        buf, offset + pos, maxlen - pos, &__str_len__, 1);
    if(tlen < 0) return tlen; else pos += tlen;
    if(__str_len__ > maxlen - pos) return -1;
    this->str.assign(
        static_cast<const char*>(buf) + offset + pos, __str_len__ - 1);
    pos += __str_len__;

    tlen = __int64_t_decode_array(buf, offset + pos, maxlen - pos, &this->utime, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    tlen = __int32_t_decode_array(buf, offset + pos, maxlen - pos, &this->size, 1);
    if(tlen < 0) return tlen; else pos += tlen;

    try {
    } catch (...) {
        return -1;
    for (int a0 = 0; a0 < this->size; a0++) {
        try {
        } catch (...) {
            return -1;
        for (int a1 = 0; a1 < 2; a1++) {
            tlen = this->foo[a0][a1]._decodeNoHash(buf, offset + pos, maxlen - pos);
            if(tlen < 0) return tlen; else pos += tlen;

    tlen = __float_decode_array(buf, offset + pos, maxlen - pos, &this->point[0], 3);
    if(tlen < 0) return tlen; else pos += tlen;

    for (int a0 = 0; a0 < 2; a0++) {
        tlen = this->bar[a0]._decodeNoHash(buf, offset + pos, maxlen - pos);
        if(tlen < 0) return tlen; else pos += tlen;

    return pos;

int temperature_t::_getEncodedSizeNoHash() const
    int enc_size = 0;
    enc_size += this->str.size() + 4 + 1;
    enc_size += __int64_t_encoded_array_size(NULL, 1);
    enc_size += __int32_t_encoded_array_size(NULL, 1);
    for (int a0 = 0; a0 < this->size; a0++) {
        for (int a1 = 0; a1 < 2; a1++) {
            enc_size += this->foo[a0][a1]._getEncodedSizeNoHash();
    enc_size += __float_encoded_array_size(NULL, 3);
    for (int a0 = 0; a0 < 2; a0++) {
        enc_size += this->bar[a0]._getEncodedSizeNoHash();
    return enc_size;

uint64_t temperature_t::_computeHash(const __lcm_hash_ptr *p)
    const __lcm_hash_ptr *fp;
    for(fp = p; fp != NULL; fp = fp->parent)
        if(fp->v == temperature_t::getHash)
            return 0;
    const __lcm_hash_ptr cp = { p, temperature_t::getHash };

    uint64_t hash = 0xd82eda712360e3edLL +
         foonamespace::Foo::_computeHash(&cp) +

    return (hash<<1) + ((hash>>63)&1);
