【编译原理】json解析器的全流程实现

目录

一、json结构

1. 数据类型

2. 语法

二、文法

1. 词法设计

2. 语法设计

3. 文法符号分析

4. 文法DFA

5. 文法分析表

三、程序实现

1. 程序思路

2. 源码实现

3. 测试

测试代码

测试结果

四、结语


一、json结构

简单地介绍一下json的数据类型和语法

1. 数据类型

json每一个数据都是一个“对象”,其数据类型有6种:null、array、object、bool、number、string。

2. 语法

一个文件只能有一个对象,一个array可以存储多个对象,对象之间用逗号分隔,不限数据类型;一个object也可以存储多个对象,而且是按照key-value形式存储,每一个key都对应一个对象,同样也不限数据类型。

二、文法

1. 词法设计

json字符串可拆分成10种词组:

①数字:由负号或数字开头,其后由数字和一个小数点组成的字符串

②字符串:由引号开头、引号结尾的字符串

③布尔值:true、false

④空值:null

⑤上花括号:{ 

⑥下花括号:}

⑦上中括号:[

⑧下中括号:]

⑨冒号

⑩逗号

2. 语法设计

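设定文法 $G = (V_N, V_T, P, S)$

其中 $V_N = \{S, A, B, C, D\}$ 为非终结符集合,$V_T = \{\text{数字}, \text{字符串}, \text{布尔值}, \text{空值}, \{, \}, [, ], \text{逗号}, \text{冒号}\}$ 为终结符集合,$P$ 为产生式集合

 开始符是 $S$,产生式如下图: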

【编译原理】json解析器的全流程实现_第1张图片

 注:文法未经优化

3. 文法符号分析

FIRST集合:

 FOLLOW集合:

4. 文法DFA

这个DFA是手动画的,所以可能会有那么一丢丢问题。

【编译原理】json解析器的全流程实现_第2张图片

5. 文法分析表

根据DFA依次填表(已结合FOLLOW集合),得到以下分析表:

(sn 表示移进,并进入状态 n;rn 表示根据第 n 条产生式进行归约;acc 表示接受)

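(“数字”到“$”共 11 列为 ACTION,S 到 D 共 5 列为 GOTO,空白单元格表示出错)

| 状态 | 数字 | 字符串 | 布尔值 | 空值 | { | } | [ | ] | , | : | $ | S | A | B | C | D |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | s5 | s6 | s7 | s8 | s2 |  | s9 |  |  |  |  |  | 3 | 4 |  |  |
| 2 |  | s12 |  |  |  | s10 |  |  |  |  |  |  |  |  | 11 |  |
| 3 |  |  |  |  |  |  |  |  |  |  | acc |  |  |  |  |  |
| 4 |  |  |  |  |  |  |  |  |  |  | r2 |  |  |  |  |  |
| 5 |  |  |  |  |  | r3 |  | r3 | r3 |  | r3 |  |  |  |  |  |
| 6 |  |  |  |  |  | r4 |  | r4 | r4 |  | r4 |  |  |  |  |  |
| 7 |  |  |  |  |  | r5 |  | r5 | r5 |  | r5 |  |  |  |  |  |
| 8 |  |  |  |  |  | r6 |  | r6 | r6 |  | r6 |  |  |  |  |  |
| 9 | s5 | s6 | s7 | s8 | s2 |  | s9 | s18 |  |  |  |  |  | 19 |  | 20 |
| 10 |  |  |  |  |  | r9 |  | r9 | r9 |  | r9 |  |  |  |  |  |
| 11 |  |  |  |  |  | s13 |  |  | s14 |  |  |  |  |  |  |  |
| 12 |  |  |  |  |  |  |  |  |  | s16 |  |  |  |  |  |  |
| 13 |  |  |  |  |  | r7 |  | r7 | r7 |  | r7 |  |  |  |  |  |
| 14 |  | s12 |  |  |  |  |  |  |  |  |  |  |  |  | 15 |  |
| 15 |  |  |  |  |  | r12 |  |  | s14 |  |  |  |  |  |  |  |
| 16 | s5 | s6 | s7 | s8 | s2 |  | s9 |  |  |  |  |  |  | 17 |  |  |
| 17 |  |  |  |  |  | r11 |  |  | r11 |  |  |  |  |  |  |  |
| 18 |  |  |  |  |  | r10 |  | r10 | r10 |  | r10 |  |  |  |  |  |
| 19 |  |  |  |  |  |  |  | r13 | r13 |  |  |  |  |  |  |  |
| 20 |  |  |  |  |  |  |  | s21 | s22 |  |  |  |  |  |  |  |
| 21 |  |  |  |  |  | r8 |  | r8 | r8 |  | r8 |  |  |  |  |  |
| 22 | s5 | s6 | s7 | s8 | s2 |  | s9 |  |  |  |  |  |  | 19 |  | 23 |
| 23 |  |  |  |  |  |  |  | r14 | s22 |  |  |  |  |  |  |  |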
三、程序实现

1. 程序思路

①把文本拆分成若干个词组(名为token),形成一个token序列,并在末尾插入一个表示结束的token(即文法中的$)

②根据文法表,判断token序列的顺序是否符合设定的语法

③将合法的token序列转换为C的json数据结构

2. 源码实现

语言:C99标准及以上

注:代码未经优化

头文件:

#ifndef LIB_JSON_H
#define LIB_JSON_H

// JSON data types (the value stored in JsonObj.type)
#define Null 0
#define Array 1
#define Object 2
#define Bool 3
#define Number 4
#define String 5

// Parse error codes (the value stored in ParseResult.error_code)
#define SUCCESS 0 // parsing succeeded
#define ERROR -1 // syntax error
#define UNKONW_CHARACTOR -2 // unrecognised character
#define LOST_QUOTATION -3 // missing quotation mark
#define MULTIPLE_POINTS -4 // more than one decimal point

/*
 * Boolean type for C callers. C++ has bool/true/false built in.
 * The standard header is used instead of a hand-written typedef so this
 * header can be combined with other code that includes <stdbool.h>, and
 * with compilers where bool/true/false are keywords.
 */
#ifndef __cplusplus
#include <stdbool.h>
#endif

// One JSON node. The meaning of `data` depends on `type`.
struct JsonObj
{
    char *name; // name
    void *data; // payload
    int type; // data type
    int length; // length of the array or object
    int capacity; // capacity of the array or object
    int rindex; // read cursor of the array or object
};

typedef struct JsonObj * JsonObjPtr;

// Outcome of Parse().
struct ParseResult
{
    int row; // row of the error
    int col; // column of the error
    int error_code; // error code
    JsonObjPtr result; // pointer to the parsed JSON node on success; must be released manually with Free
};

#ifdef __cplusplus
extern "C" 
{
#endif

/*
 * Release a node. Passing NULL is a no-op; for arrays and objects the
 * children are released as well.
 */
extern void Free(JsonObjPtr obj);

/*
 * Create a JSON node. Create() dispatches on the type code and falls back
 * to a null node for any unrecognised code.
 * Memory is managed by the parent node; a node without a parent must be
 * released by the caller.
 */
extern JsonObjPtr Create(int type);
extern JsonObjPtr CreateNull(void);
extern JsonObjPtr CreateBool(void);
extern JsonObjPtr CreateNumber(void);
extern JsonObjPtr CreateString(void);
extern JsonObjPtr CreateArray(void);
extern JsonObjPtr CreateObject(void);

/*
 * Return-value convention for the bool functions below:
 * 0 on failure, 1 on success.
 */

/*
 * Test the type of a node.
 */
extern bool IsNull(const JsonObjPtr obj);
extern bool IsBool(const JsonObjPtr obj);
extern bool IsNumber(const JsonObjPtr obj);
extern bool IsString(const JsonObjPtr obj);
extern bool IsArray(const JsonObjPtr obj);
extern bool IsObject(const JsonObjPtr obj);

/*
 * Set the value of a node. Fails when the node is not of the matching type.
 */
extern bool SetBool(JsonObjPtr obj, bool value);
extern bool SetNumber(JsonObjPtr obj, double value);
extern bool SetString(JsonObjPtr obj, const char *value);

/*
 * Read the value of a node. The node type is not checked.
 */
extern bool GetBool(JsonObjPtr obj);
extern double GetNumber(JsonObjPtr obj);
extern const char *GetString(JsonObjPtr obj);

/*
 * Append an element at the end of an array, or a named element at the end
 * of an object.
 */
extern bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c);
extern bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c);

/*
 * Remove the last element of an array.
 */
extern bool RemoveArrayElement(JsonObjPtr obj);

/*
 * Remove the element called `name` from an object node.
 */
extern bool RemoveObjectElement(JsonObjPtr obj, const char *name);

/*
 * Get an array element by index; NULL when unavailable.
 */
extern JsonObjPtr GetArrayElement(JsonObjPtr obj, int index);

/*
 * Get an object element by name; NULL when unavailable.
 */
extern JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name);

/*
 * Start iterating: returns the first child of an array or object node.
 */
extern JsonObjPtr FirstElement(JsonObjPtr obj);

/*
 * Continue iterating: returns the next child of an array or object node.
 */
extern JsonObjPtr NextElement(JsonObjPtr obj);

/*
 * Parse a JSON string.
 * @str  JSON text
 * @size  length of the text
 * @return  a ParseResult object
 */
extern struct ParseResult Parse(const char *str, int size);

// Get the message describing a parse error
extern const char * ErrorMsg(const struct ParseResult *pr);

// Get the JSON node of a successful parse; must be released manually with Free()
extern JsonObjPtr GetJsonObjPtr(struct ParseResult *pr);

// Tell whether parsing succeeded
extern bool ParseSuccess(const struct ParseResult *pr);

// Get the position of the parse error
extern int ParseErrorRow(const struct ParseResult *pr);
extern int ParseErrorCol(const struct ParseResult *pr);

/*
 * Convert a JSON node to a string.
 * @buff  memory that receives the resulting string
 * @newl  new-line string, may be NULL
 * @newc  line-start (indent) string, may be NULL
 * @aoc  string written after an object colon, may be NULL
 * @aac  string written after an array comma, may be NULL
 * @return  length of the string
 */
extern int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac);

/*
 * Convert a JSON node to a formatted string.
 * @return  ToString(obj, buff, "\n", "    ", " ", " ")
 */
extern int ToFormatString(JsonObjPtr obj, char *buff);

/*
 * Convert a JSON node to an unformatted string.
 * @return  ToString(obj, buff, NULL, NULL, NULL, NULL)
 */
extern int ToUnformatString(JsonObjPtr obj, char *buff);

#ifdef __cplusplus
}
#endif

#endif // LIB_JSON_H

源文件:

#include "libjson.h"
#include <limits.h> // INT_MIN  INT_MAX
#include <stdio.h>  // snprintf
#include <stdlib.h> // malloc  calloc  realloc  free  atof  strtod  gcvt
#include <string.h> // memcpy  memcmp  strlen  strcmp

#ifndef NULL
#define NULL 0
#endif

/*
 * Lvalue for the index-th child pointer stored in an array/object node.
 * Both arguments and the whole expansion are parenthesised so that
 * expression arguments such as IndexObj(p + 1, i - 1) expand as intended.
 */
#define IndexObj(obj, index) \
    (*(((JsonObjPtr *)(obj)->data) + (index)))

/*
 * Releases every child held by an array/object node, in index order.
 * The child pointer table itself (obj->data) is not released here.
 */
static void FreeChildren(JsonObjPtr obj)
{
    JsonObjPtr *children = (JsonObjPtr *)obj->data;
    int pos;

    for(pos = 0; pos < obj->length; ++pos)
    {
        Free(children[pos]);
    }
}

/*
 * Releases a node together with everything it owns.
 *
 * - Array/Object: every child is released first, then the pointer table.
 * - Bool/Number/String: the payload buffer is released.
 * - Null (and any other type code): there is no payload to release.
 *
 * The node's name is released as well. Names are heap buffers, either
 * allocated by AppendObjectElement or taken over from a token by the
 * parser, and were previously leaked.
 * Passing NULL is a no-op.
 */
void Free(JsonObjPtr obj)
{
    if(NULL == obj)
    {
        return;
    }

    switch(obj->type)
    {
    case Array:
    case Object:
        FreeChildren(obj);
        /* fallthrough: the pointer table is released like any payload */
    case Bool:
    case Number:
    case String:
        free(obj->data);
        break;
    default:
        break;
    }

    free(obj->name);
    free(obj);
}

/*
 * Creates a node for the given type code by delegating to the matching
 * CreateXxx function. Any code other than Array, Object, Bool, Number or
 * String produces a null node.
 */
JsonObjPtr Create(int type)
{
    JsonObjPtr node;

    if(type == Array)
    {
        node = CreateArray();
    }
    else if(type == Object)
    {
        node = CreateObject();
    }
    else if(type == Bool)
    {
        node = CreateBool();
    }
    else if(type == Number)
    {
        node = CreateNumber();
    }
    else if(type == String)
    {
        node = CreateString();
    }
    else
    {
        node = CreateNull();
    }

    return node;
}

JsonObjPtr CreateNull()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = NULL;
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Null;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateBool()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(bool));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Bool;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateNumber()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(double));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Number;
    ptr->capacity = 0;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateString()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(char));
    *(char *)ptr->data = '\0';
    ptr->length = 1;
    ptr->rindex = 0;
    ptr->type = String;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateArray()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(JsonObjPtr));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Array;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

JsonObjPtr CreateObject()
{
    JsonObjPtr ptr = (JsonObjPtr)malloc(sizeof(struct JsonObj));
    ptr->data = malloc(sizeof(JsonObjPtr));
    ptr->length = 0;
    ptr->rindex = 0;
    ptr->type = Object;
    ptr->capacity = 1;
    ptr->name = NULL;
    return ptr;
}

/*
 * Type predicates. Each returns true only when obj is non-NULL and its
 * type code matches. A NULL node (for example the result of a failed
 * lookup) is reported as "not of this type" instead of being dereferenced.
 */
bool IsNull(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Null;
}

bool IsBool(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Bool;
}

bool IsNumber(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Number;
}

bool IsString(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == String;
}

bool IsArray(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Array;
}

bool IsObject(const JsonObjPtr obj)
{
    return NULL != obj && obj->type == Object;
}

/*
 * Stores value in a bool node.
 * Returns true on success, false when obj is not a bool node.
 */
bool SetBool(JsonObjPtr obj, bool value)
{
    bool *slot;

    if(IsBool(obj))
    {
        slot = (bool *)obj->data;
        *slot = value;
        return true;
    }

    return false;
}

/*
 * Stores value in a number node.
 * Returns true on success, false when obj is not a number node.
 */
bool SetNumber(JsonObjPtr obj, double value)
{
    double *slot;

    if(IsNumber(obj))
    {
        slot = (double *)obj->data;
        *slot = value;
        return true;
    }

    return false;
}

/*
 * Replaces the text of a string node with a private copy of value.
 *
 * The copy is made before the old buffer is released, so:
 *  - value may point into the node's current text
 *    (e.g. SetString(obj, GetString(obj)));
 *  - the node keeps its previous text when memory cannot be allocated.
 *
 * Returns true on success; false when value is NULL, obj is not a string
 * node, or allocation fails.
 */
bool SetString(JsonObjPtr obj, const char *value)
{
    size_t len;
    char *copy;

    if(NULL == value || !IsString(obj))
    {
        return false;
    }

    len = strlen(value);
    copy = malloc(len + 1);
    if(NULL == copy)
    {
        return false;
    }
    memcpy(copy, value, len + 1); // includes the terminator

    free(obj->data);
    obj->data = copy;
    return true;
}

/*
 * Value getters. They read obj->data as the requested type without
 * checking the node type, so the caller must pass a node of that type.
 */
bool GetBool(JsonObjPtr obj)
{
    const bool *stored = (const bool *)obj->data;
    return *stored;
}

double GetNumber(JsonObjPtr obj)
{
    const double *stored = (const double *)obj->data;
    return *stored;
}

/* The returned pointer is the node's own buffer, not a copy. */
const char *GetString(JsonObjPtr obj)
{
    const char *text = (const char *)obj->data;
    return text;
}

static void AppendElement(JsonObjPtr obj, JsonObjPtr c)
{
    if(obj->length >= obj->capacity)
    {
        obj->capacity *= 2;
        obj->data = realloc(obj->data, sizeof(JsonObjPtr) * obj->capacity);
    }

    IndexObj(obj, obj->length++) = c;
}

/*
 * Appends c as the last element of array node obj.
 * Returns false when obj is not an array, true otherwise.
 */
bool AppendArrayElement(JsonObjPtr obj, JsonObjPtr c)
{
    if(IsArray(obj))
    {
        AppendElement(obj, c);
        return true;
    }

    return false;
}

/*
 * Names c with a private copy of name and appends it as the last member
 * of object node obj.
 *
 * The copy is allocated before the old name is released, so name may
 * alias c->name, and c keeps its previous name when allocation fails.
 *
 * Returns true on success; false when name or c is NULL, obj is not an
 * object, or the name copy cannot be allocated.
 */
bool AppendObjectElement(JsonObjPtr obj, const char *name, JsonObjPtr c)
{
    size_t nlen;
    char *copy;

    if(NULL == name || NULL == c || !IsObject(obj))
    {
        return false;
    }

    nlen = strlen(name);
    copy = malloc(nlen + 1);
    if(NULL == copy)
    {
        return false;
    }
    memcpy(copy, name, nlen + 1); // includes the terminator

    free(c->name);
    c->name = copy;

    AppendElement(obj, c);
    return true;
}

/*
 * Removes and releases the last element of array node obj.
 * Returns false when obj is not an array or is empty, true otherwise.
 */
bool RemoveArrayElement(JsonObjPtr obj)
{
    int last;

    if(!IsArray(obj) || obj->length < 1)
    {
        return false;
    }

    last = obj->length - 1;
    Free(IndexObj(obj, last));
    obj->length = last;
    return true;
}

/*
 * Removes the first member called name from object node obj, releases it,
 * and closes the gap by shifting the later members down one slot.
 *
 * The removed member is now released with Free(); it used to be dropped
 * from the table without being freed, which leaked the whole subtree.
 *
 * Returns false when name is NULL or obj is not an object. Returns true
 * otherwise, including when no member has that name (unchanged behavior).
 */
bool RemoveObjectElement(JsonObjPtr obj, const char *name)
{
    int i;

    if(NULL == name || !IsObject(obj))
    {
        return false;
    }

    for(i = 0; i < obj->length; ++i)
    {
        JsonObjPtr ptr = IndexObj(obj, i);
        if(NULL != ptr->name && 0 == strcmp(name, ptr->name))
        {
            break;
        }
    }

    if(i >= obj->length)
    {
        return true; // nothing to remove
    }

    Free(IndexObj(obj, i));

    --obj->length;
    for(; i < obj->length; ++i)
    {
        IndexObj(obj, i) = IndexObj(obj, i + 1);
    }
    return true;
}

/*
 * Returns the element at index in array node obj.
 * Returns NULL when obj is not an array or index is outside
 * [0, length). Negative indices are rejected too; they used to read
 * memory in front of the table.
 */
JsonObjPtr GetArrayElement(JsonObjPtr obj, int index)
{
    if(!IsArray(obj))
    {
        return NULL;
    }

    if(index < 0 || index >= obj->length)
    {
        return NULL;
    }

    return IndexObj(obj, index);
}

/*
 * Looks up the first member of object node obj whose name equals name.
 * Returns NULL when obj is not an object or no member matches.
 */
JsonObjPtr GetObjectElement(JsonObjPtr obj, const char *name)
{
    int pos;

    if(!IsObject(obj))
    {
        return NULL;
    }

    for(pos = 0; pos < obj->length; ++pos)
    {
        JsonObjPtr candidate = IndexObj(obj, pos);
        if(strcmp(name, candidate->name) == 0)
        {
            return candidate;
        }
    }

    return NULL;
}

/*
 * Rewinds the read cursor of an array/object node and returns its first
 * child. Returns NULL when obj is neither an array nor an object, or when
 * it has no children.
 */
JsonObjPtr FirstElement(JsonObjPtr obj)
{
    if(IsArray(obj) || IsObject(obj))
    {
        obj->rindex = 0;
        if(obj->length > 0)
        {
            return IndexObj(obj, 0);
        }
    }

    return NULL;
}

/*
 * Advances the read cursor of an array/object node by one and returns
 * the child it now points at. Returns NULL when obj is neither an array
 * nor an object, or when the cursor has moved past the last child.
 */
JsonObjPtr NextElement(JsonObjPtr obj)
{
    if(!(IsArray(obj) || IsObject(obj)))
    {
        return NULL;
    }

    obj->rindex += 1;
    if(obj->rindex < obj->length)
    {
        return IndexObj(obj, obj->rindex);
    }

    return NULL;
}

/*
 * =========================================================
 * ==========================解析实现========================
 * =========================================================
 */
// Token types. The values double as column indices of the ACTION table.
#define TTYPE_NUMBER 0 // number
#define TTYPE_STRING 1 // string
#define TTYPE_BOOL 2 // true, false
#define TTYPE_NULL 3 // null
#define TTYPE_UP_OBJECT 4 // {
#define TTYPE_DOWN_OBJECT 5 // }
#define TTYPE_UP_ARRAY 6 // [
#define TTYPE_DOWN_ARRAY 7 // ]
#define TTYPE_COMMA 8 // comma
#define TTYPE_COLON 9 // colon
#define TTYPE_END 10 // end marker

// Character classes used by the tokenizer; note that is_digital evaluates x twice.
#define is_point(x) ((x) == '.')
#define is_digital(x) ((x) <= '9' && (x) >= '0')

// One lexical token produced by GetTokenList.
struct Token
{
    int row; // row recorded for the token
    int col; // column recorded for the token
    int type; // one of the TTYPE_* codes
    char *str; // heap copy of the token text, or NULL for tokens that carry no text
};

static int GetTokenList(const char *cur, const char *end, struct Token **out, int *err_code)
{
    const char *tmp = NULL;
    struct Token *v, *tmp_token = NULL;
    int count = 0, cap = 1; // 数量、容量
    int row = 1, col = 0;// 当前行列号
    bool point = false; // 是否遇到小数点

    while(cur != end)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count];
        ++col;
        v->row = row;
        v->col = col;

        switch(*cur)
        {
        case '{':
            v->type = TTYPE_UP_OBJECT;
            v->str = NULL;
            break;
        case '}':
            v->type = TTYPE_DOWN_OBJECT;
            v->str = NULL;
            break;
        case '[':
            v->type = TTYPE_UP_ARRAY;
            v->str = NULL;
            break;
        case ']':
            v->type = TTYPE_DOWN_ARRAY;
            v->str = NULL;
            break;
        case ':':
            v->type = TTYPE_COLON;
            v->str = NULL;
            break;
        case ',':
            v->type = TTYPE_COMMA;
            v->str = NULL;
            break;
        case '\\':
            break;
        case '-':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            tmp = cur;
            ++tmp;
            while(is_digital(*tmp) && tmp != end)
            { 
                if(is_point(*++tmp))
                {
                    ++tmp; 
                    if(point)
                    {
                        *err_code = MULTIPLE_POINTS;
                        return count;
                    }
                    point = true;
                }
            }

            point = false;
            v->type = TTYPE_NUMBER;
            v->str = malloc(tmp - cur + 1);
            memcpy(v->str, cur, tmp - cur);
            v->str[tmp - cur] = '\0';
            cur = --tmp;
            break;

        case '"':
            tmp = cur;
            ++tmp;
            while(tmp != end)
            { 
                if(*tmp == '\\')
                {
                    if(++tmp == end)
                    {
                        break;
                    }
                }
                else if(*tmp == '"')
                {
                    break;
                }
                ++tmp;
            }
            v->type = TTYPE_STRING;
            v->str = malloc(tmp - cur);
            memcpy(v->str, cur + 1, tmp - cur - 1);
            v->str[tmp - cur - 1] = '\0';
            cur = tmp;
            break;

        case 'n':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'u' || *(cur + 2) != 'l' || *(cur + 3) != 'l')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_NULL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;
        
        case 't':
            if(cur + 3 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'r' || *(cur + 2) != 'u' || *(cur + 3) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(5);
            memcpy(v->str, cur, 4);
            v->str[4] = '\0';
            cur += 3;
            break;

        case 'f':
            if(cur + 4 >= end)
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            if(*(cur + 1) != 'a' || *(cur + 2) != 'l' || *(cur + 3) != 's' || *(cur + 4) != 'e')
            {
                *err_code = LOST_QUOTATION;
                return count;
            }
            v->type = TTYPE_BOOL;
            v->str = malloc(6);
            memcpy(v->str, cur, 5);
            v->str[5] = '\0';
            cur += 4;
            break;

        case ' ':
        case '\t':
            --count;
            break;
        case '\n':
        case '\r':
            ++row;
            col = 1;
            --count;
            break;
        default:
            *err_code = UNKONW_CHARACTOR;
            return count;
        }

        ++count;
        ++cur;
    }

    if(count > 0)
    {
        if(count >= cap)
        {
            cap *= 2;
            *out = (struct Token *)realloc(*out, sizeof(struct Token) * cap);
        }

        v = &(*out)[count++];
        tmp_token = &(*out)[count - 2];
        v->type = TTYPE_END;
        v->str = NULL;
        v->row = tmp_token->row;
        v->col = tmp_token->col + (NULL == tmp_token->str ? 0 : strlen(tmp_token->str));
    }

    *err_code = SUCCESS;
    return count;
}

/*
 * Trace helpers for the syntax checker. With _DEBUG defined, PrintStack,
 * Printf and PrintType write to stdout; otherwise they expand to nothing.
 */
#ifdef _DEBUG
#include <stdio.h>

/* Prints the first count entries of st as a comma-separated list. */
static void DebugPrintStack(int *st, int count)
{
    int i = 0;
    if(count <= 0)
    { return; }

    printf("%d", st[i++]);
    while(i < count)
    {
        printf(",%d", st[i++]);
    }
}

/* Prints a short label for a TTYPE_* code; unknown codes print nothing. */
static void DebugPrintType(int type)
{
    switch (type)
    {
    case TTYPE_NUMBER:
        printf("number");
        break;
    case TTYPE_STRING:
        printf("string");
        break;
    case TTYPE_BOOL:
        printf("bool");
        break;
    case TTYPE_NULL:
        printf("null");
        break;
    case TTYPE_UP_OBJECT:
        printf("{");
        break;
    case TTYPE_DOWN_OBJECT:
        printf("}");
        break;
    case TTYPE_UP_ARRAY:
        printf("[");
        break;
    case TTYPE_DOWN_ARRAY:
        printf("]");
        break;
    case TTYPE_COMMA:
        printf(",");
        break;
    case TTYPE_COLON:
        printf(":");
        break;
    case TTYPE_END:
        printf("$");
        break;
    default:
        break;
    }
}

#define PrintStack(st, count) DebugPrintStack(st, count)
#define Printf printf
#define PrintType(t) DebugPrintType(t)

#else

#define PrintStack(st, count)
#define Printf(...)
#define PrintType(t)

#endif

/*
 * 语法分析,判断语法是否合法
 */
static void CheckSyntax(
    struct Token *t, int size, int *err_code, int *err_row, int *err_col)
{
    // S:0  A:1  B:2  C:3  D:4
    static const int INFER[14] = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 4 };
    // 规约式长度
    static const int STATUTE_LEN[14] = { 2, 1, 1, 1, 1, 1, 3, 3, 2, 2, 3, 3, 1, 3};
    static const int GOTO[23][5] =
    {
            //     S        A        B        C        D
    /* 1*/  {INT_MIN,       3,       4, INT_MIN, INT_MIN},
    /* 2*/  {INT_MIN, INT_MIN, INT_MIN,      11, INT_MIN},
    /* 3*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 4*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 5*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 6*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 7*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 8*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 9*/  {INT_MIN, INT_MIN,      19, INT_MIN,      20},
    /*10*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*11*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*12*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*13*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*14*/  {INT_MIN, INT_MIN, INT_MIN,      15, INT_MIN},
    /*15*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*16*/  {INT_MIN, INT_MIN,      17, INT_MIN, INT_MIN},
    /*17*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*18*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*19*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*20*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*21*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*22*/  {INT_MIN, INT_MIN,      19, INT_MIN,      23},
    /*23*/  {INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    };

    static const int ACTION[23][11] = 
    {
            //    数字   字符串    布尔值     空值        {        }        [        ]        ,        :        $
    /* 1*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 2*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN,      10, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /* 3*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,       0},
    /* 4*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -2},
    /* 5*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -3, INT_MIN,      -3,      -3, INT_MIN,      -3},
    /* 6*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -4, INT_MIN,      -4,      -4, INT_MIN,      -4},
    /* 7*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -5, INT_MIN,      -5,      -5, INT_MIN,      -5},
    /* 8*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -6, INT_MIN,      -6,      -6, INT_MIN,      -6},
    /* 9*/  {       5,       6,       7,       8,       2, INT_MIN,       9,      18, INT_MIN, INT_MIN, INT_MIN},
    /*10*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -9, INT_MIN,      -9,      -9, INT_MIN,      -9},
    /*11*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      13, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*12*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      16, INT_MIN},
    /*13*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -7, INT_MIN,      -7,      -7, INT_MIN,      -7},
    /*14*/  { INT_MIN,      12, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*15*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -12, INT_MIN, INT_MIN,      14, INT_MIN, INT_MIN},
    /*16*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*17*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN,     -11, INT_MIN, INT_MIN},
    /*18*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -10, INT_MIN,     -10,     -10, INT_MIN,     -10},
    /*19*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -13,     -13, INT_MIN, INT_MIN},
    /*20*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      21,      22, INT_MIN, INT_MIN},
    /*21*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,      -8, INT_MIN,      -8,      -8, INT_MIN,      -8},
    /*22*/  {       5,       6,       7,       8,       2, INT_MIN,       9, INT_MIN, INT_MIN, INT_MIN, INT_MIN},
    /*23*/  { INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN, INT_MIN,     -14,      22, INT_MIN, INT_MIN},
    };

    struct Token *cur = NULL; // 当前token
    int index = 0; // Token序列访问索引

    int state_size = 16; // 状态栈空间大小
    int state_index = 0; // 状态栈元素数量
    int *st = malloc(sizeof(int) * state_size); // 状态栈

    int vi = 0; // 当前状态栈顶值
    int action_state = 0; // 当前action表值
    int goto_state = 0; // 当前goto表值
    int infer_state = 0; // 当前规约获得的非终结符

    st[state_index++] = 1;

    while(state_index > 0)
    {
        PrintStack(st, state_index);
        // 取栈顶元素
        vi = st[state_index - 1];
        cur = &t[index];
        
        Printf("  ");
        PrintType(cur->type);

        action_state = ACTION[vi - 1][cur->type];
        // 结束了
        if(action_state == 0)
        {
            Printf("  success\n");
            break;
        }
        // 非法语法
        else if(action_state == INT_MIN)
        {
            *err_code = ERROR;
            *err_row = cur->row;
            *err_col = cur->col;
            Printf("\n");
            return;
        }

        // 移进操作
        if(action_state > 0)
        {
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = action_state;
            ++index;
            Printf("  shift-%d", action_state);
        }
        // 规约操作
        else
        {
            state_index -= STATUTE_LEN[- action_state - 1];
            // 重新获取栈顶元素
            vi = st[state_index - 1];
            Printf("  reduce%d", action_state);

            // 获取goto表值
            infer_state = INFER[- action_state - 1];
            goto_state = GOTO[vi - 1][infer_state];
            if(goto_state == INT_MIN)
            {
                *err_code = ERROR;
                *err_row = cur->row;
                *err_col = cur->col;
                Printf("\n");
                return;
            }
            if(state_index >= state_size)
            {
                state_size *= 2;
                st = (int *)realloc(st, sizeof(int) * state_size);
            }
            st[state_index++] = goto_state;
            Printf("  goto-%d", goto_state);
        }

        Printf("\n");
    }

    if(index != size - 1)
    {
        Printf("\n");
        cur = &t[index];
        *err_code = ERROR;
        *err_row = cur->row;
        *err_col = cur->col;
    }

    free(st);
}

/*
 * Builds the JSON node that corresponds to a single token.
 *
 * - number: the token text is converted with strtod.
 * - string: the node takes over the token's text buffer and t->str is set
 *   to NULL. The placeholder buffer allocated by CreateString is released
 *   first; it used to be leaked.
 * - bool: true exactly when the token text is "true".
 * - null, '{', '[': an empty node of the matching type.
 *
 * Returns NULL for any other token type, or when the node cannot be
 * created.
 */
static JsonObjPtr CreateNewObjWithToken(struct Token *t)
{
    JsonObjPtr result = NULL;

    switch (t->type)
    {
    case TTYPE_NUMBER:
        result = CreateNumber();
        if(NULL != result)
        {
            SetNumber(result, strtod(t->str, NULL));
        }
        break;
    case TTYPE_STRING:
        result = CreateString();
        if(NULL != result)
        {
            free(result->data);
            result->data = t->str;
            t->str = NULL; // ownership moved to the node
        }
        break;
    case TTYPE_BOOL:
        result = CreateBool();
        if(NULL != result)
        {
            SetBool(result, strcmp("true", t->str) == 0);
        }
        break;
    case TTYPE_NULL:
        result = CreateNull();
        break;
    case TTYPE_UP_OBJECT:
        result = CreateObject();
        break;
    case TTYPE_UP_ARRAY:
        result = CreateArray();
        break;
    default:
        break;
    }

    return result;
}

/*
 * 将token序列转化为Json对象
 */
static JsonObjPtr ChangeToJsonObject(struct Token *t)
{
    JsonObjPtr result = NULL; // 解析结果
    JsonObjPtr new_obj = NULL; // 新对象
    struct Token *cur = NULL; // 当前token
    JsonObjPtr top = NULL; // 栈顶结构
    int index = 0; // Token序列访问索引

    int st_cap = 4; // 栈大小
    int st_index = 0; // 栈元素数量
    JsonObjPtr *st = (JsonObjPtr *)malloc(sizeof(JsonObjPtr) * st_cap); // 栈

    struct Token *name_token = NULL; // 用于存储object时的name token

    result = CreateNewObjWithToken(&t[index++]); // 先创建根节点
    if(IsObject(result) || IsArray(result))
    {
        st[st_index++] = result;
    }

    while(st_index > 0)
    {
        cur = &t[index++];
        top = st[st_index - 1];

        switch (cur->type)
        {
        case TTYPE_NUMBER:
            new_obj = CreateNumber();
            SetNumber(new_obj, atof(cur->str));
            break;
        case TTYPE_STRING:
            if(NULL != name_token || !IsObject(top))
            {
                new_obj = CreateString();
                new_obj->data = cur->str;
                cur->str = NULL;
            }
            else 
            {
                name_token = cur;
                continue;
            }
            break;
        case TTYPE_BOOL:
            new_obj = CreateBool();
            SetBool(new_obj, strcmp("true", cur->str) == 0 ? true : false);
            break;
        case TTYPE_NULL:
            new_obj = CreateNull();
            break;
        case TTYPE_UP_OBJECT:
            new_obj = CreateObject();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_OBJECT:
            --st_index;
            continue;
        case TTYPE_UP_ARRAY:
            new_obj = CreateArray();
            if(st_index >= st_cap)
            {
                st_cap *= 2;
                st = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap);
            }
            st[st_index++] = new_obj;
            break;
        case TTYPE_DOWN_ARRAY:
            --st_index;
            continue;
        case TTYPE_END:
            st_index = 0;
            continue;
        default:
            continue;
        }

        if(NULL != name_token)
        {
            new_obj->name = name_token->str;
            name_token->str = NULL;
            name_token = NULL;
        }
        AppendElement(top, new_obj);
    } 

    return result;
}

struct ParseResult Parse(const char *str, int size)
{
    struct ParseResult result;
    const char *cur = str, *end = str + size;
    struct Token *t = (struct Token *)malloc(sizeof(struct Token));
    int count = GetTokenList(cur, end, &t, &result.error_code);
    
    if(result.error_code != SUCCESS)
    {
        if(count > 0)
        {
            result.row = t[count - 1].row;
            result.col = t[count - 1].col;
        }
        else 
        {
            result.row = 0;
            result.col = 0;
        }
    }
    else if(count > 0)
    {
        CheckSyntax(t, count, &result.error_code, &result.row, &result.col);
        if(ParseSuccess(&result))
        {
            result.result = ChangeToJsonObject(t);
        }
    }
    while(count-- > 0)
    {
        if(NULL != t[count].str)
        {
            free(t[count].str);
        }
    }
    free(t);
    return result;
}

const char * ErrorMsg(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return "";
    }

    switch (pr->error_code)
    {
    case ERROR:
        return "parse error";
    case UNKONW_CHARACTOR:
        return "unkown charactor";
    case LOST_QUOTATION:
        return "maybe lost quotation";
    case MULTIPLE_POINTS:
        return "decimal point is too many";
    }

    return "unkown error";
}

/*
 * Returns the node stored in a parse result, or NULL when pr is NULL.
 */
JsonObjPtr GetJsonObjPtr(struct ParseResult *pr)
{
    if(NULL != pr)
    {
        return pr->result;
    }
    return NULL;
}

/*
 * Returns true when pr is non-NULL and its error code is SUCCESS.
 */
bool ParseSuccess(const struct ParseResult *pr)
{
    if(NULL == pr)
    {
        return false;
    }
    return (pr->error_code == SUCCESS);
}

/*
 * Returns the row stored in a parse result, or -1 when pr is NULL.
 */
int ParseErrorRow(const struct ParseResult *pr)
{
    if(NULL != pr)
    {
        return pr->row;
    }
    return -1;
}

/*
 * Returns the column stored in a parse result, or -1 when pr is NULL.
 */
int ParseErrorCol(const struct ParseResult *pr)
{
    if(NULL != pr)
    {
        return pr->col;
    }
    return -1;
}

/*
 * Writes num into buff as text with up to 16 significant digits and
 * returns the number of characters written. A terminator is stored after
 * them but is not counted.
 *
 * "%.16g" drops insignificant trailing zeros and a bare decimal point by
 * itself. The previous version scanned backwards from buff[15] whatever
 * the real length was, so it could read stale bytes, turn "100" into "1",
 * and run off the front of the buffer for 0.
 *
 * buff must have room for at least 32 bytes.
 */
static int NumberToString(double num, char *buff)
{
    char text[32]; // sign + 17 digits/point + exponent fits easily
    int len = snprintf(text, sizeof text, "%.16g", num);

    if(len < 0)
    {
        buff[0] = '\0';
        return 0;
    }
    if(len >= (int)sizeof text)
    {
        len = (int)sizeof text - 1; // snprintf truncated and terminated
    }

    memcpy(buff, text, (size_t)len + 1);
    return len;
}

/*
 * Copies the first len bytes of str into buff count times, back to back,
 * and returns the total number of bytes written. Nothing is written and
 * 0 is returned when count is not positive. No terminator is appended.
 */
static int WriteBuffer(char *buff, const char *str, int len, int count)
{
    int written = 0;
    int rep;

    for(rep = 0; rep < count; ++rep)
    {
        memcpy(&buff[written], str, len);
        written += len;
    }

    return written;
}

// Serialize the JSON tree rooted at `obj` into `buff` as NUL-terminated text.
//
// obj:  root value; NULL produces an empty string.
// buff: destination. Its size is not passed in, so the caller must provide a
//       buffer large enough for the whole document.
// newl: text written before each object member line and before a closing
//       bracket of a non-empty container (e.g. "\n"); NULL means "".
// newc: indentation unit, repeated once per nesting level; NULL means "".
// aoc:  text written after the ':' of an object member; NULL means "".
// aac:  text written after the ',' between array elements; NULL means "".
// Returns the number of characters written, not counting the NUL. Returns 0
// with an empty string if `obj` is NULL or the traversal stack cannot be
// allocated; returns 0 without touching anything if `buff` is NULL.
//
// The tree is walked iteratively with an explicit stack of open containers.
// Fixes over the previous version:
//   - the stack is freed on every path (it used to leak on each call);
//   - a root that is not an array/object is written once and the loop ends
//     (it used to loop forever, writing past the buffer);
//   - an empty array/object is written as "[]" / "{}" instead of
//     dereferencing a NULL element (assumes FirstElement() returns NULL for
//     an empty container -- TODO confirm against its definition);
//   - malloc/realloc failures are detected instead of dereferencing NULL.
// NOTE(review): string values and member names are copied verbatim, with no
// JSON escaping of '"' or '\\'; confirm whether the tokenizer stores them
// already escaped.
int ToString(JsonObjPtr obj, char *buff, const char *newl, const char *newc, const char *aoc, const char *aac)
{
    int buff_index = 0; // bytes written to buff so far
    const char *text = NULL;

    JsonObjPtr cur = obj, parent = NULL;
    int st_cap = 4; // capacity of the open-container stack
    int st_index = 0; // number of containers currently open
    JsonObjPtr *st = NULL; // stack of open containers, innermost last
    JsonObjPtr *grown = NULL;

    int newl_len = 0, newc_len = 0, aoc_len = 0, aac_len = 0;

    if(NULL == buff)
    {
        return 0;
    }
    buff[0] = '\0';
    if(NULL == obj)
    {
        return 0;
    }

    if(NULL == newl)
    {
        newl = "";
    }
    if(NULL == newc)
    {
        newc = "";
    }
    if(NULL == aoc)
    {
        aoc = "";
    }
    if(NULL == aac)
    {
        aac = "";
    }
    newl_len = (int)strlen(newl);
    newc_len = (int)strlen(newc);
    aoc_len = (int)strlen(aoc);
    aac_len = (int)strlen(aac);

    st = (JsonObjPtr *)malloc(sizeof(JsonObjPtr) * st_cap);
    if(NULL == st)
    {
        return 0;
    }

    for(;;)
    {
        if(st_index > 0)
        {
            if(NULL == parent)
            {
                // A container was opened on the previous pass: fetch its
                // first element and start its first line.
                parent = st[st_index - 1];
                cur = FirstElement(parent);
                if(NULL != cur)
                {
                    buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                    buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
                }
            }
            else
            {
                cur = NextElement(parent);
                if(NULL != cur)
                {
                    buff[buff_index++] = ',';
                    if(IsObject(parent))
                    {
                        buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                        buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index);
                    }
                    else
                    {
                        buff_index += WriteBuffer(buff + buff_index, aac, aac_len, 1);
                    }
                }
                else
                {
                    // End of a non-empty container: put the closing bracket
                    // on its own line, one level shallower.
                    buff_index += WriteBuffer(buff + buff_index, newl, newl_len, 1);
                    buff_index += WriteBuffer(buff + buff_index, newc, newc_len, st_index - 1);
                }
            }

            if(NULL == cur)
            {
                // Close the innermost container and resume its parent.
                buff[buff_index++] = parent->type == Array ? ']' : '}';
                if(--st_index == 0)
                {
                    break;
                }
                parent = st[st_index - 1];
                continue;
            }

            if(IsObject(parent))
            {
                text = cur->name;
                if(NULL == text)
                {
                    text = "";
                }
                buff[buff_index++] = '"';
                buff_index += WriteBuffer(buff + buff_index, text, (int)strlen(text), 1);
                buff[buff_index++] = '"';
                buff[buff_index++] = ':';
                buff_index += WriteBuffer(buff + buff_index, aoc, aoc_len, 1);
            }
        }

        switch (cur->type)
        {
        case Null:
            buff_index += WriteBuffer(buff + buff_index, "null", 4, 1);
            break;
        case Array:
        case Object:
            buff[buff_index++] = cur->type == Array ? '[' : '{';
            if(st_index >= st_cap)
            {
                // Grow through a temporary so the old block is not lost if
                // realloc fails.
                grown = (JsonObjPtr *)realloc(st, sizeof(JsonObjPtr) * st_cap * 2);
                if(NULL == grown)
                {
                    free(st);
                    buff[0] = '\0';
                    return 0;
                }
                st = grown;
                st_cap *= 2;
            }
            st[st_index++] = cur;
            parent = NULL; // makes the next pass call FirstElement()
            break;
        case Bool:
            if(GetBool(cur))
            {
                buff_index += WriteBuffer(buff + buff_index, "true", 4, 1);
            }
            else
            {
                buff_index += WriteBuffer(buff + buff_index, "false", 5, 1);
            }
            break;
        case Number:
            buff_index += NumberToString(GetNumber(cur), buff + buff_index);
            break;
        case String:
            text = GetString(cur);
            if(NULL == text)
            {
                text = "";
            }
            buff[buff_index++] = '"';
            buff_index += WriteBuffer(buff + buff_index, text, (int)strlen(text), 1);
            buff[buff_index++] = '"';
            break;
        default:
            break;
        }

        if(st_index == 0)
        {
            // The root was not a container: it has been written, so stop.
            break;
        }
    }

    buff[buff_index] = '\0';
    free(st);
    return buff_index;
}

// Serialize `obj` into `buff` in a human-readable layout.
// Delegates to ToString() with "\n" as the line break, four spaces as the
// indentation unit, and a single space after ':' and after the ',' between
// array elements. Returns whatever ToString() returns.
int ToFormatString(JsonObjPtr obj, char *buff)
{ 
    return ToString(obj, buff, "\n", "    ", " ", " "); 
}

// Serialize `obj` into `buff` in compact form.
// Delegates to ToString() with NULL for every separator argument, which
// ToString() treats as empty strings, so no line breaks, indentation or
// padding spaces are emitted. Returns whatever ToString() returns.
int ToUnformatString(JsonObjPtr obj, char *buff)
{ 
    return ToString(obj, buff, NULL, NULL, NULL, NULL); 
}

3. 测试

测试代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libjson.h"

// Sample JSON document parsed by test().
// It is a single string literal whose source lines are joined with
// backslash-newline continuations, so the resulting string contains only
// spaces between tokens and no newline characters.
static const char *input_json_str =
"                       \
{                       \
    \"aaaa\": 1000,     \
    \"bbbb\": \"value\",\
    \"cccc\": [         \
        null, true, false \
    ],                  \
    \"dddd\": \"wocao\" \
}                       \
";

// Demo / smoke test: parse input_json_str, print it in compact form, append
// a string member named "new node" to the root object, then print the tree
// again in compact and formatted form before releasing it.
//
// The original checked `result` for NULL only around the first print and then
// used it unconditionally; it also passed an unchecked CreateString() result
// on. Both are now guarded so a missing object stops the demo instead of
// being handed to the library.
static void test()
{
    char buff[1024]; // large enough for the sample document above
    JsonObjPtr result = NULL, new_obj = NULL;
    struct ParseResult pr = Parse(input_json_str, strlen(input_json_str));
    if(!ParseSuccess(&pr))
    {
        printf("parse error!!! at row(%d) col(%d)\n", 
            ParseErrorRow(&pr), ParseErrorCol(&pr));
        return;
    }

    printf("parse success!!!\n");
    result = GetJsonObjPtr(&pr);
    if(NULL == result)
    {
        // Nothing was produced, so there is nothing to print, extend or free.
        return;
    }

    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);

    printf("-----------add node-------------\n");
    new_obj = CreateString();
    if(NULL != new_obj)
    {
        SetString(new_obj, "yohohoho");
        AppendObjectElement(result, "new node", new_obj);
    }
    ToUnformatString(result, buff);
    printf("unformat string: %s\n", buff);
    printf("==========================\n");
    ToFormatString(result, buff);
    printf("format string: \n%s\n", buff);

    Free(result);
}

// Program entry point: runs the demo in test() and exits with status 0.
int main()
{
    test();
    // Optional on Windows consoles to keep the window open:
    // system("pause");
    return 0;
}

测试结果

①文法分析过程

【编译原理】json解析器的全流程实现_第3张图片

 ②测试输出结果:

【编译原理】json解析器的全流程实现_第4张图片

四、结语

这个json解析器是我在系统地复习了编译原理之后的实验品,如有问题,欢迎指出!

你可能感兴趣的:(编译原理,C,json,编译原理)