-- Generate sample data from a pattern: parse the pattern and enumerate strings that match it.


local f = require('DataGenerator')

--- Run the generator on each pattern and report, TAP style, whether every
-- generated string really matches the pattern it was generated from.
-- Each generated string is one numbered test case; a string that does not
-- match is reported as "not ok" instead of being silently skipped.
-- @param data  array of Lua pattern strings
-- @return number of generated strings that failed to match their pattern
function test_data (data)
  local case_no = 0
  local failures = 0
  for _, patt in ipairs(data) do
    -- the generator may yield nothing for a pattern; treat that as "no cases"
    for _, sample in ipairs(f(patt) or {}) do
      case_no = case_no + 1
      if string.match(sample, patt) then
        print('ok ' .. case_no .. ' - "' .. sample .. '" match /' .. patt .. '/')
      else
        failures = failures + 1
        print('not ok ' .. case_no .. ' - "' .. sample .. '" does not match /' .. patt .. '/')
      end
    end
  end
  return failures
end

-- Sample patterns used to exercise the generator.
local data = { 'bdede$xx$' }

-- Generate strings for every sample pattern and print the results.
test_data(data)


-- Soft cap on the number of records kept in a single generated array.
local array_limit = 100
-- Upper repetition bound for '+', '*' and '-', and the amount added to n to
-- get the upper bound of an open-ended '{n,}' quantifier.
local max_rep_times = 10
-- When an input array is already longer than the cap, the cap is raised to
-- that length plus this step.
local step_length = 1

-- table.unpack on Lua 5.2+, the global unpack on Lua 5.1 / LuaJIT.
local unpack = table.unpack or unpack

--- Report a fatal parse error and stop the program.
-- Shadows the built-in error() for the rest of this file: instead of raising,
-- it prints its arguments and terminates the process.
-- @param ...  values to report; passed straight to print()
local function error (...)
  print(...)
  -- exit with a failure status so the calling shell/script can detect the error
  os.exit(1)
end

--- Parse a user-defined quantifier of the form '{n}', '{n,}' or '{n,m}'.
-- Spaces inside the braces are ignored; any other unexpected character is
-- reported through error().
-- @param str  quantifier text, e.g. '{2,5}'
-- @return array { from, to }:
--   '{n}'   -> { n, n }
--   '{n,}'  -> { n, n + max_rep_times }
--   '{n,m}' -> { n, m }
local function parse_quantifier_str (str)
  local array = {}
  -- parser state:
  --   0 = before '{'            1 = after '{', waiting for the first digit
  --   2 = reading n             3 = after ',', waiting for '}' or a digit
  --   4 = reading m
  local mode = 0
  local num_str = ''
  for char in string.gmatch(str, '.') do
    local is_digit = string.match(char, '%d') ~= nil

    if mode == 0 then
      -- nothing is parsed until the opening brace is seen
      if char == '{' then
        mode = 1
      end
    elseif mode == 1 then
      if is_digit then
        num_str = char
        mode = 2
      elseif char ~= ' ' then
        error('error:01:class difine with no digit numer')
      end
    elseif mode == 2 then
      if is_digit then
        num_str = num_str .. char
      elseif char == ',' then
        table.insert(array, tonumber(num_str))
        num_str = ''
        mode = 3
      elseif char == '}' then
        -- {5} => {5, 5}
        table.insert(array, tonumber(num_str))
        table.insert(array, tonumber(num_str))
        num_str = ''
        break
      elseif char ~= ' ' then
        error('error:02:quantifier with non digit char')
      end
    elseif mode == 3 then
      if char == '}' then
        -- the previous significant character was ',', so this is '{n,}'
        table.insert(array, array[1] + max_rep_times)
        break
      elseif is_digit then
        mode = 4
        num_str = char
      elseif char ~= ' ' then
        error('error:03: wrong char in class str')
      end
    elseif mode == 4 then
      if is_digit then
        num_str = num_str .. char
      elseif char == '}' then
        table.insert(array, tonumber(num_str))
        break
      elseif char ~= ' ' then
        error('error:04: wrong char in class str')
      end
    end
  end
  return array
end

--- Build the concatenations v1 .. v2 for v1 in a1 and v2 in a2, a1-major.
-- Generation stops after the first complete row of a1 that pushes the number
-- of results above the cap, so the result may slightly exceed the cap.
-- @param a1     array of strings (left operands)
-- @param a2     array of strings (right operands)
-- @param limit  optional soft cap on the result size; defaults to array_limit.
--               If either input is longer than the cap, the cap is raised to
--               that length plus step_length.
-- @return new array of concatenated strings
local function concat_array (a1, a2, limit)
  limit = limit or array_limit
  local longest = math.max(1, #a1, #a2)
  if longest > limit then limit = longest + step_length end

  -- local counter: the original assigned a global `count` here
  local count = 0
  local result = {}
  for _, v1 in ipairs(a1) do
    for _, v2 in ipairs(a2) do
      result[#result + 1] = v1 .. v2
      count = count + 1
    end
    if count > limit then break end
  end
  return result
end

--- Concatenate a list of string arrays into one array of combined strings.
-- The list is folded from the right with concat_array: the last two arrays
-- are combined first, then the result is combined with the array before
-- them, and so on. The input list itself is left unmodified.
-- @param array  list of string arrays
-- @return the single element itself when the list has one entry, the
--         combined array otherwise; { '' } for an empty list (concatenating
--         nothing yields only the empty string)
local function concat_multi_array (array)
  local n = #array
  if n == 0 then
    return { '' }
  end
  local combined = array[n]
  for i = n - 1, 1, -1 do
    combined = concat_array(array[i], combined)
  end
  return combined
end

--- Concatenate a string array with itself rep_times times.
-- @param array      array of strings
-- @param rep_times  number of repetitions
-- @return { '' } for zero repetitions, the array itself for one repetition,
--         otherwise the result of concat_multi_array over rep_times
--         references to the array
local function concat_rep_array (array, rep_times)
  if rep_times == 0 then
    return { '' }
  elseif rep_times == 1 then
    return array
  end

  local copies = {}
  for idx = 1, rep_times do
    copies[idx] = array
  end
  return concat_multi_array(copies)
end

-- class and quantifier to array
--- Expand every { class, { from, to } } pair into the strings it can produce.
-- For each repetition count in from..to the class is repeated that many times
-- with concat_rep_array, and all results are collected in one array per pair.
-- @param a  array of { class, quantifier } pairs
-- @return array holding one array of strings per input pair
local function cnq_to_array (a)
  local expanded = {}
  for _, pair in ipairs(a) do
    local class, quant = pair[1], pair[2]
    local from, to = quant[1], quant[2]
    local variants = {}
    for times = from, to do
      -- append everything generated for this repetition count
      for _, str in ipairs(concat_rep_array(class, times)) do
        variants[#variants + 1] = str
      end
    end
    expanded[#expanded + 1] = variants
  end
  return expanded
end

-- Byte value of the most recently allocated placeholder character;
-- apply_id_char increments it first, so the first placeholder is byte 129.
local count = 128
-- Maps each placeholder character to its id string, or to a table
-- { id = ... } for placeholders allocated with mode 1.
local char_id = {}

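-- Allocate a placeholder character for an id; byte values start right after 128.
-- When the mode argument is 1, a table is stored in char_id for it instead of the plain id.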
--- Allocate the next placeholder character and register it in char_id.
-- @param id    the text the placeholder stands for
-- @param mode  1 to register a table { id = id }; anything else (default 0)
--              registers the plain id
-- @return the one-byte placeholder string
local function apply_id_char (id, mode)
  count = count + 1
  local placeholder = string.char(count)
  if (mode or 0) == 1 then
    char_id[placeholder] = { id = id }
  else
    char_id[placeholder] = id
  end
  return placeholder
end

-- Get the original id that a placeholder character stands for.
--- Look up the id registered in char_id for a placeholder character.
-- @param char  placeholder character
-- @return the id string, whether it was stored directly or inside a
--         { id = ... } table; calls error() when nothing usable is stored
local function get_char_id (char)
  local entry = char_id[char]
  local kind = type(entry)
  if kind == 'string' then
    return entry
  end
  if kind == 'table' then
    return entry.id
  end
  error('error:05:not exists id records: ' .. char)
  return nil
end

--- List the characters a class character stands for.
-- @param char  '.' or a placeholder whose id is a class letter (the id is
--              looked up with get_char_id and prefixed with '%')
-- @return array of the one-character strings with byte values 0..126 that
--         match the class, in ascending byte order
local function get_char_class (char)
  -- the pattern is the same for every candidate, so build it once
  local patt
  if char == '.' then
    patt = char
  else
    patt = '%' .. get_char_id(char)
  end

  local array = {}
  -- candidates are bytes 0..126 (this range includes control characters)
  for i = 0, 126 do
    local i_char = string.char(i)
    if string.find(i_char, patt) then
      array[#array + 1] = i_char
    end
  end
  return array
end

--- Fetch the pos-th string recorded for a user class / user quantifier
-- placeholder character.
-- The record's id lives under the key `id`; the array part is reserved for
-- the recorded strings, which is why the id is not stored under a numeric key.
-- @param char  placeholder character registered in char_id
-- @param pos   1-based index of the wanted string
-- @return the recorded string, or nil when there is none at that index;
--         calls error() when char has no record at all
local function get_char_str (char, pos)
  local str_list = char_id[char]
  if str_list then
    return str_list[pos]
  else
    error('error:06:not exists char record')
  end
end

-- Built-in quantifier characters mapped to their { from, to } repetition
-- range. '-' is given the same range as '*'.
local quantifier_char_table = {
  ['+'] = { 1, max_rep_times },
  ['*'] = { 0, max_rep_times },
  ['-'] = { 0, max_rep_times },
  ['?'] = { 0, 1 },
}
-- Maps the character that follows '%' in a pattern to the placeholder
-- character that replaces the whole escape (used by process_patt_str).
local escape_id_table = {}

-- class_char_table: placeholder -> class letter for every '%x' character
-- class; '.' is a class character as well and maps to itself.
local class_str = 'a c d l p s u w x z A C D L P S U W X Z'
local class_char_table = { ['.'] = '.' }
for id in string.gmatch(class_str, '%S') do
  local id_char = apply_id_char(id)
  class_char_table[id_char] = id
  escape_id_table[id] = id_char
end

-- Escaped magic characters are concealed behind placeholders so that the
-- later processing of unescaped magic characters does not touch them.
local conceal_str = '+ - * ? { } % .'
local conceal_char_table = {}
for id in string.gmatch(conceal_str, '%S') do
  local id_char = apply_id_char(id)
  conceal_char_table[id_char] = id
  escape_id_table[id] = id_char
end

-- [...] => class_char
-- Placeholder standing for a user-defined character set; its char_id record
-- is a table, so the captured set strings can be appended to it.
local user_class_char = apply_id_char('[…]', 1)

-- {num,num} {num} {num,} => quantifier_char
-- Placeholder standing for a user-defined quantifier; its char_id record is
-- a table, so the captured quantifier strings can be appended to it.
local user_quantifier_char = apply_id_char('{…}', 1)

-- [0-8 a-z]
-- Placeholder standing for a character range inside a user-defined set.
local user_range_char = apply_id_char('[n-m]', 1)

--- Pre-process a pattern into a flat string of literals and placeholders.
-- Steps, in order:
--   1. drop a leading '^' and a trailing '$' (anchors do not affect generation)
--   2. replace every '%x' escape by its placeholder from escape_id_table,
--      or by the bare character x when it has none
--   3. replace every '[...]' set by user_class_char and record its text
--   4. replace every '{n}', '{n,}', '{n,m}' by user_quantifier_char and
--      record its text
-- The recorded texts are stored in the array part of the placeholders'
-- char_id records, in order of appearance.
-- @param patt  Lua pattern string
-- @return the processed pattern string
function process_patt_str (patt)
  local class_list = char_id[user_class_char]
  local quantifier_list = char_id[user_quantifier_char]

  -- Forget what an earlier pattern recorded: consumers index these lists
  -- from 1 for every pattern, so stale entries would be read back otherwise.
  for i = #class_list, 1, -1 do class_list[i] = nil end
  for i = #quantifier_list, 1, -1 do quantifier_list[i] = nil end

  -- the leading ^ and the trailing $ are of no use here
  patt = string.gsub(patt, '^%^', '')
  patt = string.gsub(patt, '%$$', '')

  -- replace every escaped character by its placeholder
  patt = string.gsub(patt, '%%(.)', function (id)
    return escape_id_table[id] or id
  end)

  -- user-defined sets cannot be added to the class characters up front,
  -- so their text is recorded and resolved later by position
  patt = string.gsub(patt, '%[.-%]', function (str)
    table.insert(class_list, str)
    return user_class_char
  end)

  -- likewise for user-defined quantifiers
  patt = string.gsub(patt, '{%d+,?%d-}', function (str)
    table.insert(quantifier_list, str)
    return user_quantifier_char
  end)
  return patt
end

--- Parse a '[...]' set string into an array of its member characters.
-- 'x-y' ranges are expanded first and replaced by user_range_char; then the
-- set is scanned character by character. Members may be class placeholders
-- (expanded with get_char_class), range placeholders, concealed escapes
-- (resolved with get_char_id) or plain characters. A '.' inside a set is a
-- literal dot, as in Lua patterns.
-- @param str  set text including the brackets, e.g. '[^a-z0]'
-- @return sorted array of member characters; for a '[^...]' set, the
--         characters with byte values 0..126 that are NOT members
local function parse_class_str (str)
  -- expand every 'x-y' range and remember it, in order of appearance
  local range_char_table = {}
  str = string.gsub(str, '(.)%-(.)', function (from, to)
    local class = {}
    for i = string.byte(from), string.byte(to) do
      class[#class + 1] = string.char(i)
    end
    range_char_table[#range_char_table + 1] = class
    return user_range_char
  end)

  -- scanner state: 0 = before '[', 1 = just after '[', 2 = inside the set
  local mode = 0
  local reverse_mode = false
  local range_pos = 1
  -- members are kept as keys of a hash so duplicates collapse
  local char_table = {}

  local function add_all (list)
    for _, k in ipairs(list) do
      char_table[k] = 1
    end
  end

  -- add one set member, expanding placeholders as needed
  local function add_member (char)
    if char ~= '.' and class_char_table[char] then
      add_all(get_char_class(char))
    elseif char == user_range_char then
      add_all(range_char_table[range_pos])
      range_pos = range_pos + 1
    elseif conceal_char_table[char] then
      char_table[get_char_id(char)] = 1
    else
      char_table[char] = 1
    end
  end

  for char in string.gmatch(str, '.') do
    if mode == 0 then
      if char == '[' then
        mode = 1
      end
    elseif mode == 1 then
      mode = 2
      if char == '^' then
        reverse_mode = true
      elseif char == ']' then
        -- '[]' defines nothing
        error('error:07:without class define')
        break
      else
        add_member(char)
      end
    elseif char == ']' then
      break
    else
      add_member(char)
    end
  end

  local array = {}
  if reverse_mode then
    -- complement over bytes 0..126; already in ascending order
    for i = 0, 126 do
      local char = string.char(i)
      if not char_table[char] then
        array[#array + 1] = char
      end
    end
  else
    for k in pairs(char_table) do
      array[#array + 1] = k
    end
    -- pairs() order is unspecified, so sort for a stable result
    table.sort(array)
  end
  return array
end

--- Parse a pre-processed pattern into a list of { class, quantifier } pairs.
-- Result shape: { { char_list, { from, to } }, { char_list, { from, to } }, ... }
-- The pattern is scanned one character at a time. Each character either
-- starts a new class (class placeholder, user set, concealed escape or plain
-- literal) or is a quantifier that applies to the class just read. A class
-- that is not followed by a quantifier gets { 1, 1 }.
-- @param patt  pattern string as returned by process_patt_str
-- @return array of { class, quantifier } pairs
local function patt_to_array (patt)
  local array = {}
  local class = {}
  -- 0 = expecting a class, 1 = a class was read and awaits its quantifier
  local mode = 0
  -- next recorded string to use for user_class_char
  local class_pos = 1
  -- next recorded string to use for user_quantifier_char
  local quantifier_pos = 1

  -- turn one class-starting character into its list of characters
  local function resolve_class (char)
    if class_char_table[char] then
      return get_char_class(char)
    elseif char == user_class_char then
      local str = get_char_str(char, class_pos)
      class_pos = class_pos + 1
      return parse_class_str(str)
    elseif conceal_char_table[char] then
      return { get_char_id(char) }
    end
    return { char }
  end

  -- a trailing '-' is turned into '?'
  patt = string.gsub(patt, '%-$', '?')
  -- sentinel character so the last class gets flushed into the result
  patt = patt .. '$'

  for char in string.gmatch(patt, '.') do
    if mode == 0 then
      if quantifier_char_table[char] then
        error('error:08:quantifier char could not at begin')
      elseif char == user_quantifier_char then
        error('error:09:user quantifier could not at begin')
      else
        class = resolve_class(char)
        mode = 1
      end
    else
      local quantifier = { 1, 1 }
      if quantifier_char_table[char] then
        quantifier = quantifier_char_table[char]
        mode = 0
      elseif char == user_quantifier_char then
        quantifier = parse_quantifier_str(get_char_str(char, quantifier_pos))
        quantifier_pos = quantifier_pos + 1
        mode = 0
      end
      -- push the pending class together with its quantifier
      array[#array + 1] = { class, quantifier }

      -- no quantifier consumed: this character starts the next class
      if mode == 1 then
        class = resolve_class(char)
      end
    end
  end
  return array
end

--- Generate strings for a pattern.
-- Pipeline: process_patt_str -> patt_to_array -> cnq_to_array ->
-- concat_multi_array.
-- @param str   pattern string
-- @param mode  optional, defaults to 0; not used by the pipeline below
-- @return whatever concat_multi_array returns for the expanded pattern
function DataGenerator (str, mode)
  mode = mode or 0
  -- extra parentheses keep only the first return value of the pre-processor
  local pairs_list = patt_to_array((process_patt_str(str)))
  return concat_multi_array(cnq_to_array(pairs_list))
end

return DataGenerator

-- You may also be interested in: lua, generator, Data, RegExp