lua5.4数据结构之Table

概述

  • lua表分为两部分,一部分是数组,一部分是hash表,这两部分共存于表中。
  • 数组下标从1开始。
  • #取长度仅在数组是连续时有效,其他情况下的长度是不可靠的(因为内部使用二分法)

说明

  • 部分注释保留了源码的注释,某些源码注释可能解释的更为清晰
  • 该文从表的创建,增删改查,以及扩容、迭代和取长来分析表结构特征
  • 该文采用深度优先进行代码探索

数据结构

  • Table
typedef struct Table {
  CommonHeader;	//公共头部,所有需要GC的数据结构都有,里面包括,next指针,数据类型和gc节点颜色
  lu_byte flags;  /* 1<

lu_byte lsizenode; //哈希部分的大小,取log2,例大小为8的hash表此字段为3 unsigned int alimit; //数组的长度 TValue *array; //数组部分 Node *node; //hash表部分 Node *lastfree; //hash表中最后一个为没被用过的节点 struct Table *metatable; //元表 GCObject *gclist; //GC相关的链表,todo以后再说 } Table;

  • Node
typedef union Node {
  struct NodeKey {
    TValuefields;  //Value字段
    lu_byte key_tt;  //key的类型
    int next;  //相同hash值链表next
    Value key_val;  //key值
  } u;
  TValue i_val;  //联合体,直接获取Value字段
} Node;
#define TValuefields	Value value_; lu_byte tt_	//值和类型
  • TVale
typedef struct TValue {
  TValuefields;
} TValue;

#define TValuefields	Value value_; lu_byte tt_	//值和类型

typedef union Value {
  struct GCObject *gc;    //所有可回收类型,由该指针指向
  void *p;         //轻量数据类型
  lua_CFunction f; //C函数
  lua_Integer i;   //整数
  lua_Number n;    //浮点数
} Value;

//tt_取自一下几种
#define LUA_TNONE		(-1)
#define LUA_TNIL		0
#define LUA_TBOOLEAN		1
#define LUA_TLIGHTUSERDATA	2
#define LUA_TNUMBER		3
#define LUA_TSTRING		4
#define LUA_TTABLE		5
#define LUA_TFUNCTION		6
#define LUA_TUSERDATA		7
#define LUA_TTHREAD		8
#define LUA_NUMTYPES		9

算法

luaH_new

  • luaH_new
    参数:lua_state
    功能:新建一个表,并初始化。
Table *luaH_new (lua_State *L) {
  GCObject *o = luaC_newobj(L, LUA_VTABLE, sizeof(Table));	//新建LUA_VTABLE类型,所有可回收类型,由GCObject指针指向
  Table *t = gco2t(o);	//转换类型取得表
  t->metatable = NULL;	//设元表为空
  t->flags = cast_byte(maskflags);  //标志位设置为无元方法字段
  t->array = NULL;	//数组为空
  t->alimit = 0;	//数组长度为0
  setnodevector(L, t, 0);	//初始化hash部分,L为虚拟机,t为表,0为hash部分长度
  return t;
}

//宏
#define gco2t(o)  check_exp((o)->tt == LUA_VTABLE, &((cast_u(o))->h))	//把可回收对象转换成一个表
#define check_exp(c,e)		(e)
#define cast_u(o)	cast(union GCUnion *, (o))

//可回收类型联合体
union GCUnion {
  GCObject gc;  //公共头
  struct TString ts;	//LUA_TSTRING		4
  struct Udata u;	//LUA_TUSERDATA		7
  union Closure cl;	//LUA_TFUNCTION		6
  struct Table h;	//LUA_TTABLE		5
  struct Proto p;	//
  struct lua_State th;  //LUA_TTHREAD		8
  struct UpVal upv;
};
  • luaC_newobj
    参数:虚拟机、对象类型、对象大小
    功能:根据tt参数和size,创建一个可回收对象并链接至gc链表。
GCObject *luaC_newobj (lua_State *L, int tt, size_t sz) {
  global_State *g = G(L);	//取得L->G全局状态机
  GCObject *o = cast(GCObject *, luaM_newobject(L, novariant(tt), sz));	//使用全局状态机的分配函数进行空间分配
  o->marked = luaC_white(g);	//gc颜色标记
  o->tt = tt;	//设置类型
  o->next = g->allgc;	//链接至gc链表
  g->allgc = o;
  return o;
}

//宏
#define G(L)	(L->l_G)
  • setnodevector
    参数:虚拟机、表、hash部分大小
    功能:根据给定size,分配表的hash部分大小
static void setnodevector (lua_State *L, Table *t, unsigned int size) {
  if (size == 0) {  //hash部分无元素
    t->node = cast(Node *, dummynode);  //指向哑节点,这个节点是公用的,所以不需要分配空间
    t->lsizenode = 0;	//大小为0,2^0 = 1,因此hash有一个节点,是哑节点
    t->lastfree = NULL;  //最后一个未使用位置为NULL
  }
  else {
    int i;
    int lsize = luaO_ceillog2(size);	//size向上取整
    if (lsize > MAXHBITS || (1u << lsize) > MAXHSIZE)
      luaG_runerror(L, "table overflow");
    size = twoto(lsize);	//2^size
    t->node = luaM_newvector(L, size, Node);	//创建空间
    for (i = 0; i < (int)size; i++) {	//初始化hash部分
      Node *n = gnode(t, i);
      gnext(n) = 0;
      setnilkey(n);
      setempty(gval(n));
    }
    t->lsizenode = cast_byte(lsize);	//设置大小
    t->lastfree = gnode(t, size);  //lastfree设置为最后一个节点
  }
}

//宏
#define dummynode		(&dummynode_)
static const Node dummynode_ = {
  {{NULL}, LUA_VEMPTY,  /* value's value and type */
   LUA_VNIL, 0, {NULL}}  /* key type, next, and key value */
};

//直接查表更省时间
int luaO_ceillog2 (unsigned int x) {
  static const lu_byte log_2[256] = {  /* log_2[i] = ceil(log2(i - 1)) */
    0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
    6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
  };
  int l = 0;
  x--;
  while (x >= 256) { l += 8; x >>= 8; }
  return l + log_2[x];
}

//分配空间
#define luaM_newvector(L,n,t)	cast(t*, luaM_malloc_(L, (n)*sizeof(t), 0))
void *luaM_malloc_ (lua_State *L, size_t size, int tag) {
  if (size == 0)
    return NULL;  /* that's all */
  else {
    global_State *g = G(L);
    void *newblock = firsttry(g, NULL, tag, size);	//第一次尝试分配空间
    if (l_unlikely(newblock == NULL)) {
      newblock = tryagain(L, NULL, tag, size);	//第二次尝试
      if (newblock == NULL)
        luaM_error(L);
    }
    g->GCdebt += size;	//计入gc
    return newblock;
  }
}
#define firsttry(g,block,os,ns)    ((*g->frealloc)(g->ud, block, os, ns))

luaH_get

  • luaH_get
    参数:Table,key
    功能:根据key取得表中的value
const TValue *luaH_get (Table *t, const TValue *key) {
  switch (ttypetag(key)) {	//根据key类型采取不同策略
    case LUA_VSHRSTR: return luaH_getshortstr(t, tsvalue(key));	//短字符串类型(40byte及以下)
    case LUA_VNUMINT: return luaH_getint(t, ivalue(key));	//整数类型
    case LUA_VNIL: return &absentkey;	//nil类型
    case LUA_VNUMFLT: {	//浮点类型
      lua_Integer k;
      if (luaV_flttointeger(fltvalue(key), &k, F2Ieq)) //是否可以转化为整数 1.00
        return luaH_getint(t, k);
    }
    default:
      return getgeneric(t, key, 0);	//通用查找
  }
}

所有的短字符串类型(40byte及以下)都存储在全局字符串表,这个表在&G(L)->strt,也就是全局状态机里面,所有使用该字符串的地方实际都在引用该字符串。

这样短字符串的频繁比较就不必按字节比较,直接比较地址是否相同即可。

Lua 5.2.1为了解决Hash Dos问题,把长字符串独立出来,不再通过hash进入全局字符串表,同时,使用了一个随机种子用于哈希值的计算,使攻击者无法轻易构造出拥有相同哈希值的不同字符串,这个随机种子是在Lua State 创建时放在全局表中的,它利用了构造状态机的内存地址随机性以及用户可配置的一个随机量同时来决定。

  • luaH_getshortstr
    参数:Table,key
    功能:根据短字符串key取得table中的value
const TValue *luaH_getshortstr (Table *t, TString *key) {
  Node *n = hashstr(t, key);	//拿到字符串的hash对应位置
  lua_assert(key->tt == LUA_VSHRSTR);
  for (;;) {  //遍历桶获取value
    if (keyisshrstr(n) && eqshrstr(keystrval(n), key))	//key是短字符串 && 节点n的key和key相等
      return gval(n);  /* that's it */
    else {
      int nx = gnext(n);
      if (nx == 0)
        return &absentkey;  //没找到
      n += nx;
    }
  }
}

表的Hash部分采用数组结构,对于相同的key采用链表方式连接,与传统方式不同的是,链表部分并非采用桶式链接,而是在本表的空余位置进行放置并连接

例如:
hash部分:8个空位
nil nil nil nil nil nil nil nil
lastfree = 8

第一个元素位置为1:
1 nil nil nil nil nil nil nil
lastfree = 8

第二个元素位置为4:
1 nil nil 2 nil nil nil nil
lastfree = 8

第三个元素位置1:
1 nil nil 2 nil nil nil 3
lastfree = 7
[1]->next = 7 (8 - 1)
第三个元素和第一个元素发生冲突
那么元素三应找到一个空位lastfree - -
然后把元素三连接到元素一上
所以元素一的next为元素一和元素三的距离也就是(8 - 1 = 7)

找元素三的时候,发现他的hash值为1,但是位置1并不是他,于是便顺着next查找即可找到。

  • luaH_getint
    参数:Table,key
    功能:根据整数key取得table中的value
const TValue *luaH_getint (Table *t, lua_Integer key) {
  if (l_castS2U(key) - 1u < t->alimit)  //key在[1,t->alimit]
    return &t->array[key - 1];
  else if (!limitequalsasize(t) &&  //key在(t->alimit,reallimit]
           (l_castS2U(key) == t->alimit + 1 ||
            l_castS2U(key) - 1u < luaH_realasize(t))) {
    t->alimit = cast_uint(key);	//设置数组长度
    return &t->array[key - 1];
  }
  else {
    Node *n = hashint(t, key);
    for (;;) {  //查找是否在hash部分
      if (keyisinteger(n) && keyival(n) == key)
        return gval(n);  /* that's it */
      else {
        int nx = gnext(n);
        if (nx == 0) break;
        n += nx;
      }
    }
    return &absentkey;
  }
}

LUAI_FUNC unsigned int luaH_realasize (const Table *t) {
  if (limitequalsasize(t))	//是否真实长度
    return t->alimit;  
  else {
    unsigned int size = t->alimit;
    //计算不小于n的最小2的指数倍
    size |= (size >> 1);
    size |= (size >> 2);
    size |= (size >> 4);
    size |= (size >> 8);
    size |= (size >> 16);
#if (UINT_MAX >> 30) > 3
    size |= (size >> 32);  /* unsigned int has more than 32 bits */
#endif
    size++;
    lua_assert(ispow2(size) && size/2 < t->alimit && t->alimit < size);
    return size;
  }
}

//使用BITRAS标志是否为真实长度
//比如数组大小为8,实际只有1-6个元素,此时alimit = 6
#define limitequalsasize(t)	(isrealasize(t) || ispow2((t)->alimit))
#define isrealasize(t)		(!((t)->flags & BITRAS))
#define setrealasize(t)		((t)->flags &= cast_byte(~BITRAS))
#define setnorealasize(t)	((t)->flags |= BITRAS)

t->alimit的真实长度永远是2的整数倍,但t-alimit但不超过其真实长度。
当对不连续数组使用#号时有可能将alimit设置为不可靠的长度,因为#使用了二分法。

  • getgeneric
    参数:Table,key,deadok
    功能:根据key取得table的hash部分的value,deadok是否允许查找已经removed的key(此处不允许)
static const TValue *getgeneric (Table *t, const TValue *key, int deadok) {
  Node *n = mainpositionTV(t, key);	//根据key查找相应的位置的节点
  for (;;) {  //遍历桶
    if (equalkey(key, n, deadok))
      return gval(n);  /* that's it */
    else {
      int nx = gnext(n);
      if (nx == 0)
        return &absentkey;  /* not found */
      n += nx;
    }
  }
}

//根据key hash到相应Node
static Node *mainpositionTV (const Table *t, const TValue *key) {
  switch (ttypetag(key)) {
    case LUA_VNUMINT: {
      lua_Integer i = ivalue(key);
      return hashint(t, i);
    }
    case LUA_VNUMFLT: {
      lua_Number n = fltvalue(key);
      return hashmod(t, l_hashfloat(n));
    }
    case LUA_VSHRSTR: {
      TString *ts = tsvalue(key);
      return hashstr(t, ts);
    }
    case LUA_VLNGSTR: {
      TString *ts = tsvalue(key);
      return hashpow2(t, luaS_hashlongstr(ts));
    }
    case LUA_VFALSE:
      return hashboolean(t, 0);
    case LUA_VTRUE:
      return hashboolean(t, 1);
    case LUA_VLIGHTUSERDATA: {
      void *p = pvalue(key);
      return hashpointer(t, p);
    }
    case LUA_VLCF: {
      lua_CFunction f = fvalue(key);
      return hashpointer(t, f);
    }
    default: {
      GCObject *o = gcvalue(key);
      return hashpointer(t, o);
    }
  }
}

luaH_set

  • luaH_set
    参数:lua_State,Table,key,value
    功能:根据key取得表的相应位置,然后把该位置的值设为value
void luaH_set (lua_State *L, Table *t, const TValue *key, TValue *value) {
  const TValue *slot = luaH_get(t, key);	//根据上边的get获取到slot
  luaH_finishset(L, t, key, slot, value);	//设此slot的值为value
}
  • luaH_finishset
    参数:lua_State,Table,key,slot,value
    功能:设置slot[key] = value
void luaH_finishset (lua_State *L, Table *t, const TValue *key,const TValue *slot, TValue *value) {
  if (isabstkey(slot))	//槽位没有key,有key为修改,没key为new
    luaH_newkey(L, t, key, value);	//新建key并赋值
  else {
    if (l_unlikely(isshared(t)))
      luaG_runerror(L, "attempt to change a shared table");
    setobj2t(L, cast(TValue *, slot), value);	//slot[key] = value
  }
}

//宏
#define cast(t, exp)	((t)(exp))
#define setobj2t	setobj
#define setobj(L,obj1,obj2) \
	{ TValue *io1=(obj1); const TValue *io2=(obj2); \
          io1->value_ = io2->value_; settt_(io1, io2->tt_); \
	  checkliveness(L,io1); lua_assert(!isnonstrictnil(io1)); }
  • luaH_newkey
    参数:lua_State,Table,key,value
    功能:插入新key到hash表,第一步检查key对应mainposition是否为空,不是的话,检查mainposition的元素是否是该位置的,不是让这个元素滚开,自己坐下,否则自己滚到一个其他空位置(lastfree)
void luaH_newkey (lua_State *L, Table *t, const TValue *key, TValue *value) {
  Node *mp;
  TValue aux;
  if (l_unlikely(isshared(t)))
    luaG_runerror(L, "attempt to change a shared table");
  if (l_unlikely(ttisnil(key)))
    luaG_runerror(L, "table index is nil");
  else if (ttisfloat(key)) {	//float能转换为整数先转换为整数
    lua_Number f = fltvalue(key);
    lua_Integer k;
    if (luaV_flttointeger(f, &k, F2Ieq)) {  /* does key fit in an integer? */
      setivalue(&aux, k);
      key = &aux;  /* insert it as an integer */
    }
    else if (l_unlikely(luai_numisnan(f)))
      luaG_runerror(L, "table index is NaN");
  }
  if (ttisnil(value))
    return;  /* do not insert nil values */
  mp = mainpositionTV(t, key);	//找到key对应的位置
  if (!isempty(gval(mp)) || isdummy(t)) {  //位置被占了?
    Node *othern;
    Node *f = getfreepos(t);  //先找一个空位
    if (f == NULL) {  //找不到空位?
      rehash(L, t, key);  //hash表扩容
      /* whatever called 'newkey' takes care of TM cache */
      luaH_set(L, t, key, value);  //重新插入
      return;
    }
    lua_assert(!isdummy(t));
    othern = mainpositionfromnode(t, mp);	//拿到位置的元素
    if (othern != mp) {  //这个元素不是这个位置的吗?
      //是,让他滚开
      while (othern + gnext(othern) != mp)  //找到这个元素在桶中的前置节点
        othern += gnext(othern);
      gnext(othern) = cast_int(f - othern);  //前置节点的next为freepos
      *f = *mp;  //freepos设置为该节点
      if (gnext(mp) != 0) {	//该节点的next不等于空?
        gnext(f) += cast_int(mp - f);  //转移该节点的next到freepos上
        gnext(mp) = 0;  /* now 'mp' is free */
      }
      setempty(gval(mp));
    }
    else {  //不是,自己滚开
      //new key将会到freepos并插入到桶上
      if (gnext(mp) != 0)
        gnext(f) = cast_int((mp + gnext(mp)) - f);  /* chain new position */
      else lua_assert(gnext(f) == 0);
      gnext(mp) = cast_int(f - mp);
      mp = f;
    }
  }
  setnodekey(L, mp, key);	//设置节点key
  luaC_barrierback(L, obj2gco(t), key);	//gc check
  lua_assert(isempty(gval(mp)));
  setobj2t(L, gval(mp), value);	//设置值
}

//取得空位置	可见lastfree是最后一个未使用过的位置
static Node *getfreepos (Table *t) {
  if (!isdummy(t)) {
    while (t->lastfree > t->node) {
      t->lastfree--;
      if (keyisnil(t->lastfree))
        return t->lastfree;
    }
  }
  return NULL;  /* could not find a free place */
}

static void rehash (lua_State *L, Table *t, const TValue *ek) {
  unsigned int asize;  //数组长度
  unsigned int na;  //数组部分key个数
  unsigned int nums[MAXABITS + 1];	//每个2指数倍之间的元素个数
  int i;
  int totaluse;	//总共有多少个key
  for (i = 0; i <= MAXABITS; i++) nums[i] = 0;  /* reset counts */
  setlimittosize(t);	//设置表的数组长度为真实长度
  na = numusearray(t, nums);  //统计数组部分key个数
  totaluse = na;  /* all those keys are integer keys */
  totaluse += numusehash(t, nums, &na);  //加上hash部分key个数
  /* count extra key */
  if (ttisinteger(ek))
    na += countint(ivalue(ek), nums);	//加上当前要设置的key
  totaluse++;
  /* compute new size for array part */
  asize = computesizes(nums, &na);	//计算数组长度,数组部分利用率要不低于50%
  /* resize the table to new computed sizes */
  luaH_resize(L, t, asize, totaluse - na);	//重新设置大小
}

static unsigned int numusearray (const Table *t, unsigned int *nums) {
  int lg;
  unsigned int ttlg;  /* 2^lg */
  unsigned int ause = 0;  //数组总共元素个数
  unsigned int i = 1;  /* count to traverse all array keys */
  unsigned int asize = limitasasize(t);  //数组真实大小
  //遍历每个片
  for (lg = 0, ttlg = 1; lg <= MAXABITS; lg++, ttlg *= 2) {
    unsigned int lc = 0;  /* counter */
    unsigned int lim = ttlg;
    if (lim > asize) {
      lim = asize;  /* adjust upper limit */
      if (i > lim)
        break;  /* no more elements to count */
    }
    /* count elements in range (2^(lg - 1), 2^lg] */
    for (; i <= lim; i++) {
      if (!isempty(&t->array[i-1]))
        lc++;
    }
    nums[lg] += lc;
    ause += lc;
  }
  return ause;
}

static int numusehash (const Table *t, unsigned int *nums, unsigned int *pna) {
  int totaluse = 0;  /* total number of elements */
  int ause = 0;  /* elements added to 'nums' (can go to array part) */
  int i = sizenode(t);
  while (i--) {
    Node *n = &t->node[i];
    if (!isempty(gval(n))) {
      if (keyisinteger(n))
        ause += countint(keyival(n), nums);
      totaluse++;
    }
  }
  *pna += ause;
  return totaluse;
}

//找到一个大小保证利用率不小于50%
static unsigned int computesizes (unsigned int nums[], unsigned int *pna) {
  int i;
  unsigned int twotoi;  /* 2^i (candidate for optimal size) */
  unsigned int a = 0;  /* number of elements smaller than 2^i */
  unsigned int na = 0;  /* number of elements to go to array part */
  unsigned int optimal = 0;  /* optimal size for array part */
  /* loop while keys can fill more than half of total size */
  for (i = 0, twotoi = 1;
       twotoi > 0 && *pna > twotoi / 2;
       i++, twotoi *= 2) {
    a += nums[i];
    if (a > twotoi/2) {  /* more than half elements present? */
      optimal = twotoi;  /* optimal size (till now) */
      na = a;  /* all elements up to 'optimal' will go to array part */
    }
  }
  lua_assert((optimal == 0 || optimal / 2 < na) && na <= optimal);
  *pna = na;
  return optimal;
}

void luaH_resize (lua_State *L, Table *t, unsigned int newasize, unsigned int nhsize) {
  unsigned int i;
  Table newt;  /* to keep the new hash part */
  unsigned int oldasize = setlimittosize(t);
  TValue *newarray;
  /* create new hash part with appropriate size into 'newt' */
  setnodevector(L, &newt, nhsize);
  if (newasize < oldasize) {  //缩短数组部分?
    t->alimit = newasize;  /* pretend array has new size... */
    exchangehashpart(t, &newt);  //设置hash部分为空
    //把数组多余的部分插入hash
    for (i = newasize; i < oldasize; i++) {
      if (!isempty(&t->array[i]))
        luaH_setint(L, t, i + 1, &t->array[i]);
    }
    t->alimit = oldasize;  /* restore current size... */
    exchangehashpart(t, &newt);  //交换回原来的hash部分
  }
  //分配新数组
  newarray = luaM_reallocvector(L, t->array, oldasize, newasize, TValue);
  if (l_unlikely(newarray == NULL && newasize > 0)) {  /* allocation failed? */
    freehash(L, &newt);  /* release new hash part */
    luaM_error(L);  /* raise error (with array unchanged) */
  }
  /* allocation ok; initialize new part of the array */
  exchangehashpart(t, &newt);  /* 't' has the new hash ('newt' has the old) */
  t->array = newarray;  /* set new array part */
  t->alimit = newasize;
  for (i = oldasize; i < newasize; i++)  /* clear new slice of the array */
     setempty(&t->array[i]);
  /* re-insert elements from old hash part into new parts */
  reinsert(L, &newt, t);  //把多出的hash和原先的hash合并
  freehash(L, &newt);  /* free old hash part */
}

//宏
#define setnodekey(L,node,obj) \
	{ Node *n_=(node); const TValue *io_=(obj); \
	  n_->u.key_val = io_->value_; n_->u.key_tt = io_->tt_; \
	  checkliveness(L,io_); }
#define setobj2t	setobj
#define setobj(L,obj1,obj2) \
	{ TValue *io1=(obj1); const TValue *io2=(obj2); \
          io1->value_ = io2->value_; settt_(io1, io2->tt_); \
	  checkliveness(L,io1); lua_assert(!isnonstrictnil(io1)); }
  • luaH_next
    参数:lua_State,Table,key
    功能:根据key找到下一个key,迭代器的实现是用key去遍历的
int luaH_next (lua_State *L, Table *t, StkId key) {
  unsigned int asize = luaH_realasize(t);
  unsigned int i = findindex(L, t, s2v(key), asize);  //1~alimit在数组>alimit在hash
  for (; i < asize; i++) {  //遍历数组
    if (!isempty(&t->array[i])) {  /* a non-empty entry? */
      setivalue(s2v(key), i + 1);
      setobj2s(L, key + 1, &t->array[i]);
      return 1;
    }
  }
  for (i -= asize; cast_int(i) < sizenode(t); i++) {  //遍历hash节点
    if (!isempty(gval(gnode(t, i)))) {  /* a non-empty entry? */
      Node *n = gnode(t, i);
      getnodekey(L, s2v(key), n);
      setobj2s(L, key + 1, gval(n));
      return 1;
    }
  }
  return 0;  /* no more elements */
}

static unsigned int findindex (lua_State *L, Table *t, TValue *key,
                               unsigned int asize) {
  unsigned int i;
  if (ttisnil(key)) return 0;  /* first iteration */
  i = ttisinteger(key) ? arrayindex(ivalue(key)) : 0;
  if (i - 1u < asize)  /* is 'key' inside array part? */	//不是整数unsigned int - 1为max long long 
    return i;  /* yes; that's the index */
  else {
    const TValue *n = getgeneric(t, key, 1);	//找到hash中的位置
    if (l_unlikely(isabstkey(n)))
      luaG_runerror(L, "invalid key to 'next'");  /* key not found */
    i = cast_int(nodefromval(n) - gnode(t, 0));  /* key index in hash table */
    /* hash elements are numbered after array ones */
    return (i + 1) + asize;
  }
}
  • luaH_getn
    参数:Table
    功能:返回Table的长度,仅在数组部分连续下才有意义,因为内部使用二分法,二分法无法对不规则数组进行二分
lua_Unsigned luaH_getn (Table *t) {
  unsigned int limit = t->alimit;
  if (limit > 0 && isempty(&t->array[limit - 1])) {  //边界为空,那么肯定在[1,alimit]之间
    /* there must be a boundary before 'limit' */
    if (limit >= 2 && !isempty(&t->array[limit - 2])) {
      /* 'limit - 1' is a boundary; can it be a new limit? */
      if (ispow2realasize(t) && !ispow2(limit - 1)) {
        t->alimit = limit - 1;
        setnorealasize(t);  /* now 'alimit' is not the real size */
      }
      return limit - 1;
    }
    else {  /* must search for a boundary in [0, limit] */
      unsigned int boundary = binsearch(t->array, 0, limit);
      /* can this boundary represent the real size of the array? */
      if (ispow2realasize(t) && boundary > luaH_realasize(t) / 2) {
        t->alimit = boundary;  /* use it as the new limit */
        setnorealasize(t);
      }
      return boundary;
    }
  }
  /* 'limit' is zero or present in table */
  if (!limitequalsasize(t)) {  //在(alimit,reallimit],当对不连续数组#,会产生错误边界导致边界后边也有元素
    /* 'limit' > 0 and array has more elements after 'limit' */
    if (isempty(&t->array[limit]))  /* 'limit + 1' is empty? */
      return limit;  /* this is the boundary */
    /* else, try last element in the array */
    limit = luaH_realasize(t);
    if (isempty(&t->array[limit - 1])) {  /* empty? */
      /* there must be a boundary in the array after old limit,
         and it must be a valid new limit */
      unsigned int boundary = binsearch(t->array, t->alimit, limit);
      t->alimit = boundary;
      return boundary;
    }
    /* else, new limit is present in the table; check the hash part */
  }
  //数组部分满,有更多的元素在hash表中,多半由于hash表未满,数组还没扩容
  lua_assert(limit == luaH_realasize(t) &&
             (limit == 0 || !isempty(&t->array[limit - 1])));
  if (isdummy(t) || isempty(luaH_getint(t, cast(lua_Integer, limit + 1))))
    return limit;  /* 'limit + 1' is absent */
  else  /* 'limit + 1' is also present */
    return hash_search(t, limit);
}

static lua_Unsigned hash_search (Table *t, lua_Unsigned j) {
  lua_Unsigned i;
  if (j == 0) j++;  /* the caller ensures 'j + 1' is present */
  do {
    i = j;  /* 'i' is a present index */
    if (j <= l_castS2U(LUA_MAXINTEGER) / 2)
      j *= 2;
    else {
      j = LUA_MAXINTEGER;
      if (isempty(luaH_getint(t, j)))  /* t[j] not present? */
        break;  /* 'j' now is an absent index */
      else  /* weird case */
        return j;  /* well, max integer is a boundary... */
    }
  } while (!isempty(luaH_getint(t, j)));  /* repeat until an absent t[j] */
  /* i < j  &&  t[i] present  &&  t[j] absent */
  while (j - i > 1u) {  /* do a binary search between them */
    lua_Unsigned m = (i + j) / 2;
    if (isempty(luaH_getint(t, m))) j = m;
    else i = m;
  }
  return i;
}

总结

并不是数组是连续的元素就都在数组里,如果有新元素就放在hash表里,直到hash表没有lastfree才会扩容或缩减,这个时候才会把数组大小分配为利用率大于等于50%的大小。而且#这个东西取不连续的数组,返回的结果是不确定的,因为源码使用的是二分,如果数组不连续,长度可能出现在数组部分,也可能出在哈希部分,所以#取得的长度和数组利用率无关。

你可能感兴趣的:(lua,数据结构,lua,开发语言,c语言,linux)