The hash function is used to transform the key into the index (the hash) of an array element (the slot or bucket) where the corresponding value is to be sought.
fetch(Key, D) -> Slot = get_slot(D, Key), Bkt = get_bucket(D, Slot), try fetch_val(Key, Bkt) catch badarg -> erlang:error(badarg, [Key, D]) end. %% get_slot(Hashdb, Key) -> Slot. %% Get the slot. First hash on the new range, if we hit a bucket %% which has not been split use the unsplit buddy bucket. get_slot(T, Key) -> H = erlang:phash(Key, T#dict.maxn), if H > T#dict.n -> H - T#dict.bso; true -> H end. %% get_bucket(Hashdb, Slot) -> Bucket. get_bucket(T, Slot) -> get_bucket_s(T#dict.segs, Slot).
Segment大小固定我们只需要随着数据大小不断修改顶层tuple的size就可以.Segments tuple的最后一个元素是一个空的segment用于后续扩展.Segments每次成倍扩展收缩,这对性能并无很大损害.注意dict对外暴露的接口并没有包含数据实际的位置信息.store/3,append/3,append_list/3,update/3,update/4,update_counter/3这些接口都检查是否需要进行扩展,
filter/2 erase/2会检查是否需要进行收缩.由于dict能够随着数据量的变化动态调整进行缩放,兼顾了内存消耗和访问效率.
%% Note: mk_seg/1 must be changed too if seg_size is changed. -define(seg_size, 16). -define(max_seg, 32). -define(expand_load, 5). -define(contract_load, 3). -define(exp_size, (?seg_size * ?expand_load)). -define(con_size, (?seg_size * ?contract_load)). %% Define a hashtable. The default values are the standard ones. -record(dict, {size=0 %元素的数量 n=?seg_size % 已经激活的slot数量 maxn=?seg_size % 最大slots数 bso=?seg_size div 2 %最大bucket数散列表中当前允许的最大 bucket 数量,扩张操作需要据此判断是否要增加新的 bucket 区段,初始为 16; exp_size=?exp_size %扩张阈值 初始值为16*5=80 con_size=?con_size %收缩阈值 初始值为16*3=48 empty :: tuple(), % Empty segment segs :: tuple() % Segments 所有的数据存放的地方 }).
在新建一个dict的时候,Empty会初始化成为一个数据模版
new() -> Empty = mk_seg(?seg_size), #dict{empty=Empty,segs={Empty}}. mk_seg(16) -> {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}. %%16估计也是测试经验值
Eshell V5.9.1 (abort with ^G) 1> dict:new(). {dict,0,16,16,8,80,48, {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}, {{[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}}} 2> dict:store(k,v,v(1)). {dict,1,16,16,8,80,48, {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}, {{[],[],[],[],[],[],[],[],[],[],[],[[k|v]],[],[],[],[]}}} 3> dict:store(k2,v2,v(2)). {dict,2,16,16,8,80,48, {[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]}, {{[],[], [[k2|v2]], [],[],[],[],[],[],[],[], [[k|v]], [],[],[],[]}}} 4>
%% append_bkt(Key, Val, Bucket) -> {NewBucket,PutCount}. append_bkt(Key, Val, [?kv(Key,Bag)|Bkt]) -> {[?kv(Key,Bag ++ [Val])|Bkt],0}; append_bkt(Key, Val, [Other|Bkt0]) -> {Bkt1,Ic} = append_bkt(Key, Val, Bkt0), {[Other|Bkt1],Ic}; append_bkt(Key, Val, []) -> {[?kv(Key,[Val])],1}. %% app_list_bkt(Key, L, Bucket) -> {NewBucket,PutCount}. app_list_bkt(Key, L, [?kv(Key,Bag)|Bkt]) -> {[?kv(Key,Bag ++ L)|Bkt],0}; app_list_bkt(Key, L, [Other|Bkt0]) -> {Bkt1,Ic} = app_list_bkt(Key, L, Bkt0), {[Other|Bkt1],Ic}; app_list_bkt(Key, L, []) -> {[?kv(Key,L)],1}.
晚安!