matlab实现多元霍夫曼编码

    采用matlab矩阵和元胞数据结构等可以简单的实现多元霍夫曼编码。

   (1) 假设实现Q元霍夫曼编码,首先判断符号个数len是否满足(len-Q) / (Q-1)为非负整数,如果不满足,向概率中添加若干0概率事件(下面的代码中,这些补充事件的符号统一记为0),直到满足前面所述条件,添加完后的事件个数记为N。

    (2)将事件符号(1-N),父亲节点(先置为0),概率大小(probability)添加到矩阵temp中。

    (3)将temp按照概率从大到小排序,截去最后Q行(即概率最小的Q个节点),将这Q行的父亲节点置为N+1,然后存入矩阵final中。计算这Q行的概率之和,形成一个新节点new,new的事件符号为N+1,父亲节点为0,概率大小为选取的Q行的概率之和。将new加入temp,重复第三步(新节点的事件符号依次为N+2,N+3...),直至temp中只剩下根节点。

    (4)再搜索节点,构造编码。首先声明alldict元胞矩阵,共三列,第一列第二列分别存储final第一列第二列,第三列为空。

    (5)将0-(Q-1)分配给事件符号最大的节点(根节点)的Q个子节点,存入第三列,然后寻找事件符号次大的节点,将0-(Q-1)分配给它的Q个子节点的第三列,如果第三列已经存在部分编码,则将现在分配的编码和已分配的编码合并,并且现在分配的编码放在原来编码的前面。重复第五步,直至没有父亲节点。编好的码已存在alldict的第三列。

    (6)将alldict按照第一列排序,选取前len行,再选取其中的第1和第3列的作为返回值。然后计算平均编码长度。


function [dict, avglen] = QHuffmanEncode(symbols, probability, Q)
% QHUFFMANENCODE  Build a Q-ary Huffman code for a discrete source.
%
%   [dict, avglen] = QHuffmanEncode(symbols, probability, Q)
%
%   Inputs:
%     symbols     - row vector that must be exactly [1 2 3 ... n], n >= 2
%     probability - row vector with the n symbol probabilities, same order
%     Q           - radix of the code (integer >= 2); code digits are 0..Q-1
%
%   Outputs:
%     dict   - n-by-2 cell array, one row per input symbol:
%              column 1 is the symbol, column 2 is its codeword stored as
%              a row vector of digits (most significant digit first)
%     avglen - average codeword length, sum over symbols of
%              (codeword length * symbol probability)
%
%   An error is raised when the argument count is not three, when symbols
%   or probability is not a row vector with at least two entries, when
%   their lengths differ, when symbols is not 1..n, or when Q is not an
%   integer >= 2.
%
%   edit by LiuMengran in hust

if(nargin ~= 3)
    error('The input parameter count should be three.');
end
[m,n] = size(probability);
if(m ~= 1 || n <= 1)
    error('probability should be a row mat.');
end
[m,n] = size(symbols);
if(m ~= 1 || n <= 1)
    error('symbols should be a row mat.');
end
if(length(probability) ~= n)
    error('symbols and probability should have the same length.');
end
if(any(symbols ~= 1:n))
    error('symbols should be [1,2,3...].');
end
% Q = 1 would divide by zero below and a fractional Q gives a fractional
% padding count, so reject both up front with a clear message.
if(~(isnumeric(Q) && isscalar(Q) && isfinite(Q) && Q == floor(Q) && Q >= 2))
    error('Q should be an integer not less than 2.');
end

len = n;
% A full Q-ary tree needs Q + merges*(Q-1) leaves. "merges" is the number
% of merge steps after the first one; "need" is how many zero-probability
% dummy leaves must be added to reach that leaf count.
merges = ceil((len - Q) / (Q - 1));
need = (Q - 1) * merges + Q - len;
N = len + need;          % leaf count including dummies
total = N + merges;      % every node except the root

% Each row of work/final is [symbol, parent, probability].
% Dummy leaves carry symbol 0 so that, after sorting final by symbol, they
% occupy rows 1..need, the real symbols rows need+1..N, and internal node
% N+k sits in row N+k.
% work  holds the nodes that have not been merged yet (parent == 0).
% final collects the nodes once they have been given a parent.
work = [[symbols zeros(1, need)]', zeros(N, 1), [probability zeros(1, need)]'];
final = zeros(total, 3);
for step = 1:(merges + 1)
    work = sortrows(work, 3);                 % ascending probability
    work(1:Q, 2) = N + step;                  % Q least likely nodes get a new parent
    final((step - 1) * Q + (1:Q), :) = work(1:Q, :);
    merged = [N+step, 0, sum(work(1:Q, 3))];  % the new internal node
    work = [work(Q+1:end, :); merged];
end

final = sortrows(final, 1);
parents = final(:, 2);

% Assign codewords top-down. Parents always have a larger symbol than
% their children, so walking the parent ids from the root (N+merges+1)
% downwards guarantees a parent's codeword is complete before it is used
% as the prefix for its children. Children receive digits 0..Q-1 in row
% order.
codes = cell(total, 1);
for p = (N + merges + 1):-1:(N + 1)
    if(p <= total)
        prefix = codes{p};   % internal node p is stored in row p
    else
        prefix = [];         % the root has no row and no codeword
    end
    kids = find(parents == p);
    for d = 1:numel(kids)
        codes{kids(d)} = [prefix, d - 1];
    end
end

% Keep only the real symbols (skip the dummy rows and the internal nodes).
keep = (need + 1):N;
dict = [num2cell(final(keep, 1)), codes(keep)];

% Average codeword length weighted by the original probabilities.
avglen = 0;
for k = 1:len
    avglen = avglen + numel(dict{k, 2}) * probability(k);
end
end



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Demo script for QHuffmanEncode.
% Q is the radix passed to the encoder (Q = 4 here).
Q = 4;
probability = [0.4 0.2 0.2 0.06 0.05 0.04 0.03 0.02];
symbols = 1:8;
[dict, avglen] = QHuffmanEncode(symbols, probability, Q);
% Print one line per symbol: the symbol followed by its codeword digits.
for i = 1:numel(symbols)
    entry = [dict{i,1} dict{i,2}];
    disp(entry);
end
% Finally print the average codeword length.
disp(avglen);

你可能感兴趣的:(通信)