时间序列相似性

 对于两个序列来说,如果要比较两个波形的相似程度,可以使用DWT(动态时间规整)的方法。对于dwt方法,可以解决处理两个长度不一样的序列。

DTW是一种衡量两个时间序列之间的相似度的方法,主要应用在语音识别领域来识别两段语音是否表示同一个单词,度量的特征量是:两个序列之间的最短距离。

原理:

DTW通过把时间序列进行延伸和缩短,来计算两个时间序列性之间的相似性。

1,两个要进行匹配的数据A=[A1,A2,...An]和B=[B1,B2,...Bm]

归整路径的形式为W=w1,w2,...,wK,其中Max(|A|,|B|)<=K<=|A|+|B|。

wk的形式为(i,j),其中i表示的是A中的i坐标,j表示的是B中的j坐标.

归整路径W必须从w1=(1,1)开始,到wK=(|X|,|Y|)结尾,以保证X和Y中的每个坐标都在W中出现.

(i,j)必须是单调增加的,从w(a1,a2)沿着某条路径到达w(am,bn)。

找路径满足:假如当前节点是w(ai,bj),那么下一个节点必须是在   w(i+1,j),w(i,j+1),w(i+1,j+1)之间选择,并且路径必须是最短的。

计算的时候是按照动态规划的思想计算,也就是说在计算到达第(i,j)个节点的最短路径时候,考虑的是左下角也即第(i-1,j)、(i-1,j-1)、(i,j-1)这三个点到(i,j)的最短距离。

最后的目标是要找到一个在两个序列之间的最短距离以及实现这个最短距离的路径。

距离选用任意经典的距离计算方法:欧几里得距离

2,代码实现-matlab

function [Dist,D,k,w,rw,tw]=dtw(r,t,pflag)
%
% [Dist,D,k,w,rw,tw]=dtw(r,t,pflag)
%
% Dynamic Time Warping Algorithm
% Dist is unnormalized distance between t and r
% D is the accumulated distance matrix
% k is the normalizing factor
% w is the optimal path
% t is the vector you are testing against
% r is the vector you are testing
% rw is the warped r vector
% tw is the warped t vector
% pflag  plot flag: 1 (yes), 0(no)
%
% Version comments:
% rw, tw and pflag added by Pau Mic

[row,M]=size(r); if (row > M) M=row; r=r'; end;
[row,N]=size(t); if (row > N) N=row; t=t'; end;
d=sqrt((repmat(r',1,N)-repmat(t,M,1)).^2); %this makes clear the above instruction Thanks Pau Mic
dd=abs(repmat(r',1,N)-repmat(t,M,1));
dd
d
D=zeros(size(d));
D(1,1)=d(1,1);

for m=2:M
    D(m,1)=d(m,1)+D(m-1,1);
end
for n=2:N
    D(1,n)=d(1,n)+D(1,n-1);
end
for m=2:M
    for n=2:N
        D(m,n)=d(m,n)+min(D(m-1,n),min(D(m-1,n-1),D(m,n-1))); % this double MIn construction improves in 10-fold the Speed-up. Thanks Sven Mensing
    end
end

Dist=D(M,N);
n=N;
m=M;
k=1;
w=[M N];
w
while ((n+m)~=2)
    if (n-1)==0
        m=m-1;
    elseif (m-1)==0
        n=n-1;
    else
        [values,number]=min([D(m-1,n),D(m,n-1),D(m-1,n-1)]);
        switch number
            case 1
                m=m-1;
            case 2
                n=n-1;
            case 3
                m=m-1;
                n=n-1;
        end
    end
    k=k+1;
    w=[m n; w]; % this replace the above sentence. Thanks Pau Mic
end
[values,number]
D
m
n
w
% warped waves
rw=r(w(:,1));
tw=t(w(:,2));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if pflag
    
    % --- Accumulated distance matrix and optimal path
    figure('Name','DTW - Accumulated distance matrix and optimal path', 'NumberTitle','off');
    
    main1=subplot('position',[0.19 0.19 0.67 0.79]);
    image(D);
    cmap = contrast(D);
    colormap(cmap); % 'copper' 'bone', 'gray' imagesc(D);
    hold on;
    x=w(:,1); y=w(:,2);
    ind=find(x==1); x(ind)=1+0.2;
    ind=find(x==M); x(ind)=M-0.2;
    ind=find(y==1); y(ind)=1+0.2;
    ind=find(y==N); y(ind)=N-0.2;
    plot(y,x,'-w', 'LineWidth',1);
    hold off;
    axis([1 N 1 M]);
    set(main1, 'FontSize',7, 'XTickLabel','', 'YTickLabel','');
    
    colorb1=subplot('position',[0.88 0.19 0.05 0.79]);
    nticks=8;
    ticks=floor(1:(size(cmap,1)-1)/(nticks-1):size(cmap,1));
    mx=max(max(D));
    mn=min(min(D));
    ticklabels=floor(mn:(mx-mn)/(nticks-1):mx);
    colorbar(colorb1);
    set(colorb1, 'FontSize',7, 'YTick',ticks, 'YTickLabel',ticklabels);
    set(get(colorb1,'YLabel'), 'String','Distance', 'Rotation',-90, 'FontSize',7, 'VerticalAlignment','bottom');
    
    left1=subplot('position',[0.07 0.19 0.10 0.79]);
    plot(r,M:-1:1,'-b');
    set(left1, 'YTick',mod(M,10):10:M, 'YTickLabel',10*rem(M,10):-10:0)
    axis([min(r) 1.1*max(r) 1 M]);
    set(left1, 'FontSize',7);
    set(get(left1,'YLabel'), 'String','Samples', 'FontSize',7, 'Rotation',-90, 'VerticalAlignment','cap');
    set(get(left1,'XLabel'), 'String','Amp', 'FontSize',6, 'VerticalAlignment','cap');
    
    bottom1=subplot('position',[0.19 0.07 0.67 0.10]);
    plot(t,'-r');
    axis([1 N min(t) 1.1*max(t)]);
    set(bottom1, 'FontSize',7, 'YAxisLocation','right');
    set(get(bottom1,'XLabel'), 'String','Samples', 'FontSize',7, 'VerticalAlignment','middle');
    set(get(bottom1,'YLabel'), 'String','Amp', 'Rotation',-90, 'FontSize',6, 'VerticalAlignment','bottom');
    
    % --- Warped signals
    figure('Name','DTW - warped signals', 'NumberTitle','off');
    
    subplot(1,2,1);
    set(gca, 'FontSize',7);
    hold on;
    plot(r,'-bx');
    plot(t,':r.');
    hold off;
    axis([1 max(M,N) min(min(r),min(t)) 1.1*max(max(r),max(t))]);
    grid;
    legend('signal 1','signal 2');
    title('Original signals');
    xlabel('Samples');
    ylabel('Amplitude');
    
    subplot(1,2,2);
    set(gca, 'FontSize',7);
    hold on;
    plot(rw,'-bx');
    plot(tw,':r.');
    hold off;
    axis([1 k min(min([rw; tw])) 1.1*max(max([rw; tw]))]);
    grid;
    legend('signal 1','signal 2');
    title('Warped signals');
    xlabel('Samples');
    ylabel('Amplitude');
    
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% test
clear  
clc  
a=[8 9 1 9 6 1 3 5]';  
b=[2 5 4 6 7 8 3 7 7 2]';  
[Dist,D,k,w,rw,tw] = DTW(a,b,1);  
fprintf('最短距离为%d\n',Dist)  
fprintf('最优路径为')  
w


3,代码实现-python code1--画图

from math import *
import matplotlib.pyplot as plt
import numpy

def print_matrix(mat) :
    print '[matrix] width : %d height : %d' % (len(mat[0]), len(mat))
    print '-----------------------------------'
    for i in range(len(mat)) :
        print mat[i]#[v[:2] for v in mat[i]]

def dist_for_float(p1, p2) :
    dist = 0.0
    elem_type = type(p1)
    if  elem_type == float or elem_type == int :
        dist = float(abs(p1 - p2))
    else :
        sumval = 0.0
        for i in range(len(p1)) :
            sumval += pow(p1[i] - p2[i], 2)
        dist = pow(sumval, 0.5)
    return dist

def dtw(s1, s2, dist_func) :
    w = len(s1)
    h = len(s2)
    
    mat = [([[0, 0, 0, 0] for j in range(w)]) for i in range(h)]
    
    #print_matrix(mat)
    
    for x in range(w) :
        for y in range(h) :            
            dist = dist_func(s1[x], s2[y])
            mat[y][x] = [dist, 0, 0, 0]
            
    #print_matrix(mat)

    elem_0_0 = mat[0][0]
    elem_0_0[1] = elem_0_0[0] * 2

    for x in range(1, w) :
        mat[0][x][1] = mat[0][x][0] + mat[0][x - 1][1]
        mat[0][x][2] = x - 1
        mat[0][x][3] = 0

    for y in range(1, h) :
        mat[y][0][1] = mat[y][0][0] + mat[y - 1][0][1]            
        mat[y][0][2] = 0
        mat[y][0][3] = y - 1

    for y in range(1, h) :
        for x in range(1, w) :
            distlist = [mat[y][x - 1][1], mat[y - 1][x][1], 2 * mat[y - 1][x - 1][1]]
            mindist = min(distlist)
            idx = distlist.index(mindist)
            mat[y][x][1] = mat[y][x][0] + mindist
            if idx == 0 :
                mat[y][x][2] = x - 1
                mat[y][x][3] = y
            elif idx == 1 :
                mat[y][x][2] = x
                mat[y][x][3] = y - 1
            else :
                mat[y][x][2] = x - 1
                mat[y][x][3] = y - 1

    result = mat[h - 1][w - 1]
    retval = result[1]
    path = [(w - 1, h - 1)]
    while True :
        x = result[2]
        y = result[3]
        path.append((x, y))

        result = mat[y][x]
        if x == 0 and y == 0 :
            break
        
    #print_matrix(mat)

    return retval, sorted(path)

def display(s1, s2) :

    val, path = dtw(s1, s2, dist_for_float)
    
    w = len(s1)
    h = len(s2)
    
    mat = [[1] * w for i in range(h)]
    for node in path :
        x, y = node
        mat[y][x] = 0

    mat = numpy.array(mat)
    
    plt.subplot(2, 2, 2)
    c = plt.pcolor(mat, edgecolors='k', linewidths=4)
    plt.title('Dynamic Time Warping (%f)' % val)

    plt.subplot(2, 2, 1)
    plt.plot(s2, range(len(s2)), 'g')
    
    plt.subplot(2, 2, 4)
    plt.plot(range(len(s1)), s1, 'r')
        

    plt.show()
    

s1 = [1, 2, 3, 4, 5, 5, 5, 4]
s2 = [3, 4, 5, 5, 5, 4]
s2 = [1, 2, 3, 4, 5, 5]
s2 = [2, 3, 4, 5, 5, 5]
#val, path = dtw(s1, s2, dist_for_float)
display(s1, s2)


4,代码实现-python  code2-只计算距离

import numpy as np
from numpy import *
from numpy.matlib import repmat


def DTW(r, t, plt=True):
    M = len(r)
    N = len(t)
    r_array = np.matrix(r)
    t_array = np.matrix(t)
    ### 距离计算公式
    distance_classical = abs(repmat(r_array.T, 1, N) - repmat(t_array, M, 1))
    D = np.zeros(shape(distance_classical))
    for m in range(1, M):
        D[m, 0] = distance_classical[m, 0] + D[m - 1, 0]
    for n in range(1, N):
        D[0, n] = distance_classical[0, n] + D[0, n - 1]
    for m in range(1, M):
        for n in range(1, N):
            D[m, n] = distance_classical[m, n] + min(D[m - 1, n], min(D[m - 1, n - 1], D[m, n - 1]))
    Dist = D[m, n]




你可能感兴趣的:(数据处理)