from dtaidistance import dtw
from dtaidistance import dtw_visualisation as dtwvis
import numpy as np
import matplotlib.pyplot as plt
# Two example series of equal length, stored as double-precision arrays.
s1 = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0], dtype=np.double)
s2 = np.array([0, 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0], dtype=np.double)

# Compute the warping path between the two series and draw it.
path = dtw.warping_path(s1, s2)
dtwvis.plot_warping(s1, s2, path)

# Notebook-style echo of the path; the string literal below records the
# output: a list of (index in s1, index in s2) pairs.
path
"""
[(0, 0),
(1, 0),
(2, 1),
(3, 2),
(3, 3),
(4, 4),
(5, 5),
(6, 5),
(7, 6),
(8, 7),
(9, 8),
(10, 9),
(11, 10),
(11, 11),
(12, 12)]
"""
# DTW distance between the same two example series.
s1 = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0], dtype=np.double)
s2 = np.array([0, 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0], dtype=np.double)
dtw.distance(s1, s2)
# Output: 1.4142135623730951
最快的版本(比其他版本快30至300倍)直接使用C语言编写,但需要数组作为输入(使用双精度类型);此外,还可以(可选地)传入use_pruning=True,将max_dist设置为欧几里得距离的上界,从而对计算进行剪枝。
# Same distance computed through the distance_fast entry point, which is fed
# double-precision arrays here.
s1 = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0], dtype=np.double)
s2 = np.array([0, 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0], dtype=np.double)
dtw.distance_fast(s1, s2)
# Output: 1.4142135623730951
# distance_fast again, this time with pruning switched on; the recorded
# result is the same as without pruning.
s1 = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0], dtype=np.double)
s2 = np.array([0, 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0], dtype=np.double)
dtw.distance_fast(s1, s2, use_pruning=True)
# Output: 1.4142135623730951
距离函数具有线性空间复杂度但二次时间复杂度。如果除了距离之外,还想得到完整的矩阵以查看所有可能的弯曲路径,可以使用dtw.warping_paths:
s1 = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0, 2, 1, 0, 0], dtype=np.double)
s2 = np.array([0, 1, 2, 3, 1, 0, 0, 0, 2, 1, 0, 0, 0], dtype=np.double)

# warping_paths returns a pair, unpacked here as the distance and a matrix.
distance, matrix = dtw.warping_paths(s1, s2)
distance
# Output: 1.4142135623730951

# For these two 13-element series the recorded matrix shape is 14 x 14.
matrix.shape
# Output: (14, 14)

# Extract the best path from the matrix; the string literal below records
# the output.
dtw.best_path(matrix)
"""
[(0, 0),
(1, 0),
(2, 1),
(3, 2),
(3, 3),
(4, 4),
(5, 5),
(6, 5),
(7, 6),
(8, 7),
(9, 8),
(10, 9),
(11, 10),
(11, 11),
(12, 12)]
"""
这个结果其实和我们前面用dtw.warping_path得到的路径是一样的:
# Compute the warping path once and reuse it for both the echo and the plot
# (the original evaluated dtw.warping_path twice).
path = dtw.warping_path(s1, s2)

# Notebook-style echo; the string literal below records the output, which is
# the same list shown earlier for these two series. The original literal
# repeated the call text instead of the result.
path
"""
[(0, 0),
(1, 0),
(2, 1),
(3, 2),
(3, 3),
(4, 4),
(5, 5),
(6, 5),
(7, 6),
(8, 7),
(9, 8),
(10, 9),
(11, 10),
(11, 11),
(12, 12)]
"""

# Plot the matrix returned by dtw.warping_paths together with the path.
dtwvis.plot_warpingpaths(s1, s2, matrix, path)
dtw.distance_matrix方法用于计算一组序列中两两之间的DTW距离。
dtw.distance_matrix_fast方法可以加速计算,该方法试图在C语言环境中运行所有算法。
from dtaidistance import dtw
import numpy as np
# Three double-precision series of different lengths (9, 11 and 8 samples).
timeseries = [
    np.array([0, 0, 1, 2, 1, 0, 1, 0, 0], dtype=np.double),
    np.array([0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.double),
    np.array([0, 0, 1, 2, 1, 0, 0, 0], dtype=np.double),
]

# Distances between every pair of series; the string literal below records
# the resulting 3 x 3 array.
ds = dtw.distance_matrix(timeseries)
ds
"""
array([[0. , 1.41421356, 1. ],
[1.41421356, 0. , 1. ],
[1. , 1. , 0. ]])
"""
输入为由多条序列组成的列表(这里是NumPy数组的列表,各序列的长度可以不同)。
设置compact=True可以只返回距离矩阵上三角部分的值(一维数组),节省空间。
from dtaidistance import dtw
import numpy as np
# Same three series as in the full-matrix example.
timeseries = [
    np.array([0, 0, 1, 2, 1, 0, 1, 0, 0], dtype=np.double),
    np.array([0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.double),
    np.array([0, 0, 1, 2, 1, 0, 0, 0], dtype=np.double),
]

# With compact=True the recorded result is a flat array of three values
# rather than a 3 x 3 array.
ds = dtw.distance_matrix(timeseries, compact=True)
ds
# Output: array('d', [1.4142135623730951, 1.0, 1.0])