计算样本集的样本间距离矩阵:pdist 与 squareform 是一对好基友(常同时出现,squareform 的参数就是 pdist 的返回值;squareform 并没有取平方的意思,只是将 pdist 返回的一维压缩形式拓展为方阵)
from scipy.spatial.distance import pdist, squareform

# X is the sample matrix supplied by the caller:
#   X.shape == (n_samples, n_features)
# pdist returns the condensed, one-dimensional form of the pairwise
# distances (one entry per unordered pair of samples):
#   sq_dists.shape == (n_samples * (n_samples - 1) // 2,)
sq_dists = pdist(X, metric='sqeuclidean')

# squareform does not square anything; it only expands the condensed vector
# returned by pdist into the full matrix:
#   mat_sq_dists.shape == (n_samples, n_samples)
# The result is symmetric with zeros on the main diagonal.
mat_sq_dists = squareform(sq_dists)
from scipy.special import comb
# comb(n, k): number of combinations ("n choose k").
from scipy.special import perm
# perm(n, k): number of permutations (ordered arrangements of k out of n).
from scipy.special import factorial
# factorial(n): n!. It is imported from scipy.special because the old
# scipy.misc.factorial alias is no longer available in current SciPy.
# With the default exact=False the result is a NumPy floating-point value.
# It also accepts a sequence of numbers and computes each factorial
# element-wise.
>>> comb(10, 2)
# 45.0
>>> factorial(10)
array(3628800.0)
>>> factorial([1, 2, 3, 4, 5])
array([ 1., 2., 6., 24., 120.])
## integrate:积分
注:旧版本的 scipy.misc 中也有 comb、factorial,但没有 perm;这两个函数在 SciPy 1.0 中被弃用,并在 1.3 中被移除,新代码应统一从 scipy.special 导入。
from scipy import integrate
尤其是 integrate.quad,可用来计算定积分,进而由概率密度函数计算 CDF(cumulative distribution function,累积分布函数)。
如 Gamma 分布的概率密度函数及其累积分布函数可实现为:
from scipy.special import gamma
def gamma_pdf(x, alpha, beta):
    """Evaluate the Gamma probability density at ``x``.

    The density is computed in the shape/rate form
    ``beta**alpha * x**(alpha - 1) * exp(-beta * x) / Gamma(alpha)``.

    Args:
        x: Point(s) at which to evaluate the density; a scalar or anything
            that supports ``**`` and ``np.exp`` (e.g. a NumPy array).
        alpha: Shape parameter.
        beta: Rate parameter (it multiplies ``x`` inside the exponential).

    Returns:
        The density value(s), with the same shape as ``x``.
    """
    # Keep the original left-to-right evaluation order so floating-point
    # results are bit-for-bit the same as the one-line formula.
    normaliser = beta ** alpha
    power_term = x ** (alpha - 1)
    decay_term = np.exp(-beta * x)
    return normaliser * power_term * decay_term / gamma(alpha)
def gamma_cdf(x, alpha, beta):
    """Evaluate the Gamma cumulative distribution function numerically.

    For every upper limit ``t`` in ``x`` the density ``gamma_pdf`` is
    integrated from 0 to ``t`` with ``scipy.integrate.quad``.

    Args:
        x: Iterable of upper integration limits.
        alpha: Shape parameter, forwarded to ``gamma_pdf``.
        beta: Rate parameter, forwarded to ``gamma_pdf``.

    Returns:
        A list with one float per element of ``x``: the integral of the
        density over ``[0, t]``.
    """
    # quad returns a (value, abs_error_estimate) pair; keep only the value.
    # Returning the whole pair would make callers plot the error estimate
    # as a second curve.
    return [
        integrate.quad(gamma_pdf, 0, t, args=(alpha, beta))[0]
        for t in x
    ]
def main():
    """Plot Gamma PDFs and CDFs for a fixed set of parameter pairs.

    Two figures are shown one after the other: first the densities, then
    the cumulative distribution functions, one labelled curve per pair.

    NOTE(review): relies on ``np`` (NumPy) and ``plt`` (presumably
    ``matplotlib.pyplot``) being imported elsewhere in the file.
    """
    # Each pair is (alpha, beta). The second value is inverted before being
    # passed on, i.e. it is treated as a scale while gamma_pdf / gamma_cdf
    # expect a rate.
    params = [(1, 2), (2, 2), (3, 2), (5, 1), (9, .5), (7.5, 1), (0.5, 1.)]
    x = np.arange(0.01, 20, .01)

    # Use the same 1/beta conversion for both the PDF and the CDF so that
    # curves carrying the same legend label describe the same distribution
    # (previously the CDF received beta un-inverted).
    for curve in (gamma_pdf, gamma_cdf):
        plt.figure()
        for alpha, beta in params:
            plt.plot(
                x,
                curve(x, alpha, 1 / beta),
                label=r'$\alpha={:.1f},\beta={:.1f}$'.format(alpha, beta),
                lw=2,
            )
        plt.legend(loc='best', frameon=False)
        plt.show()
# Run the plotting demo only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()