头文件:
/* * Copyright (c) 2008-2011 Zhang Ming (M. Zhang), [email protected] * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 2 or any later version. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. A copy of the GNU General Public License is available at: * http://www.fsf.org/licensing/licenses */ /***************************************************************************** * statistics.h * * Some generally used routines about probability and statistics, such as * median, mean, variance, standard variance, skew, kurtosis and the * PDF(probability density function). * * Zhang Ming, 2010-03, Xi'an Jiaotong University. *****************************************************************************/ #ifndef STATISTICS_H #define STATISTICS_H #include <vector.h> namespace splab { template<typename Type> Type mid( const Vector<Type>& ); template<typename Type> Type mean( const Vector<Type>& ); template<typename Type> Type var( const Vector<Type>& ); template<typename Type> Type stdVar( const Vector<Type>& ); template<typename Type> Vector<Type> standard( const Vector<Type>& ); template<typename Type> Type skew( const Vector<Type>& ); template<typename Type> Type kurt( const Vector<Type>& ); template<typename Type> Vector<Type> pdf( Vector<Type>&, const Type lambda=Type(1.0) ); #include <statistics-impl.h> } // namespace splab #endif // STATISTICS_H
实现文件:
/* * Copyright (c) 2008-2011 Zhang Ming (M. Zhang), [email protected] * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation, either version 2 or any later version. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. A copy of the GNU General Public License is available at: * http://www.fsf.org/licensing/licenses */ /***************************************************************************** * statistics-impl.h * * Implementation for statistics functions. * * Zhang Ming, 2010-03, Xi'an Jiaotong University. *****************************************************************************/ /** * Get the median value of a sequence. */ template <typename Type> Type mid( const Vector<Type> &v ) { Vector<Type> tmp(v); int index, N = tmp.size(), K = N/2; for( int k=0; k<=K; ++k ) { index = k; for( int i=k+1; i<N; ++i ) if( tmp[index] < tmp[i] ) index = i; if( index != k ) swap( tmp[index], tmp[k] ); } return tmp[K]; } /** * Mean value of a (random) sequence. */ template <typename Type> inline Type mean( const Vector<Type> &v ) { return sum(v) / v.dim(); } /** * Variance of a (random) sequence. */ template <typename Type> Type var( const Vector<Type> &v ) { int N = v.dim(); Type m = mean( v ); Type var = 0; for( int i=0; i<N; ++i ) var += ( v[i]-m ) * ( v[i]-m ); if( N>1 ) var /= N-1; // var /= N; return var; } /** * Standard variance of a (random) sequence. */ template <typename Type> inline Type stdVar( const Vector<Type> &v ) { return sqrt( var(v) ); } /** * Make Standardization of a (random) sequence. */ template <typename Type> inline Vector<Type> standard( const Vector<Type> &v ) { return (v-mean(v)) / stdVar(v); } /** * Skew of a (random) sequence. */ template <typename Type> Type skew( const Vector<Type> &v ) { Type m = mean(v), s = stdVar(v), tmp = 0, result = 0; for( int i=0; i<v.dim(); ++i ) { tmp = (v[i]-m); result += tmp * tmp * tmp; } result /= v.size()*s*s*s; return result; } /** * Kurtosis of a (random) sequence. */ template <typename Type> Type kurt( const Vector<Type> &v ) { Type m = mean(v), d = var(v), tmp = 0, result = 0; for( int i=0; i<v.dim(); ++i ) { tmp = (v[i]-m); result += tmp * tmp * tmp * tmp; } result /= v.size()*d*d; return result-3; } /** * Fast algorithm of PDF(probability density function) estimation * of a random sequence. The binning method and third-order cardinal * spline kernel function are used for reducing the computation * amount. Lambda is the coefficient of kernel bandwidth "kb", the * optimal "kb"( 2.11/Lx^0.2 * std(xn) ) is chosen when "lambda=1.0". * If you want to compute more point's probability, you can use a * smaller "lambda", such as 0.5, in this case you'd better make a * smoothing for the result. */ template <typename Type> Vector<Type> pdf( Vector<Type> &xn, const Type lambda ) { int i, k, tmpInt, Lpx, Lx = xn.size(); Type tmpDbl, kb = Type( lambda * 2.107683*stdVar(xn)/pow(Lx, 0.2) ); Vector<Type> r(Lx); Vector<int> pos(Lx), index(Lx); // binning the data into blocks, each block width is "kb" for( i=0; i<Lx; ++i ) { tmpDbl = xn[i] / kb; pos[i] = int(tmpDbl); r[i] = tmpDbl - pos[i]; } // compute the blocks index tmpInt = 1 - min(pos); for( i=0; i<Lx; ++i ) index[i] = pos[i]+tmpInt; // weights for left, middle and right block Vector<Type> weightl(Lx), weightc(Lx), weightr(Lx); for( i=0; i<Lx; ++i ) { tmpDbl = r[i]; weightl[i] = (1-tmpDbl)*(1-tmpDbl) / Type(2.0); weightc[i] = Type(0.5) + tmpDbl*(1-tmpDbl); weightr[i] = tmpDbl*tmpDbl / Type(2.0); } Lpx = max(index) - min(index) + 3; Vector<Type> px(Lpx); // compute probability density function for( i=0; i<Lx; ++i ) { k = index[i]; px[k-1] += weightl[i] / Lx ; px[k] += weightc[i] / Lx ; px[k+1] += weightr[i] / Lx ; } return px; }
测试代码:
/***************************************************************************** * statistics_test.cpp * * Statistics routines testing. * * Zhang Ming, 2010-03, Xi'an Jiaotong University. *****************************************************************************/ #define BOUNDS_CHECK #include <iostream> #include <iomanip> #include <random.h> #include <statistics.h> using namespace std; using namespace splab; typedef double Type; const int N = 1000; int main() { Vector<Type> xn = randn( 37, Type(0.0), Type(1.0), N ); cout << setiosflags(ios::fixed) << setprecision(4); cout << "The minimum maximum and median value of the sequence." << endl; cout << min(xn) << endl << max(xn) << endl << mid(xn) <<endl << endl; cout << "The mean, variance and standard variance of the sequence." << endl; cout << mean(xn) << endl << var(xn) << endl << stdVar(xn) << endl << endl; Vector<Type> yn = xn - mean(xn); cout << "The skew and kurtosis of the sequence." << endl; cout << skew(yn) << endl << kurt(yn) << endl << endl; cout << "The PDF of the sequence." << endl; cout << pdf(yn) << endl; return 0; }
运行结果:
The minimum maximum and median value of the sequence. -3.6176 3.1326 0.0294 The mean, variance and standard variance of the sequence. 0.0167 1.0343 1.0170 The skew and kurtosis of the sequence. -0.1354 -0.0274 The PDF of the sequence. size: 14 by 1 0.0024 0.0026 0.0142 0.0406 0.0864 0.1595 0.2216 0.1322 0.1923 0.0903 0.0426 0.0127 0.0024 0.0003 Process returned 0 (0x0) execution time : 0.109 s Press any key to continue.