



PCA(Principal Component Analysis,主成分分析),简单点说就是利用计算出来的较少的主成分变量,来代替原始数据所有维度,以达到数据降维的目的。关于PCA的文章和原理的介绍网上非常多,这里我就不做详细的描述了,推荐一个李民录博客里的PCA吧:http://blog.csdn.net/liminlu0314/article/details/8957009


原始图像 → 重采样到全色图像大小 → PCA变换得到主成分图像 → 用全色图像“替换”第一主成分 → PCA反变换





    // Excerpt: open both input rasters and validate them before fusion.
    // Tell GDAL that file names are not UTF-8 encoded.
    CPLSetConfigOption( "GDAL_FILENAME_IS_UTF8", "NO" );
    // Open the multi-band image
    multiDataset = ( GDALDataset* )GDALOpen( multiFilename.c_str(), GA_ReadOnly );
    if( multiDataset == NULL )
        cout << "打开多波段图像失败!" << endl;
        throw multiFilename + "file not valid";
    int multiWidth = multiDataset->GetRasterXSize();
    int multiHeight = multiDataset->GetRasterYSize();
    int multiBandCount = multiDataset->GetRasterCount();
    // Verify that the image has exactly 4 bands
    if ( multiBandCount != 4 )
        cout << "图像波段数有误,目前只能处理4波段图像!" << endl;
        throw multiFilename + "file not valid";
    // Open the high-resolution image
    highResDataset = ( GDALDataset* )GDALOpen( highRSFilename.c_str(), GA_ReadOnly );
    if( highResDataset == NULL )
        cout << "打开高分辨率图像失败!" << endl;
        throw highRSFilename + "file not valid";
    int highResWidth = highResDataset->GetRasterXSize();
    int highResHeight = highResDataset->GetRasterYSize();
    // Check that both images have the same pixel dimensions
    if ( highResHeight != multiHeight || highResWidth != multiWidth )
        cout << "图像大小不一致" << endl;
        throw multiFilename + "and" + highRSFilename + "don't match...";





    // Excerpt: gather the statistics the PCA transform is built from.
    double* bandMean = calMean( multiDataset );// compute the per-band means
    double* covMatrix = calCovMatrix( multiDataset, bandMean );// compute the covariance matrix
/// Computes the mean pixel value of every band of a raster dataset.
///
/// @param dataset  Dataset to analyse. When NULL the member multiDataset is
///                 used instead, which is what earlier versions always did.
/// @return A newly allocated array of this->bandCount means (band 1 first).
///         The caller releases it with delete[]. Returns NULL when no dataset
///         is available, a band cannot be fetched, or GDAL fails to compute
///         the statistics.
double* PcaFusion::calMean( GDALDataset * dataset )
{
    GDALDataset* source = ( dataset != NULL ) ? dataset : multiDataset;
    if ( source == NULL || this->bandCount <= 0 )
    {
        cout << "统计波段均值失败!" << endl;
        return NULL;
    }

    double* bandMean = new double [this->bandCount];
    for ( int i = 0; i < this->bandCount; i++ )
    {
        // Min and max are mandatory outputs of ComputeStatistics but are not
        // needed here; only the mean is kept.
        double dMaxValue = 0.0, dMinValue = 0.0;
        GDALRasterBand* band = source->GetRasterBand( i + 1 );  // GDAL bands are 1-based
        if ( band == NULL
                || band->ComputeStatistics( FALSE, &dMinValue, &dMaxValue, bandMean + i, NULL, NULL, NULL ) != CE_None )
        {
            cout << "统计波段均值失败!" << endl;
            delete [] bandMean;
            return NULL;
        }
    }
    return bandMean;
}
/// Computes the sample covariance matrix between the bands of a dataset.
///
/// Entry (i, j) is sum((band_i - mean_i) * (band_j - mean_j)) / (N - 1),
/// where N = width * height. The matrix is symmetric, so only the upper
/// triangle is computed and then mirrored. This halves the raster reads and
/// gives exactly the same values.
///
/// @param dataset   Dataset to read. When NULL the member multiDataset is
///                  used instead, which is what earlier versions always did.
/// @param bandMean  Per-band means, at least this->bandCount entries.
/// @return A newly allocated row-major bandCount x bandCount matrix that the
///         caller releases with delete[]. Returns NULL when the inputs are
///         unusable, the image has fewer than two pixels (N - 1 would be
///         zero), or a raster read fails.
double* PcaFusion::calCovMatrix( GDALDataset * dataset, double * bandMean )
{
    GDALDataset* source = ( dataset != NULL ) ? dataset : multiDataset;
    if ( source == NULL || bandMean == NULL || this->bandCount <= 0
            || this->width <= 0 || this->height <= 0 )
    {
        cout << "计算协方差矩阵失败!" << endl;
        return NULL;
    }

    // Multiply in size_t so large rasters do not overflow int.
    const size_t pixelCount = static_cast<size_t>( this->width ) * static_cast<size_t>( this->height );
    if ( pixelCount < 2 )
    {
        cout << "计算协方差矩阵失败!" << endl;
        return NULL;
    }

    // The vectors release the pixel buffers on every exit path.
    std::vector<float> rowBand( pixelCount );
    std::vector<float> colBand( pixelCount );
    const int n = this->bandCount;
    double* dCovariance = new double[n * n];

    for ( int i = 0; i < n; i++ )
    {
        int rowBandIndex = i + 1;  // GDAL bands are 1-based
        if ( source->RasterIO( GF_Read, 0, 0, this->width, this->height, &rowBand[0], this->width, this->height,
                               GDT_Float32, 1, &rowBandIndex, 0, 0, 0 ) != CE_None )
        {
            cout << "计算协方差矩阵失败!" << endl;
            delete [] dCovariance;
            return NULL;
        }

        for ( int j = i; j < n; j++ )
        {
            // The diagonal reuses the band that is already loaded.
            const float* other = &rowBand[0];
            if ( j != i )
            {
                int colBandIndex = j + 1;
                if ( source->RasterIO( GF_Read, 0, 0, this->width, this->height, &colBand[0], this->width, this->height,
                                       GDT_Float32, 1, &colBandIndex, 0, 0, 0 ) != CE_None )
                {
                    cout << "计算协方差矩阵失败!" << endl;
                    delete [] dCovariance;
                    return NULL;
                }
                other = &colBand[0];
            }

            double sum = 0;
            for ( size_t pix = 0; pix < pixelCount; pix++ )
            {
                sum += ( rowBand[pix] - bandMean[i] ) * ( other[pix] - bandMean[j] );
            }
            const double covariance = sum / static_cast<double>( pixelCount - 1 );
            dCovariance[i * n + j] = covariance;
            dCovariance[j * n + i] = covariance;
        }
    }
    return dCovariance;
}




// Excerpt: compute the eigenvalues and eigenvectors of the covariance matrix
    double eps = 0.0001;   // required precision passed to eejcb
    double *eigenVector = new double[this->bandCount * this->bandCount];
    // NOTE(review): the last argument is presumably an iteration limit — confirm against eejcb.
    eejcb( covMatrix, this->bandCount, eigenVector, eps, 100000 );
// Excerpt: find the minimum and maximum of img
    double imgMax = -100000, imgMin = 100000;
    for ( int index = 0; index < width * height; index++ )
        if ( imgMax < img[index] )
            imgMax = img[index];
        if ( imgMin > img[index] )
            imgMin = img[index];
    // Find the minimum and maximum of ref
    double refMax = -100000, refMin = 100000;
    for ( int index = 0; index < width * height; index++ )
        if ( refMax < ref[index] )
            refMax = ref[index];
        if ( refMin > ref[index] )
            refMin = ref[index];
    // Rescale the img values into the value range of ref
    for ( int i = 0; i < width * height; i++ )
        img[i] = ( img[i] - imgMin ) / ( imgMax - imgMin );
        img[i] = img[i] * ( refMax - refMin ) + refMin;
    // Find the minimum and maximum of img again, now that it has been rescaled
    imgMax = -100000, imgMin = 100000;
    for ( int index = 0; index < width * height; index++ )
        if ( imgMax < img[index] )
            imgMax = img[index];
        if ( imgMin > img[index] )
            imgMin = img[index];
    // Copy img and ref, multiplying each copy by factor and converting it to integers
    int* imgCopy = new int[width * height];
    int* refCopy = new int[width * height];
    for ( int i = 0; i < width * height; i++ )
        imgCopy[i] = ( int )( img[i] * factor );
        refCopy[i] = ( int )( ref[i] * factor );
    // NOTE(review): ref is released here with scalar delete; if the caller
    // allocated it with new[] this should be delete[] — confirm.
    delete ref;
    int imgCopyMax = imgMax * factor;
    int imgCopyMin = imgMin * factor;
    int refCopyMax = refMax * factor;
    int refCopyMin = refMin * factor;
    // Build the histograms of both images
    int length = imgCopyMax - imgCopyMin + 1;
    int* imgHist = new int[length];
    int* refHist = new int[length];
    // Zero both histograms
    for( int i = 0; i < length; i++ )
        imgHist[i] = 0;
        refHist[i] = 0;
    for ( int i = 0; i < width * height; i++ )
        int val = imgCopy[i] - imgCopyMin;
        imgHist[val] += 1;
        // NOTE(review): refHist is indexed relative to imgCopyMin and sized from
        // the img range; nothing here bounds val2 to [0, length) — verify.
        int val2 = refCopy[i] - imgCopyMin;
        refHist[val2] += 1;


注意以上代码中,我将原始浮点型的数据乘上了一个 factor,变成了整型,这一点很关键。这个 factor 的默认值我设置为 100。


下面我们再来具体说matchHistogram()函数,要匹配直方图,需要先利用直方图得到累积分布函数(cumulative distribution function),其实也就是统计每个直方图中值的概率分布,得到一个概率分布函数。


    // Excerpt: push every quantised pixel through the lookup table fun, then
    // divide by factor to return to the original value scale.
    for ( int i = 0; i < width * height; i++ )
        imgCopy[i] = fun[imgCopy[i] - imgCopyMin] + imgCopyMin;
        // NOTE(review): imgCopy[i] and factor are both int, so this is an integer division.
        img[i] = imgCopy[i] / factor;





// Excerpt: replace the first principal component with the high-resolution image
    int bandList = {1};
    float *highData = new float[this->width * this->height];
    highResDataset->RasterIO( GF_Read, 0, 0, this->width, this->height, highData, this->width, this->height, GDT_Float32, 1, &bandList, 0, 0, 0 );
    projToRange( highData, resAfterPCA[0] );// this calls the histogram statistics and histogram matching code shown above
    resAfterPCA[0] = highData;




// ***********************************************************************
// Author           : Jacory
// Created          : 11-14-2014
// Last Modified By : Jacory
// Last Modified On : 11-22-2014
// ***********************************************************************
//     Copyright (c) . All rights reserved.
// PCA融合类,实现将多波段遥感图像与全色波段图像进行融合
// 可处理浮点型图像数据
// ***********************************************************************
#pragma once


class GDALDataset;
class GDALRasterBand;

using namespace std;

// PCA fusion class: fuses a multi-band remote-sensing image with a
// panchromatic (high-resolution) image via principal component substitution.
class PcaFusion
    PcaFusion( string multiFilename, string highRSFilename, string saveName = "" );
    ~PcaFusion( void );
    // getters
    string getMultiName() {return multiFilename;};
    string getHighRSName() {return highRSFilename;};
    string getSaveName() {return saveName;};
    const char* getSaveFormat() const { return saveFormat; }
    int getFactor() {return factor;};
    // setters
    void setMultiName( string multi );
    void setHighRSName( string highRS );
    void setSaveName( string sName );
    void setSaveFormat( const char* val ) { saveFormat = val; }
    void setFactor( int val );
    // Compute the mean of every band
    double* calMean( GDALDataset* dataset );
    // Compute the covariance matrix of the image bands
    double* calCovMatrix( GDALDataset* dataset, double* bandMean );
    // Jacobi method for the eigenvalues and eigenvectors of a real symmetric matrix
    bool eejcb( double a[], int n, double v[], double eps, int jt );
    // Matrix transpose
    void transMatrix( double *matrix, int m, int n );
    // Matrix inversion
    void inverseMatrix( double *matrix, int n );
    // Linear stretch
    void linearStretch( float** pResult, int width, int height, int bandCount );
    // PCA fusion
    bool principalFusion();
    // Rescale img into the value range of ref, then histogram-match it to ref
    void projToRange( float* img, float* ref );
    // PCA transform
    float** PCATransform( float **imgMatrix, double* eigenVector );
    // Inverse PCA transform
    float** inversePCA( float **imgMatrix, double* eigenVector );
    // Sort the eigenvectors by eigenvalue magnitude
    void sortEigenVector( double* eigenVector, double* covAfterEejcb );
    // Save the image
    void saveFile( float** pResult, const char* format = "GTiff" );
    // Build the cumulative distribution function
    double* cdf( int* h, int length );
    // Histogram matching
    int* matchHistogram( int* hA, int* hR, int length );
    /// Path of the multi-band image
    string multiFilename;
    /// Path of the high-resolution image
    string highRSFilename;
    /// Path of the result file
    string saveName;
    /// Image height
    int height;
    /// Image width
    int width;
    /// Number of image bands
    int bandCount;
    /// Format used when saving the image
    const char* saveFormat;
    /// Multi-band image dataset
    GDALDataset* multiDataset;
    /// High-resolution image dataset
    GDALDataset* highResDataset;
    /// Coefficient controlling histogram-matching precision, from 100 to 10000; larger values run slower. Default is 100
    int factor;
#include "PcaFusion.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

#include "gdal_priv.h"

using namespace std;

/// Opens and validates the two input rasters.
///
/// The multi-band image must have exactly 4 bands, and both images must have
/// the same pixel dimensions.
///
/// @param multiFilename   Path of the multi-band image.
/// @param highRSFilename  Path of the high-resolution image.
/// @param saveName        Path of the fused output; may be empty and set later.
/// @throws std::string describing the failure when a file cannot be opened,
///         the band count is not 4, or the image sizes differ. Any dataset
///         already opened is closed before throwing, because the destructor
///         does not run for an object whose constructor throws.
PcaFusion::PcaFusion( string multiFilename, string highRSFilename, string saveName /*= "" */ )
    : multiFilename( multiFilename ), highRSFilename( highRSFilename ), saveName( saveName ),
      height( 0 ), width( 0 ), bandCount( 0 ), saveFormat( "GTiff" ),
      multiDataset( NULL ), highResDataset( NULL ), factor( 100 )
{
    CPLSetConfigOption( "GDAL_FILENAME_IS_UTF8", "NO" );

    // Open the multi-band image
    multiDataset = ( GDALDataset* )GDALOpen( multiFilename.c_str(), GA_ReadOnly );
    if( multiDataset == NULL )
    {
        cout << "打开多波段图像失败!" << endl;
        throw multiFilename + "file not valid";
    }
    int multiWidth = multiDataset->GetRasterXSize();
    int multiHeight = multiDataset->GetRasterYSize();
    int multiBandCount = multiDataset->GetRasterCount();

    // Verify that the image has exactly 4 bands
    if ( multiBandCount != 4 )
    {
        cout << "图像波段数有误,目前只能处理4波段图像!" << endl;
        GDALClose( multiDataset );
        multiDataset = NULL;
        throw multiFilename + "file not valid";
    }

    // Open the high-resolution image
    highResDataset = ( GDALDataset* )GDALOpen( highRSFilename.c_str(), GA_ReadOnly );
    if( highResDataset == NULL )
    {
        cout << "打开高分辨率图像失败!" << endl;
        GDALClose( multiDataset );
        multiDataset = NULL;
        throw highRSFilename + "file not valid";
    }
    int highResWidth = highResDataset->GetRasterXSize();
    int highResHeight = highResDataset->GetRasterYSize();

    // Check that both images have the same pixel dimensions
    if ( highResHeight != multiHeight || highResWidth != multiWidth )
    {
        cout << "图像大小不一致" << endl;
        GDALClose( highResDataset );
        highResDataset = NULL;
        GDALClose( multiDataset );
        multiDataset = NULL;
        throw multiFilename + "and" + highRSFilename + "don't match...";
    }

    this->bandCount = 4;
    this->height = multiHeight;
    this->width = multiWidth;
    this->factor = 100;
    this->saveFormat = "GTiff";// images are saved as GTiff by default
}

/// Closes both GDAL datasets opened by the constructor, multi-band first.
PcaFusion::~PcaFusion( void )
{
    GDALDatasetH multiHandle = multiDataset;
    GDALDatasetH highResHandle = highResDataset;

    GDALClose( multiHandle );
    GDALClose( highResHandle );
}

/// Stores a new multi-band image path.
///
/// An empty path only triggers a console warning; it is still stored.
/// The dataset that is already open is not touched.
void PcaFusion::setMultiName( string multi )
{
    const bool nameMissing = multi.empty();
    if ( nameMissing )
    {
        cout << "multi file name is empty..." << endl;
    }
    multiFilename = multi;
}

/// Stores a new high-resolution image path.
///
/// An empty path only triggers a console warning; it is still stored.
/// The dataset that is already open is not touched.
void PcaFusion::setHighRSName( string highRS )
{
    const bool nameMissing = highRS.empty();
    if ( nameMissing )
    {
        cout << "high resolution file name is empty..." << endl;
    }
    highRSFilename = highRS;
}

/// Stores a new output file path.
///
/// An empty path only triggers a console warning; it is still stored.
void PcaFusion::setSaveName( string sName )
{
    const bool nameMissing = sName.empty();
    if ( nameMissing )
    {
        cout << "save file name is empty..." << endl;
    }
    saveName = sName;
}

/// Transposes a row-major matrix in place.
///
/// The buffer is read as an n-row by m-column matrix and overwritten with its
/// m-row by n-column transpose: result(i, j) = input(j, i). For the square
/// matrices used by this class the distinction does not matter.
///
/// @param matrix  Buffer holding m * n doubles; overwritten with the result.
/// @param m       Number of rows of the result.
/// @param n       Number of columns of the result.
///
/// Does nothing when matrix is NULL or either dimension is not positive.
void PcaFusion::transMatrix( double *matrix, int m, int n )
{
    if ( matrix == NULL || m <= 0 || n <= 0 )
    {
        return;
    }

    // Build the transpose in scratch storage because source and destination
    // cells overlap. The vector frees itself on return.
    std::vector<double> transposed( static_cast<size_t>( m ) * static_cast<size_t>( n ) );
    for( int i = 0; i < m; i++ )
    {
        for( int j = 0; j < n; j++ )
        {
            transposed[static_cast<size_t>( i ) * n + j] = matrix[static_cast<size_t>( j ) * m + i];
        }
    }
    std::copy( transposed.begin(), transposed.end(), matrix );
}

/// Inverts a square row-major matrix in place using Gauss-Jordan elimination
/// with full pivoting.
///
/// @param matrix  Buffer holding n * n doubles; overwritten with the inverse.
/// @param n       Order of the matrix.
///
/// When the matrix is singular (the largest remaining pivot candidate d
/// satisfies d + 1.0 == 1.0), "err**not inv" is printed and the function
/// returns immediately. The buffer then holds a partially eliminated matrix
/// and must not be used as an inverse. Does nothing when matrix is NULL or
/// n is not positive.
void PcaFusion::inverseMatrix( double *matrix, int n )
{
    if ( matrix == NULL || n <= 0 )
    {
        return;
    }

    // Row and column of the pivot chosen at each elimination step, kept so
    // the interchanges can be undone at the end.
    std::vector<int> pivotRow( n, 0 );
    std::vector<int> pivotCol( n, 0 );

    for ( int k = 0; k < n; k++ )
    {
        // Full pivoting: take the largest magnitude in the remaining sub-matrix.
        double largest = 0.0;
        for ( int i = k; i < n; i++ )
        {
            for ( int j = k; j < n; j++ )
            {
                const double magnitude = fabs( matrix[i * n + j] );
                if ( magnitude > largest )
                {
                    largest = magnitude;
                    pivotRow[k] = i;
                    pivotCol[k] = j;
                }
            }
        }

        if ( largest + 1.0 == 1.0 )
        {
            // Singular: stop here rather than divide by a zero pivot.
            printf( "err**not inv\n" );
            return;
        }

        // Move the pivot onto the diagonal.
        if ( pivotRow[k] != k )
        {
            for ( int j = 0; j < n; j++ )
            {
                std::swap( matrix[k * n + j], matrix[pivotRow[k] * n + j] );
            }
        }
        if ( pivotCol[k] != k )
        {
            for ( int i = 0; i < n; i++ )
            {
                std::swap( matrix[i * n + k], matrix[i * n + pivotCol[k]] );
            }
        }

        const int diag = k * n + k;
        matrix[diag] = 1.0 / matrix[diag];

        // Scale the pivot row.
        for ( int j = 0; j < n; j++ )
        {
            if ( j != k )
            {
                matrix[k * n + j] = matrix[k * n + j] * matrix[diag];
            }
        }

        // Eliminate the pivot column from every other row.
        for ( int i = 0; i < n; i++ )
        {
            if ( i == k )
            {
                continue;
            }
            for ( int j = 0; j < n; j++ )
            {
                if ( j != k )
                {
                    matrix[i * n + j] = matrix[i * n + j] - matrix[i * n + k] * matrix[k * n + j];
                }
            }
        }

        // Finish the pivot column.
        for ( int i = 0; i < n; i++ )
        {
            if ( i != k )
            {
                matrix[i * n + k] = -matrix[i * n + k] * matrix[diag];
            }
        }
    }

    // Undo the interchanges in reverse order. Column pivots become row swaps
    // and row pivots become column swaps.
    for ( int k = n - 1; k >= 0; k-- )
    {
        if ( pivotCol[k] != k )
        {
            for ( int j = 0; j < n; j++ )
            {
                std::swap( matrix[k * n + j], matrix[pivotCol[k] * n + j] );
            }
        }
        if ( pivotRow[k] != k )
        {
            for ( int i = 0; i < n; i++ )
            {
                std::swap( matrix[i * n + k], matrix[i * n + pivotRow[k]] );
            }
        }
    }
}

/// Linearly stretches every band of an image for display.
///
/// For each band the minimum is subtracted from every pixel. When the band's
/// value range (max - min) is 255 or more, the result is also scaled so the
/// range becomes 0..255. Narrower ranges are only shifted, not stretched.
///
/// @param pResult    Array of bandCount band buffers, each holding
///                   width * height floats; modified in place.
/// @param width      Image width in pixels.
/// @param height     Image height in pixels.
/// @param bandCount  Number of bands in pResult.
///
/// Does nothing when pResult is NULL or the image is empty. NULL band
/// pointers are skipped.
void PcaFusion::linearStretch( float** pResult, int width, int height, int bandCount )
{
    if ( pResult == NULL || width <= 0 || height <= 0 )
    {
        return;
    }
    // Multiply in size_t so large rasters do not overflow int.
    const size_t pixelCount = static_cast<size_t>( width ) * static_cast<size_t>( height );

    for ( int i = 0; i < bandCount; i++ )
    {
        float* band = pResult[i];
        if ( band == NULL )
        {
            continue;
        }

        // Seed min/max from the data itself. Fixed sentinels give wrong
        // extremes for bands lying entirely outside the sentinel range.
        double dMaxValue = band[0], dMinValue = band[0];
        for ( size_t index = 1; index < pixelCount; index++ )
        {
            if ( dMaxValue < band[index] )
            {
                dMaxValue = band[index];
            }
            if ( dMinValue > band[index] )
            {
                dMinValue = band[index];
            }
        }

        const double range = dMaxValue - dMinValue;
        if ( range < 255 )
        {
            // Narrow range: shift only.
            for ( size_t j = 0; j < pixelCount; j++ )
            {
                band[j] = band[j] - dMinValue;
            }
        }
        else
        {
            for ( size_t j = 0; j < pixelCount; j++ )
            {
                band[j] = ( band[j] - dMinValue ) * 255.0 / range;
            }
        }
    }
}

/// PCA融合.
bool PcaFusion::principalFusion()
    if ( multiDataset == NULL || highResDataset == NULL || saveName == "" )
        cout << "数据集读取失败..." << endl;
        throw  "read data failed.";
    double* bandMean = calMean( multiDataset );// 计算波段均值
    double* covMatrix = calCovMatrix( multiDataset, bandMean );// 计算协方差矩阵
    // 计算协方差所形成的矩阵的特征值与特征向量
    double eps = 0.0001;   //控制精度要求
    double *eigenVector = new double[this->bandCount * this->bandCount];
    eejcb( covMatrix, this->bandCount, eigenVector, eps, 100000 );
    // 按特征值由大到小的顺序排列特征向量
    sortEigenVector( eigenVector, covMatrix );
    /*double eigenVector[] = {0.552398846622175, 0.514249636770153,	0.555078608019249,	-0.349700678082944,
                            0.552839196696649,	0.287442545809982,	-0.388975754303186,	0.678559848516214,
                            0.504571737069979,	-0.347417512377672,	-0.542438417723444,	-0.574864329404061,
                            0.366921924932919,	-0.729537638530976,	0.496332715485658,	0.294613255826687
    /*double eigenVector[] = {1, 0, 0, 0,
    0, 1, 0, 0,
    0, 0, 1, 0,
    0, 0, 0, 1
    //transMatrix( eigenVector, bandCount, bandCount );
    // 构造融合结果矩阵
    float** pResult = new float*[this->bandCount];
    for ( int band = 0; band < this->bandCount; band++ )
        pResult[band] = new float[this->width * this->height];
        int bandList = {band + 1};
        multiDataset->RasterIO( GF_Read, 0, 0, this->width, this->height, pResult[band], this->width, this->height, GDT_Float32, 1, &bandList, 0, 0, 0 );
    // 将多光谱图像进行主分量变换
    float** resAfterPCA = PCATransform( pResult, eigenVector );
    delete []pResult;
    // 用高分辨率图像替换第一主分量
    int bandList = {1};
    float *highData = new float[this->width * this->height];
    highResDataset->RasterIO( GF_Read, 0, 0, this->width, this->height, highData, this->width, this->height, GDT_Float32, 1, &bandList, 0, 0, 0 );
    projToRange( highData, resAfterPCA[0] );
    resAfterPCA[0] = highData;
    // 主分量逆变换
    float** resAfterInversePCA = inversePCA( resAfterPCA, eigenVector );
    delete []resAfterPCA;
    // 将结果写入图像
    saveFile( resAfterInversePCA, saveFormat );
    return true;

/// Rescales img into the value range of ref and then histogram-matches it to
/// ref.
///
/// Both buffers hold width * height floats. The values are multiplied by
/// factor and truncated to integers so histograms can be built; a larger
/// factor keeps more fractional precision at the cost of longer histograms.
/// The matched values are divided by factor again when written back to img.
///
/// @param img  Image to adjust; modified in place.
/// @param ref  Reference image. This function takes ownership and releases it
///             with delete[], so the caller must not use it afterwards.
///             NOTE(review): this assumes ref was allocated with new[] — confirm.
///
/// A flat img (all pixels equal) is mapped to the minimum of ref instead of
/// dividing by zero. When img is NULL, the image is empty, or factor is not
/// positive, only ref is released.
void PcaFusion::projToRange( float* img, float* ref )
{
    const size_t pixelCount = ( width > 0 && height > 0 )
                              ? static_cast<size_t>( width ) * static_cast<size_t>( height )
                              : 0;
    if ( img == NULL || ref == NULL || pixelCount == 0 || factor <= 0 )
    {
        delete [] ref;  // honour the ownership contract on the early exit too
        return;
    }

    // Minimum and maximum of img and ref, seeded from the data so any value
    // range is handled.
    double imgMax = img[0], imgMin = img[0];
    double refMax = ref[0], refMin = ref[0];
    for ( size_t index = 1; index < pixelCount; index++ )
    {
        if ( imgMax < img[index] ) { imgMax = img[index]; }
        if ( imgMin > img[index] ) { imgMin = img[index]; }
        if ( refMax < ref[index] ) { refMax = ref[index]; }
        if ( refMin > ref[index] ) { refMin = ref[index]; }
    }

    // Rescale img into the value range of ref
    const double imgRange = imgMax - imgMin;
    const double refRange = refMax - refMin;
    for ( size_t i = 0; i < pixelCount; i++ )
    {
        img[i] = ( imgRange > 0.0 ) ? static_cast<float>( ( img[i] - imgMin ) / imgRange ) : 0.0f;
        img[i] = img[i] * refRange + refMin;
    }

    // Quantise both images to integers
    std::vector<int> imgCopy( pixelCount );
    std::vector<int> refCopy( pixelCount );
    for ( size_t i = 0; i < pixelCount; i++ )
    {
        imgCopy[i] = static_cast<int>( img[i] * factor );
        refCopy[i] = static_cast<int>( ref[i] * factor );
    }
    delete [] ref;

    // Take the histogram bounds from the quantised samples themselves.
    // Quantising the float min/max separately can round differently from the
    // per-pixel float product and leave a sample one bin outside the histogram.
    int imgCopyMin = imgCopy[0], imgCopyMax = imgCopy[0];
    for ( size_t i = 1; i < pixelCount; i++ )
    {
        if ( imgCopyMax < imgCopy[i] ) { imgCopyMax = imgCopy[i]; }
        if ( imgCopyMin > imgCopy[i] ) { imgCopyMin = imgCopy[i]; }
    }

    // Histograms of both images over the quantised range of img
    const int length = imgCopyMax - imgCopyMin + 1;
    std::vector<int> imgHist( length, 0 );
    std::vector<int> refHist( length, 0 );
    for ( size_t i = 0; i < pixelCount; i++ )
    {
        imgHist[imgCopy[i] - imgCopyMin] += 1;

        // ref shares the value range only up to rounding, so clamp its bin.
        int refBin = refCopy[i] - imgCopyMin;
        if ( refBin < 0 )
        {
            refBin = 0;
        }
        else if ( refBin >= length )
        {
            refBin = length - 1;
        }
        refHist[refBin] += 1;
    }

    // The histograms stay alive until the mapping has been applied, in case
    // the returned table refers to them.
    // NOTE(review): fun is not released here, as before; confirm who owns the
    // array returned by matchHistogram.
    int* fun = matchHistogram( &imgHist[0], &refHist[0], length );
    if ( fun == NULL )
    {
        return;
    }

    for ( size_t i = 0; i < pixelCount; i++ )
    {
        const int mapped = fun[imgCopy[i] - imgCopyMin] + imgCopyMin;
        // Floating-point division keeps the fractional precision that factor
        // was introduced to preserve.
        img[i] = static_cast<float>( mapped ) / static_cast<float>( factor );
    }
}

void PcaFusion::setFactor( int val )
    if ( val < 100 )
        cout << "factor参数设置太小,精度会很低。将会采用默认值100" << endl;
        factor = 100;
    else if ( val > 10000 )
        cout << "factor参数设置太大,执行速度非常慢。将会采用最大推荐值10000" << endl;
        factor = 10000;
        factor = val;


