近红外光谱分析技术属于交叉领域,需要化学、计算机科学、生物科学等多领域的合作。为此,在北京邮电大学杨辉华老师团队的指导下,近期准备开源传统的PLS、SVM、ANN、RF等经典算法,SG、MSC、一阶导、二阶导等预处理方法,GA等波长选择算法,以及CNN、AE等最新深度学习算法,以帮助其他专业的研究人员更容易地建立具有良好预测能力和鲁棒性的近红外光谱模型。
本篇主要讲述基于Python语言的光谱预处理方法,稍后将更新MATLAB语言版本的光谱预处理方法。
# Min-max normalization
def MMS(data):
    """Return ``data`` rescaled by a freshly fitted ``MinMaxScaler``.

    The scaler is created with its default settings, fitted on ``data`` and
    applied to that same array (scikit-learn scales each column/feature
    independently).
    """
    scaler = MinMaxScaler()
    return scaler.fit_transform(data)
# Standardization
def SS(data):
    """Return ``data`` standardized by a freshly fitted ``StandardScaler``.

    The scaler is created with its default settings, fitted on ``data`` and
    applied to that same array (scikit-learn standardizes each
    column/feature independently).
    """
    scaler = StandardScaler()
    return scaler.fit_transform(data)
# Mean centering
def CT(data):
    """Mean-center every spectrum (row) of ``data``.

    Each row has its own mean subtracted, so every row of the result
    averages to zero.

    Args:
        data: 2-D array-like of shape (n_spectra, n_points).

    Returns:
        A new float ndarray with the same shape as ``data``. The input is
        left untouched, and integer input is promoted to float so the
        centered values are not truncated.
    """
    spectra = np.asarray(data, dtype=float)
    # keepdims keeps the per-row mean as a column so it broadcasts across
    # the points of its own row.
    return spectra - spectra.mean(axis=1, keepdims=True)
# Standard normal variate (SNV)
def SNV(data):
    """Apply the standard normal variate transform to every spectrum.

    Each row is centered on its own mean and divided by its own population
    standard deviation (``ddof=0``).

    Args:
        data: 2-D array-like of shape (n_spectra, n_points).

    Returns:
        A new float ndarray of the same shape. A perfectly flat spectrum
        (zero standard deviation) comes back as all zeros rather than NaN.
    """
    spectra = np.asarray(data, dtype=float)
    row_mean = spectra.mean(axis=1, keepdims=True)
    row_std = spectra.std(axis=1, keepdims=True)
    # A flat row has zero spread; dividing by 1 instead keeps its centered
    # (all-zero) values and avoids a 0/0 division.
    safe_std = np.where(row_std == 0, 1.0, row_std)
    return (spectra - row_mean) / safe_std
# Moving-average smoothing
def MA(a, WSZ=21):
    """Smooth every spectrum (row) of ``a`` with a centered moving average.

    Interior points are the mean of a full ``WSZ``-wide window. Near both
    ends, where a full window does not fit, the window shrinks
    symmetrically (1, 3, 5, ... points) so the output keeps the input
    length.

    Args:
        a: 2-D array-like of shape (n_spectra, n_points).
        WSZ: window width; must be a positive odd number no larger than
            the number of points per spectrum.

    Returns:
        A new float ndarray with the same shape as ``a``; the input is not
        modified.

    Raises:
        ValueError: if ``WSZ`` is not a positive odd number or exceeds the
            number of points per spectrum.
    """
    if WSZ < 1 or WSZ % 2 == 0:
        raise ValueError("WSZ must be a positive odd number")
    spectra = np.asarray(a, dtype=float)
    if spectra.shape[1] < WSZ:
        raise ValueError("WSZ must not exceed the number of points per spectrum")

    smoothed = np.empty_like(spectra)
    kernel = np.ones(WSZ)
    # Sizes of the shrinking edge windows: 1, 3, ..., WSZ - 2.
    edge_sizes = np.arange(1, WSZ - 1, 2)
    for i, row in enumerate(spectra):
        core = np.convolve(row, kernel, 'valid') / WSZ
        # Every second cumulative sum is the sum of an odd-sized window
        # anchored at the edge; dividing by its size gives that window's mean.
        head = np.cumsum(row[:WSZ - 1])[::2] / edge_sizes
        tail = (np.cumsum(row[:-WSZ:-1])[::2] / edge_sizes)[::-1]
        smoothed[i] = np.concatenate((head, core, tail))
    return smoothed
# Savitzky-Golay smoothing filter
def SG(data, w=21, p=3):
    """Smooth ``data`` with SciPy's Savitzky-Golay filter.

    Args:
        data: array of spectra, forwarded unchanged to
            ``scipy.signal.savgol_filter``.
        w: filter window length, forwarded as ``window_length``.
        p: order of the fitted polynomial, forwarded as ``polyorder``.

    Returns:
        Whatever ``savgol_filter`` returns for these arguments; all of its
        other options keep their defaults.
    """
    return signal.savgol_filter(data, window_length=w, polyorder=p)
# First derivative
def D1(data):
    """Return the first-order difference of every spectrum (row).

    Args:
        data: 2-D array-like of shape (n_spectra, n_points).

    Returns:
        A new float ndarray of shape (n_spectra, n_points - 1) holding
        ``data[i, j + 1] - data[i, j]``; a single-column input yields an
        empty (n_spectra, 0) array.
    """
    spectra = np.asarray(data, dtype=float)
    return np.diff(spectra, n=1, axis=1)
# Second derivative
def D2(data):
    """Return the second-order difference of every spectrum (row).

    Args:
        data: 2-D array-like of shape (n_spectra, n_points).

    Returns:
        A new float ndarray of shape (n_spectra, n_points - 2), i.e. the
        difference of consecutive first differences. Inputs with fewer
        than three columns yield an empty (n_spectra, 0) array.
    """
    spectra = np.asarray(data, dtype=float)
    return np.diff(spectra, n=2, axis=1)
# Detrending (DT)
def DT(data):
    """Remove the best-fit straight line from every spectrum (row).

    A first-degree least-squares line is fitted to each row against the
    column index, and that same line is subtracted, so the result is the
    fit residual. Using the index as the abscissa assumes the points are
    evenly spaced; for such data the residual is identical to fitting
    against the real wavelength axis.

    Args:
        data: 2-D array-like of shape (n_spectra, n_points), with any
            number of points per spectrum.

    Returns:
        A new float ndarray with the same shape as ``data``; the input is
        not modified.
    """
    spectra = np.asarray(data, dtype=float)
    x = np.arange(spectra.shape[1], dtype=float)
    # polyfit accepts one column per data set, hence the transpose; it
    # returns the highest-degree coefficient first.
    slopes, intercepts = np.polyfit(x, spectra.T, 1)
    # Evaluate each line on the same abscissa it was fitted on.
    trend = slopes[:, np.newaxis] * x + intercepts[:, np.newaxis]
    return spectra - trend
# Multiplicative scatter correction (MSC)
def MSC(Data):
    """Apply multiplicative scatter correction to every spectrum (row).

    The column-wise mean of ``Data`` is the reference spectrum. Each row is
    regressed on the reference by least squares,
    ``row ~= slope * reference + intercept``, and corrected as
    ``(row - intercept) / slope``.

    Args:
        Data: 2-D array-like of shape (n_spectra, n_points).

    Returns:
        A new float ndarray with the same shape as ``Data``; the input is
        not modified.
    """
    spectra = np.asarray(Data, dtype=float)
    if spectra.shape[0] == 0:
        # No spectra: there is no reference to fit against.
        return spectra.copy()

    # The reference depends only on the whole data set, so compute it once.
    reference = spectra.mean(axis=0)
    # Fit all rows in one call: polyfit takes one column per data set and
    # returns the highest-degree coefficient first.
    slopes, intercepts = np.polyfit(reference, spectra.T, 1)
    return (spectra - intercepts[:, np.newaxis]) / slopes[:, np.newaxis]
代码如下(示例):
# Example environment setup: third-party imports plus two process-wide switches.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppresses every Python warning from here on, including numerical ones
# such as division by zero raised during preprocessing.
warnings.filterwarnings('ignore')
import ssl
# NOTE(review): this swaps the default HTTPS context factory for the
# unverified one, i.e. certificate verification is switched off for any code
# in this process that relies on the default context. Nothing in the visible
# code performs HTTPS requests -- confirm it is still needed before keeping it.
ssl._create_default_https_context = ssl._create_unverified_context
推荐基于anaconda安装python,参考安装如下:
基于anaconda安装python
import numpy as np
import matplotlib.pyplot as plt
from scipy import signal
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
# Load the data
data_path = './/data//data.csv'  # spectra, one spectrum per row
xcol_path = './/data//xcol.csv'  # wavelength axis
# np.loadtxt opens and closes the file itself when given a path, so no file
# handle is left open.
data = np.loadtxt(data_path, dtype=np.float64, delimiter=',', skiprows=0)
xcol = np.loadtxt(xcol_path, dtype=np.float64, delimiter=',', skiprows=0)
# Plot the raw spectra
plt.figure(500)
x_col = xcol
# Transposed so that each spectrum becomes one column, i.e. one plotted line.
y_col = np.transpose(data)
plt.plot(x_col, y_col)
# The axis unit is nm, so the quantity is wavelength, not wavenumber.
plt.xlabel("Wavelength(nm)")
plt.ylabel("Absorbance")
plt.title("The spectrum of the raw for dataset",fontweight= "semibold",fontsize='large')
plt.show()
# Preprocess, visualize and save
datareprocessing_path = './/data//dataMSC.csv'  # output file for the preprocessed spectra
Data_Msc = MSC(data)  # swap in another preprocessing function here to try a different method
# Plot the spectra after MSC preprocessing
plt.figure(500)
x_col = xcol
y_col = np.transpose(Data_Msc)
plt.plot(x_col, y_col)
plt.xlabel("Wavelength(nm)")
plt.ylabel("Absorbance")
plt.title("The spectrum of the MSC for dataset",fontweight= "semibold",fontsize='large')  # remember to rename "MSC" when using another method
plt.show()
# Save the preprocessed data
np.savetxt(datareprocessing_path, Data_Msc, delimiter=',')
Python代码参考了湖南师范大学同学的实现,完整代码可从GitHub仓库获得。
代码仅供学术使用,如有问题,请联系:QQ:1427950662,微信:Fu_siry