# 选择数据路径
# 以上三个文件为原始数据(Brain Products 数据)。
# EEG 脑电预处理的原理解释:http://t.csdnimg.cn/BI0Sx
import os

# All relative paths used below (the .vhdr recording, the .elp montage file
# and the .pkl checkpoints) are resolved against this working directory.
os.chdir("F:/data_Project/ml_part2/1_preprocessing/1_single_preprocessing/")
import mne
import numpy as np

# --- Load one raw recording ---
# MNE tells continuous and segmented data apart by dimensionality:
# 2-D (continuous) data are read as Raw, already-segmented data as Epochs.
# Tip: type `mne.io.read` and press Tab to list the available readers.
raw = mne.io.read_raw_brainvision('1.vhdr', preload=True)
# eeg1 = mne.io.read_epochs_eeglab('1_LH.set')

# Basic information about the recording
raw.info
# For this recording: 64 channels x 439880 samples
print(raw)
# List the attributes and methods available on the Raw object
dir(raw)

# Sampling rate
# (the original line ended with stray characters, which was a SyntaxError)
sampling_rate = raw.info['sfreq']
# Number of samples
n_time_samps = raw.n_times
# Time stamp of every sample, in seconds
time_secs = raw.times
# Channel names and channel count
ch_names = raw.ch_names
n_ch = len(ch_names)

# Browse the continuous data with default settings
raw.plot()
# Choose how many channels and how many seconds are shown at once, plus the
# amplitude scaling; 30e-6 is the suggested value for ~60-channel data.
raw.plot(n_channels=64, duration=5, scalings=30e-6)
# --- Channel locations and sensor layout ---
# To inspect the channel names, or to try the montage on the untouched data:
# print(raw.ch_names)
# montage = mne.channels.read_custom_montage('standard-10-5-cap385.elp')
# raw.set_montage(montage)

# The channel names stored in the recording cannot be located as they are, so
# they are renamed first. 'standard-10-5-cap385.elp' is the international
# 10-5 system channel-location file; names are case-sensitive, and every key
# below is replaced by its value.
mapping = dict(
    FP1='Fp1', FPZ='Fpz', FP2='Fp2', FZ='Fz',
    FCZ='FCz', CZ='Cz', CPZ='CPz', PZ='Pz',
    PO5='PO5', POZ='POz', PO6='PO6', OZ='Oz',
    HEO='HEOG', VEO='VEOG',
)

# Rename on a copy so that `raw` itself is left unchanged.
raw_rename_ch = raw.copy()
raw_rename_ch.rename_channels(mapping)

# Read the channel-location file and attach it to the renamed data.
montage = mne.channels.read_custom_montage('standard-10-5-cap385.elp')
raw_rename_ch.set_montage(montage)

# Electrode layout: first with defaults, then restricted to EEG channels with
# channel names shown and an explicit head-outline size.
raw_rename_ch.plot_sensors()
raw_rename_ch.plot_sensors(ch_type='eeg', show_names=True, sphere=0.075)

# Power spectrum over 1-70 Hz. EEG power falls off with frequency; a spike
# at 50 Hz is mains interference and is dealt with by filtering later on.
raw_rename_ch.plot_psd(fmin=1, fmax=70, spatial_colors=True)

# --- Drop unused electrodes: retype, then pick by channel type ---
raw_select_ch = raw_rename_ch.copy()
# Mark HEOG and VEOG as EOG channels (info then reports 62 EEG, 2 EOG).
eog_types = dict(HEOG='eog', VEOG='eog')
raw_select_ch.set_channel_types(eog_types)
raw_select_ch.info
# Keep only the EEG-type channels (info then reports 62 EEG).
raw_select_ch.pick(['eeg'])
raw_select_ch.info
# 若无用电极较多,可先去除无用电极,再重命名通道。
# 较复杂的数据可先在 EEGLAB 中处理,再导入 Python 继续处理。
# --- Filtering ---
# Band-pass 0.1-30 Hz. The pass band is the useful signal; content outside
# it is attenuated rather than removed completely.
raw_band = raw_select_ch.copy().filter(0.1, 30, picks='eeg')
# Power spectrum, to compare the frequency response before and after.
raw_band.plot_psd(fmin=1, fmax=70, spatial_colors=True)
# raw_band.plot()
# After filtering, info reports highpass: 0.1 Hz / lowpass: 30.0 Hz.
raw_band.info

# Notch (band-stop) filter against mains interference: 50 Hz in China,
# 60 Hz in some other countries. Centre 50 Hz, width 4 Hz -> 48-52 Hz stop.
# With this data the 50 Hz component is already small, so the effect is
# barely visible.
# Work on a copy, like every other step of this script: notch_filter()
# modifies its instance in place, so without the copy `raw_band` and
# `raw_band_notch` would be the same object and the band-pass-only data
# could no longer be compared with the notch-filtered data.
raw_band_notch = raw_band.copy().notch_filter(50, notch_widths=4)
raw_band_notch.plot_psd(fmin=1, fmax=70, spatial_colors=True)

# --- Resampling (info then reports sfreq: 500.0 Hz) ---
# The highest analysable frequency is half the sampling rate, but in
# practice only about 1/3 to 1/4 of the sampling rate is analysed reliably.
# Downsampling reduces data volume and compute/storage cost; keep its effect
# on frequency-domain analyses in mind. Filter first, then resample.
raw_resampled = raw_band_notch.copy().resample(sfreq=500)
raw_resampled.info
# --- Epoching ---
# Read the markers of the recording (annotations in MNE) as an events array.
events_from_anno, event_dict = mne.events_from_annotations(raw_resampled)
# event_dict: relation between each marker description and its event code
print(event_dict)
# events_from_anno: one row per marker -- sample position, a middle column
# (all zeros here) and the event code
print(events_from_anno)

# Choose which markers to epoch on. The data contain four marker types; only
# the two stimulus markers (10/11) are kept, the response markers (100/200)
# are not of interest. Default mapping for reference:
# {'Stimulus/10': 10001, 'Stimulus/100': 10002, 'Stimulus/11': 10003, 'Stimulus/200': 10004}
custom_mapping = {'Stimulus/10': 10, 'Stimulus/11': 11}
# Re-extract the events with the custom codes (only 10/11 remain).
events_from_anno, event_dict = mne.events_from_annotations(
    raw_resampled, event_id=custom_mapping)
print(event_dict)
print(events_from_anno)

# Cut the epochs: -0.2 s to 0.8 s around each event, baseline -0.2 s to 0 s.
my_epochs = mne.Epochs(raw_resampled, events_from_anno,
                       tmin=-0.2, tmax=0.8, baseline=(-0.2, 0))
# Baseline correction
my_epochs.apply_baseline()
my_epochs.info

# --- Checkpoint the freshly epoched data ---
# The epochs are pickled to my_epochs.pkl and read straight back, so there is
# a backup to compare against once bad epochs have been removed.
# `with` closes the file even if pickling fails half-way.
import pickle
with open('my_epochs.pkl', 'wb') as output:
    pickle.dump(my_epochs, output)

# Read the checkpoint back into `my_epochs`.
del my_epochs
with open('my_epochs.pkl', 'rb') as pkl_file:
    my_epochs = pickle.load(pkl_file)
my_epochs.info
# The checkpoint written before this step exists so that the effect of
# bad-epoch removal can be compared afterwards, and as a backup.

# --- Remove bad epochs ---
my_epochs_good = my_epochs.copy()
# Plot the epoched data (for continuous data one sets the time span shown,
# for epoched data the number of epochs shown).
my_epochs_good.plot(n_epochs=5, n_channels=64)
# In the plot above, epochs judged to be bad can be marked by hand (red).
# Drop the marked epochs. The original line referenced `drop_bad` without
# parentheses, so the method was never called.
my_epochs_good.drop_bad()
my_epochs_good.info
# Number of remaining epochs (100 here: 50 of marker 10, 50 of marker 11)
print(len(my_epochs_good.events))

# --- Interpolate bad channels ---
# Bad channels are marked by clicking them in the plot, as for bad epochs.
my_epochs_good.plot(n_epochs=5, n_channels=64)
good_ch = my_epochs_good.load_data().copy().interpolate_bads(reset_bads=True)
good_ch.info
good_ch.plot(n_epochs=5, n_channels=64)
# Caveat: the bads must be selected in the first plot before the
# interpolation line runs; only then do they show up in info.
# good_ch.plot(n_epochs = 5,n_channels= 64)
# good_ch.info
# Bad epochs and bad channels are selected by hand, which is laborious; keep
# backups so the selection can be revisited and repeated.
# 脑电数据 ICA 典型成分识别:http://t.csdnimg.cn/TQojJ
# --- ICA (independent component analysis) ---
# ICA is used here to remove artifacts, but it is not a cure-all. The
# components it separates well are ocular/muscle-like activity that occurs
# regularly and does not dominate the data. Remove bad epochs and interpolate
# bad channels first, so that the regular ocular activity is what remains.
from mne.preprocessing import ICA
ica_data = good_ch.copy()
# Number of components = number of electrodes - number of bad channels;
# with very many electrodes a decomposition into 64 components is enough.
ica = ICA(n_components=50)
ica.fit(ica_data)

# Component topographies only
ica.plot_components()
# Reading the figure: later components matter less; ICA000 is the blink
# component here. Do not remove components freely -- only the classic
# blink / eye-movement / head-movement ones.

# Per-component property figures; range(0, 50) covers [0, 50).
ica.plot_properties(good_ch, picks=[0, 1])
ica.plot_properties(good_ch, picks=np.array(range(0, 50)))

# Overall effect of removing one component, to check the choice: if the red
# and black traces coincide there is no need to remove it; sharp peaks
# (e.g. blinks) indicate a component that should go.
ica.plot_overlay(raw_resampled, exclude=[0], picks='eeg')

# Checkpoint the fitted ICA and read it back. The original reloaded the file
# into `my_epochs`, which replaced the epochs variable with an ICA object and
# never restored `ica`.
with open('ica.pkl', 'wb') as output:
    pickle.dump(ica, output)
with open('ica.pkl', 'rb') as pkl_file:
    ica = pickle.load(pkl_file)
# --- Remove the artifact components ---
# Name the components to remove and confirm them in the plot
# (ICA000 is index 0; component numbering starts at 0).
ica.exclude = [0]
ica.plot_components([0])
ica_clean = good_ch.copy()
# Make sure the data are loaded in memory.
ica_clean.load_data()
# Remove the components listed in ica.exclude (modifies ica_clean in place).
ica.apply(ica_clean)

# Compare the data after (ica_clean) and before (good_ch) the removal.
ica_clean.plot(n_epochs=5, scalings=30e-6)
good_ch.plot(n_epochs=5, scalings=30e-6)

# Checkpoint the cleaned epochs and read them back. The original reloaded
# the file into `good_ch`, overwriting the pre-ICA data with the cleaned data.
with open('ica_clean.pkl', 'wb') as output:
    pickle.dump(ica_clean, output)
with open('ica_clean.pkl', 'rb') as pkl_file:
    ica_clean = pickle.load(pkl_file)
# --- Amplitude-threshold artifact rejection ---
extreme_values = ica_clean.copy()
# Rejection criterion of 100 uV. Always run ICA before this step.
stronger_reject_criteria = dict(eeg=100e-6)
extreme_values.drop_bad(reject=stronger_reject_criteria)
# Why each epoch was dropped: by hand, or because a channel exceeded the
# automatic rejection criterion.
print(extreme_values.drop_log)
extreme_values.plot_drop_log()

# Checkpoint and reload; `with` closes the file even if pickling fails.
with open('extreme_values.pkl', 'wb') as output:
    pickle.dump(extreme_values, output)
with open('extreme_values.pkl', 'rb') as pkl_file:
    extreme_values = pickle.load(pkl_file)

# --- Re-referencing (the choice of reference matters) ---
# Priority: online recording reference > re-reference electrodes >
# electrodes to be analysed > remaining electrodes.
# Bilateral mastoids ['TP9', 'TP10'] are the usual choice. For an average
# reference, first remove the mastoids and other non-brain electrodes.
refered = extreme_values.copy().set_eeg_reference(ref_channels=['TP9', 'TP10'])
# refered = extreme_values.copy().set_eeg_reference(ref_channels='average')
with open('refered.pkl', 'wb') as output:
    pickle.dump(refered, output)
# 文件:F:\data_Project\ml_part2\1_preprocessing\scripts\single_bacth.py
import os

# Relative file names below are resolved against this working directory.
os.chdir("C:/Users/Lenovo/Desktop/ml_part2/1_preprocessing/1_single_preprocessing/")
import mne
import numpy as np

# --- Load one raw recording and inspect it ---
raw = mne.io.read_raw_brainvision('1.vhdr', preload=True)
# eeg1 = mne.io.read_epochs_eeglab('1_LH.set')
raw.info
print(raw)
dir(raw)

# Sampling rate, sample count, per-sample time stamps, channel names/count
sampling_rate = raw.info['sfreq']
n_time_samps = raw.n_times
time_secs = raw.times
ch_names = raw.ch_names
n_ch = len(ch_names)

# Browse the data: defaults first, then 64 channels x 5 s at 30e-6 scaling.
raw.plot()
raw.plot(n_channels=64, duration=5, scalings=30e-6)

# --- Channel locations and sensor layout ---
# To inspect the channel names or try the montage on the untouched data:
# print(raw.ch_names)
# montage = mne.channels.read_custom_montage('standard-10-5-cap385.elp')
# raw.set_montage(montage)

# Old channel name -> new channel name, so the names match the montage file.
mapping = dict(
    FP1='Fp1', FPZ='Fpz', FP2='Fp2', FZ='Fz',
    FCZ='FCz', CZ='Cz', CPZ='CPz', PZ='Pz',
    PO5='PO5', POZ='POz', PO6='PO6', OZ='Oz',
    HEO='HEOG', VEO='VEOG',
)
# Rename on a copy, then attach the montage read from the .elp file.
raw_rename_ch = raw.copy()
raw_rename_ch.rename_channels(mapping)
montage = mne.channels.read_custom_montage('standard-10-5-cap385.elp')
raw_rename_ch.set_montage(montage)

# Sensor layout (default, then EEG only with names) and 1-70 Hz spectrum
raw_rename_ch.plot_sensors()
raw_rename_ch.plot_sensors(ch_type='eeg', show_names=True, sphere=0.075)
raw_rename_ch.plot_psd(fmin=1, fmax=70, spatial_colors=True)

# --- Retype the two EOG channels, then keep EEG channels only ---
raw_select_ch = raw_rename_ch.copy()
eog_types = dict(HEOG='eog', VEOG='eog')
raw_select_ch.set_channel_types(eog_types)
raw_select_ch.info
raw_select_ch.pick(['eeg'])
raw_select_ch.info
# --- Filtering ---
# Band-pass 0.1-30 Hz on a copy of the channel-selected data.
raw_band = raw_select_ch.copy().filter(0.1, 30, picks='eeg')
raw_band.plot_psd(fmin=1, fmax=70, spatial_colors=True)
# raw_band.plot()
raw_band.info
# 50 Hz notch, 4 Hz wide. Applied to a copy, like every other step here:
# notch_filter() modifies its instance in place, so without the copy
# `raw_band` and `raw_band_notch` would be one and the same object.
raw_band_notch = raw_band.copy().notch_filter(50, notch_widths=4)
raw_band_notch.plot_psd(fmin=1, fmax=70, spatial_colors=True)
# Resample to 500 Hz after filtering.
raw_resampled = raw_band_notch.copy().resample(sfreq=500)
raw_resampled.info

# --- Epoching ---
# First pass: list every marker present in the annotations.
events_from_anno, event_dict = mne.events_from_annotations(raw_resampled)
print(event_dict)
print(events_from_anno)
# Second pass: keep only the two stimulus markers, coded 10 and 11.
custom_mapping = {'Stimulus/10': 10, 'Stimulus/11': 11}
events_from_anno, event_dict = mne.events_from_annotations(
    raw_resampled, event_id=custom_mapping)
print(event_dict)
print(events_from_anno)
# Epochs from -0.2 s to 0.8 s with a -0.2 s to 0 s baseline.
my_epochs = mne.Epochs(raw_resampled, events_from_anno,
                       tmin=-0.2, tmax=0.8, baseline=(-0.2, 0))
my_epochs.apply_baseline()
my_epochs.info

# --- Checkpoint the freshly epoched data and read it back ---
# `with` closes the file even if pickling fails half-way.
import pickle
with open('my_epochs.pkl', 'wb') as output:
    pickle.dump(my_epochs, output)
del my_epochs
with open('my_epochs.pkl', 'rb') as pkl_file:
    my_epochs = pickle.load(pkl_file)
my_epochs.info
# --- Remove bad epochs ---
my_epochs_good = my_epochs.copy()
# Mark bad epochs by hand in this plot.
my_epochs_good.plot(n_epochs=5, n_channels=64)
# Drop the marked epochs. The original line referenced `drop_bad` without
# parentheses, so the method was never called.
my_epochs_good.drop_bad()
my_epochs_good.info
print(len(my_epochs_good.events))

# --- Interpolate bad channels ---
# Mark bad channels by hand in this plot before interpolating.
my_epochs_good.plot(n_epochs=5, n_channels=64)
good_ch = my_epochs_good.load_data().copy().interpolate_bads(reset_bads=True)
good_ch.info
good_ch.plot(n_epochs=5, n_channels=64)

# Checkpoint and reload; `with` closes the file even if pickling fails.
with open('good_ch.pkl', 'wb') as output:
    pickle.dump(good_ch, output)
with open('good_ch.pkl', 'rb') as pkl_file:
    good_ch = pickle.load(pkl_file)
# --- ICA: fit, inspect, checkpoint ---
from mne.preprocessing import ICA
ica_data = good_ch.copy()
ica = ICA(n_components=50)
ica.fit(ica_data)
# Component topographies
ica.plot_components()
# Per-component property figures; range(0, 50) covers [0, 50).
ica.plot_properties(good_ch, picks=[0, 1])
ica.plot_properties(good_ch, picks=np.array(range(0, 50)))
# Effect of removing component 0 on the continuous data
ica.plot_overlay(raw_resampled, exclude=[0], picks='eeg')

# Checkpoint the fitted ICA and read it back. The original reloaded the file
# into `my_epochs`, which replaced the epochs variable with an ICA object and
# never restored `ica`.
with open('ica.pkl', 'wb') as output:
    pickle.dump(ica, output)
with open('ica.pkl', 'rb') as pkl_file:
    ica = pickle.load(pkl_file)

# --- Remove the selected component from a copy of the epochs ---
ica.exclude = [0]
ica.plot_components([0])
ica_clean = good_ch.copy()
ica_clean.load_data()
# Modifies ica_clean in place.
ica.apply(ica_clean)
# Compare after (ica_clean) with before (good_ch).
ica_clean.plot(n_epochs=5, scalings=30e-6)
good_ch.plot(n_epochs=5, scalings=30e-6)

# Checkpoint the cleaned epochs and read them back. The original reloaded
# the file into `good_ch`, overwriting the pre-ICA data with the cleaned data.
with open('ica_clean.pkl', 'wb') as output:
    pickle.dump(ica_clean, output)
with open('ica_clean.pkl', 'rb') as pkl_file:
    ica_clean = pickle.load(pkl_file)
# --- Amplitude-threshold rejection (100e-6 on EEG channels) ---
extreme_values = ica_clean.copy()
stronger_reject_criteria = dict(eeg=100e-6)
extreme_values.drop_bad(reject=stronger_reject_criteria)
# Reason each epoch was dropped, as text and as a figure
print(extreme_values.drop_log)
extreme_values.plot_drop_log()

# Checkpoint and reload; `with` closes the file even if pickling fails.
with open('extreme_values.pkl', 'wb') as output:
    pickle.dump(extreme_values, output)
with open('extreme_values.pkl', 'rb') as pkl_file:
    extreme_values = pickle.load(pkl_file)

# --- Re-reference to TP9/TP10 on a copy, then save the result ---
refered = extreme_values.copy().set_eeg_reference(ref_channels=['TP9', 'TP10'])
# refered = extreme_values.copy().set_eeg_reference(ref_channels='average')
with open('refered.pkl', 'wb') as output:
    pickle.dump(refered, output)
# 注意:这里有部分步骤(例如 ICA)需要逐个给 i 赋值、分别手动运行。
# 文件:F:\data_Project\ml_part2\1_preprocessing\scripts\preprocessing_batch.py
# Batch preprocessing setup: working directory, imports, data root and the
# per-stage sub-directories.
# Change the working directory first; remember to use forward slashes (/).
# Define the working directory.
import os
# Import the required modules.
# NOTE(review): the working directory below is on F: while `root` further
# down is on C: -- relative file names resolve against the former, data
# paths against the latter. Confirm that both locations are intended.
os.chdir("F:/data_Project/ml_part2/1_preprocessing/2_preprocessing_batch/")
import mne
import numpy as np
import matplotlib.pyplot as plt
import pickle
# Root directory of the data
root = 'C:/Users/Lenovo/Desktop/ml_part2/1_preprocessing/2_preprocessing_batch/'
# Sub-directories that may be used (each processing step is stored separately)
path_list = ['1_raw_data/','2_epoch_data/','3_rm_epochs_g_chs/',
'4_ica_cleaned/','5_extreme_values_refered/']
# --- Stage 1 (automatic): raw recording -> epochs, for every subject ---
# The loop body had lost its indentation, which made the script
# unparseable. It is restored here, and the per-subject constants are
# defined once before the loop instead of on every iteration.

# Old channel name -> new channel name, so the names match the montage file.
mapping = {'FP1': 'Fp1', 'FPZ': 'Fpz', 'FP2': 'Fp2', 'FZ': 'Fz',
           'FCZ': 'FCz', 'CZ': 'Cz', 'CPZ': 'CPz', 'PZ': 'Pz',
           'PO5': 'PO5', 'POZ': 'POz', 'PO6': 'PO6', 'OZ': 'Oz',
           'HEO': 'HEOG', 'VEO': 'VEOG'}
# The montage file name is relative, i.e. read from the working directory.
montage = mne.channels.read_custom_montage('standard-10-5-cap385.elp')
# Only these two stimulus markers are epoched on, coded 10 and 11.
custom_mapping = {'Stimulus/10': 10, 'Stimulus/11': 11}

for i in range(1, 3):
    # Build the file name of this subject's recording.
    filename = os.path.join(root, path_list[0]) + str(i) + '.vhdr'
    print(filename)
    raw = mne.io.read_raw_brainvision(filename, preload=True)

    # Rename the channels on a copy and attach the montage.
    raw_rename_ch = raw.copy().rename_channels(mapping)
    raw_rename_ch.set_montage(montage)

    # Retype the two EOG channels, then keep EEG channels only.
    raw_select_ch = raw_rename_ch.copy()
    raw_select_ch.set_channel_types({'HEOG': 'eog', 'VEOG': 'eog'})
    raw_select_ch.pick(['eeg'])

    # Filtering: 0.1-30 Hz band-pass, 50 Hz notch (4 Hz wide, applied in
    # place to raw_band), then resampling to 500 Hz.
    raw_band = raw_select_ch.copy().filter(0.1, 30, picks='eeg')
    raw_band_notch = raw_band.notch_filter(50, notch_widths=4)
    raw_resampled = raw_band_notch.copy().resample(sfreq=500)

    # Epoching: -0.2 s to 0.8 s around each event, baseline -0.2 s to 0 s.
    events_from_anno, event_dict = mne.events_from_annotations(
        raw_resampled, event_id=custom_mapping)
    my_epochs = mne.Epochs(raw_resampled, events_from_anno,
                           tmin=-0.2, tmax=0.8, baseline=(-0.2, 0))

    # Store this subject's epochs in the stage-2 directory.
    savename = os.path.join(root, path_list[1]) + str(i) + '.pkl'
    with open(savename, 'wb') as output:
        pickle.dump(my_epochs, output)
# --- Stage 2 (manual, one subject at a time): bad epochs and channels ---
# Set `i` to the subject number and run this section by hand.
i = 1
filename = os.path.join(root, path_list[1]) + str(i) + '.pkl'
with open(filename, 'rb') as pkl_file:
    my_epochs = pickle.load(pkl_file)

# Remove bad epochs: mark them by hand in the plot, then drop them.
my_epochs_good = my_epochs.copy()
my_epochs_good.plot(n_epochs=5)
# The original line referenced `drop_bad` without parentheses, so the method
# was never called.
my_epochs_good.drop_bad()
my_epochs_good.info

# Interpolate the channels marked as bad; reset_bads=False leaves them
# listed in info['bads'] afterwards.
good_ch = my_epochs_good.load_data().copy().interpolate_bads(reset_bads=False)

# Store the result in the stage-3 directory.
savename = os.path.join(root, path_list[2]) + str(i) + '.pkl'
with open(savename, 'wb') as output:
    pickle.dump(good_ch, output)
# --- Stage 3 (manual, one subject at a time): ICA artifact removal ---
from mne.preprocessing import ICA

# Set `i` to the subject number and run this section by hand.
i = 1
filename = os.path.join(root, path_list[2]) + str(i) + '.pkl'
with open(filename, 'rb') as pkl_file:
    good_ch = pickle.load(pkl_file)

# Fit a 50-component ICA on a copy of this subject's epochs.
ica_data = good_ch.copy()
ica = ICA(n_components=50)
ica.fit(ica_data)

# Inspect the components: topographies, source time courses, properties.
ica.plot_components()
good_ch.load_data()
ica.plot_sources(good_ch, show_scrollbars=False)
# range(0, 50) covers [0, 50).
ica.plot_properties(good_ch, picks=[0, 1])
ica.plot_properties(good_ch, picks=np.array(range(0, 50)))

# Effect of removing component 0. The original overlaid on `raw_resampled`,
# which is whatever the stage-1 loop processed last (not necessarily subject
# `i`) and is undefined when this section runs in a fresh session. The
# overlay is drawn on this subject's own average instead; plot_overlay
# accepts Raw or Evoked.
ica.plot_overlay(good_ch.average(), exclude=[0], picks='eeg')

# Remove the selected component from a copy of the epochs (in place on the
# copy) and compare after (ica_clean) with before (good_ch).
ica.exclude = [0]
ica_clean = good_ch.copy()
ica_clean.load_data()
ica.apply(ica_clean)
ica_clean.plot(n_epochs=5, scalings=30e-6)
good_ch.plot(n_epochs=5, scalings=30e-6)

# Store the cleaned epochs in the stage-4 directory.
savename = os.path.join(root, path_list[3]) + str(i) + '.pkl'
with open(savename, 'wb') as output:
    pickle.dump(ica_clean, output)
# --- Stage 4 (automatic): amplitude rejection and re-referencing ---
# The loop body had lost its indentation, which made the script
# unparseable; it is restored here. The rejection threshold does not depend
# on the subject, so it is defined once (the original also built an unused
# duplicate named `reject_criteria` on every iteration).
stronger_reject_criteria = dict(eeg=100e-6)

for i in range(1, 3):
    # Load this subject's ICA-cleaned epochs from the stage-4 directory.
    filename = os.path.join(root, path_list[3]) + str(i) + '.pkl'
    with open(filename, 'rb') as pkl_file:
        ica_clean = pickle.load(pkl_file)

    # Drop epochs exceeding the threshold on any EEG channel.
    extreme_values = ica_clean.copy()
    extreme_values.drop_bad(reject=stronger_reject_criteria)

    # Re-reference to TP9/TP10 on a copy.
    refered = extreme_values.copy().set_eeg_reference(ref_channels=['TP9', 'TP10'])
    # refered = extreme_values.copy().set_eeg_reference(ref_channels='average')

    # Store the final result in the stage-5 directory.
    savename = os.path.join(root, path_list[4]) + str(i) + '.pkl'
    with open(savename, 'wb') as output:
        pickle.dump(refered, output)