This program implements reading, format conversion, and preprocessing of the MIT-BIH Arrhythmia Database:
- Import third-party packages / set paths / definitions and data selection
- Convert the raw records to .csv files
- Heartbeat segmentation
- Re-labeling (binary or 5-class)
Import third-party packages / set paths / definitions and data selection
import wfdb                          # reading MIT-BIH records and annotations
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import medfilt     # not used in this section
import pywt                          # not used in this section
import pandas as pd
path_load1 = 'F:/ECG_MIT-BIH/MIT-BIH/'                                          # raw MIT-BIH records
path_training_save1 = 'F:/ECG_MIT-BIH/training/path_save1/'                     # per-record lead / annotation .csv files
path_load2 = 'F:/ECG_MIT-BIH/training/path_save1/'
path_training_save2 = 'F:/ECG_MIT-BIH/training/path_save2/'                     # segmented beats with metadata
path_training_save3 = 'F:/ECG_MIT-BIH/training/path_save3/'                     # annotation symbol + beat values only
path_load3 = 'F:/ECG_MIT-BIH/training/path_save3/'
path_training_save4 = 'F:/ECG_MIT-BIH/training/path_save4/'                     # flattened beats, one sample per column
path_load4 = 'F:/ECG_MIT-BIH/training/path_save4/'
path_training_lead0_save5 = 'F:/ECG_MIT-BIH/training/path_save5_double/lead0/'  # relabeled lead-0 data
path_training_lead1_save5 = 'F:/ECG_MIT-BIH/training/path_save5_double/lead1/'  # relabeled lead-1 data
# Commonly used inter-patient split of the MIT-BIH records: DS1 is typically used for training, DS2 for testing.
DS1 = ['101', '106', '108', '109', '112', '114', '115', '116', '118', '119', '122',
       '124', '201', '203', '205', '207', '208', '209', '215', '220', '223', '230']
DS2 = ['100', '103', '105', '111', '113', '117', '121', '123', '200', '202', '210', '212',
       '213', '214', '219', '221', '222', '228', '231', '232', '233', '234']
DS = DS1          # select which record set to process
DSName = 'DS1'
Convert the raw records to .csv files
def ReadTrainingSet(DS):
    # read each record's two signal channels and its beat annotations into the global trainingSet dict
    for ds in DS:
        dataSetLoc = path_load1 + ds
        record = wfdb.rdrecord(dataSetLoc)
        lead0 = record.p_signal[:, 0]
        lead1 = record.p_signal[:, 1]
        annotation = wfdb.rdann(dataSetLoc, 'atr')
        annIdx = annotation.sample      # sample index of each annotation
        annType = annotation.symbol     # annotation symbol
        trainingSet[ds] = {'lead0': lead0, 'lead1': lead1, 'annIdx': annIdx, 'annType': annType}

trainingSet = {}
ReadTrainingSet(DS)
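Before exporting, it can be useful to confirm the record metadata; a minimal sketch using the wfdb Record attributes fs, sig_name and p_signal (record '101' is just an example from DS1):

record = wfdb.rdrecord(path_load1 + '101')
print(record.fs)               # sampling frequency: 360 Hz for the MIT-BIH Arrhythmia Database
print(record.sig_name)         # channel names, e.g. ['MLII', 'V1']
print(record.p_signal.shape)   # (number of samples, 2)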
headers = ['lead0', 'lead1', 'annIdx', 'annType']   # column names (not used below)
for k, v in trainingSet.items():
    df1 = pd.DataFrame({'lead0': v['lead0'], 'lead1': v['lead1']})
    df1.to_csv(path_training_save1 + k + '_lead.csv', index=False)
    df2 = pd.DataFrame({'annIdx': v['annIdx'], 'annType': v['annType']})
    df2.to_csv(path_training_save1 + k + '_ann.csv', index=False)
    print(k, end=' ')
# plot the first 6000 samples of record 109, lead 0
plt.figure(figsize=(20, 6), dpi=80)
temp1 = trainingSet['109']['lead0'][:6000]
plt.plot(temp1)
plt.show()
The resulting plot is shown below.
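To see how the annotation indices line up with the waveform, the annotated sample positions can be overlaid on the same segment; a minimal sketch using the arrays built above (the 6000-sample window and the marker style are arbitrary choices):

annIdx = trainingSet['109']['annIdx']
mask = annIdx < 6000                       # annotations inside the plotted window
plt.figure(figsize=(20, 6), dpi=80)
plt.plot(temp1)
plt.scatter(annIdx[mask], temp1[annIdx[mask]], color='red', marker='x')   # mark annotated beats
plt.show()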
Heartbeat segmentation
trainingSet_leads = {}
trainingSet_anns = {}
for ds in DS:
    trainingSet_leads[ds] = pd.read_csv(path_load2 + ds + '_lead.csv')
    trainingSet_anns[ds] = pd.read_csv(path_load2 + ds + '_ann.csv')
    print(ds, end=' ')
print('\nDone reading!')
# Annotation symbols that do not mark a heartbeat (rhythm changes, signal-quality marks, etc.);
# these must be defined before the filter below (they are listed again in the re-labeling section).
Non_beat_anns = ['[', ']', '!', 'x', '(', ')', 'p', 't', 'u', '`', '~', '^', '|', '+', 's', 'T', '*', 'D', '=', '"', '@']
Hbs = {}
for ds in DS:
    lead0 = trainingSet_leads[ds]['lead0']
    lead1 = trainingSet_leads[ds]['lead1']
    hbs0 = []
    hbs1 = []
    anns = []
    annIdxs = []
    # row[1] is the annotation sample index, row[2] the annotation symbol
    for row in trainingSet_anns[ds].itertuples():
        if row[2] in Non_beat_anns:           # skip non-beat annotations
            continue
        elif row[1] < 91:                     # too close to the start of the record
            continue
        elif row[1] + 144 > len(lead0):       # too close to the end of the record
            continue
        else:
            anns.append(row[2])
            annIdxs.append(row[1] - 1)
            # take 91 samples before and 144 samples after the annotated position: 235 samples per beat
            hbs0.append(lead0[row[1] - 91: row[1] + 144])
            hbs1.append(lead1[row[1] - 91: row[1] + 144])
    Hbs[ds] = pd.DataFrame({'lead0': hbs0, 'lead1': hbs1, 'ann': anns, 'annIdx': annIdxs})
    print(ds, end=' ')
print('\nSegmentation done!')
# Record 114 stores its two channels in the opposite order to the other records,
# so its leads are swapped here to keep lead0/lead1 consistent across the data set.
if DSName == 'DS1':
    temp = pd.Series(list(Hbs['114']['lead0']))
    Hbs['114']['lead0'] = Hbs['114']['lead1']
    Hbs['114']['lead1'] = temp
    print('Lead correction done!')
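Before reshaping the data, it may help to check how many beats of each annotation symbol were segmented; a short sketch with collections.Counter (the extra import is the only addition):

from collections import Counter
counts = Counter()
for ds in DS:
    counts.update(Hbs[ds]['ann'])    # tally the beat symbols across all records in DS
print(counts)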
Hbs_lead0 = {}
Hbs_lead1 = {}
for ds in DS:
    Hbs_lead0[ds] = pd.DataFrame({
        'ann': Hbs[ds]['ann'],
        'annIdx': Hbs[ds]['annIdx'],
        'beat': Hbs[ds]['lead0'],
    })
    Hbs_lead1[ds] = pd.DataFrame({
        'ann': Hbs[ds]['ann'],
        'annIdx': Hbs[ds]['annIdx'],
        'beat': Hbs[ds]['lead1'],
    })
print('Done!')
for ds in DS:
    # expand each beat (a pandas Series slice) into a plain list of values and its original sample indices
    beatValues = []
    beatIndexs = []
    for i in range(len(Hbs_lead0[ds]['beat'])):
        beatValues.append(list(Hbs_lead0[ds]['beat'][i].values))
        beatIndexs.append(list(Hbs_lead0[ds]['beat'][i].index))
    Hbs_lead0[ds]['beatValues'] = beatValues
    Hbs_lead0[ds]['beatIndex'] = beatIndexs
    beatValues = []
    beatIndexs = []
    for i in range(len(Hbs_lead1[ds]['beat'])):
        beatValues.append(list(Hbs_lead1[ds]['beat'][i].values))
        beatIndexs.append(list(Hbs_lead1[ds]['beat'][i].index))
    Hbs_lead1[ds]['beatValues'] = beatValues
    Hbs_lead1[ds]['beatIndex'] = beatIndexs
    Hbs_lead0[ds].to_csv(path_training_save2 + ds + '_lead0.csv', index=False)
    Hbs_lead1[ds].to_csv(path_training_save2 + ds + '_lead1.csv', index=False)
    print(ds, end=' ')
print('\nSaved to path_training_save2!')
Hbs_lead0_0 = {}
Hbs_lead1_1 = {}
for ds in DS:
    # keep only the annotation symbol and the beat sample values
    Hbs_lead0_0[ds] = Hbs_lead0[ds].drop(columns=['annIdx', 'beat', 'beatIndex'])
    Hbs_lead1_1[ds] = Hbs_lead1[ds].drop(columns=['annIdx', 'beat', 'beatIndex'])
    Hbs_lead0_0[ds].to_csv(path_training_save3 + ds + '_lead0.csv', header=False, index=False)
    Hbs_lead1_1[ds].to_csv(path_training_save3 + ds + '_lead1.csv', header=False, index=False)
print('Saved!')
Hbs_lead0 = {}
Hbs_lead1 = {}
for ds in DS:
    # column A: annotation symbol, column B: beat values stored as a "[v1, v2, ...]" string
    Hbs_lead0[ds] = pd.read_csv(path_load3 + ds + '_lead0.csv', header=None, names=['A', 'B'])
    Hbs_lead1[ds] = pd.read_csv(path_load3 + ds + '_lead1.csv', header=None, names=['A', 'B'])
for key, value in Hbs_lead0.items():
    df0 = pd.DataFrame({'index': Hbs_lead0[key]['A'], 'lead0': Hbs_lead0[key]['B']})
    df1 = df0['index'].str.split(',', expand=True)
    # strip the surrounding brackets and split the value string into one column per sample
    df2 = df0['lead0'].str.lstrip("[")
    df2 = df2.str.rstrip("]")
    df2 = df2.str.split(',', expand=True)
    df = pd.concat([df1, df2], join="inner", axis=1)
    df.to_csv(path_training_save4 + key + '_lead0.csv', header=False, index=False)
    print(key, end=' ')
print('\nlead0 saved!')
for key, value in Hbs_lead1.items():
    df0 = pd.DataFrame({'index': Hbs_lead1[key]['A'], 'lead1': Hbs_lead1[key]['B']})
    df1 = df0['index'].str.split(',', expand=True)
    df2 = df0['lead1'].str.lstrip("[")
    df2 = df2.str.rstrip("]")
    df2 = df2.str.split(',', expand=True)
    df = pd.concat([df1, df2], join="inner", axis=1)
    df.to_csv(path_training_save4 + key + '_lead1.csv', header=False, index=False)
    print(key, end=' ')
print('\nlead1 saved!')
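As a quick sanity check, one of the flattened files can be read back to confirm that every row holds one annotation symbol followed by 235 sample values (record '101' is just an example):

check = pd.read_csv(path_training_save4 + '101' + '_lead0.csv', header=None)
print(check.shape)       # expected (number of beats, 236): 1 symbol column + 235 samples
print(check.iloc[0, 0])  # annotation symbol of the first beat, e.g. 'N'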
Re-labeling (binary or 5-class)
# AAMI heartbeat classes
N = ['N', 'L', 'R', 'e', 'j']            # normal and bundle-branch-block beats
S = ['A', 'a', 'J', 'S']                 # supraventricular ectopic beats
V = ['V', 'E']                           # ventricular ectopic beats
F = ['F']                                # fusion beats
Q = ['/', 'f', 'Q']                      # paced, fusion-of-paced, and unclassifiable beats
# annotation symbols that do not mark a heartbeat
Non_beat_anns = ['[', ']', '!', 'x', '(', ')', 'p', 't', 'u', '`', '~', '^', '|', '+', 's', 'T', '*', 'D', '=', '"', '@']
Hbs_lead0 = {}
Hbs_lead1 = {}
for ds in DS:
    Hbs_lead0[ds] = pd.read_csv(path_load4 + ds + '_lead0.csv', header=None)
    Hbs_lead1[ds] = pd.read_csv(path_load4 + ds + '_lead1.csv', header=None)
for key, value in Hbs_lead0.items():
    # binary labels: 0 for the N class, 1 for everything else (S, V, F, Q)
    df0 = pd.DataFrame({'index': Hbs_lead0[key].loc[:, 0]})
    for i in range(len(df0['index'])):
        if df0['index'][i] in N:
            df0.loc[i, 'index'] = 0
        elif df0['index'][i] in S:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in V:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in F:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in Q:
            df0.loc[i, 'index'] = 1
    df1 = Hbs_lead0[key][list(range(236))]   # label column 0 plus 235 sample columns
    df1 = df1.drop(columns=0, axis=1)        # drop the original symbol column
    df = pd.concat([df0, df1], join="inner", axis=1)
    df.to_csv(path_training_lead0_save5 + key + '_lead0.csv', header=False, index=False)
    print(key, end=' ')
print('\nlead0 saved!')
for key, value in Hbs_lead1.items():
    df0 = pd.DataFrame({'index': Hbs_lead1[key].loc[:, 0]})
    for i in range(len(df0['index'])):
        if df0['index'][i] in N:
            df0.loc[i, 'index'] = 0
        elif df0['index'][i] in S:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in V:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in F:
            df0.loc[i, 'index'] = 1
        elif df0['index'][i] in Q:
            df0.loc[i, 'index'] = 1
    df1 = Hbs_lead1[key][list(range(236))]
    df1 = df1.drop(columns=0, axis=1)
    df = pd.concat([df0, df1], join="inner", axis=1)
    df.to_csv(path_training_lead1_save5 + key + '_lead1.csv', header=False, index=False)
    print(key, end=' ')
print('\nlead1 saved!')
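The heading above also mentions a 5-class labeling, but the code only implements the binary case (N = 0, everything else = 1). A minimal sketch of the 5-class variant under the same column layout (the helper name relabel_5class and the '_5class' file suffix are hypothetical):

def relabel_5class(symbol):
    # map a beat symbol to the five AAMI classes: N=0, S=1, V=2, F=3, Q=4
    for label, group in enumerate([N, S, V, F, Q]):
        if symbol in group:
            return label
    return None   # should not occur once non-beat annotations have been removed

for key in Hbs_lead0:
    df = Hbs_lead0[key][list(range(236))].copy()
    df[0] = df[0].map(relabel_5class)      # replace the symbol column with the 0-4 class label
    df.to_csv(path_training_lead0_save5 + key + '_lead0_5class.csv', header=False, index=False)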