Competition page: http://challenge.xfyun.cn/topic/info?type=multilingual
First, extract the log-mel features of each audio file and save them to disk:
import os
import pickle as pkl

import librosa
import numpy as np
from tqdm import tqdm
def extract_logmel(y, sr, size=3):
    """
    Extract a log-mel spectrogram feature.
    :param y: the input signal (audio time series)
    :param sr: sample rate of `y`
    :param size: length (seconds) of the random crop taken from the original audio, 3 seconds by default
    :return: log-mel spectrogram feature
    """
    # Peak-normalize the waveform to [-1, 1].
    y = y.astype(np.float32)
    normalization_factor = 1 / np.max(np.abs(y))
    y = y * normalization_factor
    # Zero-pad short clips, then take a random crop of `size` seconds.
    if len(y) < size * sr:
        new_y = np.zeros((size * sr,))
        new_y[:len(y)] = y
        y = new_y
    start = np.random.randint(0, len(y) - size * sr + 1)
    y = y[start: start + size * sr]
    # Extract the log-mel spectrogram.
    melspectrogram = librosa.feature.melspectrogram(
        y=y, sr=sr, n_fft=2048, hop_length=1024, n_mels=60)
    logmelspec = librosa.power_to_db(melspectrogram)
    return logmelspec
def get_wave_norm(file):
    # librosa.load resamples to 22050 Hz and returns float32 samples in [-1, 1].
    data, framerate = librosa.load(file, sr=22050)
    return data, framerate
LABELS = ['L{:03}'.format(i) for i in range(1, 18)]
N_CLASS = len(LABELS)
DATA_DIR = './train'
SUB_DIR = ['train', 'dev']
file_glob = []

# Collect (file path, label index) pairs from the class folders.
for cls_fold in os.listdir(DATA_DIR):
    if not cls_fold.startswith('L'):
        continue
    cls_base = os.path.join(DATA_DIR, cls_fold)
    lbl = cls_fold.split('-')[0]
    for type_fold in SUB_DIR:
        base = os.path.join(cls_base, type_fold)
        files = os.listdir(base)
        print('{} {} num:'.format(lbl, type_fold), len(files))
        for pt in files:
            file_pt = os.path.join(base, pt)
            file_glob.append((file_pt, LABELS.index(lbl)))

print('done.')
print(len(file_glob))
data = []
for file, lbl in tqdm(file_glob):
    try:
        raw, sr = get_wave_norm(file)
    except Exception as e:
        print(e, file)
        continue  # skip unreadable files instead of reusing the previous clip
    feature = extract_logmel(y=raw, sr=sr, size=3)
    data.append((feature, lbl))

with open('./data.pkl', 'wb') as f:
    pkl.dump(data, f)
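As an optional sanity check (my addition, not part of the original script), you can reload data.pkl and confirm every feature matrix has the same shape; with a 3-second crop at sr=22050, hop_length=1024 and n_mels=60, each log-mel matrix should come out as 60 x 65:
import pickle as pkl
import numpy as np

# Quick check of the saved training features (assumes data.pkl from the step above).
with open('./data.pkl', 'rb') as f:
    data = pkl.load(f)

shapes = {feat.shape for feat, _ in data}
labels = [lbl for _, lbl in data]
print('number of samples:', len(data))
print('feature shapes found:', shapes)              # expected: {(60, 65)}
print('label index range:', min(labels), '-', max(labels))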
import os
import wave
import pickle as pkl

import librosa
import numpy as np
from tqdm import tqdm

DATA_DIR = './test'
def track_features(y, sr):
    # 13 MFCCs + 1 spectral centroid + 12 chroma bins + 7 spectral-contrast bands = 33 dims per frame.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    spectral_center = librosa.feature.spectral_centroid(y=y, sr=sr)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    # Transpose so time is the first axis, then concatenate along the feature axis.
    features = np.concatenate(
        [mfcc.T, spectral_center.T, chroma.T, spectral_contrast.T], axis=-1)
    return features
def get_wave_norm(file):
    with wave.open(file, 'rb') as f:
        nchannels, sampwidth, framerate, nframes = f.getparams()[:4]
        data = f.readframes(nframes)
    # np.fromstring is deprecated; frombuffer yields the same int16 samples.
    data = np.frombuffer(data, dtype=np.int16)
    data = data * 1.0 / np.max(np.abs(data))
    return data, framerate
seg = 150000  # window length in samples, slid with 50% overlap
data = {}

for cls_fold in tqdm(os.listdir(DATA_DIR)):
    if not cls_fold.startswith('t'):
        continue
    cls_base = os.path.join(DATA_DIR, cls_fold)
    try:
        raw, sr = get_wave_norm(cls_base)
    except Exception as e:
        print(e, cls_base)
        continue
    length = raw.shape[0]
    temp = []
    for i in range((length // seg) * 2 + 1):
        start = i * int(seg / 2)
        if start >= length:
            break
        end = min(start + seg, length)
        if end - start == seg:
            x = raw[start:end]
        else:
            # Last (shorter) window: zero-pad it up to seg samples.
            x = np.zeros(seg)
            x[-(end - start):] = raw[start:end]
        r = track_features(x, sr)
        temp.append(r)
    data[cls_fold] = temp

with open('./test_data.pkl', 'wb') as f:
    pkl.dump(data, f)
test = []
for key, value in data.items():
    test.extend(value)
print(np.max(test))
print(np.min(test))
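For reference, with librosa's default hop_length of 512 each 150000-sample window gives 293 frames, and the concatenated features are 33-dimensional (13 MFCC + 1 spectral centroid + 12 chroma + 7 spectral contrast), i.e. the (293, 33) input shape used by the model below. A quick optional check (my addition, not in the original post):
import pickle as pkl
import numpy as np

# Every segment feature saved for a test clip should be (293, 33).
with open('./test_data.pkl', 'rb') as f:
    test_data = pkl.load(f)

sample_key = next(iter(test_data))
print(sample_key, np.array(test_data[sample_key]).shape)  # expected: (n_segments, 293, 33)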
Next, save the training data in .npy format:
import pickle as pkl
from tqdm import tqdm
import numpy as np
with open('data.pkl', 'rb') as f:
    data = pkl.load(f)

train_x = []
train_y = []
np.random.shuffle(data)
for x, y in data:
    train_x.append(x)
    train_y.append(y)
train_x = np.array(train_x)
train_y = np.array(train_y)
np.save('train_x.npy', train_x)
np.save('train_y.npy', train_y)
print(train_x.shape)
print(train_y.shape)
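Before training, it is also worth a quick look at the array shapes and the class balance; a minimal check (my addition, not part of the original post):
import numpy as np

train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
print(train_x.shape, train_y.shape)
# Per-class sample counts; train_y holds integer indices into LABELS (0..16).
print(np.bincount(train_y, minlength=17))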
The model uses 1D convolutions followed by bidirectional LSTMs:
import numpy as np
import keras
from keras import layers
from keras import regularizers
from keras.models import Sequential
LABELS = ['L{:03}'.format(i) for i in range(1, 18)]
N_CLASS = len(LABELS)
train_x = np.load('train_x.npy')
train_y = np.load('train_y.npy')
print(train_x.shape)
print(train_y.shape)
model = Sequential()
model.add(layers.Conv1D(16, 5, input_shape=(293, 33),
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Conv1D(16, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Conv1D(16, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPool1D())
model.add(layers.Dropout(0.5))
model.add(layers.Conv1D(32, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Conv1D(32, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPool1D())
model.add(layers.Dropout(0.5))
model.add(layers.Conv1D(64, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Conv1D(64, 3, activation='elu',
                        kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.BatchNormalization())
model.add(layers.Bidirectional(layers.LSTM(128, dropout=0.5, return_sequences=True,
                                           kernel_regularizer=regularizers.l1_l2(1e-7))))
model.add(layers.Bidirectional(layers.LSTM(128, dropout=0.5, return_sequences=True,
                                           kernel_regularizer=regularizers.l1_l2(1e-7))))
model.add(layers.LSTM(128,
                      kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Dense(128, activation='elu',
                       kernel_regularizer=regularizers.l1_l2(1e-7)))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(N_CLASS, activation="softmax"))
model.summary()
adam = keras.optimizers.Adam(lr=2e-4)
# train_y holds integer class indices, so use the sparse variant of the loss.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=adam, metrics=['accuracy'])

# Train model on dataset
batch_size = 128
model.fit(x=train_x, y=train_y, batch_size=batch_size,
          epochs=200, validation_split=0.1, shuffle=True)
model.save('my_model.h5')
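The fit call above keeps only the final weights after 200 epochs. As an optional variant (my addition, the original post does not use callbacks), Keras callbacks can checkpoint the best validation weights and stop once the validation loss stalls:
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Keep the best-validation-loss weights and stop early instead of always running 200 epochs.
callbacks = [
    ModelCheckpoint('my_model.h5', monitor='val_loss', save_best_only=True),
    EarlyStopping(monitor='val_loss', patience=20),
]
model.fit(x=train_x, y=train_y, batch_size=batch_size,
          epochs=200, validation_split=0.1, shuffle=True,
          callbacks=callbacks)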
import pickle as pkl

import numpy as np
import pandas as pd
from tqdm import tqdm
from keras.models import load_model
LABELS = ['L{:03}'.format(i) for i in range(1, 18)]
N_CLASS = len(LABELS)
test_pt = 'test_data.pkl'
with open(test_pt, 'rb') as f:
    raw_data = pkl.load(f)
model = load_model('my_model.h5')
result = {'id': [], 'label': []}
for key, value in tqdm(raw_data.items()):
    x = np.array(value)
    # Average the softmax outputs over all segments of this clip, then take the top class.
    y = model.predict(x)
    y = np.mean(y, axis=0)
    pred = LABELS[np.argmax(y)]
    result['id'].append(key)
    result['label'].append(pred)
result = pd.DataFrame(result)
result.to_csv('./submission.csv', index=False)
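One last optional check before uploading (my addition): make sure the submission has one row per test clip and glance at the predicted label distribution.
import pandas as pd

# Sanity check on the generated submission file.
sub = pd.read_csv('./submission.csv')
print(sub.shape)
print(sub['label'].value_counts())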