小老师布置的任务:让我把采集到的情绪相关的音频数据进行数据处理和分析。数据包括11个人的7种情绪音频,利用MFCC处理音频后,提取不同情绪组合进行训练,得出训练结果。
import random
import librosa as lb
import soundfile as sf
import numpy as np
import os, glob, pickle
import matplotlib.pyplot as plt
import itertools
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
如果不设置随机种子,每次训练得到的结果(准确率)都会不一样,因为每次运行时用到的随机数不同。
# Seed Python's and NumPy's global random generators with a fixed value so
# that repeated runs of the script produce the same accuracies.
# NOTE(review): the MLPClassifier further down is built without an explicit
# random_state, so it presumably relies on NumPy's global generator -- confirm.
random.seed(123)
np.random.seed(123)
# The seven emotion classes present in the recordings.
emotion_labels = [
    'calm',
    'happy',
    'sad',
    'angry',
    'fear',
    'disgust',
    'surprise',
]

# Every unordered pair of distinct emotions, listed exactly once.
# A pair is kept only when its first label compares greater than its second
# (plain string comparison), which drops both (a, a) and the mirrored (b, a).
# itertools.product walks the labels in the same order as two nested loops,
# so the pairs come out in the order the training loop reports them.
label_set1 = [
    pair
    for pair in itertools.product(emotion_labels, repeat=2)
    if pair[0] > pair[1]
]
#label_set1
def audio_features(file_title, mfcc, chroma, mel):
    """Extract a 1-D feature vector from a single-channel audio file.

    Each enabled feature is computed with librosa, averaged over the time
    axis, and appended to the result in the fixed order MFCC, chroma, mel.

    Args:
        file_title: Path of the audio file; it is opened with ``soundfile``.
        mfcc: If truthy, append the time-averaged MFCCs (``n_mfcc=40``).
        chroma: If truthy, append the time-averaged chromagram computed from
            the magnitude of the signal's STFT.
        mel: If truthy, append the time-averaged mel spectrogram.

    Returns:
        A 1-D ``numpy`` array with the concatenated features (empty when no
        feature is enabled), or ``None`` when the decoded audio is not
        one-dimensional, so callers can skip such files.
    """
    # Read everything that is needed and release the file handle before the
    # comparatively slow feature computation starts.
    with sf.SoundFile(file_title) as audio_recording:
        audio = audio_recording.read(dtype="float32")
        sample_rate = audio_recording.samplerate

    # Only 1-D signals are handled; anything else (e.g. multi-channel) is skipped.
    if audio.ndim != 1:
        return None

    result = np.array([])
    if mfcc:
        mfcc_mean = np.mean(
            lb.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        result = np.hstack((result, mfcc_mean))
    if chroma:
        magnitude = np.abs(lb.stft(y=audio))
        chroma_mean = np.mean(
            lb.feature.chroma_stft(S=magnitude, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, chroma_mean))
    if mel:
        # The signal must be passed as ``y=``: recent librosa releases made the
        # arguments of the feature functions keyword-only, so a positional
        # ``audio`` raises TypeError there. The keyword form works on older
        # releases as well.
        mel_mean = np.mean(
            lb.feature.melspectrogram(y=audio, sr=sample_rate).T, axis=0
        )
        result = np.hstack((result, mel_mean))
    return result
其中每次改变的是划分训练集/测试集时的随机种子(random_state 依次取 5、6、7、8、9),共训练 5 次模型,最后取准确率的均值:accr=np.array(accuracy).mean()
# Imported once here rather than on every split iteration; the name stays
# available at module level exactly as it was in the original script.
from collections import Counter

# Train and evaluate one MLP per emotion pair and print the mean test accuracy.
#
# The directory listing and the per-file features do not depend on the label
# pair, so both are computed once and reused instead of being redone for every
# pair a recording belongs to.
wav_files = glob.glob("./test_data1//*//*.wav")
feature_cache = {}

for label_pair in label_set1:
    x = []
    y = []
    accuracy = []
    for file in wav_files:
        file_name = os.path.basename(file)  # file name without its directory part
        # The emotion is the second comma-separated field of the file name.
        emotion = file_name.split(",")[1]
        if emotion not in label_pair:
            continue
        if file not in feature_cache:
            feature_cache[file] = audio_features(
                file, mfcc=True, chroma=False, mel=False
            )
        feature = feature_cache[file]
        if feature is None:
            # audio_features returns None for audio it does not handle.
            continue
        x.append(feature)
        y.append(emotion)

    # Repeat the 50/50 train/test split with five different split seeds and
    # average the accuracies to smooth out the effect of a single split.
    for split_seed in [5, 6, 7, 8, 9]:
        X_train, X_test, y_train, y_test = train_test_split(
            np.array(x), y, test_size=0.5, random_state=split_seed
        )
        model = MLPClassifier(
            hidden_layer_sizes=(200,), learning_rate='adaptive', max_iter=400
        )
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy.append(accuracy_score(y_true=y_test, y_pred=y_pred) * 100)

    accr = np.array(accuracy).mean()
    print("{},{:.1f}%".format(label_pair, accr))
('calm', 'angry'),51.6%
('happy', 'calm'),58.9%
('happy', 'angry'),36.7%
('happy', 'fear'),31.1%
('happy', 'disgust'),40.0%
('sad', 'calm'),60.0%
('sad', 'happy'),27.3%
('sad', 'angry'),43.6%
('sad', 'fear'),44.4%
('sad', 'disgust'),30.0%
('fear', 'calm'),73.8%
('fear', 'angry'),40.0%
('fear', 'disgust'),33.3%
('disgust', 'calm'),77.3%
('disgust', 'angry'),53.3%
('surprise', 'calm'),62.2%
('surprise', 'happy'),40.0%
('surprise', 'sad'),36.4%
('surprise', 'angry'),41.7%
('surprise', 'fear'),35.6%
('surprise', 'disgust'),50.0%
# Every group of three distinct emotions, listed exactly once.
# A triple is kept only when its labels are strictly decreasing under string
# comparison, which removes repeats and reorderings of the same three labels.
# itertools.product enumerates candidates in the same order as three nested
# loops over emotion_labels, so the resulting order matches that of the
# hand-written nested-loop version.
label_set2 = [
    group
    for group in itertools.product(emotion_labels, repeat=3)
    if group[0] > group[1] > group[2]
]
#label_set2
# Every group of four distinct emotions, listed exactly once.
# A 4-tuple is kept only when its labels are strictly decreasing under string
# comparison, which removes repeats and reorderings of the same four labels.
# itertools.product enumerates candidates in the same order as four nested
# loops over emotion_labels, so the resulting order matches that of the
# hand-written nested-loop version.
label_set3 = [
    group
    for group in itertools.product(emotion_labels, repeat=4)
    if group[0] > group[1] > group[2] > group[3]
]
#label_set3
我觉得我的代码还有很多可以优化的地方,比如情绪组合那里应该可以用其他方法实现:按我现在的写法,每个组合里的情绪越多,循环嵌套的层数就越多,明显不够好。但我不知道怎么改hhhh