1, 配置环境,需要安装两个Python包以及ffmpeg,
① pydub, 命令:pip install pydub ----> 把mp3格式文件转化为wav格式
② python_speech_features ,命令:pip install python_speech_features \
----->完成傅里叶变换和梅尔倒谱(MFCC)
③ 安装ffmpeg:
从http://www.ffmpeg.org/download.html下载对应自己电脑版本的软件包,在本地解压,
并将解压后文件夹中的bin目录添加到本地的环境变量中。
2, 特征工程:
如何把音频转化为数值矩阵,至于标签列,只需通过zip映射即可完成。
如果我们了解了一首歌是如何转化为数值矩阵的,那么只需要将这个定义好的函数做for loop;
即可完成音乐文件的批量处理。
其中,涉及到降维的部分,我们不再用sklearn中的库进行调包处理,而是用自己的一套
算法达到降维的目的。总的宗旨是:每1个实例,只能是1行。
具体方法是:将原来5999×13的矩阵,在0方向上取均值得到一个1×13的矩阵,然后在1方向上,
对列两两之间计算协方差,得到1×91维的向量,再与之前1×13维的向量进行append,最终得到一个
1×104维的向量来表征1首歌的特征.
对label的处理,先将music_info.csv 转化为list之后,再转化为字典;
[['不生气了,好吗', '清新'], ['回不去的过往', '清新'],
['Bady I Love You', '清新'], ['心与心', '清新'], ['我默念的好朋友', '清新'],
['是我太软弱', '清新'], ['可惜不是你', '清新'], ["I don't know", '清新'],
['love line', '清新'], ['叶子', '清新'], ['刚好', '清新'], ['Run To You', '清新'],
['Ready Aim Fire', '兴奋'], ['Ready For War', '兴奋'], ['Is She with You?', '兴奋'],
['Everybody Knows', '兴奋'], ['Icky Thump', '兴奋'], ['我还有点小糊涂', '快乐'],
['海绵宝宝片尾曲', '快乐'], ['神奇宝贝·胖丁之歌', '快乐'], ['四季的问候', '快乐'],
['晚安喵', '快乐'], ['让她开心(童声版)', '快乐'], ['简单爱', '快乐']]
3, 建模调参:
对选择的模型-支持向量机(svm)进行网格交叉验证(gridSearch),打印出最优的参数来构造一个新
的模型。用新的模型来训练样本,并将训练好的模型进行持久化保存,以便于后来预测的时候调用。
最优参数:{'C': 0.1, 'decision_function_shape': 'ovo', \
'kernel': 'rbf', 'probability': True}
最优模型准确率:0.25757575757575757
寻找最佳模型共耗时2.33
4, 预测:
feature.py 特征提取
#coding:utf-8
import pandas as pd
import numpy as np
import glob#搜索歌单路径
from pydub.audio_segment import AudioSegment
from scipy.io import wavfile
from python_speech_features import mfcc
import os#.remove("")
import sys
import time#计算程序运行时间
def 获取歌单(path=None):  # extract_label
    """Load the playlist CSV and keep only the song-name and tag columns.

    The result is the raw material for the name -> tag dictionary and the
    tag -> index dictionary built by the caller.

    Args:
        path: Location of the playlist CSV. When omitted (or falsy) the
            module-level ``歌单路径`` is used, which is the original
            behaviour of this function.

    Returns:
        A ``pandas.DataFrame`` holding exactly the columns ``name`` and
        ``tag``, in that order.

    Raises:
        KeyError: If the CSV has no ``name`` or no ``tag`` column.
    """
    if not path:
        # Fall back to the global configured by the script entry point.
        path = 歌单路径
    data = pd.read_csv(path)
    return data[['name', 'tag']]
def 获取单首歌曲特征(file):  # fetch_index_label
    """Convert one audio file to WAV if necessary and return its feature row.

    The MFCC matrix of the song (13 coefficients per frame) is collapsed
    into a single vector so that one song is always one row:

    * 13 values: the mean of every MFCC coefficient over all frames;
    * 91 values: the upper triangle of the 13x13 covariance matrix of the
      coefficients, read diagonal by diagonal (main diagonal first).

    Args:
        file: Path of the audio file, e.g.
            ``./data/music\\50 Cent - Ready For War.mp3``.

    Returns:
        A 1-D ``numpy.ndarray`` with 104 values, or ``None`` when the WAV
        file could not be read or analysed (the error is printed).

    Side effects:
        For a non-WAV input a ``.wav`` file with the same base name is
        written next to the source file and is left on disk.
    """
    # splitext copes with dots inside directory names and with files that
    # have no extension at all, which a plain split('.') does not.
    file_name, extension = os.path.splitext(file)
    file_format = extension[1:].lower()

    if file_format != 'wav':
        # Decode using the file's real container format instead of always
        # claiming 'mp3'; with no extension let pydub/ffmpeg detect it.
        song = AudioSegment.from_file(file, format=file_format or None)
        file = file_name + '.wav'
        song.export(file, format='wav')

    try:
        rate, data = wavfile.read(file)
        # NOTE(review): wavfile.read yields a 2-D array for multi-channel
        # audio; confirm that mfcc() treats such input as intended.
        mfcc_feas = mfcc(data, rate, numcep=13, nfft=2048)
        mm = np.transpose(mfcc_feas)   # one row per MFCC coefficient
        mf = np.mean(mm, axis=1)       # 13 per-coefficient means
        mc = np.cov(mm)                # 13x13 covariance between coefficients
        # Diagonals 0..12 of the covariance matrix: 13 + 12 + ... + 1 = 91.
        diagonals = [np.diag(mc, offset) for offset in range(mm.shape[0])]
        return np.concatenate([mf] + diagonals)
    except Exception as msg:
        # Best effort: report the problem and signal failure with None so
        # that a batch run can carry on with the remaining songs.
        print(msg)
        return None
def 特征提取主函数():  # extract_and_export
    """Build the labelled feature matrix for all songs and write it to CSV.

    Steps:
        1. Read the playlist through ``获取歌单()`` and build a
           name -> tag dictionary, e.g. ``{'回不去的过往': '清新', ...}``.
        2. Give every distinct tag an integer index.
        3. For each file matched by ``歌曲源路径`` whose title appears in the
           playlist, extract the feature vector and append the tag index
           as the last column.
        4. Write the tag/index table to ``数值化标签路径`` and the feature
           matrix to ``歌曲特征文件存放路径`` (both without header or index).

    Returns:
        A 2-D ``numpy.ndarray`` with one row per processed song (features
        followed by the tag index), or an empty 1-D array when no song
        could be processed.
    """
    df = 获取歌单()
    name_label_dict = dict(zip(df['name'], df['tag']))

    # Sort the tags so the tag -> index mapping is identical on every run;
    # the iteration order of a set of strings is not stable across runs.
    labels = sorted(set(name_label_dict.values()), key=str)
    label_index_dict = {label: index for index, label in enumerate(labels)}

    # e.g. './data/music\\50 Cent - Ready For War.mp3'
    all_music_files = sorted(glob.glob(歌曲源路径))

    rows = []
    for loop_count, file_name in enumerate(all_music_files):
        print('开始处理:' + file_name)
        # The title is whatever follows the last '-' in the file name,
        # without directory or extension. basename/splitext work with any
        # path separator and with dots inside the title.
        title = os.path.splitext(os.path.basename(file_name))[0]
        music_name = title.split('-')[-1].strip()

        if music_name in name_label_dict:
            ff = 获取单首歌曲特征(file_name)
            if ff is None:
                # Extraction failed; skip the song instead of storing a
                # malformed row that would break the matrix.
                print('无法处理:' + file_name.replace('\xa0', '') + '; 原因是: 特征提取失败')
            else:
                label_index = label_index_dict[name_label_dict[music_name]]
                # Features plus the numeric tag as the final column.
                rows.append(np.append(ff, label_index))
        else:
            print('无法处理:' + file_name.replace('\xa0', '') +'; 原因是: 列表中不存在此歌曲')

        print('looping-----%d' % loop_count)
        print('all_mfcc.shape:', end='')
        print((len(rows), rows[0].shape[0]) if rows else (0,))

    # Stack once at the end; this also keeps a single song as a 1-row
    # matrix so that it is written as one CSV line.
    all_mfcc = np.vstack(rows) if rows else np.array([])

    # Persist the tag -> index table as rows of [tag, index].
    label_index_list = [[label, index] for label, index in label_index_dict.items()]
    pd.DataFrame(label_index_list).to_csv(数值化标签路径, header=False,
                                          index=False, encoding='utf-8')
    pd.DataFrame(all_mfcc).to_csv(歌曲特征文件存放路径, header=False,
                                  index=False, encoding='utf-8')
    return all_mfcc
if __name__ == '__main__':
    # Output files produced by the extraction run.
    歌曲特征文件存放路径 = './data/music_features.csv'  # music_features_file_path
    数值化标签路径 = './data/music_index_label.csv'  # music_index_label_path
    # Inputs: glob pattern of the audio files and the playlist CSV.
    歌曲源路径 = './data/music/*.mp3'  # music_audio_dir
    歌单路径 = './data/music_info.csv'  # music_info_csv_file_path

    # Run the whole extraction and report the wall-clock time it took.
    started_at = time.time()
    特征提取主函数()
    elapsed = time.time() - started_at
    print('总耗时%.2f秒' % elapsed)
svm.py
#coding:utf-8
'调参、训练并保存模型'
from sklearn import svm
import acc
import pandas as pd
from sklearn.utils import shuffle#洗牌
import numpy as np
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.externals import joblib#保存模型模块 load,tf.saver.save()
#joblib
#tf.saver.save("/")
import sys
import time
# Parameter grid and search set-up for the grid-search experiment.
def 网格交叉验证(X, Y):  # internal_cross_validation
    """Grid-search SVC hyper-parameters with 5-fold CV and print the result.

    Args:
        X: Feature matrix passed to ``GridSearchCV.fit``.
        Y: Labels passed to ``GridSearchCV.fit``.

    Returns:
        None. The best parameter combination and its score are printed.
    """
    search_space = dict(
        kernel=('linear', 'rbf', 'poly'),
        C=[0.1, 1],
        probability=[True, False],
        decision_function_shape=['ovo', 'ovr'],
    )
    estimator = svm.SVC(random_state=0)
    searcher = GridSearchCV(estimator, param_grid=search_space, cv=5)

    print('开始交叉验证获取最优参数构建')
    searcher.fit(X, Y)
    print('最优参数:', searcher.best_params_, sep='')
    print('最优模型准确率:', searcher.best_score_, sep='')
# Entry point of the hyper-parameter search; the best parameters are printed.
def 交叉验证主函数(music_csv_file_path=None, data_percentage=0.7):  # cross_validation
    """Sample part of the feature CSV and run the grid search on it.

    Args:
        music_csv_file_path: Feature CSV to read; when falsy the global
            ``歌曲特征文件存放路径`` is used.
        data_percentage: Fraction of rows to sample. Only a float strictly
            between 0 and 1 is honoured; anything else falls back to 0.7.

    Returns:
        None. Results are printed by ``网格交叉验证``.
    """
    csv_path = music_csv_file_path if music_csv_file_path else 歌曲特征文件存放路径
    print('开始读取数据:' + csv_path)
    frame = pd.read_csv(csv_path, sep=',', header=None, encoding='utf-8')

    fraction_ok = isinstance(data_percentage, float) and 0 < data_percentage < 1
    fraction = data_percentage if fraction_ok else 0.7

    # Work on the transposed sample: every row but the last is a feature,
    # the last row holds the labels.
    by_column = frame.sample(frac=fraction).T
    X = by_column[:-1].T
    Y = np.array(by_column[-1:])[0]

    网格交叉验证(X, Y)
#===========================================================================
# Build the model from the parameters obtained by the grid search.
def 多项式模型(X, Y):
    """Fit an SVC with fixed hyper-parameters and score it on its own data.

    Args:
        X: Training features.
        Y: Training labels.

    Returns:
        A tuple ``(clf, score)``: the fitted classifier and the value of
        ``acc.get`` comparing its predictions on ``X`` with ``Y``.
    """
    fixed_params = dict(
        kernel='rbf',
        C=0.1,
        probability=True,
        decision_function_shape='ovo',
        random_state=0,
    )
    model = svm.SVC(**fixed_params)
    model.fit(X, Y)
    train_score = acc.get(model.predict(X), Y)
    return model, train_score
# Train the model repeatedly and persist the best one.
def 多次训练并保存模型(train_percentage=0.7, fold=5000, music_csv_file_path=None, model_out_f=None):  # fit_dump_model
    """Train on many random splits, keep the best-scoring model, save it.

    Each round shuffles the data, splits it into train/test parts, fits a
    model through ``多项式模型`` and combines the two scores as
    ``0.35 * train + 0.65 * test``. The model with the highest combined
    score is dumped with joblib once all rounds are finished.

    Args:
        train_percentage: ``train_size`` handed to ``train_test_split``.
        fold: Number of training rounds (converted with ``int``).
        music_csv_file_path: Feature CSV; when falsy the global
            ``歌曲特征文件存放路径`` is used.
        model_out_f: Output file of the model; when falsy the global
            ``模型保存路径`` is used.

    Returns:
        None.
    """
    if not music_csv_file_path:
        music_csv_file_path = 歌曲特征文件存放路径
    data = pd.read_csv(music_csv_file_path, sep=',', header=None, encoding='utf-8')

    # Best round so far as (combined, train, test, classifier).
    best = None
    for index in range(1, int(fold) + 1):
        print(index)
        # Last column is the label, everything before it is a feature.
        shuffled = shuffle(data).T
        X = shuffled[:-1].T
        Y = np.array(shuffled[-1:])[0]
        # No random seed is fixed here, so every round gets a new split.
        x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=train_percentage)

        clf, train_source = 多项式模型(x_train, y_train)
        test_source = acc.get(clf.predict(x_test), y_test)
        source = 0.35 * train_source + 0.65 * test_source

        # Replace the record holder only on a strictly better score.
        if best is None or best[0] < source:
            best = (source, train_source, test_source, clf)

        print('第%d次epoch训练,训练集上的正确率为:%0.2f, 测试集上正确率为:%0.2f,加权平均正确率为:%0.2f' % (
            index, train_source, test_source, source))

    if best is None:
        max_source = max_train_source = max_test_source = best_clf = None
    else:
        max_source, max_train_source, max_test_source, best_clf = best

    print('最优模型效果:训练集上的正确率为:%0.2f,测试集上的正确率为:%0.2f, 加权评均正确率为:%0.2f' % (
        max_train_source, max_test_source, max_source))
    print('最优模型是:')
    print(best_clf)

    # Persist the winning model.
    if not model_out_f:
        model_out_f = 模型保存路径
    joblib.dump(best_clf, model_out_f)
if __name__ == '__main__':
    模型保存路径 = './data/music_model.pkl'  # default_model_file_path
    歌曲特征文件存放路径 = './data/music_features.csv'  # default_music_csv_file_path
    数值化标签路径 = './data/music_index_label.csv'  # music_index_label_path

    # Step 1 (comment out again once it has been run): grid search.
    # print('='*30 + '网格训练寻找最合适模型开始。。。' + '='*30)
    # start= time.time()
    # 交叉验证主函数(music_csv_file_path= None, data_percentage = 0.7)
    # end = time.time()
    # print('寻找最佳模型共耗时%.2f'%(end-start))

    # Step 2: repeated training, then saving the best model.
    rule = '=' * 30
    print(rule + '网格训练寻找最合适模型开始。。。' + rule)
    started_at = time.time()
    多次训练并保存模型(music_csv_file_path=None, model_out_f=None)
    elapsed = time.time() - started_at
    print('训练模型共耗时%.2f' % elapsed)
svm_main.py
#coding:utf-8
'''完成反向字典制作及预测'''
#from music_category import svm
#from music_category import feature
import feature
import pandas as pd
import numpy as np
from sklearn.externals import joblib#保存模型模块
import sys
import time
# CSV file read by 反向字典制作() to obtain the tag/index pairs.
数值化标签路径 = './data/music_index_label.csv'
# svm.cross_validation(data_percentage=0.99)
def 加载模型(model_f=None):  # load_model
    """Load a persisted model with joblib.

    Args:
        model_f: File to load; when falsy the global ``模型保存路径`` is used.

    Returns:
        Whatever object ``joblib.load`` restores from the file.
    """
    source = model_f if model_f else 模型保存路径
    return joblib.load(source)
# Build the reverse dictionary used when predicting a music file.
def 反向字典制作():  # fetch_index_label
    """Read the tag/index CSV and return the mapping index -> tag.

    The file at ``数值化标签路径`` is read without a header; the first
    column is used as the value and the second column as the key.

    Returns:
        A ``dict`` keyed by the second CSV column with the first CSV
        column as value.
    """
    table = pd.read_csv(数值化标签路径, header=None, encoding='utf-8')
    rows = np.array(table).tolist()
    return {row[1]: row[0] for row in rows}
# Reverse dictionary returned by 反向字典制作(); 预测() looks labels up in it.
反向字典 = 反向字典制作()
# Predict the numeric class, then translate it via the reverse dictionary.
def 预测(clf, X):
    """Predict the label of a single feature vector.

    Args:
        clf: Object with a ``predict`` method that accepts a list of
            samples.
        X: Feature vector of one sample; it is wrapped in a list before
            being passed to ``clf.predict``.

    Returns:
        The entry of the global ``反向字典`` for the first predicted value.
    """
    predicted = clf.predict([X])
    return 反向字典[predicted[0]]
if __name__ == '__main__':
    模型保存路径 = './data/music_model.pkl'
    数值化标签路径 = './data/music_index_label.csv'

    clf = 加载模型()
    # svm.多次训练并保存模型(train_percentage = 0.9, fold = 1000)

    # Other sample files; the trailing note is the tag written next to
    # each path in the original script.
    # path = './data/test/50 Cent - Ready For War.mp3'  # excited
    # path = './data/test/A*Teens - Floorfiller.mp3'  # pop
    path = './data/test/Maize - I Like You-浪漫.mp3'  # nostalgic

    music_feature = feature.获取单首歌曲特征(path)
    # music_feature = music_feature.reshape(1,-1)
    print('预测标签为:%s' % 预测(clf, music_feature))
acc.py
# encoding:utf-8
def get(res, tes):
    """Return the percentage of positions at which ``res`` and ``tes`` agree.

    Elements are compared pairwise, so plain lists/tuples work as well as
    numpy arrays.

    Args:
        res: Predicted labels (any sized sequence of scalars).
        tes: Reference labels, same length as ``res``.

    Returns:
        ``100 * matches / len(res)`` as a float in the range 0-100, or
        ``0.0`` when both inputs are empty.

    Raises:
        ValueError: If the two inputs differ in length.
    """
    n = len(res)
    if n != len(tes):
        raise ValueError(
            'res and tes must have the same length, got %d and %d' % (n, len(tes)))
    if n == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    pre = sum(1 for predicted, expected in zip(res, tes) if predicted == expected)
    return (pre * 100) / n