本帖训练一个简单的神经网络模型,用来判断声音是男是女。
本帖数据集取自voice-gender项目,这个项目使用了n种分类模型,并比较了准确率,但是它没有使用神经网络模型,本帖算是一个补充。
这个数据集是经过R语言处理过的,它提取出了.WAV文件的一些声音属性。如果你想自己从wav文件中提取声音属性,参看voice-gender项目中一个叫sound.R源码文件。
数据集字段:”meanfreq”,”sd”,”median”,”Q25″,”Q75″,”IQR”,”skew”,”kurt”,”sp.ent”,”sfm”,”mode”,”centroid”,”meanfun”,”minfun”,”maxfun”,”meandom”,”mindom”,”maxdom”,”dfrange”,”modindx”,”label”。最后一个字段标记了是男声还是女声,前面字段是声音属性。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
|
import
os
import
requests
import
pandas
as
pd
import
numpy
as
np
import
random
import
tensorflow
as
tf
# 0.12
from
sklearn
.
model_selection
import
train_test_split
# 下载数据集
if
not
os.path
.
exists
(
'voice.csv'
)
:
url
=
'http://blog.topspeedsnail.com/wp-content/uploads/2016/12/voice.csv'
data
=
requests
.
get
(
url
)
.
content
with
open
(
'voice.csv'
,
'wb'
)
as
f
:
f
.
write
(
data
)
voice_data
=
pd
.
read_csv
(
'voice.csv'
)
#print(voice_data.head())
#print(voice_data.tail())
voice_data
=
voice_data
.
values
# 分离声音特性和分类
voices
=
voice_data
[
:
,
:
-
1
]
labels
=
voice_data
[
:
,
-
1
:
]
# ['male'] ['female']
# 把分类转为one-hot
labels_tmp
=
[
]
for
label
in
labels
:
tmp
=
[
]
if
label
[
0
]
==
'male'
:
tmp
=
[
1.0
,
0.0
]
else
:
# 'female'
tmp
=
[
0.0
,
1.0
]
labels_tmp
.
append
(
tmp
)
labels
=
np
.
array
(
labels_tmp
)
# shuffle
voices_tmp
=
[
]
lables_tmp
=
[
]
index_shuf
=
[
i
for
i
in
range
(
len
(
voices
)
)
]
random
.
shuffle
(
index_shuf
)
for
i
in
index_shuf
:
voices_tmp
.
append
(
voices
[
i
]
)
lables_tmp
.
append
(
labels
[
i
]
)
voices
=
np
.
array
(
voices_tmp
)
labels
=
np
.
array
(
lables_tmp
)
train_x
,
test_x
,
train_y
,
test_y
=
train_test_split
(
voices
,
labels
,
test_size
=
0.1
)
banch_size
=
64
n_banch
=
len
(
train_x
)
/
/
banch
_size
X
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
None
,
voices
.
shape
[
-
1
]
]
)
# 20
Y
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
None
,
2
]
)
# 3层(feed-forward)
def
neural_network
(
)
:
w1
=
tf
.
Variable
(
tf
.
random_normal
(
[
voices
.
shape
[
-
1
]
,
512
]
,
stddev
=
0.5
)
)
b1
=
tf
.
Variable
(
tf
.
random_normal
(
[
512
]
)
)
output
=
tf
.
matmul
(
X
,
w1
)
+
b1
w2
=
tf
.
Variable
(
tf
.
random_normal
(
[
512
,
1024
]
,
stddev
=
.
5
)
)
b2
=
tf
.
Variable
(
tf
.
random_normal
(
[
1024
]
)
)
output
=
tf
.
nn
.
softmax
(
tf
.
matmul
(
output
,
w2
)
+
b2
)
w3
=
tf
.
Variable
(
tf
.
random_normal
(
[
1024
,
2
]
,
stddev
=
.
5
)
)
b3
=
tf
.
Variable
(
tf
.
random_normal
(
[
2
]
)
)
output
=
tf
.
nn
.
softmax
(
tf
.
matmul
(
output
,
w3
)
+
b3
)
return
output
# 训练神经网络
def
train_neural_network
(
)
:
output
=
neural_network
(
)
cost
=
tf
.
reduce_mean
(
tf
.
reduce_sum
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
output
,
Y
)
)
)
lr
=
tf
.
Variable
(
0.001
,
dtype
=
tf
.
float32
,
trainable
=
False
)
opt
=
tf
.
train
.
AdamOptimizer
(
learning_rate
=
lr
)
var_list
=
[
t
for
t
in
tf
.
trainable_variables
(
)
]
train_step
=
opt
.
minimize
(
cost
,
var_list
=
var_list
)
#saver = tf.train.Saver(tf.global_variables())
#saver.restore(sess, tf.train.latest_checkpoint('.'))
with
tf
.
Session
(
)
as
sess
:
sess
.
run
(
tf
.
global_variables_initializer
(
)
)
#summary_writer = tf.train.SummaryWriter('voices')
for
epoch
in
range
(
200
)
:
sess
.
run
(
tf
.
assign
(
lr
,
0.001
*
(
0.97
*
*
epoch
)
)
)
for
banch
in
range
(
n_banch
)
:
voice_banch
=
train_x
[
banch
*
banch_size
:
(
banch
+
1
)
*
(
banch_size
)
]
label_banch
=
train_y
[
banch
*
banch_size
:
(
banch
+
1
)
*
(
banch_size
)
]
_
,
loss
=
sess
.
run
(
[
train_step
,
cost
]
,
feed_dict
=
{
X
:
voice_banch
,
Y
:
label_banch
}
)
print
(
epoch
,
banch
,
loss
)
# 准确率
prediction
=
tf
.
equal
(
tf
.
argmax
(
output
,
1
)
,
tf
.
argmax
(
Y
,
1
)
)
accuracy
=
tf
.
reduce_mean
(
tf
.
cast
(
prediction
,
dtype
=
tf
.
float32
)
)
accuracy
=
sess
.
run
(
accuracy
,
feed_dict
=
{
X
:
test_x
,
Y
:
test_y
}
)
print
(
"准确率"
,
accuracy
)
#prediction = sess.run(output, feed_dict={X: test_x})
train_neural_network
(
)
|