# Andrew Ng Deep Learning (吴恩达深度学习), Course 5 Week 3: Trigger word detection - v1

# -*- coding:utf-8 -*- 
#Author: shenying
#Date: 18-7-19 上午11:39

import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'


import numpy as np
from pydub import AudioSegment
import random
import sys
import os
import glob
import IPython
from IPython.display import Image
from td_utils import *

from keras.callbacks import ModelCheckpoint
from keras.models import Model,load_model,Sequential
from keras.layers import Dense,Activation,Dropout,Input,Masking,TimeDistributed,LSTM,Conv1D
from keras.layers import GRU,Bidirectional,BatchNormalization,Reshape
from keras.optimizers import Adam


# IPython.display.Audio("./raw_data/activates/1.wav")
# IPython.display.Audio('/home/shenying/dl/code/5/raw_data/negatives/4.wav')
# IPython.display.Audio('/home/shenying/dl/code/5/raw_data/backgrounds/1.wav')
# IPython.display.Audio("audio_examples/example_train.wav")
# x = graph_spectrogram("audio_examples/example_train.wav")
# plt.show()
# _, data = wavfile.read("audio_examples/example_train.wav")
# print("Time steps in audio recording before spectrogram", data[:,0].shape)
# print("Time steps in input after spectrogram", x.shape)


# print("background len: " + str(len(backgrounds[0])))
# print("activate[0] len: " + str(len(activates[0])))
# print("activate[1] len: " + str(len(activates[1])))
def get_random_time_segment(segment_ms, background_ms=10000):
    """Pick a random time window of ``segment_ms`` inside a background clip.

    Args:
        segment_ms: Duration of the clip to place, in milliseconds.
        background_ms: Duration of the background, in milliseconds.
            Defaults to 10000, the value that used to be hard-coded here.

    Returns:
        A ``(segment_start, segment_end)`` tuple. ``segment_end`` is
        ``segment_start + segment_ms - 1``, i.e. both ends are inclusive.

    Raises:
        ValueError: If the clip is not strictly shorter than the background,
            so that no start position can be drawn.
    """
    if segment_ms >= background_ms:
        # np.random.randint would raise ValueError here as well (low >= high);
        # raising explicitly keeps the exception type and names the cause.
        raise ValueError(
            "segment_ms ({}) must be smaller than background_ms ({})".format(
                segment_ms, background_ms
            )
        )
    # `high` is exclusive, so the latest possible start still leaves room
    # for the whole clip inside the background.
    segment_start = np.random.randint(low=0, high=background_ms - segment_ms)
    segment_end = segment_start + segment_ms - 1
    return (segment_start, segment_end)
def is_overlapping(segment_time, previous_segments):
    """Tell whether a time segment intersects any already-placed segment.

    Args:
        segment_time: ``(start, end)`` pair for the candidate segment.
        previous_segments: Iterable of ``(start, end)`` pairs already in use.

    Returns:
        True if the candidate shares at least one point with any previous
        segment (end points count as part of a segment), otherwise False.
    """
    new_start, new_end = segment_time
    return any(
        new_start <= old_end and new_end >= old_start
        for old_start, old_end in previous_segments
    )
def insert_audio_clip(background, audio_clip, previous_segments, max_attempts=10000):
    """Overlay ``audio_clip`` on ``background`` at a random non-overlapping spot.

    Args:
        background: Object exposing ``overlay(clip, position=...)``; the
            result of that call is returned as the new background.
        audio_clip: Clip to insert; ``len(audio_clip)`` is used as its
            duration when choosing a time segment.
        previous_segments: List of ``(start, end)`` segments already in use.
            The chosen segment is appended to this list in place so later
            calls avoid it.
        max_attempts: Upper bound on random placements tried before giving
            up. The original loop retried forever, which hangs when the
            background has no free slot left for the clip.

    Returns:
        ``(new_background, segment_time)`` where ``segment_time`` is the
        ``(start, end)`` pair at which the clip was placed.

    Raises:
        RuntimeError: If no non-overlapping position was found within
            ``max_attempts`` tries.
    """
    segment_ms = len(audio_clip)
    # Rejection sampling: draw positions until one is free of collisions.
    for _ in range(max_attempts):
        segment_time = get_random_time_segment(segment_ms)
        if not is_overlapping(segment_time, previous_segments):
            break
    else:
        raise RuntimeError(
            "could not place a {} ms clip without overlap after {} attempts".format(
                segment_ms, max_attempts
            )
        )
    previous_segments.append(segment_time)
    new_background = background.overlay(audio_clip, position=segment_time[0])
    return new_background, segment_time
def insert_ones(y, segment_end_ms):
    """Set the 50 label steps that follow the end of a clip to 1.

    The body of this function was truncated in the source (the text between
    ``if i<`` and a later ``>threshold`` was lost); the guard and the
    assignment below restore it. The result matches the commented sanity
    check in the script section: after inserting at 9700 ms and 4251 ms,
    indices 1333 and 635 stay 0 while index 634 is 1.

    Args:
        y: Label array indexed as ``y[0, i]``; updated in place.
        segment_end_ms: End time of the inserted clip, in milliseconds on
            the 10000 ms scale used throughout this file.

    Returns:
        The same array ``y`` with the affected steps set to 1.
    """
    # Take the label length from y itself: the script-level ``Ty`` is only
    # defined when the file runs as __main__, and callers pass np.zeros((1, Ty)).
    Ty = y.shape[1]
    segment_end_y = int(segment_end_ms * Ty / 10000)
    for i in range(segment_end_y + 1, segment_end_y + 51):
        # Steps past the end of the label array are simply dropped.
        if i < Ty:
            y[0, i] = 1
    return y


# NOTE(review): the source text between insert_ones and the tail of
# chime_on_activate was lost. The script section also refers to
# create_training_example, model and detect_triggerword; unless td_utils
# provides them, their definitions must be restored from the original file.
def chime_on_activate(filename, predictions, threshold):
    """Overlay a chime on an audio file where predictions exceed ``threshold``.

    NOTE(review): only the last three statements of this function survived
    in the source. The signature follows the call
    ``chime_on_activate(filename, prediction, 0.5)`` in the script section;
    the loading code, the loop header and the indexing of ``predictions``
    are reconstructed and must be confirmed against the original.

    Args:
        filename: Path of the wav file to annotate.
        predictions: Model output; assumed to be indexable as
            ``predictions[0, i, 0]`` -- TODO confirm.
        threshold: Value a prediction must exceed to trigger a chime.
    """
    audio_clip = AudioSegment.from_wav(filename)
    # ``chime_file`` is the module-level path set in the __main__ section.
    chime = AudioSegment.from_wav(chime_file)
    Ty = predictions.shape[1]  # presumably (1, Ty, 1) -- TODO confirm
    consecutive_timesteps = 0
    for i in range(Ty):
        consecutive_timesteps += 1
        # Require more than 75 steps since the previous chime so that one
        # detection does not trigger a burst of chimes.
        if predictions[0, i, 0] > threshold and consecutive_timesteps > 75:
            audio_clip = audio_clip.overlay(chime, position=((i / Ty) * audio_clip.duration_seconds) * 1000)
            consecutive_timesteps = 0
    audio_clip.export("chime_output_pre_model.wav", format='wav')
def preprocess_audio(filename):
    """Rewrite a wav file in place as a fixed-length, 44100 Hz clip.

    The file is read, cut to its first 10000 units (``[:10000]``), laid over
    a silent segment created with ``duration=10000``, resampled to a 44100
    frame rate and exported back to the same path, overwriting the input.

    Args:
        filename: Path of the wav file to read and overwrite.
    """
    silence = AudioSegment.silent(duration=10000)
    clip = AudioSegment.from_wav(filename)[:10000]
    # Overlaying onto silence pads short recordings up to the full length.
    padded = silence.overlay(clip).set_frame_rate(44100)
    padded.export(filename, format='wav')

if __name__=="__main__":
    # Script entry point: load the raw audio, build one synthetic training
    # example and plot its label vector. The commented-out sections are the
    # other steps of the exercise (sanity checks, training, evaluation,
    # chime demo) and are left disabled.

    # Tx and n_freq form the model input shape (see the commented model()
    # call below); Ty is the label length. They are module-level globals
    # because helper functions in this file read them.
    Tx=5511
    n_freq=101
    Ty=1375
    activates,negatives,backgrounds=load_raw_audio()


    # overlap1 = is_overlapping((950, 1430), [(2000, 2550), (260, 949)])
    # overlap2 = is_overlapping((2305, 2950), [(824, 1532), (1900, 2305), (3424, 3656)])
    # print("Overlap 1 = ", overlap1)
    # print("Overlap 2 = ", overlap2)

    # np.random.seed(5)
    # audio_clip, segment_time = insert_audio_clip(backgrounds[0], activates[0], [(3790, 4400)])
    # audio_clip.export("insert_test1.wav", format="wav")
    # print("Segment Time: ", segment_time)
    # IPython.display.Audio("insert_test1.wav")
    # IPython.display.Audio("audio_examples/insert_reference.wav")

    # arr1 = insert_ones(np.zeros((1, Ty)), 9700)
    # plt.plot(insert_ones(arr1, 4251)[0, :])
    # plt.show()
    # print("sanity checks:", arr1[0][1333], arr1[0][634], arr1[0][635])

    x, y = create_training_example(backgrounds[0], activates, negatives)
    # IPython.display.Audio("train.wav")
    # IPython.display.Audio("audio_examples/train_reference.wav")
    # Fixed: the original wrote plt.plot[y[0,:]], which subscripts the
    # function object and raises TypeError instead of drawing the labels.
    # NOTE(review): plt is not imported in this file; presumably it comes
    # from `from td_utils import *` -- verify.
    plt.plot(y[0,:])
    plt.show()

    # X = np.load("./XY_train/X.npy")
    # Y = np.load("./XY_train/Y.npy")
    #
    # X_dev = np.load("./XY_dev/X_dev.npy")
    # Y_dev = np.load("./XY_dev/Y_dev.npy")
    #
    # model = model(input_shape=(Tx, n_freq))
    # model.summary()

    # print("load model:")
    # model=load_model('/home/shenying/dl/code/5/model3/tr_model.h5')
    # model1.summary()
    # opt=Adam(lr=0.0001,beta_1=0.9,beta_2=0.999,decay=0.01)
    # model.compile(loss='binary_crossentropy',optimizer=opt,metrics=['accuracy'])
    # model.fit(X,Y,batch_size=5,epochs=10)

    # loss, acc = model.evaluate(X_dev, Y_dev)
    # print("Dev set accuracy = ", acc)

    # Path of the chime sound used by the (currently disabled) chime demo.
    chime_file="audio_examples/chime.wav"
    # plt.figure()
    #
    # IPython.display.Audio('/home/shenying/dl/code/5/raw_data/dev/1.wav')
    # IPython.display.Audio('/home/shenying/dl/code/5/raw_data/dev/2.wav')

    # filename = "./raw_data/dev/1.wav"
    # prediction = detect_triggerword(filename)
    # chime_on_activate(filename, prediction, 0.5)
    # IPython.display.Audio("./chime_output.wav")

    # filename = "./raw_data/dev/2.wav"
    # prediction = detect_triggerword(filename)
    # chime_on_activate(filename, prediction, 0.5)
    # IPython.display.Audio("./chime_output.wav")
    # my_filename="audio_examples/my_audio.wav"
    # preprocess_audio(my_filename)
    # predition=detect_triggerword(my_filename)
    # chime_on_activate(my_filename,predition,0.5)



 

# You may also be interested in: (dl-deeping, learning)