# -*- coding:utf-8 -*-
#Author: shenying
#Date: 18-7-19 11:39 AM
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
import numpy as np
import matplotlib.pyplot as plt
from pydub import AudioSegment
import random
import sys
import glob
import IPython
from IPython.display import Image
from td_utils import *
from keras.callbacks import ModelCheckpoint
from keras.models import Model,load_model,Sequential
from keras.layers import Dense,Activation,Dropout,Input,Masking,TimeDistributed,LSTM,Conv1D
from keras.layers import GRU,Bidirectional,BatchNormalization,Reshape
from keras.optimizers import Adam
# IPython.display.Audio("./raw_data/activates/1.wav")
# IPython.display.Audio('/home/shenying/dl/code/5/raw_data/negatives/4.wav')
# IPython.display.Audio('/home/shenying/dl/code/5/raw_data/backgrounds/1.wav')
# IPython.display.Audio("audio_examples/example_train.wav")
# x = graph_spectrogram("audio_examples/example_train.wav")
# plt.show()
# _, data = wavfile.read("audio_examples/example_train.wav")
# print("Time steps in audio recording before spectrogram", data[:,0].shape)
# print("Time steps in input after spectrogram", x.shape)
# print("background len: " + str(len(backgrounds[0])))
# print("activate[0] len: " + str(len(activates[0])))
# print("activate[1] len: " + str(len(activates[1])))
def get_random_time_segment(segment_ms):
    # Pick a random segment of length segment_ms inside a 10,000 ms clip.
    segment_start = np.random.randint(low=0, high=10000 - segment_ms)
    segment_end = segment_start + segment_ms - 1
    return (segment_start, segment_end)
def is_overlapping(segment_time, previous_segments):
    # True if segment_time overlaps any previously inserted segment.
    segment_start, segment_end = segment_time
    overlap = False
    for previous_start, previous_end in previous_segments:
        if segment_start <= previous_end and segment_end >= previous_start:
            overlap = True
    return overlap
def insert_audio_clip(background, audio_clip, previous_segments):
    # Overlay audio_clip onto background at a random position that does not
    # overlap any previously inserted segment.
    segment_ms = len(audio_clip)
    segment_time = get_random_time_segment(segment_ms)
    while is_overlapping(segment_time, previous_segments):
        segment_time = get_random_time_segment(segment_ms)
    previous_segments.append(segment_time)
    new_background = background.overlay(audio_clip, position=segment_time[0])
    return new_background, segment_time
def insert_ones(y, segment_end_ms):
    # Set the 50 output timesteps following the segment end to 1.
    segment_end_y = int(segment_end_ms * Ty / 10000.0)
    for i in range(segment_end_y + 1, segment_end_y + 51):
        if i < Ty:
            y[0, i] = 1
    return y
def chime_on_activate(filename, predictions, threshold):
    # Overlay a chime wherever the prediction exceeds threshold, waiting at
    # least 75 timesteps between consecutive chimes.
    audio_clip = AudioSegment.from_wav(filename)
    chime = AudioSegment.from_wav(chime_file)
    Ty = predictions.shape[1]
    consecutive_timesteps = 0
    for i in range(Ty):
        consecutive_timesteps += 1
        if predictions[0, i, 0] > threshold and consecutive_timesteps > 75:
            audio_clip = audio_clip.overlay(chime, position=((i / Ty) * audio_clip.duration_seconds) * 1000)
            consecutive_timesteps = 0
    audio_clip.export("chime_output.wav", format='wav')
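# NOTE: create_training_example is called in __main__ below but its definition
# is missing from this file. The sketch here is a reconstruction built from the
# helpers above plus graph_spectrogram / match_target_amplitude from td_utils;
# the clip counts (0-4 activates, 0-2 negatives) and the -20 dB background
# attenuation are assumptions, not recovered source.
def create_training_example(background, activates, negatives):
    background = background - 20                      # quieten the background
    y = np.zeros((1, Ty))                             # one label per output timestep
    previous_segments = []
    for i in np.random.randint(len(activates), size=np.random.randint(0, 5)):
        background, segment_time = insert_audio_clip(background, activates[i], previous_segments)
        y = insert_ones(y, segment_time[1])           # label the 50 steps after each "activate"
    for i in np.random.randint(len(negatives), size=np.random.randint(0, 3)):
        background, _ = insert_audio_clip(background, negatives[i], previous_segments)
    background = match_target_amplitude(background, -20.0)
    background.export("train.wav", format="wav")
    x = graph_spectrogram("train.wav")                # shape (n_freq, Tx)
    return x, y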
def preprocess_audio(filename):
    # Pad/trim a user recording to exactly 10 s at 44100 Hz, in place.
    padding = AudioSegment.silent(duration=10000)
    segment = AudioSegment.from_wav(filename)[:10000]
    segment = padding.overlay(segment)
    segment = segment.set_frame_rate(44100)
    segment.export(filename, format='wav')
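# NOTE: the commented-out code in __main__ references model() and
# detect_triggerword(), neither of which is defined in this file. The sketches
# below follow the standard Conv1D -> GRU -> GRU -> TimeDistributed(Dense)
# trigger-word architecture that the imports suggest; the layer sizes and
# dropout rates are assumptions.
def model(input_shape):
    X_input = Input(shape=input_shape)
    X = Conv1D(196, kernel_size=15, strides=4)(X_input)      # (Tx, n_freq) -> (1375, 196)
    X = BatchNormalization()(X)
    X = Activation('relu')(X)
    X = Dropout(0.8)(X)
    X = GRU(units=128, return_sequences=True)(X)             # one output per timestep
    X = Dropout(0.8)(X)
    X = BatchNormalization()(X)
    X = GRU(units=128, return_sequences=True)(X)
    X = Dropout(0.8)(X)
    X = BatchNormalization()(X)
    X = Dropout(0.8)(X)
    X = TimeDistributed(Dense(1, activation='sigmoid'))(X)   # P(trigger) per timestep
    return Model(inputs=X_input, outputs=X)
def detect_triggerword(filename):
    # Run the network over one 10 s clip; assumes a trained global `model`
    # (built or loaded in __main__) exists before this is called.
    x = graph_spectrogram(filename)
    x = np.expand_dims(x.swapaxes(0, 1), axis=0)             # (n_freq, Tx) -> (1, Tx, n_freq)
    predictions = model.predict(x)
    plt.plot(predictions[0, :, 0])
    plt.ylabel('probability')
    plt.show()
    return predictions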
if __name__ == "__main__":
    Tx = 5511       # number of timesteps in the spectrogram input
    n_freq = 101    # number of frequencies at each spectrogram timestep
    Ty = 1375       # number of timesteps in the model output
    activates, negatives, backgrounds = load_raw_audio()
    # overlap1 = is_overlapping((950, 1430), [(2000, 2550), (260, 949)])
    # overlap2 = is_overlapping((2305, 2950), [(824, 1532), (1900, 2305), (3424, 3656)])
    # print("Overlap 1 = ", overlap1)
    # print("Overlap 2 = ", overlap2)
    # np.random.seed(5)
    # audio_clip, segment_time = insert_audio_clip(backgrounds[0], activates[0], [(3790, 4400)])
    # audio_clip.export("insert_test1.wav", format="wav")
    # print("Segment Time: ", segment_time)
    # IPython.display.Audio("insert_test1.wav")
    # IPython.display.Audio("audio_examples/insert_reference.wav")
    # arr1 = insert_ones(np.zeros((1, Ty)), 9700)
    # plt.plot(insert_ones(arr1, 4251)[0, :])
    # plt.show()
    # print("sanity checks:", arr1[0][1333], arr1[0][634], arr1[0][635])
    x, y = create_training_example(backgrounds[0], activates, negatives)
    # IPython.display.Audio("train.wav")
    # IPython.display.Audio("audio_examples/train_reference.wav")
    plt.plot(y[0, :])
    plt.show()
    # X = np.load("./XY_train/X.npy")
    # Y = np.load("./XY_train/Y.npy")
    #
    # X_dev = np.load("./XY_dev/X_dev.npy")
    # Y_dev = np.load("./XY_dev/Y_dev.npy")
    #
    # model = model(input_shape=(Tx, n_freq))
    # model.summary()
    # print("load model:")
    # model = load_model('/home/shenying/dl/code/5/model3/tr_model.h5')
    # model.summary()
    # opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, decay=0.01)
    # model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    # model.fit(X, Y, batch_size=5, epochs=10)
    # loss, acc = model.evaluate(X_dev, Y_dev)
    # print("Dev set accuracy = ", acc)
    chime_file = "audio_examples/chime.wav"
    # plt.figure()
    #
    # IPython.display.Audio('/home/shenying/dl/code/5/raw_data/dev/1.wav')
    # IPython.display.Audio('/home/shenying/dl/code/5/raw_data/dev/2.wav')
    # filename = "./raw_data/dev/1.wav"
    # prediction = detect_triggerword(filename)
    # chime_on_activate(filename, prediction, 0.5)
    # IPython.display.Audio("./chime_output.wav")
    # filename = "./raw_data/dev/2.wav"
    # prediction = detect_triggerword(filename)
    # chime_on_activate(filename, prediction, 0.5)
    # IPython.display.Audio("./chime_output.wav")
    # my_filename = "audio_examples/my_audio.wav"
    # preprocess_audio(my_filename)
    # prediction = detect_triggerword(my_filename)
    # chime_on_activate(my_filename, prediction, 0.5)