loss = tf.nn.ctc_loss(labels=targets, inputs=logits, sequence_length=seq_len)
----->
loss = tf.nn.ctc_loss(labels=targets, inputs=logits, sequence_length=seq_len,
                      preprocess_collapse_repeated=True)
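For context, here is a minimal sketch of how this call sits in a TF 1.x graph (the placeholder shapes and num_classes are assumptions, not taken from the original). tf.nn.ctc_loss expects time-major logits, a SparseTensor of labels, and a per-example sequence length; preprocess_collapse_repeated=True collapses adjacent repeated labels before the loss is computed, which reduces the number of time steps the target requires.

import tensorflow as tf

num_classes = 28  # hypothetical: e.g. 26 characters + space + CTC blank

# Time-major logits: [max_time, batch_size, num_classes] (the default layout).
logits = tf.placeholder(tf.float32, [None, None, num_classes])
targets = tf.sparse_placeholder(tf.int32)   # CTC labels must be a SparseTensor
seq_len = tf.placeholder(tf.int32, [None])  # time steps per batch element

loss = tf.nn.ctc_loss(labels=targets, inputs=logits,
                      sequence_length=seq_len,
                      preprocess_collapse_repeated=True)
cost = tf.reduce_mean(loss)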
import os
from itertools import groupby

def data_filter(train_path, valid_path, length):
    """Drop samples whose label is too long to fit in `length` CTC time steps."""

    def filter_dir(root, list_name):
        # `root` is expected to end with '/', matching the original path concatenation.
        data_list = []
        for sub in os.listdir(root):
            if sub.endswith(".txt"):
                continue
            with open(root + sub + '/' + list_name, 'r') as f:
                line = f.readline()
                while line:
                    if ',' in line:
                        one_data = line.strip().split(',')
                        label = one_data[1].split(' ')
                    else:
                        one_data = line.strip().split(' ')
                        label = one_data[1:]
                    label = [int(x) - 1 for x in label]
                    # CTC must emit a blank between adjacent identical labels,
                    # so every repeated pair costs one extra time step.
                    runs = [len(list(g)) for _, g in groupby(label)]
                    repeats = sum(u - 1 for u in runs if u > 1)
                    if len(label) + repeats <= length:
                        # What exactly was stored is cut off in the original;
                        # keep the raw line as a reasonable reconstruction.
                        data_list.append(line.strip())
                    line = f.readline()
        return data_list

    train_data_list = filter_dir(train_path, 'train_word.txt')
    # The validation branch was truncated in the original; 'valid_word.txt'
    # is an assumed filename mirroring the training list.
    valid_data_list = filter_dir(valid_path, 'valid_word.txt')
    return train_data_list, valid_data_list
length = 100
# The dataset roots were left blank here in the original; fill them in.
train_data_list, valid_data_list = data_filter('', '', length // 4)  # integer time-step budget
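To see why the repeat count matters, here is a quick check on a made-up label sequence (the values are illustrative): a valid CTC alignment must insert a blank between adjacent identical labels, so the required number of time steps is the label length plus the number of adjacent duplicates.

from itertools import groupby

label = [3, 7, 7, 7, 1, 1, 4]                      # made-up example
runs = [len(list(g)) for _, g in groupby(label)]   # run lengths: [1, 3, 2, 1]
repeats = sum(u - 1 for u in runs if u > 1)        # (3-1) + (2-1) = 3
print(len(label) + repeats)                        # 10 -> needs >= 10 time steps

Any sample where this total exceeds the available time steps would trigger the "Not enough time for target transition sequence" error, which is exactly what data_filter screens out.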