|
19 | 19 | from six.moves import zip, range
|
20 | 20 | from util.audio import audiofile_to_input_vector
|
21 | 21 | from util.config import Config, initialize_globals
|
22 |
| -from util.ctc import ctc_label_dense_to_sparse |
23 | 22 | from util.flags import create_flags, FLAGS
|
24 | 23 | from util.logging import log_error
|
25 | 24 | from util.preprocess import pmap, preprocess
|
@@ -111,7 +110,14 @@ def create_windows(features):
|
111 | 110 | labels_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size, None], name="labels")
|
112 | 111 | label_lengths_ph = tf.placeholder(tf.int32, [FLAGS.test_batch_size], name="label_lengths")
|
113 | 112 |
|
114 |
| - sparse_labels = tf.cast(ctc_label_dense_to_sparse(labels_ph, label_lengths_ph, FLAGS.test_batch_size), tf.int32) |
| 113 | + # We add 1 to all elements of the transcript to avoid any zero values, |
| 114 | + # since zero is used as the end-of-sequence token when converting the |
| 115 | + # batch into a SparseTensor. Here we convert the placeholder back into a |
| 116 | + # SparseTensor and subtract one from every value to recover the real labels. |
| 117 | + sparse_labels = tf.contrib.layers.dense_to_sparse(labels_ph) |
| 118 | + neg_ones = tf.SparseTensor(sparse_labels.indices, -1 * tf.ones_like(sparse_labels.values), sparse_labels.dense_shape) |
| 119 | + sparse_labels = tf.sparse_add(sparse_labels, neg_ones) |
| 120 | + |
115 | 121 | loss = tf.nn.ctc_loss(labels=sparse_labels,
|
116 | 122 | inputs=layers['raw_logits'],
|
117 | 123 | sequence_length=inputs['input_lengths'])
|
@@ -143,7 +149,7 @@ def create_windows(features):
|
143 | 149 |
|
144 | 150 | features = pad_to_dense(batch['features'].values)
|
145 | 151 | features_len = batch['features_len'].values
|
146 |
| - labels = pad_to_dense(batch['transcript'].values) |
| 152 | + labels = pad_to_dense(batch['transcript'].values + 1) |
147 | 153 | label_lengths = batch['transcript_len'].values
|
148 | 154 |
|
149 | 155 | logits, loss_ = session.run([transposed, loss], feed_dict={
|
|
0 commit comments