about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Thomas Mesnard <thomas.mesnard@ens.fr> 2015-12-28 20:35:38 +0100
committer Thomas Mesnard <thomas.mesnard@ens.fr> 2015-12-28 20:35:38 +0100
commit e8e37dee0c5c846b1aa2dd24dc99095191f72a9b (patch)
tree d033f04eaca8178ada7ee966c4d8e56df45a6ace
parent c9ba2abc7172b4657216e0fcc638098060d7f753 (diff)
download pgm-ctc-e8e37dee0c5c846b1aa2dd24dc99095191f72a9b.tar.gz
pgm-ctc-e8e37dee0c5c846b1aa2dd24dc99095191f72a9b.zip
Kind of works
-rw-r--r-- ctc.py 23
-rw-r--r-- edit_distance.py 24
-rw-r--r-- main.py 31
3 files changed, 57 insertions, 21 deletions
diff --git a/ctc.py b/ctc.py
index 5f34b2c..734e354 100644
--- a/ctc.py
+++ b/ctc.py
@@ -28,14 +28,14 @@ class CTC(Brick):
- Return the probability found at the end of that sequence
"""
T = probs.shape[0]
+ B = probs.shape[1]
C = probs.shape[2]-1
L = l.shape[0]
S = 2*L+1
- B = l.shape[1]
# l_blk = l with interleaved blanks
l_blk = C * tensor.ones((S, B), dtype='int32')
- l_blk = tensor.set_subtensor(l_blk[1::2,:],l)
+ l_blk = tensor.set_subtensor(l_blk[1::2,:], l)
l_blk = l_blk.T # now l_blk is B x S
# dimension of alpha (corresponds to alpha hat in the paper) :
@@ -43,13 +43,10 @@ class CTC(Brick):
# dimension of c :
# T x B
# first value of alpha (size B x S)
- alpha0 = tensor.concatenate([
- probs[0, :, C][:,None],
- probs[0][tensor.arange(B), l[0]][:,None],
- tensor.zeros((B, S-2))
+ alpha0 = tensor.concatenate([ tensor.ones((B, 1)),
+ tensor.zeros((B, S-1))
], axis=1)
- c0 = alpha0.sum(axis=1)
- alpha0 = alpha0 / c0[:,None]
+ c0 = tensor.ones((B,))
# recursion
l_blk_2 = tensor.concatenate([-tensor.ones((B,2)), l_blk[:,:-2]], axis=1)
@@ -76,8 +73,11 @@ class CTC(Brick):
sequences=[probs, probs_mask],
outputs_info=[alpha0, c0])
+ prob = tensor.log(c).sum(axis=0) + tensor.log(alpha[-1][tensor.arange(B), 2*l_len.astype('int32')-1]
+ + alpha[-1][tensor.arange(B), 2*l_len.astype('int32')])
+
# return the log probability of the labellings
- return tensor.log(c).sum(axis=0)
+ return -prob
def best_path_decoding(self, probs, probs_mask=None):
@@ -89,7 +89,8 @@ class CTC(Brick):
maxprob = probs.argmax(axis=2)
is_double = tensor.eq(maxprob[:-1], maxprob[1:])
maxprob = tensor.switch(tensor.concatenate([tensor.zeros((1,B)), is_double]),
- maxprob, C*tensor.ones_like(maxprob))
+ C*tensor.ones_like(maxprob), maxprob)
+ # maxprob = theano.printing.Print('maxprob')(maxprob.T).T
# returns two values :
# label : (T x) T x B
@@ -105,7 +106,7 @@ class CTC(Brick):
[label_length, label], _ = scan(fn=recursion,
sequences=[maxprob, probs_mask],
- outputs_info=[tensor.zeros((B,),dtype='int32'),tensor.zeros((T,B))])
+ outputs_info=[tensor.zeros((B,),dtype='int32'),-tensor.ones((T,B))])
return label[-1], label_length[-1]
diff --git a/edit_distance.py b/edit_distance.py
new file mode 100644
index 0000000..d76cc00
--- /dev/null
+++ b/edit_distance.py
@@ -0,0 +1,24 @@
+import numpy
+import theano
+from theano import tensor
+
+@theano.compile.ops.as_op(itypes=[tensor.imatrix, tensor.ivector, tensor.imatrix, tensor.ivector],
+ otypes=[tensor.ivector])
+def batch_edit_distance(a, a_len, b, b_len):
+ B = a.shape[0]
+ assert b.shape[0] == B
+
+ q = max(a.shape[1], b.shape[1]) * numpy.ones((B, a.shape[1]+1, b.shape[1]+1), dtype='int32')
+ q[:, 0, 0] = 0
+
+ for i in range(a.shape[1]+1):
+ for j in range(b.shape[1]+1):
+ if i > 0:
+ q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i-1, j]+1)
+ if j > 0:
+ q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i, j-1]+1)
+ if i > 0 and j > 0:
+ q[:, i, j] = numpy.minimum(q[:, i, j], q[:, i-1, j-1]+numpy.not_equal(a[:, i-1], b[:, j-1]))
+ return q[numpy.arange(B), a_len, b_len]
+
+# vim: set sts=4 ts=4 sw=4 tw=0 et :
diff --git a/main.py b/main.py
index e288b9b..e505348 100644
--- a/main.py
+++ b/main.py
@@ -8,7 +8,7 @@ from ctc import CTC
from blocks.initialization import IsotropicGaussian, Constant
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
-from blocks.algorithms import (GradientDescent, Scale,
+from blocks.algorithms import (GradientDescent, Scale, AdaDelta, RemoveNotFinite,
StepClipping, CompositeRule)
from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
from blocks.main_loop import MainLoop
@@ -18,10 +18,12 @@ from blocks.graph import ComputationGraph
from dummy_dataset import setup_datastream
+from edit_distance import batch_edit_distance
+
floatX = theano.config.floatX
-n_epochs = 200
+n_epochs = 10000
num_input_classes = 5
h_dim = 20
rec_dim = 20
@@ -63,6 +65,10 @@ y_hat = tensor.nnet.softmax(
).reshape((y_hat_pre.shape[0], y_hat_pre.shape[1], -1))
y_hat.name = 'y_hat'
+#y_hat = theano.printing.Print('y_hat')(y_hat)
+#y = theano.printing.Print('y')(y)
+#y_mask = theano.printing.Print('y_mask')(y_mask)
+
y_hat_mask = input_mask
# Cost
@@ -71,6 +77,10 @@ cost = CTC().apply(y, y_hat, y_len, y_hat_mask).mean()
cost.name = 'CTC'
dl, dl_length = CTC().best_path_decoding(y_hat, y_hat_mask)
+
+edit_distance = batch_edit_distance(dl.T.astype('int32'), dl_length, y.T.astype('int32'), y_len.astype('int32')).mean()
+edit_distance.name = 'edit_distance'
+
L = y.shape[0]
B = y.shape[1]
dl = dl[:L, :]
@@ -80,6 +90,7 @@ is_error = tensor.switch(is_error.sum(axis=0), tensor.ones((B,)), tensor.neq(y_l
error_rate = is_error.mean()
error_rate.name = 'error_rate'
+
# Initialization
for brick in [input_to_h, pre_lstm, lstm, rec_to_o]:
brick.weights_init = IsotropicGaussian(0.01)
@@ -87,23 +98,23 @@ for brick in [input_to_h, pre_lstm, lstm, rec_to_o]:
brick.initialize()
print('Bulding DataStream ...')
-ds, stream = setup_datastream(batch_size=10,
- nb_examples=1000, rng_seed=123,
- min_out_len=10, max_out_len=20)
-valid_ds, valid_stream = setup_datastream(batch_size=10,
+ds, stream = setup_datastream(batch_size=100,
+ nb_examples=10000, rng_seed=123,
+ min_out_len=5, max_out_len=10)
+valid_ds, valid_stream = setup_datastream(batch_size=100,
nb_examples=1000, rng_seed=456,
- min_out_len=10, max_out_len=20)
+ min_out_len=5, max_out_len=10)
print('Bulding training process...')
algorithm = GradientDescent(cost=cost,
parameters=ComputationGraph(cost).parameters,
- step_rule=CompositeRule([StepClipping(10.0),
- Scale(0.02)]))
+ step_rule=CompositeRule([RemoveNotFinite(), AdaDelta()]))
+ # CompositeRule([StepClipping(10.0), Scale(0.02)]))
monitor_cost = TrainingDataMonitoring([cost, error_rate],
prefix="train",
after_epoch=True)
-monitor_valid = DataStreamMonitoring([cost, error_rate],
+monitor_valid = DataStreamMonitoring([cost, error_rate, edit_distance],
data_stream=valid_stream,
prefix="valid",
after_epoch=True)