# lstm.py (forked from rakeshvar/rnn_ctc)
import theano.tensor as tt
import theano as th
from theano.tensor.nnet import sigmoid
import numpy as np
from activations import activation_by_name
from weights import stacked_ortho_wts, share
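
# The helpers imported above come from this repo's companion modules. As used
# below, `activation_by_name(name)` is assumed to return an elementwise
# activation (e.g. 'tanh' -> tt.tanh, 'linear' -> identity),
# `stacked_ortho_wts(a, b, n)` is assumed to return a shared (a, n*b) matrix
# of n orthonormal (a, b) blocks stacked side by side, and `share(arr)` is
# assumed to wrap a numpy array in a Theano shared variable.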

class LSTM():
    """
    Long Short Term Memory layer.
    Does not implement in-cell (peephole) connections from the cell value
    to the gates.
    Exposes `output` (seqlen x nunits), `params` and `nout`.
    Reference: Supervised Sequence Labelling with Recurrent Neural Networks,
               Alex Graves, Chapter 4, Fig. 4.2.
    """
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        """
        :param inpt: Lower layer's excitation (seqlen x nin).
        :param nin: Dimension of the lower layer.
        :param nunits: Number of units.
        :param forget: Use a separate forget gate (otherwise 1 - input gate)?
        :param actvn_pre: Activation applied to the candidate cell value.
        :param actvn_post: Activation applied to the cell value before output.
        :param learn_init_states: Should the initial states be learnt?
        """
        # TODO: In-cell connections
        num_activations = 3 + forget
        # Stacked weights: [input gate | output gate | (forget gate) | cell candidate]
        w = stacked_ortho_wts(nin, nunits, num_activations)
        u = stacked_ortho_wts(nunits, nunits, num_activations)
        b = share(np.zeros(num_activations * nunits))
        out0 = share(np.zeros(nunits))
        cell0 = share(np.zeros(nunits))
        actvn_pre = activation_by_name(actvn_pre)
        actvn_post = activation_by_name(actvn_post)

        def step(in_t, out_tm1, cell_tm1):
            """
            Scan function.
            :param in_t: Current input from the bottom layer (already
                projected, i.e. a row of W.x + b).
            :param out_tm1: Previous output of the LSTM layer.
            :param cell_tm1: Previous cell value.
            :return: Current output and cell value.
            """
            # Pre-activations of all gates and the candidate, stacked.
            tmp = tt.dot(out_tm1, u) + in_t
            inn_gate = sigmoid(tmp[:nunits])
            out_gate = sigmoid(tmp[nunits:2 * nunits])
            fgt_gate = sigmoid(
                tmp[2 * nunits:3 * nunits]) if forget else 1 - inn_gate
            cell_val = actvn_pre(tmp[-nunits:])
            cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
            out = out_gate * actvn_post(cell_val)
            return out, cell_val

        inpt = tt.dot(inpt, w) + b
        # (seqlen x nin) . (nin x num_activations*nunits) + bias
        #   -> seqlen x num_activations*nunits
        rval, updates = th.scan(step,
                                sequences=[inpt],
                                outputs_info=[out0, cell0])

        self.output = rval[0]
        self.params = [w, u, b]
        if learn_init_states:
            self.params += [out0, cell0]
        self.nout = nunits

class BDLSTM():
    """
    Bidirectional Long Short Term Memory layer.
    Runs one `LSTM` forward and one over the reversed sequence and
    concatenates their outputs along the feature axis.
    """
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        """
        See `LSTM`.
        """
        fwd = LSTM(inpt, nin, nunits, forget, actvn_pre, actvn_post,
                   learn_init_states)
        bwd = LSTM(inpt[::-1], nin, nunits, forget, actvn_pre, actvn_post,
                   learn_init_states)

        self.params = fwd.params + bwd.params
        self.nout = fwd.nout + bwd.nout
        # Reverse the backward pass back to forward time order before
        # concatenating feature-wise.
        self.output = tt.concatenate([fwd.output,
                                      bwd.output[::-1]],
                                     axis=1)
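

# A minimal usage sketch, assuming the companion `activations` and `weights`
# modules are importable and that the input is a (seqlen x nin) matrix. The
# sizes chosen here (seqlen=5, nin=4, nunits=8) are arbitrary.
if __name__ == '__main__':
    seqlen, nin, nunits = 5, 4, 8
    x = tt.matrix('x')  # symbolic input: seqlen x nin

    lstm = LSTM(x, nin, nunits, forget=True)
    bdlstm = BDLSTM(x, nin, nunits, forget=True)

    f_lstm = th.function([x], lstm.output)
    f_bdlstm = th.function([x], bdlstm.output)

    data = np.random.randn(seqlen, nin).astype(th.config.floatX)
    print(f_lstm(data).shape)    # expected: (seqlen, nunits)
    print(f_bdlstm(data).shape)  # expected: (seqlen, 2 * nunits)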