# RNN_predict_next_char.py
"""
Predicting next character with RNN using Tensorflow.
Suppose we decide to predict the 11'th character using the last 10 characters.
Say we have a short story excerpt:
"before the midnight..."
The tensor fed into tensorflow's rnn should be shaped like this [some nr of rows, 10]:
[
[b, e, f, o, r, e, , t, h, e],
[e, f, o, r, e, , t, h, e, ],
[f, o, r, e, , t, h, e, , m],
[o, r, e, , t, h, e, , m, i],
[r, e, , t, h, e, , m, i, d], ... ]
The output should be like this (the character we want to predict):
[
[ ],
[m],
[i],
[d],
[n], ... ]
Roughly speaking, rnn does this process: output(t) = input_column(t) * w1 + hidden_state(t-1) * w2.
So, before returning the final output (i.e. the 11th character) the net will repeat this process 10 times.
Of course, firstly, characters must be encoded into some kind of numerical representation.
After feeding this data into rnn, tensorflow does all the work. The rest is self-explanatory (well, kind of).
Helpful source: https://medium.com/towards-data-science/lstm-by-example-using-tensorflow-feb0c1968537
"""
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as pl  # pyplot is preferred over the deprecated pylab interface
from urllib.request import urlopen
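

# A minimal sketch of the recurrence described in the docstring above, assuming toy
# scalar weights w1 and w2 (not the LSTM's learned parameters, which also include
# gates and biases). Defined for illustration only; it is never called below.
def _toy_recurrence_demo():
    encoded_chars = [1.0, 2.0, 3.0]  # one numerically encoded character per time step
    w1, w2 = 0.5, 0.8  # toy input weight and recurrent weight
    hidden_state = 0.0
    for char in encoded_chars:
        # output(t) = activation(input_column(t) * w1 + hidden_state(t-1) * w2)
        hidden_state = np.tanh(char * w1 + hidden_state * w2)
    return hidden_state  # the last step's output is what gets turned into a prediction
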
def download_data(
url='https://ia601603.us.archive.org/3/items/CamusAlbertTheStranger/CamusAlbert-TheStranger_djvu.txt'):
# Short story e.g.: http://textfiles.com/stories/aircon.txt
# Longer story e.g.: https://ia601603.us.archive.org/3/items/CamusAlbertTheStranger/CamusAlbert-TheStranger_djvu.txt
    # str() on the raw bytes yields a "b'...'" literal, so escape sequences such as
    # \r, \n and multi-byte characters survive as literal backslash pairs; the
    # replace calls below strip those literal escape sequences from the text
    text = str(urlopen(url).read())
text = text.replace('\\r', '') \
.replace('\\n', '') \
.replace('\\\'', '') \
.replace('\\xe2\\x99\\xa6', '') \
.replace('\\xe2\\x80\\x94', '')
return text

def get_dicts(text):
    """
    Returns a tuple of three objects:
    dictionary maps each unique character in the given text (key) to its id (value)
    reverse_dictionary maps the character ids (keys) back to the characters (values)
    chars is the text converted into a list, where each element is a single character.
    """
    chars = list(text)  # split the string into a list of single characters
    # chars = ''.join(char for char in text).split()  # split the string into words instead, for word-level prediction
    dictionary, reverse_dictionary = {}, {}
    for char_id, char in enumerate(set(chars)):
        dictionary[char] = char_id
        reverse_dictionary[char_id] = char
    return dictionary, reverse_dictionary, chars
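
# Illustrative example (not executed): get_dicts('aba') would return something like
# dictionary={'a': 0, 'b': 1}, reverse_dictionary={0: 'a', 1: 'b'}, chars=['a', 'b', 'a'].
# Since set() ordering is arbitrary, the concrete ids can differ from run to run.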

def get_data(chars, dictionary, time_steps):
    """
    Returns data ready to be fed into the neural net:
    x_data contains all sequences of character ids (not the chars themselves!). Each row is a single sequence.
    y_data one-hot encodes the character that follows each sequence.
    """
    x_data = np.zeros(shape=(len(chars) - time_steps, time_steps))
    y_data = np.zeros(shape=(len(chars) - time_steps, len(set(chars))))
    for data_point in range(len(chars) - time_steps):
        sequence_end = data_point + time_steps
        x_data[data_point] = [dictionary[char] for char in chars[data_point:sequence_end]]
        y_data[data_point, dictionary[chars[sequence_end]]] = 1
    return x_data, y_data
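
# Illustrative shapes, assuming time_steps=10 and the docstring's excerpt: x_data[0]
# holds the ids of 'before the' and y_data[0] is a one-hot row marking the id of the
# following character. With N characters and V unique characters in the text overall,
# x_data has shape (N - time_steps, time_steps) and y_data has shape (N - time_steps, V).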

def forward_prop(x, w, n_hidden, drop):
    """
    RNN with tanh activation in the hidden layers and softmax activation in the last layer.
    Each element of n_hidden corresponds to one hidden layer and gives its number of neurons.
    tf.contrib.rnn.static_rnn creates weights and biases automatically, so there is no need to initialize them manually.
    To follow things up, you can inspect all the tf variables with tf.get_collection('variables').
    """
    # split the data into time_steps columns, to recur over one column after another
    x_split = tf.split(x, time_steps, 1)
    # stack lstm cells, one cell per hidden layer
    stacked_lstm_cells = []  # a list of lstm cells to be fed into MultiRNNCell
    for layer_size in n_hidden:
        stacked_lstm_cells.append(tf.contrib.rnn.BasicLSTMCell(layer_size, activation=tf.nn.tanh))
    # create the net and add dropout
    lstm_cell = tf.contrib.rnn.MultiRNNCell(stacked_lstm_cells)
    lstm_cell_with_dropout = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=drop[0])
    # forward propagate
    outputs, state = tf.contrib.rnn.static_rnn(lstm_cell_with_dropout, x_split, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], w)  # logits are used for cross entropy
    output = tf.nn.softmax(logits)
    return logits, output
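
# Shape notes for forward_prop, assuming batch size B and V unique characters:
# x_split is a list of time_steps tensors of shape [B, 1], outputs[-1] has shape
# [B, n_hidden[-1]], and logits/output have shape [B, V]. Only the last time step's
# output is used, since only the character after the whole sequence is predicted.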

def get_mini_batch(x, y, batch_size):
    indices = np.random.randint(0, len(x), size=batch_size)  # sample row indices with replacement
    x_batch = x[indices]
    y_batch = y[indices]
    return x_batch, y_batch

def generate_new_text(txt, print_length, new_line, dictionary, reverse_dictionary):
    """
    Generates text by repeatedly predicting the next character.
    Function arguments:
    txt - some text to start with. Must be exactly time_steps characters long.
    print_length - the length of the predicted text in characters
    new_line - after that many characters a new line is started
    Uses the global tf session and placeholders defined below.
    """
    # prepare the text so it can be fed into the net
    txt_sample = list(txt)  # do this if trying to predict the next char
    # txt_sample = ''.join(word for word in txt).split()  # do this if trying to predict the next word
    x_data_sample = np.zeros(shape=(1, time_steps))
    for position, char in enumerate(txt_sample):
        x_data_sample[:, position] = dictionary[char]
    # print the text given as an argument
    print(txt, end='')
    # predict the next char, print it, use the predicted char to predict the next one, and so on
    txt_length = 1
    for _ in range(print_length):
        # sample the next character id from the softmax distribution over the whole vocabulary
        next_char_id = np.random.choice(len(dictionary),
                                        p=sess.run([y_], feed_dict={x: x_data_sample, drop: [1.0]})[0].ravel())
        next_char = reverse_dictionary[next_char_id]
        # slide the window: drop the oldest character and append the predicted one
        x_data_sample = np.delete(x_data_sample, 0, axis=1)
        x_data_sample = np.insert(x_data_sample, len(x_data_sample[0]), next_char_id, axis=1)
        if txt_length % new_line != 0:  # same line
            print(next_char, end='')
        else:  # new line
            print(next_char)
        txt_length += 1
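
# Design note: generate_new_text samples the next character from the softmax output
# (np.random.choice with p=...) rather than always taking the argmax; sampling yields
# more varied text, while pure argmax tends to repeat the same few patterns.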

# hyper-parameters
n_hidden = [74, 126]  # neurons per hidden layer; each element corresponds to one hidden layer
batch_size = 250
time_steps = 40  # length of each sequence of chars
learning_rate = 1e-3
dropout = 0.9

# download and prepare the data
text = download_data()
dictionary, reverse_dictionary, chars = get_dicts(text)
x_data, y_data = get_data(chars, dictionary, time_steps)
# initiate tf placeholders
x = tf.placeholder(tf.float32, [None, time_steps])
y = tf.placeholder(tf.float32, [None, len(dictionary)])
drop = tf.placeholder(tf.float32, [1])
# create other tf objects
w = tf.Variable(tf.random_normal([n_hidden[-1], len(dictionary)]), dtype=tf.float32) # last layer weights
logits, y_ = forward_prop(x, w, n_hidden, drop)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
# fraction of examples whose most probable predicted character matches the label
accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y_, axis=1), tf.argmax(y, axis=1)), tf.float32))
saver = tf.train.Saver()
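# to reload a saved checkpoint in a later session (assuming the "model.ckpt" path used below):
# saver.restore(sess, "model.ckpt")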
# initiate tf session
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# initiate new training session
cost_hist = []
iteration = 0

# training loop (runs indefinitely; stop it manually)
while True:
    iteration += 1
    # get a mini batch and train on it
    x_batch, y_batch = get_mini_batch(x_data, y_data, batch_size)
    _ = sess.run([optimizer], feed_dict={x: x_batch, y: y_batch, drop: [dropout]})
    # plot and print
    if iteration % 100 == 0:
        cost_, acc = sess.run([cost, accuracy], feed_dict={x: x_batch, y: y_batch, drop: [1.0]})
        cost_hist.append(cost_)
        print('iteration:', iteration, 'cost:', cost_, 'accuracy:', acc)
        pl.cla()
        pl.plot(cost_hist)
        pl.pause(1e-99)
    # generate new text by giving the rnn something to start with
    if iteration % 500 == 0 or iteration == 1:
        starting_txt = 'Ive had the body moved to our little mor'  # must be exactly time_steps characters long
        generate_new_text(txt=starting_txt, print_length=100, new_line=100, dictionary=dictionary,
                          reverse_dictionary=reverse_dictionary)
    # save
    # if iteration % 500 == 0:
    #     save_path = saver.save(sess, "model.ckpt")