Modified code (several new Python files need to be created):
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow_addons as tfa
import argparse
import datetime
from cnn import CNN
from net_manager import NetManager
from reinforce import Reinforce
from tensorflow.examples.tutorials.mnist import input_data
def parse_args():
desc = "TensorFlow implementation of 'Neural Architecture Search with Reinforcement Learning'"
parser = argparse.ArgumentParser(description=desc)
parser.add_argument('--max_layers', type=int, default=2)
args = parser.parse_args()
return args
'''
The policy network is the main network for searching for the optimal architecture.
It uses the NAS (Neural Architecture Search) recurrent cell:
https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/contrib/rnn/python/ops/rnn_cell.py#L1363
Args:
state: current state of required topology
max_layers: maximum number of layers
Returns:
3-D tensor with new state (new topology)
'''
def policy_network(state, max_layers):
with tf.name_scope("policy_network"):
nas_cell = tfa.rnn.NASCell(4 * max_layers)
outputs, state = tf.nn.dynamic_rnn(
nas_cell,
tf.expand_dims(state, -1),
dtype=tf.float32
)
bias = tf.Variable([0.05] * 4 * max_layers)
outputs = tf.nn.bias_add(outputs, bias)
print("outputs: ", outputs, outputs[:, -1:, :],
tf.slice(outputs, [0, 4 * max_layers - 1, 0], [1, 1, 4 * max_layers]))
# return tf.slice(outputs, [0, 4*max_layers-1, 0], [1, 1, 4*max_layers]) # Returned last output of rnn
return outputs[:, -1:, :]
def train(mnist):
global args
sess = tf.Session()
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.1
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
500, 0.96, staircase=True)
optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
reinforce = Reinforce(sess, optimizer, policy_network, args.max_layers, global_step)
net_manager = NetManager(num_input=784,
num_classes=10,
learning_rate=0.001,
mnist=mnist,
batch_size=100)
MAX_EPISODES = 2500
step = 0
state = np.array([[10.0, 128.0, 1.0, 1.0] * args.max_layers], dtype=np.float32)
pre_acc = 0.0
total_rewards = 0
for i_episode in range(MAX_EPISODES):
action = reinforce.get_action(state)
print("ca:", action)
if all(ai > 0 for ai in action[0][0]):
reward, pre_acc = net_manager.get_reward(action, step, pre_acc)
print("=====>", reward, pre_acc)
else:
reward = -1.0
total_rewards += reward
# In our sample action is equal state
state = action[0]
reinforce.storeRollout(state, reward)
step += 1
ls = reinforce.train_step(1)
log_str = "current time: " + str(datetime.datetime.now().time()) + " episode: " + str(
i_episode) + " loss: " + str(ls) + " last_state: " + str(state) + " last_reward: " + str(reward) + "\n"
log = open("lg3.txt", "a+")
log.write(log_str)
log.close()
print(log_str)
def main():
global args
args = parse_args()
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
train(mnist)
if __name__ == '__main__':
tf.disable_v2_behavior()
main()
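Each candidate architecture is encoded as a flat vector of four numbers per layer, in the order [filter_size, num_filters, max_pool_ksize, dropout] that the CNN constructor expects; NetManager.get_reward later slices the predicted action back into that per-layer form. A small pure-NumPy sketch of the encoding (standalone, nothing from the files above is required):

import numpy as np

# One architecture = 4 numbers per layer:
# [filter_size, num_filters, max_pool_ksize, dropout], flattened across layers.
max_layers = 2
state = np.array([[10.0, 128.0, 1.0, 1.0] * max_layers], dtype=np.float32)

# The same slicing NetManager.get_reward applies to a predicted action:
layers = [state[0][x:x + 4] for x in range(0, len(state[0]), 4)]
print(layers)   # two per-layer blocks of [ 10., 128., 1., 1.]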
import tensorflow.compat.v1 as tf
from cnn import CNN
class NetManager():
def __init__(self, num_input, num_classes, learning_rate, mnist,
max_step_per_action=5500 * 3,
batch_size=100,
dropout_rate=0.85):
self.num_input = num_input
self.num_classes = num_classes
self.learning_rate = learning_rate
self.mnist = mnist
self.max_step_per_action = max_step_per_action
self.batch_size = batch_size
self.dropout_rate = dropout_rate
def get_reward(self, action, step, pre_acc):
action = [action[0][0][x:x + 4] for x in range(0, len(action[0][0]), 4)]
cnn_drop_rate = [c[3] for c in action]
with tf.Graph().as_default() as g:
with g.container('experiment' + str(step)):
model = CNN(self.num_input, self.num_classes, action)
loss_op = tf.reduce_mean(model.loss)
optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
train_op = optimizer.minimize(loss_op)
with tf.Session() as train_sess:
init = tf.global_variables_initializer()
train_sess.run(init)
for step in range(self.max_step_per_action):
batch_x, batch_y = self.mnist.train.next_batch(self.batch_size)
feed = {model.X: batch_x,
model.Y: batch_y,
model.dropout_keep_prob: self.dropout_rate,
model.cnn_dropout_rates: cnn_drop_rate}
_ = train_sess.run(train_op, feed_dict=feed)
if step % 100 == 0:
# Calculate batch loss and accuracy
loss, acc = train_sess.run(
[loss_op, model.accuracy],
feed_dict={model.X: batch_x,
model.Y: batch_y,
model.dropout_keep_prob: 1.0,
model.cnn_dropout_rates: [1.0] * len(cnn_drop_rate)})
print("Step " + str(step) +
", Minibatch Loss= " + "{:.4f}".format(loss) +
", Current accuracy= " + "{:.3f}".format(acc))
batch_x, batch_y = self.mnist.test.next_batch(10000)
loss, acc = train_sess.run(
[loss_op, model.accuracy],
feed_dict={model.X: batch_x,
model.Y: batch_y,
model.dropout_keep_prob: 1.0,
model.cnn_dropout_rates: [1.0] * len(cnn_drop_rate)})
print("!!!!!!acc:", acc, pre_acc)
if acc - pre_acc <= 0.01:
return acc, acc
else:
return 0.01, acc
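The two return statements at the end of get_reward implement a simple rule: an improvement of at most 0.01 over the previous accuracy returns the raw accuracy as the reward, while a larger jump is capped at a reward of 0.01. A standalone restatement, just for clarity:

def reward_from_accuracy(acc, pre_acc):
    # Mirrors the final branch of NetManager.get_reward above.
    if acc - pre_acc <= 0.01:
        return acc, acc      # (reward, new pre_acc)
    return 0.01, acc

print(reward_from_accuracy(0.92, 0.91))  # (0.92, 0.92)
print(reward_from_accuracy(0.95, 0.90))  # (0.01, 0.95)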
import tensorflow.compat.v1 as tf
class CNN():
def __init__(self, num_input, num_classes, cnn_config):
cnn = [c[0] for c in cnn_config]
cnn_num_filters = [c[1] for c in cnn_config]
max_pool_ksize = [c[2] for c in cnn_config]
self.X = tf.placeholder(tf.float32,
[None, num_input],
name="input_X")
self.Y = tf.placeholder(tf.float32, [None, num_classes], name="input_Y")  # one-hot float labels
self.dropout_keep_prob = tf.placeholder(tf.float32, [], name="dense_dropout_keep_prob")
self.cnn_dropout_rates = tf.placeholder(tf.float32, [len(cnn), ], name="cnn_dropout_keep_prob")
Y = self.Y
X = tf.expand_dims(self.X, -1)
pool_out = X
with tf.name_scope("Conv_part"):
for idd, filter_size in enumerate(cnn):
with tf.name_scope("L" + str(idd)):
conv_out = tf.layers.conv1d(
pool_out,
filters=cnn_num_filters[idd],
kernel_size=(int(filter_size)),
strides=1,
padding="SAME",
name="conv_out_" + str(idd),
activation=tf.nn.relu,
kernel_initializer=tf.initializers.glorot_normal(),
bias_initializer=tf.zeros_initializer
)
pool_out = tf.layers.max_pooling1d(
conv_out,
pool_size=(int(max_pool_ksize[idd])),
strides=1,
padding='SAME',
name="max_pool_" + str(idd)
)
pool_out = tf.nn.dropout(pool_out, self.cnn_dropout_rates[idd])
flatten_pred_out = tf.layers.flatten(pool_out)
self.logits = tf.layers.dense(flatten_pred_out, num_classes)
self.prediction = tf.nn.softmax(self.logits, name="prediction")
self.loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=Y, name="loss")
correct_pred = tf.equal(tf.argmax(self.prediction, 1), tf.argmax(Y, 1))
self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name="accuracy")
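A minimal smoke test for the CNN class, assuming TF1-compat graph mode and a hypothetical two-layer config in the same [filter_size, num_filters, max_pool_ksize, dropout] order (the class itself is assumed to be importable from cnn.py):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

with tf.Graph().as_default():
    # hypothetical config: two conv1d layers, kernel 3, 32/64 filters, pool size 2
    sample_config = [[3, 32, 2, 0.85], [3, 64, 2, 0.85]]
    model = CNN(num_input=784, num_classes=10, cnn_config=sample_config)
    print(model.logits)    # dense logits tensor, shape (?, 10)
    print(model.accuracy)  # scalar accuracy op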
import tensorflow.compat.v1 as tf
import random
import numpy as np
class Reinforce():
def __init__(self, sess, optimizer, policy_network, max_layers, global_step,
division_rate=100.0,
reg_param=0.001,
discount_factor=0.99,
exploration=0.3):
self.sess = sess
self.optimizer = optimizer
self.policy_network = policy_network
self.division_rate = division_rate
self.reg_param = reg_param
self.discount_factor = discount_factor
self.exploration = exploration
self.max_layers = max_layers
self.global_step = global_step
self.reward_buffer = []
self.state_buffer = []
self.create_variables()
var_lists = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
self.sess.run(tf.variables_initializer(var_lists))
def get_action(self, state):
# epsilon-greedy: occasionally sample a random architecture, otherwise query the policy network
if random.random() < self.exploration:
return np.array([[random.sample(range(1, 35), 4 * self.max_layers)]])
else:
return self.sess.run(self.predicted_action, {self.states: state})
def create_variables(self):
with tf.name_scope("model_inputs"):
# raw state representation
tf.disable_eager_execution()
self.states = tf.placeholder(tf.float32, [None, self.max_layers * 4], name="states")
with tf.name_scope("predict_actions"):
# initialize policy network
with tf.variable_scope("policy_network"):
self.policy_outputs = self.policy_network(self.states, self.max_layers)
self.action_scores = tf.identity(self.policy_outputs, name="action_scores")
self.predicted_action = tf.cast(tf.scalar_mul(self.division_rate, self.action_scores), tf.int32,
name="predicted_action")
# regularization loss
policy_network_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="policy_network")
# compute loss and gradients
with tf.name_scope("compute_gradients"):
# gradients for selecting action from policy network
self.discounted_rewards = tf.placeholder(tf.float32, (None,), name="discounted_rewards")
with tf.variable_scope("policy_network", reuse=True):
self.logprobs = self.policy_network(self.states, self.max_layers)
print("self.logprobs", self.logprobs)
# compute policy loss and regularization loss
self.cross_entropy_loss = tf.nn.softmax_cross_entropy_with_logits(logits=self.logprobs[:, -1, :],
labels=self.states)
self.pg_loss = tf.reduce_mean(self.cross_entropy_loss)
self.reg_loss = tf.reduce_sum(
[tf.reduce_sum(tf.square(x)) for x in policy_network_variables]) # Regularization
self.loss = self.pg_loss + self.reg_param * self.reg_loss
# compute gradients
self.gradients = self.optimizer.compute_gradients(self.loss)
# compute policy gradients
for i, (grad, var) in enumerate(self.gradients):
if grad is not None:
self.gradients[i] = (grad * self.discounted_rewards, var)
# training update
with tf.name_scope("train_policy_network"):
# apply gradients to update policy network
self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)
def storeRollout(self, state, reward):
self.reward_buffer.append(reward)
self.state_buffer.append(state[0])
def train_step(self, steps_count):
states = np.array(self.state_buffer[-steps_count:]) / self.division_rate
rewards = self.reward_buffer[-steps_count:]
_, ls = self.sess.run([self.train_op, self.loss],
{self.states: states,
self.discounted_rewards: rewards})
return ls
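For reference on shapes: the controller treats state and action as the same flat vector, storeRollout keeps state[0] (shape (4 * max_layers,)), and train_step rescales the most recent steps_count entries by division_rate before feeding them back. A pure-NumPy sketch of that bookkeeping with made-up numbers:

import numpy as np

division_rate = 100.0
state_buffer, reward_buffer = [], []

# one rollout: state shape (1, 4 * max_layers), here max_layers = 2
state = np.array([[10.0, 128.0, 1.0, 1.0, 10.0, 128.0, 1.0, 1.0]], dtype=np.float32)
state_buffer.append(state[0])
reward_buffer.append(0.92)

steps_count = 1
states = np.array(state_buffer[-steps_count:]) / division_rate  # shape (1, 8), values scaled down
rewards = reward_buffer[-steps_count:]
print(states.shape, rewards)  # (1, 8) [0.92]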
Code location: neural-architecture-search-master
Modified code:
import numpy as np
import csv
import tensorflow as tf
from keras import backend as K
from keras.datasets import cifar10
from keras.utils import to_categorical
from controller import Controller, StateSpace
from manager import NetworkManager
from model import model_fn
tf.compat.v1.disable_eager_execution()
tf.get_logger().setLevel('ERROR')
# create a shared session between Keras and Tensorflow
policy_sess = tf.compat.v1.Session()
tf.compat.v1.keras.backend.set_session(policy_sess)
NUM_LAYERS = 4 # number of layers of the state space
MAX_TRIALS = 250 # maximum number of models generated
MAX_EPOCHS = 10 # maximum number of epochs to train
CHILD_BATCHSIZE = 128 # batchsize of the child models
EXPLORATION = 0.8 # high exploration for the first 1000 steps
REGULARIZATION = 1e-3 # regularization strength
CONTROLLER_CELLS = 32 # number of cells in RNN controller
EMBEDDING_DIM = 20 # dimension of the embeddings for each state
ACCURACY_BETA = 0.8 # beta value for the moving average of the accuracy
CLIP_REWARDS = 0.0 # clip rewards in the [-0.05, 0.05] range
RESTORE_CONTROLLER = True # restore controller to continue training
# construct a state space
state_space = StateSpace()
# add states
state_space.add_state(name='kernel', values=[1, 3])
state_space.add_state(name='filters', values=[16, 32, 64])
# print the state space being searched
state_space.print_state_space()
# prepare the training data for the NetworkManager
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)
dataset = [x_train, y_train, x_test, y_test] # pack the dataset for the NetworkManager
previous_acc = 0.0
total_reward = 0.0
with policy_sess.as_default():
# create the Controller and build the internal policy network
controller = Controller(policy_sess, NUM_LAYERS, state_space,
reg_param=REGULARIZATION,
exploration=EXPLORATION,
controller_cells=CONTROLLER_CELLS,
embedding_dim=EMBEDDING_DIM,
restore_controller=RESTORE_CONTROLLER)
# create the Network Manager
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, child_batchsize=CHILD_BATCHSIZE, clip_rewards=CLIP_REWARDS,
acc_beta=ACCURACY_BETA)
# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()
# clear the previous files
controller.remove_files()
# train for the configured number of trials
for trial in range(MAX_TRIALS):
with policy_sess.as_default():
tf.compat.v1.keras.backend.set_session(policy_sess)
actions = controller.get_action(state) # get an action for the previous state
# print the action probabilities
state_space.print_actions(actions)
print("Predicted actions : ", state_space.parse_state_space_list(actions))
# build a model, train and get reward and accuracy from the network manager
reward, previous_acc = manager.get_rewards(model_fn, state_space.parse_state_space_list(actions))
print("Rewards : ", reward, "Accuracy : ", previous_acc)
with policy_sess.as_default():
tf.compat.v1.keras.backend.set_session(policy_sess)
total_reward += reward
print("Total reward : ", total_reward)
# actions and states are equivalent, save the state and reward
state = actions
controller.store_rollout(state, reward)
# train the controller on the saved state and the discounted rewards
loss = controller.train_step()
print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
# write the results of this trial into a file
with open('train_history.csv', mode='a+') as f:
data = [previous_acc, reward]
data.extend(state_space.parse_state_space_list(state))
writer = csv.writer(f)
writer.writerow(data)
print()
print("Total Reward : ", total_reward)
import numpy as np
import time
import pprint
from collections import OrderedDict
from keras import backend as K
import tensorflow as tf
import os
if not os.path.exists('weights/'):
os.makedirs('weights/')
tf.compat.v1.disable_eager_execution()
class StateSpace:
'''
State Space manager
Provides utility functions for holding "states" / "actions" that the controller
must use to train and predict.
Also provides a more convenient way to define the search space.
'''
def __init__(self):
self.states = OrderedDict()
self.state_count_ = 0
def add_state(self, name, values):
'''
Adds a "state" to the state manager, along with some metadata for efficient
packing and unpacking of information required by the RNN Controller.
Stores metadata such as:
- Global ID
- Name
- Valid Values
- Number of valid values possible
- Map from value ID to state value
- Map from state value to value ID
Args:
name: name of the state / action
values: valid values that this state can take
Returns:
Global ID of the state. Can be used to refer to this state later.
'''
index_map = {}
for i, val in enumerate(values):
index_map[i] = val
value_map = {}
for i, val in enumerate(values):
value_map[val] = i
metadata = {
'id': self.state_count_,
'name': name,
'values': values,
'size': len(values),
'index_map_': index_map,
'value_map_': value_map,
}
self.states[self.state_count_] = metadata
self.state_count_ += 1
return self.state_count_ - 1
def embedding_encode(self, id, value):
'''
Embedding index encode the specific state value
Args:
id: global id of the state
value: state value
Returns:
embedding encoded representation of the state value
'''
state = self[id]
size = state['size']
value_map = state['value_map_']
value_idx = value_map[value]
one_hot = np.zeros((1, size), dtype=np.float32)
one_hot[np.arange(1), value_idx] = value_idx + 1
return one_hot
def get_state_value(self, id, index):
'''
Retrieves the state value from the state value ID
Args:
id: global id of the state
index: index of the state value (usually from argmax)
Returns:
The actual state value at given value index
'''
state = self[id]
index_map = state['index_map_']
if (type(index) == list or type(index) == np.ndarray) and len(index) == 1:
index = index[0]
value = index_map[index]
return value
def get_random_state_space(self, num_layers):
'''
Constructs a random initial state space for feeding as an initial value
to the Controller RNN
Args:
num_layers: number of layers to duplicate the search space
Returns:
A list of one hot encoded states
'''
states = []
for id in range(self.size * num_layers):
state = self[id]
size = state['size']
sample = np.random.choice(size, size=1)
sample = state['index_map_'][sample[0]]
state = self.embedding_encode(id, sample)
states.append(state)
return states
def parse_state_space_list(self, state_list):
'''
Parses a list of one hot encoded states to retrieve a list of state values
Args:
state_list: list of one hot encoded states
Returns:
list of state values
'''
state_values = []
for id, state_one_hot in enumerate(state_list):
state_val_idx = np.argmax(state_one_hot, axis=-1)[0]
value = self.get_state_value(id, state_val_idx)
state_values.append(value)
return state_values
def print_state_space(self):
''' Pretty print the state space '''
print('*' * 40, 'STATE SPACE', '*' * 40)
pp = pprint.PrettyPrinter(indent=2, width=100)
for id, state in self.states.items():
pp.pprint(state)
print()
def print_actions(self, actions):
''' Print the action space properly '''
print('Actions :')
for id, action in enumerate(actions):
if id % self.size == 0:
print("*" * 20, "Layer %d" % (((id + 1) // self.size) + 1), "*" * 20)
state = self[id]
name = state['name']
vals = [(n, p) for n, p in zip(state['values'], *action)]
print("%s : " % name, vals)
print()
def __getitem__(self, id):
return self.states[id % self.size]
@property
def size(self):
return self.state_count_
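A short standalone usage sketch of StateSpace (pure NumPy, reusing the same kernel/filters states registered in the training script above):

ss = StateSpace()
ss.add_state(name='kernel', values=[1, 3])
ss.add_state(name='filters', values=[16, 32, 64])

# 2 states per layer * 2 layers = 4 encoded entries
states = ss.get_random_state_space(num_layers=2)
print(ss.parse_state_space_list(states))   # e.g. [3, 16, 1, 64]

# embedding_encode marks the chosen value index with (index + 1), not a plain one-hot
print(ss.embedding_encode(id=0, value=3))  # [[0. 2.]]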
class Controller:
'''
Utility class to manage the RNN Controller
'''
def __init__(self, policy_session, num_layers, state_space,
reg_param=0.001,
discount_factor=0.99,
exploration=0.8,
controller_cells=32,
embedding_dim=20,
clip_norm=0.0,
restore_controller=False):
self.policy_session = policy_session # type: tf.Session
self.num_layers = num_layers
self.state_space = state_space # type: StateSpace
self.state_size = self.state_space.size
self.controller_cells = controller_cells
self.embedding_dim = embedding_dim
self.reg_strength = reg_param
self.discount_factor = discount_factor
self.exploration = exploration
self.restore_controller = restore_controller
self.clip_norm = clip_norm
self.reward_buffer = []
self.state_buffer = []
self.cell_outputs = []
self.policy_classifiers = []
self.policy_actions = []
self.policy_labels = []
self.build_policy_network()
def get_action(self, state):
'''
Gets a one hot encoded action list, either from random sampling or from
the Controller RNN
Args:
state: a list of one hot encoded states, whose first value is used as initial
state for the controller RNN
Returns:
A one hot encoded action list
'''
if np.random.random() < self.exploration:
print("Generating random action to explore")
actions = []
for i in range(self.state_size * self.num_layers):
state_ = self.state_space[i]
size = state_['size']
sample = np.random.choice(size, size=1)
sample = state_['index_map_'][sample[0]]
action = self.state_space.embedding_encode(i, sample)
actions.append(action)
return actions
else:
print("Prediction action from Controller")
initial_state = self.state_space[0]
size = initial_state['size']
if state[0].shape != (1, size):
state = state[0].reshape((1, size)).astype('int32')
else:
state = state[0]
print("State input to Controller for Action : ", state.flatten())
with self.policy_session.as_default():
tf.compat.v1.keras.backend.set_session(self.policy_session)
with tf.name_scope('action_prediction'):
pred_actions = self.policy_session.run(self.policy_actions, feed_dict={self.state_input: state})
return pred_actions
def build_policy_network(self):
with self.policy_session.as_default():
tf.compat.v1.keras.backend.set_session(self.policy_session)
with tf.name_scope('controller'):
with tf.compat.v1.variable_scope('policy_network'):
# state input is the first input fed into the controller RNN.
# the rest of the inputs are fed to the RNN internally
with tf.name_scope('state_input'):
state_input = tf.compat.v1.placeholder(dtype=tf.int32, shape=(1, None), name='state_input')
self.state_input = state_input
# we can use LSTM as the controller as well
nas_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(self.controller_cells)
cell_state = nas_cell.zero_state(batch_size=1, dtype=tf.float32)
embedding_weights = []
# for each possible state, create a new embedding. Reuse the weights for multiple layers.
with tf.compat.v1.variable_scope('embeddings', reuse=tf.compat.v1.AUTO_REUSE):
for i in range(self.state_size):
state_ = self.state_space[i]
size = state_['size']
# size + 1 is used so that 0th index is never updated and is "default" value
weights = tf.compat.v1.get_variable('state_embeddings_%d' % i,
shape=[size + 1, self.embedding_dim],
initializer=tf.compat.v1.initializers.random_uniform(
-1., 1.))
embedding_weights.append(weights)
# initially, cell input will be 1st state input
embeddings = tf.nn.embedding_lookup(embedding_weights[0], state_input)
cell_input = embeddings
# we provide a flat list of chained input-output to the RNN
for i in range(self.state_size * self.num_layers):
state_id = i % self.state_size
state_space = self.state_space[i]
size = state_space['size']
with tf.name_scope('controller_output_%d' % i):
# feed the ith layer input (i-1 layer output) to the RNN
outputs, final_state = tf.compat.v1.nn.dynamic_rnn(nas_cell,
cell_input,
initial_state=cell_state,
dtype=tf.float32)
# add a new classifier for each layers output
classifier = tf.compat.v1.layers.dense(outputs[:, -1, :], units=size,
name='classifier_%d' % (i),
reuse=False)
preds = tf.nn.softmax(classifier)
# feed the previous layer (i-1 layer output) to the next layers input, along with state
# take the class label
cell_input = tf.argmax(preds, axis=-1)
cell_input = tf.expand_dims(cell_input, -1, name='pred_output_%d' % (i))
cell_input = tf.cast(cell_input, tf.int32)
cell_input = tf.add(cell_input,
1) # we avoid using 0 so as to have a "default" embedding at 0th index
# embedding lookup of this state using its state weights ; reuse weights
cell_input = tf.nn.embedding_lookup(embedding_weights[state_id], cell_input,
name='cell_output_%d' % (i))
cell_state = final_state
# store the tensors for later loss computation
self.cell_outputs.append(cell_input)
self.policy_classifiers.append(classifier)
self.policy_actions.append(preds)
policy_net_variables = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
scope='policy_network')
with tf.name_scope('optimizer'):
self.global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.1
learning_rate = tf.compat.v1.train.exponential_decay(starter_learning_rate, self.global_step,
500, 0.95, staircase=True)
tf.summary.scalar('learning_rate', learning_rate)
self.optimizer = tf.compat.v1.train.RMSPropOptimizer(learning_rate=learning_rate)
with tf.name_scope('losses'):
self.discounted_rewards = tf.compat.v1.placeholder(tf.float32, shape=(None,), name='discounted_rewards')
tf.summary.scalar('discounted_reward', tf.reduce_sum(self.discounted_rewards))
# calculate sum of all the individual classifiers
cross_entropy_loss = 0
for i in range(self.state_size * self.num_layers):
classifier = self.policy_classifiers[i]
state_space = self.state_space[i]
size = state_space['size']
with tf.name_scope('state_%d' % (i + 1)):
labels = tf.compat.v1.placeholder(dtype=tf.float32, shape=(None, size),
name='cell_label_%d' % i)
self.policy_labels.append(labels)
ce_loss = tf.compat.v1.nn.softmax_cross_entropy_with_logits_v2(logits=classifier, labels=labels)
tf.summary.scalar('state_%d_ce_loss' % (i + 1), tf.reduce_mean(ce_loss))
cross_entropy_loss += ce_loss
policy_gradient_loss = tf.reduce_mean(cross_entropy_loss)
reg_loss = tf.reduce_sum([tf.reduce_sum(tf.square(x)) for x in policy_net_variables]) # Regularization
# sum up policy gradient and regularization loss
self.total_loss = policy_gradient_loss + self.reg_strength * reg_loss
tf.summary.scalar('total_loss', self.total_loss)
self.gradients = self.optimizer.compute_gradients(self.total_loss)
with tf.name_scope('policy_gradients'):
# normalize gradients so that they dont explode if argument passed
if self.clip_norm is not None and self.clip_norm != 0.0:
norm = tf.constant(self.clip_norm, dtype=tf.float32)
gradients, vars = zip(*self.gradients) # unpack the two lists of gradients and the variables
gradients, _ = tf.clip_by_global_norm(gradients, norm) # clip by the norm
self.gradients = list(zip(gradients, vars)) # we need to set values later, convert to list
# compute policy gradients
for i, (grad, var) in enumerate(self.gradients):
if grad is not None:
self.gradients[i] = (grad * self.discounted_rewards, var)
# training update
with tf.name_scope("train_policy_network"):
# apply gradients to update policy network
self.train_op = self.optimizer.apply_gradients(self.gradients, global_step=self.global_step)
self.summaries_op = tf.compat.v1.summary.merge_all()
timestr = time.strftime("%Y-%m-%d-%H-%M-%S")
filename = 'logs/%s' % timestr
self.summary_writer = tf.compat.v1.summary.FileWriter(filename, graph=self.policy_session.graph)
self.policy_session.run(tf.compat.v1.global_variables_initializer())
self.saver = tf.compat.v1.train.Saver(max_to_keep=1)
if self.restore_controller:
path = tf.train.latest_checkpoint('weights/')
if path is not None and tf.compat.v1.train.checkpoint_exists(path):
print("Loading Controller Checkpoint !")
self.saver.restore(self.policy_session, path)
def store_rollout(self, state, reward):
self.reward_buffer.append(reward)
self.state_buffer.append(state)
# dump buffers to file once they grow beyond 20 items
if len(self.reward_buffer) > 20:
with open('buffers.txt', mode='a+') as f:
for i in range(20):
state_ = self.state_buffer[i]
state_list = self.state_space.parse_state_space_list(state_)
state_list = ','.join(str(v) for v in state_list)
f.write("%0.4f,%s\n" % (self.reward_buffer[i], state_list))
print("Saved buffers to file `buffers.txt` !")
self.reward_buffer = [self.reward_buffer[-1]]
self.state_buffer = [self.state_buffer[-1]]
def discount_rewards(self):
'''
Compute discounted rewards over the entire reward buffer
Returns:
Discounted reward value
'''
rewards = np.asarray(self.reward_buffer)
discounted_rewards = np.zeros_like(rewards)
running_add = 0
for t in reversed(range(0, rewards.size)):
if rewards[t] != 0:
running_add = 0
running_add = running_add * self.discount_factor + rewards[t]
discounted_rewards[t] = running_add
return discounted_rewards[-1]
def train_step(self):
'''
Perform a single train step on the Controller RNN
Returns:
the training loss
'''
states = self.state_buffer[-1]
label_list = []
# parse the state space to get real value of the states,
# then one hot encode them for comparison with the predictions
state_list = self.state_space.parse_state_space_list(states)
for id, state_value in enumerate(state_list):
state_one_hot = self.state_space.embedding_encode(id, state_value)
label_list.append(state_one_hot)
# the initial input to the controller RNN
state_input_size = self.state_space[0]['size']
state_input = states[0].reshape((1, state_input_size)).astype('int32')
print("State input to Controller for training : ", state_input.flatten())
# the discounted reward value
reward = self.discount_rewards()
reward = np.asarray([reward]).astype('float32')
feed_dict = {
self.state_input: state_input,
self.discounted_rewards: reward
}
# prepare the feed dict with the values of all the policy labels for each
# of the Controller outputs
for i, label in enumerate(label_list):
feed_dict[self.policy_labels[i]] = label
with self.policy_session.as_default():
tf.compat.v1.keras.backend.set_session(self.policy_session)
print("Training RNN (States ip) : ", state_list)
print("Training RNN (Reward ip) : ", reward.flatten())
_, loss, global_step = self.policy_session.run([self.train_op, self.total_loss,
self.global_step],
feed_dict=feed_dict)
# self.summary_writer.add_summary(self.summaries_op, global_step)
self.saver.save(self.policy_session, save_path='weights/controller.ckpt', global_step=self.global_step)
# reduce exploration after many train steps
if global_step != 0 and global_step % 20 == 0 and self.exploration > 0.5:
self.exploration *= 0.99
return loss
def remove_files(self):
files = ['train_history.csv', 'buffers.txt']
for file in files:
if os.path.exists(file):
os.remove(file)
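Controller.discount_rewards walks the reward buffer backwards, restarting the running sum whenever it meets a non-zero reward, and train_step only consumes the final value. A standalone numeric sketch of that logic with made-up rewards:

import numpy as np

rewards = np.array([0.02, 0.0, 0.05], dtype=np.float32)
discount_factor = 0.99

discounted = np.zeros_like(rewards)
running_add = 0.0
for t in reversed(range(rewards.size)):
    if rewards[t] != 0:                 # a non-zero reward resets the running sum, as in the method
        running_add = 0.0
    running_add = running_add * discount_factor + rewards[t]
    discounted[t] = running_add

print(discounted[-1])                   # 0.05 -- the single value fed as discounted_rewards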
from keras.models import Model
from keras.layers import Input, Dense, Conv2D, GlobalAveragePooling2D
# generic model design
def model_fn(actions):
# unpack the actions from the list
kernel_1, filters_1, kernel_2, filters_2, kernel_3, filters_3, kernel_4, filters_4 = actions
ip = Input(shape=(32, 32, 3))
x = Conv2D(filters_1, (kernel_1, kernel_1), strides=(2, 2), padding='same', activation='relu')(ip)
x = Conv2D(filters_2, (kernel_2, kernel_2), strides=(1, 1), padding='same', activation='relu')(x)
x = Conv2D(filters_3, (kernel_3, kernel_3), strides=(2, 2), padding='same', activation='relu')(x)
x = Conv2D(filters_4, (kernel_4, kernel_4), strides=(1, 1), padding='same', activation='relu')(x)
x = GlobalAveragePooling2D()(x)
x = Dense(10, activation='softmax')(x)
model = Model(ip, x)
return model
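A quick sanity check of model_fn with a hypothetical parsed action list, i.e. a (kernel, filters) pair for each of the four layers, drawn from the search space registered earlier:

# hypothetical actions: kernel_i, filters_i pairs for 4 layers
sample_actions = [3, 32, 3, 64, 1, 64, 3, 16]
model = model_fn(sample_actions)
model.summary()   # 4 Conv2D blocks -> GlobalAveragePooling2D -> Dense(10, softmax)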
import numpy as np
from keras.models import Model
from keras import backend as K
from keras.callbacks import ModelCheckpoint
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
tf.get_logger().setLevel('ERROR')
class NetworkManager:
'''
Helper class to manage the generation of subnetwork training given a dataset
'''
def __init__(self, dataset, epochs=5, child_batchsize=128, acc_beta=0.8, clip_rewards=0.0):
'''
Manager which is tasked with creating subnetworks, training them on a dataset, and retrieving
rewards in the term of accuracy, which is passed to the controller RNN.
Args:
dataset: a tuple of 4 arrays (X_train, y_train, X_val, y_val)
epochs: number of epochs to train the subnetworks
child_batchsize: batchsize of training the subnetworks
acc_beta: exponential weight for the accuracy
clip_rewards: float - to clip rewards in [-range, range] to prevent
large weight updates. Use when training is highly unstable.
'''
self.dataset = dataset
self.epochs = epochs
self.batchsize = child_batchsize
self.clip_rewards = clip_rewards
self.beta = acc_beta
self.beta_bias = acc_beta
self.moving_acc = 0.0
def get_rewards(self, model_fn, actions):
'''
Creates a subnetwork given the actions predicted by the controller RNN,
trains it on the provided dataset, and then returns a reward.
Args:
model_fn: a function which accepts one argument, a list of
parsed actions, obtained via an inverse mapping from the
StateSpace.
actions: a list of parsed actions obtained via an inverse mapping
from the StateSpace. It is in a specific order as given below:
Consider 4 states were added to the StateSpace via the `add_state`
method. Then the `actions` array will be of length 4, with the
values of those states in the order that they were added.
If number of layers is greater than one, then the `actions` array
will be of length `4 * number of layers` (in the above scenario).
The index from [0:4] will be for layer 0, from [4:8] for layer 1,
etc for the number of layers.
These action values are for direct use in the construction of models.
Returns:
a reward for training a model with the given actions
'''
with tf.compat.v1.Session(graph=tf.Graph()) as network_sess:
tf.compat.v1.keras.backend.set_session(network_sess)
# generate a submodel given predicted actions
model = model_fn(actions) # type: Model
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
# unpack the dataset
X_train, y_train, X_val, y_val = self.dataset
# train the model using Keras methods
model.fit(X_train, y_train, batch_size=self.batchsize, epochs=self.epochs,
verbose=1, validation_data=(X_val, y_val),
callbacks=[ModelCheckpoint('weights/temp_network.h5',
monitor='val_accuracy', verbose=1,
save_freq="epoch",
save_best_only=True,
save_weights_only=True)])
# load best performance epoch in this training session
model.load_weights('weights/temp_network.h5')
# evaluate the model
loss, acc = model.evaluate(X_val, y_val, batch_size=self.batchsize)
# compute the reward
reward = (acc - self.moving_acc)
# if rewards are clipped, clip them in the range -0.05 to 0.05
if self.clip_rewards:
reward = np.clip(reward, -0.05, 0.05)
# update moving accuracy with bias correction for 1st update
if 0.0 < self.beta < 1.0:
self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc
self.moving_acc = self.moving_acc / (1 - self.beta_bias)
self.beta_bias = 0
reward = np.clip(reward, -0.1, 0.1)
print()
print("Manager: EWA Accuracy = ", self.moving_acc)
# clean up resources and GPU memory
network_sess.close()
return reward, acc
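The reward is the gap between the child model's accuracy and an exponentially weighted moving accuracy, with a one-time bias correction on the first update and a final clip to [-0.1, 0.1]. A pure-Python sketch with made-up accuracies:

beta, beta_bias, moving_acc = 0.8, 0.8, 0.0

for acc in [0.55, 0.60, 0.58]:                     # hypothetical child accuracies
    reward = acc - moving_acc
    moving_acc = beta * moving_acc + (1 - beta) * acc
    moving_acc = moving_acc / (1 - beta_bias)      # bias correction, only active on the first update
    beta_bias = 0
    reward = max(-0.1, min(0.1, reward))
    print(round(reward, 3), round(moving_acc, 3))  # 0.1 0.55 / 0.05 0.56 / 0.02 0.564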
from keras.engine import Layer
from keras import activations
from keras import initializers
from keras import regularizers
from keras import constraints
from keras import backend as K
from keras.layers import RNN
from keras.layers.recurrent import _generate_dropout_mask, _generate_dropout_ones
import warnings
# import tensorflow as tf
# import tensorflow.contrib.rnn as rnn
class NASCell(Layer):
"""Neural Architecture Search (NAS) recurrent network cell.
This implements the recurrent cell from the paper:
https://arxiv.org/abs/1611.01578
Barret Zoph and Quoc V. Le.
"Neural Architecture Search with Reinforcement Learning" Proc. ICLR 2017.
The class uses an optional projection layer.
# Arguments
units: Positive integer, dimensionality of the output space.
projection_units: (optional) Positive integer, The output dimensionality
for the projection matrices. If None, no projection is performed.
activation: Activation function to use
(see [activations](../activations.md)).
If you pass None, no activation is applied
(ie. "linear" activation: `a(x) = x`).
recurrent_activation: Activation function to use
for the recurrent step
(see [activations](../activations.md)).
projection_activation: Activation function to use
for the projection step
(see [activations](../activations.md)).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
(see [initializers](../initializers.md)).
recurrent_initializer: Initializer for the `recurrent_kernel`
weights matrix,
used for the linear transformation of the recurrent state.
(see [initializers](../initializers.md)).
projection_initializer: Initializer for the `projection_kernel`
weights matrix,
used for the linear transformation of the projection step.
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
unit_forget_bias: Boolean.
If True, add 1 to the bias of the forget gate at initialization.
Setting it to true will also force `bias_initializer="zeros"`.
This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
recurrent_regularizer: Regularizer function applied to
the `recurrent_kernel` weights matrix
(see [regularizer](../regularizers.md)).
projection_regularizer: Regularizer function applied to
the `projection_kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
recurrent_constraint: Constraint function applied to
the `recurrent_kernel` weights matrix
(see [constraints](../constraints.md)).
projection_constraint: Constraint function applied to
the `projection_kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the recurrent state.
implementation: Implementation mode, either 1 or 2.
Mode 1 will structure its operations as a larger number of
smaller dot products and additions, whereas mode 2 will
batch them into fewer, larger operations. These modes will
have different performance profiles on different hardware and
for different applications.
"""
def __init__(self, units,
projection_units=None,
activation='tanh',
recurrent_activation='sigmoid',
projection_activation='linear',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
projection_initializer='glorot_uniform',
bias_initializer='zeros',
unit_forget_bias=False,
kernel_regularizer=None,
recurrent_regularizer=None,
projection_regularizer=None,
bias_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
projection_constraint=None,
bias_constraint=None,
dropout=0.,
recurrent_dropout=0.,
implementation=2,
**kwargs):
super(NASCell, self).__init__(**kwargs)
self.units = units
self.projection_units = projection_units
self.activation = activations.get(activation)
self.recurrent_activation = activations.get(recurrent_activation)
self.projection_activation = activations.get(projection_activation)
self.cell_activation = activations.get('relu')
self.use_bias = use_bias
self.kernel_initializer = initializers.get(kernel_initializer)
self.recurrent_initializer = initializers.get(recurrent_initializer)
self.projection_initializer = initializers.get(projection_initializer)
self.bias_initializer = initializers.get(bias_initializer)
self.unit_forget_bias = unit_forget_bias
self.kernel_regularizer = regularizers.get(kernel_regularizer)
self.recurrent_regularizer = regularizers.get(recurrent_regularizer)
self.projection_regularizer = regularizers.get(projection_regularizer)
self.bias_regularizer = regularizers.get(bias_regularizer)
self.kernel_constraint = constraints.get(kernel_constraint)
self.recurrent_constraint = constraints.get(recurrent_constraint)
self.projection_constraint = constraints.get(projection_constraint)
self.bias_constraint = constraints.get(bias_constraint)
self.dropout = min(1., max(0., dropout))
self.recurrent_dropout = min(1., max(0., recurrent_dropout))
self.implementation = implementation
if self.projection_units is not None:
self.state_size = (self.projection_units, self.units)
else:
self.state_size = (self.units, self.units)
self._dropout_mask = None
self._recurrent_dropout_mask = None
def build(self, input_shape):
input_dim = input_shape[-1]
if self.projection_units is not None:
recurrent_output_dim = self.projection_units
else:
recurrent_output_dim = self.units
self.kernel = self.add_weight(shape=(input_dim, self.units * 8),
name='kernel',
initializer=self.kernel_initializer,
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.recurrent_kernel = self.add_weight(
shape=(recurrent_output_dim, self.units * 8),
name='recurrent_kernel',
initializer=self.recurrent_initializer,
regularizer=self.recurrent_regularizer,
constraint=self.recurrent_constraint)
if self.projection_units is not None:
self.projection_kernel = self.add_weight(
shape=(self.units, self.projection_units),
name='projection_kernel',
initializer=self.projection_initializer,
regularizer=self.projection_regularizer,
constraint=self.projection_constraint)
if self.use_bias:
if self.unit_forget_bias:
def bias_initializer(shape, *args, **kwargs):
return K.concatenate([
self.bias_initializer((self.units,), *args, **kwargs),
initializers.Ones()((self.units,), *args, **kwargs),
self.bias_initializer((self.units * 6,), *args, **kwargs),
])
else:
bias_initializer = self.bias_initializer
self.bias = self.add_weight(shape=(self.units * 8,),
name='bias',
initializer=bias_initializer,
regularizer=self.bias_regularizer,
constraint=self.bias_constraint)
else:
self.bias = None
self.kernel_0 = self.kernel[:, :self.units]
self.kernel_1 = self.kernel[:, self.units: self.units * 2]
self.kernel_2 = self.kernel[:, self.units * 2: self.units * 3]
self.kernel_3 = self.kernel[:, self.units * 3: self.units * 4]
self.kernel_4 = self.kernel[:, self.units * 4: self.units * 5]
self.kernel_5 = self.kernel[:, self.units * 5: self.units * 6]
self.kernel_6 = self.kernel[:, self.units * 6: self.units * 7]
self.kernel_7 = self.kernel[:, self.units * 7:]
self.recurrent_kernel_0 = self.recurrent_kernel[:, :self.units]
self.recurrent_kernel_1 = self.recurrent_kernel[:, self.units: self.units * 2]
self.recurrent_kernel_2 = self.recurrent_kernel[:, self.units * 2: self.units * 3]
self.recurrent_kernel_3 = self.recurrent_kernel[:, self.units * 3: self.units * 4]
self.recurrent_kernel_4 = self.recurrent_kernel[:, self.units * 4: self.units * 5]
self.recurrent_kernel_5 = self.recurrent_kernel[:, self.units * 5: self.units * 6]
self.recurrent_kernel_6 = self.recurrent_kernel[:, self.units * 6: self.units * 7]
self.recurrent_kernel_7 = self.recurrent_kernel[:, self.units * 7:]
if self.use_bias:
self.bias_0 = self.bias[:self.units]
self.bias_1 = self.bias[self.units: self.units * 2]
self.bias_2 = self.bias[self.units * 2: self.units * 3]
self.bias_3 = self.bias[self.units * 3: self.units * 4]
self.bias_4 = self.bias[self.units * 4: self.units * 5]
self.bias_5 = self.bias[self.units * 5: self.units * 6]
self.bias_6 = self.bias[self.units * 6: self.units * 7]
self.bias_7 = self.bias[self.units * 7:]
else:
self.bias_0 = None
self.bias_1 = None
self.bias_2 = None
self.bias_3 = None
self.bias_4 = None
self.bias_5 = None
self.bias_6 = None
self.bias_7 = None
self.built = True
def call(self, inputs, states, training=None):
if 0 < self.dropout < 1 and self._dropout_mask is None:
self._dropout_mask = _generate_dropout_mask(
_generate_dropout_ones(inputs, K.shape(inputs)[-1]),
self.dropout,
training=training,
count=8)
if (0 < self.recurrent_dropout < 1 and
self._recurrent_dropout_mask is None):
_recurrent_dropout_mask = _generate_dropout_mask(
_generate_dropout_ones(inputs, self.units),
self.recurrent_dropout,
training=training,
count=8)
self._recurrent_dropout_mask = _recurrent_dropout_mask
# dropout matrices for input units
dp_mask = self._dropout_mask
# dropout matrices for recurrent units
rec_dp_mask = self._recurrent_dropout_mask
h_tm1 = states[0] # previous memory state
c_tm1 = states[1] # previous carry state
if self.implementation == 1:
if 0 < self.dropout < 1.:
inputs_0 = inputs * dp_mask[0]
inputs_1 = inputs * dp_mask[1]
inputs_2 = inputs * dp_mask[2]
inputs_3 = inputs * dp_mask[3]
inputs_4 = inputs * dp_mask[4]
inputs_5 = inputs * dp_mask[5]
inputs_6 = inputs * dp_mask[6]
inputs_7 = inputs * dp_mask[7]
else:
inputs_0 = inputs
inputs_1 = inputs
inputs_2 = inputs
inputs_3 = inputs
inputs_4 = inputs
inputs_5 = inputs
inputs_6 = inputs
inputs_7 = inputs
x_0 = K.dot(inputs_0, self.kernel_0)
x_1 = K.dot(inputs_1, self.kernel_1)
x_2 = K.dot(inputs_2, self.kernel_2)
x_3 = K.dot(inputs_3, self.kernel_3)
x_4 = K.dot(inputs_4, self.kernel_4)
x_5 = K.dot(inputs_5, self.kernel_5)
x_6 = K.dot(inputs_6, self.kernel_6)
x_7 = K.dot(inputs_7, self.kernel_7)
if self.use_bias:
x_0 = K.bias_add(x_0, self.bias_0)
x_1 = K.bias_add(x_1, self.bias_1)
x_2 = K.bias_add(x_2, self.bias_2)
x_3 = K.bias_add(x_3, self.bias_3)
x_4 = K.bias_add(x_4, self.bias_4)
x_5 = K.bias_add(x_5, self.bias_5)
x_6 = K.bias_add(x_6, self.bias_6)
x_7 = K.bias_add(x_7, self.bias_7)
if 0 < self.recurrent_dropout < 1.:
h_tm1_0 = h_tm1 * rec_dp_mask[0]
h_tm1_1 = h_tm1 * rec_dp_mask[1]
h_tm1_2 = h_tm1 * rec_dp_mask[2]
h_tm1_3 = h_tm1 * rec_dp_mask[3]
h_tm1_4 = h_tm1 * rec_dp_mask[4]
h_tm1_5 = h_tm1 * rec_dp_mask[5]
h_tm1_6 = h_tm1 * rec_dp_mask[6]
h_tm1_7 = h_tm1 * rec_dp_mask[7]
else:
h_tm1_0 = h_tm1
h_tm1_1 = h_tm1
h_tm1_2 = h_tm1
h_tm1_3 = h_tm1
h_tm1_4 = h_tm1
h_tm1_5 = h_tm1
h_tm1_6 = h_tm1
h_tm1_7 = h_tm1
# First Layer
layer1_0 = self.recurrent_activation(x_0 + K.dot(h_tm1_0, self.recurrent_kernel_0))
layer1_1 = self.cell_activation(x_1 + K.dot(h_tm1_1, self.recurrent_kernel_1))
layer1_2 = self.recurrent_activation(x_2 + K.dot(h_tm1_2, self.recurrent_kernel_2))
layer1_3 = self.cell_activation(x_3 * K.dot(h_tm1_3, self.recurrent_kernel_3))
layer1_4 = self.activation(x_4 + K.dot(h_tm1_4, self.recurrent_kernel_4))
layer1_5 = self.recurrent_activation(x_5 + K.dot(h_tm1_5, self.recurrent_kernel_5))
layer1_6 = self.activation(x_6 + K.dot(h_tm1_6, self.recurrent_kernel_6))
layer1_7 = self.recurrent_activation(x_7 + K.dot(h_tm1_7, self.recurrent_kernel_7))
# Second Layer
layer2_0 = self.activation(layer1_0 * layer1_1)
layer2_1 = self.activation(layer1_2 + layer1_3)
layer2_2 = self.activation(layer1_4 * layer1_5)
layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)
# Inject the Cell
layer2_0 = self.activation(layer2_0 + c_tm1)
# Third Layer
layer3_0_pre = layer2_0 * layer2_1
c = layer3_0_pre # create a new cell
layer3_0 = layer3_0_pre
layer3_1 = self.activation(layer2_2 + layer2_3)
# Final Layer
h = self.activation(layer3_0 * layer3_1)
if self.projection_units is not None:
h = self.projection_activation(K.dot(h, self.projection_kernel))
else:
if 0. < self.dropout < 1.:
inputs *= dp_mask[0]
z = K.dot(inputs, self.kernel)
if 0. < self.recurrent_dropout < 1.:
h_tm1 *= rec_dp_mask[0]
zr = K.dot(h_tm1, self.recurrent_kernel)
if self.use_bias:
zr = K.bias_add(zr, self.bias)
z0 = z[:, :self.units]
z1 = z[:, self.units: 2 * self.units]
z2 = z[:, 2 * self.units: 3 * self.units]
z3 = z[:, 3 * self.units: 4 * self.units]
z4 = z[:, 4 * self.units: 5 * self.units]
z5 = z[:, 5 * self.units: 6 * self.units]
z6 = z[:, 6 * self.units: 7 * self.units]
z7 = z[:, 7 * self.units:]
zr0 = zr[:, :self.units]
zr1 = zr[:, self.units: 2 * self.units]
zr2 = zr[:, 2 * self.units: 3 * self.units]
zr3 = zr[:, 3 * self.units: 4 * self.units]
zr4 = zr[:, 4 * self.units: 5 * self.units]
zr5 = zr[:, 5 * self.units: 6 * self.units]
zr6 = zr[:, 6 * self.units: 7 * self.units]
zr7 = zr[:, 7 * self.units:]
# First Layer
layer1_0 = self.recurrent_activation(z0 + zr0)
layer1_1 = self.cell_activation(z1 + zr1)
layer1_2 = self.recurrent_activation(z2 + zr2)
layer1_3 = self.cell_activation(z3 * zr3)
layer1_4 = self.activation(z4 + zr4)
layer1_5 = self.recurrent_activation(z5 + zr5)
layer1_6 = self.activation(z6 + zr6)
layer1_7 = self.recurrent_activation(z7 + zr7)
# Second Layer
layer2_0 = self.activation(layer1_0 * layer1_1)
layer2_1 = self.activation(layer1_2 + layer1_3)
layer2_2 = self.activation(layer1_4 * layer1_5)
layer2_3 = self.recurrent_activation(layer1_6 + layer1_7)
# Inject the Cell
layer2_0 = self.activation(layer2_0 + c_tm1)
# Third Layer
layer3_0_pre = layer2_0 * layer2_1
c = layer3_0_pre
layer3_0 = layer3_0_pre
layer3_1 = self.activation(layer2_2 + layer2_3)
# Final Layer
h = self.activation(layer3_0 * layer3_1)
if self.projection_units is not None:
h = self.projection_activation(K.dot(h, self.projection_kernel))
if 0 < self.dropout + self.recurrent_dropout:
if training is None:
h._uses_learning_phase = True
return h, [h, c]
def get_config(self):
config = {'units': self.units,
'projection_units': self.projection_units,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(self.recurrent_activation),
'projection_activation': activations.serialize(self.projection_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
'projection_initializer': initializers.serialize(self.projection_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'unit_forget_bias': self.unit_forget_bias,
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
'projection_regularizer': regularizers.serialize(self.projection_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
'projection_constraint': constraints.serialize(self.projection_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'implementation': self.implementation}
base_config = super(NASCell, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
class NASRNN(RNN):
"""Neural Architecture Search (NAS) recurrent network cell.
This implements the recurrent cell from the paper:
https://arxiv.org/abs/1611.01578
Barret Zoph and Quoc V. Le.
"Neural Architecture Search with Reinforcement Learning" Proc. ICLR 2017.
The class uses an optional projection layer.
# Arguments
units: Positive integer, dimensionality of the output space.
projection_units: (optional) Positive integer, The output dimensionality
for the projection matrices. If None, no projection is performed.
activation: Activation function to use
(see [activations](../activations.md)).
If you pass None, no activation is applied
(ie. "linear" activation: `a(x) = x`).
recurrent_activation: Activation function to use
for the recurrent step
(see [activations](../activations.md)).
projection_activation: Activation function to use
for the projection step
(see [activations](../activations.md)).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
(see [initializers](../initializers.md)).
recurrent_initializer: Initializer for the `recurrent_kernel`
weights matrix,
used for the linear transformation of the recurrent state.
(see [initializers](../initializers.md)).
projection_initializer: Initializer for the `projection_kernel`
weights matrix,
used for the linear transformation of the projection step.
(see [initializers](../initializers.md)).
bias_initializer: Initializer for the bias vector
(see [initializers](../initializers.md)).
unit_forget_bias: Boolean.
If True, add 1 to the bias of the forget gate at initialization.
Setting it to true will also force `bias_initializer="zeros"`.
This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix
(see [regularizer](../regularizers.md)).
recurrent_regularizer: Regularizer function applied to
the `recurrent_kernel` weights matrix
(see [regularizer](../regularizers.md)).
projection_regularizer: Regularizer function applied to
the `projection_kernel` weights matrix
(see [regularizer](../regularizers.md)).
bias_regularizer: Regularizer function applied to the bias vector
(see [regularizer](../regularizers.md)).
kernel_constraint: Constraint function applied to
the `kernel` weights matrix
(see [constraints](../constraints.md)).
recurrent_constraint: Constraint function applied to
the `recurrent_kernel` weights matrix
(see [constraints](../constraints.md)).
projection_constraint: Constraint function applied to
the `projection_kernel` weights matrix
(see [constraints](../constraints.md)).
bias_constraint: Constraint function applied to the bias vector
(see [constraints](../constraints.md)).
dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the recurrent state.
implementation: Implementation mode, either 1 or 2.
Mode 1 will structure its operations as a larger number of
smaller dot products and additions, whereas mode 2 will
batch them into fewer, larger operations. These modes will
have different performance profiles on different hardware and
for different applications.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence.
return_state: Boolean. Whether to return the last state
in addition to the output.
go_backwards: Boolean (default False).
If True, process the input sequence backwards and return the
reversed sequence.
stateful: Boolean (default False). If True, the last state
for each sample at index i in a batch will be used as initial
state for the sample of index i in the following batch.
unroll: Boolean (default False).
If True, the network will be unrolled,
else a symbolic loop will be used.
Unrolling can speed-up a RNN,
although it tends to be more memory-intensive.
Unrolling is only suitable for short sequences.
# References
- [Long short-term memory](http://www.bioinf.jku.at/publications/older/2604.pdf) (original 1997 paper)
- [Learning to forget: Continual prediction with LSTM](http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015)
- [Supervised sequence labeling with recurrent neural networks](http://www.cs.toronto.edu/~graves/preprint.pdf)
- [A Theoretically Grounded Application of Dropout in Recurrent Neural Networks](http://arxiv.org/abs/1512.05287)
- [Nested LSTMs](https://arxiv.org/abs/1801.10308)
"""
def __init__(self, units,
projection_units=None,
activation='tanh',
recurrent_activation='sigmoid',
projection_activation='linear',
use_bias=True,
kernel_initializer='glorot_uniform',
recurrent_initializer='orthogonal',
projection_initializer='glorot_uniform',
bias_initializer='zeros',
unit_forget_bias=False,
kernel_regularizer=None,
recurrent_regularizer=None,
projection_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
projection_constraint=None,
bias_constraint=None,
dropout=0.,
recurrent_dropout=0.,
implementation=2,
return_sequences=False,
return_state=False,
go_backwards=False,
stateful=False,
unroll=False,
**kwargs):
if implementation == 0:
warnings.warn('`implementation=0` has been deprecated, '
'and now defaults to `implementation=2`. '
'Please update your layer call.')
if K.backend() == 'theano':
warnings.warn(
'RNN dropout is no longer supported with the Theano backend '
'due to technical limitations. '
'You can either set `dropout` and `recurrent_dropout` to 0, '
'or use the TensorFlow backend.')
dropout = 0.
recurrent_dropout = 0.
cell = NASCell(units, projection_units,
activation=activation,
recurrent_activation=recurrent_activation,
projection_activation=projection_activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
recurrent_initializer=recurrent_initializer,
projection_initializer=projection_initializer,
unit_forget_bias=unit_forget_bias,
bias_initializer=bias_initializer,
kernel_regularizer=kernel_regularizer,
recurrent_regularizer=recurrent_regularizer,
bias_regularizer=bias_regularizer,
projection_regularizer=projection_regularizer,
kernel_constraint=kernel_constraint,
recurrent_constraint=recurrent_constraint,
bias_constraint=bias_constraint,
projection_constraint=projection_constraint,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
implementation=implementation)
super(NASRNN, self).__init__(cell,
return_sequences=return_sequences,
return_state=return_state,
go_backwards=go_backwards,
stateful=stateful,
unroll=unroll,
**kwargs)
self.activity_regularizer = regularizers.get(activity_regularizer)
def call(self, inputs, mask=None, training=None, initial_state=None, constants=None):
self.cell._dropout_mask = None
self.cell._recurrent_dropout_mask = None
return super(NASRNN, self).call(inputs,
mask=mask,
training=training,
initial_state=initial_state,
constants=constants)
@property
def units(self):
return self.cell.units
@property
def projection_units(self):
return self.cell.projection_units
@property
def activation(self):
return self.cell.activation
@property
def recurrent_activation(self):
return self.cell.recurrent_activation
@property
def projection_activation(self):
return self.cell.projection_activation
@property
def use_bias(self):
return self.cell.use_bias
@property
def kernel_initializer(self):
return self.cell.kernel_initializer
@property
def recurrent_initializer(self):
return self.cell.recurrent_initializer
@property
def bias_initializer(self):
return self.cell.bias_initializer
@property
def projection_initializer(self):
return self.cell.projection_initializer
@property
def unit_forget_bias(self):
return self.cell.unit_forget_bias
@property
def kernel_regularizer(self):
return self.cell.kernel_regularizer
@property
def recurrent_regularizer(self):
return self.cell.recurrent_regularizer
@property
def bias_regularizer(self):
return self.cell.bias_regularizer
@property
def projection_regularizer(self):
return self.cell.projection_regularizer
@property
def kernel_constraint(self):
return self.cell.kernel_constraint
@property
def recurrent_constraint(self):
return self.cell.recurrent_constraint
@property
def bias_constraint(self):
return self.cell.bias_constraint
@property
def projection_constraint(self):
return self.cell.projection_constraint
@property
def dropout(self):
return self.cell.dropout
@property
def recurrent_dropout(self):
return self.cell.recurrent_dropout
@property
def implementation(self):
return self.cell.implementation
def get_config(self):
config = {'units': self.units,
'projection_units': self.projection_units,
'activation': activations.serialize(self.activation),
'recurrent_activation': activations.serialize(self.recurrent_activation),
'projection_activation': activations.serialize(self.projection_activation),
'use_bias': self.use_bias,
'kernel_initializer': initializers.serialize(self.kernel_initializer),
'recurrent_initializer': initializers.serialize(self.recurrent_initializer),
'bias_initializer': initializers.serialize(self.bias_initializer),
'projection_initializer': initializers.serialize(self.projection_initializer),
'unit_forget_bias': self.unit_forget_bias,
'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer),
'bias_regularizer': regularizers.serialize(self.bias_regularizer),
'projection_regularizer': regularizers.serialize(self.projection_regularizer),
'activity_regularizer': regularizers.serialize(self.activity_regularizer),
'kernel_constraint': constraints.serialize(self.kernel_constraint),
'recurrent_constraint': constraints.serialize(self.recurrent_constraint),
'bias_constraint': constraints.serialize(self.bias_constraint),
'projection_constraint': constraints.serialize(self.projection_constraint),
'dropout': self.dropout,
'recurrent_dropout': self.recurrent_dropout,
'implementation': self.implementation}
base_config = super(NASRNN, self).get_config()
del base_config['cell']
return dict(list(base_config.items()) + list(config.items()))
@classmethod
def from_config(cls, config):
if 'implementation' in config and config['implementation'] == 0:
config['implementation'] = 2
return cls(**config)
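A heavily hedged usage sketch: wrapping NASCell through the NASRNN layer inside a tiny Keras model. This assumes a legacy standalone-Keras environment in which the private helpers imported at the top of this file (keras.layers.recurrent._generate_dropout_mask / _generate_dropout_ones) still exist; on newer Keras releases those imports fail and the example will not run.

from keras.models import Model
from keras.layers import Input, Dense

ip = Input(shape=(10, 4))                # (timesteps, features)
x = NASRNN(32)(ip)                       # last output of the NAS cell, 32 units
out = Dense(1, activation='sigmoid')(x)

model = Model(ip, out)
model.summary()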