"""
``nn()`` is used to train an instance of ``globalemu`` on the preprocessed
data in ``base_dir``. All of the parameters for ``nn()`` are kwargs, and
a number of them can be left at their default values; however, you will
need to set ``base_dir`` and possibly ``epochs`` and ``xHI`` (see below
and the tutorial for details).
"""
import tensorflow as tf
from tensorflow import keras
import numpy as np
import time
import os
from globalemu.models import network_models
from globalemu.losses import loss_functions
class nn():
    r"""
    **kwargs:**

        batch_size: **int / default: 100**
            | The batch size used by ``tensorflow`` when performing training.
              Corresponds to the number of samples propagated before the
              networks hyperparameters are updated. Keep the value ~100 as
              this will help with memory management and training speed.

        epochs: **int / default: 10**
            | The number of epochs to train the network on. An epoch
              corresponds to training on x batches where x is sufficiently
              large for every sample to have influenced an update of the
              network hyperparameters.

        activation: **string / default: 'tanh'**
            | The type of activation function used in the neural networks
              hidden layers. The activation function affects the way that the
              network learns and updates its hyperparameters. The default
              is a commonly used activation for regression neural networks.

        lr: **float / default: 0.001**
            | The learning rate acts as a "step size" in the optimization and
              its value can affect the quality of the emulation. Typical
              values fall in the range 0.001-0.1.

        dropout: **float / default: 0**
            | The dropout for the neural network training. ``globalemu`` is
              designed so that you shouldn't need dropout to prevent
              overfitting but we leave it as an option.

        input_shape: **int / default: 8**
            | The number of input parameters (astrophysical parameters
              plus redshift) for the neural network. The default accounts
              for 7 astrophysical parameters and a single redshift input.

        output_shape: **int / default: 1**
            | The number of outputs (temperature) from the neural network.
              This shouldn't need changing.

        layer_sizes: **list / default: [input_shape, input_shape]**
            | The number of hidden layers and the number of nodes in each
              layer. For example ``layer_sizes=[8, 8]`` will create
              two hidden layers both with 8 nodes (this is the default).

        base_dir: **string / default: 'model_dir/'**
            | This should be the same as the ``base_dir`` used when
              preprocessing. It contains the data that the network will
              work with and is the directory in which the trained model will
              be saved in.

        early_stop: **Bool / default: False**
            | If ``early_stop`` is set to ``True`` then the network will stop
              learning if the loss has not changed within
              the last twenty epochs.

        xHI: **Bool / default: False**
            | If True then ``globalemu`` will act as if it is training a
              neutral fraction history emulator.

        output_activation: **string / default: 'linear'**
            | Determines the output activation function for the network.
              Modifying this
              is useful if the emulator output is required to be positive or
              negative etc. If xHI is True then the output activation is
              set to 'relu' else the function is 'linear'. See the tensorflow
              documentation for more details on the types of activation
              functions available.

        loss_function: **Callable / default: None**
            | By default the code uses an MSE loss however users are able to
              pass their own loss functions when training the neural
              network. These should be functions that take in the true labels
              (temperatures) and the predicted labels and return some measure
              of loss. Care needs to be taken to ensure that the correct loss
              function is supplied when resuming the training of
              a previous run as ``globalemu`` will not check this. In order
              for the loss function to work it must be built
              using the tensorflow.keras backend. An example would be

              .. code:: python

                from tensorflow.keras import backend as K

                def custom_loss(true_labels, predicted_labels,
                                network_inputs):
                    return K.mean(K.abs(true_labels - predicted_labels))

              The function must take in as arguments the `true_labels`,
              the `predicted_labels` and the `network_inputs`.

        resume: **Bool / default: False**
            | If set to ``True`` then ``globalemu`` will look in the
              ``base_dir`` for a trained model and ``loss_history.txt``
              file (which contains the loss recorded at each epoch) and
              load these in to continue training. If ``resume`` is ``True``
              then you need to make sure all of the kwargs are set
              with the same values that they had in the initial training
              for a consistent run.

              There will be a human readable file in ``base_dir`` called
              "kwargs.txt" detailing
              the values of the kwargs that were provided for the
              initial training run. Anything missing from this file will
              have had its default value. This file will not be overwritten
              if ``resume=True``.

        random_seed: **int or float / default: None**
            | This kwarg sets the random seed used by tensorflow with the
              function ``tf.random.set_seed(random_seed)``. It should
              be used if you want to have reproducible results but note
              that it may cause an 'out of memory' error if training on
              large amounts of data
              (see https://github.com/tensorflow/tensorflow/issues/37252).
    """
    def __init__(self, **kwargs):
        # ------------------------------------------------------------------
        # kwarg validation
        # ------------------------------------------------------------------
        for key, values in kwargs.items():
            if key not in set(
                    ['batch_size', 'activation', 'epochs',
                     'lr', 'dropout', 'input_shape',
                     'output_shape', 'layer_sizes', 'base_dir',
                     'early_stop', 'xHI', 'resume',
                     'random_seed', 'output_activation',
                     'loss_function']):
                # Name the offending key so the user can fix the call.
                raise KeyError(
                    "Unexpected keyword argument in nn(): '" +
                    str(key) + "'")

        self.resume = kwargs.pop('resume', False)
        self.base_dir = kwargs.pop('base_dir', 'model_dir/')
        if type(self.base_dir) is not str:
            raise TypeError("'base_dir' must be a string.")
        elif self.base_dir.endswith('/') is False:
            raise KeyError("'base_dir' must end with '/'.")

        # Create the output directory up front. Previously it was only
        # created after 'kwargs.txt' had been written into it, so a fresh
        # run with a non-existent base_dir raised FileNotFoundError.
        if not os.path.exists(self.base_dir):
            os.mkdir(self.base_dir)

        if self.resume is not True:
            # Human readable record of the supplied kwargs so that a
            # resumed run can be configured consistently. Deliberately not
            # overwritten when resume=True.
            with open(self.base_dir + 'kwargs.txt', 'w') as f:
                for key, values in kwargs.items():
                    f.write(str(key) + ': ' + str(values) + '\n')

        self.batch_size = kwargs.pop('batch_size', 100)
        self.activation = kwargs.pop('activation', 'tanh')
        if type(self.activation) is not str:
            raise TypeError("'activation' must be a string.")
        self.epochs = kwargs.pop('epochs', 10)
        self.lr = kwargs.pop('lr', 1e-3)
        self.drop_val = kwargs.pop('dropout', 0)
        self.input_shape = kwargs.pop('input_shape', 8)
        self.output_shape = kwargs.pop('output_shape', 1)
        self.layer_sizes = kwargs.pop(
            'layer_sizes', [self.input_shape, self.input_shape])
        if type(self.layer_sizes) is not list:
            raise TypeError("'layer_sizes' must be a list.")
        self.early_stop = kwargs.pop('early_stop', False)
        self.xHI = kwargs.pop('xHI', False)
        self.random_seed = kwargs.pop('random_seed', None)

        # Type-check the boolean kwargs.
        boolean_kwargs = [self.resume, self.early_stop, self.xHI]
        boolean_strings = ['resume', 'early_stop', 'xHI']
        for i in range(len(boolean_kwargs)):
            if type(boolean_kwargs[i]) is not bool:
                raise TypeError("'" + boolean_strings[i] + "' must be a bool.")

        # Type-check the integer kwargs.
        int_kwargs = [self.batch_size, self.epochs, self.input_shape,
                      self.output_shape]
        int_strings = ['batch_size', 'epochs', 'input_shape',
                       'output_shape']
        for i in range(len(int_kwargs)):
            if type(int_kwargs[i]) is not int:
                raise TypeError("'" + int_strings[i] + "' must be an int.")

        # Type-check the float-like kwargs (ints are accepted too and
        # random_seed may legitimately be None).
        float_kwargs = [self.lr, self.drop_val,
                        self.random_seed]
        float_strings = ['lr', 'dropout', 'random_seed']
        for i in range(len(float_kwargs)):
            if float_kwargs[i] is not None:
                if type(float_kwargs[i]) not in set([float, int]):
                    raise TypeError("'" + float_strings[i] +
                                    "' must be a float.")

        loss_function = kwargs.pop('loss_function', None)
        if loss_function is not None:
            if not callable(loss_function):
                raise TypeError('loss_function should be a callable.')

        if self.random_seed is not None:
            tf.random.set_seed(self.random_seed)

        # ------------------------------------------------------------------
        # data loading
        # ------------------------------------------------------------------
        pwd = os.getcwd()
        train_dataset_fp = pwd + '/' + self.base_dir + 'train_dataset.csv'

        # Column names p0..p(n-1); the final column holds the label.
        column_names = [
            'p' + str(i)
            for i in range(self.input_shape + self.output_shape)]
        label_names = column_names[-1]

        train_dataset = tf.data.experimental.make_csv_dataset(
            train_dataset_fp,
            self.batch_size,
            column_names=column_names,
            label_name=label_names,
            num_epochs=1)

        test_data = np.loadtxt(self.base_dir + 'test_data.txt')
        test_labels = np.loadtxt(self.base_dir + 'test_label.txt')

        def pack_features_vector(features, labels):
            # Collapse the per-column dict produced by the CSV reader into
            # a single (batch, input_shape) feature tensor.
            return tf.stack(list(features.values()), axis=1), labels

        train_dataset = train_dataset.map(pack_features_vector)

        # ------------------------------------------------------------------
        # model construction / restoration
        # ------------------------------------------------------------------
        self.output_activation = kwargs.pop('output_activation', 'linear')
        if self.xHI is True:
            # A neutral fraction history is non-negative, hence 'relu'.
            self.output_activation = 'relu'

        if self.resume is True:
            # compile=False: the model is trained with a custom loop below,
            # so the saved optimizer/loss configuration is not needed.
            model = keras.models.load_model(
                self.base_dir + 'model.h5',
                compile=False)
        else:
            model = network_models().basic_model(
                self.input_shape, self.output_shape,
                self.layer_sizes, self.activation, self.drop_val,
                self.output_activation)

        def loss(model, x, y, training):
            # Returns (training loss, rmse). The rmse is always computed
            # for reporting, even when a custom loss_function is supplied.
            y_ = tf.transpose(model(x, training=training))[0]
            lf = loss_functions(y, y_)
            if loss_function is None:
                return lf.mse(), lf.rmse()
            else:
                return loss_function(y, y_, x), lf.rmse()

        def grad(model, inputs, targets):
            # One forward/backward pass; returns loss, rmse and gradients.
            with tf.GradientTape() as tape:
                loss_value, rmse = loss(model, inputs, targets, training=True)
            return loss_value, rmse, tape.gradient(
                loss_value, model.trainable_variables)

        optimizer = keras.optimizers.Adam(learning_rate=self.lr)

        if self.resume is True:
            # Reload the loss histories so the saved record stays contiguous
            # across resumed runs.
            train_loss_results = list(
                np.loadtxt(self.base_dir + 'loss_history.txt'))
            test_loss_results = list(
                np.loadtxt(self.base_dir + 'test_loss_history.txt'))
        else:
            train_loss_results = []
            test_loss_results = []
        train_rmse_results = []

        # ------------------------------------------------------------------
        # training loop
        # ------------------------------------------------------------------
        num_epochs = self.epochs
        c = 0  # epochs since the last test-loss improvement (early stopping)
        minimum_model = None
        for epoch in range(num_epochs):
            s = time.time()
            epoch_loss_avg = tf.keras.metrics.Mean()
            epoch_rmse_avg = tf.keras.metrics.Mean()
            for x, y in train_dataset:
                loss_values, rmse, grads = grad(model, x, y)
                optimizer.apply_gradients(
                    zip(grads, model.trainable_variables))
                epoch_loss_avg.update_state(loss_values)
                epoch_rmse_avg.update_state(rmse)
            train_loss_results.append(epoch_loss_avg.result())
            train_rmse_results.append(epoch_rmse_avg.result())
            e = time.time()

            test_loss, _ = loss(model, test_data, test_labels, training=False)
            test_loss_results.append(test_loss)

            print(
                'Epoch: {:03d}, Loss: {:.5f}, Test Loss: {:.5f}, '
                .format(epoch, epoch_loss_avg.result(), test_loss_results[-1])
                + 'RMSE: {:.5f}, Time: {:.3f}'
                .format(epoch_rmse_avg.result(), e-s), flush=True)

            if self.early_stop:
                c += 1
                if epoch == 0:
                    minimum_loss = test_loss_results[-1]
                    minimum_epoch = epoch
                    minimum_model = None
                else:
                    if test_loss_results[-1] < minimum_loss:
                        minimum_loss = test_loss_results[-1]
                        minimum_epoch = epoch
                        minimum_model = model
                        c = 0
                if minimum_model:
                    # Patience is 2% of the requested epochs (20 epochs for
                    # a 1000-epoch run, matching the class docstring).
                    if c == round((self.epochs/100)*2):
                        print('Early stopped. Minimum at = ' +
                              str(minimum_epoch) +
                              ' Epochs used = ' + str(epoch))
                        break

            # Periodic checkpoint so long runs survive interruption.
            if (epoch + 1) % 10 == 0:
                model.save(self.base_dir + 'model.h5')
                np.savetxt(
                    self.base_dir + 'loss_history.txt', train_loss_results)
                np.savetxt(
                    self.base_dir + 'test_loss_history.txt', test_loss_results)

        # Final save: prefer the best model found by early stopping.
        if minimum_model:
            minimum_model.save(self.base_dir + 'model.h5')
        else:
            model.save(self.base_dir + 'model.h5')
        np.savetxt(self.base_dir + 'loss_history.txt', train_loss_results)
        np.savetxt(self.base_dir + 'test_loss_history.txt', test_loss_results)