"""
Benchmark networks and utilities for evaluating NengoDL's performance.
"""

import inspect
import itertools
import os
import random
import time

import click
import matplotlib.pyplot as plt
import nengo
import numpy as np
import tensorflow as tf

import nengo_dl

def cconv(dimensions, neurons_per_d, neuron_type):
"""
Circular convolution (EnsembleArray) benchmark.
Parameters
----------
dimensions : int
Number of dimensions for vector values
neurons_per_d : int
Number of neurons to use per vector dimension
neuron_type : `~nengo.neurons.NeuronType`
Simulation neuron type
Returns
-------
net : `nengo.Network`
benchmark network
"""
with nengo.Network(label="cconv", seed=0) as net:
net.config[nengo.Ensemble].neuron_type = neuron_type
net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)
net.cconv = nengo.networks.CircularConvolution(
neurons_per_d, dimensions)
net.inp_a = nengo.Node([0] * dimensions)
net.inp_b = nengo.Node([1] * dimensions)
nengo.Connection(net.inp_a, net.cconv.A)
nengo.Connection(net.inp_b, net.cconv.B)
net.p = nengo.Probe(net.cconv.output)
return net
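
# Example usage (a sketch): each benchmark builder below returns a regular
# `nengo.Network` that can be simulated directly, e.g.
#
#   net = cconv(dimensions=128, neurons_per_d=64,
#               neuron_type=nengo.RectifiedLinear())
#   with nengo_dl.Simulator(net) as sim:
#       sim.run_steps(100)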

def integrator(dimensions, neurons_per_d, neuron_type):
"""
Single integrator ensemble benchmark.
Parameters
----------
dimensions : int
Number of dimensions for vector values
neurons_per_d : int
Number of neurons to use per vector dimension
neuron_type : `~nengo.neurons.NeuronType`
Simulation neuron type
Returns
-------
net : `nengo.Network`
benchmark network
"""
with nengo.Network(label="integrator", seed=0) as net:
net.config[nengo.Ensemble].neuron_type = neuron_type
net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)
net.integ = nengo.networks.EnsembleArray(neurons_per_d, dimensions)
nengo.Connection(net.integ.output, net.integ.input, synapse=0.01)
net.inp = nengo.Node([0] * dimensions)
nengo.Connection(net.inp, net.integ.input, transform=0.01)
net.p = nengo.Probe(net.integ.output)
return net

def pes(dimensions, neurons_per_d, neuron_type):
"""
PES learning rule benchmark.
Parameters
----------
dimensions : int
Number of dimensions for vector values
neurons_per_d : int
Number of neurons to use per vector dimension
neuron_type : `~nengo.neurons.NeuronType`
Simulation neuron type
Returns
-------
net : `nengo.Network`
benchmark network
"""
with nengo.Network(label="pes", seed=0) as net:
net.config[nengo.Ensemble].neuron_type = neuron_type
net.config[nengo.Ensemble].gain = nengo.dists.Choice([1, -1])
net.config[nengo.Ensemble].bias = nengo.dists.Uniform(-1, 1)
net.inp = nengo.Node([1] * dimensions)
net.pre = nengo.Ensemble(neurons_per_d * dimensions, dimensions)
net.post = nengo.Node(size_in=dimensions)
nengo.Connection(net.inp, net.pre)
conn = nengo.Connection(
net.pre, net.post, learning_rule_type=nengo.PES())
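        # the two connections below feed an error signal (inp - post)
        # into the PES learning rule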
nengo.Connection(net.post, conn.learning_rule, transform=-1)
nengo.Connection(net.inp, conn.learning_rule)
net.p = nengo.Probe(net.post)
return net

def basal_ganglia(dimensions, neurons_per_d, neuron_type):
"""
Basal ganglia network benchmark.
Parameters
----------
dimensions : int
Number of dimensions for vector values
neurons_per_d : int
Number of neurons to use per vector dimension
neuron_type : `~nengo.neurons.NeuronType`
Simulation neuron type
Returns
-------
net : `nengo.Network`
benchmark network
"""
with nengo.Network(label="basal_ganglia", seed=0) as net:
net.config[nengo.Ensemble].neuron_type = neuron_type
net.inp = nengo.Node([1] * dimensions)
net.bg = nengo.networks.BasalGanglia(dimensions, neurons_per_d)
nengo.Connection(net.inp, net.bg.input)
net.p = nengo.Probe(net.bg.output)
return net

def mnist(use_tensor_layer=True):
"""
A network designed to stress-test tensor layers (based on mnist net).
Parameters
----------
use_tensor_layer : bool
If True, use individual tensor_layers to build the network, as opposed
to a single TensorNode containing all layers.
Returns
-------
net : `nengo.Network`
benchmark network
"""
with nengo.Network() as net:
# create node to feed in images
net.inp = nengo.Node(np.ones(28 * 28))
if use_tensor_layer:
nengo_nl = nengo.RectifiedLinear()
ensemble_params = dict(max_rates=nengo.dists.Choice([100]),
intercepts=nengo.dists.Choice([0]))
amplitude = 1
synapse = None
x = nengo_dl.tensor_layer(
net.inp, tf.layers.conv2d, shape_in=(28, 28, 1), filters=32,
kernel_size=3
)
x = nengo_dl.tensor_layer(x, nengo_nl,
**ensemble_params)
x = nengo_dl.tensor_layer(
x, tf.layers.conv2d, shape_in=(26, 26, 32),
transform=amplitude, filters=32, kernel_size=3
)
x = nengo_dl.tensor_layer(x, nengo_nl,
**ensemble_params)
x = nengo_dl.tensor_layer(
x, tf.layers.average_pooling2d, shape_in=(24, 24, 32),
synapse=synapse, transform=amplitude, pool_size=2, strides=2)
x = nengo_dl.tensor_layer(
x, tf.layers.dense, units=128
)
x = nengo_dl.tensor_layer(x, nengo_nl,
**ensemble_params)
x = nengo_dl.tensor_layer(x, tf.layers.dropout, rate=0.4,
transform=amplitude)
x = nengo_dl.tensor_layer(x, tf.layers.dense, units=10)
else:
nl = tf.nn.relu
# def softlif_layer(x, sigma=1, tau_ref=0.002, tau_rc=0.02,
# amplitude=1):
# # x -= 1
# z = tf.nn.softplus(x / sigma) * sigma
# z += 1e-10
# rates = amplitude / (tau_ref + tau_rc * tf.log1p(1 / z))
# return rates
@nengo_dl.reshaped((28, 28, 1))
def mnist_node(_, x): # pragma: no cover
x = tf.layers.conv2d(x, filters=32, kernel_size=3,
activation=nl)
x = tf.layers.conv2d(x, filters=32, kernel_size=3,
activation=nl)
x = tf.layers.average_pooling2d(x, pool_size=2, strides=2)
x = tf.contrib.layers.flatten(x)
x = tf.layers.dense(x, 128, activation=nl)
x = tf.layers.dropout(x, rate=0.4)
x = tf.layers.dense(x, 10)
return x
node = nengo_dl.TensorNode(mnist_node, size_in=28 * 28,
size_out=10)
x = node
nengo.Connection(net.inp, node, synapse=None)
net.p = nengo.Probe(x)
return net
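
# Example usage (a sketch): the mnist network is driven by a constant
# all-ones input "image" (net.inp), so it can be profiled without
# loading real data:
#
#   with nengo_dl.Simulator(mnist(use_tensor_layer=True),
#                           minibatch_size=16) as sim:
#       sim.run_steps(50)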

def spaun(dimensions):
"""
Builds the Spaun network from [1]_
Parameters
----------
dimensions : int
Number of dimensions for vector values
Returns
-------
net : `nengo.Network`
benchmark network
References
----------
.. [1] Chris Eliasmith, Terrence C. Stewart, Xuan Choo, Trevor Bekolay,
Travis DeWolf, Yichuan Tang, and Daniel Rasmussen (2012). A large-scale
model of the functioning brain. Science, 338:1202-1205.
Notes
-----
This network needs to be installed via
.. code-block:: bash
pip install git+https://github.com/drasmuss/spaun2.0.git
"""
from _spaun.configurator import cfg
from _spaun.vocabulator import vocab
from _spaun.experimenter import experiment
from _spaun.modules.stim import stim_data
from _spaun.modules.vision import vis_data
from _spaun.modules.motor import mtr_data
from _spaun.spaun_main import Spaun
vocab.sp_dim = dimensions
cfg.mtr_arm_type = None
cfg.set_seed(1)
experiment.initialize(
"A", stim_data.get_image_ind, stim_data.get_image_label,
cfg.mtr_est_digit_response_time, "", cfg.rng)
vocab.initialize(
stim_data.stim_SP_labels, experiment.num_learn_actions, cfg.rng)
vocab.initialize_mtr_vocab(mtr_data.dimensions, mtr_data.sps)
vocab.initialize_vis_vocab(vis_data.dimensions, vis_data.sps)
return Spaun()

def random_network(dimensions, neurons_per_d, neuron_type, n_ensembles,
                   connections_per_ensemble, seed=0):
"""
Basal ganglia network benchmark.
Parameters
----------
dimensions : int
Number of dimensions for vector values
neurons_per_d : int
Number of neurons to use per vector dimension
neuron_type : `~nengo.neurons.NeuronType`
Simulation neuron type
n_ensembles : int
Number of ensembles in the network
connections_per_ensemble : int
Outgoing connections from each ensemble
Returns
-------
net : `nengo.Network`
benchmark network
"""
random.seed(seed)
with nengo.Network(label="random", seed=seed) as net:
net.inp = nengo.Node([0] * dimensions)
net.out = nengo.Node(size_in=dimensions)
net.p = nengo.Probe(net.out)
ensembles = [
nengo.Ensemble(neurons_per_d * dimensions, dimensions,
neuron_type=neuron_type)
for _ in range(n_ensembles)]
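        # fixed all-ones decoders; passing these through NoSolver below
        # skips the least-squares decoder solve, speeding up the build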
dec = np.ones((neurons_per_d * dimensions, dimensions))
for ens in net.ensembles:
# add a connection to input and output node, so we never have
# any "orphan" ensembles
nengo.Connection(net.inp, ens)
nengo.Connection(ens, net.out, solver=nengo.solvers.NoSolver(dec))
posts = random.sample(ensembles, connections_per_ensemble)
for post in posts:
nengo.Connection(ens, post, solver=nengo.solvers.NoSolver(dec))
return net

def run_profile(net, train=False, n_steps=150, do_profile=True, **kwargs):
"""
Run profiler on a benchmark network.
Parameters
----------
net : `~nengo.Network`
The nengo Network to be profiled.
train : bool
If True, profile the ``sim.train`` function. Otherwise, profile the
``sim.run`` function.
n_steps : int
The number of timesteps to run the simulation.
do_profile : bool
Whether or not to run profiling
Notes
-----
kwargs will be passed on to `.Simulator`
"""
with net:
nengo_dl.configure_settings(inference_only=not train)
with nengo_dl.Simulator(net, **kwargs) as sim:
# note: we run a few times to try to eliminate startup overhead (only
# the data from the last run will be kept)
if train:
opt = tf.train.GradientDescentOptimizer(0.001)
x = np.random.randn(sim.minibatch_size, n_steps, net.inp.size_out)
y = np.random.randn(sim.minibatch_size, n_steps, net.p.size_in)
for _ in range(2):
sim.train({net.inp: x, net.p: y}, optimizer=opt, n_epochs=1,
profile=do_profile)
start = time.time()
sim.train({net.inp: x, net.p: y}, optimizer=opt, n_epochs=1,
profile=do_profile)
exec_time = time.time() - start
print("Execution time:", exec_time)
else:
for _ in range(2):
sim.run_steps(n_steps, profile=do_profile)
start = time.time()
sim.run_steps(n_steps, profile=do_profile)
exec_time = time.time() - start
print("Execution time:", exec_time)
return exec_time
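
# Example usage (a sketch), mirroring the calls in `performance_samples`
# below:
#
#   net = integrator(128, 32, nengo.LIF())
#   run_profile(net, train=True, minibatch_size=64, n_steps=1000,
#               do_profile=False)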

@click.group(chain=True)
def main():
"""Command-line interface for benchmarks."""

@main.command()
@click.pass_obj
@click.option("--benchmark", default="cconv", help="Name of benchmark network")
@click.option("--dimensions", default=128, help="Number of dimensions")
@click.option("--neurons_per_d", default=64, help="Neurons per dimension")
@click.option("--neuron_type", default="RectifiedLinear",
help="Nengo neuron model")
@click.option("--kwarg", type=str, multiple=True,
help="Arbitrary kwarg to pass to benchmark network (key=value)")
def build(obj, benchmark, dimensions, neurons_per_d, neuron_type,
kwarg):
"""Builds one of the benchmark networks"""
# get benchmark network by name
benchmark = globals()[benchmark]
# get the neuron type object from string class name
try:
neuron_type = getattr(nengo, neuron_type)()
except AttributeError:
neuron_type = getattr(nengo_dl, neuron_type)()
# set up kwargs
kwargs = dict((k, int(v)) for k, v in [a.split("=") for a in kwarg])
# add the special cli kwargs if applicable; note we could just do
# everything through --kwarg, but it is convenient to have a
# direct option for the common arguments
params = inspect.signature(benchmark).parameters
for kw in ("benchmark", "dimensions", "neurons_per_d", "neuron_type"):
if kw in params:
kwargs[kw] = locals()[kw]
# build benchmark and add to context for chaining
print("Building %s with %s" % (
nengo_dl.utils.function_name(benchmark, sanitize=False), kwargs))
obj["net"] = benchmark(**kwargs)

@main.command()
@click.pass_obj
@click.option("--train/--no-train", default=False,
help="Whether to profile training (as opposed to running) "
"the network")
@click.option("--n_steps", default=150,
help="Number of steps for which to run the simulation")
@click.option("--batch_size", default=1, help="Number of inputs to the model")
@click.option("--device", default="/gpu:0",
help="TensorFlow device on which to run the simulation")
@click.option("--unroll", default=25,
help="Number of steps for which to unroll the simulation")
@click.option("--time-only", is_flag=True, default=False,
help="Only count total time, rather than profiling internals")
def profile(obj, train, n_steps, batch_size, device, unroll, time_only):
"""Runs profiling on a network (call after 'build')"""
if "net" not in obj:
raise ValueError("Must call `build` before `profile`")
obj["time"] = run_profile(
obj["net"], do_profile=not time_only, train=train, n_steps=n_steps,
minibatch_size=batch_size, device=device, unroll_simulation=unroll)

@main.command()
def matmul_vs_reduce(): # pragma: no cover
"""
Compares two different approaches to batched matrix multiplication
(tf.matmul vs tf.multiply+tf.reduce_sum).
This is relevant for figuring out which approach is more efficient
on a given system for different matrix shapes (determining which method
we use in DotIncBuilder).
"""
# a_shape = (n_ops, s0, s1, 1)
# x_shape = (n_ops, 1, s1, mini)
# for matmul we omit the 1 dimensions
    a_c = tf.placeholder(tf.float64, shape=(None, None, None), name="a_c")
    x_c = tf.placeholder(tf.float64, shape=(None, None, None), name="x_c")
    a_d = tf.placeholder(tf.float64, shape=(None, None, None, 1), name="a_d")
    x_d = tf.placeholder(tf.float64, shape=(None, 1, None, None), name="x_d")
c = tf.matmul(a_c, x_c)
d = tf.reduce_sum(tf.multiply(a_d, x_d), axis=-2)
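    # the two formulations compute the same thing: with A of shape
    # (n_ops, s0, s1) and X of shape (n_ops, s1, mini),
    #   matmul(A, X) == reduce_sum(A[..., None] * X[:, None], axis=-2)
    # (the elementwise product broadcasts over the s0 and mini axes)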
reps = 100
n_ops_range = [1, 4, 8, 16, 32, 64]
mini_range = [1, 16, 32, 64, 128]
s0_range = [1, 64, 128, 192, 256]
s1_range = [1, 64, 128, 192, 256]
matmul_times = np.zeros((len(n_ops_range), len(mini_range), len(s0_range),
len(s1_range)))
reduce_times = np.zeros_like(matmul_times)
params = itertools.product(
enumerate(n_ops_range), enumerate(mini_range), enumerate(s0_range),
enumerate(s1_range))
with tf.Session() as sess:
for (i, n_ops), (j, mini), (k, s0), (l, s1) in params:
print(n_ops, mini, s0, s1)
a_val = np.random.randn(n_ops, s0, s1, 1)
x_val = np.random.randn(n_ops, 1, s1, mini)
for r in range(reps + 3):
if r == 3:
start = time.time()
c_val = sess.run(c, feed_dict={a_c: a_val[..., 0],
x_c: x_val[:, 0]})
matmul_times[i, j, k, l] = (time.time() - start) / reps
for r in range(reps + 3):
if r == 3:
start = time.time()
d_val = sess.run(d, feed_dict={a_d: a_val, x_d: x_val})
reduce_times[i, j, k, l] = (time.time() - start) / reps
assert np.allclose(c_val, d_val)
fig, ax = plt.subplots(len(n_ops_range), len(mini_range), sharex=True,
sharey=True)
X, Y = np.meshgrid(s0_range, s1_range)
Z = matmul_times - reduce_times
v = np.sort(np.concatenate((np.linspace(np.min(Z), np.max(Z), 10), [0])))
for i, n_ops in enumerate(n_ops_range):
for j, mini in enumerate(mini_range):
cs = ax[i][j].contourf(X, Y, Z[i, j], v)
if i == 0:
ax[i][j].set_title("mini %d" % mini)
if j == 0:
ax[i][j].set_ylabel("ops %d" % n_ops)
DATA_DIR = os.path.join(os.path.dirname(nengo_dl.__file__), "..", "data")
np.savez(os.path.join(DATA_DIR, "matmul_benchmarks"), n_ops_range,
mini_range, s0_range, s1_range, Z)
fig.subplots_adjust(right=0.8)
cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
fig.colorbar(cs, cax=cbar_ax)
plt.show()

@main.command()
@click.option("--device", default="/gpu:0",
help="TensorFlow device on which to run benchmarks")
def performance_samples(device): # pragma: no cover
"""
Run a brief sample of the benchmarks to check overall performance.
This is mainly used to quickly check that there haven't been any unexpected
performance regressions.
"""
# TODO: automatically run some basic performance tests during CI
default_kwargs = {"n_steps": 1000, "device": device,
"unroll_simulation": 25,
"progress_bar": False, "do_profile": False}
print("cconv + relu")
net = cconv(128, 64, nengo.RectifiedLinear())
run_profile(net, minibatch_size=64, **default_kwargs)
print("cconv + lif")
net = cconv(128, 64, nengo.LIF())
run_profile(net, minibatch_size=64, **default_kwargs)
print("integrator training + relu")
net = integrator(128, 32, nengo.RectifiedLinear())
run_profile(net, minibatch_size=64, train=True, **default_kwargs)
print("integrator training + lif")
net = integrator(128, 32, nengo.LIF())
run_profile(net, minibatch_size=64, train=True, **default_kwargs)
print("random")
net = random_network(128, 64, nengo.RectifiedLinear(), n_ensembles=50,
connections_per_ensemble=5, seed=0)
run_profile(net, **default_kwargs)
print("spaun")
net = spaun(1)
run_profile(net, **default_kwargs)
# example benchmark data
# CPU: 4.00GHz Intel Core i7-6700K
# GPU: NVIDIA GeForce GTX 980 Ti
# TensorFlow version: 1.10.0
# Nengo version: 2.8.0
# NengoDL version: 1.2.0
# cconv + relu
# Execution time: 1.0098507404327393
# cconv + lif
# Execution time: 2.074916362762451
# integrator training + relu
# Execution time: 1.8205187320709229
# integrator training + lif
# Execution time: 2.669060707092285
# random
# Execution time: 21.686023235321045
# spaun
# Execution time: 9.540623426437378

if __name__ == "__main__":
main(obj={}) # pragma: no cover