# Source code for penne.expr

"""Building and using neural networks."""

from . import debug
from . import backend
from . import backend as numpy
from six.moves import range
import operator
import traceback # for expression tracebacks
import subprocess # for calling graphviz

# Names exported by ``from <this module> import *``.  The comparison,
# ``where`` and logical operations are commented out below, so they are not
# part of the declared public interface.
__all__ = ["Expression", "Unary", "Binary", "Reduction",
           "constant", "one_hot", "zeros", "ones", "full",
           "parameter", "load_model", "save_model",
           "add", "subtract", "multiply", "divide", "negative", "power",
           "maximum", "minimum", "clip", 
           #"less", "less_equal", "greater", "greater_equal", "where",
           #"logical_and", "logical_or", "logical_not",
           "exp", "log", "tanh",
           "asum", "amax", "amin", "mean",
           "dot", "einsum", "vecdot",
           "concatenate", "stack", "transpose", "reshape", "expand_dims", "setitem",
           "topological", "graphviz"]

class Expression(object):
    """Base class for expression classes.

    Every instance is a node of a computation graph: ``args`` holds the
    argument expressions and ``serial`` is a number taken from a class-wide
    counter when the node is created.
    """
    # Class-wide counter.  Each new instance copies the current value into
    # its own ``serial`` attribute and then increments the counter.
    serial = 0

    def __init__(self, *args):
        """Store the argument expressions and assign a serial number.

        :param args: the expressions this node is computed from.
        :raises TypeError: if any argument is not an Expression.
        """
        self.args = args
        if debug.stack or debug.profile:
            self.stack = debug.extract_stack()
        for i, arg in enumerate(args):
            if not isinstance(arg, Expression):
                raise TypeError("expected Expression as argument %d, got %s" % (i, arg))
        self.serial = Expression.serial
        Expression.serial += 1

    def forward(self, values):
        """Compute value of self given values of args."""
        raise NotImplementedError()

    def backward(self, values, gradients):
        """Update gradient with respect to args given gradient with respect to self.

        pre/post: gradients[self] has the same shape as values[self].
        pre: gradients[self.args[i]] is either 0.0 or has the same shape as values[self.args[i]].
        post: gradients[self.args[i]] has the same shape as values[self.args[i]].
        """
        pass

    def __getitem__(self, item):    return getitem(self, item)
    def __setitem__(self, item, x):
        raise NotImplementedError("use a = setitem(a, item, b) instead")

    def __add__(self, other):    return add(self, other)
    def __sub__(self, other):    return subtract(self, other)
    def __mul__(self, other):    return multiply(self, other)
    def __div__(self, other):    return divide(self, other)
    # Python 3 (and Python 2 under ``from __future__ import division``)
    # dispatches the ``/`` operator to __truediv__, not __div__.
    __truediv__ = __div__
    def __pow__(self, other):    return power(self, other)
    def __neg__(self):           return negative(self)

    # NOTE(review): less/less_equal/greater/greater_equal are not defined in
    # the visible part of this module, so these raise NameError when used
    # unless the names are provided elsewhere.
    def __lt__(self, other):     return less(self, other)
    def __le__(self, other):     return less_equal(self, other)
    def __gt__(self, other):     return greater(self, other)
    def __ge__(self, other):     return greater_equal(self, other)

    def dot(self, other):        return dot(self, other)

    def __str__(self):
        args = ["<%s>" % arg.serial for arg in self.args]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(args))

    def _repr_png_(self):
        """IPython magic: show PNG representation of the expression graph.
        Adapted from pyfst.

        Runs the external ``dot`` program on the output of ``graphviz(self)``.

        :return: the PNG data written by ``dot``.
        :raises Exception: if ``dot`` writes anything to standard error.
        """
        source = graphviz(self)
        # The pipes are opened in binary mode, so text must be encoded first.
        if not isinstance(source, bytes):
            source = source.encode("utf-8")
        process = subprocess.Popen(['dot', '-Tpng'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate(source)
        if err:
            raise Exception(err)
        return out

class constant(Expression):
    """An expression with a fixed value and no arguments.

    The value is stored in the ``value`` field and may be replaced between
    calls to ``compute_values``.

    :param value: The value of the new expression.
    :type value: NumPy array
    """
    def __init__(self, value):
        Expression.__init__(self)
        # Converted with the backend's asarray.
        self.value = numpy.asarray(value)

    def forward(self, values):
        """Store the current ``value`` as this node's value."""
        values[self] = self.value

    def __str__(self):
        shown = str(self.value)
        return "constant(%s)" % shown
        
def one_hot(size, i):
    """A one-hot vector.

    :param size: The size of the vector.
    :type size: int
    :param i: The entry to set to one; all others are zero.
    :type i: int
    :return: a constant expression holding the vector.
    """
    # The length comes from the ``size`` parameter.
    result = numpy.zeros((size,))
    result[i] = 1.
    return constant(result)

def zeros(shape):
    """Return a constant expression filled with zeros.

    :param shape: shape passed to the backend's ``zeros``.
    """
    value = numpy.zeros(shape)
    return constant(value)
def ones(shape):
    """Return a constant expression filled with ones.

    :param shape: shape passed to the backend's ``ones``.
    """
    value = numpy.ones(shape)
    return constant(value)
def full(shape, value):
    """Return a constant expression filled with ``value``.

    :param shape: shape passed to the backend's ``full``.
    :param value: fill value passed to the backend's ``full``.
    """
    filled = numpy.full(shape, value)
    return constant(filled)

## Parameters and models

class parameter(Expression):
    """A trainable parameter.

    On construction the parameter appends itself to ``model``, which
    defaults to the class-level list ``parameter.all``.

    :param value: The initial value of the new parameter.
    :type value: Numpy array
    :param model: list the new parameter is appended to.
    """

    # Global list of all parameter objects.  It is deliberately shared: it is
    # the default ``model`` of the constructor.
    all = []

    def __init__(self, value, model=all):
        Expression.__init__(self)
        initial = numpy.asarray(value)
        self.value = initial
        model.append(self)

    def forward(self, values):
        """Store the current ``value`` as this node's value."""
        values[self] = self.value

    def __str__(self):
        shown = str(self.value)
        return "parameter(%s)" % shown

def save_model(outfile, model=parameter.all):
    """Saves parameters to a file.

    The value of each parameter in ``model`` is written in order with the
    backend's ``save``.

    :param outfile: File or filename to write to.
    :type outfile: file or str
    :param model: the parameters to save (default: all parameters).
    """
    if isinstance(outfile, str):
        # The saved arrays are binary data, so the file must be opened in
        # binary mode.
        with open(outfile, "wb") as outfilehandle:
            save_model(outfilehandle, model)
        return
    for param in model:
        numpy.save(outfile, param.value)

def load_model(infile, model=parameter.all):
    """Loads parameters from a file.

    Assumes that there are exactly the same number of parameters as
    when the model was saved, created in the same order and with the
    same shapes.

    :param infile: File or filename to read from.
    :type infile: file or str
    :param model: the parameters to load into (default: all parameters).
    :raises ValueError: if a stored array's shape differs from the shape of
        the parameter it is loaded into.
    """
    if isinstance(infile, str):
        # The stored arrays are binary data, so read in binary mode.
        with open(infile, "rb") as infilehandle:
            load_model(infilehandle, model)
        return
    for param in model:
        v = numpy.load(infile)
        if param.value.shape != v.shape:
            raise ValueError("expected shape {}, read shape {}".format(param.value.shape, v.shape))
        # Overwrite in place so existing references to the array stay valid.
        param.value[...] = v

class Unary(Expression):
    """Base class for unary componentwise operations.

    Subclasses provide ``f(x)`` and ``dfdx(x, z)``, where ``z`` is the value
    of ``f(x)``.
    """
    def __init__(self, x):
        Expression.__init__(self, x)

    def forward(self, values):
        """Store ``f`` applied to the argument's value."""
        x = self.args[0]
        values[self] = self.f(values[x])

    def backward(self, values, gradients):
        """Add ``dfdx * gradients[self]`` to the argument's gradient."""
        x = self.args[0]
        # Skip the derivative entirely when no gradient is requested for x.
        if x not in gradients:
            return
        dfdx = self.dfdx(values[x], values[self])
        gradients[x] += contract_like(dfdx * gradients[self], values[x])

class Binary(Expression):
    """Base class for binary componentwise operations.

    Subclasses provide ``f(x, y)`` and, unless they override ``backward``,
    ``dfdx(x, y, z)`` and ``dfdy(x, y, z)``, where ``z`` is the value of
    ``f(x, y)``.
    """
    def __init__(self, x, y):
        Expression.__init__(self, x, y)

    def forward(self, values):
        """Store ``f`` applied to the two argument values."""
        x, y = self.args
        values[self] = self.f(values[x], values[y])

    def backward(self, values, gradients):
        """Add each partial derivative times ``gradients[self]`` to the
        gradient of the corresponding argument.

        ``contract_like`` sums the product back down to the argument's shape.
        """
        x, y = self.args
        # Each partial derivative is evaluated only when its argument
        # actually needs a gradient.
        if x in gradients:
            dfdx = self.dfdx(values[x], values[y], values[self])
            gradients[x] += contract_like(dfdx * gradients[self], values[x])
        if y in gradients:
            dfdy = self.dfdy(values[x], values[y], values[self])
            gradients[y] += contract_like(dfdy * gradients[self], values[y])

## Arithmetic operations

class add(Binary):
    """Componentwise sum of two expressions."""

    @staticmethod
    def f(x, y):
        return numpy.add(x, y)

    # "Inline" df to avoid multiplications by 1 in this very common case
    def backward(self, values, gradients):
        """Add ``gradients[self]``, contracted to each argument's shape, to
        that argument's gradient."""
        for operand in self.args:
            if operand in gradients:
                gradients[operand] += contract_like(gradients[self], values[operand])

class subtract(Binary):
    """Componentwise difference ``x - y``."""

    @staticmethod
    def f(x, y):
        return numpy.subtract(x, y)

    @staticmethod
    def dfdx(x, y, z):
        return 1.

    @staticmethod
    def dfdy(x, y, z):
        return -1.

class negative(Unary):
    """Componentwise negation."""

    @staticmethod
    def f(x):
        return numpy.negative(x)

    @staticmethod
    def dfdx(x, z):
        return -1.

class multiply(Binary):
    """Componentwise product ``x * y``."""

    @staticmethod
    def f(x, y):
        return numpy.multiply(x, y)

    @staticmethod
    def dfdx(x, y, z):
        # d(x*y)/dx
        return y

    @staticmethod
    def dfdy(x, y, z):
        # d(x*y)/dy
        return x

class divide(Binary):
    """Componentwise quotient ``x / y``."""

    @staticmethod
    def f(x, y):
        return numpy.divide(x, y)

    @staticmethod
    def dfdx(x, y, z):
        # d(x/y)/dx
        return 1./y

    @staticmethod
    def dfdy(x, y, z):
        # d(x/y)/dy
        return -x/y**2

class power(Binary):
    """Componentwise power ``x ** y``.

    Both derivatives are written in terms of the result ``z``.
    """

    @staticmethod
    def f(x, y):
        return numpy.power(x, y)

    @staticmethod
    def dfdx(x, y, z):
        # d(x**y)/dx, expressed as z*y/x
        return z*y/x

    @staticmethod
    def dfdy(x, y, z):
        # d(x**y)/dy, expressed as z*log(x)
        return z*numpy.log(x)

class log(Unary):
    """Componentwise logarithm, computed with the backend's ``log``."""

    @staticmethod
    def f(x):
        return numpy.log(x)

    @staticmethod
    def dfdx(x, z):
        return 1./x

class exp(Unary):
    """Componentwise exponential, computed with the backend's ``exp``."""

    @staticmethod
    def f(x):
        return numpy.exp(x)

    @staticmethod
    def dfdx(x, z):
        # The derivative is the result itself.
        return z

class tanh(Unary):
    """Componentwise hyperbolic tangent."""

    gain = 1.

    @staticmethod
    def f(x):
        return numpy.tanh(x)

    @staticmethod
    def dfdx(x, z):
        """Return the derivative given the result ``z``.

        Tries the backend's ``onemxx`` first and falls back to ``1 - z**2``
        on AttributeError.
        """
        # NOTE(review): onemxx presumably computes 1 - z*z; confirm against
        # the backend.
        try:
            derivative = numpy.onemxx(z)
        except AttributeError:
            derivative = 1.-z**2
        return derivative

class maximum(Binary):
    """Componentwise maximum of two expressions.

    The gradient flows to ``x`` where ``x > y`` and to ``y`` elsewhere, so
    ties go to ``y``.
    """

    @staticmethod
    def f(x, y):
        return numpy.maximum(x, y)

    @staticmethod
    def dfdx(x, y, z):
        x_wins = x > y
        return x_wins.astype(float)

    @staticmethod
    def dfdy(x, y, z):
        y_wins = x <= y
        return y_wins.astype(float)

class minimum(Binary):
    """Componentwise minimum of two expressions.

    The gradient flows to ``x`` where ``x < y`` and to ``y`` elsewhere, so
    ties go to ``y``.
    """

    @staticmethod
    def f(x, y):
        return numpy.minimum(x, y)

    @staticmethod
    def dfdx(x, y, z):
        x_wins = x < y
        return x_wins.astype(float)

    @staticmethod
    def dfdy(x, y, z):
        y_wins = x >= y
        return y_wins.astype(float)

def clip(x, lo, hi):
    """Limit ``x`` to the range from ``lo`` to ``hi``.

    Built as ``minimum(maximum(x, lo), hi)``.
    """
    bounded_below = maximum(x, lo)
    return minimum(bounded_below, hi)

## Conditionals

class where(Expression):
    """Componentwise selection: takes ``x`` where ``c`` is true, else ``y``.

    Constructed as ``where(c, x, y)`` with three Expression arguments.
    No gradient is propagated to the condition ``c``.
    """
    def forward(self, values):
        c, x, y = self.args
        values[self] = numpy.where(values[c], values[x], values[y])

    def backward(self, values, gradients):
        """Route ``gradients[self]`` to ``x`` where the condition holds and
        to ``y`` where it does not."""
        c, x, y = self.args
        # The mask comes from the *value* of the condition; ``c`` itself is
        # an Expression and has no array methods.
        cond = numpy.asarray(values[c]).astype(bool)
        if x in gradients:
            gradients[x] += contract_like(cond * gradients[self], values[x])
        if y in gradients:
            # Contract to y's own shape.
            gradients[y] += contract_like(numpy.logical_not(cond) * gradients[self], values[y])

"""
class greater(Binary):
    f = staticmethod(lambda x,y: numpy.greater(x,y))
class greater_equal(Binary):
    f = staticmethod(lambda x,y: numpy.greater_equal(x,y))
class less(Binary):
    f = staticmethod(lambda x,y: numpy.less(x,y))
class less_equal(Binary):
    f = staticmethod(lambda x,y: numpy.less_equal(x,y))
class logical_and(Binary):
    f = staticmethod(lambda x,y: numpy.logical_and(x,y))
class logical_or(Binary):
    f = staticmethod(lambda x,y: numpy.logical_or(x,y))
class logical_not(Binary):
    f = staticmethod(lambda x: numpy.logical_not(x))"""

### Reductions

def _keepdims(shp, axis, keepdims):
    if keepdims:
        return shp
    if axis is None:
        assert shp == ()
        return shp
    if type(axis) is int:
        if axis < 0: axis += len(shp)+1
        return shp[:axis] + (1,) + shp[axis:]
    if type(axis) is tuple:
        shp = list(shp)
        nd = len(shp)+len(axis)
        axis = [i if i >= 0 else i+nd for i in axis]
        for i in axis:
            shp[i:i] = [1]
        return tuple(shp)
    raise ValueError("invalid axis")

class Reduction(Expression):
    """Base class for reduction operations.

    Subclasses provide ``f(x, axis, keepdims)`` and ``df(x, y, axis)``,
    where ``y`` is the result reshaped so that the reduced axes have size
    one.

    :param x: the expression to reduce.
    :param axis: axis or axes to reduce over (None means all).
    :param keepdims: leave reduced axes in the result with size one.
    """

    def __init__(self, x, axis=None, keepdims=False):
        Expression.__init__(self, x)
        self.axis = axis
        self.keepdims = keepdims

    def forward(self, values):
        operand = values[self.args[0]]
        values[self] = self.f(operand, axis=self.axis, keepdims=self.keepdims)

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg in gradients:
            # Shape of the result as if keepdims were True, so that it
            # broadcasts against the argument.
            expanded = _keepdims(values[self].shape, self.axis, self.keepdims)

            grad_out = numpy.asarray(gradients[self]).reshape(expanded)
            value_out = numpy.asarray(values[self]).reshape(expanded)

            local = self.df(values[arg], value_out, axis=self.axis)
            # A zero-dimensional gradient is replaced by a zero array of the
            # argument's shape before accumulating.
            if numpy.ndim(gradients[arg]) == 0:
                gradients[arg] = numpy.zeros_like(values[arg])
            gradients[arg] += grad_out * local

class sum(Reduction):
    """Sum over the given axis or axes.

    Within this module the name shadows the builtin ``sum``; the class is
    also bound to the name ``asum``.
    """

    @staticmethod
    def f(x, axis, keepdims):
        return numpy.sum(x, axis=axis, keepdims=keepdims)

    @staticmethod
    def df(x, y, axis):
        return 1.
asum = sum

class amax(Reduction):
    """Maximum over the given axis or axes.

    The gradient is divided equally among all entries equal to the maximum.
    The class is also bound to the name ``max``.
    """

    @staticmethod
    def f(x, axis, keepdims):
        return numpy.amax(x, axis=axis, keepdims=keepdims)

    @staticmethod
    def df(x, y, axis):
        # 1 where an entry equals the maximum, 0 elsewhere.
        hits = (x == y).astype(x.dtype)
        # Number of maximal entries along the reduced axes.
        count = numpy.asum(hits, axis=axis, keepdims=True)
        return hits / count
max = amax

class amin(Reduction):
    """Minimum over the given axis or axes.

    The gradient is divided equally among all entries equal to the minimum.
    The class is also bound to the name ``min``.
    """

    @staticmethod
    def f(x, axis, keepdims):
        return numpy.amin(x, axis=axis, keepdims=keepdims)

    @staticmethod
    def df(x, y, axis):
        # 1 where an entry equals the minimum, 0 elsewhere.
        hits = (x == y).astype(x.dtype)
        # Number of minimal entries along the reduced axes.
        count = numpy.asum(hits, axis=axis, keepdims=True)
        return hits / count
min = amin

class mean(Reduction):
    """Mean over the given axis or axes."""

    @staticmethod
    def f(x, axis, keepdims):
        return numpy.mean(x, axis=axis, keepdims=keepdims)

    @staticmethod
    def df(x, y, axis):
        # Ratio of result size to input size, i.e. one over the number of
        # entries averaged into each result element.
        result_size = float(y.size)
        return result_size / x.size

class vecdot(Expression):
    """Equivalent to sum(x*y, axis).

    :type x: Expression
    :type y: Expression
    :param axis: axis or axes along which to perform the sum-product.
    :type axis: int or tuple of ints
    :param keepdims: leave axis or axes in result as dimensions with size one.
    :type keepdims: bool
    """
    def __init__(self, x, y, axis=None, keepdims=False):
        Expression.__init__(self, x, y)
        self.axis = axis
        self.keepdims = keepdims

    def forward(self, values):
        x, y = self.args
        values[self] = numpy.vecdot(values[x], values[y], self.axis, self.keepdims)

    def backward(self, values, gradients):
        """Add ``gradients[self] * y`` to x's gradient and
        ``gradients[self] * x`` to y's gradient."""
        x, y = self.args
        # As in the other operations, only arguments present in
        # ``gradients`` receive an update.
        if x not in gradients and y not in gradients:
            return
        # Reshape the incoming gradient so the reduced axes have size one
        # and it broadcasts against the argument values.
        shp = _keepdims(values[self].shape, self.axis, self.keepdims)
        gself = numpy.asarray(gradients[self]).reshape(shp)
        if x in gradients:
            gradients[x] += contract_like(gself * values[y], values[x])
        if y in gradients:
            gradients[y] += contract_like(gself * values[x], values[y])

### Product-like operations

class dot(Expression):
    """Dot product of two expressions, computed with the backend's ``dot``.

    :type x: Expression
    :type y: Expression
    """
    def __init__(self, x, y):
        Expression.__init__(self, x, y)

    def forward(self, values):
        xv = values[self.args[0]]
        yv = values[self.args[1]]
        values[self] = numpy.dot(xv, yv)

    def backward(self, values, gradients):
        """Propagate ``gradients[self]`` to both arguments.

        The branches are selected by the number of dimensions of the two
        argument values; the low-dimensional cases come first and the last
        two branches handle higher-dimensional arguments by reshaping to
        two dimensions.
        """
        # NOTE(review): add_outer and add_dot are backend functions; they
        # presumably accumulate their product into the third argument.
        # Confirm against the backend.
        x, y = self.args
        xd, yd = numpy.ndim(values[x]), numpy.ndim(values[y])

        # Common cases

        if xd <= 1 and yd <= 1:
            if x in gradients:
                gradients[x] += numpy.dot(gradients[self], values[y])
            if y in gradients:
                gradients[y] += numpy.dot(values[x], gradients[self])

        elif xd == 2 and yd == 1:
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_outer(gradients[self], values[y], gradients[x])
                else:
                    gradients[x] += numpy.outer(gradients[self], values[y])
            if y in gradients:
                gradients[y] += numpy.dot(values[x].T, gradients[self])

        elif xd == 1 and yd == 2:
            if x in gradients:
                gradients[x] += numpy.dot(gradients[self], values[y].T)
            if y in gradients:
                if numpy.ndim(gradients[y]) > 0:
                    gradients[y] = numpy.add_outer(values[x], gradients[self], gradients[y])
                else:
                    gradients[y] += numpy.outer(values[x], gradients[self])

        elif xd == 2 and yd == 2:
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_dot(gradients[self], values[y].T, gradients[x])
                else:
                    gradients[x] += numpy.dot(gradients[self], values[y].T)
            if y in gradients:
                if numpy.ndim(gradients[y]) > 0:
                    gradients[y] = numpy.add_dot(values[x].T, gradients[self], gradients[y])
                else:
                    gradients[y] += numpy.dot(values[x].T, gradients[self])

        elif yd == 1:
            # Flatten all but the last axis of x into rows.
            xv = values[x]
            xr = xv.reshape((-1, xv.shape[-1]))
            gzr = gradients[self].reshape((xr.shape[0],))
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_outer(gzr, values[y], gradients[x])
                else:
                    gradients[x] += numpy.outer(gzr, values[y]).reshape(values[x].shape)
            if y in gradients:
                gradients[y] += numpy.dot(xr.T, gzr).reshape(values[y].shape)

        else:
            # Move the second-to-last axis of y to the front, then flatten
            # both operands to two dimensions.
            xv, yv = values[x], numpy.swapaxes(values[y], -2, 0)
            xr = xv.reshape((-1, xv.shape[-1]))
            yr = yv.reshape((yv.shape[0], -1))
            gzr = gradients[self].reshape((xr.shape[0], yr.shape[1]))
            if x in gradients:
                gradients[x] += numpy.dot(gzr, yr.T).reshape(xv.shape)
            if y in gradients:
                gradients[y] += numpy.swapaxes(numpy.dot(xr.T, gzr).reshape(yv.shape), -2, 0)

class einsum(Expression):
    """Einstein summation over the argument expressions.

    :param subscripts: subscript string in explicit form; it is split on
        ``"->"`` into operand subscripts and output subscripts.
    :param args: the operand expressions.
    """
    def __init__(self, subscripts, *args):
        Expression.__init__(self, *args)
        self.subscripts = subscripts
        lhs, rhs = subscripts.split("->")
        # One backward subscript string per operand: all operands plus the
        # output gradient go in, and that operand's subscripts come out.
        self.bsubscripts = ["%s,%s->%s" % (lhs, rhs, operand)
                            for operand in lhs.split(",")]

    def forward(self, values):
        operands = [values[arg] for arg in self.args]
        values[self] = numpy.einsum(self.subscripts, *operands)

    def backward(self, values, gradients):
        operands = [values[arg] for arg in self.args]
        operands.append(gradients[self])
        for k, arg in enumerate(self.args):
            if arg not in gradients:
                continue
            # Temporarily replace this operand by ones of the same shape.
            operands[k] = numpy.broadcast_to(1., values[arg].shape)
            gradients[arg] += numpy.einsum(self.bsubscripts[k], *operands)
            operands[k] = values[arg]

### Cutting and pasting

class concatenate(Expression):
    """Join a sequence of expressions along an existing axis.

    :param args: the expressions to join.
    :param axis: the axis along which to join.
    :type axis: int
    :raises TypeError: if axis is not an int.
    """
    def __init__(self, args, axis=0):
        if not isinstance(axis, int):
            raise TypeError("axis must be an int")
        Expression.__init__(self, *args)
        self.axis = axis

    def forward(self, values):
        pieces = [values[arg] for arg in self.args]
        values[self] = numpy.concatenate(pieces, axis=self.axis)

    def backward(self, values, gradients):
        """Give each argument its own slice of ``gradients[self]``."""
        index = [slice(None)]*numpy.ndim(gradients[self])
        offset = 0
        for arg in self.args:
            width = values[arg].shape[self.axis]
            if arg in gradients:
                index[self.axis] = slice(offset, offset+width)
                gradients[arg] += gradients[self][tuple(index)]
            # Advance past this argument whether or not it needed a gradient.
            offset += width

class stack(Expression):
    """Join a sequence of expressions along a new axis.

    :param args: the expressions to join.
    :param axis: position of the new axis in the result.
    :type axis: int
    :raises TypeError: if axis is not an int.
    """
    def __init__(self, args, axis=0):
        if not isinstance(axis, int):
            raise TypeError("axis must be an int")
        Expression.__init__(self, *args)
        self.axis = axis

    def forward(self, values):
        pieces = [values[arg] for arg in self.args]
        values[self] = numpy.stack(pieces, axis=self.axis)

    def backward(self, values, gradients):
        """Give each argument its own index of ``gradients[self]`` along
        the stacking axis."""
        index = [slice(None)]*numpy.ndim(gradients[self])
        for position, arg in enumerate(self.args):
            if arg not in gradients:
                continue
            index[self.axis] = position
            gradients[arg] += gradients[self][tuple(index)]

class reshape(Expression):
    """Give an expression a new shape.

    :param arg: the expression to reshape.
    :param shape: the new shape, passed to the backend's ``reshape``.
    """
    def __init__(self, arg, shape):
        Expression.__init__(self, arg)
        self.shape = shape

    def forward(self, values):
        operand = values[self.args[0]]
        values[self] = numpy.reshape(operand, self.shape)

    def backward(self, values, gradients):
        """Reshape ``gradients[self]`` back to the argument's shape."""
        arg = self.args[0]
        if arg not in gradients:
            return
        original_shape = values[arg].shape
        gradients[arg] += numpy.reshape(gradients[self], original_shape)

class expand_dims(Expression):
    """Insert a new axis into an expression.

    :param arg: the expression to expand.
    :param axis: position of the new axis, passed to the backend's
        ``expand_dims``.
    """
    def __init__(self, arg, axis):
        Expression.__init__(self, arg)
        self.axis = axis

    def forward(self, values):
        operand = values[self.args[0]]
        values[self] = numpy.expand_dims(operand, self.axis)

    def backward(self, values, gradients):
        """Reshape ``gradients[self]`` back to the argument's shape."""
        arg = self.args[0]
        if arg not in gradients:
            return
        original_shape = values[arg].shape
        gradients[arg] += numpy.reshape(gradients[self], original_shape)

class transpose(Expression):
    """Permute the axes of an expression.

    :param arg: the expression to transpose.
    :param axes: the permutation passed to the backend's ``transpose``;
        if None or empty, None is passed for both the forward and the
        backward transpose.
    """
    def __init__(self, arg, axes=None):
        Expression.__init__(self, arg)
        self.axes = axes
        if axes:
            # Map negative axes to their non-negative equivalents first;
            # sorting raw negative entries would give a wrong inverse.
            n = len(axes)
            normalized = [a + n if a < 0 else a for a in axes]
            # Inverse permutation: raxes[j] == i wherever axes[i] == j.
            self.raxes = [i for (i, j) in sorted(enumerate(normalized), key=operator.itemgetter(1))]
        else:
            self.raxes = None

    def forward(self, values):
        values[self] = numpy.transpose(values[self.args[0]], self.axes)

    def backward(self, values, gradients):
        """Transpose ``gradients[self]`` back with the inverse permutation."""
        arg = self.args[0]
        if arg in gradients:
            gradients[arg] += numpy.transpose(gradients[self], self.raxes)

class getitem(Expression):
    """An index or slice of an expression.

    :param arg: array to be indexed
    :param item: index
    :type item: int or slice, not Expression
    :raises TypeError: if item is an Expression.
    """
    def __init__(self, arg, item):
        Expression.__init__(self, arg)
        if isinstance(item, Expression): raise TypeError("item should be an index or slice")
        self.item = item

    def forward(self, values):
        arg = values[self.args[0]]
        values[self] = arg[self.item]

    def backward(self, values, gradients):
        """Add ``gradients[self]`` into the indexed part of the argument's
        gradient."""
        arg = self.args[0]
        if arg in gradients:
            # A gradient without a shape (such as the initial 0.0) cannot be
            # indexed, so it is replaced by a zero array first.
            if not hasattr(gradients[arg], 'shape'):
                gradients[arg] = numpy.zeros_like(values[arg])
            gradients[arg][self.item] += gradients[self]

    def __str__(self):
        item = self.item
        if isinstance(item, slice):
            # Only an omitted bound prints as empty; an explicit 0 is kept.
            start = "" if item.start is None else item.start
            stop = "" if item.stop is None else item.stop
            # The third field of a slice object is called ``step``.
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
            else:
                item = "%s:%s" % (start, stop)
        return "%s[%s]" % (self.args[0], item)

class setitem(Expression):
    """A new expression with an index or slice modified.

    Note that when evaluated, this makes a copy of the whole array.

    :param x: array to be indexed
    :param item: index
    :type item: int or slice, not Expression
    :param y: new value
    :raises TypeError: if item is an Expression.
    """
    def __init__(self, x, item, y):
        Expression.__init__(self, x, y)
        if isinstance(item, Expression): raise TypeError("item should be an index or slice")
        self.item = item

    def forward(self, values):
        x, y = self.args
        # Take a real copy so the assignment below cannot write through to
        # the value of x.
        values[self] = values[x].copy()
        values[self][self.item] = values[y]

    def backward(self, values, gradients):
        """Send the gradient of the overwritten part to ``y`` and the rest
        to ``x``."""
        x, y = self.args
        if x in gradients:
            gradients[x] += gradients[self]
            # The overwritten entries do not depend on x.
            gradients[x][self.item] -= gradients[self][self.item]
        if y in gradients:
            gradients[y] += gradients[self][self.item]

    def __str__(self):
        item = self.item
        if isinstance(item, slice):
            # Only an omitted bound prints as empty; an explicit 0 is kept.
            start = "" if item.start is None else item.start
            stop = "" if item.stop is None else item.stop
            # The third field of a slice object is called ``step``.
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
            else:
                item = "%s:%s" % (start, stop)
        return "%s[%s] := %s" % (self.args[0], item, self.args[1])

###

def topological(root):
    """List the subexpressions of ``root`` so that arguments come first.

    The traversal is an iterative depth-first search over ``args``; every
    expression appears once, after all of its arguments, and ``root`` is
    last.

    :param root: the expression to traverse.
    :return: list of expressions in bottom-up order.
    """
    order = []
    seen = set()
    # Each entry pairs a node with the index of its next argument to visit.
    pending = [(root, 0)]
    while pending:
        node, next_arg = pending.pop()
        children = node.args
        if next_arg == len(children):
            # All arguments handled: the node itself can be emitted.
            order.append(node)
            continue
        pending.append((node, next_arg + 1))
        child = children[next_arg]
        if child in seen:
            continue
        seen.add(child)
        pending.append((child, 0))
    return order

### Visualization

def graphviz(x):
    """Render the computation graph of an expression in the DOT language.

    Every subexpression becomes a box labelled with its serial number and
    class name, with one edge from each argument to the expression using it.

    :param x: the expression to draw
    :type x: Expression
    :return: DOT representation of computation graph
    :rtype: str

    The output can be processed using GraphViz's dot command.
    """
    lines = [
        "digraph {\n",
        '  rankdir=BT;\n',
        '  node [shape=box,margin=0.1,width=0,height=0,style=filled,fillcolor=lightgrey,penwidth=0,fontname="Courier",fontsize=10];\n',
        "  edge [arrowsize=0.5];\n",
    ]
    for node in topological(x):
        lines.append('  %s [label="%s: %s"];\n' % (node.serial, node.serial, node.__class__.__name__))
        lines.extend("  %s -> %s\n" % (arg.serial, node.serial) for arg in node.args)
    lines.append("}\n")
    return "".join(lines)

### Utilities

def contract_like(a, b):
    """Sum axes of a so that shape is the same as b.

    ``b``'s shape is padded on the left with ones to ``a``'s number of
    dimensions; every axis where ``a`` is larger is summed out, and the
    result is reshaped to ``b``'s shape.

    :param a: the array to contract.
    :param b: the array whose shape the result should have.
    :return: ``a`` itself if the shapes already match, otherwise the
        contracted array.
    """
    a = numpy.asarray(a)
    b = numpy.asarray(b)
    if a.shape == b.shape:
        return a
    padded = (1,)*(a.ndim-b.ndim) + b.shape
    extra = tuple(k for k, (have, want) in enumerate(zip(a.shape, padded))
                  if have > want)
    if extra:
        a = numpy.sum(a, extra)
    return a.reshape(b.shape)