# Source code for penne.expr

"""Building and using neural networks."""

from . import debug
from . import backend
from . import backend as numpy
from six.moves import range
import operator
import traceback # for expression tracebacks
import subprocess # for calling graphviz

# Names exported by ``from <this module> import *``.  The comparison and
# logical operators are listed only in commented-out form, so they are not
# exported.
__all__ = ["Expression", "Unary", "Binary", "Reduction",
           "constant", "one_hot", "zeros", "ones", "full",
           "parameter", "load_model", "save_model",
           "add", "subtract", "multiply", "divide", "negative", "power",
           "maximum", "minimum", "clip", 
           #"less", "less_equal", "greater", "greater_equal", "where",
           #"logical_and", "logical_or", "logical_not",
           "exp", "log", "tanh",
           "asum", "amax", "amin", "mean",
           "dot", "einsum", "vecdot",
           "concatenate", "stack", "transpose", "reshape", "expand_dims", "setitem",
           "topological", "graphviz"]

class Expression(object):
    """Base class for expression classes.

    An expression is a node of a computation graph.  Its operands are
    stored in ``self.args`` and must themselves be Expressions.  Every
    instance receives a unique, increasing ``serial`` number taken from
    the class-level counter.
    """
    serial = 0

    def __init__(self, *args):
        """Record the operands and assign a serial number.

        :param args: operand expressions
        :raises TypeError: if any operand is not an Expression
        """
        self.args = args
        if debug.stack or debug.profile:
            self.stack = debug.extract_stack()
        for i, arg in enumerate(args):
            if not isinstance(arg, Expression):
                raise TypeError("expected Expression as argument %d, got %s" % (i, arg))
        self.serial = Expression.serial
        Expression.serial += 1

    def forward(self, values):
        """Compute value of self given values of args."""
        raise NotImplementedError()

    def backward(self, values, gradients):
        """Update gradient with respect to args given gradient with respect to self.

        pre/post: gradients[self] has the same shape as values[self].
        pre: gradients[self.args[i]] is either 0.0 or has the same shape as values[self.args[i]].
        post: gradients[self.args[i]] has the same shape as values[self.args[i]].
        """
        pass

    def __getitem__(self, item):
        return getitem(self, item)

    def __setitem__(self, item, x):
        raise NotImplementedError("use a = setitem(a, item, b) instead")

    def __add__(self, other):
        return add(self, other)

    def __sub__(self, other):
        return subtract(self, other)

    def __mul__(self, other):
        return multiply(self, other)

    def __div__(self, other):
        # Python 2 spelling of the ``/`` operator.
        return divide(self, other)

    def __truediv__(self, other):
        # Python 3 (and ``from __future__ import division``) spelling of
        # ``/``; without it ``a / b`` raises TypeError there.
        return divide(self, other)

    def __pow__(self, other):
        return power(self, other)

    def __neg__(self):
        return negative(self)

    # NOTE(review): less/less_equal/greater/greater_equal appear only in
    # commented-out form in this module, so these operators presumably raise
    # NameError until those classes are restored -- confirm.
    def __lt__(self, other):
        return less(self, other)

    def __le__(self, other):
        return less_equal(self, other)

    def __gt__(self, other):
        return greater(self, other)

    def __ge__(self, other):
        return greater_equal(self, other)

    def dot(self, other):
        return dot(self, other)

    def __str__(self):
        args = ["<%s>" % arg.serial for arg in self.args]
        return "%s(%s)" % (self.__class__.__name__, ', '.join(args))

    def _repr_png_(self):
        """IPython magic: show PNG rendering of the computation graph.

        Pipes the DOT text produced by ``graphviz(self)`` through the
        external ``dot -Tpng`` command.  Adapted from pyfst.

        :return: PNG image data written by ``dot``
        :raises Exception: if ``dot`` writes anything to stderr
        """
        dot_source = graphviz(self)
        # communicate() needs bytes when the pipes are binary (Python 3);
        # on Python 2 ``str`` already is ``bytes`` and is passed unchanged.
        if not isinstance(dot_source, bytes):
            dot_source = dot_source.encode("utf-8")
        process = subprocess.Popen(['dot', '-Tpng'],
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        out, err = process.communicate(dot_source)
        if err:
            raise Exception(err)
        return out

class constant(Expression):
    """A constant value.

    The value can be accessed in the ``value`` field, and can be
    changed between calls to ``compute_values``.

    :param value: The value of the new expression.
    :type value: NumPy array
    """

    def __init__(self, value):
        Expression.__init__(self)
        self.value = numpy.asarray(value)

    def forward(self, values):
        """Store the constant's current value as the value of this node."""
        values[self] = self.value

    def __str__(self):
        return "constant(%s)" % (self.value,)
def one_hot(size, i):
    """A one-hot vector.

    :param size: The size of the vector.
    :type size: int
    :param i: The entry to set to one; all others are zero.
    :type i: int
    :return: a ``constant`` wrapping the vector
    """
    # The original referenced an undefined name ``dims`` here.
    result = numpy.zeros((size,))
    result[i] = 1.
    return constant(result)
def zeros(shape):
    """Return a ``constant`` of the given shape filled with zeros."""
    return constant(numpy.zeros(shape))
def ones(shape):
    """Return a ``constant`` of the given shape filled with ones."""
    return constant(numpy.ones(shape))
def full(shape, value):
    """Return a ``constant`` of the given shape filled with ``value``."""
    return constant(numpy.full(shape, value))
## Parameters and models
class parameter(Expression):
    """A parameter that is to be trained.

    :param value: The initial value of the new parameter.
    :type value: Numpy array
    :param model: list that the new parameter is appended to; defaults to
        the shared class-level list ``parameter.all``.
    """

    # Global list of all parameter objects.
    all = []

    # ``model=all`` is evaluated inside the class body, so the default is the
    # shared list defined just above, not the builtin ``all``.
    def __init__(self, value, model=all):
        Expression.__init__(self)
        self.value = numpy.asarray(value)
        model.append(self)

    def forward(self, values):
        """Store the parameter's current value as the value of this node."""
        values[self] = self.value

    def __str__(self):
        return "parameter(%s)" % (self.value,)
def save_model(outfile, model=parameter.all):
    """Saves parameters to a file.

    The value of each parameter in ``model`` is written, in order, with
    ``numpy.save``.

    :param outfile: File or filename to write to.
    :type outfile: file or str
    :param model: parameters to save (default: all parameters).
    """
    if isinstance(outfile, str):
        # Array data is binary, so the file must be opened in binary mode.
        with open(outfile, "wb") as outfilehandle:
            save_model(outfilehandle, model)
    else:
        for param in model:
            numpy.save(outfile, param.value)
def load_model(infile, model=parameter.all):
    """Loads parameters from a file.

    Assumes that there are exactly the same number of parameters as
    when the model was saved, created in the same order and with the
    same shapes.  Values are copied into the existing parameter arrays
    in place.

    :param infile: File or filename to read from.
    :type infile: file or str
    :param model: parameters to fill (default: all parameters).
    :raises ValueError: if a stored array's shape differs from the
        corresponding parameter's shape.
    """
    if isinstance(infile, str):
        # Array data is binary, so the file must be opened in binary mode.
        with open(infile, "rb") as infilehandle:
            load_model(infilehandle, model)
    else:
        for param in model:
            v = numpy.load(infile)
            if param.value.shape != v.shape:
                raise ValueError("expected shape {}, read shape {}".format(param.value.shape, v.shape))
            param.value[...] = v
class Unary(Expression):
    """Base class for unary componentwise operations.

    Subclasses supply ``f(x)`` (the forward function) and ``dfdx(x, z)``
    (the derivative, given the input value x and the output value z).
    """

    def __init__(self, x):
        Expression.__init__(self, x)

    def forward(self, values):
        x = self.args[0]
        values[self] = self.f(values[x])

    def backward(self, values, gradients):
        x = self.args[0]
        # Only evaluate the derivative when a gradient for x is wanted.
        if x in gradients:
            dfdx = self.dfdx(values[x], values[self])
            gradients[x] += contract_like(dfdx * gradients[self], values[x])


class Binary(Expression):
    """Base class for binary componentwise operations.

    Subclasses supply ``f(x, y)`` and the partial derivatives
    ``dfdx(x, y, z)`` and ``dfdy(x, y, z)``, where z is the output value.
    """

    def __init__(self, x, y):
        Expression.__init__(self, x, y)

    def forward(self, values):
        x, y = self.args
        values[self] = self.f(values[x], values[y])

    def backward(self, values, gradients):
        x, y = self.args
        # Each partial derivative is evaluated only if its gradient is wanted.
        # contract_like sums the result back down to the operand's shape.
        if x in gradients:
            dfdx = self.dfdx(values[x], values[y], values[self])
            gradients[x] += contract_like(dfdx * gradients[self], values[x])
        if y in gradients:
            dfdy = self.dfdy(values[x], values[y], values[self])
            gradients[y] += contract_like(dfdy * gradients[self], values[y])

## Arithmetic operations
class add(Binary):
    """Componentwise addition of two expressions."""

    f = staticmethod(lambda x,y: numpy.add(x,y))

    # "Inline" df to avoid multiplications by 1 in this very common case
    def backward(self, values, gradients):
        x, y = self.args
        if x in gradients:
            gradients[x] += contract_like(gradients[self], values[x])
        if y in gradients:
            gradients[y] += contract_like(gradients[self], values[y])
class subtract(Binary):
    """Componentwise subtraction ``x - y``."""

    f = staticmethod(lambda x,y: numpy.subtract(x,y))
    dfdx = staticmethod(lambda x,y,z: 1.)
    dfdy = staticmethod(lambda x,y,z: -1.)
class negative(Unary):
    """Componentwise negation ``-x``."""

    f = staticmethod(lambda x: numpy.negative(x))
    dfdx = staticmethod(lambda x,z: -1.)
class multiply(Binary):
    """Componentwise multiplication ``x * y``."""

    f = staticmethod(lambda x,y: numpy.multiply(x,y))
    dfdx = staticmethod(lambda x,y,z: y)
    dfdy = staticmethod(lambda x,y,z: x)
class divide(Binary):
    """Componentwise division ``x / y``."""

    f = staticmethod(lambda x,y: numpy.divide(x,y))
    dfdx = staticmethod(lambda x,y,z: 1./y)
    dfdy = staticmethod(lambda x,y,z: -x/y**2)
class power(Binary):
    """Componentwise power ``x ** y``.

    The derivatives are written in terms of the output z = x**y:
    dz/dx = z*y/x and dz/dy = z*log(x).
    """

    f = staticmethod(lambda x,y: numpy.power(x,y))
    dfdx = staticmethod(lambda x,y,z: z*y/x)
    dfdy = staticmethod(lambda x,y,z: z*numpy.log(x))
class log(Unary):
    """Componentwise natural logarithm."""

    f = staticmethod(lambda x: numpy.log(x))
    dfdx = staticmethod(lambda x,z: 1./x)
class exp(Unary):
    """Componentwise exponential; the derivative equals the output z."""

    f = staticmethod(lambda x: numpy.exp(x))
    dfdx = staticmethod(lambda x,z: z)
class tanh(Unary):
    """Componentwise hyperbolic tangent."""

    f = staticmethod(lambda x: numpy.tanh(x))

    @staticmethod
    def dfdx(x, z):
        """Derivative 1 - z**2, where z = tanh(x)."""
        # Use the backend's ``onemxx`` routine when it exists (presumably a
        # fused 1 - z*z -- confirm); otherwise compute it directly.
        try:
            return numpy.onemxx(z)
        except AttributeError:
            return 1.-z**2

    gain = 1.
class maximum(Binary):
    """Componentwise maximum of two expressions.

    Where x and y are equal, the gradient is routed to y.
    """

    f = staticmethod(lambda x,y: numpy.maximum(x,y))
    dfdx = staticmethod(lambda x,y,z: (x > y).astype(float))
    dfdy = staticmethod(lambda x,y,z: (x <= y).astype(float))
class minimum(Binary):
    """Componentwise minimum of two expressions.

    Where x and y are equal, the gradient is routed to y.
    """

    f = staticmethod(lambda x,y: numpy.minimum(x,y))
    dfdx = staticmethod(lambda x,y,z: (x < y).astype(float))
    dfdy = staticmethod(lambda x,y,z: (x >= y).astype(float))
def clip(x, lo, hi):
    """Limit x to the range [lo, hi], built as ``minimum(maximum(x, lo), hi)``.

    All three arguments are Expressions.
    """
    return minimum(maximum(x, lo), hi)
## Conditionals

class where(Expression):
    """Componentwise selection: x where c is true, y elsewhere.

    The operands are passed positionally as ``where(c, x, y)``.
    """

    def forward(self, values):
        c, x, y = self.args
        values[self] = numpy.where(values[c], values[x], values[y])

    def backward(self, values, gradients):
        c, x, y = self.args
        # The mask comes from the *value* of c; c itself is an Expression
        # and has no ``astype``.
        mask = numpy.asarray(values[c]).astype(bool)
        if x in gradients:
            gradients[x] += contract_like(mask * gradients[self], values[x])
        if y in gradients:
            # The gradient for y must be contracted to y's shape, not x's.
            gradients[y] += contract_like(numpy.logical_not(mask) * gradients[self], values[y])

"""
class greater(Binary):
    f = staticmethod(lambda x,y: numpy.greater(x,y))

class greater_equal(Binary):
    f = staticmethod(lambda x,y: numpy.greater_equal(x,y))

class less(Binary):
    f = staticmethod(lambda x,y: numpy.less(x,y))

class less_equal(Binary):
    f = staticmethod(lambda x,y: numpy.less_equal(x,y))

class logical_and(Binary):
    f = staticmethod(lambda x,y: numpy.logical_and(x,y))

class logical_or(Binary):
    f = staticmethod(lambda x,y: numpy.logical_or(x,y))

class logical_not(Binary):
    f = staticmethod(lambda x: numpy.logical_not(x))"""

### Reductions

def _keepdims(shp, axis, keepdims):
    """Return the shape a reduction result would have with keepdims=True.

    :param shp: shape of the reduction result as actually computed
    :type shp: tuple of ints
    :param axis: axis or axes that were reduced (None means all)
    :type axis: None, int, or tuple/list of ints
    :param keepdims: whether the result already keeps the reduced axes
    :type keepdims: bool
    :return: ``shp`` with a 1 inserted at every reduced axis
    :rtype: tuple of ints
    :raises ValueError: if ``axis`` is of an unsupported type
    """
    if keepdims:
        return shp
    if axis is None:
        # Reducing over all axes without keepdims yields a scalar.
        assert shp == ()
        return shp
    if isinstance(axis, int):
        if axis < 0:
            axis += len(shp)+1
        return shp[:axis] + (1,) + shp[axis:]
    if isinstance(axis, (tuple, list)):
        shp = list(shp)
        nd = len(shp)+len(axis)  # number of dimensions before the reduction
        axis = [i if i >= 0 else i+nd for i in axis]
        # Insert in increasing order; otherwise an earlier insertion shifts
        # the position of a later, smaller axis.
        for i in sorted(axis):
            shp[i:i] = [1]
        return tuple(shp)
    raise ValueError("invalid axis")


class Reduction(Expression):
    """Base class for reduction operations.

    Subclasses supply ``f(x, axis, keepdims)`` and ``df(x, y, axis)``, where
    y is the reduction result reshaped as if keepdims were true.

    :param x: expression to reduce
    :param axis: axis or axes to reduce over (None means all)
    :param keepdims: leave reduced axes in the result with size one
    """

    def __init__(self, x, axis=None, keepdims=False):
        Expression.__init__(self, x)
        self.axis = axis
        self.keepdims = keepdims

    def forward(self, values):
        values[self] = self.f(values[self.args[0]], axis=self.axis, keepdims=self.keepdims)

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg not in gradients:
            return
        # Reshape to the keepdims=True shape so that the gradient and value
        # of self broadcast against the operand.
        shp = _keepdims(values[self].shape, self.axis, self.keepdims)
        gself = numpy.asarray(gradients[self]).reshape(shp)
        vself = numpy.asarray(values[self]).reshape(shp)
        d = self.df(values[arg], vself, axis=self.axis)
        if numpy.ndim(gradients[arg]) == 0:
            # Expand a scalar gradient to the operand's shape so the in-place
            # add below can broadcast into it.  Adding the old gradient keeps
            # anything already accumulated when the operand itself is 0-d.
            gradients[arg] = numpy.zeros_like(values[arg]) + gradients[arg]
        gradients[arg] += gself * d


class sum(Reduction):
    """Sum of array elements over the given axis or axes."""

    f = staticmethod(lambda x,axis,keepdims: numpy.sum(x,axis=axis,keepdims=keepdims))
    df = staticmethod(lambda x,y,axis: 1.)

asum = sum
class amax(Reduction):
    """Maximum of array elements over the given axis or axes."""

    f = staticmethod(lambda x,axis,keepdims: numpy.amax(x,axis=axis,keepdims=keepdims))

    @staticmethod
    def df(x, y, axis):
        """Indicator of the maximal entries, split evenly among ties."""
        is_max = (x == y).astype(x.dtype)
        z = numpy.asum(is_max, axis=axis, keepdims=True)
        return is_max / z

max = amax
class amin(Reduction):
    """Minimum of array elements over the given axis or axes."""

    f = staticmethod(lambda x,axis,keepdims: numpy.amin(x,axis=axis,keepdims=keepdims))

    @staticmethod
    def df(x, y, axis):
        """Indicator of the minimal entries, split evenly among ties."""
        is_min = (x == y).astype(x.dtype)
        return is_min / numpy.asum(is_min, axis=axis, keepdims=True)

min = amin
class mean(Reduction):
    """Arithmetic mean of array elements over the given axis or axes."""

    f = staticmethod(lambda x,axis,keepdims: numpy.mean(x,axis=axis,keepdims=keepdims))
    # Result size over input size, i.e. one over the number of elements
    # averaged into each output entry.
    df = staticmethod(lambda x,y,axis: float(y.size) / x.size)
class vecdot(Expression):
    """Equivalent to sum(x*y, axis).

    :type x: Expression
    :type y: Expression
    :param axis: axis or axes along which to perform the sum-product.
    :type axis: int or tuple of ints
    :param keepdims: leave axis or axes in result as dimensions with size one.
    :type keepdims: bool
    """

    def __init__(self, x, y, axis=None, keepdims=False):
        Expression.__init__(self, x, y)
        self.axis = axis
        self.keepdims = keepdims

    def forward(self, values):
        x, y = self.args
        values[self] = numpy.vecdot(values[x], values[y], self.axis, self.keepdims)

    def backward(self, values, gradients):
        x, y = self.args
        # Reshape the incoming gradient to the keepdims=True shape so that it
        # broadcasts against the operands.
        shp = _keepdims(values[self].shape, self.axis, self.keepdims)
        gself = numpy.asarray(gradients[self]).reshape(shp)
        # As in the other operations, only touch operands whose gradient is
        # being tracked.
        if x in gradients:
            gradients[x] += contract_like(gself * values[y], values[x])
        if y in gradients:
            gradients[y] += contract_like(gself * values[x], values[y])
### Product-like operations
class dot(Expression):
    """Dot product of two expressions, computed with ``numpy.dot``.

    The backward pass has dedicated branches for operands with at most two
    dimensions and a reshape-based general case for higher ranks.
    """

    def __init__(self, x, y):
        Expression.__init__(self, x, y)

    def forward(self, values):
        xv = values[self.args[0]]
        yv = values[self.args[1]]
        values[self] = numpy.dot(xv, yv)

    def backward(self, values, gradients):
        x, y = self.args
        xd, yd = numpy.ndim(values[x]), numpy.ndim(values[y])

        # NOTE(review): add_outer/add_dot are backend routines; from their
        # use here they presumably accumulate the product into the third
        # argument and return it -- confirm against the backend.

        # Common cases
        if xd <= 1 and yd <= 1:
            if x in gradients:
                gradients[x] += numpy.dot(gradients[self], values[y])
            if y in gradients:
                gradients[y] += numpy.dot(values[x], gradients[self])

        elif xd == 2 and yd == 1:
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_outer(gradients[self], values[y], gradients[x])
                else:
                    gradients[x] += numpy.outer(gradients[self], values[y])
            if y in gradients:
                gradients[y] += numpy.dot(values[x].T, gradients[self])

        elif xd == 1 and yd == 2:
            if x in gradients:
                gradients[x] += numpy.dot(gradients[self], values[y].T)
            if y in gradients:
                if numpy.ndim(gradients[y]) > 0:
                    gradients[y] = numpy.add_outer(values[x], gradients[self], gradients[y])
                else:
                    gradients[y] += numpy.outer(values[x], gradients[self])

        elif xd == 2 and yd == 2:
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_dot(gradients[self], values[y].T, gradients[x])
                else:
                    gradients[x] += numpy.dot(gradients[self], values[y].T)
            if y in gradients:
                if numpy.ndim(gradients[y]) > 0:
                    gradients[y] = numpy.add_dot(values[x].T, gradients[self], gradients[y])
                else:
                    gradients[y] += numpy.dot(values[x].T, gradients[self])

        elif yd == 1:
            # Higher-rank x with vector y: flatten the leading axes of x.
            xv = values[x]
            xr = xv.reshape((-1, xv.shape[-1]))
            gzr = gradients[self].reshape((xr.shape[0],))
            if x in gradients:
                if numpy.ndim(gradients[x]) > 0:
                    gradients[x] = numpy.add_outer(gzr, values[y], gradients[x])
                else:
                    gradients[x] += numpy.outer(gzr, values[y]).reshape(values[x].shape)
            if y in gradients:
                gradients[y] += numpy.dot(xr.T, gzr).reshape(values[y].shape)

        else:
            # General case: move the contracted axis of y to the front and
            # flatten both operands to matrices.
            xv, yv = values[x], numpy.swapaxes(values[y], -2, 0)
            xr = xv.reshape((-1, xv.shape[-1]))
            yr = yv.reshape((yv.shape[0], -1))
            gzr = gradients[self].reshape((xr.shape[0], yr.shape[1]))
            if x in gradients:
                gradients[x] += numpy.dot(gzr, yr.T).reshape(xv.shape)
            if y in gradients:
                gradients[y] += numpy.swapaxes(numpy.dot(xr.T, gzr).reshape(yv.shape), -2, 0)
class einsum(Expression):
    """Einstein summation over the operand expressions.

    :param subscripts: explicit-form subscripts string containing ``->``,
        as passed to ``numpy.einsum``
    :type subscripts: str
    :param args: operand expressions
    """

    def __init__(self, subscripts, *args):
        Expression.__init__(self, *args)
        self.subscripts = subscripts
        lhs, rhs = subscripts.split("->")
        # One backward subscripts string per operand: contract all operands
        # and the output gradient down to that operand's indices.
        self.bsubscripts = ["%s,%s->%s" % (lhs, rhs, i) for i in lhs.split(",")]

    def forward(self, values):
        values[self] = numpy.einsum(self.subscripts, *[values[arg] for arg in self.args])

    def backward(self, values, gradients):
        args = [values[arg] for arg in self.args] + [gradients[self]]
        for i, arg in enumerate(self.args):
            if arg in gradients:
                # Temporarily replace operand i by ones so that it does not
                # contribute to its own gradient, then restore it.
                args[i] = numpy.broadcast_to(1., values[arg].shape)
                gradients[arg] += numpy.einsum(self.bsubscripts[i], *args)
                args[i] = values[arg]

### Cutting and pasting
class concatenate(Expression):
    """Join a sequence of expressions along an existing axis.

    :param args: expressions to join
    :type args: sequence of Expression
    :param axis: axis along which to join
    :type axis: int
    :raises TypeError: if ``axis`` is not an int
    """

    def __init__(self, args, axis=0):
        if not isinstance(axis, int):
            raise TypeError("axis must be an int")
        Expression.__init__(self, *args)
        self.axis = axis

    def forward(self, values):
        values[self] = numpy.concatenate([values[arg] for arg in self.args], axis=self.axis)

    def backward(self, values, gradients):
        i = 0
        s = [slice(None)]*numpy.ndim(gradients[self])
        for arg in self.args:
            d = values[arg].shape[self.axis]
            if arg in gradients:
                s[self.axis] = slice(i, i+d)
                gradients[arg] += gradients[self][tuple(s)]
            # Advance the offset even when this operand has no gradient.
            i += d
class stack(Expression):
    """Join a sequence of expressions along a new axis.

    :param args: expressions to stack
    :type args: sequence of Expression
    :param axis: position of the new axis in the result
    :type axis: int
    :raises TypeError: if ``axis`` is not an int
    """

    def __init__(self, args, axis=0):
        if not isinstance(axis, int):
            raise TypeError("axis must be an int")
        Expression.__init__(self, *args)
        self.axis = axis

    def forward(self, values):
        values[self] = numpy.stack([values[arg] for arg in self.args], axis=self.axis)

    def backward(self, values, gradients):
        s = [slice(None)]*numpy.ndim(gradients[self])
        for i, arg in enumerate(self.args):
            if arg in gradients:
                # Operand i occupies index i along the stacking axis.
                s[self.axis] = i
                gradients[arg] += gradients[self][tuple(s)]
class reshape(Expression):
    """Give a new shape to an expression without changing its data.

    :param arg: expression to reshape
    :param shape: the new shape, as passed to ``numpy.reshape``
    """

    def __init__(self, arg, shape):
        Expression.__init__(self, arg)
        self.shape = shape

    def forward(self, values):
        values[self] = numpy.reshape(values[self.args[0]], self.shape)

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg in gradients:
            gradients[arg] += numpy.reshape(gradients[self], values[arg].shape)
class expand_dims(Expression):
    """Insert a new axis of size one into an expression.

    :param arg: expression to expand
    :param axis: position of the new axis, as passed to ``numpy.expand_dims``
    """

    def __init__(self, arg, axis):
        Expression.__init__(self, arg)
        self.axis = axis

    def forward(self, values):
        values[self] = numpy.expand_dims(values[self.args[0]], self.axis)

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg in gradients:
            # Dropping the added axis is a reshape back to the operand's shape.
            gradients[arg] += numpy.reshape(gradients[self], values[arg].shape)
class transpose(Expression):
    """Permute the axes of an expression.

    :param arg: expression to transpose
    :param axes: permutation of the axes, as passed to ``numpy.transpose``;
        None means the default transpose.
    """

    def __init__(self, arg, axes=None):
        Expression.__init__(self, arg)
        self.axes = axes
        if axes:
            # Inverse permutation (argsort of axes), used to undo the
            # transpose in the backward pass.
            self.raxes = [i for (i,j) in sorted(enumerate(axes), key=operator.itemgetter(1))]
        else:
            self.raxes = None

    def forward(self, values):
        values[self] = numpy.transpose(values[self.args[0]], self.axes)

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg in gradients:
            gradients[arg] += numpy.transpose(gradients[self], self.raxes)
class getitem(Expression):
    """An index or slice of an expression.

    :param arg: array to be indexed
    :param item: index
    :type item: int or slice, not Expression
    :raises TypeError: if ``item`` is an Expression
    """

    def __init__(self, arg, item):
        Expression.__init__(self, arg)
        if isinstance(item, Expression):
            raise TypeError("item should be an index or slice")
        self.item = item

    def forward(self, values):
        arg = values[self.args[0]]
        values[self] = arg[self.item]

    def backward(self, values, gradients):
        arg = self.args[0]
        if arg in gradients:
            # A gradient without a shape is still the scalar placeholder;
            # replace it with a full-size array before indexing into it.
            if not hasattr(gradients[arg], 'shape'):
                gradients[arg] = numpy.zeros_like(values[arg])
            gradients[arg][self.item] += gradients[self]

    def __str__(self):
        item = self.item
        if isinstance(item, slice):
            # Slice objects expose ``step`` (there is no ``stride``), and a
            # bound of 0 must be printed rather than treated as missing.
            start = "" if item.start is None else item.start
            stop = "" if item.stop is None else item.stop
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
            else:
                item = "%s:%s" % (start, stop)
        return "%s[%s]" % (self.args[0], item)
class setitem(Expression):
    """A new expression with an index or slice modified.

    Note that when evaluated, this makes a copy of the whole array.

    :param x: array to be indexed
    :param item: index
    :type item: int or slice, not Expression
    :param y: new value
    :raises TypeError: if ``item`` is an Expression
    """

    def __init__(self, x, item, y):
        Expression.__init__(self, x, y)
        if isinstance(item, Expression):
            raise TypeError("item should be an index or slice")
        self.item = item

    def forward(self, values):
        x, y = self.args
        # Indexing with ``[...]`` yields a view on a NumPy array, so the
        # assignment below would overwrite the value of x; take a real copy.
        values[self] = values[x].copy()
        values[self][self.item] = values[y]

    def backward(self, values, gradients):
        x, y = self.args
        if x in gradients:
            # x receives the gradient everywhere except the overwritten
            # entries, which are taken back out after the add.
            gradients[x] += gradients[self]
            gradients[x][self.item] -= gradients[self][self.item]
        if y in gradients:
            gradients[y] += gradients[self][self.item]

    def __str__(self):
        item = self.item
        if isinstance(item, slice):
            # Slice objects expose ``step`` (there is no ``stride``), and a
            # bound of 0 must be printed rather than treated as missing.
            start = "" if item.start is None else item.start
            stop = "" if item.stop is None else item.stop
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
            else:
                item = "%s:%s" % (start, stop)
        return "%s[%s] := %s" % (self.args[0], item, self.args[1])
###

def topological(root):
    """Traverse an Expression in topological (bottom-up) order.

    :param root: the expression to traverse
    :return: list of ``root`` and every expression reachable through
        ``args``, each exactly once, with operands before their users
    """
    # Iterative depth-first search; each stack entry pairs a node with the
    # index of the next operand to visit.
    stack = [(root, 0)]
    result = []
    visited = set()
    while len(stack) > 0:
        x, i = stack.pop()
        if i == len(x.args):
            # All operands handled: emit the node itself.
            result.append(x)
        else:
            stack.append((x, i+1))
            if x.args[i] not in visited:
                stack.append((x.args[i], 0))
                visited.add(x.args[i])
    return result

### Visualization

def graphviz(x):
    """Draw the computation graph of an expression in the DOT language.

    :param x: the expression to draw
    :type x: Expression
    :return: DOT representation of computation graph
    :rtype: str

    The output can be processed using GraphViz's dot command.
    """
    result = []
    result.append("digraph {\n")
    result.append(' rankdir=BT;\n')
    result.append(' node [shape=box,margin=0.1,width=0,height=0,style=filled,fillcolor=lightgrey,penwidth=0,fontname="Courier",fontsize=10];\n')
    result.append(" edge [arrowsize=0.5];\n")
    for subx in topological(x):
        # One node per expression, labelled with its serial and class name,
        # and one edge from each operand to the expression using it.
        result.append(' %s [label="%s: %s"];\n' % (subx.serial, subx.serial, subx.__class__.__name__))
        for arg in subx.args:
            result.append(" %s -> %s\n" % (arg.serial, subx.serial))
    result.append("}\n")
    return "".join(result)

### Utilities

def contract_like(a, b):
    """Sum axes of a so that shape is the same as b.

    :param a: array whose shape is b's shape after broadcasting
    :param b: array giving the target shape
    :return: ``a`` itself if the shapes already match, otherwise ``a``
        summed over the axes on which it is larger than ``b`` and reshaped
        to ``b.shape``
    """
    a = numpy.asarray(a)
    b = numpy.asarray(b)
    if a.shape == b.shape:
        return a
    else:
        # Left-pad b's shape with ones, as broadcasting does, so the two
        # shapes can be compared axis by axis.
        b_shape = (1,)*(a.ndim-b.ndim) + b.shape
        axes = tuple(axis for axis in range(a.ndim) if a.shape[axis] > b_shape[axis])
        if axes != ():
            a = numpy.sum(a, axes)
        return a.reshape(b.shape)