"""Building and using neural networks."""
from . import debug
from . import backend
from . import backend as numpy # the backend exposes a NumPy-compatible interface
from six.moves import range
import operator
import traceback # for expression tracebacks
import subprocess # for calling graphviz
__all__ = ["Expression", "Unary", "Binary", "Reduction",
"constant", "one_hot", "zeros", "ones", "full",
"parameter", "load_model", "save_model",
"add", "subtract", "multiply", "divide", "negative", "power",
"maximum", "minimum", "clip",
#"less", "less_equal", "greater", "greater_equal", "where",
#"logical_and", "logical_or", "logical_not",
"exp", "log", "tanh",
"asum", "amax", "amin", "mean",
"dot", "einsum", "vecdot",
"concatenate", "stack", "transpose", "reshape", "expand_dims", "setitem",
"topological", "graphviz"]
class Expression(object):
"""Base class for expression classes."""
serial = 0
def __init__(self, *args):
self.args = args
if debug.stack or debug.profile:
self.stack = debug.extract_stack()
for i, arg in enumerate(args):
if not isinstance(arg, Expression):
raise TypeError("expected Expression as argument %d, got %s" % (i, arg))
self.serial = Expression.serial
Expression.serial += 1
def forward(self, values):
"""Compute value of self given values of args."""
raise NotImplementedError()
def backward(self, values, gradients):
"""Update gradient with respect to args given gradient with respect to self.
pre/post: gradients[self] has the same shape as values[self].
pre: gradients[self.args[i]] is either 0.0 or has the same shape as values[self.args[i]].
post: gradients[self.args[i]] has the same shape as values[self.args[i]].
"""
pass
def __getitem__(self, item): return getitem(self, item)
def __setitem__(self, item, x):
raise NotImplementedError("use a = setitem(a, item, b) instead")
def __add__(self, other): return add(self, other)
def __sub__(self, other): return subtract(self, other)
def __mul__(self, other): return multiply(self, other)
    def __div__(self, other): return divide(self, other)
    __truediv__ = __div__ # Python 3 maps the / operator to __truediv__
def __pow__(self, other): return power(self, other)
def __neg__(self): return negative(self)
    # These depend on the comparison classes, which are currently quoted out
    # below, so they would raise NameError if used:
    #def __lt__(self, other): return less(self, other)
    #def __le__(self, other): return less_equal(self, other)
    #def __gt__(self, other): return greater(self, other)
    #def __ge__(self, other): return greater_equal(self, other)
def dot(self, other): return dot(self, other)
def __str__(self):
args = ["<%s>" % arg.serial for arg in self.args]
return "%s(%s)" % (self.__class__.__name__, ', '.join(args))
def _repr_png_(self):
"""IPython magic: show PNG representation of the transducer.
Adapted from pyfst."""
process = subprocess.Popen(['dot', '-Tpng'],
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
        out, err = process.communicate(graphviz(self).encode('utf8'))
if err:
raise Exception(err)
return out
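# A small usage sketch (not part of the library): expressions are built with
# ordinary operators, but nothing is evaluated until a driver walks the graph
# (the forward/backward drivers live outside this module).
#
#     x = constant(numpy.array([1., 2., 3.]))
#     y = constant(numpy.array([4., 5., 6.]))
#     z = tanh(x * y + x)   # an Expression, not an array
#     print(z)              # prints tanh(<serial>), per __str__ above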
class constant(Expression):
"""A constant value.
The value can be accessed in the ``value`` field, and can be
changed between calls to ``compute_values``.
:param value: The value of the new expression.
:type value: NumPy array
"""
def __init__(self, value):
Expression.__init__(self)
self.value = numpy.asarray(value)
def forward(self, values):
values[self] = self.value
def __str__(self):
return "constant(%s)" % (self.value,)
def one_hot(size, i):
"""A one-hot vector.
:param size: The size of the vector.
:type size: int
:param i: The entry to set to one; all others are zero.
:type i: int
"""
    result = numpy.zeros((size,))
result[i] = 1.
return constant(result)
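# Example: one_hot(4, 2) wraps array([0., 0., 1., 0.]) as a constant.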
def zeros(shape):
    """A constant of the given shape, filled with zeros."""
    return constant(numpy.zeros(shape))
def ones(shape):
    """A constant of the given shape, filled with ones."""
    return constant(numpy.ones(shape))
def full(shape, value):
    """A constant of the given shape, filled with ``value``."""
    return constant(numpy.full(shape, value))
## Parameters and models
class parameter(Expression):
"""A parameter that is to be trained.
:param value: The initial value of the new parameter.
:type value: Numpy array
"""
# Global list of all parameter objects.
all = []
def __init__(self, value, model=all):
Expression.__init__(self)
self.value = numpy.asarray(value)
model.append(self)
def forward(self, values):
values[self] = self.value
def __str__(self):
return "parameter(%s)" % (self.value,)
def save_model(outfile, model=parameter.all):
    """Saves parameters to a file.
    :param outfile: File or filename to write to.
    :type outfile: file or str
    """
    if isinstance(outfile, str):
        with open(outfile, "wb") as outfilehandle:
save_model(outfilehandle, model)
else:
for param in model:
numpy.save(outfile, param.value)
def load_model(infile, model=parameter.all):
    """Loads parameters from a file.
    Assumes that there are exactly the same number of parameters as
    when the model was saved, created in the same order and with the
    same shapes.
    :param infile: File or filename to read from.
    :type infile: file or str
    """
    if isinstance(infile, str):
        with open(infile, "rb") as infilehandle:
load_model(infilehandle, model)
else:
for i, param in enumerate(model):
v = numpy.load(infile)
if param.value.shape != v.shape:
raise ValueError("expected shape {}, read shape {}".format(param.value.shape, v.shape))
param.value[...] = v
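# Sketch of a save/load round trip (illustrative only). Parameters register
# themselves in parameter.all in creation order, which is why load_model
# requires the same parameters to be re-created in the same order:
#
#     w = parameter(numpy.zeros((3, 2)))
#     b = parameter(numpy.zeros((2,)))
#     save_model("model.npy")   # writes w.value, then b.value
#     # ... later, after re-creating w and b in the same order:
#     load_model("model.npy")   # reads back into w.value, then b.value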
class Unary(Expression):
"""Base class for unary componentwise operations."""
def __init__(self, x):
Expression.__init__(self, x)
def forward(self, values):
x = self.args[0]
values[self] = self.f(values[x])
def backward(self, values, gradients):
x = self.args[0]
dfdx = self.dfdx(values[x], values[self])
if x in gradients:
gradients[x] += contract_like(dfdx * gradients[self], values[x])
class Binary(Expression):
"""Base class for binary componentwise operations."""
def __init__(self, x, y):
Expression.__init__(self, x, y)
def forward(self, values):
x, y = self.args
values[self] = self.f(values[x], values[y])
def backward(self, values, gradients):
x, y = self.args
dfdx = self.dfdx(values[x], values[y], values[self])
dfdy = self.dfdy(values[x], values[y], values[self])
if x in gradients:
gradients[x] += contract_like(dfdx * gradients[self], values[x])
if y in gradients:
gradients[y] += contract_like(dfdy * gradients[self], values[y])
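# Defining a new componentwise operation only requires f (the function) and
# dfdx/dfdy (its partials, which may reuse the output z to save work). A
# hypothetical example, not part of this module:
#
#     class sigmoid(Unary):
#         f = staticmethod(lambda x: 1./(1.+numpy.exp(-x)))
#         dfdx = staticmethod(lambda x, z: z*(1.-z))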
## Arithmetic operations
class add(Binary):
f = staticmethod(lambda x,y: numpy.add(x,y))
# "Inline" df to avoid multiplications by 1 in this very common case
def backward(self, values, gradients):
x, y = self.args
if x in gradients:
gradients[x] += contract_like(gradients[self], values[x])
if y in gradients:
gradients[y] += contract_like(gradients[self], values[y])
class subtract(Binary):
f = staticmethod(lambda x,y: numpy.subtract(x,y))
dfdx = staticmethod(lambda x,y,z: 1.)
dfdy = staticmethod(lambda x,y,z: -1.)
class negative(Unary):
f = staticmethod(lambda x: numpy.negative(x))
dfdx = staticmethod(lambda x,z: -1.)
class multiply(Binary):
f = staticmethod(lambda x,y: numpy.multiply(x,y))
dfdx = staticmethod(lambda x,y,z: y)
dfdy = staticmethod(lambda x,y,z: x)
class divide(Binary):
f = staticmethod(lambda x,y: numpy.divide(x,y))
dfdx = staticmethod(lambda x,y,z: 1./y)
dfdy = staticmethod(lambda x,y,z: -x/y**2)
class power(Binary):
f = staticmethod(lambda x,y: numpy.power(x,y))
dfdx = staticmethod(lambda x,y,z: z*y/x)
dfdy = staticmethod(lambda x,y,z: z*numpy.log(x))
class log(Unary):
f = staticmethod(lambda x: numpy.log(x))
dfdx = staticmethod(lambda x,z: 1./x)
class exp(Unary):
f = staticmethod(lambda x: numpy.exp(x))
dfdx = staticmethod(lambda x,z: z)
class tanh(Unary):
f = staticmethod(lambda x: numpy.tanh(x))
@staticmethod
    def dfdx(x, z):
        # The backend may provide a fused kernel for 1 - z**2 (onemxx);
        # fall back to the explicit expression otherwise.
        try:
            return numpy.onemxx(z)
        except AttributeError:
            return 1.-z**2
gain = 1.
class maximum(Binary):
f = staticmethod(lambda x,y: numpy.maximum(x,y))
dfdx = staticmethod(lambda x,y,z: (x > y).astype(float))
dfdy = staticmethod(lambda x,y,z: (x <= y).astype(float))
class minimum(Binary):
f = staticmethod(lambda x,y: numpy.minimum(x,y))
dfdx = staticmethod(lambda x,y,z: (x < y).astype(float))
dfdy = staticmethod(lambda x,y,z: (x >= y).astype(float))
def clip(x, lo, hi):
return minimum(maximum(x, lo), hi)
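# Example: clip(x, constant(0.), constant(1.)) limits x to [0, 1]; the
# broadcast scalar bounds are handled by contract_like in the backward pass.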
## Conditionals
class where(Expression):
def forward(self, values):
c, x, y = self.args
values[self] = numpy.where(values[c], values[x], values[y])
def backward(self, values, gradients):
c, x, y = self.args
        if x in gradients:
            gradients[x] += contract_like(values[c].astype(bool) * gradients[self], values[x])
        if y in gradients:
            gradients[y] += contract_like(numpy.logical_not(values[c]) * gradients[self], values[y])
"""
class greater(Binary):
f = staticmethod(lambda x,y: numpy.greater(x,y))
class greater_equal(Binary):
f = staticmethod(lambda x,y: numpy.greater_equal(x,y))
class less(Binary):
f = staticmethod(lambda x,y: numpy.less(x,y))
class less_equal(Binary):
f = staticmethod(lambda x,y: numpy.less_equal(x,y))
class logical_and(Binary):
f = staticmethod(lambda x,y: numpy.logical_and(x,y))
class logical_or(Binary):
f = staticmethod(lambda x,y: numpy.logical_or(x,y))
class logical_not(Unary):
    f = staticmethod(lambda x: numpy.logical_not(x))"""
### Reductions
def _keepdims(shp, axis, keepdims):
    """Given the shape ``shp`` of a reduction's result, return the shape the
    result would have had with ``keepdims=True``."""
if keepdims:
return shp
if axis is None:
assert shp == ()
return shp
if type(axis) is int:
if axis < 0: axis += len(shp)+1
return shp[:axis] + (1,) + shp[axis:]
if type(axis) is tuple:
shp = list(shp)
nd = len(shp)+len(axis)
axis = [i if i >= 0 else i+nd for i in axis]
        # Insert the singleton axes in ascending order; each axis value is a
        # position in the final shape, so ascending insertion is correct.
        for i in sorted(axis):
            shp[i:i] = [1]
return tuple(shp)
raise ValueError("invalid axis")
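# Example: summing a value of shape (2, 3, 4) over axis=(0, 2) yields shape
# (3,); _keepdims((3,), (0, 2), False) reconstructs (1, 3, 1), the shape a
# gradient must be reshaped to before broadcasting back over the input.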
class Reduction(Expression):
"""Base class for reduction operations."""
def __init__(self, x, axis=None, keepdims=False):
Expression.__init__(self, x)
self.axis = axis
self.keepdims = keepdims
def forward(self, values):
values[self] = self.f(values[self.args[0]], axis=self.axis, keepdims=self.keepdims)
def backward(self, values, gradients):
arg = self.args[0]
if arg not in gradients:
return
shp = _keepdims(values[self].shape, self.axis, self.keepdims)
gself = numpy.asarray(gradients[self]).reshape(shp)
vself = numpy.asarray(values[self]).reshape(shp)
d = self.df(values[arg], vself, axis=self.axis)
if numpy.ndim(gradients[arg]) == 0:
gradients[arg] = numpy.zeros_like(values[arg])
gradients[arg] += gself * d
class sum(Reduction):
    f = staticmethod(lambda x,axis,keepdims: numpy.sum(x,axis=axis,keepdims=keepdims))
    df = staticmethod(lambda x,y,axis: 1.)
asum = sum # exported name (see __all__); the class shadows the builtin sum
class amax(Reduction):
f = staticmethod(lambda x,axis,keepdims: numpy.amax(x,axis=axis,keepdims=keepdims))
    @staticmethod
    def df(x, y, axis):
        # Split the gradient equally among tied maxima.
        is_max = (x == y).astype(x.dtype)
        z = numpy.asum(is_max, axis=axis, keepdims=True)
        return is_max / z
max = amax
class amin(Reduction):
f = staticmethod(lambda x,axis,keepdims: numpy.amin(x,axis=axis,keepdims=keepdims))
    @staticmethod
    def df(x, y, axis):
        # Split the gradient equally among tied minima.
        is_min = (x == y).astype(x.dtype)
        return is_min / numpy.asum(is_min, axis=axis, keepdims=True)
min = amin
class mean(Reduction):
f = staticmethod(lambda x,axis,keepdims: numpy.mean(x,axis=axis,keepdims=keepdims))
df = staticmethod(lambda x,y,axis: float(y.size) / x.size)
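# Example: reductions take NumPy-style axis arguments. For x whose value has
# shape (2, 3):
#
#     s = asum(x, axis=0)                 # value has shape (3,)
#     m = amax(x, axis=1, keepdims=True)  # value has shape (2, 1)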
class vecdot(Expression):
"""Equivalent to sum(x*y, axis).
:type x: Expression
:type y: Expression
:param axis: axis or axes along which to perform the sum-product.
:type axis: int or tuple of ints
:param keepdims: leave axis or axes in result as dimensions with size one.
:type keepdims: bool
"""
def __init__(self, x, y, axis=None, keepdims=False):
Expression.__init__(self, x, y)
self.axis = axis
self.keepdims = keepdims
def forward(self, values):
x, y = self.args
values[self] = numpy.vecdot(values[x], values[y], self.axis, self.keepdims)
    def backward(self, values, gradients):
        x, y = self.args
        shp = _keepdims(values[self].shape, self.axis, self.keepdims)
        gself = numpy.asarray(gradients[self]).reshape(shp)
        if x in gradients:
            gradients[x] += contract_like(gself * values[y], values[x])
        if y in gradients:
            gradients[y] += contract_like(gself * values[x], values[y])
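# Example: for vector-valued x and y, vecdot(x, y) computes the same scalar
# as asum(x * y), but in one fused forward/backward step.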
### Product-like operations
class dot(Expression):
    """Product of two arrays, following the semantics of ``numpy.dot``."""
    def __init__(self, x, y):
        Expression.__init__(self, x, y)
def forward(self, values):
xv = values[self.args[0]]
yv = values[self.args[1]]
values[self] = numpy.dot(xv, yv)
def backward(self, values, gradients):
x, y = self.args
xd, yd = numpy.ndim(values[x]), numpy.ndim(values[y])
# Common cases
if xd <= 1 and yd <= 1:
if x in gradients:
gradients[x] += numpy.dot(gradients[self], values[y])
            if y in gradients:
                gradients[y] += numpy.dot(values[x], gradients[self])
elif xd == 2 and yd == 1:
if x in gradients:
if numpy.ndim(gradients[x]) > 0:
gradients[x] = numpy.add_outer(gradients[self], values[y], gradients[x])
else:
gradients[x] += numpy.outer(gradients[self], values[y])
if y in gradients:
gradients[y] += numpy.dot(values[x].T, gradients[self])
elif xd == 1 and yd == 2:
if x in gradients:
gradients[x] += numpy.dot(gradients[self], values[y].T)
if y in gradients:
if numpy.ndim(gradients[y]) > 0:
gradients[y] = numpy.add_outer(values[x], gradients[self], gradients[y])
else:
gradients[y] += numpy.outer(values[x], gradients[self])
elif xd == 2 and yd == 2:
if x in gradients:
if numpy.ndim(gradients[x]) > 0:
gradients[x] = numpy.add_dot(gradients[self], values[y].T, gradients[x])
else:
gradients[x] += numpy.dot(gradients[self], values[y].T)
if y in gradients:
if numpy.ndim(gradients[y]) > 0:
gradients[y] = numpy.add_dot(values[x].T, gradients[self], gradients[y])
else:
gradients[y] += numpy.dot(values[x].T, gradients[self])
elif yd == 1:
xv = values[x]
xr = xv.reshape((-1, xv.shape[-1]))
gzr = gradients[self].reshape((xr.shape[0],))
if x in gradients:
if numpy.ndim(gradients[x]) > 0:
gradients[x] = numpy.add_outer(gzr, values[y], gradients[x])
else:
gradients[x] += numpy.outer(gzr, values[y]).reshape(values[x].shape)
if y in gradients:
gradients[y] += numpy.dot(xr.T, gzr).reshape(values[y].shape)
else:
xv, yv = values[x], numpy.swapaxes(values[y], -2, 0)
xr = xv.reshape((-1, xv.shape[-1]))
yr = yv.reshape((yv.shape[0], -1))
gzr = gradients[self].reshape((xr.shape[0], yr.shape[1]))
if x in gradients:
gradients[x] += numpy.dot(gzr, yr.T).reshape(xv.shape)
if y in gradients:
gradients[y] += numpy.swapaxes(numpy.dot(xr.T, gzr).reshape(yv.shape), -2, 0)
class einsum(Expression):
    """Einstein summation, following the semantics of ``numpy.einsum``.
    Only the explicit form of ``subscripts`` (with ``->``) is supported."""
    def __init__(self, subscripts, *args):
        Expression.__init__(self, *args)
        self.subscripts = subscripts
        lhs, rhs = subscripts.split("->")
        # Subscripts for the backward pass: for each argument, contract all
        # operands plus the output gradient down to that argument's indices.
        self.bsubscripts = []
        for i in lhs.split(","):
            self.bsubscripts.append("%s,%s->%s" % (lhs, rhs, i))
def forward(self, values):
values[self] = numpy.einsum(self.subscripts, *[values[arg] for arg in self.args])
    def backward(self, values, gradients):
        args = [values[arg] for arg in self.args] + [gradients[self]]
        for i in range(len(self.args)):
            if self.args[i] in gradients:
                # Temporarily replace operand i with ones, so the einsum
                # multiplies all the *other* operands by the output gradient.
                args[i] = numpy.broadcast_to(1., values[self.args[i]].shape)
                gradients[self.args[i]] += numpy.einsum(self.bsubscripts[i], *args)
                args[i] = values[self.args[i]]
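# Example: for a matrix-vector product z = einsum("ij,j->i", a, b), the
# backward subscripts are "ij,j,i->ij" for a and "ij,j,i->j" for b;
# substituting ones for the differentiated operand turns each einsum into
# the usual gradient contraction.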
### Cutting and pasting
class concatenate(Expression):
def __init__(self, args, axis=0):
if not isinstance(axis, int):
raise TypeError("axis must be an int")
Expression.__init__(self, *args)
self.axis = axis
def forward(self, values):
values[self] = numpy.concatenate([values[arg] for arg in self.args], axis=self.axis)
def backward(self, values, gradients):
i = 0
s = [slice(None)]*numpy.ndim(gradients[self])
for arg in self.args:
d = values[arg].shape[self.axis]
if arg in gradients:
s[self.axis] = slice(i, i+d)
gradients[arg] += gradients[self][tuple(s)]
i += d
class stack(Expression):
def __init__(self, args, axis=0):
if not isinstance(axis, int):
raise TypeError("axis must be an int")
Expression.__init__(self, *args)
self.axis = axis
def forward(self, values):
values[self] = numpy.stack([values[arg] for arg in self.args], axis=self.axis)
def backward(self, values, gradients):
s = [slice(None)]*numpy.ndim(gradients[self])
for i, arg in enumerate(self.args):
if arg in gradients:
s[self.axis] = i
gradients[arg] += gradients[self][tuple(s)]
class reshape(Expression):
def __init__(self, arg, shape):
Expression.__init__(self, arg)
self.shape = shape
def forward(self, values):
values[self] = numpy.reshape(values[self.args[0]], self.shape)
def backward(self, values, gradients):
arg = self.args[0]
if arg in gradients:
gradients[arg] += numpy.reshape(gradients[self], values[arg].shape)
class expand_dims(Expression):
def __init__(self, arg, axis):
Expression.__init__(self, arg)
self.axis = axis
def forward(self, values):
values[self] = numpy.expand_dims(values[self.args[0]], self.axis)
def backward(self, values, gradients):
arg = self.args[0]
if arg in gradients:
gradients[arg] += numpy.reshape(gradients[self], values[arg].shape)
class transpose(Expression):
def __init__(self, arg, axes=None):
Expression.__init__(self, arg)
self.axes = axes
        if axes:
            # Inverse permutation, used to transpose the gradient back.
            self.raxes = [i for (i,j) in sorted(enumerate(axes), key=operator.itemgetter(1))]
        else:
            self.raxes = None
def forward(self, values):
values[self] = numpy.transpose(values[self.args[0]], self.axes)
def backward(self, values, gradients):
arg = self.args[0]
if arg in gradients:
gradients[arg] += numpy.transpose(gradients[self], self.raxes)
class getitem(Expression):
    """An element or slice of an array.
    :param arg: array to be indexed
    :param item: index
    :type item: int or slice, not Expression
    """
def __init__(self, arg, item):
Expression.__init__(self, arg)
if isinstance(item, Expression): raise TypeError("item should be an index or slice")
self.item = item
def forward(self, values):
arg = values[self.args[0]]
values[self] = arg[self.item]
def backward(self, values, gradients):
arg = self.args[0]
if arg in gradients:
if not hasattr(gradients[arg], 'shape'):
gradients[arg] = numpy.zeros_like(values[arg])
gradients[arg][self.item] += gradients[self]
def __str__(self):
item = self.item
if isinstance(item, slice):
start = item.start or ""
stop = item.stop or ""
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
else:
item = "%s:%s" % (start, stop)
return "%s[%s]" % (self.args[0], item)
class setitem(Expression):
"""A new expression with an index or slice modified.
Note that when evaluated, this makes a copy of the whole array.
:param x: array to be indexed
:param item: index
:type item: int or slice, not Expression
:param y: new value
"""
def __init__(self, x, item, y):
Expression.__init__(self, x, y)
if isinstance(item, Expression): raise TypeError("item should be an index or slice")
self.item = item
def forward(self, values):
x, y = self.args
        # Make a real copy; indexing with [...] would only create a view.
        values[self] = values[x].copy()
values[self][self.item] = values[y]
def backward(self, values, gradients):
x, y = self.args
if x in gradients:
gradients[x] += gradients[self]
gradients[x][self.item] -= gradients[self][self.item]
if y in gradients:
gradients[y] += gradients[self][self.item]
def __str__(self):
item = self.item
if isinstance(item, slice):
start = item.start or ""
stop = item.stop or ""
            if item.step is not None:
                item = "%s:%s:%s" % (start, stop, item.step)
else:
item = "%s:%s" % (start, stop)
return "%s[%s] := %s" % (self.args[0], item, self.args[1])
### Traversal
def topological(root):
"""Traverse an Expression in topological (bottom-up) order."""
stack = [(root, 0)]
result = []
visited = set()
while len(stack) > 0:
x, i = stack.pop()
if i == len(x.args):
result.append(x)
else:
stack.append((x, i+1))
if x.args[i] not in visited:
stack.append((x.args[i], 0))
visited.add(x.args[i])
return result
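# Example: for z = (x + y) * x, topological(z) lists each subexpression once,
# arguments before the expressions that use them: [x, y, add, multiply].
# This is the order in which forward evaluation must proceed.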
### Visualization
def graphviz(x):
"""Draw the computation graph of an expression in the DOT language.
:param x: the expression to draw
:type x: Expression
:return: DOT representation of computation graph
:rtype: str
The output can be processed using GraphViz's dot command.
"""
result = []
result.append("digraph {\n")
result.append(' rankdir=BT;\n')
result.append(' node [shape=box,margin=0.1,width=0,height=0,style=filled,fillcolor=lightgrey,penwidth=0,fontname="Courier",fontsize=10];\n')
result.append(" edge [arrowsize=0.5];\n")
for subx in topological(x):
result.append(' %s [label="%s: %s"];\n' % (subx.serial, subx.serial, subx.__class__.__name__))
for arg in subx.args:
result.append(" %s -> %s\n" % (arg.serial, subx.serial))
result.append("}\n")
return "".join(result)
### Utilities
def contract_like(a, b):
"""Sum axes of a so that shape is the same as b."""
a = numpy.asarray(a)
b = numpy.asarray(b)
if a.shape == b.shape:
return a
else:
b_shape = (1,)*(a.ndim-b.ndim) + b.shape
axes = tuple(axis for axis in range(a.ndim) if a.shape[axis] > b_shape[axis])
if axes != ():
a = numpy.sum(a, axes)
return a.reshape(b.shape)
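# Example: if a has shape (5, 3) and b has shape (3,), contract_like(a, b)
# sums over axis 0 and returns shape (3,), undoing the broadcast that
# produced a. This is what Unary.backward and Binary.backward use to handle
# broadcast arguments.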