Source code for arsenal.maths.checkgrad

import numpy as np
from arsenal.maths.compare import compare
from arsenal.maths.rvs import spherical
from arsenal.iterview import iterview


# cg: Solve a linear systems, A x = b, using only A-x products. Assumes the
# matrix `A` is positive definite (same conditions as conjugate gradient). The
# cg solver requires that the Ax operator is an instance of LinearOperator
from scipy.sparse.linalg import cg as implicit_cg
from scipy.sparse.linalg import LinearOperator


from scipy.optimize import minimize
from scipy.linalg import norm
from arsenal import iterview


[docs]def fd_Hessian(f, x, eps=1e-5):
    [d] = x.flatten().shape
    x = x.flat
    [F] = f().shape
    H = np.zeros((d,d,F))
    for i in range(d):
        for j in range(d):
            # central-difference approximation of Hessian

            x_i = x[i]
            x_j = x[j]

            x[i] = x_i + eps
            x[j] = x_j + eps

            bb = f()

            x[i] = x_i + eps
            x[j] = x_j - eps

            ba = f()

            x[i] = x_i - eps
            x[j] = x_j + eps

            ab = f()

            x[i] = x_i - eps
            x[j] = x_j - eps

            aa = f()

            # reset
            x[i] = x_i
            x[j] = x_j

            H[i,j] = (bb - ba - ab + aa) / (4*eps**2)

    return H


[docs]def prox_numerical(f, x, s, jac=None):
    "Numerically estimate the proximal operator `Prox_{s*f}(x)`."

    def fg(z):
        d = (z-x)
        p = 0.5/s * (d @ d)
        if jac:
            fz, gz = f(z)
            return (fz + p, gz + 1/s * d)
        else:
            return f(z) + p

    sol = minimize(fg, x, jac=jac)
    for _ in range(4):
        sol = minimize(fg, sol.x, jac=jac)
    return sol


[docs]def fd_Jv(f, x, shape=None):
    """Finite-difference approximation to a Jacobian-vector product, J[f](x) v,
    where f: Rⁿ ↦ Rᵐ.

    Example: If f is a gradient, this function approximates Hessian-vector
    products.

    Solve a linear system for second-order gradient methods without
    materializing the Hessian or Fisher matrix. For example, the natural
    gradient direction in an exponential family:

      g = Euclidean_gradient(w)
      Fvp = fd_Jv(dlogZ, w)     # Fisher-vector products
      ng = implicit_cg(Fvp, g)[0]

    """
    if shape is None:
        [N] = x.shape
        [M] = f(x).shape
    else:
        N,M = shape
    def Jv(v, eps=1e-5):
        return (f(x + eps*v) - f(x - eps*v)) / (2*eps)
    return LinearOperator((N, M), matvec=Jv)


[docs]def finite_difference(f, eps=1e-5):
    def grad(x):
        x = np.array(x)
        g = []
        for k in range(x.shape[0]):
            v = x[k]
            x[k] = v + eps
            b = f(x)
            x[k] = v - eps
            a = f(x)
            x[k] = v
            g.append((b-a) / 2 / eps)
        return np.array(g)
    return grad


[docs]def fdcheck(func, w, g, keys = None, eps = 1e-5, quiet=0, verbose=1, progressbar=1, throw=True):
    """
    Finite-difference check.

    Returns `arsenal.math.compare` instance.

    - `func`: zero argument function, which references `w` in caller's scope.
    - `w`: parameters.
    - `g`: gradient estimate to compare against
    - `keys`: dimensions to check
    - `eps`: perturbation size

    """
    if quiet:
        verbose = 0
        progressbar = 0

    if keys is None:
        if hasattr(w, 'keys'):  # support for sparse vectors represented as a dictionary-like object.
            keys = list(w.keys())
            d = {}
        else:
            # use flat views, if need be.
            if len(w.shape) > 1: w = w.flat
            if len(g.shape) > 1: g = g.flat
            d = np.zeros_like(w)
            keys = list(range(len(w)))    # TODO: these keys have lost their names. So not good for debugging.
    else:
        d = {}

    for k in (iterview(keys) if progressbar else keys):
        was = w[k]
        w[k] = was + eps
        b = func()
        w[k] = was - eps
        a = func()
        w[k] = was
        d[k] = (b-a) / (2*eps)

    if throw and not np.allclose([d[k] for k in keys],
                                 [g[k] for k in keys]):
        compare(d, g, verbose=True)
        raise AssertionError('^^^^ see compare above')


    return compare([d[k] for k in keys],
                   [g[k] for k in keys],
                   verbose=verbose)


[docs]def quick_fdcheck(func, w, g, n_checks = 20, eps = 1e-5,
                  quiet=True, verbose=False, progressbar=False, throw=True):
    """
    Check gradient along random directions (a faster alternative to axis-aligned directions).

    Tim Vieira (2017) "How to test gradient implementations"
    https://timvieira.github.io/blog/post/2017/04/21/how-to-test-gradient-implementations/

    """

    if quiet:
        verbose = 0
        progressbar = 0

    keys = ['rand_%s' % i for i in range(n_checks)]
    H = {}
    G = {}

    was = w.flatten()

    w = np.asarray(w.flat)
    g = np.asarray(g.flat)

    dim = len(w)

    for k in (iterview(keys) if progressbar else keys):
        d = spherical(dim)
        G[k] = g.dot(d)
        w[:] = was + eps*d
        b = func()
        w[:] = was - eps*d
        a = func()
        w[:] = was
        H[k] = (b-a) / (2*eps)

    different = not np.allclose(list(H.values()), list(G.values()))
    if verbose or different:
        compare(H, G, verbose=True)

    if different and throw:
        raise AssertionError('^^^^ see compare above')

    return compare(H, G, verbose=False)
Source code for arsenal.maths.checkgrad

arsenal

Navigation

Related Topics