Example of AD in slides

Example of AD in slides#

import numpy as np
def forward(mx = 1.0, my = 1.0):

    # m1 = A1(m0), A1 only modifies x-component
    mx = mx**2 + my

    # m2 = A2(m1), A2 only modifies y-component
    my = mx + my**2

    # J = J(m2)
    J = mx + my
    
    return J
def finite_diff_grad(mx = 1.0, my = 1.0, eps = 1.e-7):
    
    J = forward(mx, my)
    
    # Perturb in x-direction
    mx_perturb = mx + eps
    Jx_perturb = forward(mx_perturb, my)
    gx = (Jx_perturb - J) / eps

    # Perturb in y-direction
    my_perturb = my + eps
    Jy_perturb = forward(mx, my_perturb)
    gy = (Jy_perturb - J) / eps
    
    return np.array([gx, gy])

finite_diff_grad()
array([4.00000021, 4.00000011])
def TLM_grad(mx = 1.0, my = 1.0):

    mx_orig = mx
    my_orig = my
    
    # x-component of gradient
    dmx = 1.0
    dmy = 0.0

    dmx = 2*mx*dmx + dmy # dm1 = B1(m0)*dm0
    dmy = dmx + 2*my*dmy # dm2 = B2(m1)*dm1
    gx  = dmx + dmy      # dJ = gx

    # y-component of gradient
    dmx = 0.0
    dmy = 1.0

    dmx = 2*mx*dmx + dmy # dm1 = B1(m0)*dm0
    dmy = dmx + 2*my*dmy # dm2 = B2(m1)*dm1
    gy  = dmx + dmy      # dJ = gy
    
    return np.array([gx, gy])
    
TLM_grad()
array([4., 4.])
def adj_store_grad(mx = 1.0, my = 1.0):

    tape = []
    
    tape.append([mx, my]) # Store m0
    mx = mx**2 + my

    tape.append([mx, my]) # Store m1
    my = mx + my**2

    J  = mx + my
    
    mxb = 0.0
    myb = 0.0
    Jb  = 1.0 # dJ/dJ = 1.0

    # m2b = dJ/dm2
    mxb = mxb + Jb
    myb = myb + Jb
    Jb  = 0.0

    # m1b = B2(m1)^T * m2b
    mx, my = tape.pop()
    mxb = mxb + myb 
    myb = 2*my*myb

    # m0b = B1(m0)^T * m1b
    mx, my = tape.pop()
    myb = mxb + myb
    mxb = 2*mx*mxb

    # g = dJ/dm0 = m0b
    return np.array([mxb, myb])
    
adj_store_grad()
array([4., 4.])