Example of AD in slides#
import numpy as np
def forward(mx = 1.0, my = 1.0):
# m1 = A1(m0), A1 only modifies x-component
mx = mx**2 + my
# m2 = A2(m1), A2 only modifies y-component
my = mx + my**2
# J = J(m2)
J = mx + my
return J
def finite_diff_grad(mx = 1.0, my = 1.0, eps = 1.e-7):
J = forward(mx, my)
# Perturb in x-direction
mx_perturb = mx + eps
Jx_perturb = forward(mx_perturb, my)
gx = (Jx_perturb - J) / eps
# Perturb in y-direction
my_perturb = my + eps
Jy_perturb = forward(mx, my_perturb)
gy = (Jy_perturb - J) / eps
return np.array([gx, gy])
finite_diff_grad()
array([4.00000021, 4.00000011])
def TLM_grad(mx = 1.0, my = 1.0):
mx_orig = mx
my_orig = my
# x-component of gradient
dmx = 1.0
dmy = 0.0
dmx = 2*mx*dmx + dmy # dm1 = B1(m0)*dm0
dmy = dmx + 2*my*dmy # dm2 = B2(m1)*dm1
gx = dmx + dmy # dJ = gx
# y-component of gradient
dmx = 0.0
dmy = 1.0
dmx = 2*mx*dmx + dmy # dm1 = B1(m0)*dm0
dmy = dmx + 2*my*dmy # dm2 = B2(m1)*dm1
gy = dmx + dmy # dJ = gy
return np.array([gx, gy])
TLM_grad()
array([4., 4.])
def adj_store_grad(mx = 1.0, my = 1.0):
tape = []
tape.append([mx, my]) # Store m0
mx = mx**2 + my
tape.append([mx, my]) # Store m1
my = mx + my**2
J = mx + my
mxb = 0.0
myb = 0.0
Jb = 1.0 # dJ/dJ = 1.0
# m2b = dJ/dm2
mxb = mxb + Jb
myb = myb + Jb
Jb = 0.0
# m1b = B2(m1)^T * m2b
mx, my = tape.pop()
mxb = mxb + myb
myb = 2*my*myb
# m0b = B1(m0)^T * m1b
mx, my = tape.pop()
myb = mxb + myb
mxb = 2*mx*mxb
# g = dJ/dm0 = m0b
return np.array([mxb, myb])
adj_store_grad()
array([4., 4.])