
$G'^{i}_{x_i}(x) = -(x_i)^{-2}$
$f'_{x_i}(G(x)) = -(x_i)^{-2} \cdot d_i$
class Inv(minitorch.Function):
    """Element-wise reciprocal: f(t1) = 1 / t1."""

    @staticmethod
    def forward(ctx, t1: Tensor) -> Tensor:
        """Apply the inverse map, saving the input for the backward pass."""
        ctx.save_for_backward(t1)
        return t1.f.inv_map(t1)

    @staticmethod
    def backward(ctx, d: Tensor) -> Tensor:
        """Chain rule for 1/x: inv_back_zip folds -(x)^{-2} into the upstream grad d."""
        t1 = ctx.saved_values[0]
        return d.f.inv_back_zip(t1, d)
$G'^{i}_{x_i}(x, y) = y_i$
$f'_{x_i}(G(x, y)) = y_i \cdot d_i$
class Mul(minitorch.Function):
    """Element-wise multiplication: f(t1, t2) = t1 * t2."""

    @staticmethod
    def forward(ctx, t1: Tensor, t2: Tensor) -> Tensor:
        # BUG FIX: original called ctx.save_for_backwards((t1, t2)) — a
        # misspelled method name (cf. Inv's save_for_backward) passed a single
        # tuple, which would break backward's two-value unpack of
        # ctx.saved_values. Save the two tensors separately instead.
        ctx.save_for_backward(t1, t2)
        return t1.f.mul_map(t1, t2)

    @staticmethod
    def backward(ctx, d: Tensor) -> tuple[Tensor, Tensor]:
        # d(t1*t2)/dt1 = t2 and d(t1*t2)/dt2 = t1; the chain rule multiplies
        # each by the upstream gradient d. (Annotation fixed: this returns a
        # pair of gradients, one per forward input.)
        (t1, t2) = ctx.saved_values
        return d.f.mul_map(t2, d), d.f.mul_map(t1, d)
$G'_{x_i}(x) = 1$
$f'_{x_i}(G(x)) = d_i$

$$ \begin{eqnarray*} \text{lin}(x; w, b) &=& x_1 \times w_1 + x_2 \times w_2 + b \\ h_1 &=& \text{ReLU}(\text{lin}(x; w^0, b^0)) \\ h_2 &=& \text{ReLU}(\text{lin}(x; w^1, b^1))\\ m(x) &=& \text{lin}(h; w, b) \end{eqnarray*} $$
# NOTE(review): presumably renders/splits the computation graph at nodes
# s1 and s2 for visualization — confirm against the helper's definition.
split_graph(s1, s2)

# X - (BATCH, FEATURES): one row of input features per example.
out = model.forward(X)
# out - (BATCH): one prediction per example.
l = loss(out)
# l - (1): the whole batch reduced to a single loss value.
class Network(minitorch.Module):
    """Three-layer MLP: FEATURES -> HIDDEN -> HIDDEN -> 1 (scalar output)."""

    def __init__(self):
        ...
        # Two hidden layers plus a scalar output layer; FEATURES and
        # HIDDEN are constants defined elsewhere in the file.
        self.layer1 = Linear(FEATURES, HIDDEN)
        self.layer2 = Linear(HIDDEN, HIDDEN)
        self.layer3 = Linear(HIDDEN, 1)




Step 3
# Gradient-descent step: move each parameter against its gradient,
# scaled by the learning rate and averaged over the dataset size.
for param in model.parameters():
    if param.value.grad is None:
        continue  # parameter did not participate in this backward pass
    param.update(param.value - RATE * (param.value.grad / float(data.N)))

embedding
Get word vector
# One-hot encode the word index over the vocabulary.
word_one_hot = tensor([1 if i == word else 0 for i in range(VOCAB)])
# Masked sum over axis 1 selects the word's row of the embedding matrix.
embedding = (layer1 * word_one_hot).sum(1)

# Dot product of the two word embeddings: an (unnormalized) similarity score.
(word_emb1 * word_emb2).sum()
Query 1
^(lisbon|portugal|america|washington|rome|athens|london|england|greece|italy)$
Query 2
^(doctor|patient|lawyer|client|clerk|customer|author|reader)$



