Module 2.0 - Neural Networks¶

Our Goal¶

Compute the derivative of a Python function with respect to its inputs.

Example: Function¶

In [2]:
def expression():
    x = Scalar(1.0)
    y = Scalar(1.0)
    z = -y * sum([x, x, x]) * y + 10.0 * x
    h_x_y = z + z
    return h_x_y
In [3]:
SVG(make_graph(expression(), lr=True))
Out[3]:

Chain Rule: Simple Case¶

$$ \begin{eqnarray*} z &=& g(x) \\ d &=& f'(z) \\ f'_x(g(x)) &=& g'(x) \times d \\ \end{eqnarray*} $$
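
As a quick check of the rule (an added example, not one of the boxes below), take $g(x) = x^2$ and $f(z) = 3z$:

$$ \begin{eqnarray*} z &=& x^2 \\ d &=& f'(z) = 3 \\ f'_x(g(x)) &=& g'(x) \times d = 2x \times 3 = 6x \end{eqnarray*} $$

which matches differentiating $f(g(x)) = 3x^2$ directly.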

In [4]:
draw_boxes(["$x$", "$z = g(x)$", "$f(g(x))$"], [1, 1])
Out[4]:
In [5]:
draw_boxes([r"$d\cdot g'(x)$", "$f'(z)$", "$1$"], [1, 1], lr=False)
Out[5]:

Chain Rule: Two Arguments¶

$$ \begin{eqnarray*} z &=& g(x, y) \\ d &=& f'(z) \\ f'_x(g(x, y)) &=& g_x'(x, y) \times d \\ f'_y(g(x, y)) &=& g_y'(x, y) \times d \end{eqnarray*} $$

In [6]:
draw_boxes([("$x$", "$y$"), "$z = g(x, y)$", "$h(x,y)$"], [1, 1])
Out[6]:
In [7]:
draw_boxes(
    [(r"$d \times  g'_x(x, y)$", r"$d \times g'_y(x, y)$"), "$f'(z)$", "$1$"],
    [1, 1],
    lr=False,
)
Out[7]:

Chain Rule: Repeated Use¶

$$z = g(x)$$ $$h(x) = f(z, z)$$

In [8]:
draw_boxes(["$x$", ("$z_1, z_2$"), "$h(x)$"], [1, 1])
Out[8]:

Chain Rule: Repeated Use¶

$$ \begin{eqnarray*} d &=& f'_{z_1}(z_1, z_2) + f'_{z_2}(z_1, z_2) \\ h'_x(x) &=& d \times g'_x(x) \\ \end{eqnarray*} $$
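
As a quick check (again an added example), take $g(x) = 2x$ and $f(z_1, z_2) = z_1 \times z_2$, so $h(x) = (2x)^2 = 4x^2$. The two partial derivatives are added before multiplying by $g'(x)$:

$$ \begin{eqnarray*} d &=& z_2 + z_1 = 4x \\ h'_x(x) &=& d \times g'_x(x) = 4x \times 2 = 8x \end{eqnarray*} $$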

In [9]:
draw_boxes(["$x$", ("$z_1 = g(x)$", "$z_2 = g(x)$"), "$h(x)$"], [1, 1])
Out[9]:
In [10]:
draw_boxes(
    [r"$d \cdot g'_x(x)$", ("$f'_{z_1}(z_1, z_2)$", "$f'_{z_2}(z_1, z_2)$"), "$1$"],
    [1, 1],
    lr=False,
)
Out[10]:

Algorithm: Outer Loop¶

  1. Call topological sort
  2. Create a dict of edges with empty $d$ values.
  3. For each edge and its $d$ value in topological order:

Algorithm: Inner Loop¶

  1. If the edge goes to a leaf, we are done
  2. Call backward with $d$ on the previous box
  3. Loop through all of its input edges and add the derivative (see the sketch below)
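
A minimal sketch of this procedure in Python, written over variables (boxes) rather than edges; topological_sort, is_leaf, and chain_rule are assumed helper names for illustration, not necessarily minitorch's exact API.

def backpropagate(output, d_output=1.0):
    # Dict of accumulated d values, seeded with 1 at the output box.
    d = {output: d_output}
    for var in topological_sort(output):  # outer loop: output back toward inputs
        d_var = d.get(var, 0.0)
        if var.is_leaf():
            # Inner step 1: edge reaches a leaf, store its derivative and stop.
            var.derivative = (var.derivative or 0.0) + d_var
            continue
        # Inner step 2: call backward with d on the previous box.
        for inp, d_in in var.chain_rule(d_var):
            # Inner step 3: add the derivative onto each input edge.
            d[inp] = d.get(inp, 0.0) + d_in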

Example¶

In [11]:
chalk.set_svg_height(200)
backprop(1)
Out[11]:

Example¶

In [12]:
backprop(2)
Out[12]:

Example¶

In [13]:
backprop(3)
Out[13]:

Example¶

In [14]:
backprop(4)
Out[14]:

Example¶

In [15]:
backprop(5)
Out[15]:

Example¶

In [16]:
backprop(6)
Out[16]:

Example¶

In [17]:
backprop(7)


chalk.set_svg_height(200)

Quiz¶

Outline¶

  • Model Training
  • Neural Networks
  • Modern Models

Model Training¶

Reminder: MiniML¶

  • Dataset - Data to fit
  • Model - Shape of fit
  • Loss - Goodness of fit

Model 1¶

  • Linear Model
In [18]:
from minitorch import Parameter, Module
class Linear(Module):
    def __init__(self, w1, w2, b):
        super().__init__()
        self.w1 = Parameter(w1)
        self.w2 = Parameter(w2)
        self.b = Parameter(b)

    def forward(self, x1: float, x2: float) -> float:
        return self.w1.value * x1 + self.w2.value * x2 + self.b.value


model = Linear(1, 1, -0.9)
draw_graph(model)
Out[18]:

Point Loss¶

In [19]:
def point_loss(x):
    return minitorch.operators.relu(x)


def full_loss(m):
    l = 0
    for x, y in zip(s.X, s.y):
        l += point_loss(-y * m.forward(*x))
    return -l


graph(point_loss, [], [-2, -0.2, 1])
Out[19]:

Class Goal¶

  • Find parameters that minimize loss
In [20]:
chalk.hcat(
    [show(Linear(1, 1, -0.6)), show(Linear(1, 1, -0.7)), show(Linear(1, 1, -0.8))], 0.3
)
Out[20]:

Parameter Fitting¶

  1. (Forward) Compute the loss function, $L(w_1, w_2, b)$
  2. (Backward) See how small changes would change the loss
  3. Update the parameters to locally reduce the loss (see the update rule below)
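
Step 3 is a standard gradient-descent update applied to each parameter; $\alpha$ below is a small step size (learning rate), a symbol introduced here for illustration.

$$ \begin{eqnarray*} w_1 &\leftarrow& w_1 - \alpha \frac{\partial L}{\partial w_1} \\ w_2 &\leftarrow& w_2 - \alpha \frac{\partial L}{\partial w_2} \\ b &\leftarrow& b - \alpha \frac{\partial L}{\partial b} \end{eqnarray*} $$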

Update Procedure¶

In [21]:
chalk.set_svg_height(400)

show_loss(full_loss, Linear(1, 1, 0))
chalk.set_svg_height(200)

Module for Linear¶

In [22]:
class LinearModule(minitorch.Module):
    def __init__(self):
        super().__init__()
        # 0.0 is start value for param
        self.w1 = Parameter(Scalar(0.0))
        self.w2 = Parameter(Scalar(0.0))
        self.bias = Parameter(Scalar(0.0))

    def forward(self, x1: Scalar, x2: Scalar) -> Scalar:
        return x1 * self.w1.value + x2 * self.w2.value + self.bias.value

Training Loop¶

In [23]:
def train_step(optim, model, data):
    # Step 1 - Forward (Loss function)
    x_1, x_2 = Scalar(data[0]), Scalar(data[1])
    loss = model.forward(x_1, x_2).relu()
    # Step 2 - Backward (Compute derivative)
    loss.backward()
    # Step 3 - Update Params
    optim.step()
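
What optim.step() does in step 3 can be sketched as a gradient-descent update over the model's Parameters. The class below is an illustrative sketch (the name SGD, the lr argument, and the .data attribute on Scalar are assumptions), not necessarily minitorch's built-in optimizer.

class SGD:
    # Illustrative sketch of a gradient-descent optimizer over Parameters.
    def __init__(self, parameters, lr=0.1):
        self.parameters = list(parameters)
        self.lr = lr

    def step(self):
        for p in self.parameters:
            if p.value.derivative is not None:
                # Move each parameter a small step against its derivative.
                # .data is assumed to hold the raw float inside a Scalar.
                p.update(Scalar(p.value.data - self.lr * p.value.derivative))

It would be constructed once from the model, e.g. optim = SGD(model.parameters(), lr=0.1), before calling train_step.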

More Features: Linear Model¶

$\text{lin}(x; w, b) = x_1 \times w_1 + \ldots + x_n \times w_n + b$

More Features: Linear (Code)¶

In [24]:
class LinearModule(minitorch.Module):
    def __init__(self, in_size):
        super().__init__()
        self.weights = []
        # Parameters must be registered with add_parameter
        for i in range(in_size):
            self.weights.append(self.add_parameter(f"weight_{i}", 0.0))
        self.bias = self.add_parameter("bias", 0.0)
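
A possible forward for this module, shown as a sketch; it assumes the bias Parameter registered above and the same .value access used in the earlier Linear examples.

    def forward(self, inputs):
        # lin(x; w, b) = x_1 * w_1 + ... + x_n * w_n + b
        out = self.bias.value
        for x_i, w_i in zip(inputs, self.weights):
            out = out + x_i * w_i.value
        return out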

Neural Networks¶

Linear Model Example¶

  • Parameters
In [25]:
chalk.set_svg_height(300)
model1 = Linear(1, 1, -1.0)
model2 = Linear(0.5, 1.5, -1.0)
compare(model1, model2)
Out[25]:

Harder Datasets¶

In [26]:
split_graph(s1_hard, s2_hard, show_origin=True)
Out[26]:

Harder Datasets¶

  • The model may not fit well for any choice of parameters.
In [27]:
model = Linear(1, 1, -0.7)
draw_with_hard_points(model)
Out[27]:

Neural Networks¶

  • A new model
  • Uses repeated splits of the data
  • The loss function will not change

Intuition: Neural Networks¶

  1. Apply many linear separators
  2. Reshape the data space based on results
  3. Apply a linear model on new space

Notation: Multiple Parameters¶

  • Use superscripts $w^0$ and $w^1$ to indicate different sets of parameters.
  • Our final model will have many linears.
  • These will become Torch sub-modules.

Intuition: Split 1¶

In [28]:
yellow = Linear(-1, 0, 0.25)
ycolor = Color("#fde699")
draw_with_hard_points(yellow, ycolor, Color("white"))
Out[28]:

Reshape: ReLU¶

In [29]:
graph(
    minitorch.operators.relu,
    [yellow.forward(*pt) for pt in s2_hard],
    [yellow.forward(*pt) for pt in s1_hard],
    3,
    0.25,
    c=ycolor,
)
Out[29]:

Math View¶

$$ \begin{eqnarray*} h_1 &=& \text{ReLU}(\text{lin}(x; w^0, b^0)) \\ \end{eqnarray*} $$

Intuition: Split 2¶

In [30]:
green = Linear(1, 0, -0.8)
gcolor = Color("#d1e9c3")
draw_with_hard_points(green, gcolor, Color("white"))
Out[30]:

Math View¶

$$ \begin{eqnarray*} h_2 &=& \text{ReLU}(\text{lin}(x; w^1, b^1)) \\ \end{eqnarray*} $$

Reshape: ReLU¶

In [31]:
graph(
    minitorch.operators.relu,
    [green.forward(*pt) for pt in s2_hard],
    [green.forward(*pt) for pt in s1_hard],
    3,
    0.25,
    c=gcolor,
)
Out[31]:

Reshape: ReLU¶

In [32]:
draw_nn_graph(green, yellow)
Out[32]:

Final Layer¶

In [33]:
@dataclass
class MLP:
    lin1: Linear
    lin2: Linear
    final: Linear

    def forward(self, x1, x2):
        x1_1 = minitorch.operators.relu(self.lin1.forward(x1, x2))
        x2_1 = minitorch.operators.relu(self.lin2.forward(x1, x2))
        return self.final.forward(x1_1, x2_1)


mlp = MLP(green, yellow, Linear(3, 3, -0.3))
draw_with_hard_points(mlp)
Out[33]:

Math View¶

$$ \begin{eqnarray*} h_1 &=& \text{ReLU}(x_1 \times w^0_1 + x_2 \times w^0_2 + b^0) \\ h_2 &=& \text{ReLU}(x_1 \times w^1_1 + x_2 \times w^1_2 + b^1)\\ m(x_1, x_2) &=& h_1 \times w_1 + h_2 \times w_2 + b \end{eqnarray*} $$ Parameters: $w_1, w_2, w^0_1, w^0_2, w^1_1, w^1_2, b, b^0, b^1$

Math View (Alt)¶

$$ \begin{eqnarray*} h_1 &=& \text{ReLU}(\text{lin}(x; w^0, b^0)) \\ h_2 &=& \text{ReLU}(\text{lin}(x; w^1, b^1)) \\ m(x_1, x_2) &=& \text{lin}(h; w, b) \end{eqnarray*} $$

Code View¶

Linear

In [34]:
class LinearModule(Module):
    def __init__(self):
        super().__init__()
        self.w_1 = Parameter(Scalar(0.0))
        self.w_2 = Parameter(Scalar(0.0))
        self.b = Parameter(Scalar(0.0))

    def forward(self, inputs):
        return inputs[0] * self.w_1.value + inputs[1] * self.w_2.value + self.b.value

Code View¶

Model

In [35]:
class Network(minitorch.Module):
    def __init__(self):
        super().__init__()
        self.unit1 = LinearModule()
        self.unit2 = LinearModule()
        self.classify = LinearModule()

    def forward(self, x):
        h1 = self.unit1.forward(x).relu()
        h2 = self.unit2.forward(x).relu()
        return self.classify.forward((h1, h2))

Training¶

  • All the parameters in the model are leaves
  • Computing backward on the loss fills in their derivatives
In [36]:
model = Network()
parameters = dict(model.named_parameters())
parameters
Out[36]:
{'unit1.w_1': Scalar(0.0),
 'unit1.w_2': Scalar(0.0),
 'unit1.b': Scalar(0.0),
 'unit2.w_1': Scalar(0.0),
 'unit2.w_2': Scalar(0.0),
 'unit2.b': Scalar(0.0),
 'classify.w_1': Scalar(0.0),
 'classify.w_2': Scalar(0.0),
 'classify.b': Scalar(0.0)}

Derivatives¶

  • All the parameters in the model are leaf Variables
In [37]:
model = Network()
parameters = dict(model.named_parameters())
x1, x2 = Scalar(0.5), Scalar(0.5)
# Step 1
out = model.forward((x1, x2))
loss = out.relu()
# Step 2
SVG(make_graph(loss, lr=True))
Out[37]:
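
Before a parameter's derivative can be read off (next cell), backward has to be called on the loss. A minimal sketch of that step, reusing loss from the cell above:

# Step 2 (continued) - backward fills the derivative of every leaf parameter
loss.backward()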

Derivatives¶

  • All the parameters in the model are leaf Scalars
In [38]:
parameters["unit1.w_1"].value.derivative

Playground¶

NN Playground

QA¶