ML Primer

This guide is a primer on the very basics of machine learning that are necessary to complete the assignments and motivate the final system. Machine learning is a rich and well-developed field with many different models, goals, and learning settings. There are many great texts that cover all the aspects of the area in detail. This guide is not that. Our goal is to explain the minimal details of one dataset with one class of model. Specifically, this is an introduction to supervised binary classification with neural networks. The goal of this section is to learn how a basic neural network works to classify simple points.

Dataset

Supervised learning problems begin with a labeled training dataset. We assume that we are given a set of labeled points. Each point has two coordinates $x_1$ and $x_2$, and has a label $y$ corresponding to an O or an X. For instance, here is one O-labeled point:

And here is an X-labeled point:

It is often convenient to plot all of the points together on one set of axes.

Here we can see that all the O points are in the top-right and all the X points are in the bottom-left. Not all datasets are this simple; here is another dataset where the points are split up a bit more.

Later in the class, we will consider datasets of different forms, e.g. a dataset of handwritten numbers, where some are 8's and others are 2's:

Here is an example of what this dataset looks like.

Model

In addition to a dataset, our ML system needs to specify a model type that we want to fit to the data. A model is a function that assigns labels to data points. In 2D, we can visualize a model by its decision boundary. For instance, consider the following (Model A).

We can alter this model's decision boundary by "turning the knobs" of its parameters, for example by:

a. changing the separator slope

b. changing the separator cutoff ("intercept")

Parameters are the set of numerical values that fully define a model's decisions. Parameters are what we store to describe how a model acts, and they are all we need to produce the model's decision on a given data point.

In the case of binary classification, we can write down a linear model as:

$$m(x_1, x_2; w_1, w_2, b) = x_1 \times w_1 + x_2 \times w_2 + b$$

Here $w_1, w_2, b$ are parameters, $x_1, x_2$ is the input point, and the model predicts X if $m$ is greater than 0 and O otherwise. The semicolon notation indicates which arguments are parameters and which are data.

Note: See https://wikipedia.org/wiki/Linear_equation for a review of linear equations, and an explanation of why this corresponds to parameterizing the slope and intercept of a line.

Loss

When we look at our data, we can clearly see that some models are good and make no classification errors:

And some are bad and make multiple errors:

In order to find a good model, we need to first define what "good" means. We do this through a loss function that scores how badly we are currently doing. A good model is one that makes this loss as small as possible.

Our loss function will be based on each point's distance from the decision boundary and on which side of the boundary it falls. You can show that this distance is proportional to the absolute value of the function $m()$ above.

For simplicity, let us consider a single point with different models.

This point might be on the correct side of the line and very far from it (Point A, "great"):

Or it might be on the correct side of the line, but close to the line (Point B, "worrisome"):

Or it might fall on the wrong side of the line (Point C, "bad"):

The loss is determined by a function of this distance. The most commonly used function (and the one we will focus on) is the sigmoid function. For strongly negative inputs it goes to zero, and for strongly positive inputs it goes to one. In between, it forms a smooth S-curve.
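Concretely, the sigmoid function is:

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$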

As shown below, the losses of the three X points land at the following positions on the sigmoid curve: almost zero for Point A, a middle value for Point B, and nearly one for Point C.

The total loss for a model is the product of the individual losses. It's easy to see that a good model yields a lower loss than a bad one.
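Here is a small sketch of this loss in plain Python, under the assumption that each point is summarized by a signed score that is negative when the point is on its correct side and positive when it is on the wrong side (the names are illustrative):

```python
import math


def sigmoid(z):
    return 1.0 / (1.0 + math.exp(-z))


def point_loss(signed_score):
    """Per-point loss: near 0 for a point far on its correct side,
    near 1 for a point far on the wrong side."""
    return sigmoid(signed_score)


def total_loss(signed_scores):
    """Total loss as described above: the product of the per-point losses."""
    loss = 1.0
    for s in signed_scores:
        loss *= point_loss(s)
    return loss


print(point_loss(-5.0))  # ~0.007, Point A ("great")
print(point_loss(-0.3))  # ~0.43,  Point B ("worrisome")
print(point_loss(4.0))   # ~0.98,  Point C ("bad")
```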

Fitting Parameters

The model class tells us what models we can consider, the parameters tell us how to specify a given model, and the loss tells us how good our current model is. What we need is a method for finding a good model given a loss function. We refer to this step as parameter fitting.

Unfortunately, parameter fitting is quite difficult. For all but the simplest ML models, it is a challenging and computationally demanding task. Our sample problem has just 3 parameters, but modern large models may have billions of parameters that need to be fit.

This is the step where libraries like MiniTorch come in handy. This library aims to demonstrate how, with careful coding, we can set up a framework that fits parameters for supervised classification automatically and efficiently.

The library focuses on one form of parameter fitting: gradient descent. Intuitively, gradient descent works in the following manner (see the sketch after this list).

  1. Compute the loss function, $L$, for the data with the parameters.
  2. See how small changes to each of the parameters would change the loss.
  3. Update the parameters with a small change in the direction that locally most reduces the loss.
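The loop above might be sketched in Python as follows. The `gradient` argument stands in for whatever machinery computes the derivatives (MiniTorch builds that machinery in later modules); everything here is illustrative, not the library's API:

```python
def gradient_descent_step(params, gradient, learning_rate=0.1):
    """One update: move each parameter a small step in the direction
    that locally reduces the loss (i.e. against its gradient)."""
    return [p - learning_rate * g for p, g in zip(params, gradient)]


# Hypothetical usage, assuming a compute_gradient(loss, params) helper:
# for step in range(100):
#     g = compute_gradient(loss, params)
#     params = gradient_descent_step(params, g)
```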

Let's return to the incorrect model above.

As we noted, this model has a high loss, and we want to consider ways to "turn the knobs" of the parameters to find a better model. Let us focus on the parameter controlling the intercept of the model.

We can consider how the loss changes with respect to just varying this parameter. It seems like the loss will go down if we lower the intercept a bit.

Doing this leads to a better model.

We can repeat this process, both for the intercept and for all the other parameters in the model.

But how did we know how the loss function would change if we changed the intercept? For a small problem, we can just move it a bit and see. But remember that machine learning models can have billions of parameters, so this would take a ton of time.

A better approach is to utilize calculus and take the derivative of the loss function with respect to the parameter, $L'_b$. If we can efficiently and automatically take this derivative, it tells us in which direction to change the parameter to reduce the loss. Even better, if we can efficiently take the set of derivatives for all the parameters (known as the gradient), then we know which direction they should all move.
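For intuition, here is the "move it a bit and see" approach written out as a centered finite-difference estimate. It works, but it needs two loss evaluations per parameter, which is exactly why an efficient, automatic derivative matters at scale (the `loss_fn` here is a toy placeholder, not a MiniTorch API):

```python
def estimate_derivative(loss_fn, params, i, eps=1e-4):
    """Approximate d(loss)/d(params[i]) by nudging that one parameter."""
    up, down = list(params), list(params)
    up[i] += eps
    down[i] -= eps
    return (loss_fn(up) - loss_fn(down)) / (2 * eps)


def toy_loss(p):
    # Toy loss: L(w1, w2, b) = (w1 + 2*w2 + b)**2, so dL/db = 2*(w1 + 2*w2 + b).
    return (p[0] + 2 * p[1] + p[2]) ** 2


print(estimate_derivative(toy_loss, [1.0, 1.0, 1.0], i=2))  # ~8.0
```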

The first 4 modules in MiniTorch are dedicated to implementing this fitting procedure efficiently.

Neural Networks

The linear model class can be used to find good fits to the data we have considered so far, but it fails for data that splits up into multiple segments. These datasets are not linearly separable.

An alternative model class for this style of data is a neural network. Neural networks can be used to specify a much wider range of separators.

Intuitively, neural networks divide classification into two or more stages. Each intermediate stage uses a linear model to reshape the data into new points. The final stage is a linear classifier over the transformed points.

Let's look at our dataset:

A neural network might first produce a separator (yellow) to pull apart the top red points:

And then produce another separator (green) to pull apart the bottom red points:

The neural network is allowed to transform the points based on their distance from these separators (very similar to the loss function above). It can use whatever function it wants to do this transformation. Ideally, the function would make the points in the yellow and green regions high, and the other points low. One function that does this is the ReLU function (ReLU stands for Rectified Linear Unit, a very complicated way of saying "delete values below 0"):
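In symbols, ReLU just keeps the positive part of its input:

$$\text{ReLU}(z) = \max(0, z)$$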

For the yellow separator, the ReLU yields the following values:

Basically, the top X's become positive and the bottom O's and X's become 0. Something very similar happens for the green separator.

Finally, the yellow and green values become our new $x_1, x_2$. Since all the O's are now at the origin, it is very easy to separate out the space.

Looking back at the original model, this process appears like it has produced two lines to pull apart the data.

Mathematically we can think of the transformed data as values $h_1, h_2$ which we get from applying separators with different parameters to the original data. The final prediction then applies a separator to $h_1, h_2$.

\begin{eqnarray*} h_1 &=& \text{ReLU}(x_1 \times w^0_1 + x_2 \times w^0_2 + b^0) \\ h_2 &=& \text{ReLU}(x_1 \times w^1_1 + x_2 \times w^1_2 + b^1)\\ m(x_1, x_2) &=& h_1 \times w_1 + h_2 \times w_2 + b \end{eqnarray*}

Here $w_1, w_2, w^0_1, w^0_2, w^1_1, w^1_2, b, b^0, b^1$ are all parameters. We have gained more flexible models, at the cost of now needing to fit many more parameters to the data.
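As before, here is an illustrative plain-Python sketch of this forward pass (not the MiniTorch implementation; parameter names follow the equations above):

```python
def relu(z):
    # "delete values below 0"
    return max(0.0, z)


def neural_net(x1, x2,
               w0_1, w0_2, b0,   # parameters of the first hidden separator
               w1_1, w1_2, b1,   # parameters of the second hidden separator
               w_1, w_2, b):     # parameters of the final linear classifier
    """Two ReLU hidden units (h1, h2), then a linear model over the
    transformed point, matching the equations above."""
    h1 = relu(x1 * w0_1 + x2 * w0_2 + b0)
    h2 = relu(x1 * w1_1 + x2 * w1_2 + b1)
    return h1 * w_1 + h2 * w_2 + b
```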

This neural network will be the main focus for the first couple of modules. It appears quite simple, but fitting it effectively will require building up some systems infrastructure. Once we have this infrastructure, though, we will be able to easily support most modern neural network models.