Easy to write as a layer
class Embedding(minitorch.Module):
    """Embedding lookup implemented as a linear layer.

    Multiplying a one-hot input row by the (vocab_size, emb_size)
    weight matrix selects that row's embedding vector.
    """

    def __init__(self, vocab_size, emb_size):
        super().__init__()
        # Learned lookup table: one emb_size-dim vector per vocab entry.
        self.weights = \
            minitorch.Parameter(minitorch.rand((vocab_size, emb_size)))
        self.vocab_size = vocab_size

    def forward(self, input):
        # Bug fix: original signature was `forward(input)` — missing `self`,
        # so the method could not be called on an instance.
        # input is assumed to be one-hot rows of width vocab_size — TODO confirm.
        # NOTE(review): minitorch's Parameter exposes `.value` (singular) in the
        # reference implementation — verify `.values` against the project API.
        return (input @ self.weights.values)
Quiz
How do we handle locality in features?
Output Values
output[0] = weight[0] * input[0] + weight[1] * input[1] + weight[2] * input[2]
output[1] = weight[0] * input[1] + weight[1] * input[2] + weight[2] * input[3]
output[2] = weight[0] * input[2] + weight[1] * input[3] + weight[2] * input[4]
Unroll
def unroll(input, T, K):
    """Build a T x K matrix whose row i is the length-K sliding window
    of `input` starting at position i, zero-padded past the end."""
    windows = []
    for start in range(T):
        row = []
        for offset in range(K):
            pos = start + offset
            # Positions beyond the input contribute zero (padding).
            row.append(input[pos] if pos < T else 0)
        windows.append(row)
    return tensor(windows)
Unroll
# Example: unroll a length-6 signal with kernel size 3.
input = tensor([1, 2, 3, 4, 5, 6])
K = 3  # kernel width
T = input.shape[0]  # one output row per input position
unrolled_input = unroll(input, T, K)
print(unrolled_input)
[ [1.00 2.00 3.00] [2.00 3.00 4.00] [3.00 4.00 5.00] [4.00 5.00 6.00] [5.00 6.00 0.00] [6.00 0.00 0.00]]
Unroll + zip + reduce:
# Convolution as matmul: each unrolled window row is dotted with the kernel.
weight = tensor([5, 2, 3])
# (T, K) @ (K, 1) -> (T, 1), then flatten back to a length-T vector.
output = (unrolled_input @ weight.view(K, 1)).view(T)
print(output)
[18.00 28.00 38.00 48.00 37.00 30.00]
Output Values
output[0] = weight[0] * input[0] + weight[1] * input[1] + weight[2] * input[2]
output[1] = weight[0] * input[1] + weight[1] * input[2] + weight[2] * input[3]
output[2] = weight[0] * input[2] + weight[1] * input[3] + weight[2] * input[4]
class Conv:
    @staticmethod
    def backward(ctx, d):
        # Sketch only — full backward body elided on the slide.
        ...
        # Gradient w.r.t. the input: the incoming gradient d is convolved
        # with the kernel indices reversed relative to the forward pass
        # (weight[0] pairs with d[2], weight[2] with d[0]).
        grad_input[2] = weight[0] * d[2] + weight[1] * d[1] + weight[2] * d[0]
        ...
Reverse the convolutional anchor
def unroll_chan(input, T, C, K):
    """Unroll a (T, C) input into a T x (K * C) matrix.

    Row i flattens the K window positions starting at i; each position
    contributes its C channel values (k varies slowest, c fastest).
    Positions past the end of the input are zero-padded.
    """
    rows = []
    for i in range(T):
        flat = []
        for k in range(K):
            for c in range(C):
                flat.append(input[i + k, c] if i + k < T else 0)
        rows.append(flat)
    return tensor(rows)
# Channels: unroll over time, keeping each position's channel values together.
in_channels = 2
input = rand(T, in_channels)
unrolled_input = unroll_chan(input, T, in_channels, K)
print(unrolled_input.shape) # Shape: T x (in_channels * K)
(6, 6)
# One matmul produces all output channels at once:
# (T, in_channels * K) @ (in_channels * K, out_channels) -> (T, out_channels).
out_channels = 3
weight = rand(in_channels * K, out_channels)
output = unrolled_input @ weight
print(output.shape)
(6, 3)
Sizes
# Input image - batch x in_channel x height x width
# Weight - out_channel x in_channel x kernel_height x kernel_width
# Output image - batch x out_channel x height x width
Same idea as 1D
Nothing is different from the 1D version.