Neural nets are mathematical expressions that take data and weights (parameters) as input. They consist of a mathematical expression for the forward pass and a loss function that measures the accuracy of the predictions. We minimize the loss with backpropagation, which computes the gradient of the loss with respect to every parameter, and with gradient descent, which iteratively nudges the parameters in the direction that lowers the loss. A minimal sketch of this loop follows the imports below.
from micrograd.engine import Value # reference implementation; we rebuild this Value class from scratch below
import random
from graphviz import Digraph
import math
import matplotlib.pyplot as plt
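# A minimal sketch of that training loop on a single made-up weight, using the
# Value class imported above (the same class we rebuild from scratch below);
# the numbers here are purely illustrative.
w = Value(0.5) # one parameter
for _ in range(25):
    pred = w * 3.0 # forward pass
    loss = (pred - 6.0)**2 # squared-error loss
    w.grad = 0 # reset the gradient, since backward() accumulates into it
    loss.backward() # backpropagation: compute d(loss)/dw
    w.data += -0.05 * w.grad # gradient descent step
print(w.data) # should approach 2.0, since 3 * 2.0 == 6.0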
class Value:
""" stores a single scalar value and its gradient """
def __init__(self, data, _children=(), _op=''):
self.data = data
self.grad = 0
# internal variables used for autograd graph construction
self._backward = lambda: None
self._prev = set(_children)
self._op = _op # the op that produced this node, for graphviz / debugging / etc
def __add__(self, other):
other = other if isinstance(other, Value) else Value(other)
out = Value(self.data + other.data, (self, other), '+')
def _backward():
self.grad += out.grad
other.grad += out.grad
out._backward = _backward
return out
def __mul__(self, other):
other = other if isinstance(other, Value) else Value(other)
out = Value(self.data * other.data, (self, other), '*')
def _backward():
self.grad += other.data * out.grad
other.grad += self.data * out.grad
out._backward = _backward
return out
def __pow__(self, other):
assert isinstance(other, (int, float)), "only supporting int/float powers for now"
out = Value(self.data**other, (self,), f'**{other}')
def _backward():
self.grad += (other * self.data**(other-1)) * out.grad
out._backward = _backward
return out
def relu(self):
out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')
def _backward():
self.grad += (out.data > 0) * out.grad
out._backward = _backward
return out
def tanh(self):
x = self.data
t = (math.exp(x) - math.exp(-x)) / (math.exp(x) + math.exp(-x))
out = Value(t, (self, ), 'tanh')
def _backward():
self.grad += (1 - t**2) * out.grad
out._backward = _backward
return out
def backward(self):
# topological order all of the children in the graph
topo = []
visited = set()
def build_topo(v):
if v not in visited:
visited.add(v)
for child in v._prev:
build_topo(child)
topo.append(v)
build_topo(self)
# go one variable at a time and apply the chain rule to get its gradient
self.grad = 1
for v in reversed(topo):
v._backward()
def __neg__(self): # -self
return self * -1
def __radd__(self, other): # other + self
return self + other
def __sub__(self, other): # self - other
return self + (-other)
def __rsub__(self, other): # other - self
return other + (-self)
def __rmul__(self, other): # other * self
return self * other
def __truediv__(self, other): # self / other
return self * other**-1
def __rtruediv__(self, other): # other / self
return other * self**-1
def __repr__(self):
return f"Value(data={self.data}, grad={self.grad})"
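# A quick sanity check of the autograd machinery above (an illustrative example,
# not part of the original network): for f = (a*b + c).tanh() with a*b + c == 0,
# the local tanh derivative is 1, so the chain rule gives a.grad = b.data,
# b.grad = a.data and c.grad = 1.
a = Value(2.0)
b = Value(0.5)
c = Value(-1.0)
f = (a*b + c).tanh()
f.backward()
print(a.grad, b.grad, c.grad) # 0.5 2.0 1.0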
class Neuron: # a single neuron: a weighted sum of its inputs plus a bias, squashed through tanh
def __init__(self, nin):
self.w = [Value(random.uniform(-1,1)) for _ in range(nin)]
self.b = Value(random.uniform(-1,1))
def __call__(self, x):
act = sum((wi*xi for wi,xi in zip(self.w, x)), self.b)
return act.tanh()
    def parameters(self): # here, we collect all of the parameters of the neuron: the weights and the bias
return self.w + [self.b]
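# Illustrative usage (not part of the original code): a neuron with 2 inputs maps
# a list of 2 numbers to a single Value in (-1, 1) via tanh, and exposes
# 2 weights + 1 bias = 3 parameters.
neuron = Neuron(2)
print(neuron([1.0, -2.0])) # a single Value
print(len(neuron.parameters())) # 3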
class Layer: # a layer of one or more independent neurons: nin inputs per neuron, nout neurons out
def __init__(self, nin, nout):
        self.neurons = [Neuron(nin) for _ in range(nout)] # nout neurons, each taking the same nin inputs
def __call__(self, x):
outs = [n(x) for n in self.neurons]
return outs[0] if len(outs) == 1 else outs
def parameters(self): # collecting all of the parameters of the neurons within the layer
# return [p for neuron in self.neurons for p in neuron.parameters()] # list comprehension version (shorter)
params = []
for neuron in self.neurons:
ps = neuron.parameters()
params.extend(ps)
return params
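# Illustrative usage again: Layer(2, 3) holds 3 neurons of 2 inputs each, returns
# a list of 3 Values per call, and has 3 * (2 + 1) = 9 parameters.
layer = Layer(2, 3)
print(layer([1.0, -2.0])) # a list of 3 Values
print(len(layer.parameters())) # 9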
class MLP():
    def __init__(self, nin, nouts): # instead of taking a single nout like Layer, we take a list of nouts; it defines the sizes of all the layers we want in an MLP
sz = [nin] + nouts
self.layers = [Layer(sz[i], sz[i+1]) for i in range(len(nouts))] # we iterate over the pairs and create Layer objects for them
    def __call__(self, x): # in the call function, we call the Layer objects sequentially
for layer in self.layers:
x = layer(x)
return x
def parameters(self):
        # return [p for layer in self.layers for p in layer.parameters()] # list comprehension version (shorter)
params = []
for layer in self.layers:
ps = layer.parameters()
params.extend(ps)
return params
def __repr__(self):
return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"
n = MLP(3, [3, 3, 1]) # n inputs, [dim layer 1, dim layer 2, dim layer 3 (output)]
xs = [
[3.0, 4.7, -3.2],
[2.1, -7.0, 3.5],
[4.4, -0.1, -0.2]
]
ys = [1.0, 1.0, 1.0] # desired targets
len(n.parameters()) # total number of parameters of the MLP
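# For MLP(3, [3, 3, 1]) this works out as follows: the first Layer(3, 3) has
# 3 neurons * (3 weights + 1 bias) = 12 parameters, the second Layer(3, 3)
# another 12, and the output Layer(3, 1) has 1 * (3 + 1) = 4, for 28 in total.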
ypred = [n(x) for x in xs]
loss = sum((preds - ground_truth)**2 for ground_truth, preds in zip(ys, ypred)) # we build a loss function: MSE
print('Initial loss before any backpropagation: ', loss)
[(preds - ground_truth)**2 for ground_truth, preds in zip(ys, ypred)] # individual losses, before summation
print('Gradient of one weight before backpropagation: ', n.layers[1].neurons[1].w[0].grad) # the gradient, still zero at this point
print('Value of the same weight before backpropagation: ', n.layers[1].neurons[1].w[0].data) # the weight's current value
loss.backward() # backward pass (backpropagation) to populate the gradients
'''
These lines of code check the gradient and the value of one specific weight, i.e. how that weight influences the loss (negatively or positively).
If the gradient is negative, increasing this weight will make the loss go down. If it is positive, decreasing this weight will make the loss go down.
'''
print('Gradient of the weight after backpropagation: ', n.layers[1].neurons[1].w[0].grad) # the gradient, now populated by the backward pass
print('Value of the weight after backpropagation: ', n.layers[1].neurons[1].w[0].data) # the value, which remains the same as before
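'''
A tiny illustration of that sign rule on a standalone toy example (w_demo and
f_demo are made-up names, separate from the MLP): for f = w**2 at w = 3.0 the
gradient is 2*w = 6.0, which is positive, so nudging w downward reduces f.
'''
w_demo = Value(3.0)
f_demo = w_demo**2
f_demo.backward()
print(w_demo.grad) # 6.0, positive => decreasing w_demo lowers f_demo
print((w_demo.data - 0.1*w_demo.grad)**2 < f_demo.data) # True: the nudged loss is smaller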
''' Gradient descent updates the values of the parameters, whereas the backward pass (backpropagation) only updates their gradients.
'''
for p in n.parameters(): # manual gradient descent: we move p.data by a small step against its gradient
    p.data += -0.1 * p.grad # the step is negative because we want to move downhill and minimize the loss
print('New gradient of the weight: ', n.layers[1].neurons[1].w[0].grad) # the gradient stays the same, as it is not updated during gradient descent
print('New value of the weight: ', n.layers[1].neurons[1].w[0].data) # the value has moved slightly against its gradient, in the direction that makes the loss go down
ypred = [n(x) for x in xs]
loss = sum((preds - ground_truth)**2 for ground_truth, preds in zip(ys, ypred)) # we re-evaluate the loss after performing gradient descent; it should be lower than before
print('Loss after one gradient descent step: ', loss)
ypred
n.parameters() # set of weights and biases that can predict our outputs
n = MLP(3, [10, 10, 1]) # nin (must match the 3 features per input row below), [dim layer 1, dim layer 2, dim layer 3 (output)]
inputs = [
[3.0, 4.7, -3.2],
[2.1, -7.0, 3.5],
[4.4, -0.1, -0.2],
[1.0, 1.0, -1.0]
]
ys = [1.0, -1.0, -1.0, 1.0] # desired targets (chosen arbitrarily)
len(n.parameters())
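# With MLP(3, [10, 10, 1]) this should be 10*(3+1) + 10*(10+1) + 1*(10+1) = 161 parameters.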
loss_values = []
grads = []
params = []
for k in range(100):
# 1. forward pass
ypred = [n(x) for x in inputs]
loss = sum((preds - ground_truth)**2 for ground_truth, preds in zip(ys, ypred))
# 2. backward pass
grads_values = []
params_values = []
for p in n.parameters():
        grads_values.append(p.grad) # record the gradient from the previous iteration (all zeros on the first pass)
        params_values.append(p.data) # record the current parameter values
        p.grad = 0.0 # zero the gradients before the backward pass, otherwise they accumulate across iterations
grads.append(grads_values)
params.append(params_values)
    loss.backward() # backward pass: populate the gradients
# 3. update step = gradient descent
for p in n.parameters():
p.data += -0.001 * p.grad
loss_values.append(loss.data)
print(k, loss.data)
plt.plot(loss_values);
plt.plot(grads);
plt.plot(params);
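# Note that plt.plot(grads) and plt.plot(params) draw one line per parameter across
# the 100 iterations. As an optional extra (not in the original), the loss curve can
# be plotted on its own, labelled figure:
plt.figure()
plt.plot(loss_values)
plt.xlabel('iteration')
plt.ylabel('loss')
plt.title('training loss');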