{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "source": [ "from graphviz import Digraph\n", "\n", "def trace(root):\n", " #Builds a set of all nodes and edges in a graph\n", " nodes, edges = set(), set()\n", " def build(v):\n", " if v not in nodes:\n", " nodes.add(v)\n", " for child in v._prev:\n", " edges.add((child, v))\n", " build(child)\n", " build(root)\n", " return nodes, edges\n", "\n", "def draw_dot(root):\n", " dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n", "\n", " nodes, edges = trace(root)\n", " for n in nodes:\n", " uid = str(id(n))\n", " #For any value in the graph, create a rectangular ('record') node for it\n", " dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n", " if n._op:\n", " #If this value is a result of some operation, then create an op node for it\n", " dot.node(name = uid + n._op, label=n._op)\n", " #and connect this node to it\n", " dot.edge(uid + n._op, uid)\n", "\n", " for n1, n2 in edges:\n", " #Connect n1 to the node of n2\n", " dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n", "\n", " return dot" ], "metadata": { "id": "T0rN8d146jvF" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [ "import math" ], "metadata": { "id": "JlYxBvFK0AjA" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "class Value:\n", "\n", " def __init__(self, data, _children=(), _op='', label=''):\n", " self.data = data\n", " self.grad = 0.0\n", " self._prev = set(_children)\n", " self._op = _op\n", " self.label = label\n", "\n", "\n", " def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n", " return f\"Value(data={self.data})\"\n", "\n", " def __add__(self, other):\n", " out = Value(self.data + other.data, (self, other), '+')\n", "\n", " return out\n", "\n", " def __mul__(self, other):\n", " out = Value(self.data * other.data, (self, other), '*')\n", "\n", " return out\n", "\n", " def tanh(self):\n", " x = self.data\n", " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n", " out = Value(t, (self, ), 'tanh')\n", "\n", " return out" ], "metadata": { "id": "iMW-UQKm1Con" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#What the graph looks like right now, the grad values are set to 0\n", "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "j8zlrUnLz8F4", "outputId": "9ea436d3-3701-4bb8-9fad-7dd9e14cbbe9" }, "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n137841228343824\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n137841228338448+\n\n+\n\n\n\n137841228343824->137841228338448+\n\n\n\n\n\n137841228343824+\n\n+\n\n\n\n137841228343824+->137841228343824\n\n\n\n\n\n137841228346992\n\nw1\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n137841228340464*\n\n*\n\n\n\n137841228346992->137841228340464*\n\n\n\n\n\n137841228344448\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n137841228344448->137841228343824+\n\n\n\n\n\n137841228344448*\n\n*\n\n\n\n137841228344448*->137841228344448\n\n\n\n\n\n137841228336768\n\nx2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n137841228336768->137841228344448*\n\n\n\n\n\n137841228335280\n\no\n\ndata 0.6044\n\ngrad 0.0000\n\n\n\n137841228335280tanh\n\ntanh\n\n\n\n137841228335280tanh->137841228335280\n\n\n\n\n\n137841228340464\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n137841228340464->137841228343824+\n\n\n\n\n\n137841228340464*->137841228340464\n\n\n\n\n\n137841228338448\n\nn\n\ndata 0.7000\n\ngrad 0.0000\n\n\n\n137841228338448->137841228335280tanh\n\n\n\n\n\n137841228338448+->137841228338448\n\n\n\n\n\n137841228340560\n\nx1\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n137841228340560->137841228340464*\n\n\n\n\n\n137841228348240\n\nb\n\ndata 6.7000\n\ngrad 0.0000\n\n\n\n137841228348240->137841228338448+\n\n\n\n\n\n137841228344736\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n137841228344736->137841228344448*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 11 } ] }, { "cell_type": "markdown", "source": [ "----------" ], "metadata": { "id": "2uE_QgHkyovt" } }, { "cell_type": "markdown", "source": [ "We'll be adding a 'backward' function to our Value object and then implement its functionality in each of the operation function. \\\n", "\\\n", "We are basically converting everything we did manually to calculate the gradients in each operation to code :)" ], "metadata": { "id": "o2ob02X8ypoE" } }, { "cell_type": "code", "source": [ "class Value:\n", "\n", " def __init__(self, data, _children=(), _op='', label=''):\n", " self.data = data\n", " self.grad = 0.0\n", " self._backward = lambda: None #Its an empty function by default. This is what will do that gradient calculation at each of the operations.\n", " self._prev = set(_children)\n", " self._op = _op\n", " self.label = label\n", "\n", "\n", " def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n", " return f\"Value(data={self.data})\"\n", "\n", " def __add__(self, other):\n", " out = Value(self.data + other.data, (self, other), '+')\n", "\n", " def backward():\n", " self.grad = 1.0 * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n", " other.grad = 1.0 * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def __mul__(self, other):\n", " out = Value(self.data * other.data, (self, other), '*')\n", "\n", " def backward():\n", " self.grad = other.data * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n", " other.grad = self.data * out.grad\n", "\n", " out._backward = backward\n", " return out\n", "\n", " def tanh(self):\n", " x = self.data\n", " t = (math.exp(2*x) - 1)/(math.exp(2*x) + 1)\n", " out = Value(t, (self, ), 'tanh')\n", "\n", " def backward():\n", " self.grad = 1 - (t**2) * out.grad #Remember we are doing chain rule here, hence the product with out.grad\n", "\n", " out._backward = backward\n", " return out" ], "metadata": { "id": "4XPxg_t3wl35" }, "execution_count": 29, "outputs": [] }, { "cell_type": "code", "source": [ "#Inputs x1, x2 of the neuron\n", "x1 = Value(2.0, label='x1')\n", "x2 = Value(0.0, label='x2')\n", "\n", "#Weights w1, w2 of the neuron - The synaptic values\n", "w1 = Value(-3.0, label='w1')\n", "w2 = Value(1.0, label='w2')\n", "\n", "#The bias of the neuron\n", "b = Value(6.8813735870195432, label='b')\n", "\n", "x1w1 = x1*w1; x1w1.label = 'x1*w1'\n", "x2w2 = x2*w2; x2w2.label = 'x2*w2'\n", "\n", "#The summation\n", "x1w1x2w2 = x1w1 + x2w2; x1w1x2w2.label = 'x1*w1 + x2*w2'\n", "\n", "#n is basically the cell body, but without the activation function\n", "n = x1w1x2w2 + b; n.label = 'n'\n", "\n", "#Now we pass n to the activation function\n", "o = n.tanh(); o.label = 'o'" ], "metadata": { "id": "S3HaLbW_zvne" }, "execution_count": 32, "outputs": [] }, { "cell_type": "markdown", "source": [ "Now, we call the '_backward' function that we have made one by one in order (backwards through the equation/graph) \\\n", "\\\n", "But before we could do that, we have to first set the value of o.grad to 1.0 \\\n", "As notice in the Value object code that, we have initialised it to 0 \\\n", "\\\n", "Therefore, we'll start by adding o.grad to 1.0 and then we'll call the '_backward' function rest of them one by one" ], "metadata": { "id": "F5uk5VVf1S8W" } }, { "cell_type": "code", "source": [ "o.grad = 1.0" ], "metadata": { "id": "ldcILQ3y1RoO" }, "execution_count": 33, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "2nAmCY0E15Lx", "outputId": "275d6787-e01f-413a-d6a9-a187ac16ece2" }, "execution_count": 34, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n132454147340848\n\no\n\ndata 0.7071\n\ngrad 1.0000\n\n\n\n132454147340848tanh\n\ntanh\n\n\n\n132454147340848tanh->132454147340848\n\n\n\n\n\n132454147341904\n\nx1\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n132454147332880*\n\n*\n\n\n\n132454147341904->132454147332880*\n\n\n\n\n\n132454147335280\n\nn\n\ndata 0.8814\n\ngrad 0.0000\n\n\n\n132454147335280->132454147340848tanh\n\n\n\n\n\n132454147335280+\n\n+\n\n\n\n132454147335280+->132454147335280\n\n\n\n\n\n132454147340944\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331008+\n\n+\n\n\n\n132454147340944->132454147331008+\n\n\n\n\n\n132454147340944*\n\n*\n\n\n\n132454147340944*->132454147340944\n\n\n\n\n\n132454147328224\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n132454147328224->132454147340944*\n\n\n\n\n\n132454147332832\n\nb\n\ndata 6.8814\n\ngrad 0.0000\n\n\n\n132454147332832->132454147335280+\n\n\n\n\n\n132454147332880\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n132454147332880->132454147331008+\n\n\n\n\n\n132454147332880*->132454147332880\n\n\n\n\n\n132454147331392\n\nx2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331392->132454147340944*\n\n\n\n\n\n132454147331008\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n132454147331008->132454147335280+\n\n\n\n\n\n132454147331008+->132454147331008\n\n\n\n\n\n132454147335616\n\nw1\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n132454147335616->132454147332880*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 34 } ] }, { "cell_type": "markdown", "source": [ "Now. we go with o" ], "metadata": { "id": "yAFb_M9Z191o" } }, { "cell_type": "code", "source": [ "o._backward()" ], "metadata": { "id": "Q0RbjS7_17iq" }, "execution_count": 35, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "o3mJmjwE2AlD", "outputId": "5173f640-b103-41a6-bac1-504a602e9b60" }, "execution_count": 36, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n132454147340848\n\no\n\ndata 0.7071\n\ngrad 1.0000\n\n\n\n132454147340848tanh\n\ntanh\n\n\n\n132454147340848tanh->132454147340848\n\n\n\n\n\n132454147341904\n\nx1\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n132454147332880*\n\n*\n\n\n\n132454147341904->132454147332880*\n\n\n\n\n\n132454147335280\n\nn\n\ndata 0.8814\n\ngrad 0.5000\n\n\n\n132454147335280->132454147340848tanh\n\n\n\n\n\n132454147335280+\n\n+\n\n\n\n132454147335280+->132454147335280\n\n\n\n\n\n132454147340944\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331008+\n\n+\n\n\n\n132454147340944->132454147331008+\n\n\n\n\n\n132454147340944*\n\n*\n\n\n\n132454147340944*->132454147340944\n\n\n\n\n\n132454147328224\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n132454147328224->132454147340944*\n\n\n\n\n\n132454147332832\n\nb\n\ndata 6.8814\n\ngrad 0.0000\n\n\n\n132454147332832->132454147335280+\n\n\n\n\n\n132454147332880\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n132454147332880->132454147331008+\n\n\n\n\n\n132454147332880*->132454147332880\n\n\n\n\n\n132454147331392\n\nx2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331392->132454147340944*\n\n\n\n\n\n132454147331008\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n132454147331008->132454147335280+\n\n\n\n\n\n132454147331008+->132454147331008\n\n\n\n\n\n132454147335616\n\nw1\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n132454147335616->132454147332880*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "markdown", "source": [ "Now, from n" ], "metadata": { "id": "EgCQYtk_2nF0" } }, { "cell_type": "code", "source": [ "n._backward()" ], "metadata": { "id": "iPvOJyA-2C11" }, "execution_count": 37, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "a56spKYG2skK", "outputId": "03fee540-36b3-498b-cef7-98422aedbb74" }, "execution_count": 38, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n132454147340848\n\no\n\ndata 0.7071\n\ngrad 1.0000\n\n\n\n132454147340848tanh\n\ntanh\n\n\n\n132454147340848tanh->132454147340848\n\n\n\n\n\n132454147341904\n\nx1\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n132454147332880*\n\n*\n\n\n\n132454147341904->132454147332880*\n\n\n\n\n\n132454147335280\n\nn\n\ndata 0.8814\n\ngrad 0.5000\n\n\n\n132454147335280->132454147340848tanh\n\n\n\n\n\n132454147335280+\n\n+\n\n\n\n132454147335280+->132454147335280\n\n\n\n\n\n132454147340944\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331008+\n\n+\n\n\n\n132454147340944->132454147331008+\n\n\n\n\n\n132454147340944*\n\n*\n\n\n\n132454147340944*->132454147340944\n\n\n\n\n\n132454147328224\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n132454147328224->132454147340944*\n\n\n\n\n\n132454147332832\n\nb\n\ndata 6.8814\n\ngrad 0.5000\n\n\n\n132454147332832->132454147335280+\n\n\n\n\n\n132454147332880\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.0000\n\n\n\n132454147332880->132454147331008+\n\n\n\n\n\n132454147332880*->132454147332880\n\n\n\n\n\n132454147331392\n\nx2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331392->132454147340944*\n\n\n\n\n\n132454147331008\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.5000\n\n\n\n132454147331008->132454147335280+\n\n\n\n\n\n132454147331008+->132454147331008\n\n\n\n\n\n132454147335616\n\nw1\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n132454147335616->132454147332880*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 38 } ] }, { "cell_type": "markdown", "source": [ "Now, b is a leaf node so we just leave it there (Another reason why _backward was initiallised to None function, it's because it won't be called for leaf nodes. Therefore we set it to None for them) \\\n", "\\\n", "We'll continue with x1w1x1w2" ], "metadata": { "id": "ZoUTucmy2yDk" } }, { "cell_type": "code", "source": [ "x1w1x2w2._backward()" ], "metadata": { "id": "8lo99I7A2vPS" }, "execution_count": 39, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "D_PYbqNS3HHp", "outputId": "83e5ad18-2cd7-4a49-d157-70bea42bbac1" }, "execution_count": 40, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n132454147340848\n\no\n\ndata 0.7071\n\ngrad 1.0000\n\n\n\n132454147340848tanh\n\ntanh\n\n\n\n132454147340848tanh->132454147340848\n\n\n\n\n\n132454147341904\n\nx1\n\ndata 2.0000\n\ngrad 0.0000\n\n\n\n132454147332880*\n\n*\n\n\n\n132454147341904->132454147332880*\n\n\n\n\n\n132454147335280\n\nn\n\ndata 0.8814\n\ngrad 0.5000\n\n\n\n132454147335280->132454147340848tanh\n\n\n\n\n\n132454147335280+\n\n+\n\n\n\n132454147335280+->132454147335280\n\n\n\n\n\n132454147340944\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.5000\n\n\n\n132454147331008+\n\n+\n\n\n\n132454147340944->132454147331008+\n\n\n\n\n\n132454147340944*\n\n*\n\n\n\n132454147340944*->132454147340944\n\n\n\n\n\n132454147328224\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n132454147328224->132454147340944*\n\n\n\n\n\n132454147332832\n\nb\n\ndata 6.8814\n\ngrad 0.5000\n\n\n\n132454147332832->132454147335280+\n\n\n\n\n\n132454147332880\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.5000\n\n\n\n132454147332880->132454147331008+\n\n\n\n\n\n132454147332880*->132454147332880\n\n\n\n\n\n132454147331392\n\nx2\n\ndata 0.0000\n\ngrad 0.0000\n\n\n\n132454147331392->132454147340944*\n\n\n\n\n\n132454147331008\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.5000\n\n\n\n132454147331008->132454147335280+\n\n\n\n\n\n132454147331008+->132454147331008\n\n\n\n\n\n132454147335616\n\nw1\n\ndata -3.0000\n\ngrad 0.0000\n\n\n\n132454147335616->132454147332880*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 40 } ] }, { "cell_type": "markdown", "source": [ "Now finally, to finding the gradient of the intial values" ], "metadata": { "id": "W3fQZPPZ3Mg_" } }, { "cell_type": "code", "source": [ "x1w1._backward()\n", "x2w2._backward()" ], "metadata": { "id": "n0OkYLRn3KUS" }, "execution_count": 41, "outputs": [] }, { "cell_type": "code", "source": [ "draw_dot(o)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 322 }, "id": "uLfNK3633c66", "outputId": "660c35e3-a91b-4bc9-9596-a17cae4998fb" }, "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "image/svg+xml": "\n\n\n\n\n\n%3\n\n\n\n132454147340848\n\no\n\ndata 0.7071\n\ngrad 1.0000\n\n\n\n132454147340848tanh\n\ntanh\n\n\n\n132454147340848tanh->132454147340848\n\n\n\n\n\n132454147341904\n\nx1\n\ndata 2.0000\n\ngrad -1.5000\n\n\n\n132454147332880*\n\n*\n\n\n\n132454147341904->132454147332880*\n\n\n\n\n\n132454147335280\n\nn\n\ndata 0.8814\n\ngrad 0.5000\n\n\n\n132454147335280->132454147340848tanh\n\n\n\n\n\n132454147335280+\n\n+\n\n\n\n132454147335280+->132454147335280\n\n\n\n\n\n132454147340944\n\nx2*w2\n\ndata 0.0000\n\ngrad 0.5000\n\n\n\n132454147331008+\n\n+\n\n\n\n132454147340944->132454147331008+\n\n\n\n\n\n132454147340944*\n\n*\n\n\n\n132454147340944*->132454147340944\n\n\n\n\n\n132454147328224\n\nw2\n\ndata 1.0000\n\ngrad 0.0000\n\n\n\n132454147328224->132454147340944*\n\n\n\n\n\n132454147332832\n\nb\n\ndata 6.8814\n\ngrad 0.5000\n\n\n\n132454147332832->132454147335280+\n\n\n\n\n\n132454147332880\n\nx1*w1\n\ndata -6.0000\n\ngrad 0.5000\n\n\n\n132454147332880->132454147331008+\n\n\n\n\n\n132454147332880*->132454147332880\n\n\n\n\n\n132454147331392\n\nx2\n\ndata 0.0000\n\ngrad 0.5000\n\n\n\n132454147331392->132454147340944*\n\n\n\n\n\n132454147331008\n\nx1*w1 + x2*w2\n\ndata -6.0000\n\ngrad 0.5000\n\n\n\n132454147331008->132454147335280+\n\n\n\n\n\n132454147331008+->132454147331008\n\n\n\n\n\n132454147335616\n\nw1\n\ndata -3.0000\n\ngrad 1.0000\n\n\n\n132454147335616->132454147332880*\n\n\n\n\n\n", "text/plain": [ "" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "markdown", "source": [ "----------------" ], "metadata": { "id": "w1B0CDnz4O_X" } }, { "cell_type": "markdown", "source": [ "### **Final output^**" ], "metadata": { "id": "p43Gt7B54dDp" } }, { "cell_type": "markdown", "source": [ "Hence, we have not only verified the manual backpropagation calculation that we did, but also created funtions directly for each of them!" ], "metadata": { "id": "n63PmnLT4QCs" } } ] }