{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"source": [
"class Value:\n",
"\n",
" def __init__(self, data, _children=(), _op='', label=''):\n",
" self.data = data\n",
" self.grad = 0.0\n",
" self._prev = set(_children)\n",
" self._op = _op\n",
" self.label = label\n",
"\n",
"\n",
" def __repr__(self): # This basically allows us to print nicer looking expressions for the final output\n",
" return f\"Value(data={self.data})\"\n",
"\n",
" def __add__(self, other):\n",
" out = Value(self.data + other.data, (self, other), '+')\n",
" return out\n",
"\n",
" def __mul__(self, other):\n",
" out = Value(self.data * other.data, (self, other), '*')\n",
" return out"
],
"metadata": {
"id": "jtRAdDVT6jf2"
},
"execution_count": 20,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "AIP2sPDm6Los",
"outputId": "8e1d5665-fc27-4ddb-95ac-a9cf53f25d51"
},
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Value(data=-8.0)"
]
},
"metadata": {},
"execution_count": 21
}
],
"source": [
"a = Value(2.0, label='a')\n",
"b = Value(-3.0, label='b')\n",
"c = Value(10.0, label='c')\n",
"e = a*b; e.label='e'\n",
"d= e + c; d.label='d'\n",
"f = Value(-2.0, label='f')\n",
"L = d*f; L.label='L'\n",
"L"
]
},
{
"cell_type": "code",
"source": [
"from graphviz import Digraph\n",
"\n",
"def trace(root):\n",
" #Builds a set of all nodes and edges in a graph\n",
" nodes, edges = set(), set()\n",
" def build(v):\n",
" if v not in nodes:\n",
" nodes.add(v)\n",
" for child in v._prev:\n",
" edges.add((child, v))\n",
" build(child)\n",
" build(root)\n",
" return nodes, edges\n",
"\n",
"def draw_dot(root):\n",
" dot = Digraph(format='svg', graph_attr={'rankdir': 'LR'}) #LR == Left to Right\n",
"\n",
" nodes, edges = trace(root)\n",
" for n in nodes:\n",
" uid = str(id(n))\n",
" #For any value in the graph, create a rectangular ('record') node for it\n",
" dot.node(name = uid, label = \"{ %s | data %.4f | grad %.4f }\" % ( n.label, n.data, n.grad), shape='record')\n",
" if n._op:\n",
" #If this value is a result of some operation, then create an op node for it\n",
" dot.node(name = uid + n._op, label=n._op)\n",
" #and connect this node to it\n",
" dot.edge(uid + n._op, uid)\n",
"\n",
" for n1, n2 in edges:\n",
" #Connect n1 to the node of n2\n",
" dot.edge(str(id(n1)), str(id(n2)) + n2._op)\n",
"\n",
" return dot"
],
"metadata": {
"id": "T0rN8d146jvF"
},
"execution_count": 22,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Render the expression graph for L; in a top-to-bottom run every grad is still the initial 0.0 here\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"id": "k7wjwrfo6nUl",
"outputId": "d78c4618-6574-49f9-8e80-f2faa8dad69a"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "markdown",
"source": [
"----------------------"
],
"metadata": {
"id": "UO6I8Z-_CaNv"
}
},
{
"cell_type": "markdown",
"source": [
"### **Now, let's start to fill those grad values**"
],
"metadata": {
"id": "wB-SONL3CltR"
}
},
{
"cell_type": "markdown",
"source": [
"--------------"
],
"metadata": {
"id": "EhvqPDYqF50Z"
}
},
{
"cell_type": "markdown",
"source": [
"**Let's first find the derivative of L w.r.t L**"
],
"metadata": {
"id": "dF0QlSFJCbsI"
}
},
{
"cell_type": "code",
"source": [
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.001\n",
"\n",
" #Here we are basically making them as local variables, to not affect the global variables on top\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data + h\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "AWQsdevqCUks",
"outputId": "e08b83fd-b101-4fdc-a554-41561c00a08b"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"1.000000000000334\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"This was theoretically obvious as well: the derivative of L wrt L is always 1.\n",
"\n",
"&nbsp;\n",
"\n",
"So, let's add that value manually. (Remember to run the cell that defines the global variables first.)"
],
"metadata": {
"id": "HbfBzVQ4EEHM"
}
},
{
"cell_type": "code",
"source": [
"# dL/dL = 1: this is the starting value for the manual backward pass\n",
"L.grad = 1.0"
],
"metadata": {
"id": "3TCgz-n6DbzI"
},
"execution_count": 23,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Redraw the graph: node L should now show grad 1.0\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 212
},
"id": "RS6YodRTEX43",
"outputId": "3b58ed12-b486-4452-b700-8048b1e03e3b"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "markdown",
"source": [
"-----------"
],
"metadata": {
"id": "4x1UEdrOHalT"
}
},
{
"cell_type": "markdown",
"source": [
"**Now, we find the derivative of L wrt to f and d**"
],
"metadata": {
"id": "Hhj8DrcUF7fI"
}
},
{
"cell_type": "markdown",
"source": [
"So, mathematically:\n",
"\n",
"dL/dd = ?\n",
"\n",
"**L = d * f**\n",
"\n",
"Therefore, dL/dd = f\n",
"\n",
"If we verify this manually with the definition of the derivative:\n",
"\n",
"=> (f(x+h) - f(x)) / h\n",
"\n",
"(Remember that the function f(x) is basically L here, and x is d) \\\n",
"=> ((d+h)\\*f - d\\*f) / h \\\n",
"=> (df + hf - df) / h \\\n",
"=> hf / h \\\n",
"= f\n"
],
"metadata": {
"id": "SUi_wdLTGCsq"
}
},
{
"cell_type": "markdown",
"source": [
"So here, if you see:\n",
"\n",
"The derivative of L wrt f is the value in d \\\n",
"& \\\n",
"The derivative of L wrt d is the value in f\n",
"\n",
"So, grad f is 4.0 (the data of d) \\\n",
"and grad d is -2.0 (the data of f)\n",
"\n",
"&nbsp;\n",
"\n",
"Let's check this in code!"
],
"metadata": {
"id": "8ApC2l-HHfHi"
}
},
{
"cell_type": "code",
"source": [
"# STARTING WITH d\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" d.data = d.data + h\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "2wx02cE6EYOR",
"outputId": "f284e1ac-4c6f-490b-c8f3-e94dfbd9923e"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-2.000000000000668\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# NOW WITH f\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.00001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0 + h, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5haMwvymIRxx",
"outputId": "7b372e31-8fa4-42d3-c591-371d3c49c78d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"4.000000000026205\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Now that we have verified this both mathematically and in code, let's manually add those gradient values to the graph."
],
"metadata": {
"id": "EB8w0lF0IofD"
}
},
{
"cell_type": "code",
"source": [
"# L = d * f, so dL/df is the data of d (4.0) and dL/dd is the data of f (-2.0)\n",
"f.grad = 4.0\n",
"d.grad = -2.0"
],
"metadata": {
"id": "pS4NnAZVIML9"
},
"execution_count": 24,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Redraw the graph: nodes f and d should now show grads 4.0 and -2.0\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 212
},
"id": "ko5oltNPJDtc",
"outputId": "d78137fe-0afa-449f-db42-0a079ebfffa4"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "markdown",
"source": [
"----------------------------"
],
"metadata": {
"id": "39nOWCuEskoQ"
}
},
{
"cell_type": "markdown",
"source": [
"### **VERY IMPORTANT PART**"
],
"metadata": {
"id": "UKflj5yEsl6i"
}
},
{
"cell_type": "markdown",
"source": [
"Now we'll be calculating the derivatives of the middle nodes"
],
"metadata": {
"id": "AM3bFT0Eswmz"
}
},
{
"cell_type": "markdown",
"source": [
"##### **Starting with c & e**\n",
"\n",
"dL/dd has already been calculated: dL/dd = -2.0 (check the end of the 4_1-manual-backpropagation notebook)\n",
"\n",
"d = c + e\n",
"\n",
"Now, \\\n",
"the derivative of d wrt c will be 1 \\\n",
"the derivative of d wrt e will be 1\n",
"\n",
"&nbsp;\n",
"\n",
"This is because, when we differentiate a sum with respect to one of its terms, that term contributes 1 and the other term is treated as a constant, which contributes 0.\n",
"\n",
"&nbsp;\n",
"\n",
"If we try to prove this mathematically:\n",
"\n",
"&nbsp;\n",
"\n",
"\td = c + e\n",
"\t(f(x+h) - f(x)) / h\n",
"\tNow, we'll calculate wrt c\n",
"\t=> ( ((c+h)+e) - (c+e) ) / h\n",
"\t=> (c + h + e - c - e) / h\n",
"\t=> h / h\n",
"\t=> 1\n",
"\tTherefore, dd/dc = 1\n",
"\n",
"&nbsp;\n",
"\n",
"Therefore, by the chain rule, we can just substitute the values.\n",
"\n",
"For node c:\n",
"\tdL/dc = dL/dd . dd/dc \\\n",
" So here, the values should be -> dL/dc = -2.0 * 1 = -2.0\n",
"\n",
"For node e:\n",
"\tdL/de = dL/dd . dd/de \\\n",
" So here, the values should be -> dL/de = -2.0 * 1 = -2.0"
],
"metadata": {
"id": "Fu0K97wfs4m9"
}
},
{
"cell_type": "code",
"source": [
"# NOW WITH c\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.00001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0 + h, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "m_zBXwc6sgGA",
"outputId": "70b0d9cd-59a3-48eb-8a24-c69aeaf776de"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-1.999999987845058\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# NOW WITH e\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.00001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" e.data += h\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "R-EE419ftb_x",
"outputId": "26c102c8-5214-4f4c-f566-c0df6d13fb15"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-1.9999999999242843\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# d = e + c has local derivative 1 for both operands, so dL/dd = -2.0 passes through unchanged.\n",
"# Therefore, we now add those values manually\n",
"c.grad = -2.0\n",
"e.grad = -2.0"
],
"metadata": {
"id": "v6VKbkQAtw7H"
},
"execution_count": 25,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Redraw the graph: nodes c and e should now both show grad -2.0\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"id": "QubwLaK6t6fb",
"outputId": "dbe3f5cf-1576-495a-9e24-9e16c9454d72"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "markdown",
"source": [
"--------------"
],
"metadata": {
"id": "m8wvNP7tuH5_"
}
},
{
"cell_type": "markdown",
"source": [
"##### **Continuing with a & b**\n",
"\n",
"Same principle as above, but a different kind of equation here.\n",
"\n",
"&nbsp;\n",
"\n",
"Also remember that the derivative of L wrt e was just calculated above (dL/de = -2.0)\n",
"\n",
"e = a * b\n",
"\n",
"Therefore, \\\n",
"the derivative of e wrt a will be b \\\n",
"the derivative of e wrt b will be a\n",
"\n",
"&nbsp;\n",
"\n",
"This is because, when we differentiate a product with respect to one of its factors, the other factor is treated as a constant multiplier, so it is all that remains (a basic rule of differentiation):\n",
"\n",
"\td/da(a * b) = b\n",
"\n",
"&nbsp;\n",
"\n",
"If we try to prove this mathematically,\n",
"\n",
"&nbsp;\n",
"\n",
"\te = a * b\n",
"\t(f(x+h) - f(x)) / h\n",
"\tRemember, f(x) is the equation for e here. So, finding it wrt a and substituting the values\n",
"\t=> ( ((a + h) * b) - (a * b) ) / h\n",
"\t=> (ab + hb - ab) / h\n",
"\t=> hb / h\n",
"\t=> b\n",
"\tTherefore, de/da = b\n",
"\n",
"&nbsp;\n",
"\n",
"Therefore, by the chain rule, we can just substitute the values.\n",
"\n",
"For node a:\n",
"\tdL/da = dL/de . de/da \\\n",
" So here, the values should be -> dL/da = -2.0 * -3.0 = 6.0\n",
"\n",
"For node b:\n",
"\tdL/db = dL/de . de/db \\\n",
" So here, the values should be -> dL/db = -2.0 * 2.0 = -4.0"
],
"metadata": {
"id": "O8n9U7ZJuIyd"
}
},
{
"cell_type": "code",
"source": [
"# NOW WITH a\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.00001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0 + h, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "YVDtrQyxuBet",
"outputId": "b41d3c16-53f3-4c65-9a8e-c8bd44d5084f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"6.000000000128124\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# NOW WITH b\n",
"\n",
"#This is just a staging function to show how the calculation of each of the derivative is taking place\n",
"def lol():\n",
"\n",
" h = 0.00001\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L1 = L.data #L is basically a node, so we need its data\n",
"\n",
" a = Value(2.0, label='a')\n",
" b = Value(-3.0 + h, label='b')\n",
" c = Value(10.0, label='c')\n",
" e = a*b; e.label='e'\n",
" d= e + c; d.label='d'\n",
" f = Value(-2.0, label='f')\n",
" L = d*f; L.label='L'\n",
" L2 = L.data\n",
"\n",
" print((L2-L1)/h)\n",
"\n",
"lol()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sHZl0wacuaTI",
"outputId": "48107eca-924f-4546-8088-cdc78f93f0bf"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"-4.000000000026205\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Chain rule through e = a * b:\n",
"#   dL/da = dL/de * b = -2.0 * -3.0 = 6.0\n",
"#   dL/db = dL/de * a = -2.0 *  2.0 = -4.0\n",
"#Now, we add those values manually\n",
"a.grad = 6.0\n",
"b.grad = -4.0"
],
"metadata": {
"id": "9chK6a53ugZ1"
},
"execution_count": 26,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Redraw the graph: nodes a and b should now show grads 6.0 and -4.0\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"id": "UTIlEpQSundk",
"outputId": "434f0ad9-3ae0-4e38-b88f-0a9a5f87e8b1"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 19
}
]
},
{
"cell_type": "markdown",
"source": [
"----------------"
],
"metadata": {
"id": "gzgHxzqZwEzZ"
}
},
{
"cell_type": "markdown",
"source": [
"### **Hence the FINAL GENERATED GRAPH AFTER MANUAL BACKPROPAGATION!!**"
],
"metadata": {
"id": "1ucLHBf8uso3"
}
},
{
"cell_type": "code",
"source": [
"# Final render with every grad filled in by hand; no grad is changed between the previous drawing and this one\n",
"draw_dot(L)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 247
},
"id": "Witeo84_wF-p",
"outputId": "ad44a9d6-d533-475d-f80b-46c681a1e6de"
},
"execution_count": 27,
"outputs": [
{
"output_type": "execute_result",
"data": {
"image/svg+xml": "\n\n\n\n\n",
"text/plain": [
""
]
},
"metadata": {},
"execution_count": 27
}
]
}
]
}