From 1bd1f8dc565efcf0f861691baf0b3284bc76cf4e Mon Sep 17 00:00:00 2001
From: Andrew <desousa.andrew11@gmail.com>
Date: Sat, 22 Jun 2024 16:08:49 -0700
Subject: [PATCH] shared ptr implementation

---
 .vscode/settings.json |  32 ++++++-
 CMakeLists.txt        |   1 -
 example/and.cpp       |  54 ++++++++++++
 scalar.h              | 191 ++++++++++++++----------------------------
 tests/test_main.cpp   | 133 +++++++++++++++++++++--------
 5 files changed, 243 insertions(+), 168 deletions(-)
 create mode 100644 example/and.cpp

diff --git a/.vscode/settings.json b/.vscode/settings.json
index 52c6dd9..d13dec5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -50,6 +50,36 @@
         "vector": "cpp",
         "algorithm": "cpp",
         "iterator": "cpp",
-        "memory_resource": "cpp"
+        "memory_resource": "cpp",
+        "any": "cpp",
+        "charconv": "cpp",
+        "codecvt": "cpp",
+        "condition_variable": "cpp",
+        "deque": "cpp",
+        "fstream": "cpp",
+        "iomanip": "cpp",
+        "list": "cpp",
+        "map": "cpp",
+        "numbers": "cpp",
+        "queue": "cpp",
+        "semaphore": "cpp",
+        "shared_mutex": "cpp",
+        "source_location": "cpp",
+        "span": "cpp",
+        "stack": "cpp",
+        "unordered_set": "cpp",
+        "atomic": "cpp",
+        "*.tcc": "cpp",
+        "compare": "cpp",
+        "concepts": "cpp",
+        "exception": "cpp",
+        "functional": "cpp",
+        "memory": "cpp",
+        "system_error": "cpp",
+        "type_traits": "cpp",
+        "utility": "cpp",
+        "__memory": "cpp",
+        "chrono": "cpp",
+        "format": "cpp"
     }
 }
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c4e9901..4147c0f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,5 +2,4 @@ cmake_minimum_required(VERSION 3.12)
 project(backprop)
 
 set(CMAKE_CXX_STANDARD 17)
-
 add_subdirectory(tests)
\ No newline at end of file
diff --git a/example/and.cpp b/example/and.cpp
new file mode 100644
index 0000000..33295a5
--- /dev/null
+++ b/example/and.cpp
@@ -0,0 +1,54 @@
+#include <iostream>
+#include "../scalar.h"
+
+// Binary cross-entropy of prediction y_hat against target y: -y*log(y_hat) - (1 - y)*log(1 - y_hat).
+Scalar<double> cross_entropy(Scalar<double> y, Scalar<double> y_hat) {
+    return -y * log(y_hat) - (1 - y) * log(1 - y_hat);
+}
+
+Scalar<double> sigmoid(Scalar<double> x) {
+    return 1 / (1 + exp(-x));  // 1 / (1 + exp(-x)); exp() and unary - for Scalar are not defined in this file
+}
+
+int main() {
+    // 4x2 dataset matrix: one row per sample, the two inputs of the AND truth table
+    Scalar<double> X[4][2] = {
+        {Scalar<double>(0), Scalar<double>(0)},
+        {Scalar<double>(0), Scalar<double>(1)},
+        {Scalar<double>(1), Scalar<double>(0)},
+        {Scalar<double>(1), Scalar<double>(1)}
+    };
+
+    Scalar<double> Y[4] = {  // targets: 1 only for the (1, 1) row
+        Scalar<double>(0),
+        Scalar<double>(0),
+        Scalar<double>(0),
+        Scalar<double>(1)
+    };
+
+    Scalar<double> w1(.1), w2(.1), b(.1);  // two weights and a bias, all starting at 0.1
+    // NOTE(review): this example uses Scalar<double> by value with log/exp, unary -, += and int operands,
+    // while the scalar.h hunk in this patch appears to define only + - * / on shared_ptr operands — confirm it builds.
+    int num_epochs = 1000, num_samples = 4;
+    for (int i = 0; i < num_epochs; i++) {
+        Scalar<double> loss(0);
+        // Forward pass: weighted sum plus bias, then sigmoid; the loss is summed over all samples.
+        for (int j = 0; j < num_samples; j++) {
+            Scalar<double> z = X[j][0] * w1 + X[j][1] * w2 + b;
+            Scalar<double> a = sigmoid(z);
+            loss += cross_entropy(Y[j], a);
+        }
+        // Backward pass starting from the summed loss.
+        loss.backward();
+
+        w1.value -= w1.grad * 0.1;  // gradient-descent step, step size 0.1
+        w2.value -= w2.grad * 0.1;
+        b.value -= b.grad * 0.1;
+
+        w1.grad = 0;  // reset so the next epoch does not add onto this epoch's gradients
+        w2.grad = 0;
+        b.grad = 0;
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/scalar.h b/scalar.h
index ea4f351..eec42cc 100644
--- a/scalar.h
+++ b/scalar.h
@@ -5,7 +5,7 @@
 
 
 template <typename T>
-class Scalar {
+class Scalar: public std::enable_shared_from_this<Scalar<T>> {
 public:
     T value;
     T grad = 0;
@@ -18,170 +18,101 @@ class Scalar {
         _backward = []() {};
     }
 
-    void backward() {
+    void backward(bool is_root=true) {  // is_root: true only on the node the caller starts from
-        // if in_degrees is not zero, return
+        // Throw if a node that uses this one as an operand has not propagated into it yet
         if (in_degrees != 0) { throw std::runtime_error("in_degrees is not zero"); }
+        if (is_root) { this->grad = 1.0; }  // seed d(root)/d(root) = 1
+        // _backward adds this node's grad contribution to its operands (empty lambda unless an operator set it)
         _backward();
 
         // backward children
         for (auto child : children) {
             child->in_degrees--;
-            if (child->in_degrees == 0) { child->backward(); }
+            if (child->in_degrees == 0) { child->backward(false); }  // all users accounted for: recurse without re-seeding
         }
     }
 
-    Scalar<T>& operator+(Scalar<T>& other) {
-        auto result = std::make_shared<Scalar<T>>(this->value + other.value);
+    friend std::shared_ptr<Scalar<T>> operator+(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) {
+        auto result = std::make_shared<Scalar<T>>(lhs->value + rhs->value);  // new graph node holding lhs + rhs
         
-        auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other);
-        auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this);
-        result->children.insert(this_shared_ptr);
-        result->children.insert(other_shared_ptr);
-        this->in_degrees++;
-        other.in_degrees++;
+        // `children` de-duplicates (assumed set-like: insert() returns {it, inserted}), so count one incoming
+        // edge per distinct operand; otherwise x + x leaves x.in_degrees stuck at 1 and backprop stops at x.
+        if (result->children.insert(lhs).second) { lhs->in_degrees++; }
+        if (result->children.insert(rhs).second) { rhs->in_degrees++; }
 
-        result->_backward = [&, result]() {
-            this->grad += 1.0 * result->grad;
-            other.grad += 1.0 * result->grad;
+        result->_backward = [lhs, rhs, out = result.get()]() {  // copy operands (params die at return); non-owning self ptr avoids a shared_ptr cycle
+            lhs->grad += 1.0 * out->grad;  // d(lhs + rhs)/d(lhs) = 1
+            rhs->grad += 1.0 * out->grad;  // d(lhs + rhs)/d(rhs) = 1
         };
 
-        return *result;
+        return result;
     }
 
-    Scalar<T>& operator-(Scalar<T>& other) {
-        auto result = std::make_shared<Scalar<T>>(this->value - other.value);
-        
-        auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other);
-        auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this);
-        result->children.insert(this_shared_ptr);
-        result->children.insert(other_shared_ptr);
+    friend std::shared_ptr<Scalar<T>> operator-(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) {
+        auto result = std::make_shared<Scalar<T>>(lhs->value - rhs->value);  // new graph node holding lhs - rhs
 
-        this->in_degrees++;
-        other.in_degrees++;
+        // `children` de-duplicates (assumed set-like: insert() returns {it, inserted}), so count one incoming
+        // edge per distinct operand; otherwise x - x leaves x.in_degrees stuck at 1 and backprop stops at x.
+        if (result->children.insert(lhs).second) { lhs->in_degrees++; }
+        if (result->children.insert(rhs).second) { rhs->in_degrees++; }
 
-        result->_backward = [&, result]() {
-            this->grad += 1.0 * result->grad;
-            other.grad -= 1.0 * result->grad;
+        result->_backward = [lhs, rhs, out = result.get()]() {  // copy operands (params die at return); non-owning self ptr avoids a shared_ptr cycle
+            lhs->grad += 1.0 * out->grad;   // d(lhs - rhs)/d(lhs) = 1
+            rhs->grad += -1.0 * out->grad;  // d(lhs - rhs)/d(rhs) = -1
         };
 
-        return *result;
+        return result;
     }
 
-    Scalar<T>& operator*(Scalar<T>& other) {
-        auto result = std::make_shared<Scalar<T>>(this->value * other.value);
-        
-        auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other);
-        auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this);
-        result->children.insert(this_shared_ptr);
-        result->children.insert(other_shared_ptr);
+    friend std::shared_ptr<Scalar<T>> operator*(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) {
+        auto result = std::make_shared<Scalar<T>>(lhs->value * rhs->value);  // new graph node holding lhs * rhs
 
-        this->in_degrees++;
-        other.in_degrees++;
+        // `children` de-duplicates (assumed set-like: insert() returns {it, inserted}), so count one incoming
+        // edge per distinct operand; otherwise x * x leaves x.in_degrees stuck at 1 and backprop stops at x.
+        if (result->children.insert(lhs).second) { lhs->in_degrees++; }
+        if (result->children.insert(rhs).second) { rhs->in_degrees++; }
 
-        result->_backward = [&, result]() {
-            this->grad += other.value * result->grad;
-            other.grad += this->value * result->grad;
+        result->_backward = [lhs, rhs, out = result.get()]() {  // copy operands (params die at return); non-owning self ptr avoids a shared_ptr cycle
+            lhs->grad += rhs->value * out->grad;  // d(lhs * rhs)/d(lhs) = rhs
+            rhs->grad += lhs->value * out->grad;  // d(lhs * rhs)/d(rhs) = lhs
         };
 
-        return *result;
+        return result;
     }
 
-    Scalar<T>& operator/(Scalar<T>& other) {
-        auto result = std::make_shared<Scalar<T>>(this->value / other.value);
-
-        auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other);
-        auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this);
-        result->children.insert(this_shared_ptr);
-        result->children.insert(other_shared_ptr);
+    friend std::shared_ptr<Scalar<T>> operator/(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) {
+        auto result = std::make_shared<Scalar<T>>(lhs->value / rhs->value);  // new graph node holding lhs / rhs
 
-        this->in_degrees++;
-        other.in_degrees++;
+        // `children` de-duplicates (assumed set-like: insert() returns {it, inserted}), so count one incoming
+        // edge per distinct operand; otherwise x / x leaves x.in_degrees stuck at 1 and backprop stops at x.
+        if (result->children.insert(lhs).second) { lhs->in_degrees++; }
+        if (result->children.insert(rhs).second) { rhs->in_degrees++; }
 
-        result->_backward = [&, result]() {
-            this->grad += 1.0 / other.value * result->grad;
-            other.grad -= this->value / (other.value * other.value) * result->grad;
+        result->_backward = [lhs, rhs, out = result.get()]() {  // copy operands (params die at return); non-owning self ptr avoids a shared_ptr cycle
+            lhs->grad += 1.0 / rhs->value * out->grad;                            // d(lhs / rhs)/d(lhs) = 1 / rhs
+            rhs->grad += -lhs->value / (rhs->value * rhs->value) * out->grad;     // d(lhs / rhs)/d(rhs) = -lhs / rhs^2
         };
 
-        return *result;
+        return result;
     }
-};
-
-
-// numerical and scalar overload operators
-template <typename T>
-Scalar<T>& operator+(Scalar<T>& s1, T s2) {
-    auto result = std::make_shared<Scalar<T>>(s1.value + s2);
-    auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1);
-    result->children.insert(s1_shared_ptr);
-    s1.in_degrees++;
-
-    result->_backward = [&, result]() {
-        s1.grad += 1.0 * result->grad;
-    };
-
-    return *result;
-}
-
-// other order
-template <typename T>
-Scalar<T>& operator+(T s1, Scalar<T>& s2) {
-    return s2 + s1;
-}
-
-template <typename T>
-Scalar<T>& operator-(Scalar<T>& s1, T s2) {
-    auto result = std::make_shared<Scalar<T>>(s1.value - s2);
-    auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1);
-    result->children.insert(s1_shared_ptr);
-    s1.in_degrees++;
-
-    result->_backward = [&, result]() {
-        s1.grad += 1.0 * result->grad;
-    };
 
-    return *result;
-}
-
-// other order
-template <typename T>
-Scalar<T>& operator-(T s1, Scalar<T>& s2) {
-    return s2 - s1;
-}
-
-template <typename T>
-Scalar<T>& operator*(Scalar<T>& s1, T s2) {
-    auto result = std::make_shared<Scalar<T>>(s1.value * s2);
-    auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1);
-    result->children.insert(s1_shared_ptr);
-    s1.in_degrees++;
-
-    result->_backward = [&, result]() {
-        s1.grad += s2 * result->grad;
-    };
-
-    return *result;
-}
-
-template <typename T>
-Scalar<T>& operator*(T s1, Scalar<T>& s2) {
-    return s2 * s1;
-}
-
-template <typename T>
-Scalar<T>& operator/(Scalar<T>& s1, T s2) {
-    auto result = std::make_shared<Scalar<T>>(s1.value / s2);
-    auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1);
-    result->children.insert(s1_shared_ptr);
-    s1.in_degrees++;
-
-    result->_backward = [&, result]() {
-        s1.grad += 1.0 / s2 * result->grad;
-    };
-
-    return *result;
-}
-
-template <typename T>
-Scalar<T>& operator/(T s1, Scalar<T>& s2) {
-    return s2 / s1;
-}
+    // // unary operators
+    // std::shared_ptr<Scalar<T>> operator-() {
+    //     auto result = std::make_shared<Scalar<T>>(-this->value);
+    //     auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this);
+    //     result->children.insert(this_shared_ptr);
+    //     this->in_degrees++;
+
+    //     result->_backward = [&, result]() {
+    //         this->grad += -1.0 * result->grad;
+    //     };
+
+    //     return *result;
+    // }
+
+    // // += operator
+    // std::shared_ptr<Scalar<T>> operator+=(std::shared_ptr<Scalar<T>> other) {
+    //     *this = *this + other;
+    //     return *this;
+    // }
+};
\ No newline at end of file
diff --git a/tests/test_main.cpp b/tests/test_main.cpp
index c4f192b..5509fad 100644
--- a/tests/test_main.cpp
+++ b/tests/test_main.cpp
@@ -2,46 +2,107 @@
 #include "../scalar.h"
 
 
-TEST(ScalarTest, Test1) {
-    Scalar<float> s1(1.0);
-    Scalar<float> s2(2.0);
-    Scalar<float> s4(5.0);
-    Scalar<float> s5(10.0);
-    Scalar<float>& s3 = s1 + s2;
-    Scalar<float>& s6 = (s3 * s4 * s5) - s4;
-
-    s6.grad = 1.0;
-    s6.backward();
-
-    // verify the gradient of s1, s2, s3, s4, s5
-    EXPECT_FLOAT_EQ(s1.grad, 50.0);
-    EXPECT_FLOAT_EQ(s2.grad, 50.0);
-    EXPECT_FLOAT_EQ(s3.grad, 50.0);
-    EXPECT_FLOAT_EQ(s4.grad, 29.0);
-    EXPECT_FLOAT_EQ(s5.grad, 15.0);
+TEST(ScalarAddition, Test1) {
+    // Operands are shared_ptr-owned nodes; operator+ takes and returns shared_ptr<Scalar<float>>
+    auto s1 = std::make_shared<Scalar<float>>(1.0);
+    auto s2 = std::make_shared<Scalar<float>>(2.0);
+    auto s3 = s1 + s2;
+
+    EXPECT_FLOAT_EQ(s3->value, 3.0);
+    // d(s1 + s2)/ds1 = d(s1 + s2)/ds2 = 1
+    s3->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 1.0);
+    EXPECT_FLOAT_EQ(s2->grad, 1.0);
+}
+
+// TEST(ScalarAddition, Test2) {
+//     Scalar<double> s1(1.0);
+//     Scalar<double>& s2 = 1 + s1;
+//     EXPECT_DOUBLE_EQ(s2.value, 2.0);
+
+//     s2.backward();
+//     EXPECT_DOUBLE_EQ(s1.grad, 1.0);
+// }
+
+TEST(ScalarSubtraction, Test1) {
+    auto s1 = std::make_shared<Scalar<float>>(1.0);
+    auto s2 = std::make_shared<Scalar<float>>(2.0); 
+    auto s3 = s1 - s2;
+
+    EXPECT_FLOAT_EQ(s3->value, -1.0);
+    // d(s1 - s2)/ds1 = 1, d(s1 - s2)/ds2 = -1
+    s3->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 1.0);
+    EXPECT_FLOAT_EQ(s2->grad, -1.0);
 }
 
-TEST(ScalarTest, Test2) {
-    Scalar<double> s1(1.0);
-    Scalar<double> s2(2.0);
-    Scalar<double> s3(2.0);
-    Scalar<double> s4(5.0);
-    Scalar<double> s5(10.0);
-
-    // expression containg all operations
-    Scalar<double>& s6 = (s1 + s2) * s3 - s4 * s5;
-
-    s6.grad = 1.0;
-    s6.backward();
-
-    // verify the gradient of s1, s2, s3, s4, s5
-    EXPECT_DOUBLE_EQ(s1.grad, 2.0);
-    EXPECT_DOUBLE_EQ(s2.grad, 2.0);
-    EXPECT_DOUBLE_EQ(s3.grad, 3.0);
-    EXPECT_DOUBLE_EQ(s4.grad, -10);
-    EXPECT_DOUBLE_EQ(s5.grad, -5);
+TEST(ScalarMultiplication, Test1) {
+    auto s1 = std::make_shared<Scalar<float>>(1.0);
+    auto s2 = std::make_shared<Scalar<float>>(2.0);
+    auto s3 = s1 * s2;
+
+    EXPECT_FLOAT_EQ(s3->value, 2.0);
+    // d(s1 * s2)/ds1 = s2 = 2, d(s1 * s2)/ds2 = s1 = 1
+    s3->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 2.0);
+    EXPECT_FLOAT_EQ(s2->grad, 1.0);
 }
 
+TEST(ScalarDivision, Test1) {
+    auto s1 = std::make_shared<Scalar<float>>(1.0);
+    auto s2 = std::make_shared<Scalar<float>>(2.0);
+    auto s3 = s1 / s2;
+
+    EXPECT_FLOAT_EQ(s3->value, 0.5);
+    // d(s1 / s2)/ds1 = 1/s2 = 0.5, d(s1 / s2)/ds2 = -s1/s2^2 = -0.25
+    s3->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 0.5);
+    EXPECT_FLOAT_EQ(s2->grad, -0.25);
+}
+
+// // TEST(ScalarDivision, Test2) {
+// //     Scalar<double> s1(1.0);
+// //     Scalar<double>& s2 = 2 / s1;
+// //     EXPECT_DOUBLE_EQ(s2.value, 2.0);
+
+// //     s2.backward();
+// //     EXPECT_DOUBLE_EQ(s1.grad, -2.0);
+// // }
+
+TEST(ScalarMixedOperationsTest, Test1) {
+    auto s1 = std::make_shared<Scalar<float>>(1.0);
+    auto s2 = std::make_shared<Scalar<float>>(2.0);
+    auto s3 = std::make_shared<Scalar<float>>(2.0);
+    auto s4 = std::make_shared<Scalar<float>>(5.0);
+    auto s5 = std::make_shared<Scalar<float>>(10.0);
+
+    auto s6 = (s1 + s2) * s3 - s4 * s5;
+    // ds6/ds1 = ds6/ds2 = s3 = 2, ds6/ds3 = s1 + s2 = 3, ds6/ds4 = -s5 = -10, ds6/ds5 = -s4 = -5
+    s6->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 2.0);
+    EXPECT_FLOAT_EQ(s2->grad, 2.0);
+    EXPECT_FLOAT_EQ(s3->grad, 3.0);
+    EXPECT_FLOAT_EQ(s4->grad, -10.0);
+    EXPECT_FLOAT_EQ(s5->grad, -5.0);
+}
+
+// TEST(ScalarMixedOperationsTest, Test2) {
+//     auto s1 = std::make_shared<Scalar<float>>(1.0);
+//     auto s2 = std::make_shared<Scalar<float>>(2.0);
+//     auto s3 = std::make_shared<Scalar<float>>(2.0);
+//     auto s4 = std::make_shared<Scalar<float>>(5.0);
+//     auto s5 = std::make_shared<Scalar<float>>(10.0);
+
+//     auto s6 = (s1 + s2) * (s3 - s4) / s5;
+
+//     s6->backward();
+//     EXPECT_FLOAT_EQ(s1->grad, -0.3);
+//     EXPECT_FLOAT_EQ(s2->grad, -0.3);
+//     EXPECT_FLOAT_EQ(s3->grad, 0.3);
+//     EXPECT_FLOAT_EQ(s4->grad, -0.3);
+//     EXPECT_FLOAT_EQ(s5->grad, 0.09);
+// }
+
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
     int ret = RUN_ALL_TESTS();