From 1bd1f8dc565efcf0f861691baf0b3284bc76cf4e Mon Sep 17 00:00:00 2001 From: Andrew <desousa.andrew11@gmail.com> Date: Sat, 22 Jun 2024 16:08:49 -0700 Subject: [PATCH] shared ptr implementation --- .vscode/settings.json | 32 ++++++- CMakeLists.txt | 1 - example/and.cpp | 54 ++++++++++++ scalar.h | 191 ++++++++++++++---------------------------- tests/test_main.cpp | 133 +++++++++++++++++++++-------- 5 files changed, 243 insertions(+), 168 deletions(-) create mode 100644 example/and.cpp diff --git a/.vscode/settings.json b/.vscode/settings.json index 52c6dd9..d13dec5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -50,6 +50,36 @@ "vector": "cpp", "algorithm": "cpp", "iterator": "cpp", - "memory_resource": "cpp" + "memory_resource": "cpp", + "any": "cpp", + "charconv": "cpp", + "codecvt": "cpp", + "condition_variable": "cpp", + "deque": "cpp", + "fstream": "cpp", + "iomanip": "cpp", + "list": "cpp", + "map": "cpp", + "numbers": "cpp", + "queue": "cpp", + "semaphore": "cpp", + "shared_mutex": "cpp", + "source_location": "cpp", + "span": "cpp", + "stack": "cpp", + "unordered_set": "cpp", + "atomic": "cpp", + "*.tcc": "cpp", + "compare": "cpp", + "concepts": "cpp", + "exception": "cpp", + "functional": "cpp", + "memory": "cpp", + "system_error": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "__memory": "cpp", + "chrono": "cpp", + "format": "cpp" } } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index c4e9901..4147c0f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,5 +2,4 @@ cmake_minimum_required(VERSION 3.12) project(backprop) set(CMAKE_CXX_STANDARD 17) - add_subdirectory(tests) \ No newline at end of file diff --git a/example/and.cpp b/example/and.cpp new file mode 100644 index 0000000..33295a5 --- /dev/null +++ b/example/and.cpp @@ -0,0 +1,54 @@ +#include <iostream> +#include "../scalar.h" + + +Scalar<double> cross_entropy(Scalar<double> y, Scalar<double> y_hat) { + return -y * log(y_hat) - (1 - y) * log(1 - y_hat); +} + +Scalar<double> sigmoid(Scalar<double> x) { + return 1 / (1 + exp(-x)); +} + +int main() { + // 4x2 dataset maxtrix + Scalar<double> X[4][2] = { + {Scalar<double>(0), Scalar<double>(0)}, + {Scalar<double>(0), Scalar<double>(1)}, + {Scalar<double>(1), Scalar<double>(0)}, + {Scalar<double>(1), Scalar<double>(1)} + }; + + Scalar<double> Y[4] = { + Scalar<double>(0), + Scalar<double>(0), + Scalar<double>(0), + Scalar<double>(1) + }; + + Scalar<double> w1(.1), w2(.1), b(.1); + + + int num_epochs = 1000, num_samples = 4; + for (int i = 0; i < num_epochs; i++) { + Scalar<double> loss(0); + + for (int j = 0; j < num_samples; j++) { + Scalar<double> z = X[j][0] * w1 + X[j][1] * w2 + b; + Scalar<double> a = sigmoid(z); + loss += cross_entropy(Y[j], a); + } + + loss.backward(); + + w1.value -= w1.grad * 0.1; + w2.value -= w2.grad * 0.1; + b.value -= b.grad * 0.1; + + w1.grad = 0; + w2.grad = 0; + b.grad = 0; + } + + return 0; +} \ No newline at end of file diff --git a/scalar.h b/scalar.h index ea4f351..eec42cc 100644 --- a/scalar.h +++ b/scalar.h @@ -5,7 +5,7 @@ template <typename T> -class Scalar { +class Scalar: public std::enable_shared_from_this<Scalar<T>> { public: T value; T grad = 0; @@ -18,170 +18,101 @@ class Scalar { _backward = []() {}; } - void backward() { + void backward(bool is_root=true) { // if in_degrees is not zero, return if (in_degrees != 0) { throw std::runtime_error("in_degrees is not zero"); } + if (is_root) { this->grad = 1.0; } + _backward(); // backward children for (auto child : children) { child->in_degrees--; - if (child->in_degrees == 0) { child->backward(); } + if (child->in_degrees == 0) { child->backward(false); } } } - Scalar<T>& operator+(Scalar<T>& other) { - auto result = std::make_shared<Scalar<T>>(this->value + other.value); + friend std::shared_ptr<Scalar<T>> operator+(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) { + auto result = std::make_shared<Scalar<T>>(lhs->value + rhs->value); - auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other); - auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this); - result->children.insert(this_shared_ptr); - result->children.insert(other_shared_ptr); - this->in_degrees++; - other.in_degrees++; + result->children.insert(lhs); + result->children.insert(rhs); + lhs->in_degrees++; + rhs->in_degrees++; result->_backward = [&, result]() { - this->grad += 1.0 * result->grad; - other.grad += 1.0 * result->grad; + lhs->grad += 1.0 * result->grad; + rhs->grad += 1.0 * result->grad; }; - return *result; + return result; } - Scalar<T>& operator-(Scalar<T>& other) { - auto result = std::make_shared<Scalar<T>>(this->value - other.value); - - auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other); - auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this); - result->children.insert(this_shared_ptr); - result->children.insert(other_shared_ptr); + friend std::shared_ptr<Scalar<T>> operator-(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) { + auto result = std::make_shared<Scalar<T>>(lhs->value - rhs->value); - this->in_degrees++; - other.in_degrees++; + result->children.insert(lhs); + result->children.insert(rhs); + lhs->in_degrees++; + rhs->in_degrees++; result->_backward = [&, result]() { - this->grad += 1.0 * result->grad; - other.grad -= 1.0 * result->grad; + lhs->grad += 1.0 * result->grad; + rhs->grad += -1.0 * result->grad; }; - return *result; + return result; } - Scalar<T>& operator*(Scalar<T>& other) { - auto result = std::make_shared<Scalar<T>>(this->value * other.value); - - auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other); - auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this); - result->children.insert(this_shared_ptr); - result->children.insert(other_shared_ptr); + friend std::shared_ptr<Scalar<T>> operator*(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) { + auto result = std::make_shared<Scalar<T>>(lhs->value * rhs->value); - this->in_degrees++; - other.in_degrees++; + result->children.insert(lhs); + result->children.insert(rhs); + lhs->in_degrees++; + rhs->in_degrees++; result->_backward = [&, result]() { - this->grad += other.value * result->grad; - other.grad += this->value * result->grad; + lhs->grad += rhs->value * result->grad; + rhs->grad += lhs->value * result->grad; }; - return *result; + return result; } - Scalar<T>& operator/(Scalar<T>& other) { - auto result = std::make_shared<Scalar<T>>(this->value / other.value); - - auto other_shared_ptr = std::shared_ptr<Scalar<T>>(&other); - auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this); - result->children.insert(this_shared_ptr); - result->children.insert(other_shared_ptr); + friend std::shared_ptr<Scalar<T>> operator/(std::shared_ptr<Scalar<T>> lhs, std::shared_ptr<Scalar<T>> rhs) { + auto result = std::make_shared<Scalar<T>>(lhs->value / rhs->value); - this->in_degrees++; - other.in_degrees++; + result->children.insert(lhs); + result->children.insert(rhs); + lhs->in_degrees++; + rhs->in_degrees++; result->_backward = [&, result]() { - this->grad += 1.0 / other.value * result->grad; - other.grad -= this->value / (other.value * other.value) * result->grad; + lhs->grad += 1.0 / rhs->value * result->grad; + rhs->grad += -lhs->value / (rhs->value * rhs->value) * result->grad; }; - return *result; + return result; } -}; - - -// numerical and scalar overload operators -template <typename T> -Scalar<T>& operator+(Scalar<T>& s1, T s2) { - auto result = std::make_shared<Scalar<T>>(s1.value + s2); - auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1); - result->children.insert(s1_shared_ptr); - s1.in_degrees++; - - result->_backward = [&, result]() { - s1.grad += 1.0 * result->grad; - }; - - return *result; -} - -// other order -template <typename T> -Scalar<T>& operator+(T s1, Scalar<T>& s2) { - return s2 + s1; -} - -template <typename T> -Scalar<T>& operator-(Scalar<T>& s1, T s2) { - auto result = std::make_shared<Scalar<T>>(s1.value - s2); - auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1); - result->children.insert(s1_shared_ptr); - s1.in_degrees++; - - result->_backward = [&, result]() { - s1.grad += 1.0 * result->grad; - }; - return *result; -} - -// other order -template <typename T> -Scalar<T>& operator-(T s1, Scalar<T>& s2) { - return s2 - s1; -} - -template <typename T> -Scalar<T>& operator*(Scalar<T>& s1, T s2) { - auto result = std::make_shared<Scalar<T>>(s1.value * s2); - auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1); - result->children.insert(s1_shared_ptr); - s1.in_degrees++; - - result->_backward = [&, result]() { - s1.grad += s2 * result->grad; - }; - - return *result; -} - -template <typename T> -Scalar<T>& operator*(T s1, Scalar<T>& s2) { - return s2 * s1; -} - -template <typename T> -Scalar<T>& operator/(Scalar<T>& s1, T s2) { - auto result = std::make_shared<Scalar<T>>(s1.value / s2); - auto s1_shared_ptr = std::shared_ptr<Scalar<T>>(&s1); - result->children.insert(s1_shared_ptr); - s1.in_degrees++; - - result->_backward = [&, result]() { - s1.grad += 1.0 / s2 * result->grad; - }; - - return *result; -} - -template <typename T> -Scalar<T>& operator/(T s1, Scalar<T>& s2) { - return s2 / s1; -} + // // unary operators + // std::shared_ptr<Scalar<T>> operator-() { + // auto result = std::make_shared<Scalar<T>>(-this->value); + // auto this_shared_ptr = std::shared_ptr<Scalar<T>>(this); + // result->children.insert(this_shared_ptr); + // this->in_degrees++; + + // result->_backward = [&, result]() { + // this->grad += -1.0 * result->grad; + // }; + + // return *result; + // } + + // // += operator + // std::shared_ptr<Scalar<T>> operator+=(std::shared_ptr<Scalar<T>> other) { + // *this = *this + other; + // return *this; + // } +}; \ No newline at end of file diff --git a/tests/test_main.cpp b/tests/test_main.cpp index c4f192b..5509fad 100644 --- a/tests/test_main.cpp +++ b/tests/test_main.cpp @@ -2,46 +2,107 @@ #include "../scalar.h" -TEST(ScalarTest, Test1) { - Scalar<float> s1(1.0); - Scalar<float> s2(2.0); - Scalar<float> s4(5.0); - Scalar<float> s5(10.0); - Scalar<float>& s3 = s1 + s2; - Scalar<float>& s6 = (s3 * s4 * s5) - s4; - - s6.grad = 1.0; - s6.backward(); - - // verify the gradient of s1, s2, s3, s4, s5 - EXPECT_FLOAT_EQ(s1.grad, 50.0); - EXPECT_FLOAT_EQ(s2.grad, 50.0); - EXPECT_FLOAT_EQ(s3.grad, 50.0); - EXPECT_FLOAT_EQ(s4.grad, 29.0); - EXPECT_FLOAT_EQ(s5.grad, 15.0); +TEST(ScalarAddition, Test1) { + // shared pointer to a scalar object + auto s1 = std::make_shared<Scalar<float>>(1.0); + auto s2 = std::make_shared<Scalar<float>>(2.0); + auto s3 = s1 + s2; + + EXPECT_FLOAT_EQ(s3->value, 3.0); + + s3->backward(); + EXPECT_FLOAT_EQ(s1->grad, 1.0); + EXPECT_FLOAT_EQ(s2->grad, 1.0); +} + +// TEST(ScalarAddition, Test2) { +// Scalar<double> s1(1.0); +// Scalar<double>& s2 = 1 + s1; +// EXPECT_DOUBLE_EQ(s2.value, 2.0); + +// s2.backward(); +// EXPECT_DOUBLE_EQ(s1.grad, 1.0); +// } + +TEST(ScalarSubtraction, Test1) { + auto s1 = std::make_shared<Scalar<float>>(1.0); + auto s2 = std::make_shared<Scalar<float>>(2.0); + auto s3 = s1 - s2; + + EXPECT_FLOAT_EQ(s3->value, -1.0); + + s3->backward(); + EXPECT_FLOAT_EQ(s1->grad, 1.0); + EXPECT_FLOAT_EQ(s2->grad, -1.0); } -TEST(ScalarTest, Test2) { - Scalar<double> s1(1.0); - Scalar<double> s2(2.0); - Scalar<double> s3(2.0); - Scalar<double> s4(5.0); - Scalar<double> s5(10.0); - - // expression containg all operations - Scalar<double>& s6 = (s1 + s2) * s3 - s4 * s5; - - s6.grad = 1.0; - s6.backward(); - - // verify the gradient of s1, s2, s3, s4, s5 - EXPECT_DOUBLE_EQ(s1.grad, 2.0); - EXPECT_DOUBLE_EQ(s2.grad, 2.0); - EXPECT_DOUBLE_EQ(s3.grad, 3.0); - EXPECT_DOUBLE_EQ(s4.grad, -10); - EXPECT_DOUBLE_EQ(s5.grad, -5); +TEST(ScalarMultiplication, Test1) { + auto s1 = std::make_shared<Scalar<float>>(1.0); + auto s2 = std::make_shared<Scalar<float>>(2.0); + auto s3 = s1 * s2; + + EXPECT_FLOAT_EQ(s3->value, 2.0); + + s3->backward(); + EXPECT_FLOAT_EQ(s1->grad, 2.0); + EXPECT_FLOAT_EQ(s2->grad, 1.0); } +TEST(ScalarDivision, Test1) { + auto s1 = std::make_shared<Scalar<float>>(1.0); + auto s2 = std::make_shared<Scalar<float>>(2.0); + auto s3 = s1 / s2; + + EXPECT_FLOAT_EQ(s3->value, 0.5); + + s3->backward(); + EXPECT_FLOAT_EQ(s1->grad, 0.5); + EXPECT_FLOAT_EQ(s2->grad, -0.25); +} + +// // TEST(ScalarDivision, Test2) { +// // Scalar<double> s1(1.0); +// // Scalar<double>& s2 = 2 / s1; +// // EXPECT_DOUBLE_EQ(s2.value, 2.0); + +// // s2.backward(); +// // EXPECT_DOUBLE_EQ(s1.grad, -2.0); +// // } + +TEST(ScalarMixedOperationsTest, Test1) { + auto s1 = std::make_shared<Scalar<float>>(1.0); + auto s2 = std::make_shared<Scalar<float>>(2.0); + auto s3 = std::make_shared<Scalar<float>>(2.0); + auto s4 = std::make_shared<Scalar<float>>(5.0); + auto s5 = std::make_shared<Scalar<float>>(10.0); + + auto s6 = (s1 + s2) * s3 - s4 * s5; + + s6->backward(); + EXPECT_FLOAT_EQ(s1->grad, 2.0); + EXPECT_FLOAT_EQ(s2->grad, 2.0); + EXPECT_FLOAT_EQ(s3->grad, 3.0); + EXPECT_FLOAT_EQ(s4->grad, -10.0); + EXPECT_FLOAT_EQ(s5->grad, -5.0); +} + +// TEST(ScalarMixedOperationsTest, Test2) { +// auto s1 = std::make_shared<Scalar<float>>(1.0); +// auto s2 = std::make_shared<Scalar<float>>(2.0); +// auto s3 = std::make_shared<Scalar<float>>(2.0); +// auto s4 = std::make_shared<Scalar<float>>(5.0); +// auto s5 = std::make_shared<Scalar<float>>(10.0); + +// auto s6 = (s1 + s2) * (s3 - s4) / s5; + +// s6->backward(); +// EXPECT_FLOAT_EQ(s1->grad, 0.1); +// EXPECT_FLOAT_EQ(s2->grad, 0.1); +// EXPECT_FLOAT_EQ(s3->grad, 0.1); +// EXPECT_FLOAT_EQ(s4->grad, -0.02); +// EXPECT_FLOAT_EQ(s5->grad, -0.02); +// } + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); int ret = RUN_ALL_TESTS();