diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2836d0b..4ba4ea2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -14,4 +14,5 @@ endif()
 set(CMAKE_CXX_STANDARD 17)
 
 add_subdirectory(tests)
-add_executable(and_example examples/and.cpp)
\ No newline at end of file
+add_executable(and_example apps/and.cpp)
+add_executable(linear apps/linear.cpp)
\ No newline at end of file
diff --git a/README.md b/README.md
index dcacf0f..930289b 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,16 @@ Then include the header file in your source file.
 #include "backprop.h"
 ```
 
+## Binary Classification Example
+The following example demonstrates how to train a model to learn the binary AND function.
+```bash
+mkdir build
+cd build
+cmake ..
+make
+./and_example
+```
+
 ## Tests
 ```bash
 mkdir build
diff --git a/examples/and.cpp b/apps/and.cpp
similarity index 63%
rename from examples/and.cpp
rename to apps/and.cpp
index ef763e2..e9d2033 100644
--- a/examples/and.cpp
+++ b/apps/and.cpp
@@ -10,33 +10,31 @@ int main() {
     std::shared_ptr<Scalar<float>> b = Scalar<float>::make(0);
 
     int num_epochs = 10000, num_samples = 4;
-    float learning_rate = 0.0001;
+    float learning_rate = 0.001;
 
     auto& graph = ComputationalGraph<float>::get_instance();
+    std::shared_ptr<Scalar<float>> X[4][2];
+    X[0][0] = Scalar<float>::make(0);
+    X[0][1] = Scalar<float>::make(0);
+    X[1][0] = Scalar<float>::make(0);
+    X[1][1] = Scalar<float>::make(1);
+    X[2][0] = Scalar<float>::make(1);
+    X[2][1] = Scalar<float>::make(0);
+    X[3][0] = Scalar<float>::make(1);
+    X[3][1] = Scalar<float>::make(1);
+
+    // labels shared pointer
+    std::shared_ptr<Scalar<float>> Y[4];
+    Y[0] = Scalar<float>::make(0);
+    Y[1] = Scalar<float>::make(0);
+    Y[2] = Scalar<float>::make(0);
+    Y[3] = Scalar<float>::make(1);
+
     for (int i = 0; i < num_epochs; i++) {
         graph.clear();
-
-        std::shared_ptr<Scalar<float>> X[4][2];
-        X[0][0] = Scalar<float>::make(0);
-        X[0][1] = Scalar<float>::make(0);
-        X[1][0] = Scalar<float>::make(0);
-        X[1][1] = Scalar<float>::make(1);
-        X[2][0] = Scalar<float>::make(1);
-        X[2][1] = Scalar<float>::make(0);
-        X[3][0] = Scalar<float>::make(1);
-        X[3][1] = Scalar<float>::make(1);
-
-        // labels shared pointer
-        std::shared_ptr<Scalar<float>> Y[4];
-        Y[0] = Scalar<float>::make(0);
-        Y[1] = Scalar<float>::make(0);
-        Y[2] = Scalar<float>::make(0);
-        Y[3] = Scalar<float>::make(1);
-
         std::shared_ptr<Scalar<float>> loss = std::make_shared<Scalar<float>>(0);
 
         for (int j = 0; j < num_samples; j++) {
-            // forward
             auto z = w1 * X[j][0] + w2 * X[j][1] + b;
             auto a = sigmoid(z);
             loss = cross_entropy(Y[j], a) + loss;
diff --git a/functions.h b/functions.h
index 17b4ed3..dda0bb8 100644
--- a/functions.h
+++ b/functions.h
@@ -19,10 +19,19 @@ std::shared_ptr<Scalar<T>> sigmoid(std::shared_ptr<Scalar<T>> x) {
    auto negative_x = -x;
    auto exp_negative_x = exp(negative_x);
    auto numerator = Scalar<T>::make(1);
-
    auto denominator_one = Scalar<T>::make(1);
    auto denominator = denominator_one + exp_negative_x;
    auto result = numerator / denominator;
+    return result;
+}
+
+// mse loss function: (y - y_hat)^2 / 2
+template <typename T>
+std::shared_ptr<Scalar<T>> mse(std::shared_ptr<Scalar<T>> y, std::shared_ptr<Scalar<T>> y_hat) {
+    auto diff = y - y_hat;
+    auto diff_squared = square(diff);
+    auto two = Scalar<T>::make(2);
+    auto result = diff_squared / two;
 
    return result;
}
\ No newline at end of file
diff --git a/scalar.h b/scalar.h
index 4c8ccc5..61dcf63 100644
--- a/scalar.h
+++ b/scalar.h
@@ -60,7 +60,6 @@ class Scalar: public std::enable_shared_from_this<Scalar<T>> {
     static std::shared_ptr<Scalar<T>> make(T value) {
         auto s = std::make_shared<Scalar<T>>(value);
         ComputationalGraph<T>::get_instance().add_node(s);
-
         return s;
     }
 
@@ -82,6 +81,7 @@ class Scalar: public std::enable_shared_from_this<Scalar<T>> {
         auto result = Scalar<T>::make(
             lhs->value + rhs->value
         );
+
         result->children.insert(lhs);
         result->children.insert(rhs);
 
@@ -189,4 +189,26 @@ class Scalar: public std::enable_shared_from_this<Scalar<T>> {
         return result;
     }
+
+    // square operator
+    friend std::shared_ptr<Scalar<T>> square(std::shared_ptr<Scalar<T>> rhs) {
+        auto result = Scalar<T>::make(rhs->value * rhs->value);
+
+        result->children.insert(rhs);
+        rhs->in_degrees++;
+
+        result->_backward = [rhs, result]() {
+            rhs->grad += 2.0 * rhs->value * result->grad;
+        };
+
+        return result;
+    }
+
+
+    // don't allow in-place operations
+    Scalar& operator+=(const Scalar& rhs) = delete;
+    Scalar& operator-=(const Scalar& rhs) = delete;
+    Scalar& operator*=(const Scalar& rhs) = delete;
+    Scalar& operator/=(const Scalar& rhs) = delete;
+
 
 };
\ No newline at end of file
diff --git a/tests/test_functions.cpp b/tests/test_functions.cpp
index 6cf8474..a8db0b6 100644
--- a/tests/test_functions.cpp
+++ b/tests/test_functions.cpp
@@ -20,4 +20,15 @@ TEST(CrossEntropy, Test1) {
 
     EXPECT_FLOAT_EQ(s3->value, 0.6931472);
     EXPECT_FLOAT_EQ(s2->grad, -2.0);
+}
+
+TEST(MSE, Test1) {
+    auto y = Scalar<float>::make(1);
+    auto y_hat = Scalar<float>::make(2);
+
+    auto loss = mse(y, y_hat);
+    loss->backward();
+
+    EXPECT_FLOAT_EQ(loss->value, 0.5);
+    EXPECT_FLOAT_EQ(y_hat->grad, 1.0);
 }
\ No newline at end of file
diff --git a/tests/test_main.cpp b/tests/test_main.cpp
index a348436..1829e4c 100644
--- a/tests/test_main.cpp
+++ b/tests/test_main.cpp
@@ -39,6 +39,18 @@ TEST(ScalarMultiplication, Test1) {
     EXPECT_FLOAT_EQ(s2->grad, 1.0);
 }
 
+TEST(ScalarMultiplication, Test2) {
+    // multiply by itself
+    auto s1 = Scalar<float>::make(2.0);
+
+    auto s2 = s1 * s1;
+
+    EXPECT_FLOAT_EQ(s2->value, 4.0);
+
+    s2->backward();
+    EXPECT_FLOAT_EQ(s1->grad, 4.0);
+}
+
 TEST(ScalarDivision, Test1) {
     auto s1 = Scalar<float>::make(1.0);
     auto s2 = Scalar<float>::make(2.0);
@@ -243,6 +255,23 @@ TEST(ScalarLog, Test3) {
     EXPECT_FLOAT_EQ(s1->grad, 0.1);
 }
 
+TEST(MSEExample, Test1) {
+    auto y = Scalar<float>::make(1);
+    auto y_hat = Scalar<float>::make(2);
+
+    auto diff = y - y_hat;
+    auto diff_squared = square(diff);
+    auto two = Scalar<float>::make(2);
+
+    auto loss = diff_squared / two;
+
+    loss->backward();
+
+    // gradient w.r.t. y_hat
+    EXPECT_FLOAT_EQ(y_hat->grad, 1.0);
+
+}
+
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
     int ret = RUN_ALL_TESTS();
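A quick sanity check on the expected values in the new tests: `mse(y, y_hat)` computes `(y - y_hat)^2 / 2`, whose derivative with respect to `y_hat` is `y_hat - y`. With `y = 1` and `y_hat = 2` that gives a loss of `0.5` and a gradient of `1.0`, which is exactly what `TEST(MSE, Test1)` and `TEST(MSEExample, Test1)` assert. Likewise, for `s2 = s1 * s1` the product rule gives `d(s2)/d(s1) = 2 * s1 = 4.0` at `s1 = 2.0`, matching both `TEST(ScalarMultiplication, Test2)` and the `2.0 * rhs->value` factor in `square`'s `_backward`.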
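The new `linear` target points at `apps/linear.cpp`, which is not shown in this diff. Purely as an illustration of how the pieces added here compose, the sketch below fits `y = 2x + 1` with the new `mse()` loss, reusing only APIs visible in the hunks above (`Scalar<float>::make`, the overloaded arithmetic on `std::shared_ptr<Scalar<float>>`, `ComputationalGraph<float>::get_instance()`, `clear()`, `backward()`, and the `value`/`grad` members the tests read). The gradient-descent update and the manual `grad` reset are assumptions about the surrounding library, not code from this patch.

```cpp
#include <cstdio>
#include "backprop.h"

int main() {
    // trainable parameters for y_hat = w * x + b
    auto w = Scalar<float>::make(0);
    auto b = Scalar<float>::make(0);

    // toy dataset sampled from y = 2x + 1
    std::shared_ptr<Scalar<float>> X[4], Y[4];
    for (int j = 0; j < 4; j++) {
        X[j] = Scalar<float>::make(j);
        Y[j] = Scalar<float>::make(2 * j + 1);
    }

    int num_epochs = 1000, num_samples = 4;
    float learning_rate = 0.01;

    auto& graph = ComputationalGraph<float>::get_instance();

    for (int i = 0; i < num_epochs; i++) {
        graph.clear();

        std::shared_ptr<Scalar<float>> loss = std::make_shared<Scalar<float>>(0);
        for (int j = 0; j < num_samples; j++) {
            auto y_hat = w * X[j] + b;       // forward pass
            loss = mse(Y[j], y_hat) + loss;  // accumulate the new mse loss
        }

        loss->backward();                    // fills w->grad and b->grad

        // assumed update step: value and grad are treated as writable
        // public members, and grads are reset by hand between epochs
        w->value -= learning_rate * w->grad;
        b->value -= learning_rate * b->grad;
        w->grad = 0;
        b->grad = 0;
    }

    std::printf("w = %f, b = %f\n", w->value, b->value);  // expect roughly 2 and 1
}
```

The `loss = mse(Y[j], y_hat) + loss` accumulation mirrors the `cross_entropy` pattern already used in `apps/and.cpp`, so the same epoch loop structure works for both losses.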