train.c
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "micrograd.c/nn.h"
#include "micrograd.c/engine.h"
#define N_SAMPLES 100
#define HIDDEN_SIZE 16
#define OUTPUT_SIZE 1
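
// Trains a small MLP on the two-class make_moons dataset. The CSV file is
// expected to contain a header line followed by N_SAMPLES rows of the form
// "x1,x2,label", where the label is 0 or 1.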

void load_data(const char* filename, double X[N_SAMPLES][2], int y[N_SAMPLES]) {
    FILE* file = fopen(filename, "r");
    if (file == NULL) {
        printf("Error opening file: %s\n", filename);
        exit(1);
    }
    char line[1024];
    fgets(line, sizeof(line), file); // Skip header
    for (int i = 0; i < N_SAMPLES; i++) {
        if (fscanf(file, "%lf,%lf,%d", &X[i][0], &X[i][1], &y[i]) != 3) {
            printf("Error reading line %d\n", i + 1);
            exit(1);
        }
    }
    fclose(file);
}
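
// Runs a full forward/backward pass over the dataset: gradients are zeroed on
// entry and accumulated from the per-sample hinge losses. Returns the average
// loss (including an L2 penalty) and reports classification accuracy in
// percent through the `accuracy` out-parameter.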
double loss(MLP* model, double X[N_SAMPLES][2], int y[N_SAMPLES], double* accuracy) {
    double total_loss = 0.0;
    int correct = 0;
    model->base.zero_grad((Module*)model);
    for (int i = 0; i < N_SAMPLES; i++) {
        // Forward pass
        Value* inputs[2];
        for (int j = 0; j < 2; j++) {
            inputs[j] = value_new(X[i][j], NULL, 0, "");
        }
        Value* output = mlp_call(model, inputs);
        // Compute margin loss
        Value* target = value_new(y[i] * 2.0 - 1.0, NULL, 0, ""); // Convert 0/1 to -1/+1
        Value* margin_loss = value_relu(value_add(value_neg(value_mul(target, output)),
                                                  value_new(1.0, NULL, 0, "")));
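        // margin_loss = max(0, 1 - y*f(x)): the hinge loss is zero once the
        // raw output has the correct sign with a margin of at least 1.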
        total_loss += margin_loss->data;
        // Accumulate gradients
        backward(margin_loss);
        // Compute accuracy
        if ((y[i] == 1 && output->data > 0) || (y[i] == 0 && output->data <= 0)) {
            correct++;
        }
        // Free memory
        for (int j = 0; j < 2; j++) {
            value_free(inputs[j]);
        }
        value_free(output);
        value_free(target);
        value_free(margin_loss);
    }
    // Compute regularization loss and add to total loss
    double reg_loss = 0.0;
    double alpha = 1e-4;
    Value** params = model->base.parameters((Module*)model);
    int param_count = model->base.parameters_count((Module*)model);
    for (int p = 0; p < param_count; p++) {
        reg_loss += params[p]->data * params[p]->data;
    }
    reg_loss *= alpha;
    total_loss += reg_loss;
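    // Note: reg_loss is computed from the raw parameter values, so it only
    // adds to the reported loss and does not contribute gradients.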
    // Average loss over samples
    total_loss /= N_SAMPLES;
    // Compute accuracy
    *accuracy = (double)correct / N_SAMPLES * 100.0;
    // Free parameter array
    free(params);
    return total_loss;
}
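
// Loads the dataset, builds a 2 -> HIDDEN_SIZE -> HIDDEN_SIZE -> OUTPUT_SIZE
// MLP, and trains it for 100 epochs of full-batch gradient descent with a
// fixed learning rate of 0.01, printing loss and accuracy each epoch.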
int main(void) {
    srand(time(NULL));
    // load data
    double X[N_SAMPLES][2];
    int y[N_SAMPLES];
    load_data("data/make_moons.csv", X, y);
    // Initialize model
    int layer_sizes[] = {2, HIDDEN_SIZE, HIDDEN_SIZE, OUTPUT_SIZE};
    MLP* model = mlp_new(2, &layer_sizes[1], 3);
    // Training loop
    for (int epoch = 0; epoch < 100; epoch++) {
        double accuracy = 0.0;
        double avg_loss = loss(model, X, y, &accuracy);
        // Update weights
        double learning_rate = 0.01;
        Value** params = model->base.parameters((Module*)model);
        int param_count = model->base.parameters_count((Module*)model);
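        // Gradients were accumulated inside loss(); apply a plain SGD step
        // (param -= learning_rate * grad).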
        for (int i = 0; i < param_count; i++) {
            params[i]->data -= learning_rate * params[i]->grad;
        }
        free(params);
        // Print progress
        printf("step %d loss %f, accuracy %f%%\n", epoch, avg_loss, accuracy);
        // Zero out gradients for next epoch
        model->base.zero_grad((Module*)model);
    }
    // Free model
    mlp_free(model);
    return 0;
}