1+ #include <math.h>
12#include <stdio.h>
23#include <float.h>
3-
4+ #include <stdlib.h>
45
56#include "kmeans.h"
6- #include "gaussian_distribution .h"
7+ #include "point .h"
78
89
910/*
@@ -15,31 +16,6 @@ double compute_euclidean_distance(Point p1, Point p2)
1516 return sqrt ( (p1 .x - p2 .x )* (p1 .x - p2 .x ) + (p1 .y - p2 .y )* (p1 .y - p2 .y ) );
1617}
1718
18- /*
19- * Generate a dataset having "number_of_distributions" gaussian clusters
20- */
21- void generate_gaussian_clusters_dataset (Point * point_array , int counts , GaussianDistribution * gaussian_distribution_array , int number_of_distributions )
22- {
23-
24- for (int i = 0 ; i < counts ; i ++ )
25- {
26-
27- int selected_distribution = i %number_of_distributions ;
28-
29- // Draw a sample from the selected distribution
30- // new point class is initialized to -1 as point is unclassified
31- Point p = {0 ,0 ,-1 };
32- p .x = generate_gaussian (gaussian_distribution_array [selected_distribution ].mu_x ,gaussian_distribution_array [selected_distribution ].sigma_x );
33- p .y = generate_gaussian (gaussian_distribution_array [selected_distribution ].mu_y ,gaussian_distribution_array [selected_distribution ].sigma_y );
34-
35- // Save point to array
36- point_array [i ] = p ;
37- }
38-
39- return ;
40-
41- }
42-
4319/*
4420* Initialize centroid position using the specified initialization method
4521*/
@@ -74,7 +50,7 @@ void classify_dataset(Point* dataset, int dataset_size, Point* centroids, int k)
7450 for (int i = 0 ; i < dataset_size ; i ++ )
7551 {
7652 // Assign dataset element to the closest class
77- dataset [i ]._class = classify_point (dataset [i ], centroids , k );
53+ dataset [i ].label = classify_point (dataset [i ], centroids , k );
7854 }
7955
8056 return ;
@@ -91,29 +67,29 @@ void recompute_centroids(Point* dataset, int dataset_size, Point* centroids, int
9167 centroids [i ].x = 0 ;
9268 centroids [i ].y = 0 ;
9369
94- // Here we can use Point '_class ' member as element counter
95- centroids [i ]._class = 0 ;
70+ // Here we can use Point 'label ' member as element counter
71+ centroids [i ].label = 0 ;
9672 }
9773
9874
9975 for (int i = 0 ; i < dataset_size ; i ++ )
10076 {
10177
102- int centroid_id = dataset [i ]._class ;
78+ int centroid_id = dataset [i ].label ;
10379
10480 centroids [centroid_id ].x += dataset [i ].x ;
10581 centroids [centroid_id ].y += dataset [i ].y ;
106- centroids [centroid_id ]._class += 1 ;
82+ centroids [centroid_id ].label += 1 ;
10783 }
10884
10985 // Compute means
11086 for (int i = 0 ; i < k ; i ++ )
11187 {
112- centroids [i ].x /= centroids [i ]._class ;
113- centroids [i ].y /= centroids [i ]._class ;
88+ centroids [i ].x /= centroids [i ].label ;
89+ centroids [i ].y /= centroids [i ].label ;
11490
115- // Assign centroid._class back to its class for consistency (optional)
116- centroids [i ]._class = i ;
91+ // Assign centroid.label back to its class for consistency (optional)
92+ centroids [i ].label = i ;
11793 }
11894
11995 return ;
@@ -124,7 +100,7 @@ void recompute_centroids(Point* dataset, int dataset_size, Point* centroids, int
124100*/
125101int classify_point (Point point , Point * centroids , int k )
126102{
127- int current_element_class ;
103+ int current_elementlabel ;
128104 // Initialize the minimum distance to the largest finite double so the first comparison can only lower it
129105 double minimum_distance = DBL_MAX ;
130106
@@ -136,10 +112,10 @@ int classify_point(Point point, Point* centroids, int k)
136112 if (current_distance < minimum_distance )
137113 {
138114 minimum_distance = current_distance ;
139- current_element_class = j ;
115+ current_elementlabel = j ;
140116 }
141117
142118 }
143119
144- return current_element_class ;
120+ return current_elementlabel ;
145121}
0 commit comments