Skip to content
Snippets Groups Projects
Commit 317e27d2 authored by Boris Stefanovic's avatar Boris Stefanovic
Browse files

ADD: check random centers are not already in set

parent a7dcebdd
Branches
Tags
No related merge requests found
...@@ -45,8 +45,8 @@ void cluster_add_point_fpt(cluster_fpt_t* cluster, vector_fpt_t* point) { ...@@ -45,8 +45,8 @@ void cluster_add_point_fpt(cluster_fpt_t* cluster, vector_fpt_t* point) {
void cluster_update_center_int(cluster_int_t* cluster) { void cluster_update_center_int(cluster_int_t* cluster) {
vector_destroy_int(cluster->center); vector_destroy_int(cluster->center);
cluster->center = vector_create_int(cluster->points->head->point->dim);
list_points_node_int_t* node = cluster->points->head; list_points_node_int_t* node = cluster->points->head;
cluster->center = vector_create_int(node->point->dim);
while (node != NULL) { while (node != NULL) {
vector_add_inplace_int(cluster->center, *(node->point)); vector_add_inplace_int(cluster->center, *(node->point));
node = node->next; node = node->next;
...@@ -56,8 +56,8 @@ void cluster_update_center_int(cluster_int_t* cluster) { ...@@ -56,8 +56,8 @@ void cluster_update_center_int(cluster_int_t* cluster) {
void cluster_update_center_fpt(cluster_fpt_t* cluster) { void cluster_update_center_fpt(cluster_fpt_t* cluster) {
vector_destroy_fpt(cluster->center); vector_destroy_fpt(cluster->center);
cluster->center = vector_create_fpt(cluster->points->head->point->dim);
list_points_node_fpt_t* node = cluster->points->head; list_points_node_fpt_t* node = cluster->points->head;
cluster->center = vector_create_fpt(node->point->dim);
while (node != NULL) { while (node != NULL) {
vector_add_inplace_fpt(cluster->center, *(node->point)); vector_add_inplace_fpt(cluster->center, *(node->point));
node = node->next; node = node->next;
......
...@@ -2,13 +2,14 @@ ...@@ -2,13 +2,14 @@
cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size_t point_count, const size_t nclusters) { cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size_t point_count, const size_t nclusters) {
// check args and init
if (NULL == points || point_count < 2 || nclusters < 2) return NULL; if (NULL == points || point_count < 2 || nclusters < 2) return NULL;
cluster_int_t** clusters = calloc(nclusters, sizeof(vector_int_t*)); cluster_int_t** clusters = calloc(nclusters, sizeof(vector_int_t*));
if (NULL == clusters) return NULL; if (NULL == clusters) return NULL;
// determine range in which we are working // determine range in which we are working
vector_int_t* min = vector_copy_int(points[0]); vector_int_t* min = vector_copy_int(points[0]);
vector_int_t* max = vector_copy_int(points[0]); vector_int_t* max = vector_copy_int(points[0]);
for (size_t i = 0; i < point_count; ++i) { for (size_t i = 1; i < point_count; ++i) {
for (size_t p = 0; p < max->dim; ++p) { for (size_t p = 0; p < max->dim; ++p) {
const int_t value = points[i]->data[p]; const int_t value = points[i]->data[p];
if (value < min->data[p]) min->data[p] = value; if (value < min->data[p]) min->data[p] = value;
...@@ -18,21 +19,89 @@ cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size ...@@ -18,21 +19,89 @@ cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size
// until we have enough centers // until we have enough centers
for (size_t i = 0; i < nclusters; ++i) { for (size_t i = 0; i < nclusters; ++i) {
vector_int_t* center = vector_create_int(max->dim); vector_int_t* center = vector_create_int(max->dim);
for (size_t p = 0; p < center->dim; ++p) { bool valid = false;
center->data[p] = rand_int_range(min->data[p], max->data[p]); while (!valid) {
// initialise center values randomly, within the "polygon" of our set of points
for (size_t p = 0; p < center->dim; ++p) {
center->data[p] = rand_int_range(min->data[p], max->data[p] + 1);
}
// check center is not already in clusters, although probability is extremely low...
for (size_t k = 0; k < i; ++k) {
vector_int_t* kth_center = clusters[k]->center;
bool neq = false;
for (size_t p = 0; p < center->dim; ++p) {
if (center->data[p] != kth_center->data[p]) {
neq = true;
break;
}
}
if (neq) {
valid = true;
}
}
}
clusters[i]->center = center;
}
return clusters;
}
cluster_fpt_t** kmeans_init_clusters_fpt(const vector_fpt_t** points, const size_t point_count, const size_t nclusters) {
// check args and init
if (NULL == points || point_count < 2 || nclusters < 2) return NULL;
cluster_fpt_t** clusters = calloc(nclusters, sizeof(vector_fpt_t*));
if (NULL == clusters) return NULL;
// determine range in which we are working
vector_fpt_t* min = vector_copy_fpt(points[0]);
vector_fpt_t* max = vector_copy_fpt(points[0]);
for (size_t i = 1; i < point_count; ++i) {
for (size_t p = 0; p < max->dim; ++p) {
const fpt_t value = points[i]->data[p];
if (value < min->data[p]) min->data[p] = value;
if (value > max->data[p]) max->data[p] = value;
}
}
// until we have enough centers
for (size_t i = 0; i < nclusters; ++i) {
vector_fpt_t* center = vector_create_fpt(max->dim);
bool valid = false;
while (!valid) {
// initialise center values randomly, within the "polygon" of our set of points
for (size_t p = 0; p < center->dim; ++p) {
center->data[p] = rand_double_range(min->data[p], max->data[p]);
}
// check center is not already in clusters, although probability is extremely low...
for (size_t k = 0; k < i; ++k) {
vector_fpt_t* kth_center = clusters[k]->center;
bool neq = false;
for (size_t p = 0; p < center->dim; ++p) {
if (center->data[p] != kth_center->data[p]) {
neq = true;
break;
}
}
if (neq) {
valid = true;
}
}
} }
// TODO: maybe check center is not already in clusters, although probability is extremely low...
clusters[i]->center = center; clusters[i]->center = center;
} }
return clusters; return clusters;
} }
/**
 * K-means entry point for integer vectors.
 *
 * Placeholder: the clustering iteration is not implemented yet. The loop body
 * runs once, clears the "changed" flag and the function returns without
 * reading any of its parameters.
 */
void kmeans_int(vector_int_t** points, const size_t point_count, cluster_int_t** clusters, const size_t nb_clusters, fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*)) {
    //TODO
    bool assignments_changed;
    do {
        // nothing is reassigned yet, so a single pass ends the loop
        assignments_changed = false;
    } while (assignments_changed);
}
/**
 * K-means entry point for floating-point vectors.
 *
 * Placeholder: the clustering iteration is not implemented yet. The loop body
 * runs once, clears the "changed" flag and the function returns without
 * reading any of its parameters.
 */
void kmeans_fpt(vector_fpt_t** points, const size_t point_count, cluster_fpt_t** clusters, const size_t nb_clusters, fpt_t (* distance_function)(const vector_fpt_t*, const vector_fpt_t*)) {
    //TODO
    bool assignments_changed;
    do {
        // nothing is reassigned yet, so a single pass ends the loop
        assignments_changed = false;
    } while (assignments_changed);
}
...@@ -7,10 +7,12 @@ ...@@ -7,10 +7,12 @@
cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size_t point_count, const size_t nclusters); cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size_t point_count, const size_t nclusters);
void kmeans_int( cluster_fpt_t** kmeans_init_clusters_fpt(const vector_fpt_t** points, const size_t point_count, const size_t nclusters);
vector_int_t** points, const size_t point_count,
cluster_int_t** clusters, const size_t nb_clusters,
fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*)); void kmeans_int(vector_int_t** points, const size_t point_count, cluster_int_t** clusters, const size_t nb_clusters, fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*));
void kmeans_fpt(vector_fpt_t** points, const size_t point_count, cluster_fpt_t** clusters, const size_t nb_clusters, fpt_t (* distance_function)(const vector_fpt_t*, const vector_fpt_t*));
#endif //PROG_KMEANS_KMEANS_H #endif //PROG_KMEANS_KMEANS_H
0% Loading Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment