Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
prog_kmeans
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
This is an archived project. Repository and other project resources are read-only.
Show more breadcrumbs
boris.stefanov
prog_kmeans
Commits
13fc6a10
Commit
13fc6a10
authored
2 years ago
by
Boris Stefanovic
Browse files
Options
Downloads
Patches
Plain Diff
cleanup
parent
ab4e705b
Branches
Branches containing commit
No related tags found
No related merge requests found
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
Makefile
+3
-0
3 additions, 0 deletions
Makefile
src/cluster.c
+31
-2
31 additions, 2 deletions
src/cluster.c
src/io.c
+11
-6
11 additions, 6 deletions
src/io.c
src/kmeans.c
+90
-6
90 additions, 6 deletions
src/kmeans.c
src/kmeans.h
+5
-0
5 additions, 0 deletions
src/kmeans.h
src/main.c
+71
-36
71 additions, 36 deletions
src/main.c
with
211 additions
and
50 deletions
Makefile
+
3
−
0
View file @
13fc6a10
...
@@ -55,5 +55,8 @@ clean:
...
@@ -55,5 +55,8 @@ clean:
debug
:
${DEBUG_TARGET}
debug
:
${DEBUG_TARGET}
./
$<
-i
test
/data.txt
./
$<
-i
test
/data.txt
test
:
${TARGET}
./
$<
-i
test
/data.txt
-o
~/test_kmeans
exec
:
${TARGET}
exec
:
${TARGET}
./
$<
./
$<
This diff is collapsed.
Click to expand it.
src/cluster.c
+
31
−
2
View file @
13fc6a10
...
@@ -52,8 +52,37 @@ void cluster_add_point_fpt(cluster_fpt_t* cluster, vector_fpt_t* point) {
...
@@ -52,8 +52,37 @@ void cluster_add_point_fpt(cluster_fpt_t* cluster, vector_fpt_t* point) {
/**
 * Recompute the center of a cluster as the mean of the points it holds.
 *
 * A new center vector is created, every point of the cluster is added to it
 * and the sum is divided by the number of points. The previous center vector
 * is destroyed. A cluster without points keeps its current center.
 *
 * @param cluster cluster to update; must not be NULL, and must own a center
 *                and a point list
 * @return true if at least one component of the new center differs from the
 *         previous center; false if the center is unchanged or the cluster
 *         is empty
 */
bool cluster_update_center_int(cluster_int_t* cluster) {
    // validate the cluster itself before reading any of its fields
    assert(cluster != NULL);
    // save old center
    vector_int_t* old_center = cluster->center;
    assert(old_center != NULL);
    list_points_node_int_t* node = cluster->points->head;
    // if cluster is empty, the center has not been changed
    if (NULL == node) {
        return false;
    }
    // create new center
    // NOTE(review): assumes vector_create_int returns a zero-filled vector,
    // otherwise the sum below starts from garbage — confirm
    cluster->center = vector_create_int(node->point->dim);
    // sum all values in center
    while (node != NULL) {
        vector_add_inplace_int(cluster->center, *(node->point));
        node = node->next;
    }
    // divide by number of points
    vector_div_inplace_int(cluster->center, (int_t) cluster->points->size);
    // the center has changed as soon as one component differs
    bool changed = false;
    for (size_t p = 0; p < cluster->center->dim; ++p) {
        if (cluster->center->data[p] != old_center->data[p]) {
            changed = true;
            break;
        }
    }
    // destroy old center
    vector_destroy_int(old_center);
    return changed;
}
bool
cluster_update_center_fpt
(
cluster_fpt_t
*
cluster
)
{
bool
cluster_update_center_fpt
(
cluster_fpt_t
*
cluster
)
{
...
...
This diff is collapsed.
Click to expand it.
src/io.c
+
11
−
6
View file @
13fc6a10
#define _GNU_SOURCE
#define _GNU_SOURCE
#include
"io.h"
#include
"io.h"
#include
<assert.h>
#include
<stdio.h>
#include
<stdio.h>
#include
<stdlib.h>
#include
<stdlib.h>
#include
<string.h>
#include
<string.h>
...
@@ -9,17 +10,21 @@
...
@@ -9,17 +10,21 @@
/**
 * Read one line from a stream and parse it as a base-10 integer.
 *
 * @param file stream to read from
 * @return the parsed value, or 0 when no line could be read (end of file or
 *         read error) or when the line does not start with a number
 */
int_t io_read_int(FILE* file) {
    char* line = NULL;
    size_t len = 0;
    long res = 0;
    // getline returns -1 on EOF/error and the buffer content is then
    // unspecified, so only parse after a successful read
    if (getline(&line, &len, file) != -1) {
        res = strtol(line, NULL, 10);
    }
    // the buffer must be released even when getline failed
    free(line);
    return res;
}
/**
 * Read one line from a stream and parse it as a floating point number.
 *
 * @param file stream to read from
 * @return the parsed value, or 0.0 when no line could be read (end of file
 *         or read error) or when the line does not start with a number
 */
fpt_t io_read_fpt(FILE* file) {
    char* line = NULL;
    size_t len = 0;
    double res = 0.0;
    // getline returns -1 on EOF/error and the buffer content is then
    // unspecified, so only parse after a successful read
    if (getline(&line, &len, file) != -1) {
        res = strtod(line, NULL);
    }
    // the buffer must be released even when getline failed
    free(line);
    return res;
}
...
...
This diff is collapsed.
Click to expand it.
src/kmeans.c
+
90
−
6
View file @
13fc6a10
...
@@ -3,11 +3,49 @@
...
@@ -3,11 +3,49 @@
#include
"vector.h"
#include
"vector.h"
/**
 * Tell whether a candidate center equals the center of one of the first
 * `i` clusters.
 *
 * @param center   candidate vector
 * @param clusters array of clusters; only indices [0, i) are inspected
 * @param i        number of leading clusters to compare against
 * @return true if vector_equals_int matches for any inspected cluster,
 *         false otherwise (always false when i is 0)
 */
bool is_vector_in_centers_int(const vector_int_t* center, const cluster_int_t** clusters, const size_t i) {
    size_t idx = 0;
    while (idx < i) {
        const cluster_int_t* candidate = clusters[idx];
        if (vector_equals_int(candidate->center, center)) {
            return true;
        }
        ++idx;
    }
    return false;
}
cluster_int_t
**
kmeans_init_clusters_int
(
const
vector_int_t
**
points
,
const
size_t
point_count
,
const
size_t
nclusters
)
{
// check args and init
if
(
NULL
==
points
||
point_count
<
2
||
nclusters
<
2
)
return
NULL
;
cluster_int_t
**
clusters
=
calloc
(
nclusters
,
sizeof
(
vector_int_t
*
));
if
(
NULL
==
clusters
)
return
NULL
;
for
(
size_t
k
=
0
;
k
<
nclusters
;
++
k
)
{
clusters
[
k
]
=
cluster_create_int
(
NULL
);
}
// determine range in which we are working
vector_int_t
*
min
=
vector_copy_int
(
points
[
0
]);
vector_int_t
*
max
=
vector_copy_int
(
points
[
0
]);
for
(
size_t
i
=
1
;
i
<
point_count
;
++
i
)
{
for
(
size_t
p
=
0
;
p
<
max
->
dim
;
++
p
)
{
const
int_t
value
=
points
[
i
]
->
data
[
p
];
if
(
value
<
min
->
data
[
p
])
min
->
data
[
p
]
=
value
;
if
(
value
>
max
->
data
[
p
])
max
->
data
[
p
]
=
value
;
}
}
// until we have enough centers
for
(
size_t
i
=
0
;
i
<
nclusters
;
++
i
)
{
vector_int_t
*
center
=
vector_create_int
(
max
->
dim
);
bool
valid
=
false
;
while
(
!
valid
)
{
// initialise center values randomly, within the "multidimensional rectangle" of our set of points
for
(
size_t
p
=
0
;
p
<
center
->
dim
;
++
p
)
{
center
->
data
[
p
]
=
rand_int_range
(
min
->
data
[
p
],
max
->
data
[
p
]);
// TODO: create a rand_long_range(...) function
}
// check center is not already in clusters, although probability is extremely low...
valid
=
!
is_vector_in_centers_int
(
center
,
(
const
cluster_int_t
**
)
clusters
,
i
);
}
clusters
[
i
]
=
cluster_create_int
(
center
);
}
return
clusters
;
}
bool
is_vector_in_centers_fpt
(
const
vector_fpt_t
*
center
,
const
cluster_fpt_t
**
clusters
,
const
size_t
i
)
{
bool
is_vector_in_centers_fpt
(
const
vector_fpt_t
*
center
,
const
cluster_fpt_t
**
clusters
,
const
size_t
i
)
{
for
(
size_t
k
=
0
;
k
<
i
;
++
k
)
{
for
(
size_t
k
=
0
;
k
<
i
;
++
k
)
{
...
@@ -18,7 +56,6 @@ bool is_vector_in_centers_fpt(const vector_fpt_t* center, const cluster_fpt_t**
...
@@ -18,7 +56,6 @@ bool is_vector_in_centers_fpt(const vector_fpt_t* center, const cluster_fpt_t**
return
false
;
return
false
;
}
}
cluster_fpt_t
**
kmeans_init_clusters_fpt
(
const
vector_fpt_t
**
points
,
const
size_t
point_count
,
const
size_t
nclusters
)
{
cluster_fpt_t
**
kmeans_init_clusters_fpt
(
const
vector_fpt_t
**
points
,
const
size_t
point_count
,
const
size_t
nclusters
)
{
// check args and init
// check args and init
if
(
NULL
==
points
||
point_count
<
2
||
nclusters
<
2
)
return
NULL
;
if
(
NULL
==
points
||
point_count
<
2
||
nclusters
<
2
)
return
NULL
;
...
@@ -55,9 +92,57 @@ cluster_fpt_t** kmeans_init_clusters_fpt(const vector_fpt_t** points, const size
...
@@ -55,9 +92,57 @@ cluster_fpt_t** kmeans_init_clusters_fpt(const vector_fpt_t** points, const size
}
}
/**
 * Destroy every cluster referenced by an array of clusters.
 *
 * Only the clusters are passed to cluster_destroy_int; the array itself is
 * not freed here.
 *
 * @param clusters    array of clusters, may be NULL (then nothing happens)
 * @param nb_clusters number of entries in `clusters`
 */
void kmeans_destroy_clusters_int(cluster_int_t** clusters, const size_t nb_clusters) {
    if (clusters != NULL) {
        cluster_int_t** cursor = clusters;
        size_t remaining = nb_clusters;
        while (remaining > 0) {
            cluster_destroy_int(*cursor);
            ++cursor;
            --remaining;
        }
    }
}
/**
 * Destroy every cluster referenced by an array of clusters.
 *
 * Only the clusters are passed to cluster_destroy_fpt; the array itself is
 * not freed here.
 *
 * @param clusters    array of clusters, may be NULL (then nothing happens)
 * @param nb_clusters number of entries in `clusters`
 */
void kmeans_destroy_clusters_fpt(cluster_fpt_t** clusters, const size_t nb_clusters) {
    if (clusters != NULL) {
        cluster_fpt_t** cursor = clusters;
        size_t remaining = nb_clusters;
        while (remaining > 0) {
            cluster_destroy_fpt(*cursor);
            ++cursor;
            --remaining;
        }
    }
}
/**
 * Run the k-means iteration on integer vectors.
 *
 * Each round empties every cluster, assigns every point to the cluster whose
 * center is closest according to `distance_function`, then recomputes all
 * centers. Rounds repeat until no call to cluster_update_center_int reports
 * a change.
 *
 * @param points            array of `point_count` points
 * @param point_count       number of points
 * @param clusters          array of `nb_clusters` initialised clusters
 * @param nb_clusters       number of clusters
 * @param distance_function distance between a point and a center
 */
void kmeans_int(vector_int_t** points, const size_t point_count, cluster_int_t** clusters, const size_t nb_clusters,
                fpt_t (* distance_function)(const vector_int_t*, const vector_int_t*)) {
    // without clusters there is nothing to assign points to, and the search
    // below would read clusters[0] out of bounds
    if (NULL == clusters || 0 == nb_clusters || NULL == distance_function) return;
    if (NULL == points && point_count > 0) return;
    bool changed = true;
    while (changed) {
        // reset condition
        changed = false;
        // empty all clusters, keeping only their centers (virtual)
        for (size_t k = 0; k < nb_clusters; ++k) {
            cluster_reset_int(clusters[k]);
        }
        // for each point
        for (size_t i = 0; i < point_count; ++i) {
            vector_int_t* point = points[i];
            // find closest cluster; cluster 0 is the initial candidate,
            // so the scan starts at 1 instead of measuring it twice
            cluster_int_t* cmin = clusters[0];
            fpt_t dmin = distance_function(point, cmin->center);
            for (size_t k = 1; k < nb_clusters; ++k) {
                cluster_int_t* current_cluster = clusters[k];
                fpt_t dist = distance_function(point, current_cluster->center);
                if (dist < dmin) {
                    cmin = current_cluster;
                    dmin = dist;
                }
            }
            // add point to closest cluster
            cluster_add_point_int(cmin, point);
        }
        // update all cluster centers
        for (size_t k = 0; k < nb_clusters; ++k) {
            assert(clusters[k] != NULL);
            assert(clusters[k]->points != NULL);
            if (cluster_update_center_int(clusters[k])) {
                changed = true;
            }
        }
    }
}
...
@@ -94,7 +179,6 @@ void kmeans_fpt(vector_fpt_t** points, const size_t point_count, cluster_fpt_t**
...
@@ -94,7 +179,6 @@ void kmeans_fpt(vector_fpt_t** points, const size_t point_count, cluster_fpt_t**
assert
(
clusters
[
k
]
->
points
!=
NULL
);
assert
(
clusters
[
k
]
->
points
!=
NULL
);
if
(
cluster_update_center_fpt
(
clusters
[
k
]))
{
if
(
cluster_update_center_fpt
(
clusters
[
k
]))
{
changed
=
true
;
changed
=
true
;
printf
(
"%lud
\n
<%lf %lf %lf>
\n\n
"
,
nb_clusters
,
clusters
[
k
]
->
center
->
data
[
0
],
clusters
[
k
]
->
center
->
data
[
1
],
clusters
[
k
]
->
center
->
data
[
2
]);
}
}
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
src/kmeans.h
+
5
−
0
View file @
13fc6a10
...
@@ -10,6 +10,11 @@ cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size
...
@@ -10,6 +10,11 @@ cluster_int_t** kmeans_init_clusters_int(const vector_int_t** points, const size
cluster_fpt_t
**
kmeans_init_clusters_fpt
(
const
vector_fpt_t
**
points
,
const
size_t
point_count
,
const
size_t
nclusters
);
cluster_fpt_t
**
kmeans_init_clusters_fpt
(
const
vector_fpt_t
**
points
,
const
size_t
point_count
,
const
size_t
nclusters
);
void
kmeans_destroy_clusters_int
(
cluster_int_t
**
clusters
,
const
size_t
nb_clusters
);
void
kmeans_destroy_clusters_fpt
(
cluster_fpt_t
**
clusters
,
const
size_t
nb_clusters
);
void
kmeans_int
(
vector_int_t
**
points
,
const
size_t
point_count
,
cluster_int_t
**
clusters
,
const
size_t
nb_clusters
,
fpt_t
(
*
distance_function
)(
const
vector_int_t
*
,
const
vector_int_t
*
));
void
kmeans_int
(
vector_int_t
**
points
,
const
size_t
point_count
,
cluster_int_t
**
clusters
,
const
size_t
nb_clusters
,
fpt_t
(
*
distance_function
)(
const
vector_int_t
*
,
const
vector_int_t
*
));
void
kmeans_fpt
(
vector_fpt_t
**
points
,
const
size_t
point_count
,
cluster_fpt_t
**
clusters
,
const
size_t
nb_clusters
,
fpt_t
(
*
distance_function
)(
const
vector_fpt_t
*
,
const
vector_fpt_t
*
));
void
kmeans_fpt
(
vector_fpt_t
**
points
,
const
size_t
point_count
,
cluster_fpt_t
**
clusters
,
const
size_t
nb_clusters
,
fpt_t
(
*
distance_function
)(
const
vector_fpt_t
*
,
const
vector_fpt_t
*
));
...
...
This diff is collapsed.
Click to expand it.
src/main.c
+
71
−
36
View file @
13fc6a10
#include
<assert.h>
#include
<getopt.h>
#include
<getopt.h>
#include
<stdbool.h>
#include
<stdbool.h>
#include
<stdio.h>
#include
<stdio.h>
...
@@ -70,45 +71,59 @@ void parse_args(int argc, char** argv, char** ipath, char** opath, enum Distance
...
@@ -70,45 +71,59 @@ void parse_args(int argc, char** argv, char** ipath, char** opath, enum Distance
}
}
/**
 * Integer-typed program body: read the points, run k-means, write the result.
 *
 * The streams are owned by the caller and are not closed here.
 *
 * @param ifile          input stream positioned at the first point
 * @param ofile          output stream receiving dimension, cluster count and clusters
 * @param dim            dimension of every point
 * @param nb_clusters    number of clusters to build
 * @param dist_func_type index into DIST_FUNC_INT selecting the distance function
 * @return EXIT_SUCCESS on success, EXIT_FAILURE when the points cannot be
 *         read or the clusters cannot be initialised
 */
int main_int(FILE* ifile, FILE* ofile, const size_t dim, const size_t nb_clusters, const enum DistanceFunctionType dist_func_type) {
    // INIT
    int status = EXIT_FAILURE;
    vector_int_t** points = NULL;
    cluster_int_t** clusters = NULL;
    // READ
    list_points_int_t* list = io_get_vector_list_int(ifile, dim);
    if (NULL == list) return EXIT_FAILURE;
    const size_t point_count = list->size;
    points = list_points_to_array_int(list);
    // presumably `false` keeps the vectors alive, since `points` now refers to them — confirm
    list_points_destroy_int(list, false);
    list = NULL;
    if (NULL == points) return EXIT_FAILURE;
    // ALGORITHM
    printf("INIT: ");
    clusters = kmeans_init_clusters_int((const vector_int_t**) points, point_count, nb_clusters);
    if (NULL != clusters) {
        printf("DONE\n");
        printf("KMEANS: begin\n");
        kmeans_int(points, point_count, clusters, nb_clusters, DIST_FUNC_INT[dist_func_type]);
        printf("KMEANS: DONE !\n");
        // WRITE (%zu is the conversion specifier for size_t)
        fprintf(ofile, "%zu\n%zu\n", dim, nb_clusters);
        io_write_clusters_to_file_int(ofile, clusters, nb_clusters);
        // CLEANUP of the clusters and of the array holding them
        for (size_t i = 0; i < nb_clusters; ++i) cluster_destroy_int(clusters[i]);
        free(clusters);
        status = EXIT_SUCCESS;
    } else {
        // initialisation rejects fewer than 2 points or fewer than 2 clusters
        fprintf(stderr, "FAILED\n");
    }
    // CLEANUP of the points and of the array holding them
    for (size_t i = 0; i < point_count; ++i) vector_destroy_int(points[i]);
    free(points);
    // EXIT
    return status;
}
int
main_fpt
(
const
char
*
ipath
,
const
char
*
opath
,
const
enum
DistanceFunctionType
dist_func_type
)
{
int
main_fpt
(
FILE
*
ifile
,
FILE
*
ofile
,
const
size_t
dim
,
const
size_t
nb_clusters
,
const
enum
DistanceFunctionType
dist_func_type
)
{
// INIT
vector_fpt_t
**
points
=
NULL
;
cluster_fpt_t
**
clusters
=
NULL
;
// READ
// READ
FILE
*
ifile
=
ipath
!=
NULL
?
fopen
(
ipath
,
"r"
)
:
stdin
;
const
size_t
dim
=
io_read_int
(
ifile
);
const
size_t
nb_clusters
=
io_read_int
(
ifile
);
if
(
0
==
dim
)
{
printf
(
"DIMENSION MUST BE STRICTLY POSITIVE !
\n
"
);
fclose
(
ifile
);
return
EXIT_FAILURE
;
}
if
(
0
==
nb_clusters
)
{
printf
(
"NUMBER OF CLUSTERS MUST BE STRICTLY POSITIVE !
\n
"
);
fclose
(
ifile
);
return
EXIT_FAILURE
;
}
list_points_fpt_t
*
list
=
io_get_vector_list_fpt
(
ifile
,
dim
);
list_points_fpt_t
*
list
=
io_get_vector_list_fpt
(
ifile
,
dim
);
fclose
(
ifile
);
ifile
=
NULL
;
const
size_t
point_count
=
list
->
size
;
const
size_t
point_count
=
list
->
size
;
vector_fpt_t
**
points
=
list_points_to_array_fpt
(
list
);
points
=
list_points_to_array_fpt
(
list
);
list_points_destroy_fpt
(
list
,
false
);
list_points_destroy_fpt
(
list
,
false
);
list
=
NULL
;
list
=
NULL
;
// ALGORITHM
// ALGORITHM
printf
(
"INIT:
...
"
);
printf
(
"INIT: "
);
cluster_fpt_t
**
clusters
=
kmeans_init_clusters_fpt
((
const
vector_fpt_t
**
)
points
,
point_count
,
nb_clusters
);
clusters
=
kmeans_init_clusters_fpt
((
const
vector_fpt_t
**
)
points
,
point_count
,
nb_clusters
);
printf
(
"DONE
\n
"
);
printf
(
"DONE
\n
"
);
printf
(
"
STARTING KMEANS ALGORITHM: ...
\n
"
);
printf
(
"
KMEANS: begin
\n
"
);
kmeans_fpt
(
points
,
point_count
,
clusters
,
nb_clusters
,
DIST_FUNC_FPT
[
dist_func_type
]);
kmeans_fpt
(
points
,
point_count
,
clusters
,
nb_clusters
,
DIST_FUNC_FPT
[
dist_func_type
]);
printf
(
"KMEANS DONE !
\n
"
);
printf
(
"KMEANS
:
DONE !
\n
"
);
// WRITE
// WRITE
FILE
*
ofile
=
opath
!=
NULL
?
fopen
(
opath
,
"w"
)
:
stdout
;
fprintf
(
ofile
,
"%lu
\n
%lu
\n
"
,
dim
,
nb_clusters
);
fprintf
(
ofile
,
"%lud
\n
%lud
\n
"
,
dim
,
nb_clusters
);
io_write_clusters_to_file_fpt
(
ofile
,
clusters
,
nb_clusters
);
io_write_clusters_to_file_fpt
(
ofile
,
clusters
,
nb_clusters
);
fclose
(
ofile
);
// CLEANUP
for
(
size_t
i
=
0
;
i
<
nb_clusters
;
++
i
)
cluster_destroy_fpt
(
clusters
[
i
]);
free
(
clusters
);
for
(
size_t
i
=
0
;
i
<
point_count
;
++
i
)
vector_destroy_fpt
(
points
[
i
]);
free
(
points
);
// EXIT
return
EXIT_SUCCESS
;
return
EXIT_SUCCESS
;
}
}
...
@@ -120,14 +135,34 @@ int main(int argc, char** argv) {
...
@@ -120,14 +135,34 @@ int main(int argc, char** argv) {
enum
DataType
datatype
=
FLOAT
;
enum
DataType
datatype
=
FLOAT
;
// parse args
// parse args
parse_args
(
argc
,
argv
,
&
ipath
,
&
opath
,
&
disttype
,
&
datatype
);
parse_args
(
argc
,
argv
,
&
ipath
,
&
opath
,
&
disttype
,
&
datatype
);
switch
(
datatype
)
{
// open files
case
FLOAT
:
FILE
*
ifile
=
ipath
!=
NULL
?
fopen
(
ipath
,
"r"
)
:
stdin
;
printf
(
"FLOAT
\n
"
);
FILE
*
ofile
=
opath
!=
NULL
?
fopen
(
opath
,
"w"
)
:
stdout
;
return
main_fpt
(
ipath
,
opath
,
disttype
);
// read dimension and desired number of clusters from file
case
INT
:
const
size_t
dim
=
io_read_int
(
ifile
);
printf
(
"INT
\n
"
);
const
size_t
nb_clusters
=
io_read_int
(
ifile
);
return
main_int
(
ipath
,
opath
,
disttype
);
if
(
0
==
dim
)
{
default:
printf
(
"DIMENSION MUST BE STRICTLY POSITIVE !
\n
"
);
abort
();
fclose
(
ifile
);
fclose
(
ofile
);
return
EXIT_FAILURE
;
}
}
if
(
0
==
nb_clusters
)
{
printf
(
"NUMBER OF CLUSTERS MUST BE STRICTLY POSITIVE !
\n
"
);
fclose
(
ifile
);
fclose
(
ofile
);
return
EXIT_FAILURE
;
}
// type specific code
int
return_value
=
EXIT_FAILURE
;
int
(
*
main_routine
)(
FILE
*
,
FILE
*
,
const
size_t
,
const
size_t
,
const
enum
DistanceFunctionType
);
main_routine
=
INT
==
datatype
?
main_int
:
main_fpt
;
printf
(
INT
==
datatype
?
"TYPE: INT
\n
"
:
"TYPE: FLOAT
\n
"
);
assert
(
ifile
!=
NULL
);
assert
(
ofile
!=
NULL
);
return_value
=
main_routine
(
ifile
,
ofile
,
dim
,
nb_clusters
,
disttype
);
// cleanup
fclose
(
ifile
);
fclose
(
ofile
);
return
return_value
;
}
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment