view pthreads_main.c @ 0:e69e4c2d612a

Initial pthreads version
author Merten Sach <msach@mailbox.tu-berlin.de>
date Wed, 03 Aug 2011 19:30:34 +0200
parents
children 8e7bdab2840f
line source
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*   File:         pthreads_main.c  (a pthreads version)                     */
/*   Description:  This program shows an example of how to call a subroutine */
/*                 that implements a simple k-means clustering algorithm     */
/*                 based on Euclidean distance.                              */
/*   Input file format:                                                      */
/*                 ascii  file: each line contains 1 data object             */
/*                 binary file: first 4-byte integer is the number of data   */
/*                 objects and 2nd integer is the no. of features (or        */
/*                 coordinates) of each object                               */
/*                                                                           */
/*   Author:  Wei-keng Liao                                                  */
/*            ECE Department Northwestern University                         */
/*            email: wkliao@ece.northwestern.edu                             */
/*   Copyright, 2005, Wei-keng Liao                                          */
/*                                                                           */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#include <errno.h>      /* errno, ERANGE */
#include <limits.h>     /* INT_MAX */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>     /* strtok() */
#include <time.h>

#include <sys/types.h>  /* open() */
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <unistd.h>     /* getopt() */

#include "kmeans.h"
/*
 * seconds(tm): store the current wall-clock time, in seconds (with a
 * microsecond fraction), in the lvalue `tm`.
 *
 * The macro expands to TWO statements that communicate through the
 * file-scope variable `tp`:
 *
 *     gettimeofday(&tp, ...);  tm = tp.tv_sec + tp.tv_usec/1000000.0
 *
 * Because of that it must only be used as a full statement of its own,
 * `seconds(x);`. Inside a larger expression, or as the unbraced body of an
 * if/for/while, only the gettimeofday() call takes part in the surrounding
 * construct and the assignment to `tm` becomes a separate statement.
 */
#define seconds(tm) gettimeofday(&tp, (struct timezone *)0); \
                    tm = tp.tv_sec + tp.tv_usec / 1000000.0

/* Scratch buffer overwritten by every seconds() invocation. */
struct timeval tp;

/*
 * Run parameters with external linkage.  In this file numClusters and
 * nthreads are set from the command line, while numObjs and numCoords are
 * filled in through the pointers handed to file_read().
 */
int numClusters;
int numCoords;
int numObjs;
int nthreads;
/*
 * Function: usage
 * ---------------
 * Writes the command-line synopsis to stderr and terminates the process
 * with exit status -1.  This function does not return.
 *
 * argv0: program name substituted into the "Usage:" line.
 */
static void usage(char *argv0) {
    /* Option descriptions; constant text, emitted after the synopsis line. */
    static const char option_list[] =
        " -i filename : file containing data to be clustered\n"
        " -b : input file is in binary format (default no)\n"
        " -n num_clusters: number of clusters (K must be > 1)\n"
        " -p nproc : number of threads (default 1)\n"
        " -o filename : write output to file\n";

    fprintf(stderr,
            "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n",
            argv0);
    fputs(option_list, stderr);
    exit(-1);
}
54 /*---< main() >-------------------------------------------------------------*/
55 int main(int argc, char **argv) {
56 int opt;
57 extern char *optarg;
58 extern int optind;
59 int i, j;
60 int isBinaryFile;
62 int *membership; /* [numObjs] */
63 char *filename, *outfile;
64 double **objects; /* [numObjs][numCoords] data objects */
65 double **clusters; /* [numClusters][numCoords] cluster center */
66 double threshold;
67 double timing, io_timing, clustering_timing;
69 /* some default values */
70 nthreads = 1; /* Amount of threads to use */
71 numClusters = 1; /* Amount of cluster centers */
72 threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */
73 isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */
74 filename = NULL; /* Name of the input file */
75 outfile = NULL; /* Name of the output file */
77 /* Parse command line options */
78 while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) {
79 switch (opt) {
80 case 'i': filename=optarg;
81 break;
82 case 'b': isBinaryFile = 1;
83 break;
84 case 'n': numClusters = atoi(optarg);
85 break;
86 case 'p': nthreads = atoi(optarg);
87 break;
88 case 'h': usage(argv[0]);
89 break;
90 case 'o': outfile=optarg;
91 break;
92 default: usage(argv[0]);
93 break;
94 }
95 }
97 if (filename == NULL) usage(argv[0]);
99 seconds(io_timing);
101 /* Read input data points from given input file */
102 objects = file_read(isBinaryFile, filename, &numObjs, &numCoords);
103 assert(objects != NULL);
105 seconds(timing);
106 io_timing = timing - io_timing;
107 clustering_timing = timing;
109 membership = (int*) malloc(numObjs * sizeof(int));
110 assert(membership != NULL);
112 /* Launch the core computation algorithm */
113 clusters = pthreads_kmeans(0, objects, numCoords, numObjs,
114 numClusters, threshold, membership);
116 free(objects[0]);
117 free(objects);
119 seconds(timing);
120 clustering_timing = timing - clustering_timing;
122 /* Memory cleanup */
123 free(membership);
125 if(outfile != NULL) {
126 int l;
127 FILE* fp = fopen(outfile, "w");
128 for(j = 0; j < numClusters; j++) {
129 fprintf(fp, "Cluster %d: ", j);
130 for(l = 0; l < numCoords; l++)
131 fprintf(fp, "%f ", clusters[j][l]);
132 fprintf(fp, "\n");
133 }
134 fclose(fp);
135 }
137 free(clusters[0]);
138 free(clusters);
140 /* Print performance numbers on stdout */
141 double t1;
142 io_timing += seconds(t1) - timing;
144 printf("\n---- kMeans Clustering ----\n");
145 printf("Number of threads = %d\n", nthreads);
146 printf("Input file: %s\n", filename);
147 printf("numObjs = %d\n", numObjs);
148 printf("numCoords = %d\n", numCoords);
149 printf("numClusters = %d\n", numClusters);
150 printf("threshold = %.4f\n", threshold);
152 printf("I/O time = %10.4f sec\n", io_timing);
153 printf("Computation timing = %10.4f sec\n", clustering_timing);
155 return(0);
156 }