Mercurial > cgi-bin > hgwebdir.cgi > PR > Applications > Vthread > Vthread__KMeans__Bench
changeset 0:e69e4c2d612a
Initial pthreads version
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Wed, 03 Aug 2011 19:30:34 +0200 |
| parents | |
| children | 8e7bdab2840f |
| files | .hgignore Makefile README.txt color edge file_io.c kmeans.h pthread.txt pthreads_kmeans.c pthreads_main.c wtime.c |
| diffstat | 11 files changed, 817 insertions(+), 0 deletions(-) [+] |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/.hgignore Wed Aug 03 19:30:34 2011 +0200 1.3 @@ -0,0 +1,6 @@ 1.4 +syntax: glob 1.5 + 1.6 +nbproject 1.7 +c-ray-mt 1.8 +*.ppm 1.9 +*.o
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 2.2 +++ b/Makefile Wed Aug 03 19:30:34 2011 +0200 2.3 @@ -0,0 +1,43 @@ 2.4 +CC = gcc 2.5 +CFLAGS = -m64 -ffast-math -fwrapv -fno-omit-frame-pointer -O0 -D VPTHREAD -D APPLICATION=KMEANS -g -Wall 2.6 +LDFLAGS = 2.7 + 2.8 +LIBS = -lm -lpthread 2.9 +TARGET = kmeans 2.10 +OBJ = \ 2.11 + VPThread_lib/VMS/Histogram/Histogram.o \ 2.12 + VPThread_lib/VMS/Histogram/FloatHist.o \ 2.13 + VPThread_lib/VMS/CoreLoop.o \ 2.14 + VPThread_lib/VMS/VMS.o \ 2.15 + VPThread_lib/VMS/MasterLoop.o \ 2.16 + VPThread_lib/VMS/Queue_impl/PrivateQueue.o \ 2.17 + VPThread_lib/VMS/Hash_impl/PrivateHash.o \ 2.18 + VPThread_lib/VMS/DynArray/DynArray.o \ 2.19 + VPThread_lib/VPThread_PluginFns.o \ 2.20 + VPThread_lib/VPThread_lib.o \ 2.21 + VPThread_lib/VMS/Histogram/DblHist.o \ 2.22 + VPThread_lib/VPThread.o \ 2.23 + VPThread_lib/VMS/probes.o \ 2.24 + VPThread_lib/VMS/ProcrContext.o \ 2.25 + VPThread_lib/VPThread_Request_Handlers.o \ 2.26 + VPThread_lib/VPThread_helper.o \ 2.27 + VPThread_lib/VMS/Hash_impl/MurmurHash2.o \ 2.28 + VPThread_lib/VMS/vmalloc.o \ 2.29 + VPThread_lib/VMS/contextSwitch.o \ 2.30 + VPThread_lib/VMS/Queue_impl/BlockingQueue.o \ 2.31 + VPThread_lib/VMS/vutilities.o \ 2.32 + wtime.o \ 2.33 + file_io.o \ 2.34 + pthreads_kmeans.o \ 2.35 + pthreads_main.o 2.36 + 2.37 +all: $(TARGET) 2.38 + 2.39 +$(TARGET): $(OBJ) 2.40 + $(CC) -o $@ $(OBJ) $(LDFLAGS) $(LIBS) 2.41 + 2.42 +%.o : %.c 2.43 + $(CC) -c $(CFLAGS) -o $@ $< 2.44 + 2.45 +clean: 2.46 + rm -f $(OBJ) $(TARGET)
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/README.txt Wed Aug 03 19:30:34 2011 +0200 3.3 @@ -0,0 +1,22 @@ 3.4 +Kernel: k-means Clustering 3.5 + 3.6 +This is a kernel-type benchmark of a simple off-line clustering algorithm. Careful: Hash Checksum Verification of the benchmark output is not feasible for kmeans. Usage of the float data type leads to imprecisions, caused by threading, up to the fifth place after the decimal point in the results. 3.7 + 3.8 +Installation: 3.9 + 3.10 +To install the kernel benchmark, navigate to the directory this file is located in, open up a terminal and simply type 'make'. For certain architectures 3.11 +or special compilation options, you might need to change compilation parameters in the makefile. 3.12 + 3.13 +Usage: 3.14 + 3.15 +You may execute the benchmark by navigating to this directory after compilation and typing 3.16 + 3.17 +./kmeans -b -i <input filename> -n <cluster center count> 3.18 + 3.19 +The specification of the number of threads used to perform the clustering process depends on the parallel programming model. 3.20 + 3.21 +Benchmark versions: 3.22 + 3.23 +Serial 3.24 +POSIX Threads 3.25 +OpenMP SuperScalar
4.1 Binary file color has changed
5.1 Binary file edge has changed
6.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 6.2 +++ b/file_io.c Wed Aug 03 19:30:34 2011 +0200 6.3 @@ -0,0 +1,162 @@ 6.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 6.5 +/* File: file_io.c */ 6.6 +/* Description: This program reads point data from a file */ 6.7 +/* and write cluster output to files */ 6.8 +/* Input file format: */ 6.9 +/* ascii file: each line contains 1 data object */ 6.10 +/* binary file: first 4-byte integer is the number of data */ 6.11 +/* objects and 2nd integer is the no. of features (or */ 6.12 +/* coordinates) of each object */ 6.13 +/* */ 6.14 +/* Author: Wei-keng Liao */ 6.15 +/* ECE Department Northwestern University */ 6.16 +/* email: wkliao@ece.northwestern.edu */ 6.17 +/* Copyright, 2005, Wei-keng Liao */ 6.18 +/* */ 6.19 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 6.20 + 6.21 +#include <stdio.h> 6.22 +#include <stdlib.h> 6.23 +#include <string.h> /* strtok() */ 6.24 +#include <sys/types.h> /* open() */ 6.25 +#include <sys/stat.h> 6.26 +#include <fcntl.h> 6.27 +#include <unistd.h> /* read(), close() */ 6.28 + 6.29 +#include "kmeans.h" 6.30 + 6.31 +#define MAX_CHAR_PER_LINE 128 6.32 + 6.33 + 6.34 +/* 6.35 +* Function: file_read 6.36 +* ------------------- 6.37 +* Function for loading input data into memory. 6.38 +*/ 6.39 +double** file_read(int isBinaryFile, /* flag: 0 or 1 */ 6.40 + char *filename, /* input file name */ 6.41 + int *numObjs, /* count of data objects (local) */ 6.42 + int *numCoords) /* count of coordinates */ 6.43 +{ 6.44 + float **objects; 6.45 + int i, j, len; 6.46 + ssize_t numBytesRead; 6.47 + 6.48 + if (isBinaryFile) { /* input file is in raw binary format -------------*/ 6.49 + int infile; 6.50 + fprintf(stderr, "Trying to read from binary file: %s", filename); 6.51 + if ((infile = open(filename, O_RDONLY, "0600")) == -1) { 6.52 + fprintf(stderr, "Error: Input File Not Found\n"); 6.53 + exit(EXIT_FAILURE); 6.54 + } 6.55 + numBytesRead = read(infile, numObjs, sizeof(int)); 6.56 + assert(numBytesRead == sizeof(int)); 6.57 + numBytesRead = read(infile, numCoords, sizeof(int)); 6.58 + assert(numBytesRead == sizeof(int)); 6.59 + 6.60 + /* allocate space for objects[][] and read all objects */ 6.61 + len = (*numObjs) * (*numCoords); 6.62 + objects = (float**)malloc((*numObjs) * sizeof(float*)); 6.63 + objects[0] = (float*) malloc(len * sizeof(float)); 6.64 + 6.65 + if(objects == NULL || objects[0] == NULL) { 6.66 + fprintf(stderr, "Could Not Allocate Memory\n"); 6.67 + exit(EXIT_FAILURE); 6.68 + } 6.69 + 6.70 + for (i = 1; i < (*numObjs); i++) 6.71 + objects[i] = objects[i-1] + (*numCoords); 6.72 + 6.73 + numBytesRead = read(infile, objects[0], len*sizeof(float)); 6.74 + assert(numBytesRead == len*sizeof(float)); 6.75 + fprintf(stderr, " ... Input read successfully!\n"); 6.76 + close(infile); 6.77 + 6.78 + } else { /* input file is in ASCII format -------------------------------*/ 6.79 + FILE *infile; 6.80 + char *line, *ret; 6.81 + int lineLen; 6.82 + 6.83 + fprintf(stderr, "Trying to read from ASCII file: %s", filename); 6.84 + if ((infile = fopen(filename, "r")) == NULL) { 6.85 + fprintf(stderr, "Error: Input File Not Found\n"); 6.86 + exit(EXIT_FAILURE); 6.87 + } 6.88 + 6.89 + /* first find the number of objects */ 6.90 + lineLen = MAX_CHAR_PER_LINE; 6.91 + line = (char*) malloc(lineLen); 6.92 + assert(line != NULL); 6.93 + 6.94 + (*numObjs) = 0; 6.95 + while (fgets(line, lineLen, infile) != NULL) { 6.96 + /* check each line to find the max line length */ 6.97 + while (strlen(line) == lineLen-1) { 6.98 + /* this line read is not complete */ 6.99 + len = strlen(line); 6.100 + fseek(infile, -len, SEEK_CUR); 6.101 + 6.102 + /* increase lineLen */ 6.103 + lineLen += MAX_CHAR_PER_LINE; 6.104 + line = (char*) realloc(line, lineLen); 6.105 + assert(line != NULL); 6.106 + 6.107 + ret = fgets(line, lineLen, infile); 6.108 + assert(ret != NULL); 6.109 + } 6.110 + 6.111 + if (strtok(line, " \t\n") != 0) 6.112 + (*numObjs)++; 6.113 + } 6.114 + rewind(infile); 6.115 + 6.116 + /* find the no. objects of each object */ 6.117 + (*numCoords) = 0; 6.118 + while (fgets(line, lineLen, infile) != NULL) { 6.119 + if (strtok(line, " \t\n") != 0) { 6.120 + /* ignore the id (first coordiinate): numCoords = 1; */ 6.121 + while (strtok(NULL, " ,\t\n") != NULL) (*numCoords)++; 6.122 + break; /* this makes read from 1st object */ 6.123 + } 6.124 + } 6.125 + rewind(infile); 6.126 + 6.127 + /* allocate space for objects[][] and read all objects */ 6.128 + len = (*numObjs) * (*numCoords); 6.129 + objects = (float**)malloc((*numObjs) * sizeof(float*)); 6.130 + assert(objects != NULL); 6.131 + objects[0] = (float*) malloc(len * sizeof(float)); 6.132 + assert(objects[0] != NULL); 6.133 + for (i=1; i<(*numObjs); i++) 6.134 + objects[i] = objects[i-1] + (*numCoords); 6.135 + 6.136 + i = 0; 6.137 + /* read all objects */ 6.138 + while (fgets(line, lineLen, infile) != NULL) { 6.139 + if (strtok(line, " \t\n") == NULL) continue; 6.140 + for (j=0; j<(*numCoords); j++) 6.141 + objects[i][j] = atof(strtok(NULL, " ,\t\n")); 6.142 + i++; 6.143 + } 6.144 + fprintf(stderr, " ... Input read successfully!\n"); 6.145 + fclose(infile); 6.146 + free(line); 6.147 + } 6.148 + 6.149 + 6.150 + double** objects_d = (double**)malloc((*numObjs) * sizeof(double*)); 6.151 + objects_d[0] = (double*) malloc(len * sizeof(double)); 6.152 + for (i = 1; i < (*numObjs); i++) 6.153 + objects_d[i] = objects_d[i-1] + (*numCoords); 6.154 + 6.155 + for (i=0; i< (*numObjs); i++){ 6.156 + for (j=0; j<(*numCoords); j++){ 6.157 + objects_d[i][j] = objects[i][j]; 6.158 + } 6.159 + } 6.160 + free(objects[0]); 6.161 + free(objects); 6.162 + 6.163 + return objects_d; 6.164 +} 6.165 +
7.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 7.2 +++ b/kmeans.h Wed Aug 03 19:30:34 2011 +0200 7.3 @@ -0,0 +1,23 @@ 7.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 7.5 +/* File: kmeans.h (an OpenMP version) */ 7.6 +/* Description: header file for a simple k-means clustering program */ 7.7 +/* */ 7.8 +/* Author: Wei-keng Liao */ 7.9 +/* ECE Department Northwestern University */ 7.10 +/* email: wkliao@ece.northwestern.edu */ 7.11 +/* Copyright, 2005, Wei-keng Liao */ 7.12 +/* */ 7.13 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 7.14 + 7.15 +#ifndef _H_KMEANS 7.16 +#define _H_KMEANS 7.17 + 7.18 +#include <assert.h> 7.19 + 7.20 +double** pthreads_kmeans(int, double**, int, int, int, double, int*); 7.21 + 7.22 +double** file_read(int, char*, int*, int*); 7.23 + 7.24 +double wtime(void); 7.25 + 7.26 +#endif
8.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 8.2 +++ b/pthread.txt Wed Aug 03 19:30:34 2011 +0200 8.3 @@ -0,0 +1,23 @@ 8.4 +kmeans Benchmark 8.5 +Input: color 8.6 + 8.7 +Workers: 1 8.8 +time: 4.30710 8.9 +Workers: 2 8.10 +time: 2.95336 8.11 +Workers: 4 8.12 +time: 2.64520 8.13 +Workers: 8 8.14 +time: 3.01213 8.15 +------------------- 8.16 +Input: edge 8.17 + 8.18 +Workers: 1 8.19 +time: 9.44506 8.20 +Workers: 2 8.21 +time: 5.79456 8.22 +Workers: 4 8.23 +time: 4.76466 8.24 +Workers: 8 8.25 +time: 4.35773 8.26 +-------------------
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/pthreads_kmeans.c Wed Aug 03 19:30:34 2011 +0200 9.3 @@ -0,0 +1,344 @@ 9.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 9.5 +/* File: pthreads_kmeans.c (OpenMP version) */ 9.6 +/* Description: Implementation of simple k-means clustering algorithm */ 9.7 +/* This program takes an array of N data objects, each with */ 9.8 +/* M coordinates and performs a k-means clustering given a */ 9.9 +/* user-provided value of the number of clusters (K). The */ 9.10 +/* clustering results are saved in 2 arrays: */ 9.11 +/* 1. a returned array of size [K][N] indicating the center */ 9.12 +/* coordinates of K clusters */ 9.13 +/* 2. membership[N] stores the cluster center ids, each */ 9.14 +/* corresponding to the cluster a data object is assigned */ 9.15 +/* */ 9.16 +/* Author: Wei-keng Liao */ 9.17 +/* ECE Department, Northwestern University */ 9.18 +/* email: wkliao@ece.northwestern.edu */ 9.19 +/* Copyright, 2005, Wei-keng Liao */ 9.20 +/* */ 9.21 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 9.22 + 9.23 +#include <stdio.h> 9.24 +#include <stdlib.h> 9.25 +#include <pthread.h> 9.26 +#include <time.h> 9.27 +#include <math.h> 9.28 +#include "kmeans.h" 9.29 + 9.30 +#define PREC 300 9.31 + 9.32 +char __ProgrammName[] = "kmeans"; 9.33 +char __DataSet[255]; 9.34 + 9.35 +extern int nthreads; /* Thread count */ 9.36 +double delta; /* Delta is a value between 0 and 1 describing the percentage of objects which changed cluster membership */ 9.37 +volatile int finished; 9.38 + 9.39 +pthread_barrier_t barr; 9.40 +pthread_mutex_t lock1; 9.41 +pthread_attr_t attr; 9.42 + 9.43 +/* 9.44 +* Struct: input 9.45 +* ------------- 9.46 +* Encapsulates all the input data for the benchmark, i.e. the object list, 9.47 +* clustering output and the input statistics. 9.48 +*/ 9.49 +struct input { 9.50 + int t; 9.51 + double **objects; /* Object list */ 9.52 + double **clusters; /* Cluster list, out: [numClusters][numCoords] */ 9.53 + int *membership; /* For each object, contains the index of the cluster it currently belongs to */ 9.54 + int **local_newClusterSize; /* Thread-safe, [nthreads][numClusters] */ 9.55 + double ***local_newClusters; /* Thread-safe, [nthreads][numClusters][numCoords] */ 9.56 + int numObjs,numClusters,numCoords; 9.57 +}; 9.58 + 9.59 +/* 9.60 +* Function: euclid_dist_2 9.61 +* ----------------------- 9.62 +* Computes the square of the euclidean distance between two multi-dimensional points. 9.63 +*/ 9.64 +__inline static double euclid_dist_2(int numdims, double *coord1, double *coord2) { 9.65 + int i; 9.66 + double ans=0.0; 9.67 + 9.68 + for (i=0; i<numdims; i++) 9.69 + ans += (coord1[i]-coord2[i]) * (coord1[i]-coord2[i]); 9.70 + 9.71 + return(ans); 9.72 +} 9.73 + 9.74 +/* 9.75 +* Function: find_nearest_cluster 9.76 +* ------------------------------ 9.77 +* Function determining the cluster center which is closest to the given object. 9.78 +* Returns the index of that cluster center. 9.79 +*/ 9.80 +__inline static int find_nearest_cluster(int numClusters, int numCoords, double *object, double **clusters) { 9.81 + int index, i; 9.82 + double dist, min_dist; 9.83 + 9.84 + /* find the cluster id that has min distance to object */ 9.85 + index = 0; 9.86 + min_dist = euclid_dist_2(numCoords, object, clusters[0]); 9.87 + for (i=1; i<numClusters; i++) { 9.88 + dist = euclid_dist_2(numCoords, object, clusters[i]); 9.89 + 9.90 + /* no need square root */ 9.91 + if (dist < min_dist) { /* find the min and its array index */ 9.92 + min_dist = dist; 9.93 + index = i; 9.94 + } 9.95 + } 9.96 + return index; 9.97 +} 9.98 + 9.99 +void work(struct input *x){ 9.100 + int tid = x->t; 9.101 + double local_delta=0; 9.102 + int i; 9.103 + for (i = tid; i < x->numObjs; i += nthreads) { 9.104 + /* find the array index of nearest cluster center */ 9.105 + int index = find_nearest_cluster(x->numClusters, x->numCoords, 9.106 + x->objects[i], x->clusters); 9.107 + 9.108 + /* if membership changes, increase delta by 1 */ 9.109 + if (x->membership[i] != index) 9.110 + local_delta += 1.0; 9.111 + /* assign the membership to object i */ 9.112 + x->membership[i] = index; 9.113 + 9.114 + /* update new cluster centers : sum of all objects located 9.115 + within (average will be performed later) */ 9.116 + x->local_newClusterSize[tid][index]++; 9.117 + int j; 9.118 + for (j=0; j < x->numCoords; j++) 9.119 + x->local_newClusters[tid][index][j] += x->objects[i][j]; 9.120 + 9.121 + } 9.122 + pthread_mutex_lock(&lock1); 9.123 + delta +=local_delta; 9.124 + pthread_mutex_unlock(&lock1); 9.125 +} 9.126 +/* 9.127 +* Function: thread function work 9.128 +* -------------- 9.129 +* Worker function for threading. Work distribution is done so that each thread computers 9.130 +*/ 9.131 +void* tfwork(void *ip) 9.132 +{ 9.133 + struct input *x; 9.134 + x = (struct input *)ip; 9.135 + 9.136 + for(;;){ 9.137 + pthread_barrier_wait(&barr); 9.138 + if (finished){ 9.139 + break; 9.140 + } 9.141 + work(x); 9.142 + pthread_barrier_wait(&barr); 9.143 + } 9.144 + 9.145 + pthread_exit(NULL); 9.146 +} 9.147 + 9.148 +/* 9.149 +* Function: create_array_2d_f 9.150 +* -------------------------- 9.151 +* Allocates memory for a 2-dim double array as needed for the algorithm. 9.152 +*/ 9.153 +double** create_array_2d_f(int height, int width) { 9.154 + double** ptr; 9.155 + int i; 9.156 + ptr = calloc(height, sizeof(double*)); 9.157 + assert(ptr != NULL); 9.158 + ptr[0] = calloc(width * height, sizeof(double)); 9.159 + assert(ptr[0] != NULL); 9.160 + /* Assign pointers correctly */ 9.161 + for(i = 1; i < height; i++) 9.162 + ptr[i] = ptr[i-1] + width; 9.163 + return ptr; 9.164 +} 9.165 + 9.166 +/* 9.167 +* Function: create_array_2Dd_i 9.168 +* -------------------------- 9.169 +* Allocates memory for a 2-dim integer array as needed for the algorithm. 9.170 +*/ 9.171 +int** create_array_2d_i(int height, int width) { 9.172 + int** ptr; 9.173 + int i; 9.174 + ptr = calloc(height, sizeof(int*)); 9.175 + assert(ptr != NULL); 9.176 + ptr[0] = calloc(width * height, sizeof(int)); 9.177 + assert(ptr[0] != NULL); 9.178 + /* Assign pointers correctly */ 9.179 + for(i = 1; i < height; i++) 9.180 + ptr[i] = ptr[i-1] + width; 9.181 + return ptr; 9.182 +} 9.183 + 9.184 +/* 9.185 +* Function: pthreads_kmeans 9.186 +* ------------------------- 9.187 +* Algorithm main function. Returns a 2D array of cluster centers of size [numClusters][numCoords]. 9.188 +*/ 9.189 +double** pthreads_kmeans(int is_perform_atomic, /* in: */ 9.190 + double **objects, /* in: [numObjs][numCoords] */ 9.191 + int numCoords, /* no. coordinates */ 9.192 + int numObjs, /* no. objects */ 9.193 + int numClusters, /* no. clusters */ 9.194 + double threshold, /* % objects change membership */ 9.195 + int *membership) /* out: [numObjs] */ 9.196 +{ 9.197 + 9.198 + int i, j, k, index, loop = 0, rc; 9.199 + int *newClusterSize; /* [numClusters]: no. objects assigned in each 9.200 + new cluster */ 9.201 + double **clusters; /* out: [numClusters][numCoords] */ 9.202 + double **newClusters; /* [numClusters][numCoords] */ 9.203 + double timing; 9.204 + int **local_newClusterSize; /* [nthreads][numClusters] */ 9.205 + double ***local_newClusters; /* [nthreads][numClusters][numCoords] */ 9.206 + 9.207 + pthread_t *thread; 9.208 + 9.209 + /* === MEMORY SETUP === */ 9.210 + 9.211 + /* [numClusters] clusters of [numCoords] double coordinates each */ 9.212 + clusters = create_array_2d_f(numClusters, numCoords); 9.213 + 9.214 + /* Pick first numClusters elements of objects[] as initial cluster centers */ 9.215 + for (i=0; i < numClusters; i++) 9.216 + for (j=0; j < numCoords; j++) 9.217 + clusters[i][j] = objects[i][j]; 9.218 + 9.219 + /* Initialize membership, no object belongs to any cluster yet */ 9.220 + for (i = 0; i < numObjs; i++) 9.221 + membership[i] = -1; 9.222 + 9.223 + /* newClusterSize holds information on the count of members in each cluster */ 9.224 + newClusterSize = (int*)calloc(numClusters, sizeof(int)); 9.225 + assert(newClusterSize != NULL); 9.226 + 9.227 + /* newClusters holds the coordinates of the freshly created clusters */ 9.228 + newClusters = create_array_2d_f(numClusters, numCoords); 9.229 + local_newClusterSize = create_array_2d_i(nthreads, numClusters); 9.230 + 9.231 + /* local_newClusters is a 3D array */ 9.232 + local_newClusters = (double***)malloc(nthreads * sizeof(double**)); 9.233 + assert(local_newClusters != NULL); 9.234 + local_newClusters[0] = (double**) malloc(nthreads * numClusters * sizeof(double*)); 9.235 + assert(local_newClusters[0] != NULL); 9.236 + 9.237 + /* Set up the pointers */ 9.238 + for (i = 1; i < nthreads; i++) 9.239 + local_newClusters[i] = local_newClusters[i-1] + numClusters; 9.240 + 9.241 + for (i = 0; i < nthreads; i++) { 9.242 + for (j = 0; j < numClusters; j++) { 9.243 + local_newClusters[i][j] = (double*)calloc(numCoords, sizeof(double)); 9.244 + assert(local_newClusters[i][j] != NULL); 9.245 + } 9.246 + } 9.247 + /* Perform thread setup */ 9.248 + thread = (pthread_t*)calloc(nthreads, sizeof(pthread_t)); 9.249 + 9.250 + printf("nthreads %d\n", nthreads); 9.251 + pthread_barrier_init(&barr, NULL, nthreads); 9.252 + pthread_attr_init(&attr); 9.253 + pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE); 9.254 + pthread_mutex_init(&lock1, NULL); 9.255 + finished=0; 9.256 + 9.257 + struct input *ip = malloc(nthreads * sizeof(struct input)); 9.258 + /* Provide thread-safe memory locations for each worker */ 9.259 + for(i = 0; i < nthreads; i++){ 9.260 + ip[i].t = i; 9.261 + ip[i].objects=objects; 9.262 + ip[i].clusters=clusters; 9.263 + ip[i].membership=membership; 9.264 + ip[i].local_newClusterSize=local_newClusterSize; 9.265 + ip[i].local_newClusters=local_newClusters; 9.266 + ip[i].numObjs=numObjs; 9.267 + ip[i].numClusters=numClusters; 9.268 + ip[i].numCoords=numCoords; 9.269 + 9.270 + if (i>0){ 9.271 + rc = pthread_create(&thread[i], &attr, tfwork, (void *)&ip[i]); 9.272 + if (rc) { 9.273 + fprintf(stderr, "ERROR: Return Code For Thread Creation Is %d\n", rc); 9.274 + exit(EXIT_FAILURE); 9.275 + } 9.276 + } 9.277 + } 9.278 + 9.279 + /* === COMPUTATIONAL PHASE === */ 9.280 + 9.281 + do { 9.282 + delta = 0.0; 9.283 + pthread_barrier_wait(&barr); 9.284 + work(&ip[0]); 9.285 + 9.286 + pthread_barrier_wait(&barr); 9.287 + /* Let the main thread perform the array reduction */ 9.288 + for (i = 0; i < numClusters; i++) { 9.289 + for (j = 0; j < nthreads; j++) { 9.290 + newClusterSize[i] += local_newClusterSize[j][i]; 9.291 + local_newClusterSize[j][i] = 0.0; 9.292 + for (k = 0; k < numCoords; k++) { 9.293 + newClusters[i][k] += local_newClusters[j][i][k]; 9.294 + local_newClusters[j][i][k] = 0.0; 9.295 + } 9.296 + } 9.297 + } 9.298 + 9.299 + /* Average the sum and replace old cluster centers with newClusters */ 9.300 + for (i = 0; i < numClusters; i++) { 9.301 + for (j = 0; j < numCoords; j++) { 9.302 + if (newClusterSize[i] > 1) 9.303 + clusters[i][j] = newClusters[i][j] / newClusterSize[i]; 9.304 + newClusters[i][j] = 0.0; /* set back to 0 */ 9.305 + } 9.306 + newClusterSize[i] = 0; /* set back to 0 */ 9.307 + } 9.308 + 9.309 + delta /= numObjs; 9.310 + } while (loop++ < PREC && delta > threshold); 9.311 + 9.312 + // Changing to a fixed number of iterations is for benchmarking reasons. I know it affects the results compared to the original program, 9.313 + // but minor double precision floating point inaccuracies caused by threading would otherwise lead to huge differences in computed 9.314 + // iterations, therefore making benchmarking completely unreliable. 9.315 + 9.316 + finished=1; 9.317 + pthread_barrier_wait(&barr); 9.318 + 9.319 + for(i = 1; i < nthreads; i++) { 9.320 + rc = pthread_join(thread[i], NULL); 9.321 + if (rc) { 9.322 + fprintf(stderr, "ERROR: Return Code For Thread Join Is %d\n", rc); 9.323 + exit(EXIT_FAILURE); 9.324 + } 9.325 + } 9.326 + 9.327 + free(ip); 9.328 + free(thread); 9.329 + pthread_barrier_destroy(&barr); 9.330 + pthread_mutex_destroy(&lock1); 9.331 + pthread_attr_destroy(&attr); 9.332 + 9.333 + free(local_newClusterSize[0]); 9.334 + free(local_newClusterSize); 9.335 + 9.336 + for (i = 0; i < nthreads; i++) 9.337 + for (j = 0; j < numClusters; j++) 9.338 + free(local_newClusters[i][j]); 9.339 + free(local_newClusters[0]); 9.340 + free(local_newClusters); 9.341 + 9.342 + free(newClusters[0]); 9.343 + free(newClusters); 9.344 + free(newClusterSize); 9.345 + return clusters; 9.346 +} 9.347 +
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/pthreads_main.c Wed Aug 03 19:30:34 2011 +0200 10.3 @@ -0,0 +1,157 @@ 10.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 10.5 +/* File: pthreads_main.c (an OpenMP version) */ 10.6 +/* Description: This program shows an example on how to call a subroutine */ 10.7 +/* that implements a simple k-means clustering algorithm */ 10.8 +/* based on Euclid distance. */ 10.9 +/* Input file format: */ 10.10 +/* ascii file: each line contains 1 data object */ 10.11 +/* binary file: first 4-byte integer is the number of data */ 10.12 +/* objects and 2nd integer is the no. of features (or */ 10.13 +/* coordinates) of each object */ 10.14 +/* */ 10.15 +/* Author: Wei-keng Liao */ 10.16 +/* ECE Department Northwestern University */ 10.17 +/* email: wkliao@ece.northwestern.edu */ 10.18 +/* Copyright, 2005, Wei-keng Liao */ 10.19 +/* */ 10.20 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 10.21 + 10.22 +#include <stdio.h> 10.23 +#include <stdlib.h> 10.24 +#include <string.h> /* strtok() */ 10.25 +#include <sys/types.h> /* open() */ 10.26 +#include <sys/stat.h> 10.27 +#include <sys/time.h> 10.28 +#include <fcntl.h> 10.29 +#include <unistd.h> /* getopt() */ 10.30 +#include <time.h> 10.31 +#include "kmeans.h" 10.32 + 10.33 +#define seconds(tm) gettimeofday(&tp,(struct timezone *)0);\ 10.34 + tm=tp.tv_sec+tp.tv_usec/1000000.0 10.35 + 10.36 +struct timeval tp; 10.37 + 10.38 +int numClusters, numCoords, numObjs, nthreads; 10.39 + 10.40 +/* 10.41 +* Function: usage 10.42 +* --------------- 10.43 +* Prints information on how to call the program. 10.44 +*/ 10.45 +static void usage(char *argv0) { 10.46 + char *help = 10.47 + "Usage: %s [switches] -i filename -n num_clusters [OPTIONS]\n" 10.48 + " -i filename : file containing data to be clustered\n" 10.49 + " -b : input file is in binary format (default no)\n" 10.50 + " -n num_clusters: number of clusters (K must be > 1)\n" 10.51 + " -p nproc : number of threads (default 1)\n" 10.52 + " -o filename : write output to file\n"; 10.53 + fprintf(stderr, help, argv0); 10.54 + exit(-1); 10.55 +} 10.56 + 10.57 +/*---< main() >-------------------------------------------------------------*/ 10.58 +int main(int argc, char **argv) { 10.59 + int opt; 10.60 + extern char *optarg; 10.61 + extern int optind; 10.62 + int i, j; 10.63 + int isBinaryFile; 10.64 + 10.65 + int *membership; /* [numObjs] */ 10.66 + char *filename, *outfile; 10.67 + double **objects; /* [numObjs][numCoords] data objects */ 10.68 + double **clusters; /* [numClusters][numCoords] cluster center */ 10.69 + double threshold; 10.70 + double timing, io_timing, clustering_timing; 10.71 + 10.72 + /* some default values */ 10.73 + nthreads = 1; /* Amount of threads to use */ 10.74 + numClusters = 1; /* Amount of cluster centers */ 10.75 + threshold = 0.001; /* Percentage of objects that need to change membership for the clusting to continue */ 10.76 + isBinaryFile = 0; /* 0 if the input file is in ASCII format, 1 for binary format */ 10.77 + filename = NULL; /* Name of the input file */ 10.78 + outfile = NULL; /* Name of the output file */ 10.79 + 10.80 + /* Parse command line options */ 10.81 + while ( (opt=getopt(argc,argv,"o:p:i:n:t:bh"))!= EOF) { 10.82 + switch (opt) { 10.83 + case 'i': filename=optarg; 10.84 + break; 10.85 + case 'b': isBinaryFile = 1; 10.86 + break; 10.87 + case 'n': numClusters = atoi(optarg); 10.88 + break; 10.89 + case 'p': nthreads = atoi(optarg); 10.90 + break; 10.91 + case 'h': usage(argv[0]); 10.92 + break; 10.93 + case 'o': outfile=optarg; 10.94 + break; 10.95 + default: usage(argv[0]); 10.96 + break; 10.97 + } 10.98 + } 10.99 + 10.100 + if (filename == NULL) usage(argv[0]); 10.101 + 10.102 + seconds(io_timing); 10.103 + 10.104 + /* Read input data points from given input file */ 10.105 + objects = file_read(isBinaryFile, filename, &numObjs, &numCoords); 10.106 + assert(objects != NULL); 10.107 + 10.108 + seconds(timing); 10.109 + io_timing = timing - io_timing; 10.110 + clustering_timing = timing; 10.111 + 10.112 + membership = (int*) malloc(numObjs * sizeof(int)); 10.113 + assert(membership != NULL); 10.114 + 10.115 + /* Launch the core computation algorithm */ 10.116 + clusters = pthreads_kmeans(0, objects, numCoords, numObjs, 10.117 + numClusters, threshold, membership); 10.118 + 10.119 + free(objects[0]); 10.120 + free(objects); 10.121 + 10.122 + seconds(timing); 10.123 + clustering_timing = timing - clustering_timing; 10.124 + 10.125 + /* Memory cleanup */ 10.126 + free(membership); 10.127 + 10.128 + if(outfile != NULL) { 10.129 + int l; 10.130 + FILE* fp = fopen(outfile, "w"); 10.131 + for(j = 0; j < numClusters; j++) { 10.132 + fprintf(fp, "Cluster %d: ", j); 10.133 + for(l = 0; l < numCoords; l++) 10.134 + fprintf(fp, "%f ", clusters[j][l]); 10.135 + fprintf(fp, "\n"); 10.136 + } 10.137 + fclose(fp); 10.138 + } 10.139 + 10.140 + free(clusters[0]); 10.141 + free(clusters); 10.142 + 10.143 + /* Print performance numbers on stdout */ 10.144 + double t1; 10.145 + io_timing += seconds(t1) - timing; 10.146 + 10.147 + printf("\n---- kMeans Clustering ----\n"); 10.148 + printf("Number of threads = %d\n", nthreads); 10.149 + printf("Input file: %s\n", filename); 10.150 + printf("numObjs = %d\n", numObjs); 10.151 + printf("numCoords = %d\n", numCoords); 10.152 + printf("numClusters = %d\n", numClusters); 10.153 + printf("threshold = %.4f\n", threshold); 10.154 + 10.155 + printf("I/O time = %10.4f sec\n", io_timing); 10.156 + printf("Computation timing = %10.4f sec\n", clustering_timing); 10.157 + 10.158 + return(0); 10.159 +} 10.160 +
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/wtime.c Wed Aug 03 19:30:34 2011 +0200 11.3 @@ -0,0 +1,37 @@ 11.4 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 11.5 +/* File: wtime.c */ 11.6 +/* Description: a timer that reports the current wall time */ 11.7 +/* */ 11.8 +/* Author: Wei-keng Liao */ 11.9 +/* ECE Department Northwestern University */ 11.10 +/* email: wkliao@ece.northwestern.edu */ 11.11 +/* Copyright, 2005, Wei-keng Liao */ 11.12 +/* */ 11.13 +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ 11.14 + 11.15 +#include <sys/time.h> 11.16 +#include <stdio.h> 11.17 +#include <stdlib.h> 11.18 + 11.19 +/* 11.20 +* Function: wtime 11.21 +* --------------- 11.22 +* Provides a millisecond-resolution timer for measurement purposes. 11.23 +*/ 11.24 +double wtime(void) 11.25 +{ 11.26 + double now_time; 11.27 + struct timeval etstart; 11.28 + struct timezone tzp; 11.29 + 11.30 + if (gettimeofday(&etstart, &tzp) == -1) { 11.31 + fprintf(stderr, "Timing Error: Could Not Get Current Time\n"); 11.32 + exit(EXIT_FAILURE); 11.33 + } 11.34 + 11.35 + now_time = ((double)etstart.tv_sec) + /* in seconds */ 11.36 + ((double)etstart.tv_usec) / 1000000.0; /* in microseconds */ 11.37 + return now_time; 11.38 +} 11.39 + 11.40 +
