| rev |
line source |
|
Me@4
|
1 /*
|
|
Me@4
|
2 *
|
|
Me@4
|
3 */
|
|
Me@4
|
4 #include <stdio.h>
|
|
Me@4
|
5 #include <stdlib.h>
|
|
Me@4
|
6 #include <string.h>
|
|
Me@4
|
7 #include <math.h>
|
|
Me@4
|
8 #include <ctype.h>
|
|
Me@4
|
9 #include <errno.h>
|
|
Me@4
|
10 #include <pthread.h>
|
|
msach@6
|
11 #include <unistd.h>
|
|
msach@19
|
12 #include "VMS_Implementations/Vthread_impl/VPThread.h"
|
|
msach@19
|
13 #include "C_Libraries/Queue_impl/PrivateQueue.h"
|
|
msach@20
|
14 #include "C_Libraries/DynArray/DynArray.h"
|
|
msach@20
|
15 #include "C_Libraries/BestEffortMessaging/LossyCom.h"
|
|
Me@4
|
16
|
|
msach@6
|
17 #include <linux/perf_event.h>
|
|
msach@6
|
18 #include <linux/prctl.h>
|
|
msach@6
|
19 #include <sys/syscall.h>
|
|
msach@6
|
20
|
|
Me@4
|
21 #undef DEBUG
|
|
Me@4
|
22 //#define DEBUG
|
|
Me@4
|
23
|
|
msach@20
|
24 //#define MEASURE_PERF
|
|
msach@15
|
25
|
|
Me@4
|
26 #if !defined(unix) && !defined(__unix__)
|
|
Me@4
|
27 #ifdef __MACH__
|
|
Me@4
|
28 #define unix 1
|
|
Me@4
|
29 #define __unix__ 1
|
|
Me@4
|
30 #endif /* __MACH__ */
|
|
Me@4
|
31 #endif /* unix */
|
|
Me@4
|
32
|
|
Me@4
|
33 /* find the appropriate way to define explicitly sized types */
|
|
Me@4
|
34 /* for C99 or GNU libc (also mach's libc) we can use stdint.h */
|
|
Me@4
|
35 #if (__STDC_VERSION__ >= 199900) || defined(__GLIBC__) || defined(__MACH__)
|
|
Me@4
|
36 #include <stdint.h>
|
|
Me@4
|
37 #elif defined(unix) || defined(__unix__) /* some UNIX systems have them in sys/types.h */
|
|
Me@4
|
38 #include <sys/types.h>
|
|
Me@4
|
39 #elif defined(__WIN32__) || defined(WIN32) /* the nameless one */
|
|
Me@4
|
40 typedef unsigned __int8 uint8_t;
|
|
Me@4
|
41 typedef unsigned __int32 uint32_t;
|
|
Me@4
|
42 #endif /* sized type detection */
|
|
Me@4
|
43
|
|
Me@4
|
44 /* provide a millisecond-resolution timer for each system */
|
|
Me@4
|
45 #if defined(unix) || defined(__unix__)
|
|
Me@4
|
46 #include <time.h>
|
|
Me@4
|
47 #include <sys/time.h>
|
|
Me@4
|
/*
 * Millisecond timer (unix variant).
 *
 * The first call records the current wall-clock time as the reference point
 * and returns 0.  Every later call returns the number of milliseconds that
 * have passed since that reference point.  Both time stamps live in static
 * storage, so the function keeps state between calls.
 */
unsigned long get_msec(void) {
    static struct timeval now, start;

    gettimeofday(&now, 0);

    if (start.tv_sec != 0) {
        /* whole seconds scaled to ms, plus the (possibly negative) usec part */
        return (now.tv_sec - start.tv_sec) * 1000 + (now.tv_usec - start.tv_usec) / 1000;
    }

    /* reference point not set yet: this is the first call */
    start = now;
    return 0;
}
|
|
Me@4
|
58 #elif defined(__WIN32__) || defined(WIN32)
|
|
Me@4
|
59 #include <windows.h>
|
|
Me@4
|
/*
 * Millisecond timer (Windows variant): returns the value of GetTickCount().
 */
unsigned long get_msec(void) {
    unsigned long ticks = GetTickCount();

    return ticks;
}
|
|
Me@4
|
63 #else
|
|
Me@4
|
64 //#error "I don't know how to measure time on your platform"
|
|
Me@4
|
65 #endif
|
|
Me@4
|
66
|
|
Me@4
|
67 //======================== Defines =========================
|
|
kshalle@8
|
68 typedef struct perfData measurement_t;
|
|
kshalle@8
|
69 struct perfData{
|
|
kshalle@8
|
70 uint64 cycles;
|
|
kshalle@8
|
71 uint64 instructions;
|
|
kshalle@8
|
72 };
|
|
Me@4
|
73
|
|
Me@4
|
/* Help text printed for -h and after an unrecognized command-line argument. */
const char *usage =
    "Usage: msg_passing_test [options]\n"
    " Starts threads equal to the number of cores and sends\n"
    " messages to random receivers\n\n"
    "Options:\n"
    " -n <num> This specifies the number of sends done by each thread.\n"
    " -h this help screen\n\n";
|
|
Me@4
|
82
|
|
msach@20
|
83 /***************************
|
|
msach@20
|
84 * Barrier Implementation
|
|
msach@20
|
85 ***************************/
|
|
msach@20
|
86
|
|
Me@4
|
87 struct barrier_t
|
|
Me@4
|
88 {
|
|
Me@4
|
89 int counter;
|
|
Me@4
|
90 int nthreads;
|
|
Me@4
|
91 int32 mutex;
|
|
Me@4
|
92 int32 cond;
|
|
kshalle@8
|
93 measurement_t endBarrierCycles;
|
|
kshalle@8
|
94
|
|
Me@4
|
95 };
|
|
Me@4
|
96 typedef struct barrier_t barrier;
|
|
Me@4
|
97
|
|
Me@4
|
98 void inline barrier_init(barrier *barr, int nthreads, VirtProcr *animatingPr)
|
|
Me@4
|
99 {
|
|
Me@4
|
100 barr->counter = 0;
|
|
Me@4
|
101 barr->nthreads = nthreads;
|
|
Me@4
|
102 barr->mutex = VPThread__make_mutex(animatingPr);
|
|
Me@4
|
103 barr->cond = VPThread__make_cond(barr->mutex, animatingPr);
|
|
Me@4
|
104 }
|
|
Me@4
|
105
|
|
kshalle@8
|
106 int cycles_counter_main_fd;
|
|
Me@4
|
107 void inline barrier_wait(barrier *barr, VirtProcr *animatingPr)
|
|
Me@4
|
108 { int i;
|
|
Me@4
|
109
|
|
Me@4
|
110 VPThread__mutex_lock(barr->mutex, animatingPr);
|
|
Me@4
|
111 barr->counter++;
|
|
Me@4
|
112 if(barr->counter == barr->nthreads)
|
|
kshalle@8
|
113 {
|
|
msach@15
|
114 #ifdef MEASURE_PERF
|
|
kshalle@8
|
115 read(cycles_counter_main_fd, &(barr->endBarrierCycles.cycles), \
|
|
kshalle@8
|
116 sizeof(barr->endBarrierCycles.cycles));
|
|
msach@15
|
117 #endif
|
|
kshalle@8
|
118
|
|
kshalle@8
|
119 barr->counter = 0;
|
|
Me@4
|
120 for(i=0; i < barr->nthreads; i++)
|
|
Me@4
|
121 VPThread__cond_signal(barr->cond, animatingPr);
|
|
Me@4
|
122 }
|
|
Me@4
|
123 else
|
|
Me@4
|
124 { VPThread__cond_wait(barr->cond, animatingPr);
|
|
Me@4
|
125 }
|
|
Me@4
|
126 VPThread__mutex_unlock(barr->mutex, animatingPr);
|
|
Me@4
|
127 }
|
|
Me@4
|
128
|
|
kshalle@8
|
129
|
|
msach@20
|
130 /**************************
|
|
msach@20
|
131 * Worker Parameters
|
|
msach@20
|
132 **************************/
|
|
msach@9
|
133 typedef struct
|
|
msach@9
|
134 { struct barrier_t* barrier;
|
|
msach@9
|
135 uint64_t totalWorkCycles;
|
|
msach@9
|
136 uint64_t totalBadCycles;
|
|
msach@9
|
137 uint64_t totalSyncCycles;
|
|
msach@9
|
138 uint64_t totalBadSyncCycles;
|
|
msach@9
|
139 uint64 numGoodSyncs;
|
|
msach@9
|
140 uint64 numGoodTasks;
|
|
msach@20
|
141 uint64_t coreID;
|
|
msach@20
|
142 lossyCom__endpoint_t* localEndpoint;
|
|
msach@20
|
143 lossyCom__exchange_t* centralMsgExchange;
|
|
msach@20
|
144 unsigned int receivedACKs;
|
|
msach@20
|
145 unsigned int broadcasterStatus;
|
|
msach@20
|
146 unsigned int terminate;
|
|
msach@9
|
147 }
|
|
msach@9
|
148 WorkerParams;
|
|
Me@4
|
149
|
|
kshalle@8
|
150 typedef struct
|
|
kshalle@8
|
151 { measurement_t *startExeCycles;
|
|
kshalle@8
|
152 measurement_t *endExeCycles;
|
|
kshalle@8
|
153 }
|
|
kshalle@8
|
154 BenchParams;
|
|
Me@4
|
155
|
|
msach@20
|
156 typedef struct
|
|
msach@20
|
157 {
|
|
msach@20
|
158 lossyCom__endpointID_t receiverID;
|
|
msach@20
|
159 lossyCom__msgBody_t msg;
|
|
msach@20
|
160 } savedMsg_t;
|
|
msach@20
|
161
|
|
Me@4
|
162 //======================== Globals =========================
|
|
Me@4
|
163 char __ProgrammName[] = "overhead_test";
|
|
Me@4
|
164 char __DataSet[255];
|
|
Me@4
|
165
|
|
msach@20
|
166 int num_msg_to_send;
|
|
Me@4
|
167 size_t chunk_size = 0;
|
|
Me@4
|
168
|
|
msach@6
|
169 int cycles_counter_fd[NUM_CORES];
|
|
msach@7
|
170 struct perf_event_attr* hw_event;
|
|
Me@4
|
171
|
|
kshalle@8
|
172 WorkerParams *workerParamsArray;
|
|
kshalle@8
|
173
|
|
msach@20
|
174 // init random number
|
|
msach@20
|
175 uint32_t seed1;
|
|
msach@20
|
176 uint32_t seed2;
|
|
msach@20
|
177
|
|
Me@4
|
178 //======================== App Code =========================
|
|
Me@4
|
179 /*
|
|
Me@4
|
180 * Workload
|
|
Me@4
|
181 */
|
|
msach@6
|
182
|
|
msach@6
|
/*
 * saveCyclesAndInstrs(core, cycles)
 *
 * Read the current value of the counter fd cycles_counter_fd[core] into the
 * lvalue `cycles`.  If read() fails, an error is reported with perror() and
 * `cycles` is set to 0.
 *
 * Both arguments are parenthesised in the expansion, the read() result is
 * kept in an ssize_t, and the temporaries carry a trailing underscore so they
 * cannot capture a caller variable named `nread` or `cycles_fd`.
 * `cycles` is evaluated more than once, so it must not have side effects.
 */
#define saveCyclesAndInstrs(core,cycles) do{                              \
        int cycles_fd_ = cycles_counter_fd[(core)];                       \
        ssize_t nread_ = read(cycles_fd_, &(cycles), sizeof(cycles));     \
                                                                          \
        if(nread_ < 0){                                                   \
            perror("Error reading cycles counter");                       \
            (cycles) = 0;                                                 \
        }                                                                 \
    } while (0) /* do/while(0) makes the macro usable as one statement */
|
|
msach@6
|
193
|
|
msach@20
|
/* Random number generator used to pick the next broadcaster; the definition
 * is not part of the visible code in this file. */
extern inline uint32_t
randomNumber(uint32_t* seed1, uint32_t* seed2);

/*
 * Message bodies with a protocol meaning.  Any other message body is handled
 * by msgHandler() as "you are the next broadcaster".
 * The derived values are parenthesised so they stay intact inside larger
 * expressions (e.g. 2 * TERMINATE).
 */
#define BROADCAST       BROADCAST_ID
#define BROADCAST_ACK   (BROADCAST_ID-1)
#define TERMINATE       (BROADCAST_ID-2)

/* Values of WorkerParams.broadcasterStatus */
#define RECEIVING_BROADCAST 0   /* not the broadcaster                       */
#define BROADCASTING        1   /* should send the next broadcast            */
#define RECEIVING_ACK       2   /* broadcast sent, counting acknowledgements */
|
|
msach@20
|
204
|
|
msach@20
|
205 /*
|
|
msach@20
|
206 * Message Handler Function
|
|
msach@20
|
207 */
|
|
msach@20
|
208 void msgHandler(lossyCom__endpointID_t senderID, lossyCom__msgBody_t msg, void* data)
|
|
msach@20
|
209 {
|
|
msach@20
|
210 WorkerParams* threadData = (WorkerParams*)data;
|
|
msach@20
|
211 lossyCom__endpoint_t* comEndpoint = threadData->localEndpoint;
|
|
msach@20
|
212 lossyCom__endpointID_t receiverID;
|
|
msach@20
|
213
|
|
msach@20
|
214 if(msg == BROADCAST_ID) //answer broadcast message
|
|
msach@20
|
215 {
|
|
msach@20
|
216 lossyCom__sendMsg(comEndpoint, senderID, BROADCAST_ACK);
|
|
msach@20
|
217 return;
|
|
msach@20
|
218 }
|
|
msach@20
|
219 if(msg == (BROADCAST_ACK) && threadData->broadcasterStatus == RECEIVING_ACK)
|
|
msach@20
|
220 {
|
|
msach@20
|
221 threadData->receivedACKs++;
|
|
msach@20
|
222 if(threadData->receivedACKs == NUM_CORES/2)//chose next broadcaster
|
|
msach@20
|
223 {
|
|
msach@20
|
224 do{
|
|
msach@20
|
225 receiverID = randomNumber(&seed1, &seed2) % NUM_CORES;
|
|
msach@20
|
226 }while(receiverID == comEndpoint->endpointID);
|
|
msach@20
|
227
|
|
msach@20
|
228 //send the receiverID to the receiver to notify him that he is next
|
|
msach@20
|
229 lossyCom__sendMsg(comEndpoint, receiverID, receiverID);
|
|
msach@20
|
230 threadData->broadcasterStatus = RECEIVING_BROADCAST;
|
|
msach@20
|
231 }
|
|
msach@20
|
232 return;
|
|
msach@20
|
233 }
|
|
msach@20
|
234 if(msg == TERMINATE) //termination message
|
|
msach@20
|
235 {
|
|
msach@20
|
236 printf("endpoint %d received termination request\n", comEndpoint->endpointID);
|
|
msach@20
|
237 threadData->terminate = TRUE;
|
|
msach@20
|
238 return;
|
|
msach@20
|
239 }
|
|
msach@20
|
240 //
|
|
msach@20
|
241 threadData->broadcasterStatus = BROADCASTING;
|
|
msach@20
|
242 }
|
|
msach@20
|
243
|
|
msach@20
|
244 unsigned int global_broadcast_counter;
|
|
msach@7
|
245
|
|
msach@9
|
246 double
|
|
msach@9
|
247 worker_TLF(void* _params, VirtProcr* animatingPr)
|
|
Me@5
|
248 {
|
|
msach@20
|
249 unsigned int msgCounter;
|
|
msach@20
|
250 unsigned int broadcaster;
|
|
msach@20
|
251 uint32_t wait_iterations;
|
|
msach@9
|
252 WorkerParams* params = (WorkerParams*)_params;
|
|
msach@9
|
253 unsigned int totalWorkCycles = 0, totalBadCycles = 0;
|
|
msach@9
|
254 unsigned int totalSyncCycles = 0, totalBadSyncCycles = 0;
|
|
msach@9
|
255 unsigned int workspace1=0, numGoodSyncs = 0, numGoodTasks = 0;
|
|
kshalle@8
|
256 double workspace2=0.0;
|
|
msach@20
|
257
|
|
msach@20
|
258 //core 0 always starts
|
|
msach@20
|
259 params->broadcasterStatus = params->coreID==0?BROADCASTING:RECEIVING_BROADCAST;
|
|
msach@20
|
260
|
|
msach@20
|
261 /*
|
|
Me@5
|
262 int32 privateMutex = VPThread__make_mutex(animatingPr);
|
|
msach@6
|
263
|
|
msach@6
|
264 int cpuid = sched_getcpu();
|
|
msach@9
|
265
|
|
msach@11
|
266 measurement_t startWorkload, endWorkload, startWorkload2, endWorkload2;
|
|
msach@9
|
267 uint64 numCycles;
|
|
msach@20
|
268 */
|
|
msach@15
|
269 #ifdef MEASURE_PERF
|
|
msach@10
|
270 saveCyclesAndInstrs(cpuid,startWorkload.cycles);
|
|
msach@15
|
271 #endif
|
|
msach@20
|
272
|
|
msach@20
|
273 //initialize endpoint for communication
|
|
msach@20
|
274 lossyCom__endpoint_t comEndpoint;
|
|
msach@20
|
275 params->localEndpoint = &comEndpoint;
|
|
msach@20
|
276 lossyCom__initialize_endpoint(&comEndpoint,
|
|
msach@20
|
277 params->centralMsgExchange,
|
|
msach@20
|
278 params->coreID,
|
|
msach@20
|
279 msgHandler,
|
|
msach@20
|
280 params);
|
|
msach@20
|
281
|
|
msach@20
|
282 lossyCom__endpointID_t receiverID;
|
|
msach@20
|
283 msgCounter = 0;
|
|
msach@20
|
284 while(msgCounter <= num_msg_to_send)
|
|
msach@20
|
285 {
|
|
msach@20
|
286 int i;
|
|
msach@9
|
287
|
|
msach@20
|
288 if(params->broadcasterStatus == BROADCASTING)
|
|
Me@5
|
289 {
|
|
msach@20
|
290 if(msgCounter == num_msg_to_send)//send termination msg
|
|
msach@20
|
291 {
|
|
msach@20
|
292 lossyCom__sendMsg(&comEndpoint,BROADCAST_ID, TERMINATE);
|
|
msach@20
|
293 break;
|
|
msach@20
|
294 }else{ //generate and send random message
|
|
msach@20
|
295 params->receivedACKs = 0;
|
|
msach@20
|
296 lossyCom__sendMsg(&comEndpoint, BROADCAST_ID, BROADCAST);
|
|
msach@20
|
297 global_broadcast_counter++;
|
|
msach@20
|
298 if(global_broadcast_counter % 1000 == 0){
|
|
msach@20
|
299 printf("broadcast count: %d\n", global_broadcast_counter);
|
|
msach@20
|
300 }
|
|
msach@20
|
301 params->broadcasterStatus = RECEIVING_ACK; //mark msg as send
|
|
msach@20
|
302 msgCounter++;
|
|
msach@20
|
303 }
|
|
Me@5
|
304 }
|
|
msach@20
|
305
|
|
msach@20
|
306 //check if the benchmark should terminate
|
|
msach@20
|
307 if(params->terminate)
|
|
msach@20
|
308 break;
|
|
msach@20
|
309
|
|
msach@20
|
310 //receive msg
|
|
msach@20
|
311 lossyCom__receiveMsg(&comEndpoint);
|
|
msach@20
|
312 }
|
|
msach@20
|
313
|
|
msach@15
|
314
|
|
msach@15
|
315 #ifdef MEASURE_PERF
|
|
msach@10
|
316 saveCyclesAndInstrs(cpuid,endWorkload.cycles);
|
|
msach@10
|
317 numCycles = endWorkload.cycles - startWorkload.cycles;
|
|
msach@9
|
318 //sanity check (400K is about 20K iters)
|
|
msach@9
|
319 if( numCycles < 400000 ) {totalWorkCycles += numCycles; numGoodTasks++;}
|
|
msach@9
|
320 else {totalBadCycles += numCycles; }
|
|
msach@15
|
321 #endif
|
|
msach@9
|
322
|
|
msach@20
|
323 barrier_wait(params->barrier, animatingPr);
|
|
Me@5
|
324
|
|
kshalle@8
|
325 params->totalWorkCycles = totalWorkCycles;
|
|
msach@9
|
326 params->totalBadCycles = totalBadCycles;
|
|
msach@9
|
327 params->numGoodTasks = numGoodTasks;
|
|
msach@9
|
328 params->totalSyncCycles = totalSyncCycles;
|
|
msach@9
|
329 params->totalBadSyncCycles = totalBadSyncCycles;
|
|
msach@9
|
330 params->numGoodSyncs = numGoodSyncs;
|
|
msach@9
|
331 /*
|
|
msach@9
|
332 params->totalSyncCycles = VMS__give_num_plugin_cycles();
|
|
msach@9
|
333 params->totalBadSyncCycles = 0;
|
|
msach@9
|
334 params->numGoodSyncs = VMS__give_num_plugin_animations();
|
|
msach@9
|
335 */
|
|
Me@5
|
336 //Shutdown worker
|
|
Me@5
|
337 VPThread__dissipate_thread(animatingPr);
|
|
msach@9
|
338
|
|
msach@9
|
339 //below return never reached --> there for gcc
|
|
msach@9
|
340 return (workspace1 + workspace2); //to prevent gcc from optimizing work out
|
|
Me@5
|
341 }
|
|
Me@4
|
342
|
|
kshalle@8
|
343
|
|
Me@4
|
344 /* this is run after the VMS is set up*/
|
|
kshalle@8
|
345 void benchmark(void *_params, VirtProcr *animatingPr)
|
|
Me@4
|
346 {
|
|
msach@20
|
347 int i, cpuID, idx;
|
|
msach@20
|
348 struct barrier_t barr;
|
|
kshalle@8
|
349 BenchParams *params;
|
|
kshalle@8
|
350
|
|
kshalle@8
|
351 params = (BenchParams *)_params;
|
|
msach@20
|
352
|
|
msach@20
|
353 barrier_init(&barr, NUM_CORES+1, animatingPr);
|
|
msach@20
|
354
|
|
msach@20
|
355 //Init central communication exchange
|
|
msach@20
|
356 lossyCom__exchange_t* centralMsgExchange = lossyCom__initialize(NUM_CORES);
|
|
msach@20
|
357
|
|
msach@6
|
358 //prepare input
|
|
msach@20
|
359 for(i=0; i<NUM_CORES; i++)
|
|
msach@6
|
360 {
|
|
kshalle@8
|
361 workerParamsArray[i].barrier = &barr;
|
|
msach@20
|
362 workerParamsArray[i].coreID = i;
|
|
msach@20
|
363 workerParamsArray[i].centralMsgExchange = centralMsgExchange;
|
|
msach@20
|
364 workerParamsArray[i].terminate = FALSE;
|
|
Me@4
|
365 }
|
|
msach@20
|
366 global_broadcast_counter = 0;
|
|
msach@20
|
367
|
|
msach@20
|
368 // init random number generator for wait and msg content
|
|
msach@20
|
369 seed1 = rand()%1000;
|
|
msach@20
|
370 seed2 = rand()%1000;
|
|
msach@7
|
371
|
|
kshalle@8
|
372 //save cycles before execution of threads, to get total exe cycles
|
|
kshalle@8
|
373 measurement_t *startExeCycles, *endExeCycles;
|
|
kshalle@8
|
374 startExeCycles = params->startExeCycles;
|
|
kshalle@8
|
375
|
|
msach@15
|
376 #ifdef MEASURE_PERF
|
|
kshalle@8
|
377 int nread = read(cycles_counter_main_fd, &(startExeCycles->cycles),
|
|
kshalle@8
|
378 sizeof(startExeCycles->cycles));
|
|
msach@9
|
379 if(nread<0) perror("Error reading cycles counter");
|
|
msach@15
|
380 #endif
|
|
msach@9
|
381
|
|
msach@9
|
382 //create (which starts running) all threads
|
|
msach@20
|
383 for(i=NUM_CORES-1; i>=0; i--)
|
|
msach@20
|
384 {
|
|
msach@20
|
385 VPThread__create_thread_with_affinity((VirtProcrFnPtr)worker_TLF,
|
|
msach@20
|
386 &(workerParamsArray[i]),
|
|
msach@20
|
387 animatingPr,
|
|
msach@20
|
388 i);//schedule to core i
|
|
kshalle@8
|
389 }
|
|
msach@6
|
390
|
|
msach@15
|
391 #ifdef MEASURE_PERF
|
|
msach@9
|
392 //endBarrierCycles read in barrier_wait()! Merten, email me if want to chg
|
|
kshalle@8
|
393 params->endExeCycles->cycles = barr.endBarrierCycles.cycles;
|
|
msach@15
|
394 #endif
|
|
kshalle@8
|
395
|
|
msach@20
|
396 barrier_wait(&barr, animatingPr);
|
|
msach@20
|
397 printf("Total broadcast count: %d\n", global_broadcast_counter);
|
|
msach@20
|
398
|
|
msach@20
|
399 //print send msgs
|
|
msach@20
|
400 /*
|
|
msach@20
|
401 printf("sendMsgs = []\n");
|
|
msach@20
|
402 for(i = 0; i<NUM_CORES; i++)
|
|
msach@20
|
403 {
|
|
msach@20
|
404 printf("sendMsgs.append([");
|
|
msach@20
|
405 for(idx = 0; idx< workerParamsArray[i].sendMsgs->numInArray; idx++)
|
|
msach@20
|
406 {
|
|
msach@20
|
407 printf("(%lu, %lu),",
|
|
msach@20
|
408 (uint64_t)(workerParamsArray[i].ptrToArrayOfSendMsgs[idx]) & 0xFFFFFFFF,
|
|
msach@20
|
409 ((uint64_t)(workerParamsArray[i].ptrToArrayOfSendMsgs[idx]) >> 32 ) & 0xFFFFFFFF);
|
|
msach@20
|
410 }
|
|
msach@20
|
411 printf("])\n");
|
|
msach@20
|
412 }
|
|
msach@20
|
413
|
|
msach@20
|
414
|
|
msach@20
|
415 //print received msgs
|
|
msach@20
|
416 printf("receivedMsgs = []\n");
|
|
msach@20
|
417 for(i = 0; i<NUM_CORES; i++)
|
|
msach@20
|
418 {
|
|
msach@20
|
419 printf("receivedMsgs.append([");
|
|
msach@20
|
420 for(idx = 0; idx< workerParamsArray[i].receivedMsgs->numInArray; idx++)
|
|
msach@20
|
421 {
|
|
msach@20
|
422 printf("(%lu, %lu),",
|
|
msach@20
|
423 (uint64_t)(workerParamsArray[i].ptrToArrayOfReceivedMsgs[idx]) & 0xFFFFFFFF,
|
|
msach@20
|
424 ((uint64_t)(workerParamsArray[i].ptrToArrayOfReceivedMsgs[idx]) >> 32 ) & 0xFFFFFFFF);
|
|
msach@20
|
425 }
|
|
msach@20
|
426 printf("])\n");
|
|
msach@20
|
427 }*/
|
|
Me@4
|
428
|
|
kshalle@8
|
429 /*
|
|
msach@6
|
430 uint64_t overallWorkCycles = 0;
|
|
msach@6
|
431 for(i=0; i<num_threads; i++){
|
|
msach@7
|
432 printf("WorkCycles: %lu\n",input[i].totalWorkCycles);
|
|
msach@6
|
433 overallWorkCycles += input[i].totalWorkCycles;
|
|
Me@4
|
434 }
|
|
msach@6
|
435
|
|
msach@6
|
436 printf("Sum across threads of work cycles: %lu\n", overallWorkCycles);
|
|
msach@6
|
437 printf("Total Execution: %lu\n", endBenchTime.cycles-startBenchTime.cycles);
|
|
kshalle@8
|
438 printf("Runtime/Workcycle Ratio %lu\n",
|
|
kshalle@8
|
439 ((endBenchTime.cycles-startBenchTime.cycles)*100)/overallWorkCycles);
|
|
kshalle@8
|
440 */
|
|
Me@4
|
441
|
|
Me@4
|
442 //======================================================
|
|
Me@4
|
443
|
|
Me@4
|
444 VPThread__dissipate_thread(animatingPr);
|
|
Me@4
|
445 }
|
|
Me@4
|
446
|
|
Me@4
|
447 int main(int argc, char **argv)
|
|
Me@4
|
448 {
|
|
Me@4
|
449 int i;
|
|
Me@4
|
450
|
|
Me@4
|
451 //set global static variables, based on cmd-line args
|
|
Me@4
|
452 for(i=1; i<argc; i++)
|
|
Me@4
|
453 {
|
|
Me@4
|
454 if(argv[i][0] == '-' && argv[i][2] == 0)
|
|
Me@4
|
455 {
|
|
Me@4
|
456 switch(argv[i][1])
|
|
Me@4
|
457 {
|
|
msach@20
|
458 case 'n':
|
|
Me@4
|
459 if(!isdigit(argv[++i][0]))
|
|
Me@4
|
460 {
|
|
msach@20
|
461 fprintf(stderr, "-t must be followed by the number messages to send per core\n");
|
|
Me@4
|
462 return EXIT_FAILURE;
|
|
Me@4
|
463 }
|
|
msach@20
|
464 num_msg_to_send = atoi(argv[i]);
|
|
msach@20
|
465 if(!num_msg_to_send)
|
|
Me@4
|
466 {
|
|
msach@20
|
467 fprintf(stderr, "invalid number of messages to send: %d\n", num_msg_to_send);
|
|
Me@4
|
468 return EXIT_FAILURE;
|
|
Me@4
|
469 }
|
|
Me@4
|
470 break;
|
|
Me@4
|
471 case 'h':
|
|
Me@4
|
472 fputs(usage, stdout);
|
|
msach@20
|
473 return 0;
|
|
Me@4
|
474 default:
|
|
Me@4
|
475 fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
|
|
Me@4
|
476 fputs(usage, stderr);
|
|
Me@4
|
477 return EXIT_FAILURE;
|
|
Me@4
|
478 }//switch
|
|
Me@4
|
479 }//if arg
|
|
Me@4
|
480 else
|
|
Me@4
|
481 {
|
|
msach@20
|
482 fprintf(stderr, "unrecognized argument: %s\n", argv[i]);
|
|
msach@20
|
483 fputs(usage, stderr);
|
|
msach@20
|
484 return EXIT_FAILURE;
|
|
Me@4
|
485 }
|
|
Me@4
|
486 }//for
|
|
msach@7
|
487
|
|
kshalle@8
|
488
|
|
msach@15
|
489 #ifdef MEASURE_PERF
|
|
msach@7
|
490 //setup performance counters
|
|
msach@7
|
491 hw_event = malloc(sizeof(struct perf_event_attr));
|
|
msach@7
|
492 memset(hw_event,0,sizeof(struct perf_event_attr));
|
|
msach@7
|
493
|
|
msach@7
|
494 hw_event->type = PERF_TYPE_HARDWARE;
|
|
msach@7
|
495 hw_event->size = sizeof(hw_event);
|
|
msach@7
|
496 hw_event->disabled = 0;
|
|
msach@7
|
497 hw_event->freq = 0;
|
|
msach@7
|
498 hw_event->inherit = 1; /* children inherit it */
|
|
msach@7
|
499 hw_event->pinned = 1; /* says this virt counter must always be on HW */
|
|
msach@7
|
500 hw_event->exclusive = 0; /* only group on PMU */
|
|
msach@7
|
501 hw_event->exclude_user = 0; /* don't count user */
|
|
msach@7
|
502 hw_event->exclude_kernel = 1; /* don't count kernel */
|
|
msach@7
|
503 hw_event->exclude_hv = 1; /* ditto hypervisor */
|
|
msach@7
|
504 hw_event->exclude_idle = 1; /* don't count when idle */
|
|
msach@7
|
505 hw_event->mmap = 0; /* include mmap data */
|
|
msach@7
|
506 hw_event->comm = 0; /* include comm data */
|
|
msach@7
|
507
|
|
msach@7
|
508 hw_event->config = PERF_COUNT_HW_CPU_CYCLES; //cycles
|
|
msach@7
|
509
|
|
msach@7
|
510 int cpuID, retries;
|
|
msach@7
|
511
|
|
msach@7
|
512 for( cpuID = 0; cpuID < NUM_CORES; cpuID++ )
|
|
msach@7
|
513 { retries = 0;
|
|
msach@7
|
514 do
|
|
msach@7
|
515 { retries += 1;
|
|
msach@7
|
516 cycles_counter_fd[cpuID] =
|
|
msach@7
|
517 syscall(__NR_perf_event_open, hw_event,
|
|
msach@7
|
518 0,//pid_t: 0 is "pid of calling process"
|
|
msach@7
|
519 cpuID,//int: cpu, the value returned by "CPUID" instr(?)
|
|
msach@7
|
520 -1,//int: group_fd, -1 is "leader" or independent
|
|
msach@7
|
521 0//unsigned long: flags
|
|
msach@7
|
522 );
|
|
msach@7
|
523 }
|
|
msach@7
|
524 while(cycles_counter_fd[cpuID]<0 && retries < 100);
|
|
msach@7
|
525 if(retries >= 100)
|
|
msach@7
|
526 {
|
|
msach@7
|
527 fprintf(stderr,"On core %d: ",cpuID);
|
|
msach@7
|
528 perror("Failed to open cycles counter");
|
|
msach@7
|
529 }
|
|
msach@7
|
530 }
|
|
msach@7
|
531
|
|
msach@7
|
532 //Set up counter to accumulate total cycles to process, across all CPUs
|
|
msach@7
|
533
|
|
msach@7
|
534 retries = 0;
|
|
msach@7
|
535 do
|
|
msach@7
|
536 { retries += 1;
|
|
msach@7
|
537 cycles_counter_main_fd =
|
|
msach@7
|
538 syscall(__NR_perf_event_open, hw_event,
|
|
msach@7
|
539 0,//pid_t: 0 is "pid of calling process"
|
|
msach@7
|
540 -1,//int: cpu, -1 means accumulate from all cores
|
|
msach@7
|
541 -1,//int: group_fd, -1 is "leader" == independent
|
|
msach@7
|
542 0//unsigned long: flags
|
|
msach@7
|
543 );
|
|
msach@7
|
544 }
|
|
msach@7
|
545 while(cycles_counter_main_fd<0 && retries < 100);
|
|
msach@7
|
546 if(retries >= 100)
|
|
msach@7
|
547 {
|
|
msach@7
|
548 fprintf(stderr,"in main ");
|
|
msach@7
|
549 perror("Failed to open cycles counter");
|
|
msach@7
|
550 }
|
|
msach@15
|
551 #endif
|
|
kshalle@8
|
552
|
|
msach@9
|
553 measurement_t startExeCycles, endExeCycles;
|
|
msach@9
|
554 BenchParams *benchParams;
|
|
msach@9
|
555
|
|
msach@9
|
556 benchParams = malloc(sizeof(BenchParams));
|
|
msach@9
|
557
|
|
msach@9
|
558 benchParams->startExeCycles = &startExeCycles;
|
|
msach@9
|
559 benchParams->endExeCycles = &endExeCycles;
|
|
msach@9
|
560
|
|
msach@20
|
561 workerParamsArray = (WorkerParams *)malloc( (NUM_CORES) * sizeof(WorkerParams) );
|
|
kshalle@8
|
562 if(workerParamsArray == NULL ) printf("error mallocing worker params array\n");
|
|
kshalle@8
|
563
|
|
msach@9
|
564
|
|
kshalle@8
|
565 //This is the transition to the VMS runtime
|
|
kshalle@8
|
566 VPThread__create_seed_procr_and_do_work( &benchmark, benchParams );
|
|
kshalle@8
|
567
|
|
msach@15
|
568 #ifdef MEASURE_PERF
|
|
msach@9
|
569 uint64_t totalWorkCyclesAcrossCores = 0, totalBadCyclesAcrossCores = 0;
|
|
msach@9
|
570 uint64_t totalSyncCyclesAcrossCores = 0, totalBadSyncCyclesAcrossCores = 0;
|
|
kshalle@8
|
571 for(i=0; i<num_threads; i++){
|
|
kshalle@8
|
572 printf("WorkCycles: %lu\n",workerParamsArray[i].totalWorkCycles);
|
|
msach@9
|
573 // printf("Num Good Tasks: %lu\n",workerParamsArray[i].numGoodTasks);
|
|
msach@9
|
574 // printf("SyncCycles: %lu\n",workerParamsArray[i].totalSyncCycles);
|
|
msach@9
|
575 // printf("Num Good Syncs: %lu\n",workerParamsArray[i].numGoodSyncs);
|
|
kshalle@8
|
576 totalWorkCyclesAcrossCores += workerParamsArray[i].totalWorkCycles;
|
|
msach@9
|
577 totalBadCyclesAcrossCores += workerParamsArray[i].totalBadCycles;
|
|
msach@9
|
578 totalSyncCyclesAcrossCores += workerParamsArray[i].totalSyncCycles;
|
|
msach@9
|
579 totalBadSyncCyclesAcrossCores += workerParamsArray[i].totalBadSyncCycles;
|
|
kshalle@8
|
580 }
|
|
msach@7
|
581
|
|
kshalle@8
|
582 uint64_t totalExeCycles = endExeCycles.cycles - startExeCycles.cycles;
|
|
msach@9
|
583 totalExeCycles -= totalBadCyclesAcrossCores;
|
|
msach@10
|
584 uint64 totalOverhead = totalExeCycles - totalWorkCyclesAcrossCores;
|
|
msach@10
|
585 int32 numSyncs = outer_iters * num_threads * 2;
|
|
msach@10
|
586 printf("Total Execution Cycles: %lu\n", totalExeCycles);
|
|
kshalle@8
|
587 printf("Sum across threads of work cycles: %lu\n", totalWorkCyclesAcrossCores);
|
|
msach@10
|
588 printf("Sum across threads of bad work cycles: %lu\n", totalBadCyclesAcrossCores);
|
|
msach@10
|
589 // printf("Sum across threads of Bad Sync cycles: %lu\n", totalBadSyncCyclesAcrossCores);
|
|
msach@10
|
590 printf("Overhead per sync: %f\n", (double)totalOverhead / (double)numSyncs );
|
|
kshalle@8
|
591 printf("ExeCycles/WorkCycles Ratio %f\n",
|
|
kshalle@8
|
592 (double)totalExeCycles / (double)totalWorkCyclesAcrossCores);
|
|
msach@15
|
593 #else
|
|
msach@20
|
594 printf("#No measurement done!\n");
|
|
msach@15
|
595 #endif
|
|
Me@4
|
596 return 0;
|
|
msach@7
|
597 }
|