Mercurial > cgi-bin > hgwebdir.cgi > VMS > 2__runs_and_data
comparison scripts/overhead.py @ 1:75c55af8338f
Overhead.py: Script to generate the results of the exec_vs_task_size benchmarks
| author | Merten Sach <msach@mailbox.tu-berlin.de> |
|---|---|
| date | Fri, 09 Dec 2011 15:09:08 +0100 |
| parents | |
| children | 8323aae8c303 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:30682dd71414 |
|---|---|
| 1 #! /usr/bin/env python | |
| 2 # -*- coding: utf-8 -*- | |
| 3 | |
| 4 import sys | |
| 5 from re import match, search | |
| 6 from datetime import datetime | |
| 7 from subprocess import call,Popen,PIPE | |
| 8 | |
# This script generates a graph that represents the overhead involved in
# synchronisation operations.
# (Kept as a comment: the text follows the imports, so it would not be picked
# up as the module docstring anyway.)

# Help text printed when the script is called with the wrong number of
# arguments.
usage = """
This runs the exec time vs task size in three levels of loop nest. The outer most iterates through
a selection of numbers-of-thread. For each of those, the next level iterates over a number of work-loops-per-task
values. The innermost repeats several times and chooses the best.
Finally, it generates an output file for each value of number-of-threads that a companion gnuplot script turns
into a .eps graph.
It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
Usage:
overhead.py [executable binary] [path to output dir]
"""

# Number of cores the benchmark binary was compiled for.
NUM_CORES = 4
# Values passed to the binary's -i option: number of iterations of the inner
# (work) loop in one task.
ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]
# Value passed to the binary's -o option: number of iterations of the outer
# loop.
TASKS_PER_THREAD = 30000
# Values passed to the binary's -t option: total number of threads; one
# output file is produced per entry.
TOTAL_THREADS_TABLE = [8, 32, 128, 512]
| 30 | |
def getNumber(line):
    """Return the first unsigned decimal number found in *line*, as a string.

    A number is a run of digits, optionally followed by a decimal point and
    further digits (e.g. "42", "3.14", "7."). Signs and exponents are not
    recognised. The caller is responsible for converting the text with
    int() or float().

    Raises:
        ValueError: if *line* contains no digit at all.
    """
    # Raw string: "\d" and "\." are regex escapes, not string escapes.
    match_obj = search(r"(\d+\.?\d*)", line)
    if match_obj is None:
        raise ValueError("no number found in line: %r" % (line,))
    return match_obj.group(1)
| 37 | |
# Exactly two arguments are expected: the benchmark binary and the output
# directory. Anything else prints the usage text and stops.
# (Single-argument print(...) is used because it behaves the same as the
# print statement on Python 2 and also parses on Python 3.)
if len(sys.argv) != 3:
    print(usage)
    sys.exit(0)

cmd = sys.argv[1]
# Probe that the given path can be opened before starting the long
# benchmark runs.
try:
    f = open(cmd)
except IOError:
    # open() failed, so no file object exists here; calling f.close() in
    # this branch (or in a finally clause) would raise NameError and hide
    # the message below.
    print("Please provide a valid executable.")
    sys.exit(1)
else:
    f.close()

output_dir_path = sys.argv[2]
| 53 | |
| 54 #=================================================================== | |
| 55 # Done with parsing cmd line inputs, start doing the runs | |
| 56 # | |
| 57 | |
def _run_benchmark(binary, total_threads, iters_per_task, tasks_per_thread):
    """Run the benchmark binary once and parse the metrics it prints.

    The binary is invoked as ``binary -t <threads> -i <iters> -o <tasks>`` and
    its standard output is scanned for the three lines starting with
    "Sum across threads of work cycles:", "Total Execution Cycles:" and
    "ExeCycles/WorkCycles Ratio".

    Returns:
        A dict with the keys "total_workcycles" (int), "total_exe_cycles"
        (int) and "exeCycles_workCycles_ratio" (float), or None when at least
        one of the three lines is missing from the output.

    Raises:
        ValueError: if a matching line carries no parsable number.
    """
    # The arguments are passed as a list without a shell, so a binary path
    # containing spaces or shell metacharacters is handled correctly.
    child = Popen([binary,
                   "-t", str(total_threads),
                   "-i", str(iters_per_task),
                   "-o", str(tasks_per_thread)],
                  stdout=PIPE, universal_newlines=True)
    # communicate() reads all output AND waits for the child to terminate,
    # so no unreaped processes accumulate across the many runs.
    program_output = child.communicate()[0]

    # A fresh dict per run: values from an earlier run can never leak into
    # this one when a line is missing.
    metrics = {}
    for line in program_output.split("\n"):
        if line.startswith("Sum across threads of work cycles:"):
            metrics["total_workcycles"] = int(getNumber(line))
        elif line.startswith("Total Execution Cycles:"):
            metrics["total_exe_cycles"] = int(getNumber(line))
        elif line.startswith("ExeCycles/WorkCycles Ratio"):
            metrics["exeCycles_workCycles_ratio"] = float(getNumber(line))
    if len(metrics) != 3:
        return None
    return metrics


# One output file per thread count; inside it, one row per task size.
for totalThreads in TOTAL_THREADS_TABLE:
    print("\nDoing run with %d threads" % totalThreads)
    output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, TASKS_PER_THREAD)
    print("output file: %s" % output)
    # True division: the value is reported with %f in the file header.
    threadsPerCore = float(totalThreads) / NUM_CORES

    # array_of_results[i] holds the successful runs for ITERS_PER_TASK_TABLE[i].
    array_of_results = []
    for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
        print("Run for %s workload iterations in a task" % workload_iterations_in_task)
        results = []
        for run in range(5):
            # Progress prefix; the result of the run completes the line.
            sys.stdout.write("Run %d " % run)
            sys.stdout.flush()
            metrics = _run_benchmark(cmd, totalThreads,
                                     workload_iterations_in_task,
                                     TASKS_PER_THREAD)
            if metrics is None:
                print("skipped: benchmark output is missing an expected line")
                continue
            results.append(metrics)
            print("ratio %f" % metrics["exeCycles_workCycles_ratio"])
        array_of_results.append(results)

    # Open the gnuplot data file for this thread count.
    try:
        gnuplot_output = open(output, "w")
    except IOError:
        print("Cannot open output file %s" % output)
        sys.exit(1)

    table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
        "<iters per task>",
        "<total exe cycles>",
        "<total work cyc>",
        "<one task cyc>",
        "<total overhead cyc>",
        "<num syncs>",
        "<overhead per Sync cyc>",
        "<Exe/Work ratio>")

    # The with-block closes the file even if a write fails part-way.
    with gnuplot_output:
        # Write the file header.
        gnuplot_output.writelines(["# Output file name: %s\n" % output,
                                   "# Date of Run: %s\n" % str(datetime.now()),
                                   "# Number of Cores: %d\n" % NUM_CORES,
                                   "# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
                                   table_header,
                                   "# " + (len(table_header)-3)*"-" + "\n"])

        # Now write one row per task size.
        for workload_iterations_in_task, results in zip(ITERS_PER_TASK_TABLE,
                                                        array_of_results):
            if not results:
                sys.stderr.write("No valid run for %s workload iterations; row omitted\n"
                                 % workload_iterations_in_task)
                continue

            # Take the shortest run (first one wins on ties).
            best = min(results, key=lambda r: r["total_exe_cycles"])
            total_workcycles = best["total_workcycles"]
            total_exe_cycles = best["total_exe_cycles"]
            exeCycles_workCycles_ratio = best["exeCycles_workCycles_ratio"]

            # Derived numbers; the script counts two syncs per task.
            overhead = total_exe_cycles - total_workcycles
            total_syncs = totalThreads * TASKS_PER_THREAD * 2
            overhead_per_sync = float(overhead) / float(total_syncs)
            cycles_of_task = float(total_workcycles) / float(TASKS_PER_THREAD * totalThreads)

            gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
                workload_iterations_in_task,
                total_exe_cycles,
                total_workcycles,
                cycles_of_task,
                overhead,
                total_syncs,
                overhead_per_sync,
                exeCycles_workCycles_ratio))
