| rev |
line source |
|
msach@1
|
1 #! /usr/bin/env python
|
|
msach@1
|
2 # -*- coding: utf-8 -*-
|
|
msach@1
|
3
|
|
msach@1
|
4 import sys
|
|
msach@1
|
5 from re import match, search
|
|
msach@1
|
6 from datetime import datetime
|
|
msach@1
|
7 from subprocess import call,Popen,PIPE
|
|
msach@1
|
8
|
|
msach@1
|
9 """
|
|
msach@1
|
10 This script generates a graph that represents the overhead
|
|
msach@1
|
11
|
|
msach@1
|
12 involved in synchronisation operations
|
|
msach@1
|
13 """
|
|
msach@1
|
14
|
|
msach@1
|
# Help text printed when the script is called with the wrong number of
# command-line arguments.
usage = """
This runs the exec time vs task size in three levels of loop nest. The outermost iterates through
a selection of numbers-of-threads. For each of those, the next level iterates over a number of work-loops-per-task
values. The innermost repeats several times and chooses the best.
Finally, it generates an output file for each value of number-of-threads that a companion gnuplot script turns
into a .eps graph.
It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
Usage:
overhead.py [executable binary] [path to output dir]
"""
|
|
msach@1
|
25
|
|
msach@1
|
26 NUM_CORES = 4 #Number of Cores the code was compiled for
|
|
msach@1
|
27 ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640] #Number of iterations of inner loop
|
|
msach@1
|
28 TASKS_PER_THREAD = 30000 #Number of iterations of outer loop
|
|
msach@1
|
29 TOTAL_THREADS_TABLE = [8, 32, 128, 512]
|
|
msach@1
|
30
|
|
msach@1
|
def getNumber(line):
    """Return the first decimal number found in ``line`` as a string.

    A number is a run of digits, optionally followed by a dot and further
    digits (e.g. ``"42"``, ``"3.14"`` or ``"7."``). The caller converts the
    returned text to ``int`` or ``float`` as needed.

    Raises:
        ValueError: if ``line`` contains no digits at all.
    """
    # Raw string so that "\d" reaches the regex engine untouched instead of
    # being treated as an (invalid) string escape sequence.
    match_obj = search(r"(\d+\.?\d*)", line)
    if match_obj is None:
        raise ValueError("no number found in line: %r" % (line,))
    return match_obj.group(1)
|
|
msach@1
|
37
|
|
msach@1
|
# Command-line handling: exactly two arguments are expected, the benchmark
# executable and the directory that receives the measurement files.
if len(sys.argv) != 3:
    print(usage)
    # NOTE(review): a usage error exits with status 0; kept as-is so that
    # existing callers see the same exit code.
    sys.exit(0)

cmd = sys.argv[1]
# Check that the executable path can at least be opened before starting the
# (long) measurement runs.
try:
    f = open(cmd)
except IOError:
    # open() failed, so there is no file object to close here; touching "f"
    # in this branch would raise NameError and hide the real problem.
    print("Please provide a valid executable.")
    sys.exit(1)
else:
    f.close()

output_dir_path = sys.argv[2]
|
|
msach@1
|
53
|
|
msach@1
|
54 #===================================================================
|
|
msach@1
|
55 # Done with parsing cmd line inputs, start doing the runs
|
|
msach@1
|
56 #
|
|
msach@1
|
57
|
|
msach@1
|
# Measurements that every run of the benchmark binary must report.
REQUIRED_MEASUREMENTS = ("total_workcycles",
                         "total_exe_cycles",
                         "exeCycles_workCycles_ratio")

# Column header of the result table; it is the same for every output file.
table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
    "<iters per task>",
    "<total exe cycles>",
    "<total work cyc>",
    "<one task cyc>",
    "<total overhead cyc>",
    "<num syncs>",
    "<overhead per Sync cyc>",
    "<Exe/Work ratio>")

# For every thread count: run the benchmark 5 times for each task size, keep
# the run with the fewest execution cycles, and write one table row per task
# size into "<output dir>/<threads>_thds__o<tasks>__perfCtrs.meas".
for totalThreads in TOTAL_THREADS_TABLE:
    print("\nDoing run with %d threads" % totalThreads)
    output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, TASKS_PER_THREAD)
    print("output file: %s" % output)
    # Float division so the "%f per Core" header line is right even when the
    # thread count is not a multiple of NUM_CORES.
    threadsPerCore = float(totalThreads) / NUM_CORES

    # array_of_results[i] holds the per-run measurements for
    # ITERS_PER_TASK_TABLE[i].
    array_of_results = []
    for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
        print("Run for %s workload iterations in a task" % workload_iterations_in_task)
        results = []
        for run in range(5):
            # Stay on the same line; the ratio of this run is appended below.
            sys.stdout.write("Run %d " % run)
            sys.stdout.flush()

            # Argument list instead of a shell command string: no shell
            # quoting problems with the executable path. communicate() reads
            # all output and waits for the child to exit.
            # universal_newlines=True yields text on Python 2 and Python 3.
            try:
                benchmark = Popen([cmd,
                                   "-t", str(totalThreads),
                                   "-i", str(workload_iterations_in_task),
                                   "-o", str(TASKS_PER_THREAD)],
                                  stdout=PIPE, stderr=None,
                                  universal_newlines=True)
            except OSError as err:
                sys.stderr.write("\nCannot execute %s: %s\n" % (cmd, err))
                sys.exit(1)
            program_output = benchmark.communicate()[0]

            # Pick the three measurements out of the program output. A fresh
            # dict per run guarantees that values from an earlier run are
            # never reused by accident.
            measurements = {}
            for line in program_output.splitlines():
                if line.startswith("Sum across threads of work cycles:"):
                    measurements["total_workcycles"] = int(getNumber(line))
                elif line.startswith("Total Execution Cycles:"):
                    measurements["total_exe_cycles"] = int(getNumber(line))
                elif line.startswith("ExeCycles/WorkCycles Ratio"):
                    measurements["exeCycles_workCycles_ratio"] = float(getNumber(line))

            missing = [key for key in REQUIRED_MEASUREMENTS if key not in measurements]
            if missing:
                sys.stderr.write("\n%s did not report: %s\n" % (cmd, ", ".join(missing)))
                sys.exit(1)

            results.append(measurements)
            print("ratio %f" % measurements["exeCycles_workCycles_ratio"])
        array_of_results.append(results)

    # open gnuplot output
    try:
        gnuplot_output = open(output, "w")
    except IOError:
        print("Cannot open output file %s" % output)
        sys.exit(1)

    # The with-block closes the file even if formatting or writing fails.
    with gnuplot_output:
        # write header to file
        gnuplot_output.writelines(["# Output file name: %s\n" % output,
                                   "# Date of Run: %s\n" % str(datetime.now()),
                                   "# Number of Cores: %d\n" % NUM_CORES,
                                   "# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
                                   table_header,
                                   "# " + (len(table_header)-3)*"-" + "\n"])

        # Now print the results out, one row per task size.
        for workload_iterations_in_task, results in zip(ITERS_PER_TASK_TABLE, array_of_results):
            # take shortest run (first one wins on ties)
            best_run = min(results, key=lambda r: r["total_exe_cycles"])
            total_workcycles = best_run["total_workcycles"]
            total_exe_cycles = best_run["total_exe_cycles"]

            # Calculate numbers
            overhead = total_exe_cycles - total_workcycles
            total_syncs = totalThreads * TASKS_PER_THREAD * 2
            overhead_per_sync = float(overhead) / float(total_syncs)
            cycles_of_task = float(total_workcycles) / float(TASKS_PER_THREAD * totalThreads)

            # The ratio is recomputed here rather than taken from the program
            # output, and it counts only half of the overhead.
            # NOTE(review): presumably intentional -- confirm with the author.
            exeCycles_workCycles_ratio = float(total_workcycles+float(overhead)/2)/float(total_workcycles)

            gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
                workload_iterations_in_task,
                total_exe_cycles,
                total_workcycles,
                cycles_of_task,
                overhead,
                total_syncs,
                overhead_per_sync,
                exeCycles_workCycles_ratio))
|