#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Provenance (Mercurial changeset that added this script):
#   User:    Merten Sach
#   Date:    1328542176 -3600 (Mon Feb 06 16:29:36 2012 +0100)
#   Node ID: db3409eab3220978236f85191e4c4cfcf9cb63e4
#   Parent:  3716aef85ba71b06da1ee51e4b21d15a32ba67df
#   Message: exec_time_vs_task_size: data generation script for xoanon,
#            because of higher core count
#   Path:    scripts/overhead_2_tasks_per_outer_iter.py
"""
Generate the measurement files from which a companion gnuplot script
plots the overhead involved in synchronisation operations.

The benchmark binary is run for every combination of total thread count
(TOTAL_THREADS_TABLE) and workload iterations per task
(ITERS_PER_TASK_TABLE).  Each combination is repeated RUNS_PER_CONFIG
times and the run with the fewest total execution cycles is kept.  One
output file is written per thread count.
"""

import sys
from re import match, search
from datetime import datetime
from subprocess import call, Popen, PIPE

usage = """
    This runs the exec time vs task size in three levels of loop nest.  The outer most iterates through
    a selection of numbers-of-thread.  For each of those, the next level iterates over a number of work-loops-per-task
    values.  The innermost repeats several times and chooses the best.
    Finally, it generates an output file for each value of number-of-threads that a companion gnuplot script turns
    into a .eps graph.
    It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
    Usage:
        overhead_2_tasks_per_outer_iter.py [executable binary] [path to output dir]
"""

NUM_CORES = 4  # Number of cores the code was compiled for
ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]  # Number of iterations of inner loop
ITERS_PER_OUTER_LOOP = 30000  # Number of iterations of outer loop
TOTAL_THREADS_TABLE = [8, 32, 128, 512]
RUNS_PER_CONFIG = 5  # Repetitions per configuration; the fastest one is reported

tasks_per_thread = ITERS_PER_OUTER_LOOP * 2

# (result key, line prefix printed by the benchmark, converter) for every
# value extracted from the benchmark's standard output.
OUTPUT_FIELDS = (
    ("total_workcycles", "Sum across threads of work cycles:", int),
    ("total_exe_cycles", "Total Execution Cycles:", int),
    ("exeCycles_workCycles_ratio", "ExeCycles/WorkCycles Ratio", float),
)

# Labels for the table header of the output file.  They are blank in the
# script as committed and are kept blank so the generated files do not
# change; the comments name the value written to each column.
COLUMN_LABELS = (
    "",  # workload iterations per task
    "",  # total execution cycles
    "",  # total work cycles
    "",  # work cycles per task
    "",  # overhead (execution cycles - work cycles)
    "",  # total number of syncs
    "",  # overhead per sync
    "",  # execution cycles / work cycles
)

ROW_FORMAT = "%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n"


def getNumber(line):
    """Return the first decimal number found in *line*, as a string.

    A number is a run of digits, optionally followed by a dot and more
    digits.  Raises ValueError when *line* contains no digit.
    """
    match_obj = search(r"(\d+\.?\d*)", line)
    if match_obj is None:
        raise ValueError("no number found in line: %r" % line)
    return match_obj.group(1)


def parse_program_output(program_output):
    """Extract the measurements listed in OUTPUT_FIELDS from benchmark output.

    Returns a dict keyed by the result keys of OUTPUT_FIELDS.  If a prefix
    occurs on several lines the last occurrence wins.  Raises ValueError
    when any expected line is absent, so that a failed run can never be
    reported with values left over from an earlier run.
    """
    parsed = {}
    for line in program_output.split("\n"):
        for key, prefix, convert in OUTPUT_FIELDS:
            if line.startswith(prefix):
                parsed[key] = convert(getNumber(line))
    missing = [key for key, _, _ in OUTPUT_FIELDS if key not in parsed]
    if missing:
        raise ValueError("benchmark output is missing: %s" % ", ".join(missing))
    return parsed


def run_benchmark(cmd, totalThreads, workload_iterations_in_task):
    """Run the benchmark binary once and return its parsed measurements.

    The arguments are passed as a list (no shell), so paths containing
    spaces or shell metacharacters are handled correctly.  The child's
    stderr is inherited; the call waits for the child to exit.
    """
    process = Popen([cmd,
                     "-t", str(totalThreads),
                     "-i", str(workload_iterations_in_task),
                     "-o", str(ITERS_PER_OUTER_LOOP)],
                    stdout=PIPE, stderr=None, universal_newlines=True)
    program_output = process.communicate()[0]
    return parse_program_output(program_output)


def compute_row(workload_iterations_in_task, results, totalThreads):
    """Return the output-table row for one workload size.

    *results* is the list of per-run dicts for this workload size.  The run
    with the fewest total execution cycles is used (the first one on ties).
    The returned tuple matches ROW_FORMAT.
    """
    best = min(results, key=lambda run: run["total_exe_cycles"])
    total_workcycles = best["total_workcycles"]
    total_exe_cycles = best["total_exe_cycles"]
    # Recomputed from the cycle counts rather than taken from the parsed
    # ratio line, as in the original script.
    exeCycles_workCycles_ratio = float(total_exe_cycles) / float(total_workcycles)

    overhead = total_exe_cycles - total_workcycles
    total_syncs = totalThreads * tasks_per_thread
    overhead_per_sync = float(overhead) / float(total_syncs)
    cycles_of_task = float(total_workcycles) / float(total_syncs)

    return (workload_iterations_in_task,
            total_exe_cycles,
            total_workcycles,
            cycles_of_task,
            overhead,
            total_syncs,
            overhead_per_sync,
            exeCycles_workCycles_ratio)


def write_results(output, totalThreads, array_of_results):
    """Write the measurement file *output* for one thread count.

    *array_of_results* holds one list of per-run dicts for each entry of
    ITERS_PER_TASK_TABLE, in the same order.  Prints a message and exits
    with status 1 when the file cannot be opened for writing.
    """
    threadsPerCore = float(totalThreads) / NUM_CORES
    table_header = "# " + "\t".join("%20s" % label for label in COLUMN_LABELS) + "\n"

    try:
        gnuplot_output = open(output, "w")
    except IOError:
        print("Cannot open output file %s" % output)
        sys.exit(1)

    with gnuplot_output:
        gnuplot_output.writelines([
            "# Output file name: %s\n" % output,
            "# Date of Run: %s\n" % str(datetime.now()),
            "# Number of Cores: %d\n" % NUM_CORES,
            "# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
            table_header,
            "# " + (len(table_header) - 3) * "-" + "\n",
        ])
        for workload_iterations_in_task, results in zip(ITERS_PER_TASK_TABLE,
                                                        array_of_results):
            gnuplot_output.write(
                ROW_FORMAT % compute_row(workload_iterations_in_task,
                                         results, totalThreads))


def main(argv=None):
    """Run all benchmark configurations and write one output file per thread count.

    *argv* defaults to sys.argv and must be
    [script, executable binary, path to output dir].
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        print(usage)
        sys.exit(0)

    cmd = argv[1]
    # Only verifies that the path can be opened for reading.
    try:
        with open(cmd):
            pass
    except IOError:
        print("Please provide a valid executable.")
        sys.exit(1)

    output_dir_path = argv[2]

    for totalThreads in TOTAL_THREADS_TABLE:
        print("\nDoing run with %d threads" % totalThreads)
        output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path,
                                                     totalThreads,
                                                     ITERS_PER_OUTER_LOOP)
        print("output file: %s" % output)

        array_of_results = []
        for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
            print("Run for %s workload iterations in a task" % workload_iterations_in_task)
            results = []
            for run in range(RUNS_PER_CONFIG):
                # Progress prefix without a newline; flushed so it is
                # visible while the benchmark is still running.
                sys.stdout.write("Run %d " % run)
                sys.stdout.flush()
                measurement = run_benchmark(cmd, totalThreads,
                                            workload_iterations_in_task)
                results.append(measurement)
                print("ratio %f" % measurement["exeCycles_workCycles_ratio"])
            array_of_results.append(results)

        write_results(output, totalThreads, array_of_results)


if __name__ == "__main__":
    main()