#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""Generate the data for a graph of the overhead involved in
synchronisation operations.

The script runs a benchmark binary over a grid of thread counts and task
sizes and writes one measurement table per thread count.
"""

import sys
from re import match, search
from datetime import datetime
from subprocess import call, Popen, PIPE

usage = """
This runs the exec time vs task size in three levels of loop nest. The outer most iterates through
a selection of numbers-of-thread. For each of those, the next lever iterates over a number of work-loops-per-task
values. The innermost repeats several times and chooses the best.
Finally, it generates an output file for each value of number-of-threads that a companion gluplot script turns
into a .eps graph.
It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
Usage:
overhead.py [executable binary] [path to output dir]
"""

NUM_CORES = 4  # Number of cores the code was compiled for
ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]  # Iterations of inner loop
TASKS_PER_THREAD = 30000  # Number of iterations of outer loop
TOTAL_THREADS_TABLE = [8, 32, 128, 512]
RUNS_PER_CONFIG = 5  # Repetitions per configuration; the shortest one is kept

# (line prefix printed by the benchmark, key in the result dict, converter)
_OUTPUT_FIELDS = (
    ("Sum across threads of work cycles:", "total_workcycles", int),
    ("Total Execution Cycles:", "total_exe_cycles", int),
    ("ExeCycles/WorkCycles Ratio", "exeCycles_workCycles_ratio", float),
)

# Column labels for the table header, in the order the row values are
# written.  Each is at most 20 characters so it fits the %20s columns.
_COLUMN_LABELS = (
    "iters_per_task",
    "total_exe_cycles",
    "total_work_cycles",
    "cycles_per_task",
    "overhead_cycles",
    "total_syncs",
    "overhead_per_sync",
    "exe/work_ratio",
)


def getNumber(line):
    """Return the first decimal number found in *line*, as a string.

    Raises:
        ValueError: if *line* contains no digits.
    """
    match_obj = search(r"(\d+\.?\d*)", line)
    if match_obj is None:
        raise ValueError("no number found in line: %r" % line)
    return match_obj.group(1)


def _parse_program_output(program_output):
    """Extract the three measurements from one benchmark run's stdout.

    Returns a dict keyed by the names in ``_OUTPUT_FIELDS``.

    Raises:
        ValueError: if any expected line is missing, so that a failed run
            cannot silently reuse the numbers of the previous run.
    """
    parsed = {}
    for line in program_output.split("\n"):
        for prefix, key, convert in _OUTPUT_FIELDS:
            if line.startswith(prefix):
                parsed[key] = convert(getNumber(line))
    missing = [key for _, key, _ in _OUTPUT_FIELDS if key not in parsed]
    if missing:
        raise ValueError("benchmark output is missing: %s" % ", ".join(missing))
    return parsed


def _run_once(cmd, total_threads, iters_per_task):
    """Run the benchmark binary once and return its parsed measurements."""
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact.
    args = [cmd,
            "-t", str(total_threads),
            "-i", str(iters_per_task),
            "-o", str(TASKS_PER_THREAD)]
    # universal_newlines=True yields text on both Python 2 and Python 3.
    process = Popen(args, stdout=PIPE, stderr=None, universal_newlines=True)
    program_output, _ = process.communicate()
    return _parse_program_output(program_output)


def _summarize(iters_per_task, total_threads, results):
    """Build one output row from the shortest of the repeated runs."""
    best = min(results, key=lambda result: result["total_exe_cycles"])
    total_workcycles = best["total_workcycles"]
    total_exe_cycles = best["total_exe_cycles"]

    overhead = total_exe_cycles - total_workcycles
    total_syncs = total_threads * TASKS_PER_THREAD * 2
    overhead_per_sync = float(overhead) / float(total_syncs)
    cycles_of_task = float(total_workcycles) / float(TASKS_PER_THREAD * total_threads)

    # The ratio is recomputed from the chosen run and counts only half of
    # the overhead.  NOTE(review): the reason for the factor 1/2 is not
    # evident from this file -- confirm with the benchmark's author.
    if total_workcycles:
        ratio = float(total_workcycles + float(overhead) / 2) / float(total_workcycles)
    else:
        # Avoid losing every measurement to a ZeroDivisionError.
        ratio = float("nan")

    return (iters_per_task, total_exe_cycles, total_workcycles, cycles_of_task,
            overhead, total_syncs, overhead_per_sync, ratio)


def _write_table(gnuplot_output, output, total_threads, rows):
    """Write the commented header and the measurement rows to the open file."""
    table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % _COLUMN_LABELS
    # True division: the value is reported with %f.
    threads_per_core = float(total_threads) / NUM_CORES

    gnuplot_output.writelines([
        "# Output file name: %s\n" % output,
        "# Date of Run: %s\n" % str(datetime.now()),
        "# Number of Cores: %d\n" % NUM_CORES,
        "# Number of Threads: %f per Core, %d total\n" % (threads_per_core, total_threads),
        table_header,
        "# " + (len(table_header) - 3) * "-" + "\n",
    ])
    for row in rows:
        gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % row)


def main(argv):
    """Run every configuration and write one ``.meas`` file per thread count.

    Args:
        argv: command-line vector: script name, benchmark executable,
            output directory.

    Returns:
        The process exit status: 0 on success or after printing the usage
        text, 1 if the executable or an output file cannot be opened.
    """
    if len(argv) != 3:
        print(usage)
        return 0

    cmd = argv[1]
    output_dir_path = argv[2]

    # Only open the file if it exists; on failure there is nothing to close.
    try:
        with open(cmd):
            pass
    except IOError:
        print("Please provide a valid executable.")
        return 1

    for total_threads in TOTAL_THREADS_TABLE:
        print("\nDoing run with %d threads" % total_threads)
        output = "%s/%d_thds__o%d__perfCtrs.meas" % (
            output_dir_path, total_threads, TASKS_PER_THREAD)
        print("output file: %s" % output)

        rows = []
        for iters_per_task in ITERS_PER_TASK_TABLE:
            print("Run for %s workload iterations in a task" % iters_per_task)
            results = []
            for run in range(RUNS_PER_CONFIG):
                # Show progress before the (possibly long) run starts.
                sys.stdout.write("Run %d " % run)
                sys.stdout.flush()
                measurement = _run_once(cmd, total_threads, iters_per_task)
                results.append(measurement)
                print("ratio %f" % measurement["exeCycles_workCycles_ratio"])
            rows.append(_summarize(iters_per_task, total_threads, results))

        try:
            gnuplot_output = open(output, "w")
        except IOError:
            print("Cannot open output file %s" % output)
            return 1
        # The with-block closes the file even if writing fails part-way.
        with gnuplot_output:
            _write_table(gnuplot_output, output, total_threads, rows)

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))