msach@1: #! /usr/bin/env python
msach@1: # -*- coding: utf-8 -*-
msach@1: 
msach@1: import sys
msach@1: from re import match, search
msach@1: from datetime import datetime
msach@1: from subprocess import call,Popen,PIPE
msach@1: 
"""
This script generates the measurement data for a graph that represents
the overhead involved in synchronisation operations.
"""
msach@1: 
# Help text printed when the script is started with the wrong number of arguments.
usage="""
	This runs the exec time vs task size in three levels of loop nest.  The outermost iterates through
	a selection of numbers-of-threads.  For each of those, the next level iterates over a number of work-loops-per-task
	values.  The innermost repeats several times and chooses the best.
	Finally, it generates an output file for each value of number-of-threads that a companion gnuplot script turns
	into a .eps graph.
	It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
	Usage:
		overhead.py [executable binary] [path to output dir]
"""

# Number of cores the benchmark binary was compiled for.
NUM_CORES = 4
# Iterations of the inner (work) loop per task; each value is passed to the binary as -i.
ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640]
# Iterations of the outer loop, i.e. tasks per thread; passed to the binary as -o.
TASKS_PER_THREAD = 30000
# Total thread counts to measure; each value is passed to the binary as -t.
TOTAL_THREADS_TABLE = [8, 32, 128, 512]
msach@1: 
def getNumber(line):
	"""Return the first number found in *line*, as a string.

	A number is a run of digits, optionally followed by a decimal point and
	more digits (e.g. "42", "3.14", "7.").  A sign or an exponent is not
	part of the match.  The caller converts the text with int() or float().

	Raises:
		ValueError: if *line* contains no digit.
	"""
	# Raw string: "\d" in a plain string literal is an invalid escape sequence.
	match_obj = search(r"(\d+\.?\d*)", line)
	if match_obj is None:
		raise ValueError("no number found in %r" % (line,))
	return match_obj.group(1)
msach@1: 
# Exactly two arguments are expected: the benchmark binary and the output directory.
if len(sys.argv) != 3:
	print(usage)
	sys.exit(0)

cmd = sys.argv[1]
# Probe that the binary path can be opened before starting the measurement runs.
try:
	f = open(cmd)
except IOError:
	# open() failed, so there is no file object to close here; closing an
	# unbound `f` would raise NameError instead of exiting with status 1.
	print("Please provide a valid executable.")
	sys.exit(1)
else:
	f.close()

output_dir_path = sys.argv[2]
msach@1: 
msach@1: #===================================================================
msach@1: #  Done with parsing cmd line inputs, start doing the runs 
msach@1: #
msach@1: 
# Sweep over the thread counts.  For each one, run the benchmark binary five
# times per iterations-per-task value, keep the fastest run of each, and write
# one data file for that thread count.
for totalThreads in TOTAL_THREADS_TABLE:
	print("\nDoing run with %d threads" % totalThreads)
	output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, TASKS_PER_THREAD)
	print("output file: %s" % output)
	# Float division, because the file header reports this value with %f.
	threadsPerCore = float(totalThreads) / NUM_CORES
	# One list of per-run result dicts for each entry of ITERS_PER_TASK_TABLE.
	array_of_results = []
	for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
		print("Run for %s workload iterations in a task" % workload_iterations_in_task)
		results = []
		for run in range(5):
			# Progress prefix; the measured ratio is appended to the same line below.
			sys.stdout.write("Run %d " % run)
			sys.stdout.flush()
			# Pass the arguments as a list (no shell) so that a binary path
			# containing spaces or shell metacharacters is executed as given.
			child = Popen([cmd,
						"-t", str(totalThreads),
						"-i", str(workload_iterations_in_task),
						"-o", str(TASKS_PER_THREAD)],
						stdout=PIPE, stderr=None, universal_newlines=True)
			# communicate() reads stdout to EOF and waits for the child to exit.
			program_output = child.communicate()[0]

			# Reset per run so that a missing output line is detected instead
			# of silently reusing the value parsed from the previous run.
			total_workcycles = None
			total_exe_cycles = None
			exeCycles_workCycles_ratio = None
			for line in program_output.split("\n"):
				if line.startswith("Sum across threads of work cycles:"):
					total_workcycles = int(getNumber(line))
				elif line.startswith("Total Execution Cycles:"):
					total_exe_cycles = int(getNumber(line))
				elif line.startswith("ExeCycles/WorkCycles Ratio"):
					exeCycles_workCycles_ratio = float(getNumber(line))
			if None in (total_workcycles, total_exe_cycles, exeCycles_workCycles_ratio):
				sys.exit("\nCould not find all cycle counts in the output of %s" % cmd)

			results.append({"total_workcycles"            : total_workcycles,
						"total_exe_cycles"            : total_exe_cycles,
						"exeCycles_workCycles_ratio" : exeCycles_workCycles_ratio})
			print("ratio %f" % exeCycles_workCycles_ratio)
		array_of_results.append(results)

	#open gnuplot output
	try:
		gnuplot_output = open(output, "w")
	except IOError:
		print("Cannot open output file %s" % output)
		sys.exit(1)

	# The with-block closes the file even if formatting or writing fails.
	with gnuplot_output:
		table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
								 "<iters per task>",
								 "<total exe cycles>",
								 "<total work cyc>",
								 "<one task cyc>",
								 "<total overhead cyc>",
								 "<num syncs>",
								 "<overhead per Sync cyc>",
								 "<Exe/Work ratio>")

		#write header to file
		gnuplot_output.writelines(["# Output file name: %s\n" % output,
								"# Date of Run: %s\n" % str(datetime.now()),
								"# Number of Cores: %d\n" % NUM_CORES,
								"# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
								table_header,
								"# " + (len(table_header)-3)*"-" + "\n"])

		#Now print the results out, one row per iterations-per-task value
		for workload_iterations_in_task, results in zip(ITERS_PER_TASK_TABLE, array_of_results):
			# Take the shortest run; min() returns the first one on a tie.
			best = min(results, key=lambda r: r["total_exe_cycles"])
			total_workcycles = best["total_workcycles"]
			total_exe_cycles = best["total_exe_cycles"]

			#Calculate numbers
			overhead          = total_exe_cycles - total_workcycles
			total_syncs       = totalThreads * TASKS_PER_THREAD * 2
			overhead_per_sync = float(overhead) / float(total_syncs)
			cycles_of_task    = float(total_workcycles) / float(TASKS_PER_THREAD * totalThreads)

			# The ratio reported by the binary is not used; it is recomputed here.
			# NOTE(review): only half of the overhead is counted in the numerator;
			# the reason is not stated in this file -- confirm.
			exeCycles_workCycles_ratio = float(total_workcycles + float(overhead) / 2) / float(total_workcycles)

			gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
							  workload_iterations_in_task,
							  total_exe_cycles,
							  total_workcycles,
							  cycles_of_task,
							  overhead,
							  total_syncs,
							  overhead_per_sync,
							  exeCycles_workCycles_ratio))