# HG changeset patch
# User Merten Sach <msach@mailbox.tu-berlin.de>
# Date 1328542176 -3600
# Node ID db3409eab3220978236f85191e4c4cfcf9cb63e4
# Parent  3716aef85ba71b06da1ee51e4b21d15a32ba67df
exec_time_vs_task_size: data generation script for xoanon, because of higher core count

diff -r 3716aef85ba7 -r db3409eab322 scripts/overhead_2_tasks_per_outer_iter.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/scripts/overhead_2_tasks_per_outer_iter.py	Mon Feb 06 16:29:36 2012 +0100
@@ -0,0 +1,147 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from re import match, search
+from datetime import datetime
+from subprocess import call,Popen,PIPE
+
+"""
+This script generates the data files for a graph that represents the overhead
+involved in synchronisation operations.
+The graph itself is drawn from those files by a companion gnuplot script.
+"""
+
+usage="""
+	This runs the exec time vs task size in three levels of loop nest.  The outer most iterates through 
+	a selection of numbers-of-thread.  For each of those, the next lever iterates over a number of work-loops-per-task
+	values.  The innermost repeats several times and chooses the best.
+	Finally, it generates an output file for each value of number-of-threads that a companion gluplot script turns
+	into a .eps graph.
+	It is expected that the output directory's path is meaningful, such as machine-name, date, and so on
+	Usage:
+		overhead.py [executable binary] [path to output dir]
+"""
+# Sweep configuration used by the measurement loops below.
+NUM_CORES = 4 #Number of Cores the code was compiled for
+ITERS_PER_TASK_TABLE = [2, 5, 10, 20, 40, 80, 160, 320, 640] #Work-loop iterations per task; each value is one sweep point (passed as -i)
+ITERS_PER_OUTER_LOOP = 30000 #Number of iterations of outer loop (passed as -o)
+TOTAL_THREADS_TABLE = [8, 32, 128, 512] #Total thread counts to sweep (passed as -t); one output file per entry
+# Used below as both the task count and the sync count per thread; the factor 2 matches "2 tasks per outer iter" in the script name.
+tasks_per_thread = ITERS_PER_OUTER_LOOP * 2
+
+def getNumber(line):
+	"""Return the first integer or decimal number found in `line`, as a string; raise ValueError if there is none."""
+	match_obj = search(r"(\d+\.?\d*)", line)
+	if match_obj is None:
+		raise ValueError("no number found in %r" % (line,))
+	return match_obj.group(1)
+
+if len(sys.argv) != 3:
+	print usage
+	sys.exit(0)
+
+cmd=sys.argv[1]
+# Check that the executable can be opened; close only when open() succeeded, because f is unbound otherwise.
+try:
+	f = open(cmd)
+except IOError:
+	print "Please provide a valid executable."
+	sys.exit(1)
+else:
+	f.close()
+
+output_dir_path = sys.argv[2]
+
+#===================================================================
+#  Done with parsing cmd line inputs, start doing the runs 
+#
+
+# Sweep thread counts, then work iterations per task, with 5 repeated runs each;
+# one .meas table is written per thread count.
+for totalThreads in TOTAL_THREADS_TABLE:
+	print "\nDoing run with %d threads" % totalThreads
+	output = "%s/%d_thds__o%d__perfCtrs.meas" % (output_dir_path, totalThreads, ITERS_PER_OUTER_LOOP)
+	print "output file: %s" % output
+	# Float division so the "%f per Core" header stays exact when the count is not a multiple of NUM_CORES.
+	threadsPerCore = float(totalThreads)/NUM_CORES
+	array_of_results = []
+	for workload_iterations_in_task in ITERS_PER_TASK_TABLE:
+		print "Run for %s workload iterations in a task" % workload_iterations_in_task
+		results = []
+		for run in range(5):
+			print "Run %d" % run,
+			# communicate() reads stdout to EOF and also waits for the child process to exit.
+			program_output = Popen("%s -t %d -i %d -o %d" % (cmd,
+												totalThreads,
+												workload_iterations_in_task,
+												ITERS_PER_OUTER_LOOP),
+								stdout=PIPE, stderr=None, shell=True).communicate()[0]
+			# Reset per run so a missing output line cannot silently reuse the previous run's value.
+			total_workcycles = total_exe_cycles = exeCycles_workCycles_ratio = None
+			for line in program_output.split("\n"):
+				if match("^Sum across threads of work cycles:", line) != None:
+					total_workcycles = int(getNumber(line))
+				if match("^Total Execution Cycles:", line) != None:
+					total_exe_cycles = int(getNumber(line))
+				if match("^ExeCycles/WorkCycles Ratio", line) != None:
+					exeCycles_workCycles_ratio = float(getNumber(line))
+			if None in (total_workcycles, total_exe_cycles, exeCycles_workCycles_ratio):
+				raise ValueError("incomplete measurement output from %s:\n%s" % (cmd, program_output))
+			results.append({"total_workcycles"            : total_workcycles,
+						"total_exe_cycles"            : total_exe_cycles,
+						"exeCycles_workCycles_ratio" : exeCycles_workCycles_ratio})
+			print "ratio %f" % exeCycles_workCycles_ratio
+		array_of_results.append(results)
+
+	#open gnuplot output
+	try:
+		gnuplot_output = open(output,"w")
+	except IOError:
+		print "Cannot open output file %s" % output
+		sys.exit(1)
+
+	table_header = "# %20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\t%20s\n" % (
+							 "<iters per task>",
+							 "<total exe cycles>",
+							 "<total work cyc>",
+							 "<one task cyc>",
+							 "<total overhead cyc>",
+							 "<num syncs>",
+							 "<overhead per Sync cyc>",
+							 "<Exe/Work ratio>")
+
+	#write header to file
+	gnuplot_output.writelines(["# Output file name: %s\n" % output,
+							"# Date of Run: %s\n" % str(datetime.now()),
+							"# Number of Cores: %d\n" % NUM_CORES,
+							"# Number of Threads: %f per Core, %d total\n" % (threadsPerCore, totalThreads),
+							table_header,
+							"# " + (len(table_header)-3)*"-" + "\n"])
+
+	#Now print the results out, one table row per iterations-per-task value
+	for workload_iterations_in_task, results in zip(ITERS_PER_TASK_TABLE, array_of_results):
+		# take the shortest run; min() returns the first one among equally fast runs
+		best_run = min(results, key=lambda r: r["total_exe_cycles"])
+		total_workcycles = best_run["total_workcycles"]
+		total_exe_cycles  = best_run["total_exe_cycles"]
+		# Recomputed from the chosen run's cycle counts rather than taken from the program's printed ratio.
+		exeCycles_workCycles_ratio = float(total_exe_cycles)/float(total_workcycles)
+
+		#Calculate numbers
+		overhead             = total_exe_cycles - total_workcycles
+		total_syncs          = totalThreads * tasks_per_thread
+		overhead_per_sync    = float(overhead) / float(total_syncs)
+		cycles_of_task       = float(total_workcycles) / float(tasks_per_thread * totalThreads)
+
+		gnuplot_output.write("%20d\t%20d\t%20d\t%20f\t%20d\t%20d\t%20f\t%20f\n" % (
+						  workload_iterations_in_task,
+						  total_exe_cycles,
+						  total_workcycles,
+						  cycles_of_task,
+						  overhead,
+						  total_syncs,
+						  overhead_per_sync,
+						  exeCycles_workCycles_ratio))
+
+	gnuplot_output.close()