diff VMS.h @ 208:eaf7e4c58c9e

Create common_ancestor brch -- all branches will be closed, then new ones created with this as the common ancestor of all branches -- it is incomplete! only code that is common to all HW and Feat and FeatDev branches is in here
author Some Random Person <seanhalle@yahoo.com>
date Wed, 22 Feb 2012 11:39:12 -0800
parents
children 0c83ea8adefc
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/VMS.h	Wed Feb 22 11:39:12 2012 -0800
     1.3 @@ -0,0 +1,377 @@
     1.4 +/*
     1.5 + *  Copyright 2009 OpenSourceStewardshipFoundation.org
     1.6 + *  Licensed under GNU General Public License version 2
     1.7 + *
     1.8 + * Author: seanhalle@yahoo.com
     1.9 + * 
    1.10 + */
    1.11 +
    1.12 +#ifndef _VMS_H
    1.13 +#define	_VMS_H
    1.14 +#define _GNU_SOURCE
    1.15 +
    1.16 +#include "VMS_primitive_data_types.h"
    1.17 +#include "C_Libraries/DynArray/DynArray.h"
    1.18 +#include "C_Libraries/Hash_impl/PrivateHash.h"
    1.19 +#include "C_Libraries/Histogram/Histogram.h"
    1.20 +#include "C_Libraries/Queue_impl/PrivateQueue.h"
    1.21 +#include "vmalloc.h"
    1.22 +
    1.23 +#include <pthread.h>
    1.24 +#include <sys/time.h>
    1.25 +
    1.26 +//=================  Defines: included from separate files  =================
    1.27 +//
    1.28 +// Note: ALL defines are in other files, none are in here
    1.29 +//
    1.30 +#include "VMS_defs__main.h"
    1.31 +
    1.32 +
    1.33 +//================================ Typedefs =================================
    1.34 +//
    1.35 +typedef unsigned long long TSCount;  //value type returned by getTSCount()
    1.36 +typedef union   //lets one uint64 count also be accessed as two uint32 words
    1.37 + { uint32 lowHigh[2];  //overlays longVal; NOTE(review): which index is the low word depends on byte order -- confirm
    1.38 +   uint64 longVal;     //the whole count as one value
    1.39 + }
    1.40 +TSCountLowHigh;
    1.41 +
    1.42 +typedef struct _SchedSlot     SchedSlot;     //body given later in this header
    1.43 +typedef struct _VMSReqst      VMSReqst;      //body given later in this header
    1.44 +typedef struct _SlaveVP       SlaveVP;       //body given later in this header
    1.45 +typedef struct _MasterVP      MasterVP;      //NOTE(review): no struct _MasterVP body appears in this header
    1.46 +typedef struct _IntervalProbe IntervalProbe; //no body in this header; presumably in probes.h -- confirm
    1.47 +typedef struct _GateStruc     GateStruc;     //body given later in this header
    1.48 +
    1.49 +
    1.50 +typedef SlaveVP * (*Sched_Assigner)  ( void *, int );   //args: semEnv, coreIdx; returns a SlaveVP *
    1.51 +typedef void  (*RequestHandler)  ( SlaveVP *, void * ); //args: prWReqst, semEnv
    1.52 +typedef void  (*TopLevelFnPtr)  ( void *, SlaveVP * ); //pointer to a TopLevelFn; args: initData, animPr
    1.53 +typedef void    TopLevelFn      ( void *, SlaveVP * ); //the function type itself; args: initData, animPr
    1.54 +typedef void  (*ResumeVPFnPtr)   ( SlaveVP *, void * ); //NOTE(review): args undocumented; presumably VP to resume, semEnv -- confirm
    1.55 +
    1.56 +//============================= Statistics ==================================
    1.57 +
    1.58 +inline TSCount getTSCount();  //returns a TSCount; NOTE(review): "()" leaves the parameter list unchecked before C23 -- consider "(void)"
    1.59 +
    1.60 +//============= Request Related ===========
    1.61 +//
    1.62 +
    1.63 +enum VMSReqstType   //tag stored in VMSReqst.reqType; avoid starting enums at 0, for debug reasons
    1.64 + {
    1.65 +   semantic = 1,    //first value is deliberately 1, not 0
    1.66 +   createReq,       //= 2
    1.67 +   dissipate,       //= 3
    1.68 +   VMSSemantic      //= 4; goes with VMSSemReq / enum VMSSemReqstType below
    1.69 + };
    1.70 +
    1.71 +struct _VMSReqst   //one request record; SlaveVP.requests points at these
    1.72 + {
    1.73 +   enum VMSReqstType  reqType;//used for dissipate and in future for IO requests
    1.74 +   void              *semReqData; //opaque payload; its type is not fixed by this header
    1.75 +
    1.76 +   VMSReqst *nextReqst;  //link to another VMSReqst; presumably NULL ends the chain -- confirm
    1.77 + };
    1.78 +//VMSReqst
    1.79 +
    1.80 +enum VMSSemReqstType   //These are equivalent to semantic requests, but for
    1.81 + {                     // VMS's services available directly to app, like OS
    1.82 +   createProbe = 1,    // and probe services -- like a VMS-wide built-in lang
    1.83 +   openFile,           //= 2
    1.84 +   otherIO             //= 3; this enum is the type of VMSSemReq.reqType
    1.85 + };
    1.86 +
    1.87 +typedef struct   //request record for the VMS-provided services listed in enum VMSSemReqstType
    1.88 + { enum VMSSemReqstType reqType;    //which VMS service is being asked for
    1.89 +   SlaveVP           *requestingPr; //the VP making the request
    1.90 +   char                *nameStr;  //for create probe
    1.91 + }
    1.92 + VMSSemReq;
    1.93 +
    1.94 +
    1.95 +//====================  Core data structures  ===================
    1.96 +
    1.97 +struct _SchedSlot   //MasterEnv.allSchedSlots and SlaveVP.schedSlot point at these
    1.98 + {
    1.99 +   int         workIsDone;          //int used as a flag; NOTE(review): exact set/clear protocol is not shown here
   1.100 +   int         needsProcrAssigned;  //int used as a flag; presumably set when the slot has no VP to run -- confirm
   1.101 +   SlaveVP  *procrAssignedToSlot;   //the VP currently placed in this slot
   1.102 + };
   1.103 +//SchedSlot
   1.104 +
   1.105 +/*WARNING: re-arranging this data structure could cause VP switching
   1.106 + *         assembly code to fail -- hard-codes offsets of fields
   1.107 + */
   1.108 +struct _SlaveVP   //per-VP state; do not reorder fields (see WARNING above: assembly hard-codes offsets)
   1.109 + { int         procrID;  //for debugging -- count up each time create
   1.110 +   int         coreAnimatedBy;  //presumably index of the core running this VP -- confirm
   1.111 +   void       *startOfStack;
   1.112 +   void       *stackPtr;
   1.113 +   void       *framePtr;
   1.114 +   void       *resumeInstrPtr;  //NOTE(review): the four fields above look like saved VP context for the switching assembly -- confirm
   1.115 +   
   1.116 +   void       *coreLoopStartPt;  //allows proto-runtime to be linked later
   1.117 +   void       *coreLoopFramePtr; //restore before jmp back to core loop
   1.118 +   void       *coreLoopStackPtr; //restore before jmp back to core loop
   1.119 +
   1.120 +   SchedSlot  *schedSlot;  //a SchedSlot associated with this VP
   1.121 +   VMSReqst   *requests;   //head of this VP's VMSReqst chain (linked through nextReqst)
   1.122 +
   1.123 +   void       *semanticData; //this lives here for the life of VP
   1.124 +   void       *dataRetFromReq;//values returned from plugin to VP go here
   1.125 +
   1.126 +      //=========== MEASUREMENT STUFF ==========
   1.127 +       #ifdef MEAS__TIME_STAMP_SUSP
   1.128 +       uint32  preSuspTSCLow;
   1.129 +       uint32  postSuspTSCLow;
   1.130 +       #endif
   1.131 +       #ifdef MEAS__TIME_MASTER /* in SlaveVP because multiple masterVPs*/
   1.132 +       uint32  startMasterTSCLow;
   1.133 +       uint32  endMasterTSCLow;
   1.134 +       #endif
   1.135 +       #ifdef MEAS__TIME_2011_SYS
   1.136 +       TSCountLowHigh  startSusp;
   1.137 +       uint64  totalSuspCycles;
   1.138 +       uint32  numGoodSusp;
   1.139 +       #endif
   1.140 +      //========================================
   1.141 +   
   1.142 +   float64      createPtInSecs;  //have space but don't use on some configs
   1.143 + };
   1.144 +//SlaveVP
   1.145 +
   1.146 +
   1.147 +/*WARNING: re-arranging this data structure could cause VP-switching
   1.148 + *         assembly code to fail -- hard-codes offsets of fields
   1.149 + *         (because -O3 messes with things otherwise)
   1.150 + */
   1.151 +typedef struct   //shared runtime state; the global _VMSMasterEnv points at one of these
   1.152 + {
   1.153 +   union{ //adds padding to put masterLock on its own cache-line to elim
   1.154 +          // false sharing (masterLock is most-accessed var in VMS)
   1.155 +        volatile int32   masterLock;
   1.156 +        char             padding[CACHE_LINE_SZ];    //sets the union's size only; NOTE(review): start alignment depends on how the MasterEnv is allocated -- confirm
   1.157 +   } masterLockUnion;
   1.158 +   Sched_Assigner   slaveSchedAssigner;  //see Sched_Assigner typedef: (semEnv, coreIdx) -> SlaveVP *
   1.159 +   RequestHandler   requestHandler;      //see RequestHandler typedef: (prWReqst, semEnv)
   1.160 +   
   1.161 +   SchedSlot     ***allSchedSlots;     //NOTE(review): presumably [core][slot] -> SchedSlot * -- confirm
   1.162 +   VMSQueueStruc **readyToAnimateQs;   //NOTE(review): presumably one queue per core -- confirm
   1.163 +   SlaveVP      **masterVPs;           //array of master VPs (SlaveVP notes there are multiple masterVPs)
   1.164 +
   1.165 +   void            *semanticEnv;    //opaque; the "semEnv" argument of the typedefs above, presumably -- confirm
   1.166 +   void            *OSEventStruc;   //for future, when add I/O to BLIS
   1.167 +   MallocArrays    *freeLists;
   1.168 +   int32            amtOfOutstandingMem; //total currently allocated
   1.169 +
   1.170 +   void            *coreLoopReturnPt;//addr to jump to to re-enter coreLoop
   1.171 +
   1.172 +   int32            setupComplete;   //int32 used as a flag
   1.173 +   //int32            numMasterInARow[NUM_CORES];//detect back-to-back masterVP
   1.174 +   GateStruc       *workStealingGates[ NUM_CORES ]; //concurrent work-steal
   1.175 +   int32            workStealingLock;
   1.176 +   
   1.177 +   int32            numVPsCreated; //gives ordering to processor creation
   1.178 +
   1.179 +      //=========== MEASUREMENT STUFF =============
   1.180 +       IntervalProbe   **intervalProbes;        //the seven fields from here to measHistsInfo are present
   1.181 +       PrivDynArrayInfo *dynIntervalProbesInfo; // in every build; only the groups further down
   1.182 +       HashTable        *probeNameHashTbl;      // are guarded by MEAS__ defines
   1.183 +       int32             masterCreateProbeID;
   1.184 +       float64           createPtInSecs;
   1.185 +       Histogram       **measHists;
   1.186 +       PrivDynArrayInfo *measHistsInfo;
   1.187 +       #ifdef MEAS__TIME_PLUGIN
   1.188 +       Histogram       *reqHdlrLowTimeHist;
   1.189 +       Histogram       *reqHdlrHighTimeHist;
   1.190 +       #endif
   1.191 +       #ifdef MEAS__TIME_MALLOC
   1.192 +       Histogram       *mallocTimeHist;
   1.193 +       Histogram       *freeTimeHist;
   1.194 +       #endif
   1.195 +       #ifdef MEAS__TIME_MASTER_LOCK
   1.196 +       Histogram       *masterLockLowTimeHist;
   1.197 +       Histogram       *masterLockHighTimeHist;
   1.198 +       #endif
   1.199 +       #ifdef MEAS__TIME_2011_SYS
   1.200 +       TSCountLowHigh   startMaster;
   1.201 +       uint64           totalMasterCycles;
   1.202 +       uint32           numMasterAnimations;
   1.203 +       TSCountLowHigh   startReqHdlr;
   1.204 +       uint64           totalPluginCycles;
   1.205 +       uint32           numPluginAnimations;
   1.206 +       uint64           cyclesTillStartMasterLoop;
   1.207 +       TSCountLowHigh   endMasterLoop;
   1.208 +       #endif
   1.209 +      //==========================================
   1.210 + }
   1.211 +MasterEnv;
   1.212 +
   1.213 +//=========================  Extra Stuff Data Strucs  =======================
   1.214 +typedef struct   //type of the excpData argument of VMS_PI__throw_exception(); no members yet
   1.215 + {
   1.216 +   //NOTE(review): a struct with no members is a GNU C extension, not ISO C
   1.217 + }
   1.218 +VMSExcp;
   1.219 +
   1.220 +struct _GateStruc   //MasterEnv.workStealingGates holds NUM_CORES pointers to these (work-stealing)
   1.221 + {
   1.222 +   int32 gateClosed;       //int32 used as a flag
   1.223 +   int32 preGateProgress;  //NOTE(review): the protocol tying these three
   1.224 +   int32 waitProgress;     // progress counters together is not described
   1.225 +   int32 exitProgress;     // in this header -- see the work-stealing code
   1.226 + };
   1.227 +//GateStruc
   1.228 +
   1.229 +//=======================  OS Thread related  ===============================
   1.230 +
   1.231 +void * coreLoop( void *paramsIn );  //standard PThreads fn prototype
   1.232 +void * coreLoop_Seq( void *paramsIn );  //standard PThreads fn prototype
   1.233 +void masterLoop( void *initData, SlaveVP *masterVP );  //same signature as TopLevelFn
   1.234 +
   1.235 +
   1.236 +typedef struct   //element type of coreLoopThdParams[]; presumably what coreLoop receives as paramsIn -- confirm
   1.237 + {
   1.238 +   void           *endThdPt;
   1.239 +   unsigned int    coreNum;   //presumably the index of the core this thread serves -- confirm
   1.240 + }
   1.241 +ThdParams;
   1.242 +
   1.243 +pthread_t       coreLoopThdHandles[ NUM_CORES ];  //pthread's virt-procr state
   1.244 +ThdParams      *coreLoopThdParams [ NUM_CORES ];  //one ThdParams pointer per core
   1.245 +pthread_mutex_t suspendLock;   //NOTE(review): these four objects are defined (no "extern") in a header, so every
   1.246 +pthread_cond_t  suspend_cond;  // including .c file gets its own tentative definition; fails to link under -fno-common
   1.247 +
   1.248 +
   1.249 +
   1.250 +//=============================  Global Vars ================================
   1.251 +
   1.252 +volatile MasterEnv      *_VMSMasterEnv __align_to_cacheline__;  //"volatile" qualifies the pointed-to MasterEnv, not the pointer; NOTE(review): defined, not "extern", in a header
   1.253 +
   1.254 +
   1.255 +
   1.256 +
   1.257 +//=========================  Function Prototypes  ===========================
   1.258 +
   1.259 +
   1.260 +//========== Setup and shutdown ==========
   1.261 +void
   1.262 +VMS_int__init();  //set-up entry point; takes no arguments at the visible call interface
   1.263 +
   1.264 +/*TODO -- design notes: Fix seed-procr creation -- put box around language, have lang register stuff
   1.265 + *        with VMS.
   1.266 + *        have main program explicitly INIT Lang! -- makes more sense to
   1.267 + *        C programmers -- makes it clear that there's a transition.
   1.268 + *(might need to have the pthreads remain waiting for
   1.269 + *        cond until work is scheduled)
   1.270 + *Have main do call to tell language to perform work -- like did with DKU
   1.271 + *
   1.272 + *Ex: "HWSim__run_a_simulation(netlist, paramBag);"
   1.273 + *        "processID = SSR__run_program(seed_fn, seedData); "
   1.274 + *        "SSR__Wait_for_program_to_end(processID);"
   1.275 + *        "SSR__run_program_and_wait_till_it_ends(seed_fn, seedData);"
   1.276 + *
   1.277 + *        allows multiple languages to be started, and programs run in several,
   1.278 + *        overlapped, or one program to be run that uses multiple langs..?
   1.279 + *        So, each program is in separate directory:
   1.280 + *            "HWSim_ArchDef__PingPong"  "SSR_Program__Blocked_Matrix_Mult"
   1.281 + *
   1.282 + *        Those programs can talk to each other, via VMS, by handles they each
   1.283 + *        return
   1.284 + *        "processIDs[0] = SSR__run_program(seed_fn1, seedData1);"
   1.285 + *        "processIDs[1] = SSR__run_program(seed_fn2, seedData2);"
   1.286 + *        "SSR__link_programs(processIDs, 2);"
   1.287 + *or even
   1.288 + *        "processIDs[0] = Vthread__run_program(seed_fn1, seedData1);"
   1.289 + *        "processIDs[1] = SSR__run_program(seed_fn2, seedData2);"
   1.290 + *        "VMS__link_programs(processIDs, 2);"
   1.291 + *        Then, the programs just know they sync with other prog, but use own
   1.292 + *        lang's sync constructs -- VMS uses message system to establish tie-pt,
   1.293 + *        each lang defines what a tie-point means to it..  (work with the
   1.294 + *        diff semantics?) */
   1.295 +void
   1.296 +VMS_WL__start_the_work_then_wait_until_done();  //NOTE(review): the notes above appear to propose reworking this start-up call -- confirm
   1.297 +
   1.298 +void
   1.299 +VMS_int__shutdown();
   1.300 +
   1.301 +void
   1.302 +VMS_int__cleanup_at_end_of_shutdown();  //presumably called after VMS_int__shutdown(), going by its name -- confirm
   1.303 +
   1.304 +
   1.305 +//==============  Slave VP Related  ===============
   1.306 +   //fnPtr has the TopLevelFnPtr shape (initData, animPr); dataParam is presumably passed as its initData -- confirm
   1.307 +inline SlaveVP *
   1.308 +VMS_int__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
   1.309 +   //takes an existing slaveVP plus a top-level function and its data; presumably re-targets that VP -- confirm
   1.310 +inline void
   1.311 +VMS_int__point_slave_to_Fn( SlaveVP *slaveVP, TopLevelFnPtr fnPtr,
   1.312 +                            void    *dataParam);
   1.313 +   //NOTE(review): this and the next prototype take a pointer to a location holding a return address; likely implemented in assembly -- confirm
   1.314 +void
   1.315 +VMS_int__save_return_addr_into_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
   1.316 +
   1.317 +void
   1.318 +VMS_int__write_return_addr_from_ptd_to_loc(void *ptrToReturnAddrHoldingLoc);
   1.319 +
   1.320 +void
   1.321 +VMS_int__dissipate_procr( SlaveVP *procrToDissipate );
   1.322 +
   1.323 +   //Use this to create processor inside entry point & other places outside
   1.324 +   // the VMS system boundary (IE, not run in slave nor Master)
   1.325 +SlaveVP *
   1.326 +VMS_ext__create_procr( TopLevelFnPtr fnPtr, void *dataParam );
   1.327 +   //same parameter as VMS_int__dissipate_procr; the VMS_ext__ prefix matches VMS_ext__create_procr above
   1.328 +void
   1.329 +VMS_ext__dissipate_procr( SlaveVP *procrToDissipate );
   1.330 +   //msgStr: message text; reqstPr: a VP; excpData: VMSExcp, which currently has no members
   1.331 +void
   1.332 +VMS_PI__throw_exception( char *msgStr, SlaveVP *reqstPr, VMSExcp *excpData );
   1.333 +   //returns an untyped pointer; presumably the semantic environment serving animPr -- confirm
   1.334 +void *
   1.335 +VMS_WL__give_sem_env_for( SlaveVP *animPr );
   1.336 +
   1.337 +//==============  Request Related  ===============
   1.338 +
   1.339 +void
   1.340 +VMS_int__suspend_procr( SlaveVP *callingPr );
   1.341 +   //semReqData: opaque payload (cf. VMSReqst.semReqData); callingPr: the requesting VP
   1.342 +inline void
   1.343 +VMS_WL__add_sem_request_in_mallocd_VMSReqst( void *semReqData, SlaveVP *callingPr );
   1.344 +
   1.345 +inline void
   1.346 +VMS_WL__send_sem_request( void *semReqData, SlaveVP *callingPr );
   1.347 +   //presumably pairs with request type createReq in enum VMSReqstType -- confirm
   1.348 +void
   1.349 +VMS_WL__send_create_procr_req( void *semReqData, SlaveVP *reqstingPr );
   1.350 +   //presumably pairs with request type dissipate in enum VMSReqstType -- confirm
   1.351 +inline void
   1.352 +VMS_WL__send_dissipate_req( SlaveVP *prToDissipate );
   1.353 +   //presumably pairs with request type VMSSemantic; semReqData would then be a VMSSemReq -- confirm
   1.354 +inline void
   1.355 +VMS_WL__send_VMSSem_request( void *semReqData, SlaveVP *callingPr );
   1.356 +   //returns a VMSReqst *; presumably taken from procrWithReq->requests -- confirm
   1.357 +VMSReqst *
   1.358 +VMS_PI__take_next_request_out_of( SlaveVP *procrWithReq );
   1.359 +   //returns an untyped pointer; presumably req->semReqData -- confirm
   1.360 +inline void *
   1.361 +VMS_PI__take_sem_reqst_from( VMSReqst *req );
   1.362 +   //resumePrFnPtr has the ResumeVPFnPtr shape (SlaveVP *, void *)
   1.363 +inline void
   1.364 +VMS_PI__handle_VMSSemReq( VMSReqst *req, SlaveVP *requestingPr, void *semEnv,
   1.365 +                       ResumeVPFnPtr resumePrFnPtr );
   1.366 +
   1.367 +//======================== MEASUREMENT ======================
   1.368 +uint64   //same type as MasterEnv.totalPluginCycles, which exists only under MEAS__TIME_2011_SYS; presumably reports it -- confirm
   1.369 +VMS_WL__give_num_plugin_cycles();
   1.370 +uint32   //same type as MasterEnv.numPluginAnimations; presumably reports it -- confirm
   1.371 +VMS_WL__give_num_plugin_animations();
   1.372 +
   1.373 +
   1.374 +
   1.375 +#include "VMS__HW_dependent.h"
   1.376 +#include "probes.h"
   1.377 +#include "vutilities.h"
   1.378 +
   1.379 +#endif	/* _VMS_H */
   1.380 +