diff AnimationMaster.c @ 260:999f2966a3e5

new branch -- Dev_ML -- for making VMS take langlets whose constructs can be mixed
author Sean Halle <seanhalle@yahoo.com>
date Wed, 19 Sep 2012 23:12:44 -0700
parents 7ed97c961901
children dafae55597ce
line diff
     1.1 --- a/AnimationMaster.c	Mon Sep 03 03:34:54 2012 -0700
     1.2 +++ b/AnimationMaster.c	Wed Sep 19 23:12:44 2012 -0700
     1.3 @@ -9,7 +9,7 @@
     1.4  #include <stdio.h>
     1.5  #include <stddef.h>
     1.6  
     1.7 -#include "VMS.h"
     1.8 +#include "PR.h"
     1.9  
    1.10  
    1.11  
    1.12 @@ -20,11 +20,39 @@
    1.13   * 
    1.14   *Within the code, this is the top-level-function of the masterVPs, and
    1.15   * runs when the coreController has no more slave VPs.  It's job is to
    1.16 - * refill the animation slots with slaves.
    1.17 + * refill the animation slots with slaves that have work.
    1.18   *
    1.19 - *To do this, it scans the animation slots for just-completed slaves.
    1.20 - * Each of these has a request in it.  So, the master hands each to the
    1.21 - * plugin's request handler.
    1.22 + *There are multiple versions of the master, each tuned to a specific 
    1.23 + * combination of modes.  This keeps the master simple, with reduced overhead,
    1.24 + * when the application is not using the extra complexity.
    1.25 + * 
    1.26 + *As of Sept 2012, the versions available will be:
    1.27 + * 1) Single language, which only exposes slaves (such as SSR or Vthread)
    1.28 + * 2) Single language, which only exposes tasks  (such as pure dataflow)
    1.29 + * 3) Single language, which exposes both (like Cilk, StarSs, and OpenMP)
    1.30 + * 4) Multi-language, which always assumes both tasks and slaves
    1.31 + * 5) Multi-language and multi-process, which also assumes both tasks and slaves
    1.32 + *
    1.33 + * 
    1.34 + *
    1.35 + */
    1.36 +
    1.37 +
    1.38 +//=====================  The versions of the Animation Master  =================
    1.39 +//
    1.40 +//==============================================================================
    1.41 +
    1.42 +/* 1) This version is for a single language, that has only slaves, no tasks,
    1.43 + *    such as Vthread or SSR.
    1.44 + *This version is for when an application has only a single language, and
    1.45 + * that language exposes slaves explicitly (as opposed to a task based 
    1.46 + * language like pure dataflow).
    1.47 + * 
    1.48 + *
    1.49 + *It scans the animation slots for just-completed slaves.
    1.50 + * Each completed slave has a request in it.  So, the master hands each to
    1.51 + * the plugin's request handler (there is only one plugin, because only one
    1.52 + * lang).
    1.53   *Each request represents a language construct that has been encountered
    1.54   * by the application code in the slave. Passing the request to the
    1.55   * request handler is how that language construct's behavior gets invoked.
    1.56 @@ -77,24 +105,24 @@
    1.57   *There is a separate masterVP for each core, but a single semantic
    1.58   * environment shared by all cores.  Each core also has its own scheduling
    1.59   * slots, which are used to communicate slaves between animationMaster and
    1.60 - * coreController.  There is only one global variable, _VMSMasterEnv, which
    1.61 + * coreController.  There is only one global variable, _PRMasterEnv, which
    1.62   * holds the semantic env and other things shared by the different
    1.63   * masterVPs.  The request handler and Assigner are registered with
    1.64   * the animationMaster by the language's init function, and a pointer to
    1.65 - * each is in the _VMSMasterEnv. (There are also some pthread related global
    1.66 - * vars, but they're only used during init of VMS).
    1.67 - *VMS gains control over the cores by essentially "turning off" the OS's
    1.68 + * each is in the _PRMasterEnv. (There are also some pthread related global
    1.69 + * vars, but they're only used during init of PR).
    1.70 + *PR gains control over the cores by essentially "turning off" the OS's
    1.71   * scheduler, using pthread pin-to-core commands.
    1.72   *
    1.73   *The masterVPs are created during init, with this animationMaster as their
    1.74   * top level function.  The masterVPs use the same SlaveVP data structure,
    1.75   * even though they're not slave VPs.
    1.76   *A "seed slave" is also created during init -- this is equivalent to the
    1.77 - * "main" function in C, and acts as the entry-point to the VMS-language-
    1.78 + * "main" function in C, and acts as the entry-point to the PR-language-
    1.79   * based application.
    1.80 - *The masterVPs shared a single system-wide master-lock, so only one
    1.81 + *The masterVPs share a single system-wide master-lock, so only one
    1.82   * masterVP may be animated at a time.
    1.83 - *The core controllers access _VMSMasterEnv to get the masterVP, and when
    1.84 + *The core controllers access _PRMasterEnv to get the masterVP, and when
    1.85   * they start, the slots are all empty, so they run their associated core's
    1.86   * masterVP.  The first of those to get the master lock sees the seed slave
    1.87   * in the shared semantic environment, so when it runs the Assigner, that
    1.88 @@ -104,14 +132,14 @@
    1.89   * constructs to create more slaves, and so on.  Each of those constructs
    1.90   * causes the seed slave to suspend, switching over to the core controller,
    1.91   * which eventually switches to the masterVP, which executes the 
    1.92 - * request handler, which uses VMS primitives to carry out the creation of
    1.93 + * request handler, which uses PR primitives to carry out the creation of
    1.94   * new slave VPs, which are marked as ready for the Assigner, and so on..
    1.95   * 
    1.96   *On animation slots, and system behavior:
    1.97 - * A request may linger in a animation slot for a long time while
    1.98 + * A request may linger in an animation slot for a long time while
    1.99   * the slaves in the other slots are animated.  This only becomes a problem
   1.100   * when such a request is a choke-point in the constraints, and is needed
   1.101 - * to free work for *other* cores.  To reduce this occurance, the number
   1.102 + * to free work for *other* cores.  To reduce this occurrence, the number
   1.103   * of animation slots should be kept low.  In balance, having multiple
   1.104   * animation slots amortizes the overhead of switching to the masterVP and
   1.105   * executing the animationMaster code, which drives for more than one. In
   1.106 @@ -163,7 +191,29 @@
   1.107         HOLISTIC__Record_AppResponder_start;
   1.108                 MEAS__startReqHdlr;
   1.109                 
   1.110 -            //process the requests made by the slave (held inside slave struc)
   1.111 +           currSlot->workIsDone         = FALSE;
   1.112 +            currSlot->needsSlaveAssigned = TRUE;
   1.113 +            SlaveVP *currSlave = currSlot->slaveAssignedToSlot;
   1.114 +            
   1.115 +	justAddedReqHdlrChg();
   1.116 +			//handle the request, either by VMS or by the language
   1.117 +            if( currSlave->requests->reqType != LangReq )
   1.118 +             {    //The request is a standard VMS one, not one defined by the
   1.119 +                  // language, so VMS handles it, then queues slave to be assigned
   1.120 +               handleReqInVMS( currSlave );
   1.121 +               writePrivQ( currSlave, VMSReadyQ ); //Q slave to be assigned below
   1.122 +             }
   1.123 +            else
   1.124 +             {       MEAS__startReqHdlr;
   1.125 +
   1.126 +                  //Language handles request, which is held inside slave struc
   1.127 +               (*requestHandler)( currSlave, semanticEnv );
   1.128 +
   1.129 +                     MEAS__endReqHdlr;
   1.130 +             }
   1.131 +          }
   1.132 +
   1.133 +		  //process the requests made by the slave (held inside slave struc)
   1.134           (*requestHandler)( currSlot->slaveAssignedToSlot, semanticEnv );
   1.135           
   1.136           HOLISTIC__Record_AppResponder_end;
   1.137 @@ -196,3 +246,756 @@
   1.138     }//while(1) 
   1.139   }
   1.140  
   1.141 +
   1.142 +/* 2)  This version is for a single language that has only tasks, which 
   1.143 + *     cannot be suspended.  NOTE(review): the body below still has paths for suspended tasks and suspended slaves -- confirm intent.
   1.144 + */
   1.145 +void animationMaster( void *initData, SlaveVP *masterVP )
   1.146 + { 
   1.147 +      //Used while scanning and filling animation slots
   1.148 +   int32           slotIdx, numSlotsFilled;
   1.149 +   AnimSlot       *currSlot, **animSlots;
   1.150 +   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   1.151 +   
   1.152 +      //Local copies, for performance
   1.153 +   MasterEnv      *masterEnv;
   1.154 +   SlaveAssigner   slaveAssigner;
   1.155 +   RequestHandler  requestHandler;
   1.156 +   PRSemEnv       *semanticEnv;
   1.157 +   int32           thisCoresIdx;
   1.158 +
   1.159 +   //#ifdef  MODE__MULTI_LANG
   1.160 +   SlaveVP        *slave;
   1.161 +   PRProcess      *process; //NOTE(review): process, constrEnvHolder and langMagicNumber are never referenced in this function body
   1.162 +   PRConstrEnvHolder *constrEnvHolder;
   1.163 +   int32           langMagicNumber;
   1.164 +   //#endif
   1.165 +   
   1.166 +   //======================== Initializations ========================
   1.167 +   masterEnv        = (MasterEnv*)_PRMasterEnv;
   1.168 +   
   1.169 +   thisCoresIdx     = masterVP->coreAnimatedBy;
   1.170 +   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];
   1.171 +
   1.172 +   requestHandler   = masterEnv->requestHandler;
   1.173 +   slaveAssigner    = masterEnv->slaveAssigner;
   1.174 +   semanticEnv      = masterEnv->semanticEnv;
   1.175 +   
   1.176 +      //initialize, for non-multi-lang, non multi-proc case
   1.177 +      // default handler gets put into master env by a registration call by lang
   1.178 +   endTaskHandler   = masterEnv->defaultTaskHandler; //NOTE(review): no declaration of endTaskHandler is visible in this function, and the code below uses taskEndHandler instead -- confirm which name is intended
   1.179 +   
   1.180 +      HOLISTIC__Insert_Master_Global_Vars;
   1.181 +   
   1.182 +   //======================== animationMaster ========================
   1.183 +   //Do loop gets requests handled and work assigned to slots..
   1.184 +   // work can either be a task or a resumed slave
   1.185 +   //Having two cases makes this logic complex.. can be finishing either, and 
   1.186 +   // then the next available work may be either.. so really have two distinct
   1.187 +   // loops that are inter-twined.. 
   1.188 +   while(1){
   1.189 +       
   1.190 +      MEAS__Capture_Pre_Master_Point
   1.191 +
   1.192 +      //Scan the animation slots
   1.193 +   numSlotsFilled = 0;
   1.194 +   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
   1.195 +    {
   1.196 +      currSlot = animSlots[ slotIdx ];
   1.197 +
   1.198 +         //Check if newly-done slave in slot, which will need request handled
   1.199 +      if( currSlot->workIsDone )
   1.200 +       { currSlot->workIsDone = FALSE;
   1.201 +       
   1.202 +               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
   1.203 +               MEAS__startReqHdlr;
   1.204 +               
   1.205 +         
   1.206 +            //process the request made by the slave (held inside slave struct)
   1.207 +         slave = currSlot->slaveAssignedToSlot;
   1.208 +         
   1.209 +            //check if the completed work was a task..
   1.210 +         if( slave->taskMetaInfo->isATask )
   1.211 +          {
   1.212 +             if( slave->reqst->type == TaskEnd ) 
   1.213 +              {    //do task end handler, which is registered separately
   1.214 +                   //note, end hdlr may use semantic data from reqst..
   1.215 +                //#ifdef  MODE__MULTI_LANG
   1.216 +                   //get end-task handler
   1.217 +                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
   1.218 +                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
   1.219 +                //#endif
   1.220 +                (*taskEndHandler)( slave, semanticEnv );
   1.221 +                
   1.222 +                goto AssignWork; //NOTE(review): this path jumps past HOLISTIC__Record_AppResponder_end and MEAS__endReqHdlr below
   1.223 +              }
   1.224 +             else  //is a task, and just suspended
   1.225 +              {    //turn slot slave into free task slave & make replacement
   1.226 +                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType(); //NOTE(review): called with no arguments -- presumably a placeholder; confirm
   1.227 +                
   1.228 +                //goto normal slave request handling
   1.229 +                goto SlaveReqHandling; 
   1.230 +              }
   1.231 +          }
   1.232 +         else //is a slave that suspended
   1.233 +          {
   1.234 +          SlaveReqHandling:
   1.235 +            (*requestHandler)( slave, semanticEnv ); //(note: an indirect function call is more efficient with fewer params, so other values are re-fetched from the slave instead)
   1.236 +         
   1.237 +               HOLISTIC__Record_AppResponder_end;
   1.238 +               MEAS__endReqHdlr;
   1.239 +               
   1.240 +            goto AssignWork;
   1.241 +          }
   1.242 +       } //if has suspended slave that needs handling
   1.243 +      
   1.244 +         //if slot empty, hand to Assigner to fill with a slave
   1.245 +      if( currSlot->needsSlaveAssigned )
   1.246 +       {    //Call plugin's Assigner to give slot a new slave
   1.247 +               HOLISTIC__Record_Assigner_start;
   1.248 +               
   1.249 +       AssignWork: //also entered by goto from the request handling above, which bypasses the needsSlaveAssigned test and HOLISTIC__Record_Assigner_start
   1.250 +     
   1.251 +         assignedSlaveVP = assignWork( semanticEnv, currSlot );
   1.252 +       
   1.253 +            //put the chosen slave into slot, and adjust flags and state
   1.254 +         if( assignedSlaveVP != NULL )
   1.255 +          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
   1.256 +            assignedSlaveVP->animSlotAssignedTo = currSlot;
   1.257 +            currSlot->needsSlaveAssigned  = FALSE;
   1.258 +            numSlotsFilled               += 1;
   1.259 +          }
   1.260 +         else
   1.261 +          {
   1.262 +            currSlot->needsSlaveAssigned  = TRUE; //local write
   1.263 +          }
   1.264 +               HOLISTIC__Record_Assigner_end;
   1.265 +       }//if slot needs slave assigned
   1.266 +    }//for( slotIdx..
   1.267 +
   1.268 +         MEAS__Capture_Post_Master_Point;
   1.269 +   
   1.270 +   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   1.271 +   flushRegisters();
   1.272 +   }//while(1) 
   1.273 + }
   1.274 +
   1.275 +
   1.276 +/*This is the master for when only multi-lang mode is on, not multi-process.
   1.277 + * This version has to handle both tasks and slaves, and do extra work of 
   1.278 + * looking up the semantic env and handlers to use, for each completed bit of 
   1.279 + * work.
   1.280 + *It also has to search through the semantic envs to find one with work,
   1.281 + * then ask that env's assigner to return a unit of that work.
   1.282 + * 
   1.283 + *The language is written to startup in the same way as if it were the only
   1.284 + * language in the app, and it operates in the same way,
   1.285 + * the only difference between single language and multi-lang is here, in the
   1.286 + * master.
   1.287 + *This invisibility to mode is why the language has to use registration calls
   1.288 + * for everything during startup -- those calls do different things depending
   1.289 + * on whether it's single-language or multi-language mode.
   1.290 + * 
   1.291 + *In this version of the master, work can either be a task or a resumed slave.
   1.292 + *Having two cases makes this logic complex.. can be finishing either, and
   1.293 + * then the next available work may be either.. so really have two distinct 
   1.294 + * loops that are inter-twined.. 
   1.295 + * 
   1.296 + *Some special cases:
   1.297 + * A task-end is a special case for a few reasons (below).
   1.298 + * A task-end can't block a slave (can't cause it to "logically suspend")
   1.299 + * A task available for work can only be assigned to a special slave, which 
   1.300 + *   has been set aside for doing tasks, one such task-slave is always 
   1.301 + *   assigned to each slot. So, when a task ends, a new task is assigned to
   1.302 + *   that slot's task-slave right away.  
   1.303 + * But if no tasks are available, then have to switch over to looking at
   1.304 + *   slaves to find one ready to resume, to find work for the slot.
   1.305 + * If a task just suspends, not ends, then its task-slave is no longer 
   1.306 + *   available to take new tasks, so a new task-slave has to be assigned to
   1.307 + *   that slot.  Then the slave of the suspended task is turned into a free
   1.308 + *   task-slave and request handling is done on it as if it were a slave 
   1.309 + *   that suspended.
   1.310 + * After request handling, do the same sequence of looking for a task to be
   1.311 + *   work, and if none, look for a slave ready to resume, as work for the slot.
   1.312 + * If a slave suspends, handle its request, then look for work.. first for a
   1.313 + *   task to assign, and if none, slaves ready to resume.
   1.314 + * Another special case is when task-end is done on a free task-slave.. in
   1.315 + *   that case, the slave has no more work and no way to get more.. so place
   1.316 + *   it into a recycle queue.
   1.317 + * If no work is found of either type, then do a special thing to prune down
   1.318 + *   the extra slaves in the recycle queue, so that they don't accumulate..
   1.319 + * 
   1.320 + *The multi-lang thing complicates matters..  
   1.321 + *
   1.322 + *For request handling, it means have to first fetch the semantic environment
   1.323 + * of the language, and then do the request handler pointed to by that
   1.324 + * semantic env.
   1.325 + *For assigning, things get more complex because of competing goals..  One
   1.326 + * goal is for language specific stuff to be used during assignment, so
   1.327 + * assigner can make higher quality decisions..  but with multiple languages,
   1.328 + * which only get mixed in the application, the assigners can't be written
   1.329 + * with knowledge of each other.  So, they can only make localized decisions,
   1.330 + * and so different languages' assigners may interfere with each other..
   1.331 + * 
   1.332 + *So, have some possibilities available:
   1.333 + *1) can have a fixed scheduler in the proto-runtime, that all the
   1.334 + * languages give their work to..  (but then lose language-specific info, 
   1.335 + * there is a standard PR format for assignment info, and the language 
   1.336 + * attaches this to the work-unit when it gives it to PR.. also have issue
   1.337 + * with HWSim, which uses a priority Q instead of FIFO, and requests can 
   1.338 + * "undo" previous work put in, so request handlers need way to manipulate
   1.339 + * the work-holding Q..) (this might be fudgeable with
   1.340 + * HWSim, if the master did a lang-supplied callback each time it assigns a
   1.341 + * unit to a slot..  then HWSim can keep exactly one unit of work in PR's
   1.342 + * queue at a time..  but this is quite hack-like.. or perhaps HWSim supplies
   1.343 + * a task-end handler that kicks the next unit of work from HWSim internal
   1.344 + * priority queue, over to PR readyQ)
   1.345 + *2) can have each language have its own semantic env, that holds its own
   1.346 + * work, which is assigned by its own assigner.. then the master searches
   1.347 + * through all the semantic envs to find one with work and asks it to give work..
   1.348 + * (this has downside of blinding assigners to each other.. but does work
   1.349 + * for HWSim case)
   1.350 + *3) could make PR have a different readyQ for each core, and ask the lang
   1.351 + * to put work to the core it prefers.. but the work may be moved by PR if
   1.352 + * needed, say if one core idles for too long. This is a hybrid approach, 
   1.353 + * letting the language decide which core, but PR keeps the work and does it
   1.354 + * FIFO style.. (this might also be fudgeable with HWSim, in similar fashion, 
   1.355 + * but it would be complicated by having to track cores separately) 
   1.356 + *
   1.357 + *Choosing 2, to keep compatibility with single-lang mode..  it allows the same
   1.358 + * assigner to be used for single-lang as for multi-lang..  the overhead of
   1.359 + * the extra master search for work is part of the price of the flexibility,
   1.360 + * but should be fairly small.. takes the first env that has work available, 
   1.361 + * and whatever it returns is assigned to the slot..
   1.362 + * 
   1.363 + *As a hybrid, giving an option for a unified override assigner to be registered
   1.364 + * and used..  This allows something like a static analysis to detect
   1.365 + * which languages are grouped together, and then analyze the pattern of 
   1.366 + * construct calls, and generate a custom assigner that uses info from all
   1.367 + * the languages in a unified way..  Don't really expect this to happen, 
   1.368 + * but making it possible.
   1.369 + */
   1.370 +#ifdef  MODE__MULTI_LANG //NOTE(review): the other animationMaster definitions in this file are not guarded, so defining MODE__MULTI_LANG would give duplicate definitions -- confirm
   1.371 +void animationMaster( void *initData, SlaveVP *masterVP )
   1.372 + { 
   1.373 +      //Used while scanning and filling animation slots
   1.374 +   int32           slotIdx, numSlotsFilled;
   1.375 +   AnimSlot       *currSlot, **animSlots;
   1.376 +   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   1.377 +   
   1.378 +      //Local copies, for performance
   1.379 +   MasterEnv      *masterEnv;
   1.380 +   SlaveAssigner   slaveAssigner;
   1.381 +   RequestHandler  requestHandler;
   1.382 +   PRSemEnv       *semanticEnv;
   1.383 +   int32           thisCoresIdx;
   1.384 +
   1.385 +   //#ifdef  MODE__MULTI_LANG
   1.386 +   SlaveVP        *slave;
   1.387 +   PRProcess      *process; //NOTE(review): process, constrEnvHolder and langMagicNumber are never referenced in this function body
   1.388 +   PRConstrEnvHolder *constrEnvHolder;
   1.389 +   int32           langMagicNumber;
   1.390 +   //#endif
   1.391 +   
   1.392 +   //======================== Initializations ========================
   1.393 +   masterEnv        = (MasterEnv*)_PRMasterEnv;
   1.394 +   
   1.395 +   thisCoresIdx     = masterVP->coreAnimatedBy;
   1.396 +   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];
   1.397 +
   1.398 +   requestHandler   = masterEnv->requestHandler;
   1.399 +   slaveAssigner    = masterEnv->slaveAssigner;
   1.400 +   semanticEnv      = masterEnv->semanticEnv;
   1.401 +   
   1.402 +      //initialize, for non-multi-lang, non multi-proc case (NOTE(review): this function is the multi-lang version -- confirm this comment applies here)
   1.403 +      // default handler gets put into master env by a registration call by lang
   1.404 +   endTaskHandler   = masterEnv->defaultTaskHandler; //NOTE(review): no declaration of endTaskHandler is visible in this function, and the code below uses taskEndHandler instead -- confirm which name is intended
   1.405 +   
   1.406 +      HOLISTIC__Insert_Master_Global_Vars;
   1.407 +   
   1.408 +   //======================== animationMaster ========================
   1.409 +   //Do loop gets requests handled and work assigned to slots..
   1.410 +   // work can either be a task or a resumed slave
   1.411 +   //Having two cases makes this logic complex.. can be finishing either, and 
   1.412 +   // then the next available work may be either.. so really have two distinct
   1.413 +   // loops that are inter-twined.. 
   1.414 +   while(1){
   1.415 +       
   1.416 +      MEAS__Capture_Pre_Master_Point
   1.417 +
   1.418 +      //Scan the animation slots
   1.419 +   numSlotsFilled = 0;
   1.420 +   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
   1.421 +    {
   1.422 +      currSlot = animSlots[ slotIdx ];
   1.423 +
   1.424 +         //Check if newly-done slave in slot, which will need request handled
   1.425 +      if( currSlot->workIsDone )
   1.426 +       { currSlot->workIsDone = FALSE;
   1.427 +       
   1.428 +               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
   1.429 +               MEAS__startReqHdlr;
   1.430 +               
   1.431 +         
   1.432 +            //process the request made by the slave (held inside slave struct)
   1.433 +         slave = currSlot->slaveAssignedToSlot;
   1.434 +         
   1.435 +            //check if the completed work was a task..
   1.436 +         if( slave->taskMetaInfo->isATask )
   1.437 +          {
   1.438 +             if( slave->reqst->type == TaskEnd ) 
   1.439 +              {    //do task end handler, which is registered separately
   1.440 +                   //note, end hdlr may use semantic data from reqst..
   1.441 +                //#ifdef  MODE__MULTI_LANG
   1.442 +                   //get end-task handler
   1.443 +                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
   1.444 +                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
   1.445 +                //#endif
   1.446 +                (*taskEndHandler)( slave, semanticEnv );
   1.447 +                
   1.448 +                goto AssignWork; //NOTE(review): this path jumps past HOLISTIC__Record_AppResponder_end and MEAS__endReqHdlr below
   1.449 +              }
   1.450 +             else  //is a task, and just suspended
   1.451 +              {    //turn slot slave into free task slave & make replacement
   1.452 +                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType(); //NOTE(review): called with no arguments -- presumably a placeholder; confirm
   1.453 +                
   1.454 +                //goto normal slave request handling
   1.455 +                goto SlaveReqHandling; 
   1.456 +              }
   1.457 +          }
   1.458 +         else //is a slave that suspended
   1.459 +          {
   1.460 +          SlaveReqHandling:
   1.461 +            (*requestHandler)( slave, semanticEnv ); //(note: an indirect function call is more efficient with fewer params, so other values are re-fetched from the slave instead)
   1.462 +         
   1.463 +               HOLISTIC__Record_AppResponder_end;
   1.464 +               MEAS__endReqHdlr;
   1.465 +               
   1.466 +            goto AssignWork;
   1.467 +          }
   1.468 +       } //if has suspended slave that needs handling
   1.469 +      
   1.470 +         //if slot empty, hand to Assigner to fill with a slave
   1.471 +      if( currSlot->needsSlaveAssigned )
   1.472 +       {    //Call plugin's Assigner to give slot a new slave
   1.473 +               HOLISTIC__Record_Assigner_start;
   1.474 +               
   1.475 +       AssignWork: //also entered by goto from the request handling above, which bypasses the needsSlaveAssigned test and HOLISTIC__Record_Assigner_start
   1.476 +     
   1.477 +         assignedSlaveVP = assignWork( semanticEnv, currSlot );
   1.478 +       
   1.479 +            //put the chosen slave into slot, and adjust flags and state
   1.480 +         if( assignedSlaveVP != NULL )
   1.481 +          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
   1.482 +            assignedSlaveVP->animSlotAssignedTo = currSlot;
   1.483 +            currSlot->needsSlaveAssigned  = FALSE;
   1.484 +            numSlotsFilled               += 1;
   1.485 +          }
   1.486 +         else
   1.487 +          {
   1.488 +            currSlot->needsSlaveAssigned  = TRUE; //local write
   1.489 +          }
   1.490 +               HOLISTIC__Record_Assigner_end;
   1.491 +       }//if slot needs slave assigned
   1.492 +    }//for( slotIdx..
   1.493 +
   1.494 +         MEAS__Capture_Post_Master_Point;
   1.495 +   
   1.496 +   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   1.497 +   flushRegisters();
   1.498 +   }//while(1) 
   1.499 + }
   1.500 +#endif //MODE__MULTI_LANG
   1.501 +
   1.502 +
   1.503 +
   1.504 +//This is the master when both multi-lang and multi-process modes are turned on
   1.505 +//#ifdef MODE__MULTI_LANG
   1.506 +//#ifdef MODE__MULTI_PROCESS
   1.507 +void animationMaster( void *initData, SlaveVP *masterVP )
   1.508 + { 
   1.509 +      //Used while scanning and filling animation slots
   1.510 +   int32           slotIdx, numSlotsFilled;
   1.511 +   AnimSlot       *currSlot, **animSlots;
   1.512 +   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   1.513 +   
   1.514 +      //Local copies, for performance
   1.515 +   MasterEnv      *masterEnv;
   1.516 +   SlaveAssigner   slaveAssigner;
   1.517 +   RequestHandler  requestHandler;
   1.518 +   PRSemEnv       *semanticEnv;
   1.519 +   int32           thisCoresIdx;
   1.520 +
   1.521 +   SlaveVP        *slave;
   1.522 +   PRProcess      *process; //NOTE(review): process, constrEnvHolder and langMagicNumber are never referenced in this function body
   1.523 +   PRConstrEnvHolder *constrEnvHolder;
   1.524 +   int32           langMagicNumber;
   1.525 +   
   1.526 +   //======================== Initializations ========================
   1.527 +   masterEnv        = (MasterEnv*)_PRMasterEnv;
   1.528 +   
   1.529 +   thisCoresIdx     = masterVP->coreAnimatedBy;
   1.530 +   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];
   1.531 +
   1.532 +   requestHandler   = masterEnv->requestHandler;
   1.533 +   slaveAssigner    = masterEnv->slaveAssigner;
   1.534 +   semanticEnv      = masterEnv->semanticEnv;
   1.535 +   
   1.536 +      //initialize, for non-multi-lang, non multi-proc case (NOTE(review): this function is the multi-lang, multi-process version -- confirm this comment applies here)
   1.537 +      // default handler gets put into master env by a registration call by lang
   1.538 +   endTaskHandler   = masterEnv->defaultTaskHandler; //NOTE(review): no declaration of endTaskHandler is visible in this function, and the code below uses taskEndHandler instead -- confirm which name is intended
   1.539 +   
   1.540 +      HOLISTIC__Insert_Master_Global_Vars;
   1.541 +   
   1.542 +   //======================== animationMaster ========================
   1.543 +   //Do loop gets requests handled and work assigned to slots..
   1.544 +   // work can either be a task or a resumed slave
   1.545 +   //Having two cases makes this logic complex.. can be finishing either, and 
   1.546 +   // then the next available work may be either.. so really have two distinct
   1.547 +   // loops that are inter-twined.. 
   1.548 +   while(1){
   1.549 +       
   1.550 +      MEAS__Capture_Pre_Master_Point
   1.551 +
   1.552 +      //Scan the animation slots
   1.553 +   numSlotsFilled = 0;
   1.554 +   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
   1.555 +    {
   1.556 +      currSlot = animSlots[ slotIdx ];
   1.557 +
   1.558 +         //Check if newly-done slave in slot, which will need request handled
   1.559 +      if( currSlot->workIsDone )
   1.560 +       { currSlot->workIsDone = FALSE;
   1.561 +       
   1.562 +               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
   1.563 +               MEAS__startReqHdlr;
   1.564 +               
   1.565 +         
   1.566 +            //process the request made by the slave (held inside slave struct)
   1.567 +         slave = currSlot->slaveAssignedToSlot;
   1.568 +         
   1.569 +            //check if the completed work was a task..
   1.570 +         if( slave->taskMetaInfo->isATask )
   1.571 +          {
   1.572 +             if( slave->reqst->type == TaskEnd ) 
   1.573 +              {    //do task end handler, which is registered separately
   1.574 +                   //note, end hdlr may use semantic data from reqst..
   1.575 +                   //get end-task handler
   1.576 +                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
   1.577 +                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
   1.578 +                
   1.579 +                (*taskEndHandler)( slave, semanticEnv );
   1.580 +                
   1.581 +                goto AssignWork; //NOTE(review): this path jumps past HOLISTIC__Record_AppResponder_end and MEAS__endReqHdlr below
   1.582 +              }
   1.583 +             else  //is a task, and just suspended
   1.584 +              {    //turn slot slave into free task slave & make replacement
   1.585 +                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType(); //NOTE(review): called with no arguments -- presumably a placeholder; confirm
   1.586 +                
   1.587 +                //goto normal slave request handling
   1.588 +                goto SlaveReqHandling; 
   1.589 +              }
   1.590 +          }
   1.591 +         else //is a slave that suspended
   1.592 +          {
   1.593 +             
   1.594 +          SlaveReqHandling:
   1.595 +            (*requestHandler)( slave, semanticEnv ); //(note: an indirect function call is more efficient with fewer params, so other values are re-fetched from the slave instead)
   1.596 +         
   1.597 +               HOLISTIC__Record_AppResponder_end;
   1.598 +               MEAS__endReqHdlr;
   1.599 +               
   1.600 +            goto AssignWork;
   1.601 +          }
   1.602 +       } //if has suspended slave that needs handling
   1.603 +      
   1.604 +         //if slot empty, hand to Assigner to fill with a slave
   1.605 +      if( currSlot->needsSlaveAssigned )
   1.606 +       {    //Scan sem environs, looking for one with ready work.
   1.607 +            // call the Assigner for that sem Env, to give slot a new slave
   1.608 +               HOLISTIC__Record_Assigner_start;
   1.609 +               
   1.610 +       AssignWork: //also entered by goto from the request handling above, which bypasses the needsSlaveAssigned test and HOLISTIC__Record_Assigner_start
   1.611 +     
   1.612 +         assignedSlaveVP = assignWork( semanticEnv, currSlot );
   1.613 +       
   1.614 +            //put the chosen slave into slot, and adjust flags and state
   1.615 +         if( assignedSlaveVP != NULL )
   1.616 +          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
   1.617 +            assignedSlaveVP->animSlotAssignedTo = currSlot;
   1.618 +            currSlot->needsSlaveAssigned  = FALSE;
   1.619 +            numSlotsFilled               += 1;
   1.620 +          }
   1.621 +         else
   1.622 +          {
   1.623 +            currSlot->needsSlaveAssigned  = TRUE; //local write
   1.624 +          }
   1.625 +               HOLISTIC__Record_Assigner_end;
   1.626 +       }//if slot needs slave assigned
   1.627 +    }//for( slotIdx..
   1.628 +
   1.629 +         MEAS__Capture_Post_Master_Point;
   1.630 +   
   1.631 +   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   1.632 +   flushRegisters();
   1.633 +   }//while(1) 
   1.634 + }
   1.635 +#endif  //MODE__MULTI_LANG  NOTE(review): the #ifdef lines above this function are commented out, so this #endif has no visible opening directive
   1.636 +#endif  //MODE__MULTI_PROCESS  NOTE(review): same as above -- no visible opening #ifdef
   1.637 +
   1.638 +
   1.639 +/*Chooses the slave to animate in 'slot'; after the semantic-env scan, does three things:
   1.640 + * 1) ask for a slave ready to resume
   1.641 + * 2) if none, then ask for a task, and assign it to the slot's task slave
   1.642 + * 3) if none, then prune a former task slave waiting to be recycled (returning NULL);
   1.643 + *    if there is none of those either, run the shutdown check.  Returns the chosen slave, or NULL.
   1.644 +   //Design note: have two separate assigners in each semantic env (the scan
   1.645 +   // below reads a single 'assigner' field), and each env keeps its own work
   1.646 +   // in its own structures.. the master, here, searches through the semantic
   1.647 +   // environs, takes the first that has work available, and whatever its
   1.648 +   // assigner returns is assigned to the slot..  However, also intend an override
   1.649 +   // assigner (not present in this function).. because static analysis tools know
   1.650 +   // which languages are grouped together.. and the override enables them to generate a custom
   1.651 +   // assigner that uses info from all the languages in a unified way..  Don't really expect this to happen, but making it possible.
   1.652 + */
   1.653 +inline SlaveVP * //NOTE(review): plain 'inline' with no static/extern -- under C99 inline rules this may not emit an external definition; confirm against the build's -std setting
   1.654 +assignWork( PRProcessEnv *processEnv, AnimSlot *slot )
   1.655 + { SlaveVP     *returnSlv; //result handed back at ReturnTheSlv; NULL means nothing was found for the slot
   1.656 +   //VSsSemEnv   *semEnv;
   1.657 +   //VSsSemData  *semData;
   1.658 +   int32        coreNum, slotNum; //filled from slot->coreSlotIsOn and slot->slotIdx below
   1.659 +   PRTaskMetaInfo *newTaskStub; //task obtained from getTaskStub(), if any
   1.660 +   SlaveVP     *freeTaskSlv; //entry read from the recycle queue, if any
   1.661 +
   1.662 +   
   1.663 +      //master has to handle slot slaves.. so either assigner returns
   1.664 +      // taskMetaInfo or else two assigners, one for slaves, other for tasks..     
   1.665 +   semEnvs = processEnv->semEnvs; //NOTE(review): semEnvs, numEnvs, envIdx, semEnv, assigner and retTaskMetaInfo are not declared in this function -- confirm where they come from
   1.666 +   numEnvs = processEnv->numSemEnvs;
   1.667 +   for( envIdx = 0; envIdx < numEnvs; envIdx++ )
   1.668 +    { semEnv = semEnvs[envIdx];
   1.669 +      if( semEnv->hasWork )
   1.670 +       { assigner = semEnv->assigner; 
   1.671 +         retTaskMetaInfo = (*assigner)( semEnv, slot );
   1.672 +         
   1.673 +         return retTaskMetaInfo; //quit, have work -- NOTE(review): this return skips the ReturnTheSlv label (so the holistic recording below is bypassed), and the name suggests task meta-info while the function returns SlaveVP* -- confirm
   1.674 +       }
   1.675 +    }
   1.676 +   
   1.677 +   coreNum = slot->coreSlotIsOn;
   1.678 +   slotNum = slot->slotIdx;
   1.679 + 
   1.680 +      //first try to get a ready slave
   1.681 +   returnSlv = getReadySlave();
   1.682 +
   1.683 +   if( returnSlv != NULL )
   1.684 +    { returnSlv->coreAnimatedBy   = coreNum;
   1.685 +    
   1.686 +         //have work, so reset Done flag (when work generated on other core)
   1.687 +      if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf
   1.688 +         processEnv->coreIsDone[coreNum] = FALSE;   //don't just write always -- NOTE(review): numCoresDone is not decremented here; confirm it is adjusted elsewhere
   1.689 +    
   1.690 +      goto ReturnTheSlv;
   1.691 +    }
   1.692 +   
   1.693 +      //were no slaves, so try to get a ready task.. 
   1.694 +   newTaskStub = getTaskStub();
   1.695 +   
   1.696 +   if( newTaskStub != NULL )
   1.697 +    { 
   1.698 +         //get the slot slave to assign the task to.. (looked up by core and slot index)
   1.699 +      returnSlv = processEnv->slotTaskSlvs[coreNum][slotNum];
   1.700 +
   1.701 +         //point slave to task's function, and mark slave as having task
   1.702 +      PR_int__reset_slaveVP_to_TopLvlFn( returnSlv, 
   1.703 +                          newTaskStub->taskType->fn, newTaskStub->args );
   1.704 +      returnSlv->taskStub          = newTaskStub; //link slave and task stub in both directions
   1.705 +      newTaskStub->slaveAssignedTo = returnSlv;
   1.706 +      returnSlv->needsTaskAssigned = FALSE;  //slot slave is a "Task" slave type
   1.707 +      
   1.708 +         //have work, so reset Done flag, if was set
   1.709 +      if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf
   1.710 +         processEnv->coreIsDone[coreNum] = FALSE;   //don't just write always -- NOTE(review): same question about numCoresDone as in the ready-slave path above
   1.711 +      
   1.712 +      goto ReturnTheSlv;
   1.713 +    }
   1.714 +   else
   1.715 +    {    //no task, so prune the recycle pool of free task slaves
   1.716 +      freeTaskSlv = readPrivQ( processEnv->freeTaskSlvRecycleQ );
   1.717 +      if( freeTaskSlv != NULL )
   1.718 +       {    //delete to bound the num extras, and deliver shutdown cond
   1.719 +         handleDissipate( freeTaskSlv, processEnv );
   1.720 +            //then return NULL
   1.721 +         returnSlv = NULL;
   1.722 +         
   1.723 +         goto ReturnTheSlv;
   1.724 +       }
   1.725 +      else
   1.726 +       { //candidate for shutdown.. if all extras dissipated, and no tasks
   1.727 +         // and no ready to resume slaves, then no way to generate
   1.728 +         // more tasks (on this core -- other core might have task still)
   1.729 +         if( processEnv->numLiveExtraTaskSlvs == 0 && 
   1.730 +             processEnv->numLiveThreadSlvs == 0 )
   1.731 +          { //This core sees no way to generate more tasks, so say it
   1.732 +            if( processEnv->coreIsDone[coreNum] == FALSE ) //count this core once per FALSE->TRUE transition
   1.733 +             { processEnv->numCoresDone += 1;
   1.734 +               processEnv->coreIsDone[coreNum] = TRUE;
   1.735 +               #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
   1.736 +               processEnv->shutdownInitiated = TRUE; //in sequential debug mode one done core is enough
   1.737 +               
   1.738 +               #else
   1.739 +               if( processEnv->numCoresDone == NUM_CORES )
   1.740 +                { //means no cores have work, and none can generate more
   1.741 +                  processEnv->shutdownInitiated = TRUE;
   1.742 +                }
   1.743 +               #endif
   1.744 +             }
   1.745 +          }
   1.746 +            //check if shutdown has been initiated by this or other core
   1.747 +         if(processEnv->shutdownInitiated) 
   1.748 +          { returnSlv = PR_SS__create_shutdown_slave(); //slot receives a shutdown slave instead of work
   1.749 +          }
   1.750 +         else
   1.751 +            returnSlv = NULL;
   1.752 +
   1.753 +         goto ReturnTheSlv; //don't need, but completes pattern
   1.754 +       } //else of if( freeTaskSlv != NULL )
   1.755 +    } //else of if( newTaskStub != NULL ), i.e. newTaskStub == NULL
   1.756 +   //outcome: 1)slave was just pointed to task, 2)no tasks, so slave NULL
   1.757 + 
   1.758 +
   1.759 + ReturnTheSlv:  //All goto paths land here.. to provide single point for holistic.. (the early return in the env scan above does not)
   1.760 +
   1.761 +   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
   1.762 +   if( returnSlv == NULL ) //nothing to run: substitute this slot's idle slave so the observer still sees a unit
   1.763 +    { returnSlv = processEnv->idleSlv[coreNum][slotNum]; 
   1.764 +    
   1.765 +         //things that would normally happen in resume(), but idle VPs
   1.766 +         // never go there
   1.767 +      returnSlv->assignCount++; //gives each idle unit a unique ID
   1.768 +      Unit newU;
   1.769 +      newU.vp = returnSlv->slaveID;
   1.770 +      newU.task = returnSlv->assignCount;
   1.771 +      addToListOfArrays(Unit,newU,processEnv->unitList);
   1.772 +
   1.773 +      if (returnSlv->assignCount > 1) //make a dependency from prev idle unit
   1.774 +       { Dependency newD;             // to this one
   1.775 +         newD.from_vp = returnSlv->slaveID;
   1.776 +         newD.from_task = returnSlv->assignCount - 1;
   1.777 +         newD.to_vp = returnSlv->slaveID;
   1.778 +         newD.to_task = returnSlv->assignCount;
   1.779 +         addToListOfArrays(Dependency, newD ,processEnv->ctlDependenciesList);  
   1.780 +       }
   1.781 +    }
   1.782 +   else //have a slave that will be assigned to the slot
   1.783 +    { //assignSlv->numTimesAssigned++;
   1.784 +         //get previous occupant of the slot (last_in_slot is indexed flat: core * NUM_ANIM_SLOTS + slot)
   1.785 +      Unit prev_in_slot = 
   1.786 +         processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum];
   1.787 +      if(prev_in_slot.vp != 0) //if not first slave in slot, make dependency
   1.788 +       { Dependency newD;      // is a hardware dependency
   1.789 +         newD.from_vp = prev_in_slot.vp;
   1.790 +         newD.from_task = prev_in_slot.task;
   1.791 +         newD.to_vp = returnSlv->slaveID;
   1.792 +         newD.to_task = returnSlv->assignCount;
   1.793 +         addToListOfArrays(Dependency,newD,processEnv->hwArcs);   
   1.794 +       }
   1.795 +      prev_in_slot.vp = returnSlv->slaveID; //make new slave the new previous
   1.796 +      prev_in_slot.task = returnSlv->assignCount;
   1.797 +      processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] =
   1.798 +         prev_in_slot;        
   1.799 +    }
   1.800 +   #endif
   1.801 +
   1.802 +   return( returnSlv );
   1.803 + }
   1.804 +
   1.805 +      
   1.806 +//=================================================================
   1.807 +         //#else  //is MODE__MULTI_LANG -- NOTE(review): the statements from here down follow the closing brace of assignWork with no function header, loop, or declarations in view; looks like a leftover draft of a master-loop body -- confirm before building
   1.808 +            //For multi-lang mode, first, get the constraint-env holder out of
   1.809 +            // the process, which is in the slave.
   1.810 +            //Second, get the magic number out of the request, use it to look up
   1.811 +            // the constraint Env within the constraint-env holder.
   1.812 +            //Then get the request handler out of the constr env
   1.813 +         constrEnvHolder = slave->process->constrEnvHolder;
   1.814 +         reqst = slave->request; //NOTE(review): reads slave->request here but slave->reqst in the test below -- confirm the field name
   1.815 +         langMagicNumber = reqst->langMagicNumber;
   1.816 +         semanticEnv = lookup( langMagicNumber, constrEnvHolder ); //a macro
   1.817 +         if( slave->reqst->type == taskEnd ) //end-task is special -- NOTE(review): spelled TaskEnd earlier in this file; confirm which constant exists
   1.818 +          {    //need to know what lang's task ended
   1.819 +            taskEndHandler = semanticEnv->taskEndHandler;
   1.820 +            (*taskEndHandler)( slave, reqst, semanticEnv ); //can put semantic data into task end reqst, for continuation, etc
   1.821 +               //this is a slot slave, get a new task for it
   1.822 +            if( !existsOverrideAssigner )//if exists, is set above, before loop -- NOTE(review): when an override exists this block is skipped and control reaches GotTask with no assigner called here -- confirm intent
   1.823 +             {    //search for task assigner that has work
   1.824 +               for( a = 0; a < num_assigners; a++ )
   1.825 +                { if( taskAssigners[a]->hasWork )
   1.826 +                   { newTaskAssigner = taskAssigners[a];
   1.827 +                     (*newTaskAssigner)( slave, semanticEnv );
   1.828 +                     goto GotTask;
   1.829 +                   }
   1.830 +                }
   1.831 +               goto NoTasks; //no task assigner reported work, so fall over to the slave assigners
   1.832 +             }
   1.833 +            
   1.834 +           GotTask:
   1.835 +            continue; //have work, so do next iter of loop, don't call slave assigner
   1.836 +          }
   1.837 +         if( slave->typeOfVP == taskSlotSlv ) changeSlvType();//is suspended task -- NOTE(review): spelled TaskSlotSlv earlier in this file; confirm
   1.838 +            //now do normal suspended slave request handler
   1.839 +         requestHandler = semanticEnv->requestHandler; //NOTE(review): the handler is fetched but not called anywhere in this fragment
   1.840 +         //#endif
   1.841 +
   1.842 +         
   1.843 +       }
   1.844 +         //If make it here, then was no task for this slot
   1.845 +         //slot empty, hand to Assigner to fill with a slave
   1.846 +      if( currSlot->needsSlaveAssigned )
   1.847 +       {    //Call plugin's Assigner to give slot a new slave
   1.848 +               HOLISTIC__Record_Assigner_start;
   1.849 +               
   1.850 +         //#ifdef  MODE__MULTI_LANG
   1.851 +        NoTasks:
   1.852 +            //First, choose an Assigner..
   1.853 +            //There are several Assigners, one for each langlet.. they all
   1.854 +            // indicate whether they have work available.. just pick the first
   1.855 +            // one that has work..  Or, if there's a Unified Assigner, call
   1.856 +            // that one..  So, go down array, checking..
   1.857 +         if( !existsOverrideAssigner ) 
   1.858 +          { for( a = 0; a < num_assigners; a++ )
   1.859 +             { if( assigners[a]->hasWork )
   1.860 +                { slaveAssigner = assigners[a];
   1.861 +                  goto GotAssigner;
   1.862 +                }
   1.863 +             }
   1.864 +            //no work, so just continue to next iter of scan loop
   1.865 +            continue;
   1.866 +          }
   1.867 +         //when an override exists, the assigner is set, once, above, so do nothing
   1.868 +        GotAssigner:
   1.869 +         //#endif
   1.870 +        
   1.871 +         assignedSlaveVP =
   1.872 +          (*slaveAssigner)( semanticEnv, currSlot );
   1.873 +         
   1.874 +            //put the chosen slave into slot, and adjust flags and state
   1.875 +         if( assignedSlaveVP != NULL )
   1.876 +          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
   1.877 +            assignedSlaveVP->animSlotAssignedTo = currSlot;
   1.878 +            currSlot->needsSlaveAssigned  = FALSE;
   1.879 +            numSlotsFilled               += 1;
   1.880 +            
   1.881 +            HOLISTIC__Record_Assigner_end; //NOTE(review): end is recorded only when a slave was assigned, and the 'goto NoTasks' entry skips the start record -- confirm the pairing is intended
   1.882 +          }
   1.883 +       }//if slot needs slave assigned
   1.884 +    }//for( slotIdx..
   1.885 +
   1.886 +         MEAS__Capture_Post_Master_Point;
   1.887 +   
   1.888 +   masterSwitchToCoreCtlr( masterVP );
   1.889 +   flushRegisters();
   1.890 +         DEBUG__printf(FALSE,"came back after switch to core -- so lock released!");
   1.891 +   }//while(1) 
   1.892 + }
   1.893 +