/*
 * Copyright 2010  OpenSourceStewardshipFoundation
 * 
 * Licensed under BSD
 */


#include <stdio.h>
#include <stddef.h>

#include "PR.h"


/*The animationMaster embodies most of the animator of the language.  The
 * animator is what embodies the behavior of language constructs.
 * As such, it is the animationMaster, in combination with the plugin
 * functions, that make the language constructs do their behavior.   
 * 
 *Within the code, this is the top-level-function of the masterVPs, and
 * runs when the coreController has no more slave VPs.  Its job is to
 * refill the animation slots with slaves that have work.
 *
 *There are multiple versions of the master, each tuned to a specific 
 * combination of modes.  This keeps the master simple, with reduced overhead,
 * when the application is not using the extra complexity.
 * 
 *As of Sept 2012, the versions available will be:
 * 1) Single language, which only exposes slaves (such as SSR or Vthread)
 * 2) Single language, which only exposes tasks  (such as pure dataflow)
 * 3) Single language, which exposes both (like Cilk, StarSs, and OpenMP)
 * 4) Multi-language, which always assumes both tasks and slaves
 * 5) Multi-language and multi-process, which also assumes both tasks and slaves
 *
 * 
 *
 */


//=====================  The versions of the Animation Master  =================
//
//==============================================================================

/* 1) This version is for a single language, that has only slaves, no tasks,
 *    such as Vthread or SSR.
 *This version is for when an application has only a single language, and
 * that language exposes slaves explicitly (as opposed to a task based 
 * language like pure dataflow).
 * 
 *
 *It scans the animation slots for just-completed slaves.
 * Each completed slave has a request in it.  So, the master hands each to
 * the plugin's request handler (there is only one plugin, because only one
 * lang).
 *Each request represents a language construct that has been encountered
 * by the application code in the slave. Passing the request to the
 * request handler is how that language construct's behavior gets invoked.
 * The request handler then performs the actions of the construct's
 * behavior. So, the request handler encodes the behavior of the 
 * language's parallelism constructs, and performs that when the master
 * hands it a slave containing a request to perform that construct.
 * 
 *On a shared-memory machine, the behavior of parallelism constructs
 * equals control, over order of execution of code.  Hence, the behavior
 * of the language constructs performed by the request handler is to 
 * choose the order that slaves get animated, and thereby control the
 * order that application code in the slaves executes.
 * 
 *To control order of animation of slaves, the request handler has a
 * semantic environment that holds data structures used to hold slaves
 * and choose when they're ready to be animated.
 *
 *Once a slave is marked as ready to be animated by the request handler,
 * it is the second plugin function, the Assigner, which chooses the core
 * the slave gets assigned to for animation.  Hence, the Assigner doesn't
 * perform any of the semantic behavior of language constructs, rather
 * it gives the language a chance to improve performance. The performance
 * of application code is strongly related to communication between
 * cores. On shared-memory machines, communication is caused during
 * execution of code, by memory accesses, and how much depends on contents
 * of caches connected to the core executing the code.  So, the placement
 * of slaves determines the communication caused during execution of the
 * slave's code.
 *The point of the Assigner, then, is to use application information during
 * execution of the program, to make choices about slave placement onto
 * cores, with the aim to put slaves close to caches containing the data
 * used by the slave's code.
 * 
 *==========================================================================
 *In summary, the animationMaster scans the slots, finds slaves
 * just-finished, which hold requests, pass those to the request handler,
 * along with the semantic environment, and the request handler then manages
 * the structures in the semantic env, which controls the order of
 * animation of slaves, and so embodies the behavior of the language
 * constructs.
 *The animationMaster then rescans the slots, offering each empty one to
 * the Assigner, along with the semantic environment.  The Assigner chooses
 * among the ready slaves in the semantic Env, finding the one best suited
 * to be animated by that slot's associated core.
 * 
 *==========================================================================
 *Implementation Details:
 * 
 *There is a separate masterVP for each core, but a single semantic
 * environment shared by all cores.  Each core also has its own scheduling
 * slots, which are used to communicate slaves between animationMaster and
 * coreController.  There is only one global variable, _PRMasterEnv, which
 * holds the semantic env and other things shared by the different
 * masterVPs.  The request handler and Assigner are registered with
 * the animationMaster by the language's init function, and a pointer to
 * each is in the _PRMasterEnv. (There are also some pthread related global
 * vars, but they're only used during init of PR).
 *PR gains control over the cores by essentially "turning off" the OS's
 * scheduler, using pthread pin-to-core commands.
 *
 *The masterVPs are created during init, with this animationMaster as their
 * top level function.  The masterVPs use the same SlaveVP data structure,
 * even though they're not slave VPs.
 *A "seed slave" is also created during init -- this is equivalent to the
 * "main" function in C, and acts as the entry-point to the PR-language-
 * based application.
 *The masterVPs share a single system-wide master-lock, so only one
 * masterVP may be animated at a time.
 *The core controllers access _PRMasterEnv to get the masterVP, and when
 * they start, the slots are all empty, so they run their associated core's
 * masterVP.  The first of those to get the master lock sees the seed slave
 * in the shared semantic environment, so when it runs the Assigner, that
 * returns the seed slave, which the animationMaster puts into a scheduling
 * slot then switches to the core controller.  That then switches the core
 * over to the seed slave, which then proceeds to execute language
 * constructs to create more slaves, and so on.  Each of those constructs
 * causes the seed slave to suspend, switching over to the core controller,
 * which eventually switches to the masterVP, which executes the 
 * request handler, which uses PR primitives to carry out the creation of
 * new slave VPs, which are marked as ready for the Assigner, and so on..
 * 
 *On animation slots, and system behavior:
 * A request may linger in an animation slot for a long time while
 * the slaves in the other slots are animated.  This only becomes a problem
 * when such a request is a choke-point in the constraints, and is needed
 * to free work for *other* cores.  To reduce this occurrence, the number
 * of animation slots should be kept low.  In balance, having multiple
 * animation slots amortizes the overhead of switching to the masterVP and
 * executing the animationMaster code, which drives for more than one. In
 * practice, the best balance should be discovered by profiling.
 */
void animationMaster( void *initData, SlaveVP *masterVP )
 {
      //initData is not referenced in this function.  masterVP supplies the
      // index of the core being served and is what gets switched back to
      // the core controller at the end of every pass.

      //Used while scanning and filling animation slots
      // (numSlotsFilled is only written in the visible code)
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner

      //Local copies, for performance
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   void           *semanticEnv;
   int32           thisCoresIdx;

   //======================== Initializations ========================
      //NOTE(review): the other versions of the master in this file read
      // _PRMasterEnv -- confirm _VMSMasterEnv is the intended global here.
   masterEnv        = (MasterEnv*)_VMSMasterEnv;

   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;

      HOLISTIC__Insert_Master_Global_Vars;

   //======================== animationMaster ========================
      //Each pass: handle the request of every slot whose work is done, offer
      // every slot that needs a slave to the Assigner, then switch to the
      // core controller.
   while(1){

      MEAS__Capture_Pre_Master_Point

         //Scan the animation slots
      numSlotsFilled = 0;
      for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
       {
         currSlot = animSlots[ slotIdx ];

            //Check if newly-done slave in slot, which will need request handled
         if( currSlot->workIsDone )
          {
               //Slot is now free: clear the done flag and mark the slot so
               // the assigner step below tries to refill it.
            currSlot->workIsDone         = FALSE;
            currSlot->needsSlaveAssigned = TRUE;

                  HOLISTIC__Record_AppResponder_start;
                  MEAS__startReqHdlr;

            SlaveVP *currSlave = currSlot->slaveAssignedToSlot;

               //NOTE(review): purpose of this call is not visible from this
               // file -- confirm it is still wanted.
            justAddedReqHdlrChg();

               //Handle the request exactly once, either by VMS or by the
               // language.
            if( currSlave->requests->reqType != LangReq )
             {    //The request is a standard VMS one, not one defined by the
                  // language, so VMS handles it, then queues slave to be
                  // assigned below
               handleReqInVMS( currSlave );
               writePrivQ( currSlave, VMSReadyQ );
             }
            else
             {    //Language handles request, which is held inside slave struc
               (*requestHandler)( currSlave, semanticEnv );
             }

                  HOLISTIC__Record_AppResponder_end;
                  MEAS__endReqHdlr;
          }

            //If slot empty, hand to Assigner to fill with a slave
         if( currSlot->needsSlaveAssigned )
          {    //Call plugin's Assigner to give slot a new slave
                  HOLISTIC__Record_Assigner_start;
            assignedSlaveVP =
             (*slaveAssigner)( semanticEnv, currSlot );

               //put the chosen slave into slot, and adjust flags and state
               // (a NULL result leaves the slot marked as needing a slave)
            if( assignedSlaveVP != NULL )
             { currSlot->slaveAssignedToSlot        = assignedSlaveVP;
               assignedSlaveVP->animSlotAssignedTo  = currSlot;
               currSlot->needsSlaveAssigned         = FALSE;
               numSlotsFilled                      += 1;

                  HOLISTIC__Record_Assigner_end;
             }
          }
       }//for( slotIdx..

            MEAS__Capture_Post_Master_Point;

         //Hand the core back to the core controller; execution resumes here
         // when the controller next switches to this master.
      masterSwitchToCoreCtlr( masterVP );
      flushRegisters();
            DEBUG__printf(FALSE,"came back after switch to core -- so lock released!");
    }//while(1)
 }


/* 2)  This version is for a single language that has only tasks, which 
 *     cannot be suspended.
 */
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Each pass over this core's animation slots handles the request of any
      // slave whose work is done, then calls assignWork() to look for new
      // work for the slot, and finally switches to the core controller.
      //initData is not referenced in this function.

      //Used while scanning and filling animation slots
      // (numSlotsFilled is written below but never read in the visible code)
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   
      //Local copies, for performance
      // (slaveAssigner is loaded below but never called here; assignWork()
      //  is used instead)
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   PRSemEnv       *semanticEnv;
   int32           thisCoresIdx;

   //#ifdef  MODE__MULTI_LANG
      //Of these, only 'slave' is used below; process, constrEnvHolder and
      // langMagicNumber are declared but never referenced.
   SlaveVP        *slave;
   PRProcess      *process;
   PRConstrEnvHolder *constrEnvHolder;
   int32           langMagicNumber;
   //#endif
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRMasterEnv;
   
      //Select the slot array belonging to the core this master runs on
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      //initialize, for non-multi-lang, non multi-proc case
      // default handler gets put into master env by a registration call by lang
      //NOTE(review): no declaration of endTaskHandler is visible in this
      // function, and the handler actually invoked below is named
      // taskEndHandler -- confirm which name is intended and where it is
      // declared.
   endTaskHandler   = masterEnv->defaultTaskHandler;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1){
       
      MEAS__Capture_Pre_Master_Point

      //Scan the animation slots
   numSlotsFilled = 0;
   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
    {
      currSlot = animSlots[ slotIdx ];

         //Check if newly-done slave in slot, which will need request handled
         //Note: unlike the assignment step, this branch does not set
         // needsSlaveAssigned itself; every path through it ends in
         // "goto AssignWork", which updates that flag.
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the completed work was a task..
         if( slave->taskMetaInfo->isATask )
          {
             if( slave->reqst->type == TaskEnd ) 
              {    //do task end handler, which is registered separately
                   //note, end hdlr may use semantic data from reqst..
                //#ifdef  MODE__MULTI_LANG
                   //get end-task handler
                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
                   //The handler is always taken from the task's own meta-info
                   // here, so the default loaded during initialization is not
                   // the one that gets called.
                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
                //#endif
                (*taskEndHandler)( slave, semanticEnv );
                
                   //NOTE(review): this path leaves without reaching
                   // HOLISTIC__Record_AppResponder_end / MEAS__endReqHdlr,
                   // although the matching start macros ran above -- confirm
                   // that is intended.
                goto AssignWork;
              }
             else  //is a task, and just suspended
              {    //turn slot slave into free task slave & make replacement
                   //NOTE(review): changeSlvType() is called with no arguments
                   // -- confirm how it identifies the slave and slot.
                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType();
                
                //goto normal slave request handling
                   // (the label is inside the else-branch just below)
                goto SlaveReqHandling; 
              }
          }
         else //is a slave that suspended
          {
          SlaveReqHandling:
            (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
         
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //if slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Call plugin's Assigner to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
            //Jump target for the request-handling paths above.  Arriving by
            // goto bypasses both the needsSlaveAssigned test and
            // HOLISTIC__Record_Assigner_start, while the matching _end below
            // still runs.
       AssignWork:
     
            //NOTE(review): the assignWork() defined later in this file takes
            // a PRProcessEnv* as first parameter, while semanticEnv here is a
            // PRSemEnv* -- confirm the intended argument.
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
          }
         else
          {    //no work found: leave the slot marked so the next pass retries
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   flushRegisters();
   }//while(1) 
 }


/*This is the master when just multi-lang, but not multi-process mode is on.
 * This version has to handle both tasks and slaves, and do extra work of 
 * looking up the semantic env and handlers to use, for each completed bit of 
 * work.
 *It also has to search through the semantic envs to find one with work,
 * then ask that env's assigner to return a unit of that work.
 * 
 *The language is written to startup in the same way as if it were the only
 * language in the app, and it operates in the same way,
 * the only difference between single language and multi-lang is here, in the
 * master.
 *This invisibility to mode is why the language has to use registration calls
 * for everything during startup -- those calls do different things depending
 * on whether it's single-language or multi-language mode.
 * 
 *In this version of the master, work can either be a task or a resumed slave
 *Having two cases makes this logic complex.. can be finishing either, and
 * then the next available work may be either.. so really have two distinct 
 * loops that are inter-twined.. 
 * 
 *Some special cases:
 * A task-end is a special case for a few reasons (below).
 * A task-end can't block a slave (can't cause it to "logically suspend")
 * A task available for work can only be assigned to a special slave, which 
 *   has been set aside for doing tasks, one such task-slave is always 
 *   assigned to each slot. So, when a task ends, a new task is assigned to
 *   that slot's task-slave right away.  
 * But if no tasks are available, then have to switch over to looking at
 *   slaves to find one ready to resume, to find work for the slot.
 * If a task just suspends, not ends, then its task-slave is no longer 
 *   available to take new tasks, so a new task-slave has to be assigned to
 *   that slot.  Then the slave of the suspended task is turned into a free
 *   task-slave and request handling is done on it as if it were a slave 
 *   that suspended.
 * After request handling, do the same sequence of looking for a task to be
 *   work, and if none, look for a slave ready to resume, as work for the slot.
 * If a slave suspends, handle its request, then look for work.. first for a
 *   task to assign, and if none, slaves ready to resume.
 * Another special case is when task-end is done on a free task-slave.. in
 *   that case, the slave has no more work and no way to get more.. so place
 *   it into a recycle queue.
 * If no work is found of either type, then do a special thing to prune down
 *   the extra slaves in the recycle queue, just so don't get too many..
 * 
 *The multi-lang thing complicates matters..  
 *
 *For request handling, it means have to first fetch the semantic environment
 * of the language, and then do the request handler pointed to by that
 * semantic env.
 *For assigning, things get more complex because of competing goals..  One
 * goal is for language specific stuff to be used during assignment, so
 * assigner can make higher quality decisions..  but with multiple languages,
 * which only get mixed in the application, the assigners can't be written
 * with knowledge of each other.  So, they can only make localized decisions,
 * and so different language's assigners may interfere with each other..
 * 
 *So, have some possibilities available:
 *1) can have a fixed scheduler in the proto-runtime, that all the
 * languages give their work to..  (but then lose language-specific info, 
 * there is a standard PR format for assignment info, and the language 
 * attaches this to the work-unit when it gives it to PR.. also have issue
 * with HWSim, which uses a priority Q instead of FIFO, and requests can 
 * "undo" previous work put in, so request handlers need way to manipulate
 * the work-holding Q..) (this might be fudgeable with
 * HWSim, if the master did a lang-supplied callback each time it assigns a
 * unit to a slot..  then HWSim can keep exactly one unit of work in PR's
 * queue at a time..  but this is quite hack-like.. or perhaps HWSim supplies
 * a task-end handler that kicks the next unit of work from HWSim internal
 * priority queue, over to PR readyQ)
 *2) can have each language have its own semantic env, that holds its own
 * work, which is assigned by its own assigner.. then the master searches
 * through all the semantic envs to find one with work and asks it give work..
 * (this has downside of blinding assigners to each other.. but does work
 * for HWSim case)
 *3) could make PR have a different readyQ for each core, and ask the lang
 * to put work to the core it prefers.. but the work may be moved by PR if
 * needed, say if one core idles for too long. This is a hybrid approach, 
 * letting the language decide which core, but PR keeps the work and does it
 * FIFO style.. (this might also be fudgeable with HWSim, in similar fashion, 
 * but it would be complicated by having to track cores separately) 
 *
 *Choosing 2, to keep compatibility with single-lang mode..  it allows the same
 * assigner to be used for single-lang as for multi-lang..  the overhead of
 * the extra master search for work is part of the price of the flexibility,
 * but should be fairly small.. takes the first env that has work available, 
 * and whatever it returns is assigned to the slot..
 * 
 *As a hybrid, giving an option for a unified override assigner to be registered
 * and used..  This allows something like a static analysis to detect
 * which languages are grouped together, and then analyze the pattern of 
 * construct calls, and generate a custom assigner that uses info from all
 * the languages in a unified way..  Don't really expect this to happen, 
 * but making it possible.
 */
#ifdef  MODE__MULTI_LANG
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Each pass over this core's animation slots handles the request of any
      // slave whose work is done, then calls assignWork() to look for new
      // work for the slot, and finally switches to the core controller.
      //initData is not referenced in this function.

      //Used while scanning and filling animation slots
      // (numSlotsFilled is written below but never read in the visible code)
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   
      //Local copies, for performance
      // (slaveAssigner is loaded below but never called here; assignWork()
      //  is used instead)
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   PRSemEnv       *semanticEnv;
   int32           thisCoresIdx;

   //#ifdef  MODE__MULTI_LANG
      //Of these, only 'slave' is used below; process, constrEnvHolder and
      // langMagicNumber are declared but never referenced.
   SlaveVP        *slave;
   PRProcess      *process;
   PRConstrEnvHolder *constrEnvHolder;
   int32           langMagicNumber;
   //#endif
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRMasterEnv;
   
      //Select the slot array belonging to the core this master runs on
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

      //NOTE(review): the comment ahead of this function describes fetching
      // the semantic env and handlers per language; the code below still
      // uses the single set cached from masterEnv -- confirm whether the
      // per-language lookup is still to be added.
   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      //initialize, for non-multi-lang, non multi-proc case
      // default handler gets put into master env by a registration call by lang
      //NOTE(review): no declaration of endTaskHandler is visible in this
      // function, and the handler actually invoked below is named
      // taskEndHandler -- confirm which name is intended and where it is
      // declared.
   endTaskHandler   = masterEnv->defaultTaskHandler;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1){
       
      MEAS__Capture_Pre_Master_Point

      //Scan the animation slots
   numSlotsFilled = 0;
   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
    {
      currSlot = animSlots[ slotIdx ];

         //Check if newly-done slave in slot, which will need request handled
         //Note: this branch does not set needsSlaveAssigned itself; every
         // path through it ends in "goto AssignWork", which updates that flag.
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the completed work was a task..
         if( slave->taskMetaInfo->isATask )
          {
             if( slave->reqst->type == TaskEnd ) 
              {    //do task end handler, which is registered separately
                   //note, end hdlr may use semantic data from reqst..
                //#ifdef  MODE__MULTI_LANG
                   //get end-task handler
                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
                   //The handler is always taken from the task's own meta-info
                   // here, so the default loaded during initialization is not
                   // the one that gets called.
                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
                //#endif
                (*taskEndHandler)( slave, semanticEnv );
                
                   //NOTE(review): this path leaves without reaching
                   // HOLISTIC__Record_AppResponder_end / MEAS__endReqHdlr,
                   // although the matching start macros ran above -- confirm
                   // that is intended.
                goto AssignWork;
              }
             else  //is a task, and just suspended
              {    //turn slot slave into free task slave & make replacement
                   //NOTE(review): changeSlvType() is called with no arguments
                   // -- confirm how it identifies the slave and slot.
                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType();
                
                //goto normal slave request handling
                   // (the label is inside the else-branch just below)
                goto SlaveReqHandling; 
              }
          }
         else //is a slave that suspended
          {
          SlaveReqHandling:
            (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
         
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //if slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Call plugin's Assigner to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
            //Jump target for the request-handling paths above.  Arriving by
            // goto bypasses both the needsSlaveAssigned test and
            // HOLISTIC__Record_Assigner_start, while the matching _end below
            // still runs.
       AssignWork:
     
            //NOTE(review): the assignWork() defined later in this file takes
            // a PRProcessEnv* as first parameter, while semanticEnv here is a
            // PRSemEnv* -- confirm the intended argument.
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
          }
         else
          {    //no work found: leave the slot marked so the next pass retries
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   flushRegisters();
   }//while(1) 
 }
#endif //MODE__MULTI_LANG


//This is the master when both multi-lang and multi-process modes are turned on
//Both guards are active so that the two #endif lines closing this section
// have matching #ifdef lines.
//NOTE(review): the multi-lang-only version earlier in this file is guarded by
// MODE__MULTI_LANG alone, so it is also compiled when both macros are
// defined -- confirm how the versions are meant to exclude each other.
#ifdef MODE__MULTI_LANG
#ifdef MODE__MULTI_PROCESS
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Each pass over this core's animation slots handles the request of any
      // slave whose work is done, then calls assignWork() to look for new
      // work for the slot, and finally switches to the core controller.
      //initData is not referenced in this function.

      //Used while scanning and filling animation slots
      // (numSlotsFilled is written below but never read in the visible code)
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   
      //Local copies, for performance
      // (slaveAssigner is loaded below but never called here; assignWork()
      //  is used instead)
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   PRSemEnv       *semanticEnv;
   int32           thisCoresIdx;

      //Of these, only 'slave' is used below; process, constrEnvHolder and
      // langMagicNumber are declared but never referenced.
   SlaveVP        *slave;
   PRProcess      *process;
   PRConstrEnvHolder *constrEnvHolder;
   int32           langMagicNumber;
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRMasterEnv;
   
      //Select the slot array belonging to the core this master runs on
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      //initialize, for non-multi-lang, non multi-proc case
      // default handler gets put into master env by a registration call by lang
      //NOTE(review): no declaration of endTaskHandler is visible in this
      // function, and the handler actually invoked below is named
      // taskEndHandler -- confirm which name is intended and where it is
      // declared.
   endTaskHandler   = masterEnv->defaultTaskHandler;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1){
       
      MEAS__Capture_Pre_Master_Point

      //Scan the animation slots
   numSlotsFilled = 0;
   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
    {
      currSlot = animSlots[ slotIdx ];

         //Check if newly-done slave in slot, which will need request handled
         //Note: this branch does not set needsSlaveAssigned itself; every
         // path through it ends in "goto AssignWork", which updates that flag.
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the completed work was a task..
         if( slave->taskMetaInfo->isATask )
          {
             if( slave->reqst->type == TaskEnd ) 
              {    //do task end handler, which is registered separately
                   //note, end hdlr may use semantic data from reqst..
                   //get end-task handler
                //taskEndHandler = lookup( slave->reqst->langMagicNumber, processEnv );
                   //The handler is always taken from the task's own meta-info
                   // here, so the default loaded during initialization is not
                   // the one that gets called.
                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
                
                (*taskEndHandler)( slave, semanticEnv );
                
                   //NOTE(review): this path leaves without reaching
                   // HOLISTIC__Record_AppResponder_end / MEAS__endReqHdlr,
                   // although the matching start macros ran above -- confirm
                   // that is intended.
                goto AssignWork;
              }
             else  //is a task, and just suspended
              {    //turn slot slave into free task slave & make replacement
                   //NOTE(review): changeSlvType() is called with no arguments
                   // -- confirm how it identifies the slave and slot.
                if( slave->typeOfVP == TaskSlotSlv ) changeSlvType();
                
                //goto normal slave request handling
                   // (the label is inside the else-branch just below)
                goto SlaveReqHandling; 
              }
          }
         else //is a slave that suspended
          {
             
          SlaveReqHandling:
            (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
         
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //if slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Scan sem environs, looking for one with ready work.
            // call the Assigner for that sem Env, to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
            //Jump target for the request-handling paths above.  Arriving by
            // goto bypasses both the needsSlaveAssigned test and
            // HOLISTIC__Record_Assigner_start, while the matching _end below
            // still runs.
       AssignWork:
     
            //NOTE(review): the assignWork() defined later in this file takes
            // a PRProcessEnv* as first parameter, while semanticEnv here is a
            // PRSemEnv* -- confirm the intended argument.
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
          }
         else
          {    //no work found: leave the slot marked so the next pass retries
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   flushRegisters();
   }//while(1) 
 }
#endif  //MODE__MULTI_PROCESS
#endif  //MODE__MULTI_LANG


/*Chooses the slave that the given animation slot should animate next.
 *
 *First the semantic environments of the process are scanned, in array
 * order.  The first one whose hasWork flag is set has its assigner called,
 * and whatever that assigner hands back is returned immediately.
 *
 *If no environment reports work, this does three things, in order:
 * 1) ask for a slave ready to resume
 * 2) if none, then ask for a task, and assign it to the slot's task slave
 * 3) if none, then prune one former task slave waiting to be recycled.  If
 *    there is nothing to prune either, this core is a candidate for being
 *    "done"; once shutdown has been initiated a shutdown slave is returned.
 *
 *Design notes:
 * Each semantic env has its own assigner and keeps its own work in its own
 * structures.  The master, here, searches through the semantic environs,
 * takes the first that has work available, and whatever it returns is
 * assigned to the slot.
 * An override assigner is also intended -- static analysis tools know which
 * languages are grouped together, and the override would let them generate
 * a custom assigner that uses info from all the languages in a unified way.
 * Don't really expect this to happen, but making it possible.
 *
 *Bookkeeping invariant: numCoresDone is kept equal to the number of TRUE
 * entries in coreIsDone[].  It is incremented where a core marks itself
 * done, and decremented where a core that was marked done finds work again.
 *
 *processEnv: process whose semantic envs, ready work, recycle queue and
 *            done/shutdown state are consulted and updated.
 *slot:       the animation slot being filled; supplies the core number and
 *            slot index.
 *Returns:    the slave to place in the slot, or NULL when there is nothing
 *            to assign.  When HOLISTIC__TURN_ON_OBSERVE_UCC is defined, the
 *            slot's idle slave is returned in place of NULL.
 *
 *NOTE(review): declared "inline" without "static" or "extern" -- under C99
 * inline rules that needs an external definition in some translation unit;
 * confirm the build uses gnu89 inline semantics or provides one.
 */
inline SlaveVP *
assignWork( PRProcessEnv *processEnv, AnimSlot *slot )
 { SlaveVP     *returnSlv;
   int32        coreNum, slotNum;
   PRTaskMetaInfo *newTaskStub;
   SlaveVP     *freeTaskSlv;

   
      //master has to handle slot slaves.. so either assigner returns
      // taskMetaInfo or else two assigners, one for slaves, other for tasks..
      //NOTE(review): semEnvs, numEnvs, envIdx, semEnv, assigner and
      // retTaskMetaInfo are not declared inside this function -- confirm
      // they are visible at file scope, or add declarations of the right
      // project types.
      //NOTE(review): the return inside this loop leaves without passing
      // through ReturnTheSlv, so the holistic recording below is skipped
      // for work found this way -- confirm that is intended.
   semEnvs = processEnv->semEnvs;
   numEnvs = processEnv->numSemEnvs;
   for( envIdx = 0; envIdx < numEnvs; envIdx++ )
    { semEnv = semEnvs[envIdx];
      if( semEnv->hasWork )
       { assigner = semEnv->assigner; 
         retTaskMetaInfo = (*assigner)( semEnv, slot );
         
         return retTaskMetaInfo; //quit, have work
       }
    }
   
   coreNum = slot->coreSlotIsOn;
   slotNum = slot->slotIdx;
 
      //first try to get a ready slave
   returnSlv = getReadySlave();

   if( returnSlv != NULL )
    { returnSlv->coreAnimatedBy   = coreNum;
    
         //have work, so reset Done flag (when work generated on other core)
         // and take this core back out of the count of done cores, so the
         // count cannot be bumped twice for the same core
      if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf
       { processEnv->coreIsDone[coreNum] = FALSE;   //don't just write always
         processEnv->numCoresDone       -= 1;
       }
    
      goto ReturnTheSlv;
    }
   
      //were no slaves, so try to get a ready task.. 
   newTaskStub = getTaskStub();
   
   if( newTaskStub != NULL )
    { 
         //get the slot slave to assign the task to..
      returnSlv = processEnv->slotTaskSlvs[coreNum][slotNum];

         //point slave to task's function, and mark slave as having task
      PR_int__reset_slaveVP_to_TopLvlFn( returnSlv, 
                          newTaskStub->taskType->fn, newTaskStub->args );
      returnSlv->taskStub          = newTaskStub;
      newTaskStub->slaveAssignedTo = returnSlv;
      returnSlv->needsTaskAssigned = FALSE;  //slot slave is a "Task" slave type
      
         //have work, so reset Done flag, if was set, and un-count this core
      if( processEnv->coreIsDone[coreNum] == TRUE ) //reads are higher perf
       { processEnv->coreIsDone[coreNum] = FALSE;   //don't just write always
         processEnv->numCoresDone       -= 1;
       }
      
      goto ReturnTheSlv;
    }
   else
    {    //no task, so prune the recycle pool of free task slaves
      freeTaskSlv = readPrivQ( processEnv->freeTaskSlvRecycleQ );
      if( freeTaskSlv != NULL )
       {    //delete to bound the num extras, and deliver shutdown cond
         handleDissipate( freeTaskSlv, processEnv );
            //then return NULL
         returnSlv = NULL;
         
         goto ReturnTheSlv;
       }
      else
       { //candidate for shutdown.. if all extras dissipated, and no tasks
         // and no ready to resume slaves, then no way to generate
         // more tasks (on this core -- other core might have task still)
         if( processEnv->numLiveExtraTaskSlvs == 0 && 
             processEnv->numLiveThreadSlvs == 0 )
          { //This core sees no way to generate more tasks, so say it
            // (only count the core on the FALSE -> TRUE transition)
            if( processEnv->coreIsDone[coreNum] == FALSE )
             { processEnv->numCoresDone += 1;
               processEnv->coreIsDone[coreNum] = TRUE;
               #ifdef DEBUG__TURN_ON_SEQUENTIAL_MODE
               processEnv->shutdownInitiated = TRUE;
               
               #else
               if( processEnv->numCoresDone == NUM_CORES )
                { //means no cores have work, and none can generate more
                  processEnv->shutdownInitiated = TRUE;
                }
               #endif
             }
          }
            //check if shutdown has been initiated by this or other core
         if(processEnv->shutdownInitiated) 
          { returnSlv = PR_SS__create_shutdown_slave();
          }
         else
            returnSlv = NULL;

         goto ReturnTheSlv; //don't need, but completes pattern
       } //if( freeTaskSlv != NULL )
    } //if( newTaskStub == NULL )
   //outcome: 1)slave was just pointed to task, 2)no tasks, so slave NULL
 

 ReturnTheSlv:  //All paths after the env scan come here.. to provide a
                // single point for holistic..

   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
   if( returnSlv == NULL )
    { returnSlv = processEnv->idleSlv[coreNum][slotNum]; 
    
         //things that would normally happen in resume(), but idle VPs
         // never go there
      returnSlv->assignCount++; //gives each idle unit a unique ID
      Unit newU;
      newU.vp = returnSlv->slaveID;
      newU.task = returnSlv->assignCount;
      addToListOfArrays(Unit,newU,processEnv->unitList);

      if (returnSlv->assignCount > 1) //make a dependency from prev idle unit
       { Dependency newD;             // to this one
         newD.from_vp = returnSlv->slaveID;
         newD.from_task = returnSlv->assignCount - 1;
         newD.to_vp = returnSlv->slaveID;
         newD.to_task = returnSlv->assignCount;
         addToListOfArrays(Dependency, newD ,processEnv->ctlDependenciesList);  
       }
    }
   else //have a slave will be assigned to the slot
    { //assignSlv->numTimesAssigned++;
         //get previous occupant of the slot
      Unit prev_in_slot = 
         processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum];
      if(prev_in_slot.vp != 0) //if not first slave in slot, make dependency
       { Dependency newD;      // is a hardware dependency
         newD.from_vp = prev_in_slot.vp;
         newD.from_task = prev_in_slot.task;
         newD.to_vp = returnSlv->slaveID;
         newD.to_task = returnSlv->assignCount;
         addToListOfArrays(Dependency,newD,processEnv->hwArcs);   
       }
      prev_in_slot.vp = returnSlv->slaveID; //make new slave the new previous
      prev_in_slot.task = returnSlv->assignCount;
      processEnv->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] =
         prev_in_slot;        
    }
   #endif

   return( returnSlv );
 }

      
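//=================================================================
//NOTE(review): the code from here to the end of this section has no visible
// enclosing function header, and its closing braces and "continue"
// statements have no matching opener or loop in view.  It looks like a
// leftover fragment of a multi-lang master loop -- confirm whether it is
// meant to be compiled, or should be fenced off or removed.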
         //#else  //is MODE__MULTI_LANG
            //For multi-lang mode, first, get the constraint-env holder out of
            // the process, which is in the slave.
            //Second, get the magic number out of the request, use it to look up
            // the constraint Env within the constraint-env holder.
            //Then get the request handler out of the constr env
         constrEnvHolder = slave->process->constrEnvHolder;
         reqst = slave->request;
         langMagicNumber = reqst->langMagicNumber;
         semanticEnv = lookup( langMagicNumber, constrEnvHolder ); //a macro
         if( slave->reqst->type == taskEnd ) //end-task is special
          {    //need to know what lang's task ended
            taskEndHandler = semanticEnv->taskEndHandler;
            (*taskEndHandler)( slave, reqst, semanticEnv ); //can put semantic data into task end reqst, for continuation, etc
               //this is a slot slave, get a new task for it
            if( !existsOverrideAssigner )//if exists, is set above, before loop
             {    //search for task assigner that has work
               for( a = 0; a < num_assigners; a++ )
                { if( taskAssigners[a]->hasWork )
                   { newTaskAssigner = taskAssigners[a];
                     (*newTaskAssigner)( slave, semanticEnv );
                     goto GotTask;
                   }
                }
               goto NoTasks;
             }
            
           GotTask:
            continue; //have work, so do next iter of loop, don't call slave assigner
          }
         if( slave->typeOfVP == taskSlotSlv ) changeSlvType();//is suspended task
            //now do normal suspended slave request handler
         requestHandler = semanticEnv->requestHandler;
         //#endif

         
       }
         //If make it here, then was no task for this slot
         //slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Call plugin's Assigner to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
         //#ifdef  MODE__MULTI_LANG
        NoTasks:
            //First, choose an Assigner..
            //There are several Assigners, one for each langlet.. they all
            // indicate whether they have work available.. just pick the first
            // one that has work..  Or, if there's a Unified Assigner, call
            // that one..  So, go down array, checking..
         if( !existsOverrideAssigner ) 
          { for( a = 0; a < num_assigners; a++ )
             { if( assigners[a]->hasWork )
                { slaveAssigner = assigners[a];
                  goto GotAssigner;
                }
             }
            //no work, so just continue to next iter of scan loop
            continue;
          }
         //when exists override, the assigner is set, once, above, so do nothing
        GotAssigner:
         //#endif
        
         assignedSlaveVP =
          (*slaveAssigner)( semanticEnv, currSlot );
         
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
            
            HOLISTIC__Record_Assigner_end;
          }
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP );
   flushRegisters();
         DEBUG__printf(FALSE,"came back after switch to core -- so lock released!");
   }//while(1) 
 }