/*
 * Copyright 2010  OpenSourceStewardshipFoundation
 * 
 * Licensed under BSD
 */


#include <stdio.h>
#include <stddef.h>

#include "PR.h"
#include "VSs_impl/VSs.h"

//Forward declaration; the definition is near the end of this file and the
// call is made from masterFunction_multiLang when a task suspends while
// running in a slot slave.
//NOTE(review): the second parameter is declared here as PRProcessEnv*, but
// the definition declares it as PRProcess* -- the two must agree.  Confirm
// which type the call site is meant to pass, then fix the other.
inline void
replaceWithNewSlotSlv( SlaveVP *requestingSlv, PRProcessEnv *processEnv );


/*The animationMaster embodies most of the animator of the language.  The
 * animator is what embodies the behavior of language constructs.
 * As such, it is the animationMaster, in combination with the plugin
 * functions, that make the language constructs do their behavior.   
 * 
 *Within the code, this is the top-level-function of the masterVPs, and
 * runs when the coreController has no more slave VPs.  Its job is to
 * refill the animation slots with slaves that have work.
 *
 *There are multiple versions of the master, each tuned to a specific 
 * combination of modes.  This keeps the master simple, with reduced overhead,
 * when the application is not using the extra complexity.
 * 
 *As of Sept 2012, the versions available will be:
 * 1) Single language, which only exposes slaves (such as SSR or Vthread)
 * 2) Single language, which only exposes tasks  (such as pure dataflow)
 * 3) Single language, which exposes both (like Cilk, StarSs, and OpenMP)
 * 4) Multi-language, which always assumes both tasks and slaves
 * 5) Multi-language and multi-process, which also assumes both tasks and slaves
 *
 * 
 *
 */

        
//=====================  The versions of the Animation Master  =================
//
//==============================================================================

/* 1) This version is for a single language, that has only slaves, no tasks,
 *    such as Vthread or SSR.
 *This version is for when an application has only a single language, and
 * that language exposes slaves explicitly (as opposed to a task based 
 * language like pure dataflow).
 * 
 *
 *It scans the animation slots for just-completed slaves.
 * Each completed slave has a request in it.  So, the master hands each to
 * the plugin's request handler (there is only one plugin, because only one
 * lang).
 *Each request represents a language construct that has been encountered
 * by the application code in the slave. Passing the request to the
 * request handler is how that language construct's behavior gets invoked.
 * The request handler then performs the actions of the construct's
 * behavior. So, the request handler encodes the behavior of the 
 * language's parallelism constructs, and performs that when the master
 * hands it a slave containing a request to perform that construct.
 * 
 *On a shared-memory machine, the behavior of parallelism constructs
 * equals control, over order of execution of code.  Hence, the behavior
 * of the language constructs performed by the request handler is to 
 * choose the order that slaves get animated, and thereby control the
 * order that application code in the slaves executes.
 * 
 *To control order of animation of slaves, the request handler has a
 * semantic environment that holds data structures used to hold slaves
 * and choose when they're ready to be animated.
 *
 *Once a slave is marked as ready to be animated by the request handler,
 * it is the second plugin function, the Assigner, which chooses the core
 * the slave gets assigned to for animation.  Hence, the Assigner doesn't
 * perform any of the semantic behavior of language constructs, rather
 * it gives the language a chance to improve performance. The performance
 * of application code is strongly related to communication between
 * cores. On shared-memory machines, communication is caused during
 * execution of code, by memory accesses, and how much depends on contents
 * of caches connected to the core executing the code.  So, the placement
 * of slaves determines the communication caused during execution of the
 * slave's code.
 *The point of the Assigner, then, is to use application information during
 * execution of the program, to make choices about slave placement onto
 * cores, with the aim to put slaves close to caches containing the data
 * used by the slave's code.
 * 
 *==========================================================================
 *In summary, the animationMaster scans the slots, finds slaves
 * just-finished, which hold requests, pass those to the request handler,
 * along with the semantic environment, and the request handler then manages
 * the structures in the semantic env, which controls the order of
 * animation of slaves, and so embodies the behavior of the language
 * constructs.
 *The animationMaster then rescans the slots, offering each empty one to
 * the Assigner, along with the semantic environment.  The Assigner chooses
 * among the ready slaves in the semantic Env, finding the one best suited
 * to be animated by that slot's associated core.
 * 
 *==========================================================================
 *Implementation Details:
 * 
 *There is a separate masterVP for each core, but a single semantic
 * environment shared by all cores.  Each core also has its own scheduling
 * slots, which are used to communicate slaves between animationMaster and
 * coreController.  There is only one global variable, _PRTopEnv, which
 * holds the semantic env and other things shared by the different
 * masterVPs.  The request handler and Assigner are registered with
 * the animationMaster by the language's init function, and a pointer to
 * each is in the _PRTopEnv. (There are also some pthread related global
 * vars, but they're only used during init of PR).
 *PR gains control over the cores by essentially "turning off" the OS's
 * scheduler, using pthread pin-to-core commands.
 *
 *The masterVPs are created during init, with this animationMaster as their
 * top level function.  The masterVPs use the same SlaveVP data structure,
 * even though they're not slave VPs.
 *A "seed slave" is also created during init -- this is equivalent to the
 * "main" function in C, and acts as the entry-point to the PR-language-
 * based application.
 *The masterVPs share a single system-wide master-lock, so only one
 * masterVP may be animated at a time.
 *The core controllers access _PRTopEnv to get the masterVP, and when
 * they start, the slots are all empty, so they run their associated core's
 * masterVP.  The first of those to get the master lock sees the seed slave
 * in the shared semantic environment, so when it runs the Assigner, that
 * returns the seed slave, which the animationMaster puts into a scheduling
 * slot then switches to the core controller.  That then switches the core
 * over to the seed slave, which then proceeds to execute language
 * constructs to create more slaves, and so on.  Each of those constructs
 * causes the seed slave to suspend, switching over to the core controller,
 * which eventually switches to the masterVP, which executes the 
 * request handler, which uses PR primitives to carry out the creation of
 * new slave VPs, which are marked as ready for the Assigner, and so on..
 * 
 *On animation slots, and system behavior:
 * A request may linger in an animation slot for a long time while
 * the slaves in the other slots are animated.  This only becomes a problem
 * when such a request is a choke-point in the constraints, and is needed
 * to free work for *other* cores.  To reduce this occurrence, the number
 * of animation slots should be kept low.  In balance, having multiple
 * animation slots amortizes the overhead of switching to the masterVP and
 * executing the animationMaster code, which drives for more than one. In
 * practice, the best balance should be discovered by profiling.
 */
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Used while scanning and filling animation slots
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *currSlave;        //the slave whose request is being handled
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner

      //Local copies, for performance
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   void           *semanticEnv;
   int32           thisCoresIdx;
  
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRTopEnv;
   
      //each core has its own array of animation slots
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Each pass: handle the request of every slot whose work is done, refill
   // every slot that needs a slave, then switch to the core controller.
   while(1)
    {
         MEAS__Capture_Pre_Master_Point

         //Scan the animation slots
      numSlotsFilled = 0;
      for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
       {
         currSlot = animSlots[ slotIdx ];

            //Check if newly-done slave in slot, which will need request handled
         if( currSlot->workIsDone )
          {
               //the slot is now free, so it gets refilled further below
            currSlot->workIsDone         = FALSE;
            currSlot->needsSlaveAssigned = TRUE;
            
                  HOLISTIC__Record_AppResponder_start;
                  MEAS__startReqHdlr;
            
            currSlave = currSlot->slaveAssignedToSlot;
            
               //NOTE(review): justAddedReqHdlrChg is not defined in this
               // file -- it looks like a marker left from the request
               // handler change; confirm it is a real function.
            justAddedReqHdlrChg();
            
               //handle the request exactly once, either by PR or by the
               // language
            if( currSlave->requests->reqType != LangReq )
             {    //The request is a standard PR one, not one defined by the
                  // language, so PR handles it, then queues slave to be
                  // assigned
               handleReqInPR( currSlave );
               writePrivQ( currSlave, PRReadyQ ); //Q slave to be assigned below
             }
            else
             {    //Language handles request, which is held inside slave struc
               (*requestHandler)( currSlave, semanticEnv );
             }
            
                  HOLISTIC__Record_AppResponder_end;
                  MEAS__endReqHdlr;
          }
         
            //If slot empty, hand to Assigner to fill with a slave
         if( currSlot->needsSlaveAssigned )
          {    //Call plugin's Assigner to give slot a new slave
                  HOLISTIC__Record_Assigner_start;
            
            assignedSlaveVP = (*slaveAssigner)( semanticEnv, currSlot );
            
               //put the chosen slave into slot, and adjust flags and state
               // (when the assigner has nothing, the slot stays marked as
               //  needing a slave, and is offered again on the next pass)
            if( assignedSlaveVP != NULL )
             { currSlot->slaveAssignedToSlot       = assignedSlaveVP;
               assignedSlaveVP->animSlotAssignedTo = currSlot;
               currSlot->needsSlaveAssigned        = FALSE;
               numSlotsFilled                     += 1;
             }
            
                  //end is recorded on both outcomes, so every start has an end
                  HOLISTIC__Record_Assigner_end;
          }
       }//for( slotIdx..

            MEAS__Capture_Post_Master_Point;
      
      masterSwitchToCoreCtlr( masterVP ); //control comes back here later
      flushRegisters();
            DEBUG__printf(FALSE,"came back after switch to core -- so lock released!");
    }//while(1) 
 }


/* 2)  This version is for a single language that has only tasks, which 
 *     cannot be suspended.
 *
 *For every animation slot whose work is done, the request held in the
 * slot's slave is handled: a TaskEnd request goes to the task-end handler,
 * anything else goes to the language's request handler.  Then the slot is
 * offered to assignWork to be refilled.  Slots that simply need a slave are
 * offered to assignWork as well.  After all slots are scanned, control is
 * switched to the core controller.
 *
 *NOTE(review): despite the heading, the body also carries the paths for a
 * suspended task and for a plain suspended slave -- confirm whether those
 * should remain in the tasks-only version.
 */
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Used while scanning and filling animation slots
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *slave;            //the slave whose request is being handled
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   
      //Local copies, for performance
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   RequestHandler  taskEndHandler;   //invoked when a task ends
   PRSemEnv       *semanticEnv;
   int32           thisCoresIdx;
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRTopEnv;
   
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      //initialize, for non-multi-lang, non multi-proc case
      // default handler gets put into master env by a registration call by lang
   taskEndHandler   = masterEnv->defaultTaskHandler;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1){
       
      MEAS__Capture_Pre_Master_Point

      //Scan the animation slots
   numSlotsFilled = 0;
   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
    {
      currSlot = animSlots[ slotIdx ];

         //Check if newly-done slave in slot, which will need request handled
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the completed work was a task..
         if( slave->taskMetaInfo->isATask )
          {
             if( slave->reqst->type == TaskEnd ) 
              {    //do task end handler, which is registered separately
                   //note, end hdlr may use semantic data from reqst..
                   //The handler stored with the task replaces the default
                   // fetched during initialization.
                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
                (*taskEndHandler)( slave, semanticEnv );
                
                   //NOTE(review): this path skips the AppResponder_end and
                   // MEAS__endReqHdlr recording done on the slave path.
                goto AssignWork;
              }
             else  //is a task, and just suspended
              {    //turn slot slave into free task slave & make replacement
                   //NOTE(review): changeSlvType is called without arguments
                   // and is not defined in this file -- confirm.
                if( slave->typeOfVP == SlotTaskSlv ) changeSlvType();
                
                //goto normal slave request handling
                goto SlaveReqHandling; 
              }
          }
         else //is a slave that suspended
          {
          SlaveReqHandling:
            (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
         
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //if slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Call plugin's Assigner to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
          //The request-handling paths above jump straight to this label, so
          // a slot whose request was just handled is always offered for
          // refill, whatever its needsSlaveAssigned flag says.
       AssignWork:
     
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
          }
         else
          {    //nothing available, so slot is offered again on the next pass
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   flushRegisters();
   }//while(1) 
 }


/*This is the master when just multi-lang, but not multi-process mode is on.
 * This version has to handle both tasks and slaves, and do extra work of 
 * looking up the semantic env and handlers to use, for each completed bit of 
 * work.
 *It also has to search through the semantic envs to find one with work,
 * then ask that env's assigner to return a unit of that work.
 * 
 *The language is written to startup in the same way as if it were the only
 * language in the app, and it operates in the same way,
 * the only difference between single language and multi-lang is here, in the
 * master.
 *This invisibility to mode is why the language has to use registration calls
 * for everything during startup -- those calls do different things depending
 * on whether it's single-language or multi-language mode.
 * 
 *In this version of the master, work can either be a task or a resumed slave
 *Having two cases makes this logic complex.. can be finishing either, and
 * then the next available work may be either.. so really have two distinct 
 * loops that are inter-twined.. 
 * 
 *Some special cases:
 * A task-end is a special case for a few reasons (below).
 * A task-end can't block a slave (can't cause it to "logically suspend")
 * A task available for work can only be assigned to a special slave, which 
 *   has been set aside for doing tasks, one such task-slave is always 
 *   assigned to each slot. So, when a task ends, a new task is assigned to
 *   that slot's task-slave right away.  
 * But if no tasks are available, then have to switch over to looking at
 *   slaves to find one ready to resume, to find work for the slot.
 * If a task just suspends, not ends, then its task-slave is no longer 
 *   available to take new tasks, so a new task-slave has to be assigned to
 *   that slot.  Then the slave of the suspended task is turned into a free
 *   task-slave and request handling is done on it as if it were a slave 
 *   that suspended.
 * After request handling, do the same sequence of looking for a task to be
 *   work, and if none, look for a slave ready to resume, as work for the slot.
 * If a slave suspends, handle its request, then look for work.. first for a
 *   task to assign, and if none, slaves ready to resume.
 * Another special case is when task-end is done on a free task-slave.. in
 *   that case, the slave has no more work and no way to get more.. so place
 *   it into a recycle queue.
 * If no work is found of either type, then do a special thing to prune down
 *   the extra slaves in the recycle queue, just so don't get too many..
 * 
 *The multi-lang thing complicates matters..  
 *
 *For request handling, it means have to first fetch the semantic environment
 * of the language, and then do the request handler pointed to by that
 * semantic env.
 *For assigning, things get more complex because of competing goals..  One
 * goal is for language specific stuff to be used during assignment, so
 * assigner can make higher quality decisions..  but with multiple languages,
 * which only get mixed in the application, the assigners can't be written
 * with knowledge of each other.  So, they can only make localized decisions,
 * and so different language's assigners may interfere with each other..
 * 
 *So, have some possibilities available:
 *1) can have a fixed scheduler in the proto-runtime, that all the
 * languages give their work to..  (but then lose language-specific info, 
 * there is a standard PR format for assignment info, and the language 
 * attaches this to the work-unit when it gives it to PR.. also have issue
 * with HWSim, which uses a priority Q instead of FIFO, and requests can 
 * "undo" previous work put in, so request handlers need way to manipulate
 * the work-holding Q..) (this might be fudgeable with
 * HWSim, if the master did a lang-supplied callback each time it assigns a
 * unit to a slot..  then HWSim can keep exactly one unit of work in PR's
 * queue at a time..  but this is quite hack-like.. or perhaps HWSim supplies
 * a task-end handler that kicks the next unit of work from HWSim internal
 * priority queue, over to PR readyQ)
 *2) can have each language have its own semantic env, that holds its own
 * work, which is assigned by its own assigner.. then the master searches
 * through all the semantic envs to find one with work and asks it give work..
 * (this has downside of blinding assigners to each other.. but does work
 * for HWSim case)
 *3) could make PR have a different readyQ for each core, and ask the lang
 * to put work to the core it prefers.. but the work may be moved by PR if
 * needed, say if one core idles for too long. This is a hybrid approach, 
 * letting the language decide which core, but PR keeps the work and does it
 * FIFO style.. (this might also be fudgeable with HWSim, in similar fashion, 
 * but it would be complicated by having to track cores separately) 
 *
 *Choosing 2, to keep compatibility with single-lang mode..  it allows the same
 * assigner to be used for single-lang as for multi-lang..  the overhead of
 * the extra master search for work is part of the price of the flexibility,
 * but should be fairly small.. takes the first env that has work available, 
 * and whatever it returns is assigned to the slot..
 * 
 *As a hybrid, giving an option for a unified override assigner to be registered
 * and used..  This allows something like a static analysis to detect
 * which languages are grouped together, and then analyze the pattern of 
 * construct calls, and generate a custom assigner that uses info from all
 * the languages in a unified way..  Don't really expect this to happen, 
 * but making it possible.
 */
#if defined(MODE__MULTI_LANG) && !defined(MODE__MULTI_PROCESS)
//Compiled only for multi-lang without multi-process, so that it cannot be
// defined together with the multi-lang + multi-process master.
//For every slot whose work is done, the slave's request is handled (task-end
// handler for TaskEnd, the request handler otherwise), then the slot is
// offered to assignWork.  Slots that just need a slave are offered as well.
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
      //Used while scanning and filling animation slots
   int32           slotIdx, numSlotsFilled;
   AnimSlot       *currSlot, **animSlots;
   SlaveVP        *slave;            //the slave whose request is being handled
   SlaveVP        *assignedSlaveVP;  //the slave chosen by the assigner
   
      //Local copies, for performance
   MasterEnv      *masterEnv;
   SlaveAssigner   slaveAssigner;
   RequestHandler  requestHandler;
   RequestHandler  taskEndHandler;   //invoked when a task ends
   PRSemEnv       *semanticEnv;
   int32           thisCoresIdx;
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRTopEnv;
   
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];

   requestHandler   = masterEnv->requestHandler;
   slaveAssigner    = masterEnv->slaveAssigner;
   semanticEnv      = masterEnv->semanticEnv;
   
      //initialize, for non-multi-lang, non multi-proc case
      // default handler gets put into master env by a registration call by lang
   taskEndHandler   = masterEnv->defaultTaskHandler;
   
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1){
       
      MEAS__Capture_Pre_Master_Point

      //Scan the animation slots
   numSlotsFilled = 0;
   for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
    {
      currSlot = animSlots[ slotIdx ];

         //Check if newly-done slave in slot, which will need request handled
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the completed work was a task..
         if( slave->taskMetaInfo->isATask )
          {
             if( slave->reqst->type == TaskEnd ) 
              {    //do task end handler, which is registered separately
                   //note, end hdlr may use semantic data from reqst..
                   //The handler stored with the task replaces the default
                   // fetched during initialization.
                taskEndHandler = slave->taskMetaInfo->endTaskHandler;
                (*taskEndHandler)( slave, semanticEnv );
                
                   //NOTE(review): this path skips the AppResponder_end and
                   // MEAS__endReqHdlr recording done on the slave path.
                goto AssignWork;
              }
             else  //is a task, and just suspended
              {    //turn slot slave into free task slave & make replacement
                   //NOTE(review): changeSlvType is called without arguments
                   // and is not defined in this file -- confirm.
                if( slave->typeOfVP == SlotTaskSlv ) changeSlvType();
                
                //goto normal slave request handling
                goto SlaveReqHandling; 
              }
          }
         else //is a slave that suspended
          {
          SlaveReqHandling:
            (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
         
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //if slot empty, hand to Assigner to fill with a slave
      if( currSlot->needsSlaveAssigned )
       {    //Call plugin's Assigner to give slot a new slave
               HOLISTIC__Record_Assigner_start;
               
          //The request-handling paths above jump straight to this label, so
          // a slot whose request was just handled is always offered for
          // refill, whatever its needsSlaveAssigned flag says.
       AssignWork:
     
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
            numSlotsFilled               += 1;
          }
         else
          {    //nothing available, so slot is offered again on the next pass
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
    }//for( slotIdx..

         MEAS__Capture_Post_Master_Point;
   
   masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
   flushRegisters();
   }//while(1) 
 }
#endif //MODE__MULTI_LANG && !MODE__MULTI_PROCESS


//This is the master when both multi-lang and multi-process modes are turned on
//#ifdef MODE__MULTI_LANG
//#ifdef MODE__MULTI_PROCESS
void animationMaster( void *initData, SlaveVP *masterVP )
 { 
   int32           slotIdx;
//   int32           numSlotsFilled;
   AnimSlot       *currSlot;
      //Used while scanning and filling animation slots
   AnimSlot      **animSlots;
   
      //Local copies, for performance
   MasterEnv      *masterEnv;
   int32           thisCoresIdx;
   
   //======================== Initializations ========================
   masterEnv        = (MasterEnv*)_PRTopEnv;
   
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];
      
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Do loop gets requests handled and work assigned to slots..
   // work can either be a task or a resumed slave
   //Having two cases makes this logic complex.. can be finishing either, and 
   // then the next available work may be either.. so really have two distinct
   // loops that are inter-twined.. 
   while(1)
    {  
            MEAS__Capture_Pre_Master_Point
      
      for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
       {
         currSlot = animSlots[ slotIdx ];

         masterFunction_multiLang( currSlot );
       }
            
            MEAS__Capture_Post_Master_Point;
    
      masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
      flushRegisters();
    } 
 }
#endif  //MODE__MULTI_LANG
#endif  //MODE__MULTI_PROCESS

/*Does the master's work for a single animation slot, in multi-lang mode.
 *
 *If the slot's work is done, the request held in the slot's slave is
 * handled, and then the slot is offered to assignWork to be refilled.
 * If the slot has no finished work but is marked as needing a slave, it is
 * offered to assignWork directly.
 *
 *currSlot is the slot to process; its workIsDone, needsSlaveAssigned and
 * slaveAssignedToSlot fields are read and updated.
 */
inline
void
masterFunction_multiLang( AnimSlot  *currSlot )
 {
   int32           magicNumber;
   SlaveVP        *slave;
   SlaveVP        *assignedSlaveVP;
      //Starts as NULL because several paths reach assignWork without ever
      // looking up a semantic env (slot only needs a slave, or the request
      // was SlvCreate, SlvDissipate or Service).
   PRSemEnv       *semanticEnv = NULL;
   PRReqst        *req;
   RequestHandler  requestHandler;

         //Check if newly-done slave in slot, which will need request handled
      if( currSlot->workIsDone )
       { currSlot->workIsDone = FALSE;
       
               HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
               MEAS__startReqHdlr;
               
         
            //process the request made by the slave (held inside slave struc)
         slave = currSlot->slaveAssignedToSlot;
         
            //check if the slave was doing a task..
         //Action depends on both on the request type, and whether it's on
         // a generic slave vs a suspended task
         if( slave->metaTask->taskType == AtomicTask ||
             slave->metaTask->taskType == SuspendedTask )
          { 
            switch( slave->request->reqType )
             { case TaskEnd: 
                { PRHandle_EndTask( slave ); //if free task slave, update count, put into recycle Q -- do handler before lang's handler

                     //do task end handler, which is registered separately
                     //note, end hdlr may use semantic data from reqst..
                     //get end-task handler

                  RequestHandler
                  taskEndHandler = slave->request->handler;
                  semanticEnv = PR_int__give_sem_env_for_slave( slave, 
                                              slave->request->langMagicNumber );
                  (*taskEndHandler)( slave, semanticEnv );

                     //NOTE(review): this path skips the AppResponder_end
                     // and MEAS__endReqHdlr recording done further below.
                  goto AssignWork;
                }
               case TaskCreate:
                { PRHandle_CreateTask( slave );
                  RequestHandler
                  taskCreateHandler = slave->request->handler;
                  semanticEnv = PR_int__give_sem_env_for_slave( slave, 
                                              slave->request->langMagicNumber );
                  (*taskCreateHandler)( slave, semanticEnv ); //resumes creating slave
                  goto AssignWork;
                }
               default:  
                {    //is a task, and just suspended, so tied to a free task slave
                     //First turn slot slave into free task slave & make replacement
                  if( slave->typeOfVP == SlotTaskSlv )
                     replaceWithNewSlotSlv( slave, slave->processSlaveIsIn->processEnv );

                  //goto normal slave request handling
                  goto SlaveReqHandling; 
                }
             }
          }
         else //is a slave that suspended
          {
             
          SlaveReqHandling:
               //Q: put the switch in inline call, to clean up code?
            req = slave->request;
            switch( req->reqType )
             { case SlvCreate:    PRHandle_CreateSlave( slave );    break;
               case SlvDissipate: PRHandle_Dissipate( slave ); break;
               case Service:      PR_int__handle_PRServiceReq( slave );  break; //resume into PR's own semantic env
                  //NOTE(review): the three "future expansion" types below
                  // share the Language handling -- confirm that is intended.
               case Hardware: //for future expansion
               case IO:       //for future expansion
               case OSCall:   //for future expansion
               case Language: //normal sem request
                  magicNumber = slave->request->langMagicNumber;
                  semanticEnv = PR_PI__give_sem_env_for( slave, magicNumber );
                  requestHandler = semanticEnv->requestHdlr;
                  (*requestHandler)( slave, semanticEnv ); //(note: indirect Fn call more efficient when use fewer params, instead re-fetch from slave)
                  break;
               default:  //any other request type gets no handling here
                  break;
             }
            
               HOLISTIC__Record_AppResponder_end;
               MEAS__endReqHdlr;
               
            goto AssignWork;
          }
       } //if has suspended slave that needs handling
      
         //End up here when the slot did not have ended work in it (no req)
         //So, here, if slot empty, look for work to fill the slot
      if( currSlot->needsSlaveAssigned )
       {       HOLISTIC__Record_Assigner_start;
               
          //The request-handling paths above jump straight to this label, so
          // a slot whose request was just handled is always offered for
          // refill, whatever its needsSlaveAssigned flag says.
       AssignWork:
            //Scan sem environs, looking for semEnv with ready work.
            // call the Assigner for that sem Env, to get a slave for the slot
         assignedSlaveVP = assignWork( semanticEnv, currSlot );
       
            //put the chosen slave into slot, and adjust flags and state
         if( assignedSlaveVP != NULL )
          { currSlot->slaveAssignedToSlot = assignedSlaveVP;
            assignedSlaveVP->animSlotAssignedTo = currSlot;
            currSlot->needsSlaveAssigned  = FALSE;
          }
         else
          {    //nothing available, so slot is offered again on the next call
            currSlot->needsSlaveAssigned  = TRUE; //local write
          }
               HOLISTIC__Record_Assigner_end;
       }//if slot needs slave assigned
 }

//==========================================================================
/*When a task in a slot slave suspends, the slot slave has to be changed to
 * a free task slave, then the slot slave replaced.  The replacement can be
 * either a recycled free task slave that finished its task and has been
 * idle in the recycle queue, or else a newly created slave.
 *The master only calls this with a slot slave that needs to be replaced.
 */
/*requestingSlv is the slot slave being displaced; process supplies the
 * recycle queue, the live free-task-slave count, and the slot-slave matrix.
 *NOTE(review): the forward declaration near the top of this file names the
 * second parameter "PRProcessEnv *processEnv" -- confirm which type is the
 * intended one and make the two agree.
 */
inline void
replaceWithNewSlotSlv( SlaveVP *requestingSlv, PRProcess *process )
 { SlaveVP *replacement;
   int32    core, slotIdx;

      //prefer a recycled free task slave; only make a fresh one when the
      // recycle queue hands back nothing
   replacement = readPrivQ( process->freeTaskSlvRecycleQ );
   if( replacement == NULL )
    { replacement = PR_int__create_slaveVP( &idle_fn, NULL, process, 0);
         //a brand new free task slave now exists, so count it
      process->numLiveFreeTaskSlvs += 1;
    }

      //the replacement inherits the requester's slot and core.  This runs
      // before any handler sees the request, so the requester's values are
      // still the ones it was animated with
   replacement->animSlotAssignedTo = requestingSlv->animSlotAssignedTo;
   replacement->coreAnimatedBy     = requestingSlv->coreAnimatedBy;

      //mark it as an empty slot slave, waiting for a task
   replacement->typeOfVP           = SlotTaskSlv;
   replacement->needsTaskAssigned  = TRUE;
   replacement->metaTask           = NULL;

      //record it in the slot slave matrix, at the requester's position
   core    = requestingSlv->coreAnimatedBy;
   slotIdx = requestingSlv->animSlotAssignedTo->slotIdx;
   process->slotTaskSlvs[core][slotIdx] = replacement;

      //the requester carries on as a free task slave.  It is still active,
      // so it does not go into freeTaskSlvRecycleQ
   requestingSlv->typeOfVP = FreeTaskSlv;
   check_if_need_to_change_metaTask_type_or_something;
 }


/*This does:
 * 1) searches the semantic environments for one with work ready
 *    if finds one, asks its assigner to return work
 * 2) checks what kind of work: new task, resuming task, resuming slave
 *    if new task, gets the slot slave and assigns task to it and returns slave
 *    else, gets the slave attached to the metaTask and returns that.
 * 3) if no work found, then prune former task slaves waiting to be recycled.
 *    If no work and no slaves to prune, check for shutdown conditions.
 * 
 * Semantic env keeps its own work in its own structures, and has its own
 *  assigner, which chooses which of that env's ready work goes to the slot.
 * However, include a switch that switches-in an override assigner, which
 *  sees all the work in all the semantic env's.  This is most likely  
 *  generated by static tools and included in the executable.  That means it
 *  has to be called via a registered pointer from here.  The idea is that
 *  the static tools know which languages are grouped together.. and the
 *  override enables them to generate a custom assigner that uses info from
 *  all the languages in a unified way..  Don't really expect this to happen,
 *  but am making it possible.
 */
/*process -- the process whose semantic envs are searched for ready work
 *slot    -- the animation slot that needs a slave
 *Returns the slave to put into the slot, or NULL when no work was found
 * (when HOLISTIC__TURN_ON_OBSERVE_UCC is defined, the slot's idle slave is
 * returned in place of NULL).
 */
inline SlaveVP *
assignWork( PRProcess *process, AnimSlot *slot )
 { SlaveVP        *returnSlv = NULL;  //stays NULL on the no-work path, which
                                      // is what the caller tests for
   PRMetaTask     *assignedMetaTask;
   PRSemEnv      **semEnvs, *semEnv;
   SlaveAssigner   assigner;
   int32           coreNum, slotNum;
   int32           envIdx, numEnvs;

      //set both up front: every path (including the holistic bookkeeping at
      // the end) indexes by core and slot
   coreNum = slot->coreSlotIsOn;
   slotNum = slot->slotIdx;
   
   if( _PRTopEnv->overrideAssigner != NULL )
    { assignedMetaTask = (*_PRTopEnv->overrideAssigner)( process, slot );
      if( assignedMetaTask == NULL )
         goto NoWork;

         //have work, so reset Done flag (caused by work generated on other core)
      if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf
         process->coreIsDone[coreNum] = FALSE;   //don't just write always
      
      switch( assignedMetaTask->taskType )
       { case GenericSlave:  goto AssignSlave;
         case SuspendedTask: goto AssignSlave;
         case AtomicTask:    goto AssignNewTask;
         default:      PR_int__throw_exception( "unknown task type ret by assigner" );
       }
    }
   
      //If here, then no override assigner, so search semantic envs for work
   semEnvs = process->semEnvs;
   numEnvs = process->numSemEnvs;
   for( envIdx = 0; envIdx < numEnvs; envIdx++ ) //keep semEnvs in hash AND array
    { semEnv = semEnvs[envIdx];
      if( !semEnv->hasWork )
         continue;

      assigner = semEnv->slaveAssigner; 
      assignedMetaTask = (*assigner)( semEnv, slot );

         //an assigner may hand back nothing even though hasWork is set;
         // treat that the same way the override path does, and keep looking
      if( assignedMetaTask == NULL )
         continue;
      
         //have work, so reset Done flag (caused by work generated on other core)
      if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf
         process->coreIsDone[coreNum] = FALSE;   //don't just write always
      
      switch( assignedMetaTask->taskType )
       { case GenericSlave:  goto AssignSlave;
         case SuspendedTask: goto AssignSlave;
         case AtomicTask:    goto AssignNewTask;
         default:      PR_int__throw_exception( "unknown task type ret by assigner" );
       }
    }
   
 NoWork:
      //No work, if reach here..  returnSlv is still NULL.
      //NOTE(review): the header comment describes pruning recycled task
      // slaves and checking shutdown conditions at this point; neither is
      // done here.
    { goto ReturnTheSlv;
    }
 
 AssignSlave:     //Have a metaTask attached to a slave, so get the slave out
    {    //get slave pointed to by meta task.
      returnSlv = assignedMetaTask->slaveAssignedTo;

      returnSlv->coreAnimatedBy   = coreNum;
    
      goto ReturnTheSlv;
    }
 
 AssignNewTask:
    { 
         //get the slot slave to assign the task to..
      returnSlv = process->slotTaskSlvs[coreNum][slotNum];

         //point slave to task's function, and mark slave as having task
      PR_int__reset_slaveVP_to_TopLvlFn( returnSlv, 
                       assignedMetaTask->topLevelFn, assignedMetaTask->initData );
      returnSlv->metaTask          = assignedMetaTask;
      assignedMetaTask->slaveAssignedTo = returnSlv;
      returnSlv->needsTaskAssigned = FALSE;  //slot slave is a "Task" slave type
      
         //have work, so reset Done flag, if was set
      if( process->coreIsDone[coreNum] == TRUE ) //reads are higher perf
         process->coreIsDone[coreNum] = FALSE;   //don't just write always
      
      goto ReturnTheSlv;
    }
 

 ReturnTheSlv:  //All paths goto here.. to provide single point for holistic..

   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
   if( returnSlv == NULL )
    {    //no work: substitute this slot's idle slave and record it as a unit
      returnSlv = process->idleSlv[coreNum][slotNum]; 
    
         //things that would normally happen in resume(), but idle VPs
         // never go there
      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
      Unit newU;
      newU.vp = returnSlv->slaveID;
      newU.task = returnSlv->numTimesAssignedToASlot;
      addToListOfArrays(Unit,newU,process->unitList);

      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
       { Dependency newD;             // to this one
         newD.from_vp = returnSlv->slaveID;
         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
         newD.to_vp = returnSlv->slaveID;
         newD.to_task = returnSlv->numTimesAssignedToASlot;
         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
       }
    }
   else //have a slave will be assigned to the slot
    { //assignSlv->numTimesAssigned++;
         //get previous occupant of the slot
      Unit prev_in_slot = 
         process->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum];
      if(prev_in_slot.vp != 0) //if not first slave in slot, make dependency
       { Dependency newD;      // is a hardware dependency
         newD.from_vp = prev_in_slot.vp;
         newD.from_task = prev_in_slot.task;
         newD.to_vp = returnSlv->slaveID;
         newD.to_task = returnSlv->numTimesAssignedToASlot;
         addToListOfArrays(Dependency,newD,process->hwArcs);   
       }
      prev_in_slot.vp = returnSlv->slaveID; //make new slave the new previous
      prev_in_slot.task = returnSlv->numTimesAssignedToASlot;
      process->last_in_slot[coreNum * NUM_ANIM_SLOTS + slotNum] =
         prev_in_slot;        
    }
   #endif

   return( returnSlv );
 }


/*In creator, only PR related things happen, and things in the langlet whose
 * creator construct was used.
 *Other langlet still gets a chance to create semData -- but by registering a
 * "createSemData" handler in the semEnv.  When a construct  of the langlet
 * calls "PR__give_sem_data()", if there is no semData for that langlet,
 * the PR will call the creator in the langlet's semEnv, place whatever it
 * makes as the semData in that slave for that langlet, and return that semData
 *
 *So, as far as counting things, a langlet is only allowed to count creation
 * of slaves it creates itself..  may have to change this later.. add a way for
 * langlet to register a trigger Fn called each time a slave gets created.. 
 * need more experience with what langlets will do at create time..  think Cilk
 * has interesting create behavior..  not sure how that will differ in light
 * of true tasks and langlet approach.  Look at it after all done and start
 * modifying the langs to be langlets..
 * 
 *PR itself needs to create the slave, then update numLiveSlaves in process,
 * copy processID from requestor to newly created
 */
/*req           -- the create request; supplies the ID for the new meta task
 *                 and the langlet's create handler
 *requestingSlv -- the slave that issued the request; the new slave joins
 *                 the same process
 *NOTE(review): no return type is written, so this is an implicit-int
 * function that returns nothing.  The other handlers in this file are
 * "inline void"; give this one an explicit type together with a prototype.
 *NOTE(review): the call visible in the master's request switch passes only
 * the slave, not (req, requestingSlv) -- confirm the intended signature.
 */
PRHandle_CreateSlave( PRReqst *req, SlaveVP *requestingSlv )
 { SlaveVP    *newSlv;
   PRMetaTask *metaTask;   //pointer: assigned from a creator, used via "->"
   PRProcess  *process;
 
   process = requestingSlv->processSlaveIsIn;

      //NOTE(review): replaceWithNewSlotSlv calls PR_int__create_slaveVP with
      // four arguments (fn, data, process, int) -- confirm what to pass here
   newSlv = PR_int__create_slaveVP();
   newSlv->typeOfVP = GenericSlv;
   newSlv->processSlaveIsIn = process;
   process->numLiveGenericSlvs += 1;

   metaTask = PR_int__create_slave_meta_task();
   metaTask->taskID = req->ID;
   metaTask->taskType = GenericSlave;

      //tie slave and meta task together: assignWork gets the slave for a
      // GenericSlave meta task by reading slaveAssignedTo
   metaTask->slaveAssignedTo = newSlv;
   newSlv->metaTask          = metaTask;
   
      //let the langlet whose create construct was used do its part
   (*req->handler)(newSlv);
 }

/*The dissipate handler has to update the number of slaves of the type, within
 * the process, and call the langlet handler linked into the request,
 * and after that returns, then call the PR function that frees the slave state
 * (or recycles the slave).
 * 
 *The PR function that frees the slave state has to also free all of the
 * semData in the slave..  or else reset all of the semDatas.. by, say, marking
 * them, then in PR__give_semData( magicNum ) call the langlet registered
 * "resetSemData" Fn.
 */
/*slave -- the slave that asked to be dissipated; its request carries the
 *         langlet's magic number and dissipate handler.
 *NOTE(review): no return type is written (implicit int) and nothing is
 * returned -- the other handlers in this file are "inline void".
 */
PRHandle_Dissipate( SlaveVP *slave )
 { void      *langEnv;
   PRProcess *owner;
   int        nothingLeftAlive;

      //grab the process first; it is still needed after the slave is gone
   owner = slave->processSlaveIsIn;

      //language's dissipate handler runs first, with that language's env
   langEnv = PR_int__give_sem_env_for( slave, slave->request->langMagicNumber );
   (*slave->request->handler)( slave, langEnv );

      //then PR's own part: one fewer generic slave, and give up the slave
   owner->numLiveGenericSlvs -= 1;
   PR_int__dissipate_slaveVP_multilang( slave ); //recycles and resets semDatas

      //End Of Process Condition: no live tasks and no live generic slaves
   nothingLeftAlive = ( owner->numLiveTasks == 0 &&
                        owner->numLiveGenericSlvs == 0 );
   if( nothingLeftAlive )
      signalEndOfProcess;
 }

/*Create task is a special form, that has PR behavior in addition to plugin
 * behavior.  Master calls this first, and this in turn calls the plugin's
 * create task handler.
 */
/*topLevelFn, initData -- the task's function and its data, stored in the
 *                        new meta task
 *req           -- the create-task request; supplies the task ID (may be
 *                 NULL) and selects the semantic env
 *requestingSlv -- the slave that issued the request; its process gets the
 *                 live-task count bumped, and its request's handler is the
 *                 langlet's create-task handler
 */
inline void
PRHandle_CreateTask( TopLevelFn topLevelFn, void *initData, PRReqst *req, 
                                                        SlaveVP *requestingSlv )
 { PRMetaTask     *metaTask;
   PRProcess      *process;
   void           *semEnv, *rawLangMetaTask; //both pointers (the second was
                                             // declared as plain "void")
   PRLangMetaTask *langMetaTask;
                    
   process = requestingSlv->processSlaveIsIn;

      //NOTE(review): taskType is not set here, yet assignWork switches on
      // it -- presumably PR_int__create_meta_task fills it in; confirm
   metaTask             = PR_int__create_meta_task( req );
   metaTask->taskID     = req->ID; //may be NULL
   metaTask->topLevelFn = topLevelFn;
   metaTask->initData   = initData;
           
   process->numLiveTasks += 1;
      
      //plugin tracks tasks ready, and has its own assigner, so task doesn't
      // come back from lang's handler -- it's consumed and stays in semEnv.
      //But handler gives back the language-specific meta-task it creates, and
      // then hook that into the PR meta-task
      //(Could also do PRMetaTask as a prolog -- make a Fn that takes the size
      // of the lang's metaTask, and alloc's that plus the prolog and returns
      // ptr to position just above the prolog)
   semEnv = PR_int__give_semEnv_of_req( req, requestingSlv ); //magic num in req
   rawLangMetaTask = (*requestingSlv->request->handler)(req, semEnv);
   langMetaTask    = (PRLangMetaTask *)rawLangMetaTask;

      //cross-link the PR meta-task and the language's meta-task
   metaTask->langMetaTask      = langMetaTask;
   langMetaTask->protoMetaTask = metaTask;
   
   return;
 }

/*When a task ends, there are two scenarios: 1) task ran to completion, or
 * 2) task suspended at some point in its code.
 *For 1, just decr count of live tasks (and check for end condition) -- the
 * master loop will decide what goes into the slot freed up by this task end,
 * so, here, don't worry about assigning a new task to the slot slave.
 *For 2, the task's slot slave has been converted to a free task slave, which
 * now has nothing more to do, so send it to the recycle Q (which includes
 * freeing all the semData and meta task structs alloc'd for it).  Then
 * decrement the live task count and check end condition.
 * 
 *PR has to update count of live tasks, and check end of process condition.
 * There are constructs that wait for a process to end, so when end detected,
 * have to resume what's waiting..
 *Thing is, the wait is used in "main", so it's an OS thread.  That means
 * PR internals have to do OS thread signaling.  Want to do that in the
 * core controller, which has the original stack of an OS thread.
 * 
 *So here, when detect process end, signal to the core controller, which will
 * then do the condition variable notify to the OS thread that's waiting. 
 */
/*requestingSlv -- the slave whose task has ended; its request carries the
 *                 meta task, the langlet's semantic request, and the
 *                 langlet's end-task handler.
 */
inline void
PRHandle_EndTask( SlaveVP *requestingSlv )
 { void       *semEnv;
   PRReqst    *req;  
   PRMetaTask *metaTask;
   PRProcess  *process;
 
      //get the process now, before the slave is handed to the recycler (it
      // was previously used below without ever being set)
   process = requestingSlv->processSlaveIsIn;

   req      = requestingSlv->request;
   semEnv   = PR_int__give_semEnv_of_req( req, requestingSlv ); //magic num in req
   metaTask = req->metaTask;
      //Want to keep PRMetaTask hidden from plugin, so extract semReq..
   (*req->handler)( metaTask, req->semReq, semEnv );
   
      //NOTE(review): the header comment says only a task that suspended (and
      // so runs in a free task slave) is recycled, while a task that ran to
      // completion leaves its slot slave alone.  This call is unconditional;
      // confirm whether recycleFreeTaskSlave makes that distinction itself.
   recycleFreeTaskSlave( requestingSlv );
   
   process->numLiveTasks -= 1;
  
      //check End Of Process Condition
   if( process->numLiveTasks == 0 &&
       process->numLiveGenericSlvs == 0 )
      signalEndOfProcessToCoreCtlr;
 }