/*
 * Copyright 2010  OpenSourceResearchInstitute
 * 
 * Licensed under BSD
 */


#include <stdio.h>
#include <stddef.h>

#include "PR.h"
#include "VSs_impl/VSs.h"

/*
void        PRHandle_CreateTask_SL(SlaveVP *slave);

void        PRHandle_CreateSlave_SL(SlaveVP *slave);
void        PRHandle_Dissipate_SL(SlaveVP *slave);
void        PR_int__handle_PRServiceReq_SL(SlaveVP *slave);
*/
//Forward declarations of the PR-level request handlers that
// masterFunction_MultiLang dispatches to.  Each takes the request and the
// slave that issued it.
//NOTE(review): PRHandle_Dissipate is declared here without "inline", but its
// definition further down in this file is marked "inline" -- confirm which
// is intended.
inline void PRHandle_CreateTask( PRReqst *req, SlaveVP *slave );
inline void PRHandle_EndTask(    PRReqst *req, SlaveVP *slave );
inline void PRHandle_CreateSlave(PRReqst *req, SlaveVP *slave );
void        PRHandle_Dissipate(  PRReqst *req, SlaveVP *slave );


//Forward declarations of the per-slot master function, and of the two
// helpers it uses to choose a process and to fill a slot with work.
//inline void  masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot );
inline void masterFunction_MultiLang( AnimSlot  *slot );
inline PRProcess * pickAProcess( AnimSlot *slot );
inline SlaveVP * assignWork( PRProcess *process, AnimSlot *slot );

/*The animationMaster embodies most of the animator of the language.  The
 * animator is what embodies the behavior of language constructs. 
 * As such, it is the animationMaster, in combination with the plugin
 * functions, that make the language constructs do their behavior.   
 * 
 *Within the code, this is the top-level-function of the masterVPs, and
 * runs when the coreController has no more slave VPs.  Its job is to
 * refill the animation slots with slaves that have work.
 *
 *There are multiple versions of the master, each tuned to a specific 
 * combination of modes.  This keeps the master simple, with reduced overhead,
 * when the application is not using the extra complexity.
 * 
 *As of Sept 2012, the versions available will be:
 * 1) Single language, which only exposes slaves (such as SSR or Vthread)
 * 2) Single language, which only exposes tasks  (such as pure dataflow)
 * 3) Single language, which exposes both (like Cilk, StarSs, and OpenMP)
 * 4) Multi-language, which always assumes both tasks and slaves
 * 5) Multi-language and multi-process, which also assumes both tasks and slaves
 *
 * 
 *
 */

/*Top-level function of a masterVP: repeatedly services the animation slots
 * of the core that the masterVP is animated by.
 *
 * _environment -- cast to TopEnv*; supplies the per-core arrays of animation
 *                 slots (allAnimSlots) and the mode that selects the loop
 * masterVP     -- the master VP running this function; its coreAnimatedBy
 *                 field selects which core's slot array is scanned
 *
 *The switch was written to select among several loops (single-language
 * variants and multi-language), but only the MultiLang case is live -- the
 * single-language loops are commented out.  The MultiLang loop never exits:
 * each pass hands every one of the NUM_ANIM_SLOTS slots to
 * masterFunction_MultiLang, then switches to the core controller, and starts
 * the next pass when control comes back.
 *NOTE(review): the switch has no default, so a mode other than MultiLang
 * falls out of the switch and this function returns -- confirm that is
 * intended.
 */
void animationMaster( void *_environment, SlaveVP *masterVP )
 { 
   TopEnv         *masterEnv = (TopEnv *)_environment;
   int32           slotIdx;
   AnimSlot       *currSlot;
      //Used while scanning and filling animation slots
   AnimSlot      **animSlots;
   
      //Local copies, for performance
   int32           thisCoresIdx;
   
   //======================== Initializations ========================
      //Each core has its own array of animation slots, pick this core's
   thisCoresIdx     = masterVP->coreAnimatedBy;
   animSlots        = masterEnv->allAnimSlots[thisCoresIdx];
      
      //NOTE(review): macro defined elsewhere -- from its name, presumably
      // declares variables used by the HOLISTIC measurement macros; confirm
      HOLISTIC__Insert_Master_Global_Vars;
   
   //======================== animationMaster ========================
   //Have three different modes, and the master behavior is different for
   // each, so jump to the loop that corresponds to the mode.
   //
   switch(masterEnv->mode)
    {
/*
    { case SingleLang: 
         while(1)
          {       MEAS__Capture_Pre_Master_Point
            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
             {
               currSlot = animSlots[ slotIdx ];

               masterFunction_StandaloneSlavesOnly( masterEnv, currSlot );
             }
                  MEAS__Capture_Post_Master_Point;
            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
            flushRegisters();
          } 

      case SingleLang:     
       { PRLangEnv  *protoLangEnv =  _PRTopEnv->protoLangEnv;
         while(1)
          {       MEAS__Capture_Pre_Master_Point
            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
             {
               currSlot = animSlots[ slotIdx ];

               masterFunction_SingleLang( protoLangEnv, currSlot );
             }
                  MEAS__Capture_Post_Master_Point;
            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
            flushRegisters();
          }
       }
 */
      case MultiLang:
       { while(1)
          {       MEAS__Capture_Pre_Master_Point
               //One pass over this core's slots: handle any finished work
               // in the slot, then try to give the slot new work
            for( slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++)
             {
               currSlot = animSlots[ slotIdx ];

               masterFunction_MultiLang( currSlot );
             }
                  MEAS__Capture_Post_Master_Point;
            masterSwitchToCoreCtlr( masterVP ); //returns when ctlr switches back to master
               //NOTE(review): presumably makes the compiler discard values
               // cached in registers across the switch above -- confirm
            flushRegisters();
          }
       }
    }
 }


//=====================  The versions of the Animation Master  =================
//
//==============================================================================

/* 1) This version is for a single language, that has only slaves, no tasks,
 *    such as Vthread or SSR.
 *This version is for when an application has only a single language, and
 * that language exposes slaves explicitly (as opposed to a task based 
 * language like pure dataflow).
 * 
 *
 *It scans the animation slots for just-completed slaves.
 * Each completed slave has a request in it.  So, the master hands each to
 * the plugin's request handler (there is only one plugin, because only one
 * lang).
 *Each request represents a language construct that has been encountered
 * by the application code in the slave. Passing the request to the
 * request handler is how that language construct's behavior gets invoked.
 * The request handler then performs the actions of the construct's
 * behavior. So, the request handler encodes the behavior of the 
 * language's parallelism constructs, and performs that when the master
 * hands it a slave containing a request to perform that construct.
 * 
 *On a shared-memory machine, the behavior of parallelism constructs
 * equals control, over order of execution of code.  Hence, the behavior
 * of the language constructs performed by the request handler is to 
 * choose the order that slaves get animated, and thereby control the
 * order that application code in the slaves executes.
 * 
 *To control order of animation of slaves, the request handler has a
 * language environment that holds data structures used to hold slaves
 * and choose when they're ready to be animated.
 *
 *Once a slave is marked as ready to be animated by the request handler,
 * it is the second plugin function, the Assigner, which chooses the core
 * the slave gets assigned to for animation.  Hence, the Assigner doesn't
 * perform any of the semantic behavior of language constructs, rather
 * it gives the language a chance to improve performance. The performance
 * of application code is strongly related to communication between
 * cores. On shared-memory machines, communication is caused during
 * execution of code, by memory accesses, and how much depends on contents
 * of caches connected to the core executing the code.  So, the placement
 * of slaves determines the communication caused during execution of the
 * slave's code.
 *The point of the Assigner, then, is to use application information during
 * execution of the program, to make choices about slave placement onto
 * cores, with the aim to put slaves close to caches containing the data
 * used by the slave's code.
 * 
 *==========================================================================
 *In summary, the animationMaster scans the slots, finds slaves
 * just-finished, which hold requests, pass those to the request handler,
 * along with the language environment, and the request handler then manages
 * the structures in the language env, which controls the order of
 * animation of slaves, and so embodies the behavior of the language
 * constructs.
 *The animationMaster then rescans the slots, offering each empty one to
 * the Assigner, along with the language environment.  The Assigner chooses
 * among the ready slaves in the language env, finding the one best suited
 * to be animated by that slot's associated core.
 * 
 *==========================================================================
 *Implementation Details:
 * 
 *There is a separate masterVP for each core, but a single language
 * environment shared by all cores.  Each core also has its own scheduling
 * slots, which are used to communicate slaves between animationMaster and
 * coreController.  There is only one global variable, _PRTopEnv, which
 * holds the language env and other things shared by the different
 * masterVPs.  The request handler and Assigner are registered with
 * the animationMaster by the language's init function, and a pointer to
 * each is in the _PRTopEnv. (There are also some pthread related global
 * vars, but they're only used during init of PR).
 *PR gains control over the cores by essentially "turning off" the OS's
 * scheduler, using pthread pin-to-core commands.
 *
 *The masterVPs are created during init, with this animationMaster as their
 * top level function.  The masterVPs use the same SlaveVP data structure,
 * even though they're not slave VPs.
 *A "seed slave" is also created during init -- this is equivalent to the
 * "main" function in C, and acts as the entry-point to the PR-language-
 * based application.
 *The masterVPs share a single system-wide master-lock, so only one
 * masterVP may be animated at a time.
 *The core controllers access _PRTopEnv to get the masterVP, and when
 * they start, the slots are all empty, so they run their associated core's
 * masterVP.  The first of those to get the master lock sees the seed slave
 * in the shared language environment, so when it runs the Assigner, that
 * returns the seed slave, which the animationMaster puts into a scheduling
 * slot then switches to the core controller.  That then switches the core
 * over to the seed slave, which then proceeds to execute language
 * constructs to create more slaves, and so on.  Each of those constructs
 * causes the seed slave to suspend, switching over to the core controller,
 * which eventually switches to the masterVP, which executes the 
 * request handler, which uses PR primitives to carry out the creation of
 * new slave VPs, which are marked as ready for the Assigner, and so on..
 * 
 *On animation slots, and system behavior:
 * A request may linger in an animation slot for a long time while
 * the slaves in the other slots are animated.  This only becomes a problem
 * when such a request is a choke-point in the constraints, and is needed
 * to free work for *other* cores.  To reduce this occurrence, the number
 * of animation slots should be kept low.  In balance, having multiple
 * animation slots amortizes the overhead of switching to the masterVP and
 * executing the animationMaster code, which drives for more than one. In
 * practice, the best balance should be discovered by profiling.
 */
/*
void masterFunction_StandaloneSlavesOnly( AnimSlot  *slot )
 { 
   SlaveVP        *slave;
   PRReqst        *req;
   PRLangEnv      *langEnv = _PRTopEnv->langEnv;
    
   
   //======================== animationMaster ========================
      
      //Check if newly-done slave in slot, which will need request handled
   if( slot->workIsDone )
    { slot->workIsDone = FALSE;
      slot->needsWorkAssigned = TRUE;


            HOLISTIC__Record_AppResponder_start;
            MEAS__startReqHdlr;
         //process the request made by the slave (held inside slave struc)
      slave = slot->slaveAssignedToSlot;
      req = slave->request;

      //Handle task create and end first -- they're special cases..
      switch( req->reqType )
       { case SlvCreate:    PRHandle_CreateSlave( slave );           break;
         case SlvDissipate: PRHandle_Dissipate( slave );             break;
         case Service:      PR_int__handle_PRServiceReq( slave );    break; //resume into PR's own language env
         case Hardware: //for future expansion
         case IO:       //for future expansion
         case OSCall:   //for future expansion
            PR_int__throw_exception("Not implemented");             break;
         case Language: //normal lang request
          { 
            (*langEnv->requestHdlr)( req->langReq, slave, langEnv );
          }
       }
            HOLISTIC__Record_AppResponder_end;
            MEAS__endReqHdlr;
    }
      //If slot empty, hand to Assigner to fill with a slave
   if( slot->needsWorkAssigned )
    {    //Call plugin's Assigner to give slot a new slave
            HOLISTIC__Record_Assigner_start;

      if( langEnv->hasWork )
       {  (*langEnv->slaveAssigner)( langEnv, slot ); //calls PR fn that inserts work into slot
         goto ReturnAfterAssigningWork; //quit for-loop, cause found work
       }
      else
         goto NoWork;
    }
   
 NoWork:
      //No work, if reach here..
    { 
   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
      coreNum = slot->coreSlotIsOn;
      returnSlv = process->idleSlv[coreNum][slotNum]; 
    
         //things that would normally happen in resume(), but idle VPs
         // never go there
      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
      Unit newU;
      newU.vp = returnSlv->slaveNum;
      newU.task = returnSlv->numTimesAssignedToASlot;
      addToListOfArrays(Unit,newU,process->unitList);

      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
       { Dependency newD;             // to this one
         newD.from_vp = returnSlv->slaveNum;
         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
         newD.to_vp = returnSlv->slaveNum;
         newD.to_task = returnSlv->numTimesAssignedToASlot;
         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
       }
   #endif
            HOLISTIC__Record_Assigner_end;
      return;
    }
 
 ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
    {
            HOLISTIC__Record_Assigner_end;
      return;
    }
 }
*/


/*This is the master when just multi-lang, but not multi-process mode is on.
 * This version has to handle both tasks and slaves, and do extra work of 
 * looking up the language env and handlers to use, for each completed bit of 
 * work.
 *It also has to search through the language envs to find one with work,
 * then ask that env's assigner to return a unit of that work.
 * 
 *The language is written to startup in the same way as if it were the only
 * language in the app, and it operates in the same way,
 * the only difference between single language and multi-lang is here, in the
 * master.
 *This invisibility to mode is why the language has to use registration calls
 * for everything during startup -- those calls do different things depending
 * on whether it's single-language or multi-language mode.
 * 
 *In this version of the master, work can either be a task or a resumed slave
 *Having two cases makes this logic complex.. can be finishing either, and
 * then the next available work may be either.. so really have two distinct 
 * loops that are inter-twined.. 
 * 
 *Some special cases:
 * A task-end is a special case for a few reasons (below).
 * A task-end can't block a slave (can't cause it to "logically suspend")
 * A task available for work can only be assigned to a special slave, which 
 *   has been set aside for doing tasks, one such task-slave is always 
 *   assigned to each slot. So, when a task ends, a new task is assigned to
 *   that slot's task-slave right away.  
 * But if no tasks are available, then have to switch over to looking at
 *   slaves to find one ready to resume, to find work for the slot.
 * If a task just suspends, not ends, then its task-slave is no longer 
 *   available to take new tasks, so a new task-slave has to be assigned to
 *   that slot.  Then the slave of the suspended task is turned into a free
 *   task-slave and request handling is done on it as if it were a slave 
 *   that suspended.
 * After request handling, do the same sequence of looking for a task to be
 *   work, and if none, look for a slave ready to resume, as work for the slot.
 * If a slave suspends, handle its request, then look for work.. first for a
 *   task to assign, and if none, slaves ready to resume.
 * Another special case is when task-end is done on a free task-slave.. in
 *   that case, the slave has no more work and no way to get more.. so place
 *   it into a recycle queue.
 * If no work is found of either type, then do a special thing to prune down
 *   the extra slaves in the recycle queue, just so don't get too many..
 * 
 *The multi-lang thing complicates matters..  
 *
 *For request handling, it means have to first fetch the language environment
 * of the language, and then do the request handler pointed to by that
 * language env.
 *For assigning, things get more complex because of competing goals..  One
 * goal is for language specific stuff to be used during assignment, so
 * assigner can make higher quality decisions..  but with multiple languages,
 * which only get mixed in the application, the assigners can't be written
 * with knowledge of each other.  So, they can only make localized decisions,
 * and so different language's assigners may interfere with each other..
 * 
 *So, have some possibilities available:
 *1) can have a fixed scheduler in the proto-runtime, that all the
 * languages give their work to..  (but then lose language-specific info, 
 * there is a standard PR format for assignment info, and the language 
 * attaches this to the work-unit when it gives it to PR.. also have issue
 * with HWSim, which uses a priority Q instead of FIFO, and requests can 
 * "undo" previous work put in, so request handlers need way to manipulate
 * the work-holding Q..) (this might be fudgeable with
 * HWSim, if the master did a lang-supplied callback each time it assigns a
 * unit to a slot..  then HWSim can keep exactly one unit of work in PR's
 * queue at a time..  but this is quite hack-like.. or perhaps HWSim supplies
 * a task-end handler that kicks the next unit of work from HWSim internal
 * priority queue, over to PR readyQ)
 *2) can have each language have its own language env, that holds its own
 * work, which is assigned by its own assigner.. then the master searches
 * through all the language envs to find one with work and asks it give work..
 * (this has downside of blinding assigners to each other.. but does work
 * for HWSim case)
 *3) could make PR have a different readyQ for each core, and ask the lang
 * to put work to the core it prefers.. but the work may be moved by PR if
 * needed, say if one core idles for too long. This is a hybrid approach, 
 * letting the language decide which core, but PR keeps the work and does it
 * FIFO style.. (this might also be fudgeable with HWSim, in similar fashion, 
 * but it would be complicated by having to track cores separately) 
 *
 *Choosing 2, to keep compatibility with single-lang mode..  it allows the same
 * assigner to be used for single-lang as for multi-lang..  the overhead of
 * the extra master search for work is part of the price of the flexibility,
 * but should be fairly small.. takes the first env that has work available, 
 * and whatever it returns is assigned to the slot..
 * 
 *As a hybrid, giving an option for a unified override assigner to be registered
 * and used..  This allows something like a static analysis to detect
 * which languages are grouped together, and then analyze the pattern of 
 * construct calls, and generate a custom assigner that uses info from all
 * the languages in a unified way..  Don't really expect this to happen, 
 * but making it possible.
 */
/*
inline
void 
masterFunction_SingleLang( PRLangEnv *protoLangEnv, AnimSlot *slot )
 {    //Scan the animation slots
   SlaveVP        *slave;
   PRReqst        *req;

      //Check if newly-done slave in slot, which will need request handled
   if( slot->workIsDone )
    { slot->workIsDone = FALSE;
      slot->needsWorkAssigned = TRUE;

            HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
            MEAS__startReqHdlr;


         //process the request made by the slave (held inside slave struc)
      slave = slot->slaveAssignedToSlot;
      req = slave->request;

         //If the requesting slave is a slot slave, and request is not
         // task-end, then turn it into a free task slave. 
      if( slave->typeOfVP == SlotTaskSlv && req->reqType != TaskEnd )
         PR_int__replace_with_new_slot_slv( slave );

      //Handle task create and end first -- they're special cases..
      switch( req->reqType )
       { case TaskEnd: 
          { //do PR handler, which calls lang's hdlr and does recycle of
            // free task slave if needed -- PR handler checks for free task Slv
            PRHandle_EndTask_SL( slave );                            break;
          }
         case TaskCreate:
          { //Do PR's create-task handler, which calls the lang's hdlr
            // PR handler checks for free task Slv
            PRHandle_CreateTask_SL( slave );                         break;
          }
         case SlvCreate:    PRHandle_CreateSlave_SL( slave );        break;
         case SlvDissipate: PRHandle_Dissipate_SL( slave );          break;
         case Service:      PR_int__handle_PRServiceReq_SL( slave ); break; //resume into PR's own language env
         case Hardware: //for future expansion
         case IO:       //for future expansion
         case OSCall:   //for future expansion
            PR_int__throw_exception("Not implemented", slave, NULL); break;
         case Language: //normal lang request
          { 
            (*protoLangEnv->requestHdlr)( req->langReq, slave, (void*)PR_int__give_lang_env(protoLangEnv ));
          }
       }
              
            MEAS__endReqHdlr;          
            HOLISTIC__Record_AppResponder_end;
    } //if have request to be handled

      //If slot empty, hand to Assigner to fill with a slave
   if( slot->needsWorkAssigned )
    {    //Call plugin's Assigner to give slot a new slave
            HOLISTIC__Record_Assigner_start;

      if( protoLangEnv->hasWork )
       {  (*protoLangEnv->slaveAssigner)( protoLangEnv, slot ); //calls PR fn that inserts work into slot
         goto ReturnAfterAssigningWork; //quit for-loop, cause found work
       }
      else
         goto NoWork;
    }
   
 NoWork:
      //No work, if reach here..
    { 
   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
      coreNum = slot->coreSlotIsOn;
      returnSlv = process->idleSlv[coreNum][slotNum]; 
    
         //things that would normally happen in resume(), but idle VPs
         // never go there
      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
      Unit newU;
      newU.vp = returnSlv->slaveNum;
      newU.task = returnSlv->numTimesAssignedToASlot;
      addToListOfArrays(Unit,newU,process->unitList);

      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
       { Dependency newD;             // to this one
         newD.from_vp = returnSlv->slaveNum;
         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
         newD.to_vp = returnSlv->slaveNum;
         newD.to_task = returnSlv->numTimesAssignedToASlot;
         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
       }
   #endif
            HOLISTIC__Record_Assigner_end;
      return;
    }
 
 ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
    {
            HOLISTIC__Record_Assigner_end;
      return;
    }
 }
*/

/*Per-slot step of the multi-language master loop.
 *
 *Does two things for the given slot, in this order:
 * 1) If the slot's work is done, takes the request held in the slave that
 *    was in the slot and dispatches on the request type -- PR's own handlers
 *    for task create/end, slave create/dissipate and service requests, or,
 *    for a normal language request, the handler carried inside the request,
 *    called with the language env looked up from the request's magic number.
 * 2) If the slot needs work, picks a process with pickAProcess() and, when
 *    one is found, asks assignWork() to fill the slot from it.
 *
 * slot -- the animation slot to service; its workIsDone and
 *         needsWorkAssigned flags are updated here
 */
inline
void
masterFunction_MultiLang( AnimSlot  *slot )
 {
   int32           magicNumber;
   SlaveVP        *slave;
   PRLangEnv      *langEnv;
   PRReqst        *req;
   PRProcess      *process;

      //Check if newly-done slave in slot, which will need request handled
   if( slot->workIsDone )
    { slot->workIsDone = FALSE;
      slot->needsWorkAssigned = TRUE;

            HOLISTIC__Record_AppResponder_start; //TODO: update to check which process for each slot
            MEAS__startReqHdlr;


         //process the request made by the slave (held inside slave struc)
      slave = slot->slaveAssignedToSlot;
      req = slave->request;

         //If the requesting slave is a slot slave, and request is not
         // task-end, then turn it into a free task slave. 
      if( slave->typeOfVP == SlotTaskSlv && req->reqType != TaskEnd )
         PR_int__replace_with_new_slot_slv( slave );

      //Handle task create and end first -- they're special cases..
      switch( req->reqType )
       { case TaskEnd: 
          { //do PR handler, which calls lang's hdlr and does recycle of
            // free task slave if needed -- PR handler checks for free task Slv
            PRHandle_EndTask( req, slave );                          break;
          }
         case TaskCreate:
          { //Do PR's create-task handler, which calls the lang's hdlr
            // PR handler checks for free task Slv
            PRHandle_CreateTask( req, slave );                       break;
          }
         case SlvCreate:    PRHandle_CreateSlave( req, slave );      break;
         case SlvDissipate: PRHandle_Dissipate( req, slave );        break;
         case Service:      PR_int__handle_PRServiceReq( slave );    break; //resume into PR's own language env
         case Hardware: //for future expansion
         case IO:       //for future expansion
         case OSCall:   //for future expansion
            PR_int__throw_exception("Not implemented", slave, NULL); break;
         case Language: //normal lang request
          {    //the request names its language by magic number, and carries
               // the handler to run; the handler gets that language's env
            magicNumber = req->langMagicNumber;
            langEnv = PR_PI__give_lang_env_for( slave, magicNumber );
            (*req->handler)( req->langReq, slave, langEnv );
            break;
          }
         default:  //no handler exists for any other request type
            break;
       }

           MEAS__endReqHdlr;          
           HOLISTIC__Record_AppResponder_end;
    } //if have request to be handled

   if( slot->needsWorkAssigned )
    {
            HOLISTIC__Record_Assigner_start;

         //Pick a process to get this slot
      process = pickAProcess( slot );

         //pickAProcess returns NULL when no process has work.  assignWork
         // dereferences the process, so only call it when one was found;
         // otherwise the slot is left still needing work.
      if( process != NULL )
       {    //Scan lang environs, looking for langEnv with ready work.
            // call the Assigner for that lang Env, to get a slave for the slot
         assignWork( process, slot );
       }

            HOLISTIC__Record_Assigner_end;
    }//if slot needs slave assigned
 }

/*Chooses which process gets to put work into an animation slot, when several
 * processes exist.  Only a process that has work available (a non-zero
 * numEnvsWithWork) can be chosen.
 *
 *The scan starts at the index of the most recently chosen process, runs to
 * the end of the process array, then wraps to the start and runs up to, but
 * not including, the starting index.  The index of the chosen process is
 * saved in _PRTopEnv->currProcessIdx for the next call.
 *NOTE(review): because the scan starts AT the saved index rather than after
 * it, the same process keeps being chosen for as long as it has work, so
 * this is not a strict round-robin -- confirm whether that is intended.
 *
 * slot    -- the slot that will receive the work (not used by the choice)
 * returns -- the chosen process, or NULL when no process has work
 */
inline
PRProcess *
pickAProcess( AnimSlot *slot )
 { int32      startIdx, numProcs;
   int32      pass, fromIdx, endIdx, candIdx;
   PRProcess *candidate;

   startIdx = _PRTopEnv->currProcessIdx;
   numProcs = _PRTopEnv->numProcesses;

      //pass 0 covers [startIdx, numProcs), pass 1 wraps to cover [0, startIdx)
   for( pass = 0; pass < 2; pass++ )
    { if( pass == 0 )
       { fromIdx = startIdx;
         endIdx  = numProcs;
       }
      else
       { fromIdx = 0;
         endIdx  = startIdx;
       }

      for( candIdx = fromIdx; candIdx < endIdx; candIdx++ )
       { candidate = _PRTopEnv->processes[ candIdx ];
         if( candidate->numEnvsWithWork == 0 )
            continue;   //nothing ready in this process, try the next one

            //remember where the scan stopped, then hand back the process
         _PRTopEnv->currProcessIdx = candIdx;
         return candidate;
       }
    }

      //scanned every process and none has work
   return NULL;
 }

/*Tries to fill the given animation slot with work from the given process.
 *
 *What the code here does:
 * 1) If the process has registered an override assigner, that is the only
 *    assigner consulted: it is called when the process has at least one
 *    language env with work, otherwise there is no work.
 * 2) Otherwise, scans the process's language envs in array order and calls
 *    the slaveAssigner of the first one whose hasWork flag is set.  Each
 *    language env keeps its own work in its own structures, and has its
 *    own assigner.
 * 3) If no work was found and HOLISTIC__TURN_ON_OBSERVE_UCC is defined,
 *    records an idle unit for the process's idle slave, plus a dependency
 *    from that slave's previous idle unit.
 *NOTE(review): earlier notes for this function also listed classifying the
 * work (new task, resuming task, resuming slave), pruning former task slaves
 * waiting to be recycled, and a shutdown check when no work is found.  None
 * of that is done by the code in this function.
 *
 *About the override assigner: it sees all the work in all the language
 * envs.  It is most likely generated by static tools and included in the
 * executable, which means it has to be called via a registered pointer from
 * here.  The idea is that the static tools know which languages are grouped
 * together, and the override enables them to generate a custom assigner
 * that uses info from all the languages in a unified way.  Don't really
 * expect this to happen, but am making it possible.
 *
 * process -- process to take work from.  May be NULL (pickAProcess() returns
 *            NULL when no process has work); then nothing is assigned.
 * slot    -- the animation slot to fill
 * returns -- when an assigner was called: whatever slave is in
 *            slot->slaveAssignedToSlot afterwards (the assigner is expected
 *            to have put the work into the slot).  When no work was found:
 *            NULL, or, with HOLISTIC__TURN_ON_OBSERVE_UCC defined, the idle
 *            slave that was recorded.
 */
inline 
SlaveVP *
assignWork( PRProcess *process, AnimSlot *slot )
 { SlaveVP        *returnSlv = NULL;
   int32           coreNum, slotNum;
   int32           envIdx, numEnvs;
   PRLangEnv     **langEnvsList, *langEnv;

   if( process == NULL )
    {    //No process was picked, so there is nothing to take work from
            HOLISTIC__Record_Assigner_end;
      return NULL;
    }

   coreNum = slot->coreSlotIsOn;
   
   if( process->overrideAssigner != NULL )
    { if( process->numEnvsWithWork != 0 )
       {  (*process->overrideAssigner)( process, slot ); //calls PR fn that inserts work into slot
          goto ReturnAfterAssigningWork; //found work
       }
      else
         goto NoWork;
    }
   
      //If here, then no override assigner, so search language envs for work
   langEnvsList = process->langEnvsList;
   numEnvs = process->numLangEnvs;
   for( envIdx = 0; envIdx < numEnvs; envIdx++ ) //keep langEnvs in hash & array
    { langEnv = langEnvsList[envIdx];
      if( langEnv->hasWork )
       { (*langEnv->slaveAssigner)( langEnv, slot ); //assigner calls PR to put slave/task into slot
         goto ReturnAfterAssigningWork; //quit for-loop, cause found work
         //NOTE: bad search alg -- should start where left off, then wrap around
       }
    }
   //If reach here, then have searched all langEnv's & none have work..
   
 NoWork:     //No work, if end up here..
    { 
   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
         //NOTE(review): slotNum is never given a value before it is used
         // as an index here.  It presumably should be this slot's index
         // within its core -- set it from the matching AnimSlot field.
      returnSlv = process->idleSlv[coreNum][slotNum]; 
    
         //things that would normally happen in resume(), but idle VPs
         // never go there
      returnSlv->numTimesAssignedToASlot++; //gives each idle unit a unique ID
      Unit newU;
      newU.vp = returnSlv->slaveNum;
      newU.task = returnSlv->numTimesAssignedToASlot;
      addToListOfArrays(Unit,newU,process->unitList);

      if (returnSlv->numTimesAssignedToASlot > 1) //make a dependency from prev idle unit
       { Dependency newD;             // to this one
         newD.from_vp = returnSlv->slaveNum;
         newD.from_task = returnSlv->numTimesAssignedToASlot - 1;
         newD.to_vp = returnSlv->slaveNum;
         newD.to_task = returnSlv->numTimesAssignedToASlot;
         addToListOfArrays(Dependency, newD ,process->ctlDependenciesList);  
       }
   #endif
            HOLISTIC__Record_Assigner_end;
         //NULL, unless the HOLISTIC code above selected the idle slave
      return returnSlv;
    }
 
 ReturnAfterAssigningWork:  //All paths goto here.. to provide single point for holistic..
    {
            HOLISTIC__Record_Assigner_end;
         //the assigner is expected to have placed the work into the slot
      return slot->slaveAssignedToSlot;
    }
 }


/*This is first thing called when creating a slave..  it hands off to the 
 * langlet's creator, then adds updates of its own..
 * 
 *There's a question of things like lang data, meta tasks, and such..
 *In creator, only PR related things happen, and things for the langlet whose
 * creator construct was used.
 * 
 *Other langlets still get a chance to create langData -- but by registering a
 * "createLangData" handler in the langEnv.  When a construct  of the langlet
 * calls "PR__give_lang_data()", if there is no langData for that langlet,
 * the PR will call the creator in the langlet's langEnv, place whatever it
 * makes as the langData in that slave for that langlet, and return that langData
 *
 *So, as far as counting things, a langlet is only allowed to count creation
 * of slaves it creates itself..  may have to change this later.. add a way for
 * langlet to register a trigger Fn called each time a slave gets created.. 
 * need more experience with what langlets will do at create time..  think Cilk
 * has interesting create behavior..  not sure how that will differ in light
 * of true tasks and langlet approach.  Look at it after all done and start
 * modifying the langs to be langlets..
 * 
 *PR itself needs to create the slave, then update numLiveSlaves in process,
 * copy processID from requestor to newly created
 */
/*PR-level handler for a create-slave request.
 *
 *Looks up the proto language env for the requesting slave and the request's
 * magic number, calls the create handler carried in the request to make the
 * new slave, then fills in the PR-owned fields of the new slave and counts
 * it as one more live generic slave in the requester's process.
 *
 * req   -- the create request; supplies langMagicNumber, createHdlr, langReq
 *          and the ID given to the new slave
 * slave -- the slave that made the request; the new slave is placed in the
 *          same process
 */
inline
void
PRHandle_CreateSlave( PRReqst *req, SlaveVP *slave )
 { PRProcess *ownerProcess = slave->processSlaveIsIn;
   PRLangEnv *protoEnv     =
      PR_int__give_proto_lang_env_for_slave__ML( slave, req->langMagicNumber );
   SlaveVP   *createdSlv;

      //The create handler has a different prototype than a standard request
      // handler: it returns the slave it made.  (PR does not call
      // PR_int__create_slave( req->topLevelFn, req->initData ) itself here.)
   createdSlv = (*req->createHdlr)( req->langReq, slave,
                                    PR_int__give_lang_env(protoEnv) );

      //PR's own bookkeeping for the new slave
   createdSlv->typeOfVP         = GenericSlv;
   createdSlv->processSlaveIsIn = ownerProcess;
   createdSlv->ID               = req->ID;

   ownerProcess->numLiveGenericSlvs++;
 }

/*The dissipate handler has to update the number of slaves of the type, within
 * the process, and call the langlet handler linked into the request,
 * and after that returns, then call the PR function that frees the slave state
 * (or recycles the slave).
 * 
 *The PR function that frees the slave state has to also free all of the
 * langData in the slave..  or else reset all of the langDatas.. by, say, marking
 * them, then in PR__give_langData( magicNum ) call the langlet registered
 * "resetLangData" Fn.
 */
inline
void
PRHandle_Dissipate( PRReqst *req, SlaveVP *slave )
 { PRProcess *owningProcess = slave->processSlaveIsIn; //saved before recycle
   PRLangEnv *protoEnv;

      //Proto lang env is looked up with the magic number held in the request
      // that is attached to the slave
   protoEnv = PR_int__give_proto_lang_env_for_slave__ML( slave,
                                          slave->request->langMagicNumber );

      //Run the language's dissipate handler, when the request carries one
   if( req->handler != NULL )
    { (*req->handler)( req->langReq, slave, PR_int__give_lang_env(protoEnv) );
    }

      //One fewer live generic slave in the process, then recycle the slave
   owningProcess->numLiveGenericSlvs--;
   PR_int__recycle_slave__ML( slave );

      //End Of Process Condition: nothing to do while any task or generic
      // slave is still live
   if( owningProcess->numLiveTasks != 0 ||
       owningProcess->numLiveGenericSlvs != 0 )
      return;

   PR_SS__shutdown_process__ML( owningProcess );
 }

/*Create task is a special form, that has PR behavior in addition to plugin
 * behavior.  Master calls this first, and it then calls the plugin's
 * create task handler.
 * 
 *Note: the requesting slave must be either generic slave or free task slave
 */
inline
void
PRHandle_CreateTask( PRReqst *req, SlaveVP *slave )
 { PRProcess   *owningProcess = slave->processSlaveIsIn;
   PRLangEnv   *protoEnv;
   PRMetaTask  *prolog;
   void        *langTask;

   protoEnv =
      PR_int__give_proto_lang_env_for_slave__ML( slave, req->langMagicNumber );

      //The create handler carried in the request makes the task; what it
      // returns is what PR uses to reach the task's prolog
   langTask =
      (*req->createHdlr)( req->langReq, slave, PR_int__give_lang_env(protoEnv) );

      //Fill the PR-owned prolog of the task from the request
   prolog = PR_int__give_prolog_of_task( langTask );
   prolog->ID         = req->ID; //may be NULL
   prolog->topLevelFn = req->topLevelFn;
   prolog->initData   = req->initData;

      //One more live task in the requester's process
   owningProcess->numLiveTasks++;
 }

/*When a task ends, there are two scenarios: 1) task ran to completion, or 2) task
 * suspended at some point in its code.
 *For 1, just decr count of live tasks (and check for end condition) -- the
 * master loop will decide what goes into the slot freed up by this task end,
 * so, here, don't worry about assigning a new task to the slot slave.
 *For 2, the task's slot slave has been converted to a free task slave, which
 * now has nothing more to do, so send it to the recycle Q (which includes
 * freeing all the langData and meta task structs alloc'd for it).  Then
 * decrement the live task count and check end condition.
 * 
 *PR has to update count of live tasks, and check end of process condition.
 * The "main" can invoke constructs that wait for a process to end, so when
 * end detected, have to resume what's waiting..
 *Thing is, that wait involves the main OS thread.  That means
 * PR internals have to do OS thread signaling.  Want to do that in the
 * core controller, which has the original stack of an OS thread.  So the
 * end process handling happens in the core controller.
 * 
 *So here, when detect process end, signal to the core controller, which will
 * then do the condition variable notify to the OS thread that's waiting.
 * 
 *Note: slave may be either a slot slave or a free task slave. 
 */
inline 
void
PRHandle_EndTask( PRReqst *req, SlaveVP *requestingSlv )
 { void       *langEnv;
   PRProcess  *process;
   void       *langMetaTask;
   
      //Get the process now, while the slave is still intact -- the slave may
      // be recycled below, and the process is needed after that for the live
      // task count and the end-of-process check.
      //(Previously 'process' was never assigned before being dereferenced.)
   process = requestingSlv->processSlaveIsIn;
   
   langEnv = PR_int__give_lang_env_of_req__ML( req, requestingSlv ); //magic num in req
   
      //NOTE(review): result is not used in this handler; lookup kept in case
      // the call has effects not visible from here -- confirm, then remove
   langMetaTask = PR_int__give_lang_meta_task_from_slave__ML( requestingSlv, req->langMagicNumber);
   (void)langMetaTask;
   
   //Do the langlet's request handler
   //Want to keep PR structs hidden from plugin, so extract langReq..
   (*req->handler)( req->langReq, requestingSlv, langEnv );
   
   //Now that the langlet's done with it, recycle the slave if it's a freeTaskSlv
   if( requestingSlv->typeOfVP == FreeTaskSlv )
      PR_int__recycle_slave__ML( requestingSlv );
   
   process->numLiveTasks -= 1;
  
      //check End Of Process Condition
   if( process->numLiveTasks == 0 &&
       process->numLiveGenericSlvs == 0 )
    { //Tell the core controller to do wakeup of any waiting OS thread
      PR_SS__shutdown_process__ML( process );
    }
 }