/*
 * Copyright 2010  OpenSourceCodeStewardshipFoundation
 *
 * Licensed under BSD
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include "Queue_impl/PrivateQueue.h"
#include "Hash_impl/PrivateHash.h"

#include "VSs.h"
#include "PR_impl/Services_Offered_by_PR/Measurement_and_Stats/PR_MEAS__Counter_Recording.h"
//==========================================================================
/*Prototype only for the VSs init helper -- no body for it appears next to
 * this declaration.
 *NOTE(review): the empty parameter list leaves the arguments unspecified in
 * C; confirm against the definition whether it takes none.
 */
void
VSs__init_Helper();

/*Forward declaration, so that the thread-creation wrappers further down can
 * call this function ahead of its definition later in the file.
 */
SlaveVP *
VSs__create_thread_w_ID_and_affinity( TopLevelFnPtr fnPtr,   void *initData, 
                    int32 *thdID, int32 coreToAssignOnto, SlaveVP *creatingThd );

//==========================================================================



//===========================================================================


/*These are the library functions *called in the application*
 * 
 *There's a pattern for the outside sequential code to interact with the
 * PR_HW code.
 *The PR_HW system is inside a boundary..  every VSs system is in its
 * own directory that contains the functions for each of the processor types.
 * One of the processor types is the "seed" processor that starts the
 * cascade of creating all the processors that do the work.
 *So, in the directory is a file called "EntryPoint.c" that contains the
 * function, named appropriately to the work performed, that the outside
 * sequential code calls.  This function follows a pattern:
 *1) it calls VSs__init()
 *2) it creates the initial data for the seed processor, which is passed
 *    in to the function
 *3) it creates the seed VSs processor, with the data to start it with.
 *4) it calls startVSsThenWaitUntilWorkDone
 *5) it gets the returnValue from the transfer struct and returns that
 *    from the function
 *
 *For now, a new VSs system has to be created via VSs__init every
 * time an entry point function is called -- later, might add letting the
 * VSs system be created once, and let all the entry points just reuse
 * it -- want to be as simple as possible now, and see by using what makes
 * sense for later..
 */



//===========================================================================

/*Returns the value of MIN_WORK_UNIT_CYCLES.
 *
 *percentOverhead is accepted but never consulted -- the result does not
 * depend on it.
 */
int32
VSs__giveMinWorkUnitCycles( float32 percentOverhead )
 { int32 minCycles;

   (void)percentOverhead;  //deliberately unused
   minCycles = MIN_WORK_UNIT_CYCLES;
   return minCycles;
 }

/*Returns NUM_ANIM_SLOTS * NUM_CORES as the ideal number of work units.
 */
int32
VSs__giveIdealNumWorkUnits()
 { int32 idealNumUnits = NUM_ANIM_SLOTS * NUM_CORES;

   return idealNumUnits;
 }

/*Returns NUM_CORES as the number of cores to schedule onto.
 */
int32
VSs__give_number_of_cores_to_schedule_onto()
 { int32 numCores = NUM_CORES;

   return numCores;
 }

/*Marks the start of a timed primitive:  looks up the VSs lang data of
 * animSlv and saves the low time-stamp count into its primitiveStartTime
 * field.  VSs__end_primitive_and_give_cycles reads that field back.
 *
 *For now, use TSC -- later, make these two macros with assembly that first
 * saves jump point, and second jumps back several times to get reliable time
 */
void
VSs__begin_primitive( SlaveVP *animSlv )
 { VSsLangData *slvLangData =
      (VSsLangData *)PR_WL__give_lang_data( animSlv, VSs_MAGIC_NUMBER);

   saveLowTimeStampCountInto( slvLangData->primitiveStartTime );
 }

/*Returns the difference between the low time-stamp count taken now and the
 * one stored in the lang data's primitiveStartTime by VSs__begin_primitive.
 *
 *Just quick and dirty for now -- make reliable later
 * will want this to jump back several times -- to be sure cache is warm
 * because don't want comm time included in calc-time measurement -- and
 * also to throw out any "weird" values due to OS interrupt or TSC rollover
 */
int32
VSs__end_primitive_and_give_cycles( SlaveVP *animSlv )
 { int32        stampAtEnd, stampAtStart;
   VSsLangData *slvLangData;

      //the end stamp is taken before the lang-data lookup
      //TODO: fix by repeating time-measurement
   saveLowTimeStampCountInto( stampAtEnd );

   slvLangData  = (VSsLangData *)PR_WL__give_lang_data( animSlv, VSs_MAGIC_NUMBER);
   stampAtStart = slvLangData->primitiveStartTime;

   return (stampAtEnd - stampAtStart);
 }



//===========================================================================

/*Convenience form of thread creation:  forwards to
 * VSs__create_thread_w_ID_and_affinity, passing NO_ID as the thread ID and
 * ANY_CORE as the core, and returns whatever that call returns.
 */
SlaveVP *
VSs__create_thread( TopLevelFnPtr fnPtr,   void *initData,
                        SlaveVP *creatingThd )
 { SlaveVP *newThd;

   newThd = VSs__create_thread_w_ID_and_affinity( fnPtr, initData, NO_ID,
                                                  ANY_CORE, creatingThd );
   return newThd;
 }

/*Creates a thread carrying the given thdID:  forwards to
 * VSs__create_thread_w_ID_and_affinity with ANY_CORE as the core, and
 * returns whatever that call returns.
 */
SlaveVP *
VSs__create_thread_w_ID( TopLevelFnPtr fnPtr,   void *initData, int32 *thdID,
                         SlaveVP *creatingThd )
 { SlaveVP *newThd;

   newThd = VSs__create_thread_w_ID_and_affinity( fnPtr, initData, thdID,
                                                  ANY_CORE, creatingThd );
   return newThd;
 }

/* old version -- looks safe to delete
SlaveVP *
VSs__create_slave_with_affinity( TopLevelFnPtr fnPtr, void *initData,
                        SlaveVP *creatingSlv,  int32  coreToAssignOnto )
 { VSsLangReq  reqData;

      //the lang request data is on the stack and disappears when this
      // call returns -- it's guaranteed to remain in the VP's stack for as
      // long as the VP is suspended.
   reqData.reqType            = create_slave_w_aff; //not used, May 2012
   reqData.coreToAssignOnto   = coreToAssignOnto;
   reqData.fnPtr              = fnPtr;
   reqData.initData           = initData;
   reqData.callingSlv         = creatingSlv;

   PR_WL__send_create_slaveVP_req( &reqData, creatingSlv, VSs_MAGIC_NUMBER );

   return creatingSlv->dataRetFromReq;
 }
*/


/*Fills in a create_slave lang request with the top-level function, its
 * initial data and the core to assign onto, then sends it as a create-slaveVP
 * request with handleCreateThd as the handler.
 *
 *Returns creatingThd->dataRetFromReq, cast to SlaveVP *, as read after the
 * request call returns.
 */
SlaveVP *
VSs__create_thread_w_ID_and_affinity( TopLevelFnPtr fnPtr,   void *initData, 
                    int32 *thdID, int32 coreToAssignOnto, SlaveVP *creatingThd )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

      //the lang request data is on the stack and disappears when this
      // call returns -- it's guaranteed to remain in the VP's stack for as
      // long as the VP is suspended.
   req->fnPtr            = fnPtr;
   req->initData         = initData;
   req->coreToAssignOnto = coreToAssignOnto;
   req->reqType          = create_slave; //know type because in a PR create req

   PR_WL__send_create_slaveVP_req( req, thdID, (CreateHandler)&handleCreateThd,
                                   creatingThd, VSs_MAGIC_NUMBER );

   return (SlaveVP *)creatingThd->dataRetFromReq;
 }

/*This is always the last thing done in the code animated by a thread VP.
 * Normally, this would be the last line of the thread's top level function.
 * But, if the thread exits from any point, it has to do so by calling
 * this.
 *
 *It simply sends a dissipate request, which handles all the state cleanup.
 * The request goes out with handleDissipate as its handler.
 */
void
VSs__end_thread( SlaveVP *thdToEnd )
 { RequestHandler dissipateHdlr = (RequestHandler)&handleDissipate;

      //the lang request is null for VSs version of end slave
   PR_WL__send_end_slave_req( NULL, dissipateHdlr, thdToEnd, VSs_MAGIC_NUMBER );
 }



//===========================================================================


//======================= task submit and end ==============================
/*Submits a task of the given taskType, with args as its data, on behalf of
 * animSlv.  The task is sent with NO_ID as its ID; use
 * VSs__submit_task_with_ID to attach one.
 */
void
VSs__submit_task( VSsTaskType *taskType, void *args, SlaveVP *animSlv)
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->callingSlv = animSlv;
   req->args       = args;
   req->taskType   = taskType;
   req->reqType    = submit_task;

      //Create task is a special form, so have to pass as parameters, the
      // top-level-fn of task and the data for that fn, plus lang's req,
      // animating slave, and lang's magic number
   PR_WL__send_create_task_req( taskType->fn, args, req, NO_ID,
                                &handleSubmitTask, animSlv, VSs_MAGIC_NUMBER );
 }

/*Same as VSs__submit_task, except the caller-supplied taskID is passed along
 * with the create-task request in place of NO_ID.
 */
void
VSs__submit_task_with_ID( VSsTaskType *taskType, void *args, int32 *taskID, 
                          SlaveVP     *animSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->callingSlv = animSlv;
   req->args       = args;
   req->taskType   = taskType;
   req->reqType    = submit_task;

      //top-level-fn and its data go as separate params, as in VSs__submit_task
   PR_WL__send_create_task_req( taskType->fn, args, req, taskID,
                                &handleSubmitTask, animSlv, VSs_MAGIC_NUMBER );
 }


/*This call is the last to happen in every task.  It causes the slave to
 * suspend and get the next task out of the task-queue.  Notice there is no
 * assigner here.. only one slave, no slave ReadyQ, and so on..
 *Can either make the assigner take the next task out of the taskQ, or can
 * leave all as it is, and make task-end take the next task.
 *Note: this fits the case in the new PR for no-context tasks, so will use
 * the built-in taskQ of new PR, and should be local and much faster.
 * 
 *The task-stub is saved in the animSlv, so the request handler will get it
 * from there, along with the task-type which has arg types, and so on..
 * 
 * NOTE: if want, don't need to send the animating SlaveVP around.. 
 * instead, can make a single slave per core, and coreCtrlr looks up the
 * slave from having the core number.
 * 
 *But, to stay compatible with all the other PR languages, leave it in..
 *
 *The end-task request is sent with handleEndTask as its handler.  No lang
 * request struct is built, because none is passed.
 */
void
VSs__end_task( SlaveVP *animSlv )
 {
      //VSs has nothing extra to communicate to end task handler, so lang req is NULL
   PR_WL__send_end_task_request( NULL, &handleEndTask, animSlv, VSs_MAGIC_NUMBER );
 }


/*Waits for all tasks that are direct children to end, then resumes calling
 * task or thread
 *
 *Sends a taskwait lang request, naming animSlv as the calling slave, with
 * handleTaskwait as the handler.
 */
void
VSs__taskwait(SlaveVP *animSlv)
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->callingSlv = animSlv;
   req->reqType    = taskwait;

   PR_WL__send_lang_request( req, (RequestHandler)&handleTaskwait, animSlv,
                             VSs_MAGIC_NUMBER );
 }



//==========================  send and receive ============================
//

inline 
int32 *
VSs__give_self_taskID( SlaveVP *animSlv )
 {
   return PR__give_ID_from_slave( animSlv, VSs_MAGIC_NUMBER );
 }

//================================ send ===================================

/*Sends msg, tagged with the message type, to the receiver identified by
 * receiverID.  Builds a send_type_to lang request and sends it with
 * handleSendTypeTo as the handler.
 */
void
VSs__send_of_type_to( void *msg, const int32 type, int32 *receiverID,
                      SlaveVP *senderSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->nextReqInHashEntry = NULL;

   req->senderSlv  = senderSlv;
   req->receiverID = receiverID;
   req->msgType    = type;
   req->msg        = msg;
   req->reqType    = send_type_to;

   PR_WL__send_lang_request( req, (RequestHandler)&handleSendTypeTo,
                             senderSlv, VSs_MAGIC_NUMBER );

      //When come back from suspend, no longer own data reachable from msg
 }

/*Sends msg from the sender identified by senderID to the receiver identified
 * by receiverID.  Builds a send_from_to lang request and sends it with
 * handleSendFromTo as the handler.
 */
void
VSs__send_from_to( void *msg, int32 *senderID, int32 *receiverID, SlaveVP *senderSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->nextReqInHashEntry = NULL;

   req->senderSlv  = senderSlv;
   req->receiverID = receiverID;
   req->senderID   = senderID;
   req->msg        = msg;
   req->reqType    = send_from_to;

   PR_WL__send_lang_request( req, (RequestHandler)&handleSendFromTo,
                             senderSlv, VSs_MAGIC_NUMBER );
 }


//================================ receive ================================

/*The "type" version of send and receive creates a many-to-one relationship.
 * The sender is anonymous, and many sends can stack up, waiting to be
 * received.  The same receiver can also have send from-to's
 * waiting for it, and those will be kept separate from the "type"
 * messages.
 *
 *Builds a receive_type_to lang request, sends it with handleReceiveTypeTo as
 * the handler, and returns receiverSlv->dataRetFromReq as read after the
 * request call returns.
 */
void *
VSs__receive_type_to( const int32 type, int32* receiverID, SlaveVP *receiverSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

         DEBUG__printf1(dbgRqstHdlr,"WL: receive type to %d",receiverID[1] );

   req->nextReqInHashEntry = NULL;

   req->receiverSlv = receiverSlv;
   req->receiverID  = receiverID;
   req->msgType     = type;
   req->reqType     = receive_type_to;

   PR_WL__send_lang_request( req, (RequestHandler)&handleReceiveTypeTo,
                             receiverSlv, VSs_MAGIC_NUMBER );

   return receiverSlv->dataRetFromReq;
 }



/*Call this at the point a receiving task wants in-coming data.
 * Use this from-to form when know senderID -- it makes a direct channel
 * between sender and receiver.
 *
 *Builds a receive_from_to lang request, sends it with handleReceiveFromTo as
 * the handler, and returns receiverSlv->dataRetFromReq as read after the
 * request call returns.
 */
void *
VSs__receive_from_to( int32 *senderID, int32 *receiverID, SlaveVP *receiverSlv )
 { VSsLangReq  reqData;

   reqData.nextReqInHashEntry = NULL;

   reqData.receiverSlv = receiverSlv;
   reqData.receiverID  = receiverID;
   reqData.senderID    = senderID;
   reqData.reqType     = receive_from_to;

      DEBUG__printf2(dbgRqstHdlr,"WL: receive from %d to: %d", reqData.senderID[1], reqData.receiverID[1]);

   PR_WL__send_lang_request( &reqData, (RequestHandler)&handleReceiveFromTo,
                             receiverSlv, VSs_MAGIC_NUMBER );

   return receiverSlv->dataRetFromReq;
 }




//==========================================================================
//
/*A function singleton is a function whose body executes exactly once, on a
 * single core, no matter how many times the function is called and no
 * matter how many cores or the timing of cores calling it.
 *
 *A data singleton is a ticket attached to data.  That ticket can be used
 * to get the data through the function exactly once, no matter how many
 * times the data is given to the function, and no matter the timing of
 * trying to get the data through from different cores.
 */

/*asm function declarations
 *
 *asm_save_ret_to_singleton is called from VSs__end_data_singleton, to save
 * the return addr of that call into the singleton.
 *asm_write_ret_from_singleton is called from the start-singleton functions,
 * to change the return addr on the stack to the one saved in the singleton.
 *NOTE(review): the parameter is named singletonPtrAddr but is typed
 * VSsSingleton *, and the callers pass a pointer to the struct itself --
 * confirm the name is just left over.
 */
void asm_save_ret_to_singleton(VSsSingleton *singletonPtrAddr);
void asm_write_ret_from_singleton(VSsSingleton *singletonPtrAddr);

/*Fn singleton uses ID as index into array of singleton structs held in the
 * language environment.
 *
 *Sends a singleton_fn_start request carrying singletonID, with
 * handleStartFnSingleton as the handler.  When the request call returns
 * with a non-zero animSlv->dataRetFromReq, the return addr of this call is
 * rewritten (via asm) from langEnv->fnSingletons[ singletonID ], so the
 * return lands at the addr held in the singleton rather than at the call
 * site.
 *NOTE(review): singletonID indexes fnSingletons with no bounds check in this
 * function -- confirm callers or the handler guarantee it is in range.
 */
void
VSs__start_fn_singleton( int32 singletonID,   SlaveVP *animSlv )
 {
   VSsLangReq  reqData;

      //only the request type and the singleton ID are filled in
   reqData.reqType     = singleton_fn_start;
   reqData.singletonID = singletonID;

   PR_WL__send_lang_request( &reqData, (RequestHandler)&handleStartFnSingleton,
                             animSlv, VSs_MAGIC_NUMBER );
   if( animSlv->dataRetFromReq ) //will be 0 or addr of label in end singleton
    {
         //non-zero: overwrite this call's return addr with the one held in
         // the singleton struct for this ID
      VSsLangEnv *langEnv =
              PR_int__give_lang_env_for_slave( animSlv, VSs_MAGIC_NUMBER );
      asm_write_ret_from_singleton(&(langEnv->fnSingletons[ singletonID]));
    }
 }

/*Data singleton hands addr of loc holding a pointer to a singleton struct.
 * The start_data_singleton makes the structure and puts its addr into the
 * location.
 *
 *Fast path: when the location already holds a singleton whose hasFinished
 * flag is set, no request is sent -- control jumps straight to the
 * return-addr rewrite.
 *Otherwise a singleton_data_start request is sent, with
 * handleStartDataSingleton as the handler, and the rewrite happens only if
 * animSlv->dataRetFromReq comes back non-zero.
 */
void
VSs__start_data_singleton( VSsSingleton **singletonAddr,  SlaveVP *animSlv )
 {
   VSsLangReq  reqData;

      //already-finished singleton: skip the request entirely
   if( *singletonAddr && (*singletonAddr)->hasFinished )
       goto JmpToEndSingleton;
   
   reqData.reqType          = singleton_data_start;
   reqData.singletonPtrAddr = singletonAddr;

   PR_WL__send_lang_request( &reqData, (RequestHandler)&handleStartDataSingleton,
                             animSlv, VSs_MAGIC_NUMBER );
   if( animSlv->dataRetFromReq ) //either 0 or end singleton's return addr
    {    //Assembly code changes the return addr on the stack to the one
         // saved into the singleton by the end-singleton-fn
         //The return addr is at 0x4(%%ebp)
         //NOTE(review): that offset reads as 32-bit x86 -- confirm it
         // against the asm used for the build target
        JmpToEndSingleton:
          asm_write_ret_from_singleton(*singletonAddr);
    }
   //now, simply return
   //will exit either from the start singleton call or the end-singleton call
 }

/*Uses ID as index into array of flags.  If flag already set, resumes from
 * end-label.  Else, sets flag and resumes normally.
 *
 *Before sending the singleton_fn_end request (handler:
 * handleEndFnSingleton), this saves the return addr of this call into the
 * singleton struct at langEnv->fnSingletons[ singletonID ].  That saved addr
 * is what VSs__start_fn_singleton later writes over its own return addr, so
 * it must be recorded here -- the same way VSs__end_data_singleton does it
 * for data singletons.
 *
 *Note, this call cannot be inlined because the instr addr recorded for the
 * singleton is shared by all invocations of a given singleton ID.
 */
void
VSs__end_fn_singleton( int32 singletonID, SlaveVP *animSlv )
 {
   VSsLangReq  reqData;

      //don't need this addr until after at least one singleton has reached
      // this function
   VSsLangEnv *
   langEnv = PR_int__give_lang_env_for_slave( animSlv, VSs_MAGIC_NUMBER );

      //save (not write): records where to resume, for start-singleton to use
   asm_save_ret_to_singleton(&(langEnv->fnSingletons[ singletonID]));

   reqData.reqType     = singleton_fn_end;
   reqData.singletonID = singletonID;

   PR_WL__send_lang_request( &reqData, (RequestHandler)&handleEndFnSingleton, 
                             animSlv, VSs_MAGIC_NUMBER );
 }

/*Marks the end of the code guarded by a data singleton.
 *
 *Saves the return addr of this call into the singleton struct that
 * *singletonPtrAddr points to, then sends a singleton_data_end request with
 * handleEndDataSingleton as the handler.
 *NOTE(review): *singletonPtrAddr is handed to the asm without a NULL check
 * -- presumably the matching start call has already filled it in; confirm.
 */
void
VSs__end_data_singleton(  VSsSingleton **singletonPtrAddr, SlaveVP *animSlv )
 {
   VSsLangReq  reqData;

      //don't need this addr until after singleton struct has reached
      // this function for first time
      //do assembly that saves the return addr of this fn call into the
      // data singleton -- that data-singleton can only be given to exactly
      // one instance in the code of this function.  However, can use this
      // function in different places for different data-singletons.
   asm_save_ret_to_singleton(*singletonPtrAddr);

   reqData.reqType          = singleton_data_end;
   reqData.singletonPtrAddr = singletonPtrAddr;

   PR_WL__send_lang_request( &reqData, (RequestHandler)&handleEndDataSingleton,
                             animSlv, VSs_MAGIC_NUMBER );
 }

/*This executes the function in the masterVP, so it executes in isolation
 * from any other copies -- only one copy of the function can ever execute
 * at a time.
 *
 *It suspends to the master, and the request handler takes the function
 * pointer out of the request and calls it, then resumes the VP.
 *Only very short functions should be called this way -- for longer-running
 * isolation, use transaction-start and transaction-end, which run the code
 * between as work-code.
 *
 *The request is of type atomic, carries the function pointer plus its data,
 * and is sent with handleAtomic as the handler.
 */
void
VSs__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
                                    void *data, SlaveVP *animSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->dataForFn        = data;
   req->fnToExecInMaster = ptrToFnToExecInMaster;
   req->reqType          = atomic;

   PR_WL__send_lang_request( req, (RequestHandler)&handleAtomic,
                             animSlv, VSs_MAGIC_NUMBER );
 }


/*This suspends to the master.
 *First, it looks at the VP's data, to see the highest transactionID that VP
 * already has entered.  If the current ID is not larger, it throws an
 * exception stating a bug in the code.  Otherwise it puts the current ID
 * there, and adds the ID to a linked list of IDs entered -- the list is
 * used to check that exits are properly ordered.
 *Next it uses transactionID as index into an array of transaction
 * structures.
 *If the "VP_currently_executing" field is non-null, then put requesting VP
 * into queue in the struct.  (At some point a holder will request
 * end-transaction, which will take this VP from the queue and resume it.)
 *If NULL, then write requesting into the field and resume.
 *
 *On this side, only a trans_start request is built -- carrying the calling
 * slave and the transaction ID -- and sent with handleTransStart as the
 * handler.
 */
void
VSs__start_transaction( int32 transactionID, SlaveVP *animSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->reqType    = trans_start;
   req->transID    = transactionID;
   req->callingSlv = animSlv;

   PR_WL__send_lang_request( req, (RequestHandler)&handleTransStart,
                             animSlv, VSs_MAGIC_NUMBER );
 }

/*This suspends to the master, then uses transactionID as index into an
 * array of transaction structures.
 *It looks at VP_currently_executing to be sure it's same as requesting VP.
 * If different, throws an exception, stating there's a bug in the code.
 *Next it looks at the queue in the structure.
 *If it's empty, it sets VP_currently_executing field to NULL and resumes.
 *If something in, gets it, sets VP_currently_executing to that VP, then
 * resumes both.
 *
 *On this side, only a trans_end request is built -- carrying the calling
 * slave and the transaction ID -- and sent with handleTransEnd as the
 * handler.
 */
void
VSs__end_transaction( int32 transactionID, SlaveVP *animSlv )
 { VSsLangReq  reqData;
   VSsLangReq *req = &reqData;

   req->reqType    = trans_end;
   req->transID    = transactionID;
   req->callingSlv = animSlv;

   PR_WL__send_lang_request( req, (RequestHandler)&handleTransEnd,
                             animSlv, VSs_MAGIC_NUMBER );
 }

//======================== Internal ==================================

