/*
 * Copyright 2010  OpenSourceCodeStewardshipFoundation
 *
 * Licensed under BSD
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include "Queue_impl/PrivateQueue.h"
#include "Hash_impl/PrivateHash.h"

#include "VSs.h"
#include "Measurement/VSs_Counter_Recording.h"

//==========================================================================

//Forward declaration: calls VMS_SS__init() and then VSs__init_Helper()
void
VSs__init();

//Forward declaration: builds the VSs semantic environment and attaches it
// to _VMSMasterEnv; called from VSs__init()
void
VSs__init_Helper();
//==========================================================================


//===========================================================================


/*These are the library functions *called in the application*
 * 
 *There's a pattern for the outside sequential code to interact with the
 * VMS_HW code.
 *The VMS_HW system is inside a boundary..  every VSs system is in its
 * own directory that contains the functions for each of the processor types.
 * One of the processor types is the "seed" processor that starts the
 * cascade of creating all the processors that do the work.
 *So, in the directory is a file called "EntryPoint.c" that contains the
 * function, named appropriately to the work performed, that the outside
 * sequential code calls.  This function follows a pattern:
 *1) it calls VSs__init()
 *2) it creates the initial data for the seed processor, which is passed
 *    in to the function
 *3) it creates the seed VSs processor, with the data to start it with.
 *4) it calls startVSsThenWaitUntilWorkDone
 *5) it gets the returnValue from the transfer struc and returns that
 *    from the function
 *
 *For now, a new VSs system has to be created via VSs__init every
 * time an entry point function is called -- later, might add letting the
 * VSs system be created once, and let all the entry points just reuse
 * it -- want to be as simple as possible now, and see by using what makes
 * sense for later..
 */


//===========================================================================

/*This is the "border crossing" function -- the thing that crosses from the
 * outside world, into the VMS_HW world.  It initializes and starts up the
 * VMS system, then creates one processor from the specified function and
 * puts it into the readyQ.  From that point, that one function is resp.
 * for creating all the other processors, that then create others, and so
 * forth.
 *When all the processors, including the seed, have dissipated, then this
 * function returns.  The results will have been written by side-effect via
 * pointers read from, or written into initData.
 *
 *NOTE: no Threads should exist in the outside program that might touch
 * any of the data reachable from initData passed in to here
 */
void
VSs__create_seed_slave_and_do_work( TopLevelFnPtr fnPtr, void *initData )
 { VSsSemEnv   *env;
   SlaveVP     *seed;
   VSsSemData  *seedSemData;
   VSsTaskStub *seedStub, *fakeParentStub;
   int32       *seedTaskID;

      //bring up VMS plus the VSs semantic layer (normal multi-thd)
   VSs__init();
   env = _VMSMasterEnv->semanticEnv;

      //The seed is the only slave that exists at the start; the function it
      // runs is what creates all further work.
      //NB: relies on nextCoreToGetNewSlv still being 0 after VSs__init(),
      // and on there being more than 1 core.
   seed = VSs__create_slave_helper( &VSs__run_thread, fnPtr, initData,
                                    env, env->nextCoreToGetNewSlv++ );

      //The seed is a thread slave, so it gets a thread task stub.  A second
      // stub stands in for the seed's parent: it is marked as already ended
      // with exactly one live child thread (the seed), so that the
      // dissipate handler does the right thing when the seed dissipates.
   seedStub       = create_thread_task_stub( initData );
   fakeParentStub = create_thread_task_stub( NULL );
   fakeParentStub->numLiveChildThreads = 1;
   fakeParentStub->isEnded             = TRUE;
   seedStub->slaveAssignedTo = seed;
   seedStub->parentTaskStub  = fakeParentStub;

      //two-int task ID for the seed; slot 0 is set to 1, slot 1 to -1
   seedTaskID    = VMS_WL__malloc( 2 * sizeof(int32) );
   seedTaskID[0] = 1;
   seedTaskID[1] = -1;
   seedStub->taskID = seedTaskID;

      //a thread keeps its task permanently, so none needs to be assigned
   seedSemData = (VSsSemData *)seed->semanticData;
   seedSemData->slaveType         = ThreadSlv;
   seedSemData->taskStub          = seedStub;
   seedSemData->needsTaskAssigned = FALSE;

      //only queues the seed -- returns right away
   resume_slaveVP( seed, env );

      //normal multi-thd: comes back once all the work is done
   VMS_SS__start_the_work_then_wait_until_done();

   VSs__cleanup_after_shutdown();
 }


/*Gives the compile-time constant MIN_WORK_UNIT_CYCLES.  The percentOverhead
 * argument is accepted but not used.
 */
int32
VSs__giveMinWorkUnitCycles( float32 percentOverhead )
 { int32 minCycles = MIN_WORK_UNIT_CYCLES;

   return minCycles;
 }

/*Gives the total number of animation slots: NUM_ANIM_SLOTS on each of the
 * NUM_CORES cores.
 */
int32
VSs__giveIdealNumWorkUnits()
 { int32 totalSlots = NUM_ANIM_SLOTS * NUM_CORES;

   return totalSlots;
 }

/*Gives the compile-time constant NUM_CORES.
 */
int32
VSs__give_number_of_cores_to_schedule_onto()
 { int32 numCores = NUM_CORES;

   return numCores;
 }

/*For now, use TSC -- later, make these two macros with assembly that first
 * saves jump point, and second jumps back several times to get reliable time
 */
void
VSs__start_primitive()
 { saveLowTimeStampCountInto( ((VSsSemEnv *)(_VMSMasterEnv->semanticEnv))->
                              primitiveStartTime );
 }

/*Just quick and dirty for now -- make reliable later
 * will want this to jump back several times -- to be sure cache is warm
 * because don't want comm time included in calc-time measurement -- and
 * also to throw out any "weird" values due to OS interrupt or TSC rollover
 */
int32
VSs__end_primitive_and_give_cycles()
 { int32 endTime, startTime;
   //TODO: fix by repeating time-measurement
   saveLowTimeStampCountInto( endTime );
   startTime =((VSsSemEnv*)(_VMSMasterEnv->semanticEnv))->primitiveStartTime;
   return (endTime - startTime);
 }

//===========================================================================

/*Initializes all the data-structures for a VSs system -- but doesn't
 * start it running yet!
 *
 *This runs in the main thread -- before VMS starts up
 * 
 *This sets up the semantic layer over the VMS system
 *
 *First, calls VMS_SS__init(), then creates own environment, making it ready
 * for creating the seed processor and then starting the work.
 */
void
VSs__init()
 {
      //VMS itself has to be set up first -- the helper below allocates with
      // VMS_int__malloc and writes into _VMSMasterEnv
   VMS_SS__init();
      //masterEnv, a global var, now is partially set up by init_VMS
      // after this, have VMS_int__malloc and VMS_int__free available

      //now build the VSs semantic environment on top of it
   VSs__init_Helper();
 }


/*Top-level function handed to the idle slaves and the slot-task slaves made
 * in VSs__init_Helper().  Never returns: it suspends the current slave and
 * sends a request, over and over.  The data argument is not used.
 */
void idle_fn(void* data){
    for(;;){
        VMS_int__suspend_slaveVP_and_send_req(currVP);
    }
}

/*Builds the VSs semantic environment and attaches it to _VMSMasterEnv.
 *
 *Installs the request handler and the slave assigner, allocates the
 * VSsSemEnv, creates one slot-task slave (plus one idle slave when
 * IDLE_SLAVES is defined) for every animation slot of every core, and makes
 * the queues, hash tables, singleton and transaction structures.
 *
 *Expects VMS_SS__init() to have run already, because it uses
 * VMS_int__malloc and _VMSMasterEnv.
 */
void
VSs__init_Helper()
 { VSsSemEnv       *semanticEnv;
   int32            i, coreNum, slotNum;
   VSsSemData      *semData;
   SlaveVP         *slotTaskSlv;
#ifdef IDLE_SLAVES
   SlaveVP         *idleSlv;
#endif
 
      //Hook up the semantic layer's plug-ins to the Master virt procr
   _VMSMasterEnv->requestHandler = &VSs__Request_Handler;
   _VMSMasterEnv->slaveAssigner  = &VSs__assign_slaveVP_to_slot;

      //create the semantic layer's environment (all its data) and add to
      // the master environment
   semanticEnv = VMS_int__malloc( sizeof( VSsSemEnv ) );
   _VMSMasterEnv->semanticEnv = semanticEnv;
   
   #ifdef HOLISTIC__TURN_ON_PERF_COUNTERS
   _VMSMasterEnv->counterHandler = &VSs__counter_handler;
   VSs__init_counter_data_structs();
   #endif

   //semanticEnv->shutdownInitiated = FALSE;
   semanticEnv->coreIsDone = VMS_int__malloc( NUM_CORES * sizeof( bool32 ) );
   semanticEnv->numCoresDone = 0;
      //For each animation slot, there is an idle slave, and an initial
      // slave assigned as the current-task-slave.  Create them here.
   for( coreNum = 0; coreNum < NUM_CORES; coreNum++ )
    { semanticEnv->coreIsDone[coreNum] = FALSE; //use during shutdown
    
      for( slotNum = 0; slotNum < NUM_ANIM_SLOTS; ++slotNum )
        {
#ifdef IDLE_SLAVES
         idleSlv = VSs__create_slave_helper( &VSs__run_thread, &idle_fn, NULL, semanticEnv, 0);
         idleSlv->coreAnimatedBy                = coreNum;
         idleSlv->animSlotAssignedTo            =
                               _VMSMasterEnv->allAnimSlots[coreNum][slotNum];
         semanticEnv->idleSlv[coreNum][slotNum] = idleSlv;
#endif
         
            //created with core 0, then pinned to this core and slot by hand
         slotTaskSlv = VSs__create_slave_helper(&VSs__run_thread, &idle_fn, NULL, semanticEnv, 0);
         slotTaskSlv->coreAnimatedBy            = coreNum;
         slotTaskSlv->animSlotAssignedTo        = 
                               _VMSMasterEnv->allAnimSlots[coreNum][slotNum];
         
            //a slot-task slave starts out without a task
         semData                    = slotTaskSlv->semanticData;
         semData->needsTaskAssigned = TRUE;
         semData->slaveType         = SlotTaskSlv;
         semanticEnv->slotTaskSlvs[coreNum][slotNum] = slotTaskSlv;
       }
    }

      //create the ready queues, hash tables used for matching and so forth
   semanticEnv->slavesReadyToResumeQ = makeVMSQ();
   semanticEnv->freeExtraTaskSlvQ    = makeVMSQ();
   semanticEnv->taskReadyQ           = makeVMSQ();
   semanticEnv->barrierQ             = makeVMSQ();
   
   semanticEnv->argPtrHashTbl  = makeHashTable32( 20, &free_pointer_entry );
   semanticEnv->commHashTbl    = makeHashTable32( 16, &VMS_int__free );
   semanticEnv->criticalHashTbl = makeHashTable32( 16, &VMS_int__free );
   
   semanticEnv->nextCoreToGetNewSlv = 0;
   
   semanticEnv->numInFlightTasks = 0;
   semanticEnv->deferredSubmitsQ = makeVMSQ();
#ifdef EXTERNAL_SCHEDULER
   VSs__init_ext_scheduler();
#endif
   //TODO: bug -- turn these arrays into dyn arrays to eliminate limit
   //semanticEnv->singletonHasBeenExecutedFlags = makeDynArrayInfo( );
   //semanticEnv->transactionStrucs = makeDynArrayInfo( );
   for( i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++ )
    {
      semanticEnv->fnSingletons[i].endInstrAddr      = NULL;
      semanticEnv->fnSingletons[i].hasBeenStarted    = FALSE;
      semanticEnv->fnSingletons[i].hasFinished       = FALSE;
      semanticEnv->fnSingletons[i].waitQ             = makeVMSQ();
      semanticEnv->transactionStrucs[i].waitingVPQ   = makeVMSQ();
    }

   semanticEnv->numLiveExtraTaskSlvs   = 0; //must be last
   semanticEnv->numLiveThreadSlvs      = 1; //must be last, counts the seed

   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
   semanticEnv->unitList = makeListOfArrays(sizeof(Unit),128);
   semanticEnv->ctlDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->commDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->dynDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->dataDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->singletonDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->warDependenciesList = makeListOfArrays(sizeof(Dependency),128);
   semanticEnv->ntonGroupsInfo = makePrivDynArrayOfSize((void***)&(semanticEnv->ntonGroups),8);
   
   semanticEnv->hwArcs = makeListOfArrays(sizeof(Dependency),128);
      //Clear one Unit per animation slot.  The byte count must be the
      // product itself -- wrapping it in another sizeof() yields the size
      // of a size_t, which cleared only the first few bytes.
      //NOTE(review): assumes last_in_slot holds NUM_CORES * NUM_ANIM_SLOTS
      // elements of type Unit -- confirm against VSs.h
   memset( semanticEnv->last_in_slot, 0,
           NUM_CORES * NUM_ANIM_SLOTS * sizeof(Unit) );
   #endif
 }


/*Runs after the work has finished.
 *
 *When the HOLISTIC__ measurement switches are on, first writes the recorded
 * UCC, LoopGraph and counter data into numbered files under ./counters,
 * using the first index (0..254) for which no file exists yet.
 *Then dissipates the slot slaves, frees the queues, hash tables and the
 * semantic environment made by VSs__init_Helper(), and ends by calling
 * VMS_SS__cleanup_at_end_of_shutdown().
 */
void
VSs__cleanup_after_shutdown()
 { VSsSemEnv *semanticEnv;
   
   semanticEnv = _VMSMasterEnv->semanticEnv;
 
   #ifdef HOLISTIC__TURN_ON_OBSERVE_UCC
   FILE* output;
   int n;
   char filename[255];
   //UCC   
      //probe for the first index that has no file yet
    for(n=0;n<255;n++)
    {
        snprintf(filename, sizeof(filename), "./counters/UCC.%d",n);
        output = fopen(filename,"r");
        if(output)
        {
            fclose(output);
        }else{
            break;
        }
    }
   if(n<255){
    printf("Saving UCC to File: %s ...\n", filename);
    output = fopen(filename,"w+");
    if(output!=NULL){
        set_dependency_file(output);
        forAllInListOfArraysDo(semanticEnv->unitList, &print_unit_to_file);
        forAllInListOfArraysDo( semanticEnv->commDependenciesList, &print_comm_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->ctlDependenciesList, &print_ctl_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->dataDependenciesList, &print_data_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->singletonDependenciesList, &print_singleton_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->warDependenciesList, &print_war_dependency_to_file );
        forAllInDynArrayDo(semanticEnv->ntonGroupsInfo,&print_nton_to_file);
           //fclose also flushes; the stream used to be left open, which
           // leaked one FILE per call
           //NOTE(review): assumes nothing prints through the stream given
           // to set_dependency_file() after this point
        fclose(output);

    } else
        printf("Opening UCC file failed. Please check that folder \"counters\" exists in run directory and has write permission.\n");
   } else {
       printf("Could not open UCC file, please clean \"counters\" folder. (Must contain less than 255 files.)\n");
   }
   //Loop Graph
    for(n=0;n<255;n++)
    {
        snprintf(filename, sizeof(filename), "./counters/LoopGraph.%d",n);
        output = fopen(filename,"r");
        if(output)
        {
            fclose(output);
        }else{
            break;
        }
    }
   if(n<255){
    printf("Saving LoopGraph to File: %s ...\n", filename);
    output = fopen(filename,"w+");
    if(output!=NULL){
        set_dependency_file(output);
        forAllInListOfArraysDo( semanticEnv->unitList, &print_unit_to_file );
        forAllInListOfArraysDo( semanticEnv->commDependenciesList, &print_comm_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->ctlDependenciesList, &print_ctl_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->dataDependenciesList, &print_data_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->singletonDependenciesList, &print_singleton_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->dynDependenciesList, &print_dyn_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->warDependenciesList, &print_war_dependency_to_file );
        forAllInListOfArraysDo( semanticEnv->hwArcs, &print_hw_dependency_to_file );
           //flushes and releases the stream (was leaked before)
        fclose(output);

    } else
        printf("Opening LoopGraph file failed. Please check that folder \"counters\" exists in run directory and has write permission.\n");
   } else {
       printf("Could not open LoopGraph file, please clean \"counters\" folder. (Must contain less than 255 files.)\n");
   }
   
   
   freeListOfArrays(semanticEnv->unitList);
   freeListOfArrays(semanticEnv->commDependenciesList);
   freeListOfArrays(semanticEnv->ctlDependenciesList);
   freeListOfArrays(semanticEnv->dynDependenciesList);
   freeListOfArrays(semanticEnv->dataDependenciesList);
   freeListOfArrays(semanticEnv->warDependenciesList);
   freeListOfArrays(semanticEnv->singletonDependenciesList);
   freeListOfArrays(semanticEnv->hwArcs);
   
   #endif
#ifdef HOLISTIC__TURN_ON_PERF_COUNTERS   
   FILE* output2;
   int n2;
   char filename2[255];
    for(n2=0;n2<255;n2++)
    {
        snprintf(filename2, sizeof(filename2), "./counters/Counters.%d.csv",n2);
        output2 = fopen(filename2,"r");
        if(output2)
        {
            fclose(output2);
        }else{
            break;
        }
    }
    if(n2<255){
    printf("Saving Counter measurements to File: %s ...\n", filename2);
    output2 = fopen(filename2,"w+");
    if(output2!=NULL){
        set_counter_file(output2);
        int i;
        for(i=0;i<NUM_CORES;i++){
            forAllInListOfArraysDo( semanticEnv->counterList[i], &print_counter_events_to_file );
            fflush(output2);
        }
           //all cores written -- release the stream (was leaked before)
        fclose(output2);

    } else
           //these two messages used to say "UCC", copied from the section above
        printf("Opening Counters file failed. Please check that folder \"counters\" exists in run directory and has write permission.\n");
   } else {
       printf("Could not open Counters file, please clean \"counters\" folder. (Must contain less than 255 files.)\n");
   }
    
#endif
    /* Give back what VSs__init_Helper() created: the per-slot slaves and
     *  their semantic data, then the queues, hash tables and the semantic
     *  environment itself */
   //_VMSMasterEnv->shutdownInitiated = TRUE;
    int coreIdx, slotIdx;
    SlaveVP* slotSlv;
    for (coreIdx = 0; coreIdx < NUM_CORES; coreIdx++) {
        for (slotIdx = 0; slotIdx < NUM_ANIM_SLOTS; slotIdx++) {
            slotSlv = semanticEnv->slotTaskSlvs[coreIdx][slotIdx];
            VMS_int__free(slotSlv->semanticData);
            VMS_int__dissipate_slaveVP(slotSlv);
#ifdef IDLE_SLAVES
            slotSlv = semanticEnv->idleSlv[coreIdx][slotIdx];
            VMS_int__free(slotSlv->semanticData);
            VMS_int__dissipate_slaveVP(slotSlv);
#endif
        }
    }
    int i;
    for (i = 0; i < NUM_STRUCS_IN_SEM_ENV; i++) {
        freePrivQ(semanticEnv->fnSingletons[i].waitQ);
        freePrivQ(semanticEnv->transactionStrucs[i].waitingVPQ);
    }

    freePrivQ(semanticEnv->freeExtraTaskSlvQ);
    freePrivQ(semanticEnv->slavesReadyToResumeQ);
    freePrivQ(semanticEnv->taskReadyQ);
    freePrivQ(semanticEnv->barrierQ);
    freePrivQ(semanticEnv->deferredSubmitsQ);
    freeHashTable(semanticEnv->argPtrHashTbl);
    freeHashTable(semanticEnv->commHashTbl);
    freeHashTable(semanticEnv->criticalHashTbl);
    VMS_int__free(semanticEnv->coreIsDone);
    VMS_int__free(_VMSMasterEnv->semanticEnv);

    VMS_SS__cleanup_at_end_of_shutdown();
}


//===========================================================================

/*Creates a new thread slave that runs fnPtr( initData ).
 *
 *Sends a VMS create request on behalf of creatingThd and returns whatever
 * is found in creatingThd->dataRetFromReq once the call comes back.
 */
SlaveVP *
VSs__create_thread( TopLevelFnPtr fnPtr,   void *initData,
                        SlaveVP *creatingThd )
 { VSsSemReq reqData;

      //the semantic request data is on the stack and disappears when this
      // call returns -- it's guaranteed to remain in the VP's stack for as
      // long as the VP is suspended.
   reqData.reqType            = 0; //know type because in a VMS create req
      //No core preference.  This field used to be left uninitialized here,
      // while VSs__create_slave_with() sets it to -1 for round-robin.
      //NOTE(review): presumably the create handler reads this field --
      // confirm in the request handler
   reqData.coreToAssignOnto   = -1;
   reqData.fnPtr              = fnPtr;
   reqData.initData           = initData;
   reqData.callingSlv         = creatingThd;

   VMS_WL__send_create_slaveVP_req( &reqData, creatingThd );

   return creatingThd->dataRetFromReq;
 }

/*This is always the last thing done in the code animated by a thread VP.
 * Normally, this would be the last line of the thread's top level function.
 * But, if the thread exits from any point, it has to do so by calling
 * this.
 *
 *It simply sends a dissipate request, which handles all the state cleanup.
 */
void
VSs__end_thread()
 { 
      //the dissipate request is sent for the slave currently running (currVP)
   VMS_WL__send_dissipate_req( currVP );
 }

/*Wrapper given to VSs__create_slave_helper() as the function a thread slave
 * runs: calls the thread's top-level function with its data, then ends the
 * thread by calling VSs__end_thread().
 */
void VSs__run_thread(TopLevelFnPtr fnPtr, void *initData){
    fnPtr(initData);

    VSs__end_thread();
}

//===========================================================================


//======================= task submit and end ==============================

/*Sends a submit_task request for a task of the given type, passing along
 * args and deps.  No task ID is attached -- taskID is sent as NULL.  Use
 * VSs__submit_task_with_ID to attach one.
 */
void VSs__submit_task(VSsTaskType *taskType, void *args, void* deps) {
    VSsSemReq req;

    req.callingSlv = currVP;
    req.reqType    = submit_task;
    req.taskID     = NULL;

    req.taskType   = taskType;
    req.args       = args;
    req.deps       = deps;

    VMS_WL__send_sem_request(&req, currVP);
}

/*Allocates a task ID with room for numInts ints plus one leading int, and
 * stores numInts in that leading slot.  The remaining slots are left for
 * the caller to fill in.
 */
int32 *
VSs__create_taskID_of_size( int32 numInts)
 { int32 *newID = VMS_WL__malloc( numInts * sizeof(int32) + sizeof(int32) );

   newID[0] = numInts;   //slot 0 records how many ints follow
   return newID;
 }

/*Same request as VSs__submit_task, except that the given taskID is attached
 * to the request.
 */
void VSs__submit_task_with_ID(VSsTaskType *taskType, void *args, void* deps, int32 *taskID) {
    VSsSemReq req;

    req.callingSlv = currVP;
    req.reqType    = submit_task;
    req.taskID     = taskID;

    req.taskType   = taskType;
    req.args       = args;
    req.deps       = deps;

    VMS_WL__send_sem_request(&req, currVP);
}


/*This call is the last to happen in every task.  It causes the slave to
 * suspend and get the next task out of the task-queue.  Notice there is no
 * assigner here.. only one slave, no slave ReadyQ, and so on..
 *Can either make the assigner take the next task out of the taskQ, or can
 * leave all as it is, and make task-end take the next task.
 *Note: this fits the case in the new VMS for no-context tasks, so will use
 * the built-in taskQ of new VMS, and should be local and much faster.
 * 
 *The task-stub is saved in the animSlv, so the request handler will get it
 * from there, along with the task-type which has arg types, and so on..
 * 
 * NOTE: if want, don't need to send the animating SlaveVP around.. 
 * instead, can make a single slave per core, and coreCtrlr looks up the
 * slave from having the core number.
 * 
 *But, to stay compatible with all the other VMS languages, leave it in..
 */
void
VSs__end_task()
 { VSsSemReq  req;

      //nothing but the request type and the sender goes into the request
   req.callingSlv = currVP;
   req.reqType    = end_task;

   VMS_WL__send_sem_request( &req, currVP );
 }

/*Calls the task's function with its data, then ends the task by calling
 * VSs__end_task().
 */
void VSs__run_task(TopLevelFnPtr fnPtr, void *initData){
    fnPtr(initData);

    VSs__end_task();
}

/*Sends a taskwait request on behalf of the current slave.
 */
void
VSs__taskwait()
 { VSsSemReq  req;

   req.callingSlv = currVP;
   req.reqType    = taskwait;

   VMS_WL__send_sem_request( &req, currVP );
 }

/*Sends a taskwait_on request on behalf of the current slave.  The pointer
 * is handed to the request handler in the request's args field.
 */
void
VSs__taskwait_on(void* ptr)
 { VSsSemReq  req;

   req.callingSlv = currVP;
   req.reqType    = taskwait_on;
   req.args       = ptr;

   VMS_WL__send_sem_request( &req, currVP );
 }

/*Sends a critical_start request; lock travels as the request's criticalID.
 * Pairs with VSs__end_critical.
 */
void
VSs__start_critical(void* lock)
 { VSsSemReq  req;

   req.callingSlv = currVP;
   req.reqType    = critical_start;
   req.criticalID = lock;

   VMS_WL__send_sem_request( &req, currVP );
 }

/*Sends a critical_end request; lock travels as the request's criticalID.
 * Pairs with VSs__start_critical.
 */
void
VSs__end_critical(void* lock)
 { VSsSemReq  req;

   req.callingSlv = currVP;
   req.reqType    = critical_end;
   req.criticalID = lock;

   VMS_WL__send_sem_request( &req, currVP );
 }

//==========================  send and receive ============================
//

int32 *
VSs__give_self_taskID()
 {
   return ((VSsSemData*)currVP->semanticData)->taskStub->taskID;
 }

//================================ send ===================================

/*Sends msg, tagged with a message type, to the task identified by
 * receiverID.  The sender is the current slave.
 */
void
VSs__send_of_type_to( void *msg, const int32 type, int32 *receiverID)
 { VSsSemReq  req;

   req.reqType            = send_type_to;
   req.senderSlv          = currVP;
   req.receiverID         = receiverID;
   req.msgType            = type;
   req.msg                = msg;
   req.nextReqInHashEntry = NULL;

   VMS_WL__send_sem_request( &req, currVP );

      //When come back from suspend, no longer own data reachable from msg
 }

/*Sends msg from the task identified by senderID to the task identified by
 * receiverID.  The sending slave is the current slave.
 */
void
VSs__send_from_to( void *msg, int32 *senderID, int32 *receiverID)
 { VSsSemReq  req;

   req.reqType            = send_from_to;
   req.senderSlv          = currVP;
   req.senderID           = senderID;
   req.receiverID         = receiverID;
   req.msg                = msg;
   req.nextReqInHashEntry = NULL;

   VMS_WL__send_sem_request( &req, currVP );
 }


//================================ receive ================================

/*The "type" version of send and receive creates a many-to-one relationship.
 * The sender is anonymous, and many sends can stack up, waiting to be
 * received.  The same receiver can also have send from-to's
 * waiting for it, and those will be kept separate from the "type"
 * messages.
 */
void *
VSs__receive_type_to( const int32 type, int32* receiverID )
 {       DEBUG__printf1(dbgRqstHdlr,"WL: receive type to %d",receiverID[1] );
   VSsSemReq  req;

   req.reqType            = receive_type_to;
   req.receiverSlv        = currVP;
   req.receiverID         = receiverID;
   req.msgType            = type;
   req.nextReqInHashEntry = NULL;

   VMS_WL__send_sem_request( &req, currVP );

      //the result is picked up from the slave's dataRetFromReq field
   return currVP->dataRetFromReq;
 }


/*Call this at the point a receiving task wants in-coming data.
 * Use this from-to form when know senderID -- it makes a direct channel
 * between sender and receiver.
 */
void *
VSs__receive_from_to( int32 *senderID, int32 *receiverID)
 { VSsSemReq  req;

   req.reqType            = receive_from_to;
   req.receiverSlv        = currVP;
   req.receiverID         = receiverID;
   req.senderID           = senderID;
   req.nextReqInHashEntry = NULL;
      DEBUG__printf2(dbgRqstHdlr,"WL: receive from %d to: %d", req.senderID[1], req.receiverID[1]);

   VMS_WL__send_sem_request( &req, currVP );

      //the result is picked up from the slave's dataRetFromReq field
   return currVP->dataRetFromReq;
 }


//==========================================================================
//
/*A function singleton is a function whose body executes exactly once, on a
 * single core, no matter how many times the function is called and no
 * matter how many cores or the timing of cores calling it.
 *
 *A data singleton is a ticket attached to data.  That ticket can be used
 * to get the data through the function exactly once, no matter how many
 * times the data is given to the function, and no matter the timing of
 * trying to get the data through from different cores.
 */

/*asm function declarations*/
//Implemented in assembly, outside this file.  Going by the comments at the
// call sites below: saves the return addr of the calling function into the
// given singleton struct -- TODO confirm against the assembly source
void asm_save_ret_to_singleton(VSsSingleton *singletonPtrAddr);
//Implemented in assembly, outside this file.  Going by the comments at the
// call sites below: changes the return addr on the caller's stack to the
// one saved in the given singleton struct -- TODO confirm against the
// assembly source
void asm_write_ret_from_singleton(VSsSingleton *singletonPtrAddr);

/*Fn singleton uses ID as index into array of singleton structs held in the
 * semantic environment.
 */
void
VSs__start_fn_singleton( int32 singletonID)
 {
   VSsSemReq  reqData;

      //ask the request handler whether this singleton's body should run
   reqData.reqType     = singleton_fn_start;
   reqData.singletonID = singletonID;

   VMS_WL__send_sem_request( &reqData, currVP );
   if( currVP->dataRetFromReq ) //will be 0 or addr of label in end singleton
    {
          //non-zero: redirect this call's return, so that the singleton
          // body is not executed again
       VSsSemEnv *semEnv = VMS_int__give_sem_env_for( currVP );
       asm_write_ret_from_singleton(&(semEnv->fnSingletons[ singletonID]));
    }
      //zero: simply return, and the caller runs the singleton body
 }

/*Data singleton hands addr of loc holding a pointer to a singleton struct.
 * The start_data_singleton makes the structure and puts its addr into the
 * location.
 */
void
VSs__start_data_singleton( VSsSingleton **singletonAddr )
 {
   VSsSemReq  reqData;

      //shortcut: the struct exists and is marked finished, so go straight to
      // the return-address rewrite without sending a request
   if( *singletonAddr && (*singletonAddr)->hasFinished )
       goto JmpToEndSingleton;
   
   reqData.reqType          = singleton_data_start;
   reqData.singletonPtrAddr = singletonAddr;

   VMS_WL__send_sem_request( &reqData, currVP );
   if( currVP->dataRetFromReq ) //either 0 or end singleton's return addr
    {    //Assembly code changes the return addr on the stack to the one
         // saved into the singleton by the end-singleton-fn
         //The return addr is at 0x4(%%ebp)
        JmpToEndSingleton:
          asm_write_ret_from_singleton(*singletonAddr);
    }
   //now, simply return
   //will exit either from the start singleton call or the end-singleton call
 }

/*Uses ID as index into array of flags.  If flag already set, resumes from
 * end-label.  Else, sets flag and resumes normally.
 *
 *Note, this call cannot be inlined because the instr addr at the label
 * inside is shared by all invocations of a given singleton ID.
 */
void
VSs__end_fn_singleton( int32 singletonID )
 {
   VSsSemReq  reqData;

      //don't need this addr until after at least one singleton has reached
      // this function
      //Save this call's return addr into the singleton struct, the same
      // way VSs__end_data_singleton does.  This used to call
      // asm_write_ret_from_singleton instead, which goes the other way
      // (struct -> stack), so the end addr was never recorded here.
      //NOTE(review): direction of the two asm helpers is taken from their
      // names and the comments in this file -- confirm against the assembly
   VSsSemEnv *semEnv = VMS_int__give_sem_env_for( currVP );
   asm_save_ret_to_singleton(&(semEnv->fnSingletons[ singletonID]));

   reqData.reqType     = singleton_fn_end;
   reqData.singletonID = singletonID;

   VMS_WL__send_sem_request( &reqData, currVP );

      //a later start-singleton call with this ID has its return redirected
      // to just after the call to this function
   return;
 }

/*Marks the end of the code protected by a data singleton: saves the return
 * addr of this call into the singleton struct, then sends a
 * singleton_data_end request.
 */
void
VSs__end_data_singleton(  VSsSingleton **singletonPtrAddr )
 {
   VSsSemReq  reqData;

      //don't need this addr until after singleton struct has reached
      // this function for first time
      //do assembly that saves the return addr of this fn call into the
      // data singleton -- that data-singleton can only be given to exactly
      // one instance in the code of this function.  However, can use this
      // function in different places for different data-singletons.
//   (*(singletonAddr))->endInstrAddr =  &&EndDataSingletonInstrAddr;


   asm_save_ret_to_singleton(*singletonPtrAddr);

   reqData.reqType          = singleton_data_end;
   reqData.singletonPtrAddr = singletonPtrAddr;

   VMS_WL__send_sem_request( &reqData, currVP );
 }

/*This executes the function in the masterVP, so it executes in isolation
 * from any other copies -- only one copy of the function can ever execute
 * at a time.
 *
 *It suspends to the master, and the request handler takes the function
 * pointer out of the request and calls it, then resumes the VP.
 *Only very short functions should be called this way -- for longer-running
 * isolation, use transaction-start and transaction-end, which run the code
 * between as work-code.
 */
void
VSs__animate_short_fn_in_isolation( PtrToAtomicFn ptrToFnToExecInMaster,
                                    void *data )
 { VSsSemReq  req;

      //the function pointer and its data ride along in the request
   req.reqType          = atomic;
   req.dataForFn        = data;
   req.fnToExecInMaster = ptrToFnToExecInMaster;

   VMS_WL__send_sem_request( &req, currVP );
 }


/*This suspends to the master.
 *First, it looks at the VP's data, to see the highest transactionID that VP
 * already has entered.  If the current ID is not larger, it throws an
 * exception stating a bug in the code.  Otherwise it puts the current ID
 * there, and adds the ID to a linked list of IDs entered -- the list is
 * used to check that exits are properly ordered.
 *Next it uses transactionID as index into an array of transaction
 * structures.
 *If the "VP_currently_executing" field is non-null, then put requesting VP
 * into queue in the struct.  (At some point a holder will request
 * end-transaction, which will take this VP from the queue and resume it.)
 *If NULL, then write requesting into the field and resume.
 */
void
VSs__start_transaction( int32 transactionID )
 { VSsSemReq  req;

   req.reqType    = trans_start;
   req.transID    = transactionID;
   req.callingSlv = currVP;

   VMS_WL__send_sem_request( &req, currVP );
 }

/*This suspends to the master, then uses transactionID as index into an
 * array of transaction structures.
 *It looks at VP_currently_executing to be sure it's same as requesting VP.
 * If different, throws an exception, stating there's a bug in the code.
 *Next it looks at the queue in the structure.
 *If it's empty, it sets VP_currently_executing field to NULL and resumes.
 *If something in, gets it, sets VP_currently_executing to that VP, then
 * resumes both.
 */
void
VSs__end_transaction( int32 transactionID )
 { VSsSemReq  req;

   req.reqType    = trans_end;
   req.transID    = transactionID;
   req.callingSlv = currVP;

   VMS_WL__send_sem_request( &req, currVP );
 }

//======================== Internal ==================================
/*
 */
SlaveVP *
VSs__create_slave_with( TopLevelFnPtr fnPtr,   void *initData,
                        SlaveVP *creatingSlv )
 { VSsSemReq req;

      //req lives on this stack frame; the frame stays intact for as long as
      // the VP is suspended, and goes away when this call returns
   req.callingSlv       = creatingSlv;
   req.initData         = initData;
   req.fnPtr            = fnPtr;
   req.coreToAssignOnto = -1; //means round-robin assign
   req.reqType          = 0;  //know type because in a VMS create req

   VMS_WL__send_create_slaveVP_req( &req, creatingSlv );

      //the result is picked up from the creator's dataRetFromReq field
   return creatingSlv->dataRetFromReq;
 }

/*Like VSs__create_slave_with, except that the caller names the core to
 * assign the new slave onto.
 */
SlaveVP *
VSs__create_slave_with_affinity( TopLevelFnPtr fnPtr, void *initData,
                        SlaveVP *creatingSlv,  int32  coreToAssignOnto )
 { VSsSemReq  req;

      //req lives on this stack frame; the frame stays intact for as long as
      // the VP is suspended, and goes away when this call returns
   req.callingSlv       = creatingSlv;
   req.initData         = initData;
   req.fnPtr            = fnPtr;
   req.coreToAssignOnto = coreToAssignOnto;
   req.reqType          = create_slave_w_aff; //not used, May 2012

   VMS_WL__send_create_slaveVP_req( &req, creatingSlv );

      //the result is picked up from the creator's dataRetFromReq field
   return creatingSlv->dataRetFromReq;
 }

//Holds the value returned by __program_main; written by __entry_point and
// returned from main
int __main_ret;

/*Function passed by main as the seed's top-level function.  Unpacks the
 * __main_args that main passed in, calls __program_main with them, and
 * stores the result in __main_ret.
 */
void __entry_point(void* _args) {
    __main_args* args = (__main_args*) _args;
    __main_ret = __program_main(args->argc, args->argv);
}

#undef main

/*Process entry point.  Packs argc/argv into a __main_args, runs
 * __entry_point as the seed via VSs__create_seed_slave_and_do_work, and
 * returns the value that __entry_point left in __main_ret.
 */
int main(int argc, char** argv) {
    __main_args seedArgs;

    seedArgs.argv = argv;
    seedArgs.argc = argc;

    VSs__create_seed_slave_and_do_work(__entry_point, &seedArgs);

    return __main_ret;
}