/*
 * Copyright 2010  OpenSourceCodeStewardshipFoundation
 *
 * Licensed under BSD
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include "VMS.h"
#include "Queue_impl/BlockingQueue.h"


/*Setup has two phases:
 * 1) Semantic layer first calls init_VMS, which creates masterEnv, and puts
 *    the master work-unit into the work-queue
 * 2) Semantic layer then does its own init, which creates the initial
 *    work-units inside the semantic layer, ready to schedule them when
 *    asked by the first run of the masterLoop.
 *
 *This part is a bit weird because VMS really wants to be "always there", and
 * have applications attach and detach..  for now, this VMS is part of
 * the app, so the VMS system starts up as part of running the app.
 *
 *The semantic layer is fully isolated from the VMS internals by
 * making the semantic layer setup into a state that it's ready with its
 * initial work-units, ready to schedule them to slaves when the masterLoop
 * asks.  Without this pattern, the semantic layer's setup would
 * have to modify slaves directly to assign the initial work-units, and put
 * them into the workQ itself, breaking the isolation completely.
 *
 * 
 *The semantic layer creates the initial work-unit(s), and adds its
 * own environment data to masterEnv, and fills in the pointers to
 * the requestHandler and slaveScheduler plug-in functions
 *
 *This allocates VMS data structures, populates the master VMSProc,
 * and master environment, and returns the master environment to the semantic
 * layer.
 */
   //Global vars are all inside VMS.h
/*Builds the global VMS state and hands the master environment back to the
 * caller.
 *
 *Creates the central work-queue and the MasterEnv (published through the
 * globals _VMSWorkQ and _VMSMasterEnv), fills in the master virt procr and
 * the sched slots, assigns the master virt procr to slot 0, and queues the
 * master virt procr as the first piece of work.
 *
 *Returns the new MasterEnv.  Prints a message and exits if the MasterEnv
 * cannot be allocated.
 */
MasterEnv *
VMS__init(  )
 { MasterEnv  *masterEnv;
   QueueStruc *workQ;

      //Make the central work-queue
   workQ     = makeQ();
   _VMSWorkQ = workQ;

      //calloc, so every field that is not explicitly set during setup
      // starts out as 0/NULL instead of heap garbage
   masterEnv = calloc( 1, sizeof(MasterEnv) );
   if( masterEnv == NULL )
    { printf("ERROR allocating the master environment\n");
      exit(-1);
    }
   _VMSMasterEnv = masterEnv;

   create_master( masterEnv );

   create_sched_slots( masterEnv );

      //NOTE(review): create_sched_slots indexes schedSlots as an array of
      // structs (slots[i].field), while the lines below use
      // schedSlots[0]->field.  Only one of the two can match the
      // declaration in VMS.h -- confirm which and make them agree.
   masterEnv->schedSlots[0]->needsProcrAssigned  = FALSE;  //never checked
   masterEnv->schedSlots[0]->workIsDone          = FALSE;  //never checked
      //masterVirtPr is embedded in the MasterEnv (create_master and the
      // writeQ below both take its address), so the slot gets its address
      // too.  Assumes procrAssignedToSlot is a VirtProcr * -- TODO confirm
   masterEnv->schedSlots[0]->procrAssignedToSlot = &(masterEnv->masterVirtPr);

      //First core loop to start up gets this, which will schedule seed Pr
   writeQ( &(masterEnv->masterVirtPr), workQ );

      //the function is declared to return the master env, so hand it back
   return masterEnv;
 }


/*Fill in the master VirtProcr, which lives inside the masterEnv, and give
 * it a stack of its own.
 *
 *The master virt procr's initialData is the masterEnv itself and its
 * nextInstrPt is masterLoop.  A 1 meg stack is allocated, and the two
 * parameters are written at the top of it: initialData at stackPtr and the
 * pointer to the virt procr in the slot just above.  Both stackPtr and
 * framePtr of the virt procr are set to that position.
 *
 *The parameters are written with ordinary stores rather than by loading
 * %esp in inline asm: switching the stack pointer inside this function
 * would abandon its own frame, so it could not return to its caller.
 *
 *Prints a message and exits if the stack cannot be allocated.
 */
void
create_master( MasterEnv *masterEnv )
 { enum { MASTER_STACK_SIZE = 0x100000 }; //1 meg -- default Win thread's size
   VirtProcr  *masterPr;
   char       *stackLocs, *stackPtr;
   void      **paramSlots;

   masterPr                = &(masterEnv->masterVirtPr);
   masterPr->initialData   = masterEnv;

   masterPr->nextInstrPt   = &masterLoop;

   stackLocs = malloc( MASTER_STACK_SIZE );
   if( stackLocs == NULL )
    { printf("ERROR allocating the master virt procr's stack\n");
      exit(-1);
    }

      //stackPtr is the last addr in the locs, minus room for the two
      // parameters.  Sized with sizeof so that both slots stay inside the
      // allocation whatever the pointer width is (8 bytes on 32-bit x86).
   stackPtr = stackLocs + MASTER_STACK_SIZE - 2 * sizeof(void *);

      //initData at stackPtr, animating virt procr just above
   paramSlots    = (void **)stackPtr;
   paramSlots[0] = masterPr->initialData;
   paramSlots[1] = masterPr;

   masterPr->stackPtr = stackPtr;
   masterPr->framePtr = stackPtr;  //framePtr never used by first entry
 }

/*Puts every one of the NUM_SCHED_SLOTS sched slots of the masterEnv into
 * the state "handling requests done, slot needs filling":  workIsDone is
 * FALSE and needsProcrAssigned is TRUE.
 */
void
create_sched_slots( MasterEnv *masterEnv )
 { SchedSlot *slot = masterEnv->schedSlots;  //TODO: make sure this is right
   int        remaining;

      //walk the slots with a pointer, counting down the ones left to do
   for( remaining = NUM_SCHED_SLOTS; remaining > 0; remaining--, slot++ )
    {
      slot->workIsDone         = FALSE;
      slot->needsProcrAssigned = TRUE;
    }
 }

/*Semantic layer calls this when it wants the system to start running.
 *
 *For each of the NUM_WORKERS cores this allocates a ThdParams holding the
 * work-queue pointer and the core's index, and creates a thread running
 * coreLoop with those params.  The thread handle is kept in coreLoopThds,
 * which is what is needed to stop the thread later.
 *
 *Prints a message and exits if the params cannot be allocated or the
 * thread cannot be created.
 */
 void
VMS__start()
 { int retCode, coreIdx;

//TODO: still just skeleton code -- figure out right way to do this

      //Create the PThread loops that take from work-queue, and start them
   for( coreIdx=0; coreIdx < NUM_WORKERS; coreIdx++ )
    {
      thdParams[coreIdx] = malloc( sizeof(ThdParams) );
      if( thdParams[coreIdx] == NULL )
       { //would otherwise be dereferenced on the very next line
         printf("ERROR allocating params for coreLoop %d\n", coreIdx);
         exit(-1);
       }
      thdParams[coreIdx]->workQ = _VMSWorkQ;
      thdParams[coreIdx]->id    = coreIdx;

         //Now make and start thd..  the coreLoopThds entry
         // has all the info needed to later stop the thread.
      retCode =
       pthread_create( &(coreLoopThds[coreIdx]), thdAttrs, &coreLoop,
                       (void *)(thdParams[coreIdx]) );
      if( retCode != 0 )
       { //error
         printf("ERROR creating coreLoop %d, code: %d\n", coreIdx, retCode);
         exit(-1);
       }

         //NOTE(review): placeholder -- takes no argument yet, so it cannot
         // know which thread or core is meant
      pinThdToCore( );  //figure out how to specify this..

         //NOTE(review): placeholder -- a thread made by pthread_create is
         // already runnable when the call returns, so a separate start
         // step is probably not needed; confirm and remove
      startThd();
    }
 }

 /*Suspends the calling virt procr and hands the core back to the core loop.
 *
 *The address of the ResumePt label inside this function is saved in the
 * callingPr struc as the pick-up point from which that procr continues
 * later.  The procr's sched slot is marked workIsDone, the current stack
 * and frame pointers are saved into the procr struc, and then an assembly
 * jump goes to the procr's coreLoopStartPt.
 *
 *When something later jumps to the saved nextInstrPt, execution continues
 * at ResumePt and this function returns to whoever called it.
 *
 *Uses GCC extensions (address of a label, extended asm) and 32-bit x86
 * register names.
 */
void
VMS__suspend_processor( VirtProcr *callingPr )
 { void *jmpPt;

   callingPr->nextInstrPt = &&ResumePt;

      //return ownership of the virt procr and sched slot to Master virt pr
   callingPr->schedSlot->workIsDone = TRUE;

   jmpPt    = callingPr->coreLoopStartPt;

      //put all regs in the clobber list to make sure GCC has saved all
      // so safe to jump to core loop, where they *will* get clobbered
      //The jump target is a pointer value, so the jump has to be the
      // indirect form ("jmp *operand").
      //NOTE(review): with all six general registers clobbered, 32-bit x86
      // has none left for addressing the operands -- confirm this
      // assembles on the target compiler.  GCC also documents that asm
      // may not jump out of the statement unless "asm goto" is used.
   __asm__ volatile("movl %%esp, %0\n\t"
                    "movl %%ebp, %1\n\t"
                    "jmp  *%2\n\t"
   /* outputs */ : "=m" (callingPr->stackPtr), "=m" (callingPr->framePtr)
   /* inputs  */ : "g" (jmpPt)
   /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi"
                );

ResumePt:
   return;
 }


/*Creates a new virtual processor that will start out running fnPtr.
 *
 *Allocates the VirtProcr, sets fnPtr as its nextInstrPt, stores
 * initialData in it, and gives it a fresh stack from createNewStack, with
 * framePtr starting out equal to stackPtr.
 *
 *The intent is for the stack to be set up in __cdecl call structure, with
 * initialData and the pointer to the new VirtProcr in the parameter
 * positions, so that jumping to the function ptr is the same as a GCC
 * generated function call.  That step is not implemented yet, see the
 * TODO below.
 *No need to save registers on an old stack frame, because there's no old
 * animator state to return to.
 *
 *Returns the new VirtProcr, or NULL if it cannot be allocated.
 */
VirtProcr *
VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
 { VirtProcr   *newPr;

      //calloc, so that fields not set here (such as the requests list that
      // VMS__add_request_to_slave reads) start out as 0/NULL
   newPr = calloc( 1, sizeof *newPr );
   if( newPr == NULL )
      return NULL;

   newPr->nextInstrPt = fnPtr;
   newPr->initialData = initialData;
   newPr->stackPtr    = createNewStack();
   newPr->framePtr    = newPr->stackPtr;

      //TODO: put params onto stack and setup __cdecl call structure.
      // Depends on what createNewStack returns (lowest addr of the stack,
      // or a ready-to-use top) -- confirm before writing to it.

   return newPr;
 }


/*Adds a request to the front of the calling virt procr's list of requests.
 *
 *The new request's nextRequest is pointed at the current head of
 * callingPr's list, then req becomes the new head, so the most recently
 * added request comes first.
 *
 *Both arguments are taken by pointer, because the list links have to be
 * written into the caller's own structures.
 * NOTE(review): assumes SlaveReqst and VirtProcr are struct types, the way
 * VirtProcr is used elsewhere in this file -- confirm against VMS.h.
 *
 *Not declared "inline": under C99 rules an inline definition without a
 * matching extern declaration provides no callable external definition.
 */
void
VMS__add_request_to_slave( SlaveReqst *req, VirtProcr *callingPr )
 {
   req->nextRequest    = callingPr->requests;
   callingPr->requests = req;
 }