/*
 * Copyright 2010  OpenSourceCodeStewardshipFoundation
 *
 * Licensed under BSD
 */

#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>

#include "VMS.h"
#include "Queue_impl/BlockingQueue.h"


/*Setup has two phases:
 * 1) Semantic layer first calls VMS__init, which creates masterEnv, and puts
 *    the master virt procr into the work-queue, ready for first "call"
 * 2) Semantic layer then does its own init, which creates the seed virt
 *    procr inside the semantic layer, ready to schedule it when
 *    asked by the first run of the masterLoop.
 *
 *This part is a bit weird because VMS really wants to be "always there", and
 * have applications attach and detach.  For now, this VMS is part of
 * the app, so the VMS system starts up as part of running the app.
 *
 *The semantic layer is isolated from the VMS internals by making the
 * semantic layer do setup to a state that it's ready with its
 * initial virt procrs, ready to schedule them to slots when the masterLoop
 * asks.  Without this pattern, the semantic layer's setup would
 * have to modify slots directly to assign the initial virt-procrs, and put
 * them into the workQ itself, breaking the isolation completely.
 *
 * 
 *The semantic layer creates the initial virt procr(s), and adds its
 * own environment to masterEnv, and fills in the pointers to
 * the requestHandler and slaveScheduler plug-in functions
 */

//Forward declaration -- the definition is further down in this file, and
// VMS__init calls it before that point
void
create_sched_slots( MasterEnv *masterEnv );


/*This allocates the VMS data structures and populates the master
 * environment and the master virt procr.
 *
 *Nothing is returned -- the work-queue and the master environment are
 * handed over through the globals _VMSWorkQ and _VMSMasterEnv.
 *
 *Running out of memory for the master environment is fatal: a message goes
 * to stderr and the process exits, because nothing below can proceed
 * without it.
 */
void
VMS__init()
 { MasterEnv  *masterEnv;
   CASQueueStruc *workQ;

      //Make the central work-queue
   _VMSWorkQ = makeCASQ();
   workQ     = _VMSWorkQ;
   (void)workQ;  //only needed by the disabled writeCASQ call further down

   _VMSMasterEnv = malloc( sizeof(MasterEnv) );
   masterEnv     = _VMSMasterEnv;
   if( masterEnv == NULL )
    { fprintf( stderr, "VMS__init: out of memory for master environment\n" );
      exit( EXIT_FAILURE );
    }

      //create the master virtual processor -- it starts at masterLoop and
      // is handed the master environment as its initial data
   masterEnv->masterVirtPr = VMS__create_procr( &masterLoop, masterEnv );

   create_sched_slots( masterEnv );

      //Set slot 0 to be the master virt procr & set flags just in case
   masterEnv->schedSlots[0]->needsProcrAssigned  = FALSE;  //says don't touch
   masterEnv->schedSlots[0]->workIsDone          = FALSE;  //says don't touch
   masterEnv->schedSlots[0]->procrAssignedToSlot = masterEnv->masterVirtPr;

      //First core loop to start up gets this, which will schedule seed Pr
      //TODO: debug: check address of masterVirtPr
//TODO: commented out for debugging -- put it back in!!
//   writeCASQ( masterEnv->masterVirtPr, workQ );

      //the master is the only virt procr created so far
   numProcrsCreated = 1;
 }


/*Allocates the two slot-pointer arrays of the master environment
 * (schedSlots and filledSlots, NUM_SCHED_SLOTS entries each) and one
 * SchedSlot for every entry of schedSlots.
 *
 *Every new slot starts out with workIsDone FALSE, needsProcrAssigned TRUE
 * and no procr assigned.  The entries of filledSlots start out NULL.
 *
 *Any allocation failure is fatal: a message goes to stderr and the process
 * exits, instead of writing through a NULL pointer.
 */
void
create_sched_slots( MasterEnv *masterEnv )
 { SchedSlot  **schedSlots, **filledSlots;
   int i;

   schedSlots  = malloc( NUM_SCHED_SLOTS * sizeof *schedSlots );
   filledSlots = malloc( NUM_SCHED_SLOTS * sizeof *filledSlots );
   if( schedSlots == NULL  ||  filledSlots == NULL )
    { fprintf( stderr, "create_sched_slots: out of memory for slot arrays\n" );
      exit( EXIT_FAILURE );
    }
   masterEnv->schedSlots  = schedSlots;
   masterEnv->filledSlots = filledSlots;

   for( i = 0; i < NUM_SCHED_SLOTS; i++ )
    {
      schedSlots[i] = malloc( sizeof *schedSlots[i] );
      if( schedSlots[i] == NULL )
       { fprintf( stderr, "create_sched_slots: out of memory for slot %d\n", i );
         exit( EXIT_FAILURE );
       }

         //Set state to mean "handling requests done, slot needs filling"
      schedSlots[i]->workIsDone          = FALSE;
      schedSlots[i]->needsProcrAssigned  = TRUE;

         //malloc'd memory is not zeroed -- give the pointers a defined value
      schedSlots[i]->procrAssignedToSlot = NULL;
      filledSlots[i]                     = NULL;
    }
 }


/*Semantic layer calls this when it want the system to start running..
 *
 *This creates the core loops, pins them to physical cores, gives them the
 * pointer to the workQ, and starts them running.
 */
void
VMS__start()
 { int coreIdx;

   //Create the win threads that animate the core loops
   for( coreIdx=0; coreIdx < NUM_CORES; coreIdx++ )
    {
      coreLoopThdParams[coreIdx] = (ThdParams *)malloc( sizeof(ThdParams) );
      coreLoopThdParams[coreIdx]->coreNum = coreIdx;

      coreLoopThdHandles[coreIdx] =
          CreateThread ( NULL, // Security attributes
                         0, // Stack size
                         coreLoop,
                         coreLoopThdParams[coreIdx],
                         CREATE_SUSPENDED,
                         &(coreLoopThdIds[coreIdx])
                        );
      ResumeThread( coreLoopThdHandles[coreIdx] ); //starts thread
    }
 }


/*Creates a new virt procr whose first instruction is fnPtr.
 *
 *Allocates the VirtProcr and a 1 meg stack for it, then puts initialData
 * and the pointer to the new VirtProcr into the top two 4-byte locations of
 * that stack, as the parameter positions of a __cdecl call.
 *Then puts the function pointer into nextInstrPt -- the stack is set up in
 * std call structure, so jumping to the function ptr is the same as a GCC
 * generated function call.
 *No need to save registers on an old stack frame, because there's no old
 * animator state to return to.
 *
 *The new procr gets the next procrID from numProcrsCreated and starts with
 * an empty request list.
 *
 *Returns the new VirtProcr.  Allocation failure is fatal: a message goes to
 * stderr and the process exits, so NULL is never returned.
 *
 *NOTE(review): the base address of the stack (stackLocs) is not stored in
 * the VirtProcr here, only the adjusted stackPtr -- confirm how the stack
 * is meant to be freed.
 */
VirtProcr *
VMS__create_procr( VirtProcrFnPtr fnPtr, void *initialData )
 { enum { STACK_BYTES = 0x100000, //1 meg stack -- default Win thread's size
          PARAM_BYTES = 0x8 };    //room for 2 params of 4 bytes each
   VirtProcr *newPr;
   char      *stackLocs, *stackPtr;

   newPr     = malloc( sizeof *newPr );
   stackLocs = malloc( STACK_BYTES );
   if( newPr == NULL  ||  stackLocs == NULL )
    { fprintf( stderr, "VMS__create_procr: out of memory\n" );
      exit( EXIT_FAILURE );
    }

   newPr->procrID     = numProcrsCreated++;
   newPr->nextInstrPt = fnPtr;
   newPr->initialData = initialData;
   newPr->requests    = NULL; //empty list -- VMS__send_sem_request chains
                              // each new request in front of this value

      //make stackPtr be the highest addr minus room for 2 params.
      // Put initData at stackPtr, animatingPr just above
   stackPtr = stackLocs + STACK_BYTES - PARAM_BYTES;
      //setup __cdecl on stack -- coreloop will switch to stackPtr before jmp
      //NOTE(review): the (int) casts only keep the whole pointer where a
      // pointer fits in an int, i.e. a 32-bit build
   *( (int *)stackPtr + 1) = (int) newPr;  //rightmost param -- 32bit pointer
   *( (int *)stackPtr )    = (int) initialData;  //next  param to left
   newPr->stackPtr = stackPtr; //core loop will switch to this, then
   newPr->framePtr = stackPtr; //suspend loop will save new stack & frame ptr

   return newPr;
 }


/*Wraps the semantic layer's request data in a newly allocated SlaveReqst
 * carrier and pushes that carrier onto the front of callingPr's request
 * list.
 */
inline void
VMS__send_sem_request( void *semReqData, VirtProcr *callingPr )
 { SlaveReqst *carrier = malloc( sizeof *carrier );

      //fill in the payload and who it came from
   carrier->semReqData  = semReqData;
   carrier->slaveFrom   = callingPr;

      //link in at the head: old head becomes the carrier's successor
   carrier->nextRequest = callingPr->requests;
   callingPr->requests  = carrier;
 }

 /*Suspends the calling virt procr and jumps back into the core loop.
 *
 *In order, this:
 * 1) saves the address of the ResumePt label (GCC's label-as-value
 *    extension) in callingPr->nextInstrPt, as the pick-up point from which
 *    to start the next work-unit for that procr
 * 2) marks the procr's sched slot with workIsDone = TRUE
 * 3) runs inline assembly that copies %esp and %ebp into the asm's two
 *    output operands and then jumps to callingPr->coreLoopStartPt
 *
 *When execution arrives at ResumePt the function simply returns.
 *
 *If it turns out registers have to be saved, then save them in the procr
 * struc too.  The procr struc is in the request in the slave that animated
 * the just-ended work-unit, so all the state is saved there, and will get
 * passed along, inside the request handler, to the next work-unit for that
 * procr.
 *
 *The assembly is 32-bit x86 (movl with %esp / %ebp).
 */
void
VMS__suspend_processor( VirtProcr *callingPr )
 { void *jmpPt, *stackPtr, *framePtr;

      //&&label is a GCC extension that yields the label's address
   callingPr->nextInstrPt = &&ResumePt;

      //return ownership of the virt procr and sched slot to Master virt pr
   callingPr->schedSlot->workIsDone = TRUE;

      //jmpPt is where the asm below jumps to; stackPtr and framePtr are set
      // to the addresses of the procr's saved-stack and saved-frame fields
   jmpPt    = callingPr->coreLoopStartPt;
   stackPtr = &(callingPr->stackPtr);
   framePtr = &(callingPr->framePtr);

      //put all regs in the clobber list to make sure GCC has saved all
      // so safe to jump to core loop, where they *will* get clobbered
      //NOTE(review): operands %0 and %1 are the locals stackPtr and framePtr
      // themselves, so %esp and %ebp replace the addresses stored just above
      // instead of being written through them into callingPr->stackPtr and
      // callingPr->framePtr -- confirm whether that is what is wanted
      //NOTE(review): the asm leaves via jmp, so any code GCC places after
      // the asm to store the outputs would not run -- confirm
      //NOTE(review): GNU as normally expects "jmp *%2" for a jump through a
      // register or memory operand -- confirm how this line assembles
   asm volatile("movl %%esp, %0; \
                 movl %%ebp, %1; \
                 jmp  %2         "
   /* outputs */ : "=g" (stackPtr), "=g" (framePtr)
   /* inputs  */ : "g" (jmpPt)
   /* clobber */ : "memory", "%eax", "%ebx", "%ecx", "%edx", "%edi","%esi"
                ); //list everything as clobbered to force GCC to save all
                   // live vars that are in regs on stack before this
                   // assembly, so that stack pointer is correct, before jmp

   //pick-up point whose address was stored in nextInstrPt above
ResumePt:
   return;
 }

/*Placeholder -- dissipating a virt procr is not implemented yet, so this
 * does nothing with animatingPr and just returns.
 */
void
VMS__dissipate_animating_processor( VirtProcr *animatingPr )
 {    //intentionally empty for now
   (void)animatingPr;
   return;
 }

/*Meant to be called from the main thread, so all it can do is signal the
 * core loops to shut themselves down.
 *
 *Design intent: the master decides when to shut down, when the semantic
 * layer tells it to -- say, once all the application virt processors have
 * dissipated.  One option is a special code returned from the scheduling
 * plug-in; the master checks for it and, on seeing it, shuts the core loops
 * down by scheduling a special virt processor whose next instr pt is the
 * core-end label.
 *
 *Not implemented yet: the body only steps through the core indexes and
 * does nothing for any of them.
 */
void
VMS__shutdown()
 { int coreIdx = 0;

      //placeholder walk over the cores -- no per-core work exists yet
   while( coreIdx < NUM_CORES )
    { coreIdx++;
    }
 }


/*Reads the time-stamp count through saveTimeStampCountInto, which fills in
 * a low and a high unsigned int, and returns the two joined into a single
 * TSCount: the high part shifted up by 32 bits, plus the low part.
 */
inline TSCount getTSCount()
 { unsigned int loHalf, hiHalf;
   TSCount      stamp;

   saveTimeStampCountInto( loHalf, hiHalf );

   stamp   = hiHalf;   //widen first, so the shift happens in TSCount
   stamp <<= 32;
   stamp  += loHalf;
   return stamp;
 }