/*
 *  Copyright 2009 OpenSourceCodeStewardshipFoundation.org
 *  Licensed under GNU General Public License version 2
 *
 * Author: seanhalle@yahoo.com
 *
 * Created on November 14, 2009, 9:07 PM
 */

#include <malloc.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include "VMS.h"
#include "Histogram/Histogram.h"

//A MallocProlog is a head element if the HigherInMem variable is NULL
//A Chunk is free if the prevChunkInFreeList variable is NULL

/*
 * This calculates the container which fits the given size.
 */
inline
uint32 getContainer(size_t size)
{
    return (log10(size)-LOG128)/LOG54;
}

/*
 * This calculates the size of a given container
 */
inline
size_t getContainerSize(uint32 idx)
{
    return pow(CHUNK_INCREASE_RATE,idx)*LOWER_BOUND;
}

/*
 * Removes the first chunk of a freeList
 * The chunk is removed but not set as free. There is no check if
 * the free list is empty, so make sure this is not the case.
 */
inline
MallocProlog *removeChunk(MallocProlog** container)
{
    MallocProlog *removedChunk = *container;
    *container = removedChunk->nextChunkInFreeList;
    
    if(removedChunk->nextChunkInFreeList)
        removedChunk->nextChunkInFreeList->prevChunkInFreeList = 
                (MallocProlog*)container;
    
    return removedChunk;
}

inline
void recycleSmallChunks(MallocProlog** container)
{
    //TODO recycle small chunks
}

/*
 * Removes a chunk from a free list.
 */
inline
void extractChunk(MallocProlog* chunk)
{
   chunk->prevChunkInFreeList->nextChunkInFreeList = chunk->nextChunkInFreeList;
   if(chunk->nextChunkInFreeList)
       chunk->nextChunkInFreeList->prevChunkInFreeList = chunk->prevChunkInFreeList;
}

/*
 * Merges two chunks.
 * Chunk A has to be before chunk B in memory. Both have to be removed from
 * a free list
 */
inline
MallocProlog *mergeChunks(MallocProlog* chunkA, MallocProlog* chunkB)
{
    chunkA->nextHigherInMem = chunkB->nextHigherInMem;
    return chunkA;
}
/*
 * Inserts a chunk into a free list.
 */
inline
void insertChunk(MallocProlog* chunk, MallocProlog** container)
{
    chunk->nextChunkInFreeList = *container;
    chunk->prevChunkInFreeList = (MallocProlog*)container;
    if(*container)
        (*container)->prevChunkInFreeList = chunk;
    *container = chunk;
}

/*
 * Divides the chunk that a new chunk of newSize is created.
 * There is no size check, so make sure the size value is valid.
 */
inline
MallocProlog *divideChunk(MallocProlog* chunk, size_t newSize)
{
    MallocProlog* newChunk = (MallocProlog*)((uintptr_t)chunk->nextHigherInMem -
            newSize - sizeof(MallocProlog));
    
    newChunk->nextLowerInMem  = chunk;
    newChunk->nextHigherInMem = chunk->nextHigherInMem;
    
    chunk->nextHigherInMem = newChunk;
    chunk->nextHigherInMem->nextLowerInMem = newChunk;
    
    return newChunk;
}

inline
size_t getChunkSize(MallocProlog* chunk)
{
    return (uintptr_t)chunk->nextHigherInMem -
            (uintptr_t)chunk - sizeof(MallocProlog);
}


/*
 * This makes 5 Chunks of the requested size.
 */
inline
void makeChunks(size_t size, MallocProlog **container)
{
    MallocArrays *freeLists = _VMSMasterEnv->freeLists;
    size_t addedSize = 5*(size + sizeof(MallocProlog));
    
    uint32 containerIdx = getContainer(addedSize)+4;
    while(freeLists->bigChunks[containerIdx] == NULL )
        containerIdx++;
    
    MallocProlog *foundChunk = removeChunk(&freeLists->bigChunks[containerIdx]);
    
    int i;
    for(i=0; i<5; i++)
    {
        insertChunk(divideChunk(foundChunk,size), container);
    }
    containerIdx = getContainer(getChunkSize(foundChunk));
    insertChunk(foundChunk, &freeLists->bigChunks[containerIdx]);
}

/*
 * This is sequential code, meant to only be called from the Master, not from
 * any slave VPs.
 */
void *VMS__malloc( size_t sizeRequested )
 {     
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   int32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================
   
   
   MallocArrays* freeList = _VMSMasterEnv->freeLists;
   
   //Return a small chunk if the requested size is smaller than 128B
   if(sizeRequested <= LOWER_BOUND)
   {
       uint32 freeListIdx = (sizeRequested-1)/32;
       if(freeList->smallChunkCount[freeListIdx] == 0)
       {
           makeChunks((freeListIdx+1)*32, &freeList->smallChunks[freeListIdx]);
           freeList->smallChunkCount[freeListIdx] += 5;
       }
       
       freeList->smallChunkCount[freeListIdx]--;
       return removeChunk(&freeList->smallChunks[freeListIdx]) + 1;
   }
   
   //Calculate the expected container. Start one higher to have a Chunk that's
   //always big enough.
   uint32 targetContainerIdx = getContainer(sizeRequested);
   uint32 containerIdx = targetContainerIdx + 1;
   
   MallocProlog* foundChunk;
   if(freeList->bigChunks[containerIdx] == NULL)
   {
       while(freeList->bigChunks[containerIdx] == NULL)
       {
           containerIdx++;
           if(containerIdx >= freeList->containerCount)
           {
               printf("VMS malloc failed: low memory");
               exit(1);
           }
       }
       
       foundChunk = removeChunk(&freeList->bigChunks[containerIdx]);
       size_t chunkSize     = getChunkSize(foundChunk);
       
       if(chunkSize > sizeRequested + sizeof(MallocProlog))
       {
           MallocProlog *newChunk = divideChunk(foundChunk,sizeRequested);
           containerIdx = getContainer(chunkSize - sizeRequested - sizeof(MallocProlog));
           insertChunk(foundChunk,&freeList->bigChunks[containerIdx]);
           foundChunk = newChunk;
       }        
   }
   else
   {
        foundChunk = removeChunk(&freeList->bigChunks[containerIdx]); 
   }
   
   //Mark as allocated
   foundChunk->prevChunkInFreeList = NULL;      
   
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->mallocTimeHist );
   #endif
   //========================================================================
   
   //skip over the prolog by adding its size to the pointer return
   return foundChunk + 1;
 }

/*
 * This is sequential code, meant to only be called from the Master, not from
 * any slave VPs.
 */
void
VMS__free( void *ptrToFree )
 {

   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   int32 startStamp, endStamp;
   saveLowTimeStampCountInto( startStamp );
   #endif
   //========================================================================
   
   MallocArrays* freeLists = _VMSMasterEnv->freeLists;
   MallocProlog *chunkToFree = (MallocProlog*)ptrToFree - 1;
   uint32 containerIdx;

   size_t chunkSize = getChunkSize(chunkToFree);
   if(chunkSize <= LOWER_BOUND)
   {
       containerIdx = (chunkSize-1)/32;
       insertChunk(chunkToFree,&freeLists->smallChunks[containerIdx]);
       freeLists->smallChunkCount[containerIdx]++;
       
       if(freeLists->smallChunkCount[containerIdx] > 20)
           recycleSmallChunks(&freeLists->smallChunks[containerIdx]);
       
       return;
   }
   
   //Check for free neighbors
   if(chunkToFree->nextLowerInMem)
   {
       if(chunkToFree->nextLowerInMem->prevChunkInFreeList != NULL)
       {//Chunk is not allocated
           if(getChunkSize(chunkToFree->nextLowerInMem) > LOWER_BOUND)
           {
               extractChunk(chunkToFree->nextLowerInMem);
               chunkToFree = mergeChunks(chunkToFree->nextLowerInMem, chunkToFree);
           }
       }
   }
   if(chunkToFree->nextHigherInMem)
   {
       if(chunkToFree->nextHigherInMem->prevChunkInFreeList != NULL)
       {//Chunk is not allocated
           if(getChunkSize(chunkToFree->nextHigherInMem) > LOWER_BOUND)
           {
               extractChunk(chunkToFree->nextHigherInMem);
               chunkToFree = mergeChunks(chunkToFree, chunkToFree->nextHigherInMem);
           }
       }
   }

   containerIdx = getContainer(chunkSize);
   insertChunk(chunkToFree, &freeLists->bigChunks[containerIdx]);
   
   //============================= MEASUREMENT STUFF ========================
   #ifdef MEAS__TIME_MALLOC
   saveLowTimeStampCountInto( endStamp );
   addIntervalToHist( startStamp, endStamp, _VMSMasterEnv->freeTimeHist );
   #endif
   //========================================================================

 }


/*Allocates memory from the external system -- higher overhead
 *
 *Because of Linux's malloc throwing bizarre random faults when malloc is
 * used inside a VMS virtual processor, have to pass this as a request and
 * have the core loop do it when it gets around to it -- will look for these
 * chores leftover from the previous animation of masterVP the next time it
 * goes to animate the masterVP -- so it takes two separate masterVP
 * animations, separated by work, to complete an external malloc or
 * external free request.
 *
 *Thinking core loop accepts signals -- just looks if signal-location is
 * empty or not --
 */
void *
VMS__malloc_in_ext( size_t sizeRequested )
 {
 /*
      //This is running in the master, so no chance for multiple cores to be
      // competing for the core's flag.
   if(  *(_VMSMasterEnv->coreLoopSignalAddr[ 0 ]) != 0 )
    {    //something has already signalled to core loop, so save the signal
         // and look, next time master animated, to see if can send it.
         //Note, the addr to put a signal is in the coreloop's frame, so just
         // checks it each time through -- make it volatile to avoid GCC
         // optimizations -- it's a coreloop local var that only changes
         // after jumping away.  The signal includes the addr to send the
         //return to -- even if just empty return completion-signal
         //
         //save the signal in some queue that the master looks at each time
         // it starts up -- one loc says if empty for fast common case --
         //something like that -- want to hide this inside this call -- but
         // think this has to come as a request -- req handler gives procr
         // back to master loop, which gives it back to req handler at point
         // it sees that core loop has sent return signal.  Something like
         // that.
      saveTheSignal

    }
  coreSigData->type = malloc;
  coreSigData->sizeToMalloc = sizeRequested;
  coreSigData->locToSignalCompletion = &figureOut;
   _VMSMasterEnv->coreLoopSignals[ 0 ] = coreSigData;
  */
      //just risk system-stack faults until get this figured out
   return malloc( sizeRequested );
 }


/*Frees memory that was allocated in the external system -- higher overhead
 *
 *As noted in external malloc comment, this is clunky 'cause the free has
 * to be called in the core loop.
 */
void
VMS__free_in_ext( void *ptrToFree )
 {
      //just risk system-stack faults until get this figured out
   free( ptrToFree );

      //TODO: fix this -- so 
 }


/*Designed to be called from the main thread outside of VMS, during init
 */
MallocArrays *
VMS_ext__create_free_list()
{     
   //Initialize containers for small chunks and fill with zeros
   _VMSMasterEnv->freeLists = (MallocArrays*)malloc( sizeof(MallocArrays) );
   MallocArrays *freeLists = _VMSMasterEnv->freeLists;
   
   freeLists->smallChunks = 
           (MallocProlog**)malloc(SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   memset((void*)freeLists->smallChunks,
           0,SMALL_CHUNK_COUNT*sizeof(MallocProlog*));
   memset((void*)freeLists->smallChunkCount,
           0,SMALL_CHUNK_COUNT*sizeof(uint32));
   
   //Calculate number of containers for big chunks
   uint32 container = getContainer(MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE)+1;
   freeLists->bigChunks = (MallocProlog**)malloc(container*sizeof(MallocProlog*));
   memset((void*)freeLists->bigChunks,0,container*sizeof(MallocProlog*));
   freeLists->containerCount = container;
   
   //Create first element in lastContainer 
   MallocProlog *firstChunk = malloc( MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE );
   if( firstChunk == NULL ) {printf("malloc error\n"); exit(1);}
   freeLists->memSpace = firstChunk;
   
   firstChunk->nextLowerInMem = NULL;
   firstChunk->nextHigherInMem = (MallocProlog*)((uintptr_t)firstChunk +
                        MALLOC_ADDITIONAL_MEM_FROM_OS_SIZE - sizeof(MallocProlog*));
   firstChunk->nextChunkInFreeList = NULL;
   //previous element in the queue is the container
   firstChunk->prevChunkInFreeList = freeLists->bigChunks[container-1];
   
   freeLists->bigChunks[container-1] = firstChunk;
   
   //Create dummy chunk to mark the top of stack this is of course
   //never freed
   MallocProlog *dummyChunk = firstChunk->nextHigherInMem;
   dummyChunk->nextHigherInMem = dummyChunk+1;
   dummyChunk->nextLowerInMem  = NULL;
   dummyChunk->nextChunkInFreeList = NULL;
   dummyChunk->prevChunkInFreeList = NULL;
   
   return freeLists;
 }


/*Designed to be called from the main thread outside of VMS, during cleanup
 */
void
VMS_ext__free_free_list( MallocArrays *freeLists )
 {    
   free(freeLists->memSpace);
   free(freeLists->bigChunks);
   free(freeLists->smallChunks);
   
 }

