Logo Search packages:      
Sourcecode: heaplayers version File versions

winhoard.cpp

/* -*- C++ -*- */

/*
  The Hoard Multiprocessor Memory Allocator
  www.hoard.org

  Author: Emery Berger, http://www.cs.umass.edu/~emery
 
  Copyright (c) 1998-2004, The University of Texas at Austin

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.
  
  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.
  
  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

*/

/*

  Compile with:

  cl /LD /I../../heaplayers /nologo /Ox /DNDEBUG /D_MT /D_DLL /DWIN32 /D_WIN32 /D_WINDOWS winhoard.cpp /link /subsystem:console /dll /incremental:no

  and link usewinhoard.cpp with your executable.

 */


#include <windows.h>

#define WIN32_LEAN_AND_MEAN
#define _WIN32_WINNT 0x0500

#pragma inline_depth(255)

#pragma warning(disable: 4273)
#pragma warning(disable: 4098)  // Library conflict.
#pragma warning(disable: 4355)  // 'this' used in base member initializer list.
#pragma warning(disable: 4074)      // initializers put in compiler reserved area.

#pragma init_seg(compiler)
#pragma comment(linker, "/merge:.CRT=.data")
#pragma comment(linker, "/disallowlib:libc.lib")
#pragma comment(linker, "/disallowlib:libcd.lib")
#pragma comment(linker, "/disallowlib:libcmt.lib")
#pragma comment(linker, "/disallowlib:libcmtd.lib")
#pragma comment(linker, "/disallowlib:msvcrtd.lib")

// Pointers to the memcpy and memset routines used by hoard_realloc and
// hoard_calloc.  They are assigned in PatchMeIn() from the CRT module via
// GetProcAddress; nothing in this file assigns them before that.
void (*hoard_memcpy_ptr)(void *dest, const void *src, size_t count);
void (*hoard_memset_ptr)(void *dest, int c, size_t count);


// Disable lock optimization.
// NOTE(review): nothing in this file reads the flag; presumably the
// included heap headers consult it -- confirm.
volatile int anyThreadCreated = 1;

/// The maximum amount of memory that each TLAB may hold, in bytes.
enum { MAX_MEMORY_PER_TLAB = 64 * 1024 };

/// The maximum number of threads supported (sort of).
enum { MaxThreads = 1024 };

/// The maximum number of heaps supported.
enum { NumHeaps = 128 };

#include "computethreadstacksize.h"
#include "cpuinfo.h"
#include "hoard.h"
#include "heapmanager.h"
#include "tlab.h"

/// The heap type behind every allocation routine in this file: a
/// HeapManager instantiated with TheLockType and with a HoardHeap that
/// takes MaxThreads and NumHeaps as its template arguments.  The class
/// adds no members of its own.
class TheCustomHeapType :
  public HeapManager<TheLockType, HoardHeap<MaxThreads, NumHeaps> > {};

/// Return the custom (Hoard) heap.

inline static TheCustomHeapType * getCustomHeap (void) {
  // The heap object lives in a static array of doubles rather than being
  // a static object itself.  One spare element is added so that the
  // integer division can never leave the array smaller than the heap.
  enum { HEAP_STORAGE_DOUBLES = sizeof(TheCustomHeapType) / sizeof(double) + 1 };
  static double heapStorage[HEAP_STORAGE_DOUBLES];

  // A function-local static is initialized the first time control passes
  // through its declaration, so the placement-new below runs exactly once,
  // on the first call, and the heap is ready before anyone can use it.
  static TheCustomHeapType * const theHeap = new (heapStorage) TheCustomHeapType;
  return theHeap;
}

/// The thread-local allocation buffer type.  It is instantiated with the
/// bin count and size-class conversion functions of
/// bins<NoHeader, SUPERBLOCK_SIZE>, the MAX_MEMORY_PER_TLAB limit, and the
/// superblock and per-thread heap types exported by TheCustomHeapType.
typedef ThreadLocalAllocationBuffer<bins<NoHeader, SUPERBLOCK_SIZE>::NUM_BINS,
                            bins<NoHeader, SUPERBLOCK_SIZE>::getSizeClass,
                            bins<NoHeader, SUPERBLOCK_SIZE>::getClassSize,
                            MAX_MEMORY_PER_TLAB,
                            TheCustomHeapType::SuperblockType,
                            SUPERBLOCK_SIZE,
                            TheCustomHeapType::PerThreadHeap> TLAB;

// Per-thread raw storage in which getTLABslowPath() constructs the
// thread's TLAB (an array of doubles with one spare element).
__declspec(thread) double tlabBuf[sizeof(TLAB) / sizeof(double) + 1];
// Per-thread pointer to the constructed TLAB.  getTLAB() treats NULL as
// "not created yet", and HoardDllMain resets it to NULL on thread attach.
__declspec(thread) TLAB * tlab;


/// Construct the calling thread's TLAB in its thread-local buffer, on top
/// of the heap returned by the custom heap's getHeap(), and remember it in
/// the thread-local `tlab` pointer.
static TLAB * getTLABslowPath (void) {
  TLAB * const created = new (tlabBuf) TLAB (&getCustomHeap()->getHeap());
  tlab = created;
  return created;
}

/// Return the calling thread's TLAB, creating it on first use.
static __forceinline TLAB * getTLAB (void) {
  // `tlab` is thread-specific data, so no locking is needed here.
  TLAB * const current = tlab;
  return (current != NULL) ? current : getTLABslowPath();
}


extern "C" size_t hoard_getsize (void * ptr)
{
  static TheCustomHeapType * theCustomHeap = getCustomHeap();
  if (ptr == NULL) {
    return 0;
  }
  return theCustomHeap->getSize(ptr);
}

// Intercept the exit functions.

/// Capacity of the table of registered exit functions.
static const int HOARD_MAX_EXIT_FUNCTIONS = 255;

/// Number of functions currently stored in exitFunctionBuffer.
static int exitCount = 0;

extern "C" {

  typedef void (*exitFunctionType) (void);

  /// Registered exit functions, in registration order.  Sized by the
  /// named constant so the bound checked in hoard_onexit can never drift
  /// away from the actual array length.
  exitFunctionType exitFunctionBuffer[HOARD_MAX_EXIT_FUNCTIONS];

  /// Register a function to be run by hoard_exit.
  ///
  /// Null pointers are ignored (calling one from hoard_exit would crash),
  /// and registrations are silently dropped once the table is full.
  void hoard_onexit (void (*function)(void)) {
    if (function == NULL) {
      return;
    }
    if (exitCount < HOARD_MAX_EXIT_FUNCTIONS) {
      exitFunctionBuffer[exitCount] = function;
      exitCount++;
    }
  }

  /// Run every registered exit function, most recently registered first,
  /// leaving the table empty.  The exit code is not used, and this
  /// function returns to its caller rather than terminating the process.
  void hoard_exit (int code) {
    while (exitCount > 0) {
      // Decrement before the call so that a handler which itself
      // registers or triggers exit handling sees a consistent count.
      exitCount--;
      (exitFunctionBuffer[exitCount])();
    }
  }
}

/// Allocate at least sz bytes.  Requests up to
/// TheCustomHeapType::BIG_OBJECT are served by the calling thread's TLAB;
/// larger ones go to the shared custom heap.
extern "C" void * hoard_malloc (size_t sz) {

  // Every object must be big enough to hold two pointers.
  const size_t smallestRequest = 2 * sizeof(size_t);
  if (sz < smallestRequest) {
    sz = smallestRequest;
  }

  // No explicit double-word alignment of sz is done here: all requests
  // are rounded up in the TLAB.

  if (sz > TheCustomHeapType::BIG_OBJECT) {
    // Big objects use the base heap directly.  The static pointer is
    // declared inside this branch so that its one-time initialization
    // check is not paid on the small-object path.
    static TheCustomHeapType * heap = getCustomHeap();
    return heap->malloc (sz);
  }

  // Small objects are allocated locally, from this thread's TLAB.
  return getTLAB()->malloc (sz);
}


/// Release an object previously obtained from this allocator.
///
/// Objects no larger than TheCustomHeapType::BIG_OBJECT are handed to the
/// calling thread's TLAB; larger ones are returned to the shared heap.
/// Freeing a null pointer does nothing.
extern "C" void hoard_free (void * ptr) {

  // free(NULL) and operator delete on a null pointer must be no-ops.
  // hoard_getsize likewise avoids calling getSize() with a null pointer,
  // so do not let one reach the heap here either.
  if (ptr == NULL) {
    return;
  }

  size_t sz = getCustomHeap()->getSize (ptr);
  if (sz <= TheCustomHeapType::BIG_OBJECT) {
    // Small object: return it through the thread-local buffer.
    TLAB * t = getTLAB();
    t->free (ptr);
  } else {
    // Big object: return it directly to the base heap.
    static TheCustomHeapType * heap = getCustomHeap();
    heap->free (ptr);
  }
}

/*** below are generic replacement functions for the malloc family ***/

/// Allocate zero-filled storage for nelem objects of elsize bytes each.
///
/// Returns NULL if the total byte count does not fit in a size_t or if
/// the underlying allocation fails.
extern "C" void * hoard_calloc (size_t nelem, size_t elsize)
{
  // Without this check, a wrapped-around product would yield a block far
  // smaller than the caller believes it has.
  if (elsize != 0 && nelem > ((size_t) -1) / elsize) {
    return NULL;
  }

  size_t n = nelem * elsize;
  void * ptr = hoard_malloc (n);
  // Zero out the malloc'd block.
  if (ptr != NULL) {
    (hoard_memset_ptr) (ptr, 0, n);
  }
  return ptr;
}


/// Return a copy of the NUL-terminated string s in storage obtained from
/// hoard_malloc, or NULL if s is NULL or the allocation fails.
extern "C" char * hoard_strdup (const char * s)
{
  if (s == NULL) {
    return NULL;
  }

  // One extra byte for the terminating NUL.
  char * copy = (char *) hoard_malloc(strlen(s) + 1);
  if (copy != NULL) {
    strcpy(copy, s);
  }
  return copy;
}


/// Resize the object at ptr to hold at least sz bytes.
///
/// - ptr == NULL behaves like an allocation of sz bytes.
/// - sz == 0 frees ptr and returns NULL.
/// - If the existing object is already large enough it is returned as is.
/// - Otherwise a new object is allocated, the old contents are copied
///   into it and the old object is freed.
///
/// If the new allocation fails, NULL is returned and the original object
/// is left untouched, so the caller still owns valid data.
extern "C" void * hoard_realloc (void * ptr, size_t sz)
{
  static TheCustomHeapType * theCustomHeap = getCustomHeap();
  if (ptr == NULL) {
    return theCustomHeap->malloc (sz);
  }
  if (sz == 0) {
    theCustomHeap->free (ptr);
    return NULL;
  }

  size_t objSize = theCustomHeap->getSize(ptr);
  if (objSize >= sz) {
    // The current block already satisfies the request.
    return ptr;
  }

  void * buf = theCustomHeap->malloc (sz);
  if (buf == NULL) {
    // Out of memory: keep the old block alive instead of freeing it.
    return NULL;
  }

  // Copy the contents of the original object.  objSize < sz is known
  // here, so the whole old object fits in the new block.
  (hoard_memcpy_ptr) (buf, ptr, objSize);

  // Free the old block.
  theCustomHeap->free(ptr);

  // Return a pointer to the new one.
  return buf;
}

// Module names of the release and debug CRT DLLs that PatchMeIn() looks
// up with GetModuleHandle.
//const char *RlsCRTLibraryName = "MSVCRT.DLL";
const char *RlsCRTLibraryName = "MSVCR71.DLL";
const char *DbgCRTLibraryName = "MSVCRTD.DLL";

// First byte that PatchIt() writes over a patched routine: the opcode of
// an x86 near jump with a 32-bit relative displacement.
#define IAX86_NEARJMP_OPCODE    0xe9
// Displacement operand for a jump placed at `from` that lands on `to`.
// The 5 subtracted is the length of the jump instruction itself (one
// opcode byte plus four displacement bytes).
#define MakeIAX86Offset(to,from)  ((unsigned)((char*)(to)-(char*)(from)) - 5)

// Describes one CRT routine to be redirected.  The patch tables fill in
// `import` and `replacement`; PatchMeIn() sets `original` from
// GetProcAddress and PatchIt() fills `codebytes` before overwriting the
// routine's first five bytes.
typedef struct
{
  const char *import;         // import name of patch routine
  FARPROC replacement;        // pointer to replacement function
  FARPROC original;           // pointer to original function
  unsigned char codebytes[5]; // 5 bytes of original code storage
} PATCH;


/* ------------------------------------------------------------------------ */

// Table of release-CRT exports and the Hoard routines that replace them.
// Each entry gives the export name, the replacement, and a zero
// placeholder for `original`, which PatchMeIn() fills in at load time.
static PATCH rls_patches[] = 
  {
    // RELEASE CRT library routines supported by this memory manager.

    // Disabled entries: no replacements for these appear in this file.
#if 0
    {"_expand",         (FARPROC) hoard__expand,      0},
    {"_heapchk",  (FARPROC) hoard__heapchk,     0},
    {"_heapmin",  (FARPROC) hoard__heapmin,     0},
    {"_heapset",  (FARPROC) hoard__heapset,     0},
    {"_heapwalk", (FARPROC) hoard__heapwalk,    0},
#endif

    // Exit-handler registration and exit.
    {"_onexit",         (FARPROC) hoard_onexit,    0},
    {"_exit",           (FARPROC) hoard_exit,      0},

    // operator new, new[], delete, delete[].
    // (The strings are the compiler-decorated export names.)

    {"??2@YAPAXI@Z",    (FARPROC) hoard_malloc,    0},
    {"??_U@YAPAXI@Z",   (FARPROC) hoard_malloc,    0},
    {"??3@YAXPAX@Z",    (FARPROC) hoard_free,      0},
    {"??_V@YAXPAX@Z",   (FARPROC) hoard_free,      0},

    // the nothrow variants new, new[].

    {"??2@YAPAXIABUnothrow_t@std@@@Z",  (FARPROC) hoard_malloc, 0},
    {"??_U@YAPAXIABUnothrow_t@std@@@Z", (FARPROC) hoard_malloc, 0},

    // The C allocation routines.
    {"_msize",    (FARPROC) hoard_getsize,            0},
    {"calloc",    (FARPROC) hoard_calloc,       0},
    {"malloc",    (FARPROC) hoard_malloc,       0},
    {"realloc",   (FARPROC) hoard_realloc,            0},
    {"free",      (FARPROC) hoard_free,              0},
  };

// Table of debug-CRT exports and their replacements; compiled only in
// _DEBUG builds and applied by PatchMeIn() when the debug CRT is loaded.
// NOTE(review): most replacement functions named here (hoard__calloc_dbg,
// hoard__msize, hoard_new_nothrow, ...) are not defined in the visible
// source -- confirm where they come from before enabling a debug build.
#ifdef _DEBUG
static PATCH dbg_patches[] = 
  {
    // DEBUG CRT library routines supported by this memory manager.

    {"_calloc_dbg",               (FARPROC) hoard__calloc_dbg,0},
    {"_CrtCheckMemory",           (FARPROC) hoard__CrtCheckMemory,      0},
    {"_CrtDoForAllClientObjects", (FARPROC) hoard__CrtDoForAllClientObjects, 0},
    {"_CrtDumpMemoryLeaks",       (FARPROC) hoard__CrtDumpMemoryLeaks, 0},
    {"_CrtIsMemoryBlock",         (FARPROC) hoard__CrtIsMemoryBlock, 0},
    {"_CrtIsValidHeapPointer",        (FARPROC) hoard__CrtIsValidHeapPointer, 0},
    {"_CrtMemCheckpoint",         (FARPROC) hoard__CrtMemCheckpoint, 0},
    {"_CrtMemDifference",         (FARPROC) hoard__CrtMemDifference, 0},
    {"_CrtMemDumpAllObjectsSince",(FARPROC) hoard__CrtMemDumpAllObjectsSince, 0},
    {"_CrtMemDumpStatistics",   (FARPROC) hoard__CrtMemDumpStatistics, 0},
    {"_CrtSetAllocHook",        (FARPROC) hoard__CrtSetAllocHook, 0},
    {"_CrtSetBreakAlloc",         (FARPROC) hoard__CrtSetBreakAlloc,0},
    {"_CrtSetDbgFlag",            (FARPROC) hoard__CrtSetDbgFlag, 0},
    {"_CrtSetDumpClient",(FARPROC) hoard__CrtSetDumpClient, 0},
    {"_expand",          (FARPROC) hoard__expand, 0},
    {"_expand_dbg",      (FARPROC) hoard__expand_dbg, 0},
    {"_free_dbg",  (FARPROC) hoard__free_dbg, 0},
    {"_malloc_dbg",      (FARPROC) hoard__malloc_dbg, 0},
    {"_msize",           (FARPROC) hoard__msize, 0},
    {"_msize_dbg",       (FARPROC) hoard__msize_dbg, 0},
    {"_realloc_dbg",     (FARPROC) hoard__realloc_dbg, 0},
    {"_heapchk",   (FARPROC) hoard__heapchk,    0},
    {"_heapmin",   (FARPROC) hoard__heapmin,    0},
    {"_heapset",   (FARPROC) hoard__heapset,    0},
    {"_heapwalk",  (FARPROC) hoard__heapwalk, 0},
    // NOTE(review): "_msize" already appears a few entries above, so this
    // export is patched twice; the second pass saves the already-patched
    // bytes into codebytes.  Looks unintentional -- confirm.
    {"_msize",           (FARPROC) hoard__msize, 0},
    {"calloc",           (FARPROC) hoard_calloc, 0},
    {"malloc",           (FARPROC) hoard_malloc, 0},
    {"realloc",          (FARPROC) hoard_realloc, 0},
    {"free",             (FARPROC) hoard_free, 0},

    // operator new, new[], delete, delete[].

    {"??2@YAPAXI@Z",     (FARPROC) hoard_malloc, 0},
    {"??_U@YAPAXI@Z",    (FARPROC) hoard_malloc, 0},
    {"??3@YAXPAX@Z",     (FARPROC) hoard_free,   0},
    {"??_V@YAXPAX@Z",    (FARPROC) hoard_free,   0},

    // the nothrow variants new, new[].

    {"??2@YAPAXIABUnothrow_t@std@@@Z",  (FARPROC) hoard_new_nothrow, 0},
    {"??_U@YAPAXIABUnothrow_t@std@@@Z", (FARPROC) hoard_new_nothrow, 0},

    // The debug versions of operator new & delete.

    {"??2@YAPAXIHPBDH@Z", (FARPROC) hoard_debug_operator_new, 0},
    {"??3@YAXPAXHPBDH@Z", (FARPROC) hoard_debug_operator_delete, 0},
    // The debug CRT's _nh_malloc_dbg routine.

    {"_nh_malloc_dbg",   (FARPROC)hoard_nh_malloc_dbg, 0},
  };
#endif


/// Redirect one CRT routine to its replacement.
///
/// The first five bytes of patch->original are saved in patch->codebytes
/// and then overwritten with a near jump to patch->replacement.  The
/// caller must have set patch->original to a valid code address.
///
/// If the containing memory region cannot be queried or made writable the
/// routine is left unpatched; no error is reported to the caller.
static void PatchIt (PATCH *patch)
{
  // Change rights on CRT Library module to execute/read/write.

  MEMORY_BASIC_INFORMATION mbi_thunk;
  if (VirtualQuery((void*)patch->original, &mbi_thunk,
                   sizeof(MEMORY_BASIC_INFORMATION)) == 0) {
    return;
  }

  DWORD previousProtect = 0;
  if (!VirtualProtect(mbi_thunk.BaseAddress, mbi_thunk.RegionSize,
                      PAGE_EXECUTE_READWRITE, &previousProtect)) {
    // Writing to the code below would fault, so give up on this entry.
    return;
  }

  // Patch CRT library original routine:
  //  save original 5 code bytes for exit restoration
  //        write jmp <patch_routine> (5 bytes long) to original.

  memcpy(patch->codebytes, (void*)patch->original, sizeof(patch->codebytes));
  unsigned char *patchloc = (unsigned char*)patch->original;
  *patchloc++ = IAX86_NEARJMP_OPCODE;
  *(unsigned*)patchloc = MakeIAX86Offset(patch->replacement, patch->original);

  // Reset CRT library code to original page protection.

  VirtualProtect(mbi_thunk.BaseAddress, mbi_thunk.RegionSize,
                 previousProtect, &previousProtect);

  // Code was modified in memory: make sure no stale instructions are
  // executed from the patched range.
  FlushInstructionCache(GetCurrentProcess(), (void*)patch->original,
                        sizeof(patch->codebytes));
}


static bool PatchMeIn (void)
{
  // acquire the module handles for the CRT libraries (release and debug)
  HMODULE RlsCRTLibrary = GetModuleHandle(RlsCRTLibraryName);

#ifdef _DEBUG
  HMODULE DbgCRTLibrary = GetModuleHandle(DbgCRTLibraryName);
#endif

  HMODULE DefCRTLibrary = 
#ifdef _DEBUG
    DbgCRTLibrary? DbgCRTLibrary: 
#endif      
    RlsCRTLibrary;

  // assign function pointers for required CRT support functions
#if 1
  if (DefCRTLibrary)
    {
      hoard_memcpy_ptr = (void(*)(void*,const void*,size_t))
      GetProcAddress(DefCRTLibrary, "memcpy");
      hoard_memset_ptr = (void(*)(void*,int,size_t))
      GetProcAddress(DefCRTLibrary, "memset");
    }
#endif

  // patch all relevant Release CRT Library entry points
  unsigned i;
  bool patchedRls = false;
  if (RlsCRTLibrary)
    for (i = 0; i < sizeof(rls_patches) / sizeof(*rls_patches); i++)
      if (rls_patches[i].original = GetProcAddress(RlsCRTLibrary, rls_patches[i].import))
      {
        PatchIt(&rls_patches[i]);
        patchedRls = true;
      }

#ifdef _DEBUG
  // patch all relevant Debug CRT Library entry points
  bool patchedDbg = false;
  if (DbgCRTLibrary)
    for (i = 0; i < sizeof(dbg_patches) / sizeof(*dbg_patches); i++)
      if (dbg_patches[i].original = GetProcAddress(DbgCRTLibrary, dbg_patches[i].import))
      {
        PatchIt(&dbg_patches[i]);
        patchedDbg = true;
      }

  // no point in staying loaded if we didn't patch anything...
  return patchedRls || patchedDbg;
#else
  return patchedRls;
#endif
}

extern "C" 
{
  // This global data item is used by the app-linked obj to reference
  // winhoard.dll asap in the executable. We want winhoard.dll's
  // HoardDllMain to run first.

  __declspec(dllexport) int ReferenceHoardStub;

  /// DLL entry point.
  ///
  /// - Process attach: patches the CRT allocation routines (PatchMeIn).
  /// - Thread attach: assigns the thread a heap and clears its TLAB
  ///   pointer so the TLAB is built on first use.
  /// - Thread detach: flushes the thread's TLAB, if it ever created one,
  ///   and releases its heap.
  ///
  /// Always returns TRUE; the result of PatchMeIn() is not acted upon.
  BOOL WINAPI HoardDllMain (HANDLE hinstDLL,
                      DWORD fdwReason,
                      LPVOID lpreserved)
  {
    // Computed once, on the first notification.
    static int np = CPUInfo::computeNumProcessors();

    switch (fdwReason)
      {
      case DLL_PROCESS_ATTACH:
        // NOTE(review): the thread cases below are still needed; this call
        // is documented to fail for DLLs with static thread-local storage,
        // which this file declares -- confirm on the target platform.
        DisableThreadLibraryCalls ((HMODULE)hinstDLL);
        PatchMeIn();
        return TRUE;

      case DLL_PROCESS_DETACH:
        // Notice that we haven't replaced all heap calls! Here's one now.
        (void) HeapAlloc (GetProcessHeap(), 0, 1);
        return TRUE;

      case DLL_THREAD_ATTACH:
        if (np == 1) {
          // Assign the thread to heap 0.
          getCustomHeap()->chooseZero();
        } else {
          getCustomHeap()->findUnusedHeap();
        }
        // Reset the thread-local allocation buffer so it will get
        // properly initialized.
        tlab = NULL;
        return TRUE;

      case DLL_THREAD_DETACH:
        // Dump the memory from the TLAB.  A thread that never allocated
        // or freed anything has no TLAB, so there is nothing to clear.
        if (tlab != NULL) {
          tlab->clear();
        }
        if (np != 1) {
          getCustomHeap()->releaseHeap();
        }
        return TRUE;

      default:
        return TRUE;
      }
    return TRUE;
  }

} // extern "C"

Generated by  Doxygen 1.6.0   Back to index