Efficient memory management with Delphi

The commonly used way of dealing with large amounts of data in non manageable programming languages like C/C++ and Delphi is using of custom memory managers with the following API.

  TItemStorage = class
  public
    constructor Create(ItemSize:integer);
    function GetNewItem: pointer;
    property Top: integer read GetTop;
    property Item[i: integer]: pointer read GetItem; default;
    procedure Clear;
  end;

What it actually is just a dynamic array, but a very efficient dynamic array. And unlike standard dynamic arrays this class guarantees that if you put an item in it the pointer to the item stays the same all the time no matter how many items you add after this. The trick is to allocate new memory in fixed sized blocks which can reside sufficient amount of items.

function TItemStorage.GetNewItem: pointer;
var
  i, bi, ii: integer;
begin
  i := AtomicAdd(1, fItemCnt);
  bi := i div fBlockCapacity;
  ii := i mod fBlockCapacity;
  if bi>fTopBlock then begin
    ReallocMem(fBlocks, (bi+1)*SizeOf(pointer));
    fBlocks[bi] := AllocMem(fBlockSize);;
    inc(fTopBlock);
  end;
  STIntPtr(result) := STIntPtr(fBlocks[bi]) + ii*fItemSize;
end;

function TItemStorage.GetItem(i: integer): pointer;
var
  bi, ii: integer;
begin
  bi := i div fBlockCapacity;
  ii := i mod fBlockCapacity;
  STIntPtr(result) := STIntPtr(fBlocks[bi])+ii*fItemSize;
end;

The important moment which often lacks of attention here is the AllocMem function. Everybody just use the standard GetMem(var p: pointer; Size: NativeInt) function for this. But I discovered that the standard delphi memory manager deals very bad with large blocks of memory. It tends to not free the virtual address space (VAS) after you call FreeMem. For example, In SprutCAM when a user generated a toolpath on a complex part and a massive amounts of memory were allocated for temporal data structures needed in the calculation process and than the user hit the reset button, the VAS which was allocated during the calculation process was not freed. The software just consumed more and more memory with every new toolpath generation.

The solution is to use the Win32 API VirtualAlloc/VirtualFree functions. Unfortunately the minimum size of a block which can allocate the VirtualAlloc function is 64kbytes, and this is too much for our needs. So we need a memory manager for a memory manager) which will split 64kbytes blocks into smaller chunks. I chose the size of 1Kb, which is big enough to reside such data as points, triangles etc, and small enough to not waste too much space if the number of used items in an array is small.

So I wrote such a class.

  TSTMemoryManager = class
  protected type
    TFreeBlockArray = array of pointer;
  protected
    fLock: integer;
    fFreeBlocks: TFreeBlockArray;
    fTopFreeBlock: integer;
    CntBlocks: integer;
    function AddFreeBlock(p: pointer): integer;
    function ExtractFreeBlock(p: pointer): integer;
    const BlockSize=64*1024;
    const PageSize=1024;
    function BlockInfo(p: pointer): PInt64;
    function ChunkInfo(p: pointer): PByte;
  public
    const ChunkSize=PageSize-SizeOf(int64);
    constructor Create;
    function AllocMem: pointer;
    function FreeMem(p: pointer): boolean;
  end;

function TSTMemoryManager.AllocMem: pointer;
var
  Block: pointer;
  bi: PInt64;
  i: integer;
begin
  Lock(fLock);
  try
    if fTopFreeBlock<0 then begin
      Block := VirtualAlloc(nil, BlockSize, MEM_RESERVE OR MEM_COMMIT, 
        PAGE_READWRITE);
      BlockInfo(Block)^ := -1;
      AddFreeBlock(Block);
      inc(CntBlocks);
    end else
      Block := fFreeBlocks[fTopFreeBlock];
    ASSERT((STIntPtr(Block) AND $FFFF)=0);
    bi := BlockInfo(Block);
    i := FetchFreeChunk(bi);
    ASSERT((i>=0) and (i<=63));
    if bi^=0 then
      dec(fTopFreeBlock);
    result := pointer(STIntPtr(Block)+i*PageSize);
  finally
    UnLock(fLock);
  end;
end;

function TSTMemoryManager.FreeMem(p: pointer): boolean;
var
  CI: byte;
  Block: pointer;
  bi: PInt64;
begin
  result := p<>nil;
  if result then begin
    Lock(fLock);
    try
      {$IFDEF CPUX64}
      Block := pointer(STIntPtr(p) AND $FFFFFFFFFFFF0000);
      {$ELSE}
      Block := pointer(STIntPtr(p) AND $FFFF0000);
      {$ENDIF}
      ci := (STIntPtr(p)-STIntPtr(Block)) div PageSize;
      ASSERT(ci<=63);
      ASSERT((STIntPtr(Block) AND $FFFF)=0);
      bi := BlockInfo(Block);
      if bi^=0 then
        AddFreeBlock(Block);
      FreeChunk(bi, ci);
      if bi^=-1 then begin
        ExtractFreeBlock(Block);
        VirtualFree(Block, 0, MEM_RELEASE);
        dec(CntBlocks);
      end;
    finally
      UnLock(fLock);
    end;
  end;
end;

function TSTMemoryManager.BlockInfo(p: pointer): PInt64;
begin
  result := pointer(NativeInt(P)+BlockSize-SizeOf(Int64));
end;

function FetchFreeChunk(bi: PInt64): integer;
asm
{$IFDEF CPUX64}
  mov rdx, [bi]
  BSF rax, rdx
  JZ @End;
  BTR[rcx], rax;
@End:
{$ELSE}
  mov edx, [eax]
  BSF ecx, edx
  JNZ @End32
  add eax, 4;
  mov edx, [eax];
  BSF ecx, edx
  JZ @End64
  BTR[eax], ecx;
@End64:
  mov eax, ecx;
  add eax, 32;
  ret
@End32:
  BTR[eax], ecx;
  mov eax, ecx;
{$ENDIF}
end;

function TSTMemoryManager.AddFreeBlock(p: pointer): integer;
begin
  fTopFreeBlock := fTopFreeBlock+1;
  result := fTopFreeBlock;
  if result>=Length(fFreeBlocks) then
    SetLength(fFreeBlocks, result+10);
  fFreeBlocks[result] := p;
end;

function TSTMemoryManager.ChunkInfo(p: pointer): PByte;
begin
  result := pointer(NativeInt(p)+PageSize-9);
end;

function TSTMemoryManager.ExtractFreeBlock(p: pointer): integer;
begin
  result := fTopFreeBlock;
  while (result>=0) and (fFreeBlocks[result]<>p) do
    dec(result);
  if fFreeBlocks[result]=p then begin
    fFreeBlocks[result] := fFreeBlocks[fTopFreeBlock];
    dec(fTopFreeBlock);
  end;
end;

procedure FreeChunk(Block: Pint64; iChunk: integer);
asm
  BTS [Block], iChunk;
end;

The class works the following way. With VirtualAlloc we allocate a block of 64kbytes and put it into fFreeBlocks, in the end of the block we store a 64bit mask every bit of which says which one of 64 block pages are currently free (we initialize it with Int64(-1)) . If the bit mask becomes zero in the AllocMem function it means the whole block is used and we remove it from the fFreeBlocks array, if the int64(mask) becomes –1 in the FreeMem function it means the whole block is free and we release it using the VirtualFree function.

After replacing the standard GetMem function with this class AllocMem routine SprutCAM now frees all the memory it uses during toolpath generation. Here is the source code. http://dl.dropbox.com/u/45498379/PageStorage.pas

Advertisements
Efficient memory management with Delphi

2 thoughts on “Efficient memory management with Delphi

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s