tbbmalloc_internal.h | tbbmalloc_internal.h | |||
---|---|---|---|---|
/* | /* | |||
Copyright 2005-2012 Intel Corporation. All Rights Reserved. | Copyright 2005-2013 Intel Corporation. All Rights Reserved. | |||
This file is part of Threading Building Blocks. | This file is part of Threading Building Blocks. | |||
Threading Building Blocks is free software; you can redistribute it | Threading Building Blocks is free software; you can redistribute it | |||
and/or modify it under the terms of the GNU General Public License | and/or modify it under the terms of the GNU General Public License | |||
version 2 as published by the Free Software Foundation. | version 2 as published by the Free Software Foundation. | |||
Threading Building Blocks is distributed in the hope that it will be | Threading Building Blocks is distributed in the hope that it will be | |||
useful, but WITHOUT ANY WARRANTY; without even the implied warranty | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |||
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
skipping to change at line 108 | skipping to change at line 108 | |||
/********** Various numeric parameters controlling allocations ********/ | /********** Various numeric parameters controlling allocations ********/ | |||
/* | /* | |||
* slabSize - the size of a block for allocation of small objects, | * slabSize - the size of a block for allocation of small objects, | |||
* it must be larger than maxSegregatedObjectSize. | * it must be larger than maxSegregatedObjectSize. | |||
*/ | */ | |||
const uintptr_t slabSize = 16*1024; | const uintptr_t slabSize = 16*1024; | |||
/* | /* | |||
* Difference between object sizes in large block bins | ||||
*/ | ||||
const uint32_t largeBlockCacheStep = 8*1024; | ||||
/* | ||||
* Large blocks cache cleanup frequency. | * Large blocks cache cleanup frequency. | |||
* It should be power of 2 for the fast checking. | * It should be power of 2 for the fast checking. | |||
*/ | */ | |||
const unsigned cacheCleanupFreq = 256; | const unsigned cacheCleanupFreq = 256; | |||
/* | /* | |||
* Best estimate of cache line size, for the purpose of avoiding false sharing. | * Best estimate of cache line size, for the purpose of avoiding false sharing. | |||
* Too high causes memory overhead, too low causes false-sharing overhead. | * Too high causes memory overhead, too low causes false-sharing overhead. | |||
* Because, e.g., 32-bit code might run on a 64-bit system with a larger cache line size, | * Because, e.g., 32-bit code might run on a 64-bit system with a larger cache line size, | |||
* it would probably be better to probe at runtime where possible and/or allow for an environment variable override, | * it would probably be better to probe at runtime where possible and/or allow for an environment variable override, | |||
skipping to change at line 162 | skipping to change at line 157 | |||
~TLSKey(); | ~TLSKey(); | |||
TLSData* getThreadMallocTLS() const; | TLSData* getThreadMallocTLS() const; | |||
void setThreadMallocTLS( TLSData * newvalue ); | void setThreadMallocTLS( TLSData * newvalue ); | |||
TLSData* createTLS(MemoryPool *memPool, Backend *backend); | TLSData* createTLS(MemoryPool *memPool, Backend *backend); | |||
}; | }; | |||
// TODO: make BitMaskBasic more general | // TODO: make BitMaskBasic more general | |||
// (currently, it fits BitMaskMin well, but not as suitable for BitMaskMax) | // (currently, it fits BitMaskMin well, but not as suitable for BitMaskMax) | |||
template<unsigned NUM> | template<unsigned NUM> | |||
class BitMaskBasic { | class BitMaskBasic { | |||
static const int SZ = NUM/(CHAR_BIT*sizeof(uintptr_t)) + (NUM % sizeof(uintptr_t) ? 1:0); | static const int SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1; | |||
static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t); | static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t); | |||
uintptr_t mask[SZ]; | uintptr_t mask[SZ]; | |||
protected: | protected: | |||
void set(size_t idx, bool val) { | void set(size_t idx, bool val) { | |||
MALLOC_ASSERT(idx<NUM, ASSERT_TEXT); | MALLOC_ASSERT(idx<NUM, ASSERT_TEXT); | |||
size_t i = idx / WORD_LEN; | size_t i = idx / WORD_LEN; | |||
int pos = WORD_LEN - idx % WORD_LEN - 1; | int pos = WORD_LEN - idx % WORD_LEN - 1; | |||
if (val) | if (val) | |||
AtomicOr(&mask[i], 1ULL << pos); | AtomicOr(&mask[i], 1ULL << pos); | |||
skipping to change at line 218 | skipping to change at line 213 | |||
public: | public: | |||
void set(size_t idx, bool val) { | void set(size_t idx, bool val) { | |||
BitMaskBasic<NUM>::set(NUM - 1 - idx, val); | BitMaskBasic<NUM>::set(NUM - 1 - idx, val); | |||
} | } | |||
int getMaxTrue(unsigned startIdx) const { | int getMaxTrue(unsigned startIdx) const { | |||
int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1); | int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1); | |||
return -1==p? -1 : (int)NUM - 1 - p; | return -1==p? -1 : (int)NUM - 1 - p; | |||
} | } | |||
}; | }; | |||
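The right-hand column above changes BitMaskBasic::SZ from a mixed-divisor expression to a plain round-up division. A minimal stand-alone sketch (editorial, not part of the header; oldSZ/newSZ are hypothetical names) shows where the old expression under-counts the number of uintptr_t words on a typical 64-bit target:

```cpp
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Old expression: divides by the word width in bits but tests the remainder
// against sizeof(uintptr_t) (bytes), so some NUM values lose a word.
static size_t oldSZ(size_t NUM) {
    return NUM/(CHAR_BIT*sizeof(uintptr_t)) + (NUM % sizeof(uintptr_t) ? 1 : 0);
}
// New expression: classic round-up division by the word width in bits.
static size_t newSZ(size_t NUM) {
    return (NUM-1)/(CHAR_BIT*sizeof(uintptr_t)) + 1;
}

int main() {
    // On a 64-bit target, NUM=72 or NUM=96 needs 2 words, but oldSZ() returns 1
    // because 72 and 96 are multiples of sizeof(uintptr_t)==8.
    const size_t tests[] = {63, 64, 65, 72, 96, 128};
    for (size_t n : tests)
        std::printf("NUM=%zu old=%zu new=%zu\n", n, oldSZ(n), newSZ(n));
    return 0;
}
```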
class LargeObjectCache { | /* cache blocks in range [MinSize; MaxSize) in bins with CacheStep | |||
TooLargeFactor -- when cache size treated "too large" in comparison to user data size | ||||
OnMissFactor -- If cache miss occured and cache was cleaned, | ||||
set ageThreshold to OnMissFactor * the difference | ||||
between current time and last time cache was cleaned. | ||||
LongWaitFactor -- to detect rarely-used bins and forget about their usage history | ||||
*/ | ||||
template<size_t MIN_SIZE, size_t MAX_SIZE, uint32_t CACHE_STEP, int TOO_LARGE, | ||||
int ON_MISS, int LONG_WAIT> | ||||
struct LargeObjectCacheProps { | ||||
static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE; | ||||
static const uint32_t CacheStep = CACHE_STEP; | ||||
static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, | ||||
LongWaitFactor = LONG_WAIT; | ||||
}; | ||||
template<typename Props> | ||||
class LargeObjectCacheImpl { | ||||
// The number of bins to cache large objects. | // The number of bins to cache large objects. | |||
#if __TBB_DEFINE_MIC | static const uint32_t numBins = (Props::MaxSize-Props::MinSize)/Props::CacheStep; | |||
static const uint32_t numLargeBlockBins = 11; // for 100KB max cached size | ||||
#else | ||||
static const uint32_t numLargeBlockBins = 1024; // for ~8MB max cached size | ||||
#endif | ||||
typedef BitMaskMax<numLargeBlockBins> BinBitMask; | typedef BitMaskMax<numBins> BinBitMask; | |||
// Current sizes of used and cached objects. It's calculated while we are | // Current sizes of used and cached objects. It's calculated while we are | |||
// traversing bins, and used for isLOCTooLarge() check at the same time. | // traversing bins, and used for isLOCTooLarge() check at the same time. | |||
class BinsSummary { | class BinsSummary { | |||
size_t usedSz; | size_t usedSz; | |||
size_t cachedSz; | size_t cachedSz; | |||
public: | public: | |||
BinsSummary() : usedSz(0), cachedSz(0) {} | BinsSummary() : usedSz(0), cachedSz(0) {} | |||
// "too large" criteria | // "too large" criteria | |||
bool isLOCTooLarge() const { return cachedSz > 2*usedSz; } | bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor*usedSz; } | |||
void update(size_t usedSize, size_t cachedSize) { | void update(size_t usedSize, size_t cachedSize) { | |||
usedSz += usedSize; | usedSz += usedSize; | |||
cachedSz += cachedSize; | cachedSz += cachedSize; | |||
} | } | |||
void reset() { usedSz = cachedSz = 0; } | void reset() { usedSz = cachedSz = 0; } | |||
}; | }; | |||
// 2-linked list of same-size cached blocks | // 2-linked list of same-size cached blocks ordered by age (oldest on top) | |||
// TODO: are we really want the list to be 2-linked? This allows us | ||||
// reduce memory consumption and do less operations under lock. | ||||
// TODO: try to switch to 32-bit logical time to save space in CacheBin | ||||
// and move bins to different cache lines. | ||||
class CacheBin { | class CacheBin { | |||
LargeMemoryBlock *first, | LargeMemoryBlock *first, | |||
*last; | *last; | |||
/* age of an oldest block in the list; equal to last->age, if last defined, | /* age of an oldest block in the list; equal to last->age, if last defined, | |||
used for quick checking it without acquiring the lock. */ | used for quick checking it without acquiring the lock. */ | |||
uintptr_t oldest; | uintptr_t oldest; | |||
/* currAge when something was excluded out of list because of the age, | /* currAge when something was excluded out of list because of the age, | |||
not because of cache hit */ | not because of cache hit */ | |||
uintptr_t lastCleanedAge; | uintptr_t lastCleanedAge; | |||
/* Current threshold value for the blocks of a particular size. | /* Current threshold value for the blocks of a particular size. | |||
skipping to change at line 270 | skipping to change at line 282 | |||
/* total size of all objects cached in the bin */ | /* total size of all objects cached in the bin */ | |||
cachedSize; | cachedSize; | |||
/* time of last hit for the bin */ | /* time of last hit for the bin */ | |||
intptr_t lastHit; | intptr_t lastHit; | |||
/* time of last get called for the bin */ | /* time of last get called for the bin */ | |||
uintptr_t lastGet; | uintptr_t lastGet; | |||
MallocMutex lock; | MallocMutex lock; | |||
/* should be placed in zero-initialized memory, ctor not needed. */ | /* should be placed in zero-initialized memory, ctor not needed. */ | |||
CacheBin(); | CacheBin(); | |||
enum BinStatus { | ||||
NOT_CHANGED, | ||||
SET_NON_EMPTY, | ||||
SET_EMPTY | ||||
}; | ||||
void forgetOutdatedState(uintptr_t currT); | void forgetOutdatedState(uintptr_t currT); | |||
public: | public: | |||
void init() { memset(this, 0, sizeof(CacheBin)); } | void init() { memset(this, 0, sizeof(CacheBin)); } | |||
inline bool put(ExtMemoryPool *extMemPool, LargeMemoryBlock* ptr, int idx); | LargeMemoryBlock *putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx); | |||
LargeMemoryBlock *putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, int num, int idx); | inline LargeMemoryBlock *get(size_t size, uintptr_t currTime, bool *setNonEmpty); | |||
inline LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, int idx); | ||||
void decreaseThreshold() { | void decreaseThreshold() { | |||
if (ageThreshold) | if (ageThreshold) | |||
ageThreshold = (ageThreshold + lastHit)/2; | ageThreshold = (ageThreshold + lastHit)/2; | |||
} | } | |||
void updateBinsSummary(BinsSummary *binsSummary) const { | void updateBinsSummary(BinsSummary *binsSummary) const { | |||
binsSummary->update(usedSize, cachedSize); | binsSummary->update(usedSize, cachedSize); | |||
} | } | |||
bool cleanToThreshold(ExtMemoryPool *extMemPool, uintptr_t currTime, int idx); | bool cleanToThreshold(Backend *backend, BinBitMask *bitMask, uintptr_t currTime, int idx); | |||
bool cleanAll(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx); | bool cleanAll(Backend *backend, BinBitMask *bitMask, int idx); | |||
void decrUsedSize(size_t size, BinBitMask *bitMask, int idx) { | void decrUsedSize(size_t size, BinBitMask *bitMask, int idx) { | |||
MallocMutex::scoped_lock scoped_cs(lock); | MallocMutex::scoped_lock scoped_cs(lock); | |||
usedSize -= size; | usedSize -= size; | |||
if (!usedSize && !first) | if (!usedSize && !first) | |||
bitMask->set(idx, false); | bitMask->set(idx, false); | |||
} | } | |||
size_t getSize() const { return cachedSize; } | size_t getSize() const { return cachedSize; } | |||
size_t getUsedSize() const { return usedSize; } | size_t getUsedSize() const { return usedSize; } | |||
size_t reportStat(int num, FILE *f); | size_t reportStat(int num, FILE *f); | |||
}; | }; | |||
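The fields above (oldest, lastCleanedAge, ageThreshold, lastHit) drive the eviction policy described in the LargeObjectCacheProps comment. The real update sites live in the .cpp file, not in this header; the toy model below (editorial sketch, hypothetical names, and the assumption that OnMissFactor is applied exactly as the comment states) only makes the two threshold rules concrete:

```cpp
#include <cstdint>
#include <cstdio>

// Editorial sketch, not TBB code: a toy bin with the ageing fields declared above.
struct ToyBin {
    uintptr_t lastCleanedAge; // time when blocks were last evicted because of age
    uintptr_t ageThreshold;   // blocks older than this are candidates for eviction
    uintptr_t lastHit;        // time of the last cache hit in this bin

    // Rule from the LargeObjectCacheProps comment: after a miss that follows a
    // cleanup, the threshold tracks how long the bin has stayed cold (assumed behavior).
    void onMissAfterCleanup(uintptr_t currTime, int OnMissFactor) {
        if (lastCleanedAge)
            ageThreshold = OnMissFactor * (currTime - lastCleanedAge);
    }
    // Same rule as CacheBin::decreaseThreshold() in the header.
    void decreaseThreshold() {
        if (ageThreshold)
            ageThreshold = (ageThreshold + lastHit) / 2;
    }
};

int main() {
    ToyBin b = {100, 0, 380};
    b.onMissAfterCleanup(500, /*OnMissFactor=*/2); // 2 * (500 - 100) = 800
    b.decreaseThreshold();                         // (800 + 380) / 2 = 590
    std::printf("ageThreshold=%llu\n", (unsigned long long)b.ageThreshold);
    return 0;
}
```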
intptr_t tooLargeLOC; // how many times LOC was "too large" | intptr_t tooLargeLOC; // how many times LOC was "too large" | |||
// for fast finding of used bins and bins with non-zero usedSize; | // for fast finding of used bins and bins with non-zero usedSize; | |||
// indexed from the end, as we need largest 1st | // indexed from the end, as we need largest 1st | |||
BinBitMask bitMask; | BinBitMask bitMask; | |||
// bins with lists of recently freed large blocks cached for re-use | // bins with lists of recently freed large blocks cached for re-use | |||
CacheBin bin[numLargeBlockBins]; | CacheBin bin[numBins]; | |||
public: | ||||
static int sizeToIdx(size_t size) { | static int sizeToIdx(size_t size) { | |||
// minLargeObjectSize is minimal size of a large object | MALLOC_ASSERT(Props::MinSize <= size && size < Props::MaxSize, ASSERT_TEXT); | |||
return (size-minLargeObjectSize)/largeBlockCacheStep; | return (size-Props::MinSize)/Props::CacheStep; | |||
} | } | |||
void addToBin(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache, int num, int idx); | static int getNumBins() { return numBins; } | |||
LargeMemoryBlock *sort(ExtMemoryPool *extMemPool, LargeMemoryBlock *list); | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); | |||
LargeMemoryBlock *get(uintptr_t currTime, size_t size); | ||||
void rollbackCacheState(size_t size); | ||||
uintptr_t cleanupCacheIfNeeded(ExtMemoryPool *extMemPool, uintptr_t currTime); | ||||
bool regularCleanup(Backend *backend, uintptr_t currAge); | ||||
bool cleanAll(Backend *backend); | ||||
void reset() { | ||||
tooLargeLOC = 0; | ||||
for (int i = numBins-1; i >= 0; i--) | ||||
bin[i].init(); | ||||
bitMask.reset(); | ||||
} | ||||
#if __TBB_MALLOC_LOCACHE_STAT | ||||
void reportStat(FILE *f); | ||||
#endif | ||||
#if __TBB_MALLOC_WHITEBOX_TEST | ||||
size_t getLOCSize() const; | ||||
size_t getUsedSize() const; | ||||
#endif | ||||
}; | ||||
class LargeObjectCache { | ||||
static const size_t minLargeSize = 8*1024, | ||||
maxLargeSize = 8*1024*1024, | ||||
maxHugeSize = 128*1024*1024; | ||||
public: | ||||
// Difference between object sizes in large block bins | ||||
static const uint32_t largeBlockCacheStep = 8*1024, | ||||
hugeBlockCacheStep = 512*1024; | ||||
private: | ||||
typedef LargeObjectCacheImpl< LargeObjectCacheProps<minLargeSize, maxLargeSize, largeBlockCacheStep, 2, 2, 16> > LargeCacheType; | ||||
typedef LargeObjectCacheImpl< LargeObjectCacheProps<maxLargeSize, maxHugeSize, hugeBlockCacheStep, 1, 1, 4> > HugeCacheType; | ||||
LargeCacheType largeCache; | ||||
HugeCacheType hugeCache; | ||||
/* logical time, incremented on each put/get operation | ||||
To prevent starvation between pools, keep separatly for each pool. | ||||
Overflow is OK, as we only want difference between | ||||
its current value and some recent. | ||||
Both malloc and free should increment logical time, as in | ||||
a different case multiple cached blocks would have same age, | ||||
and accuracy of predictors suffers. | ||||
*/ | ||||
uintptr_t cacheCurrTime; | ||||
static int sizeToIdx(size_t size); | ||||
bool doRegularCleanup(Backend *backend, uintptr_t currTime); | ||||
public: | public: | |||
void put(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); | void put(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock); | |||
void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head); | void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head); | |||
LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size); | LargeMemoryBlock *get(Backend *backend, size_t size); | |||
void rollbackCacheState(size_t size); | void rollbackCacheState(size_t size); | |||
uintptr_t cleanupCacheIfNeeded(ExtMemoryPool *extMemPool); | void cleanupCacheIfNeeded(Backend *backend, uintptr_t currTime); | |||
bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge); | void cleanupCacheIfNeededOnRange(Backend *backend, uintptr_t range, uintptr_t currTime); | |||
bool cleanAll(ExtMemoryPool *extMemPool) { | bool regularCleanup(Backend *backend) { | |||
bool released = false; | return doRegularCleanup(backend, FencedLoad((intptr_t&)cacheCurrTime)); | |||
for (int i = numLargeBlockBins-1; i >= 0; i--) | ||||
released |= bin[i].cleanAll(extMemPool, &bitMask, i); | ||||
return released; | ||||
} | } | |||
bool cleanAll(Backend *backend); | ||||
void reset() { | void reset() { | |||
tooLargeLOC = 0; | largeCache.reset(); | |||
for (int i = numLargeBlockBins-1; i >= 0; i--) | hugeCache.reset(); | |||
bin[i].init(); | ||||
bitMask.reset(); | ||||
} | } | |||
#if __TBB_MALLOC_LOCACHE_STAT | #if __TBB_MALLOC_LOCACHE_STAT | |||
void reportStat(FILE *f); | void reportStat(FILE *f); | |||
#endif | #endif | |||
#if __TBB_MALLOC_WHITEBOX_TEST | #if __TBB_MALLOC_WHITEBOX_TEST | |||
size_t getLOCSize() const; | size_t getLOCSize() const; | |||
size_t getUsedSize() const; | size_t getUsedSize() const; | |||
#endif | #endif | |||
static size_t alignToBin(size_t size) { | ||||
return size<maxLargeSize? alignUp(size, largeBlockCacheStep) | ||||
: alignUp(size, hugeBlockCacheStep); | ||||
} | ||||
uintptr_t getCurrTime(); | ||||
uintptr_t getCurrTimeRange(uintptr_t range); | ||||
}; | }; | |||
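The new LargeObjectCache splits the cached size range in two: an 8KB..8MB cache stepped by largeBlockCacheStep (8KB) and an 8MB..128MB cache stepped by hugeBlockCacheStep (512KB), with alignToBin() choosing the step. A small editorial sketch illustrates how a request size lands in a bin; the combined sizeToIdx() below is a simplification (the header only declares LargeObjectCache::sizeToIdx) and alignUp() is assumed to round up to a multiple:

```cpp
#include <cstddef>
#include <cstdint>
#include <cstdio>

// Constants as declared in the new LargeObjectCache above.
const size_t   minLargeSize = 8*1024, maxLargeSize = 8*1024*1024, maxHugeSize = 128*1024*1024;
const uint32_t largeBlockCacheStep = 8*1024, hugeBlockCacheStep = 512*1024;

// Assumption: alignUp() rounds up to a multiple, as elsewhere in the allocator.
static size_t alignUp(size_t v, size_t a) { return (v + a - 1) / a * a; }

// Same body as LargeObjectCache::alignToBin() above.
static size_t alignToBin(size_t size) {
    return size < maxLargeSize ? alignUp(size, largeBlockCacheStep)
                               : alignUp(size, hugeBlockCacheStep);
}
// Simplified stand-in for the declared-but-not-shown LargeObjectCache::sizeToIdx():
// index within the selected (large or huge) cache, per LargeObjectCacheImpl::sizeToIdx.
static int sizeToIdx(size_t size) {
    return size < maxLargeSize ? (int)((size - minLargeSize)/largeBlockCacheStep)
                               : (int)((size - maxLargeSize)/hugeBlockCacheStep);
}

int main() {
    const size_t requests[] = {20*1024, 9*1000*1000};
    for (size_t s : requests) {
        size_t binned = alignToBin(s); // 20KB -> 24KB (large cache); ~9MB -> 9MiB (huge cache)
        std::printf("request=%zu binnedSize=%zu idx=%d\n", s, binned, sizeToIdx(binned));
    }
    return 0;
}
```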
class BackRefIdx { // composite index to backreference array | class BackRefIdx { // composite index to backreference array | |||
private: | private: | |||
uint16_t master; // index in BackRefMaster | uint16_t master; // index in BackRefMaster | |||
uint16_t largeObj:1; // is this object "large"? | uint16_t largeObj:1; // is this object "large"? | |||
uint16_t offset :15; // offset from beginning of BackRefBlock | uint16_t offset :15; // offset from beginning of BackRefBlock | |||
public: | public: | |||
BackRefIdx() : master((uint16_t)-1) {} | BackRefIdx() : master((uint16_t)-1) {} | |||
bool isInvalid() const { return master == (uint16_t)-1; } | bool isInvalid() const { return master == (uint16_t)-1; } | |||
skipping to change at line 469 | skipping to change at line 528 | |||
enum { | enum { | |||
minBinnedSize = 8*1024UL, | minBinnedSize = 8*1024UL, | |||
/* If huge pages are available, maxBinned_HugePage used. | /* If huge pages are available, maxBinned_HugePage used. | |||
If not, maxBinned_SmallPage is the threshold. | If not, maxBinned_SmallPage is the threshold. | |||
TODO: use pool's granularity for upper bound setting.*/ | TODO: use pool's granularity for upper bound setting.*/ | |||
maxBinned_SmallPage = 1024*1024UL, | maxBinned_SmallPage = 1024*1024UL, | |||
maxBinned_HugePage = 4*1024*1024UL | maxBinned_HugePage = 4*1024*1024UL | |||
}; | }; | |||
public: | public: | |||
static const int freeBinsNum = | static const int freeBinsNum = | |||
(maxBinned_HugePage-minBinnedSize)/largeBlockCacheStep + 1; | (maxBinned_HugePage-minBinnedSize)/LargeObjectCache::largeBlockCacheStep + 1; | |||
// if previous access missed per-thread slabs pool, | // if previous access missed per-thread slabs pool, | |||
// allocate numOfSlabAllocOnMiss blocks in advance | // allocate numOfSlabAllocOnMiss blocks in advance | |||
static const int numOfSlabAllocOnMiss = 2; | static const int numOfSlabAllocOnMiss = 2; | |||
enum { | enum { | |||
NO_BIN = -1, | NO_BIN = -1, | |||
HUGE_BIN = freeBinsNum-1 | HUGE_BIN = freeBinsNum-1 | |||
}; | }; | |||
skipping to change at line 504 | skipping to change at line 563 | |||
// array of bins accomplished bitmask for fast finding of non-empty bin s | // array of bins accomplished bitmask for fast finding of non-empty bin s | |||
class IndexedBins { | class IndexedBins { | |||
BitMaskMin<Backend::freeBinsNum> bitMask; | BitMaskMin<Backend::freeBinsNum> bitMask; | |||
Bin freeBins[Backend::freeBinsNum]; | Bin freeBins[Backend::freeBinsNum]; | |||
public: | public: | |||
FreeBlock *getBlock(int binIdx, BackendSync *sync, size_t size, | FreeBlock *getBlock(int binIdx, BackendSync *sync, size_t size, | |||
bool resSlabAligned, bool alignedBin, bool wait , | bool resSlabAligned, bool alignedBin, bool wait , | |||
int *resLocked); | int *resLocked); | |||
void lockRemoveBlock(int binIdx, FreeBlock *fBlock); | void lockRemoveBlock(int binIdx, FreeBlock *fBlock); | |||
void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz); | void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail); | |||
bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail); | bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail); | |||
int getMinNonemptyBin(unsigned startBin) const { | int getMinNonemptyBin(unsigned startBin) const { | |||
int p = bitMask.getMinTrue(startBin); | int p = bitMask.getMinTrue(startBin); | |||
return p == -1 ? Backend::freeBinsNum : p; | return p == -1 ? Backend::freeBinsNum : p; | |||
} | } | |||
void verify(); | void verify(); | |||
#if __TBB_MALLOC_BACKEND_STAT | #if __TBB_MALLOC_BACKEND_STAT | |||
void reportStat(FILE *f); | void reportStat(FILE *f); | |||
#endif | #endif | |||
void reset(); | void reset(); | |||
skipping to change at line 556 | skipping to change at line 615 | |||
bool askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt, | bool askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt, | |||
int *lockedBinsThreshold, | int *lockedBinsThreshold, | |||
int numOfLockedBins, bool *largeBinsUpdated); | int numOfLockedBins, bool *largeBinsUpdated); | |||
FreeBlock *genericGetBlock(int num, size_t size, bool resSlabAligned); | FreeBlock *genericGetBlock(int num, size_t size, bool resSlabAligned); | |||
void genericPutBlock(FreeBlock *fBlock, size_t blockSz); | void genericPutBlock(FreeBlock *fBlock, size_t blockSz); | |||
FreeBlock *getFromAlignedSpace(int binIdx, int num, size_t size, bool resSlabAligned, bool wait, int *locked); | FreeBlock *getFromAlignedSpace(int binIdx, int num, size_t size, bool resSlabAligned, bool wait, int *locked); | |||
FreeBlock *getFromBin(int binIdx, int num, size_t size, bool resSlabAligned, int *locked); | FreeBlock *getFromBin(int binIdx, int num, size_t size, bool resSlabAligned, int *locked); | |||
FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion); | FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion); | |||
void coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop, bool do Stat); | void coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop); | |||
bool scanCoalescQ(bool forceCoalescQDrop); | bool scanCoalescQ(bool forceCoalescQDrop); | |||
void coalescAndPut(FreeBlock *fBlock, size_t blockSz); | void coalescAndPut(FreeBlock *fBlock, size_t blockSz); | |||
void removeBlockFromBin(FreeBlock *fBlock); | void removeBlockFromBin(FreeBlock *fBlock); | |||
void *getRawMem(size_t &size) const; | void *getRawMem(size_t &size) const; | |||
void freeRawMem(void *object, size_t size) const; | void freeRawMem(void *object, size_t size) const; | |||
void putLargeBlock(LargeMemoryBlock *lmb); | ||||
public: | public: | |||
void verify(); | void verify(); | |||
#if __TBB_MALLOC_BACKEND_STAT | #if __TBB_MALLOC_BACKEND_STAT | |||
void reportStat(FILE *f); | void reportStat(FILE *f); | |||
#endif | #endif | |||
bool bootstrap(ExtMemoryPool *extMemoryPool) { | bool bootstrap(ExtMemoryPool *extMemoryPool) { | |||
extMemPool = extMemoryPool; | extMemPool = extMemoryPool; | |||
return addNewRegion(2*1024*1024, /*exact=*/false); | return addNewRegion(2*1024*1024, /*exact=*/false); | |||
} | } | |||
void reset(); | void reset(); | |||
skipping to change at line 592 | skipping to change at line 652 | |||
} | } | |||
void putSlabBlock(BlockI *block) { | void putSlabBlock(BlockI *block) { | |||
genericPutBlock((FreeBlock *)block, slabSize); | genericPutBlock((FreeBlock *)block, slabSize); | |||
} | } | |||
void *getBackRefSpace(size_t size, bool *rawMemUsed); | void *getBackRefSpace(size_t size, bool *rawMemUsed); | |||
void putBackRefSpace(void *b, size_t size, bool rawMemUsed); | void putBackRefSpace(void *b, size_t size, bool rawMemUsed); | |||
bool inUserPool() const; | bool inUserPool() const; | |||
LargeMemoryBlock *getLargeBlock(size_t size); | LargeMemoryBlock *getLargeBlock(size_t size); | |||
void putLargeBlock(LargeMemoryBlock *lmb); | void returnLargeObject(LargeMemoryBlock *lmb); | |||
AskMemFromOSCounter askMemFromOSCounter; | AskMemFromOSCounter askMemFromOSCounter; | |||
private: | private: | |||
static int sizeToBin(size_t size) { | static int sizeToBin(size_t size) { | |||
if (size >= maxBinned_HugePage) | if (size >= maxBinned_HugePage) | |||
return HUGE_BIN; | return HUGE_BIN; | |||
else if (size < minBinnedSize) | else if (size < minBinnedSize) | |||
return NO_BIN; | return NO_BIN; | |||
int bin = (size - minBinnedSize)/largeBlockCacheStep; | int bin = (size - minBinnedSize)/LargeObjectCache::largeBlockCacheStep; | |||
MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size."); | MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size."); | |||
return bin; | return bin; | |||
} | } | |||
#if __TBB_MALLOC_BACKEND_STAT | #if __TBB_MALLOC_BACKEND_STAT | |||
static size_t binToSize(int bin) { | static size_t binToSize(int bin) { | |||
MALLOC_ASSERT(bin < HUGE_BIN, "Invalid bin."); | MALLOC_ASSERT(bin < HUGE_BIN, "Invalid bin."); | |||
return bin*largeBlockCacheStep + minBinnedSize; | return bin*largeBlockCacheStep + minBinnedSize; | |||
} | } | |||
#endif | #endif | |||
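For the backend, sizeToBin() above maps a binned request (8KB up to maxBinned_HugePage) onto one of freeBinsNum bins with an 8KB step, and binToSize() is its statistics-only inverse. A brief editorial sketch with the same constants (stand-alone, hypothetical main()) makes the mapping concrete:

```cpp
#include <cstddef>
#include <cstdio>

// Constants as in the header: binned range and the 8KB step now taken from
// LargeObjectCache::largeBlockCacheStep.
const size_t minBinnedSize = 8*1024, maxBinned_HugePage = 4*1024*1024;
const size_t largeBlockCacheStep = 8*1024;
const int    freeBinsNum = (maxBinned_HugePage - minBinnedSize)/largeBlockCacheStep + 1; // 512
const int    NO_BIN = -1, HUGE_BIN = freeBinsNum - 1;

// Same logic as Backend::sizeToBin()/binToSize() above.
static int sizeToBin(size_t size) {
    if (size >= maxBinned_HugePage) return HUGE_BIN;
    if (size < minBinnedSize)       return NO_BIN;
    return (int)((size - minBinnedSize)/largeBlockCacheStep);
}
static size_t binToSize(int bin) { return bin*largeBlockCacheStep + minBinnedSize; }

int main() {
    std::printf("freeBinsNum=%d\n", freeBinsNum);                   // 512 bins
    std::printf("sizeToBin(16KB)=%d -> %zu bytes\n",
                sizeToBin(16*1024), binToSize(sizeToBin(16*1024))); // bin 1 -> 16384
    std::printf("sizeToBin(4KB)=%d, sizeToBin(4MB)=%d\n",
                sizeToBin(4*1024), sizeToBin(4*1024*1024));         // NO_BIN, HUGE_BIN
    return 0;
}
```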
static bool toAlignedBin(FreeBlock *block, size_t size) { | static bool toAlignedBin(FreeBlock *block, size_t size) { | |||
return isAligned((uintptr_t)block+size, slabSize) | return isAligned((char*)block+size, slabSize) | |||
&& size >= slabSize; | && size >= slabSize; | |||
} | } | |||
inline size_t getMaxBinnedSize(); | inline size_t getMaxBinnedSize(); | |||
IndexedBins freeLargeBins, | IndexedBins freeLargeBins, | |||
freeAlignedBins; | freeAlignedBins; | |||
}; | }; | |||
class AllLargeBlocksList { | class AllLargeBlocksList { | |||
MallocMutex largeObjLock; | MallocMutex largeObjLock; | |||
skipping to change at line 668 | skipping to change at line 728 | |||
size_t granularity, bool keepAllMemory, bool fixedPool); | size_t granularity, bool keepAllMemory, bool fixedPool); | |||
void initTLS(); | void initTLS(); | |||
// i.e., not system default pool for scalable_malloc/scalable_free | // i.e., not system default pool for scalable_malloc/scalable_free | |||
bool userPool() const { return rawAlloc; } | bool userPool() const { return rawAlloc; } | |||
// true if something has been released | // true if something has been released | |||
bool softCachesCleanup(); | bool softCachesCleanup(); | |||
bool releaseTLCaches(); | bool releaseTLCaches(); | |||
// TODO: to release all thread's pools, not just current thread | // TODO: to release all thread's pools, not just current thread | |||
bool hardCachesCleanup() { | bool hardCachesCleanup(); | |||
// thread-local caches must be cleaned before LOC, | ||||
// because object from thread-local cache can be released to LOC | ||||
bool tlCaches = releaseTLCaches(), locCaches = loc.cleanAll(this); | ||||
return tlCaches || locCaches; | ||||
} | ||||
void reset() { | void reset() { | |||
lmbList.removeAll(&backend); | lmbList.removeAll(&backend); | |||
loc.reset(); | loc.reset(); | |||
tlsPointerKey.~TLSKey(); | tlsPointerKey.~TLSKey(); | |||
backend.reset(); | backend.reset(); | |||
} | } | |||
void destroy() { | void destroy() { | |||
// pthread_key_dtors must be disabled before memory unmapping | // pthread_key_dtors must be disabled before memory unmapping | |||
// TODO: race-free solution | // TODO: race-free solution | |||
tlsPointerKey.~TLSKey(); | tlsPointerKey.~TLSKey(); | |||
skipping to change at line 695 | skipping to change at line 750 | |||
backend.destroy(); | backend.destroy(); | |||
} | } | |||
bool mustBeAddedToGlobalLargeBlockList() const { return userPool(); } | bool mustBeAddedToGlobalLargeBlockList() const { return userPool(); } | |||
void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; } | void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; } | |||
inline bool regionsAreReleaseable() const; | inline bool regionsAreReleaseable() const; | |||
LargeMemoryBlock *mallocLargeObject(size_t allocationSize); | LargeMemoryBlock *mallocLargeObject(size_t allocationSize); | |||
void freeLargeObject(LargeMemoryBlock *lmb); | void freeLargeObject(LargeMemoryBlock *lmb); | |||
void freeLargeObjectList(LargeMemoryBlock *head); | void freeLargeObjectList(LargeMemoryBlock *head); | |||
void returnLargeObjectToBackend(LargeMemoryBlock *lmb); | ||||
static void reportHugePageStatus(bool available); | static void reportHugePageStatus(bool available); | |||
}; | }; | |||
inline bool Backend::inUserPool() const { return extMemPool->userPool(); } | inline bool Backend::inUserPool() const { return extMemPool->userPool(); } | |||
struct LargeObjectHdr { | struct LargeObjectHdr { | |||
LargeMemoryBlock *memoryBlock; | LargeMemoryBlock *memoryBlock; | |||
/* Backreference points to LargeObjectHdr. | /* Backreference points to LargeObjectHdr. | |||
Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */ | Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */ | |||
BackRefIdx backRefIdx; | BackRefIdx backRefIdx; | |||
End of changes. 29 change blocks. | ||||
66 lines changed or deleted | 125 lines changed or added | |||