tbbmalloc_internal.h
/*
Copyright 2005-2013 Intel Corporation. All Rights Reserved.

This file is part of Threading Building Blocks.

Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation.

Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

skipping to change at line 108
/********** Various numeric parameters controlling allocations ********/

/*
 * slabSize - the size of a block for allocation of small objects;
 * it must be larger than maxSegregatedObjectSize.
 */
const uintptr_t slabSize = 16*1024;
/*
 * Large blocks cache cleanup frequency.
 * It should be a power of 2 for fast checking.
 */
const unsigned cacheCleanupFreq = 256;
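/* A minimal illustration (not from the original header): because cacheCleanupFreq
   is a power of 2, a "time to clean up?" test on the logical time can use a mask
   instead of a division, e.g.
       if ((currTime & (cacheCleanupFreq - 1)) == 0)  // same as currTime % cacheCleanupFreq == 0
           doCleanup();
   where currTime and doCleanup() are hypothetical names used only for this sketch. */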
/*
 * Best estimate of cache line size, for the purpose of avoiding false sharing.
 * Too high causes memory overhead, too low causes false-sharing overhead.
 * Because, e.g., 32-bit code might run on a 64-bit system with a larger cache line size,
 * it would probably be better to probe at runtime where possible and/or allow for an environment variable override,

skipping to change at line 157
~TLSKey();
TLSData* getThreadMallocTLS() const;
void setThreadMallocTLS( TLSData * newvalue );
TLSData* createTLS(MemoryPool *memPool, Backend *backend);
};

// TODO: make BitMaskBasic more general
// (currently, it fits BitMaskMin well, but is not as suitable for BitMaskMax)
template<unsigned NUM>
class BitMaskBasic {
static const int SZ = (NUM-1)/(CHAR_BIT*sizeof(uintptr_t))+1;
static const unsigned WORD_LEN = CHAR_BIT*sizeof(uintptr_t);
uintptr_t mask[SZ];
protected:
void set(size_t idx, bool val) {
MALLOC_ASSERT(idx<NUM, ASSERT_TEXT);
size_t i = idx / WORD_LEN;
int pos = WORD_LEN - idx % WORD_LEN - 1;
if (val)
AtomicOr(&mask[i], 1ULL << pos);

skipping to change at line 213

public:
void set(size_t idx, bool val) {
BitMaskBasic<NUM>::set(NUM - 1 - idx, val);
}
int getMaxTrue(unsigned startIdx) const {
int p = BitMaskBasic<NUM>::getMinTrue(NUM-startIdx-1);
return -1==p? -1 : (int)NUM - 1 - p;
}
};
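/* Illustrative usage sketch, assuming getMinTrue(startIdx) returns the lowest set
   index at or above startIdx (or -1 if none), that BitMaskMin exposes the same
   set()/getMinTrue() interface (it is used by IndexedBins further below), and that
   the mask starts out zeroed, as in the allocator's zero-initialized placement:
       BitMaskMin<128> low;               // hypothetical instance
       low.set(5, true);
       int lo = low.getMinTrue(0);        // -> 5, smallest set index
       BitMaskMax<128> high;              // indexed from the end internally
       high.set(100, true);
       int hi = high.getMaxTrue(127);     // -> 100, largest set index <= 127
*/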
/* cache blocks in range [MinSize; MaxSize) in bins with CacheStep
 TooLargeFactor -- when the cache size is treated as "too large" in comparison to the user data size
 OnMissFactor -- if a cache miss occurred and the cache was cleaned,
     set ageThreshold to OnMissFactor * the difference
     between the current time and the last time the cache was cleaned.
 LongWaitFactor -- to detect rarely-used bins and forget their usage history
*/
template<size_t MIN_SIZE, size_t MAX_SIZE, uint32_t CACHE_STEP, int TOO_LARGE,
         int ON_MISS, int LONG_WAIT>
struct LargeObjectCacheProps {
static const size_t MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
static const uint32_t CacheStep = CACHE_STEP;
static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS,
                 LongWaitFactor = LONG_WAIT;
};

template<typename Props>
class LargeObjectCacheImpl {
// The number of bins to cache large objects.
static const uint32_t numBins = (Props::MaxSize-Props::MinSize)/Props::CacheStep;
typedef BitMaskMax<numBins> BinBitMask;
// Current sizes of used and cached objects. They are calculated while we are
// traversing the bins, and used for the isLOCTooLarge() check at the same time.
class BinsSummary {
size_t usedSz;
size_t cachedSz;
public:
BinsSummary() : usedSz(0), cachedSz(0) {}
// "too large" criteria
bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor*usedSz; }
void update(size_t usedSize, size_t cachedSize) {
usedSz += usedSize;
cachedSz += cachedSize;
}
void reset() { usedSz = cachedSz = 0; }
};
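/* Worked example, based on the Props instantiations further below: the large
   cache uses TooLargeFactor == 2 and the huge cache uses TooLargeFactor == 1,
   so 3MB cached against 1MB in use marks the large cache as "too large"
   (3 > 2*1), while for the huge cache any cached amount exceeding the used
   amount already qualifies. */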
// 2-linked list of same-size cached blocks ordered by age (oldest on top)
// TODO: do we really want the list to be 2-linked? This would allow us to
// reduce memory consumption and do fewer operations under the lock.
// TODO: try to switch to 32-bit logical time to save space in CacheBin
// and move bins to different cache lines.
class CacheBin {
LargeMemoryBlock *first,
                 *last;
/* age of the oldest block in the list; equal to last->age, if last is defined,
   used for quickly checking it without acquiring the lock. */
uintptr_t oldest;
/* currAge when something was excluded from the list because of its age,
   not because of a cache hit */
uintptr_t lastCleanedAge;
/* Current threshold value for the blocks of a particular size.

skipping to change at line 282

/* total size of all objects cached in the bin */
cachedSize;
/* time of last hit for the bin */
intptr_t lastHit;
/* time of last get called for the bin */
uintptr_t lastGet;
MallocMutex lock;
/* should be placed in zero-initialized memory, ctor not needed. */
CacheBin();
void forgetOutdatedState(uintptr_t currT);
public:
void init() { memset(this, 0, sizeof(CacheBin)); }
LargeMemoryBlock *putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
inline LargeMemoryBlock *get(size_t size, uintptr_t currTime, bool *setNonEmpty);
void decreaseThreshold() {
if (ageThreshold)
ageThreshold = (ageThreshold + lastHit)/2;
}
void updateBinsSummary(BinsSummary *binsSummary) const {
binsSummary->update(usedSize, cachedSize);
}
bool cleanToThreshold(Backend *backend, BinBitMask *bitMask, uintptr_t currTime, int idx);
bool cleanAll(Backend *backend, BinBitMask *bitMask, int idx);
void decrUsedSize(size_t size, BinBitMask *bitMask, int idx) {
MallocMutex::scoped_lock scoped_cs(lock);
usedSize -= size;
if (!usedSize && !first)
bitMask->set(idx, false);
}
size_t getSize() const { return cachedSize; }
size_t getUsedSize() const { return usedSize; }
size_t reportStat(int num, FILE *f);
};

intptr_t tooLargeLOC; // how many times LOC was "too large"
// for fast finding of used bins and bins with non-zero usedSize;
// indexed from the end, as we need largest 1st
BinBitMask bitMask;
// bins with lists of recently freed large blocks cached for re-use
CacheBin bin[numBins];
public:
static int sizeToIdx(size_t size) {
MALLOC_ASSERT(Props::MinSize <= size && size < Props::MaxSize, ASSERT_TEXT);
return (size-Props::MinSize)/Props::CacheStep;
}
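/* Worked example, using the concrete Props defined below: the large cache has
   MinSize = 8K, MaxSize = 8M, CacheStep = 8K, hence numBins = (8M-8K)/8K = 1023
   and a 24KB object maps to idx = (24K-8K)/8K = 2; the huge cache has
   MinSize = 8M, MaxSize = 128M, CacheStep = 512K, hence numBins = 240. */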
static int getNumBins() { return numBins; }

void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
LargeMemoryBlock *get(uintptr_t currTime, size_t size);

void rollbackCacheState(size_t size);
uintptr_t cleanupCacheIfNeeded(ExtMemoryPool *extMemPool, uintptr_t currTime);
bool regularCleanup(Backend *backend, uintptr_t currAge);
bool cleanAll(Backend *backend);
void reset() {
tooLargeLOC = 0;
for (int i = numBins-1; i >= 0; i--)
bin[i].init();
bitMask.reset();
}
#if __TBB_MALLOC_LOCACHE_STAT
void reportStat(FILE *f);
#endif
#if __TBB_MALLOC_WHITEBOX_TEST
size_t getLOCSize() const;
size_t getUsedSize() const;
#endif
};
class LargeObjectCache {
static const size_t minLargeSize = 8*1024,
maxLargeSize = 8*1024*1024,
maxHugeSize = 128*1024*1024;
public:
// Difference between object sizes in large block bins
static const uint32_t largeBlockCacheStep = 8*1024,
hugeBlockCacheStep = 512*1024;
private:
typedef LargeObjectCacheImpl< LargeObjectCacheProps<minLargeSize, maxLargeSize, largeBlockCacheStep, 2, 2, 16> > LargeCacheType;
typedef LargeObjectCacheImpl< LargeObjectCacheProps<maxLargeSize, maxHugeSize, hugeBlockCacheStep, 1, 1, 4> > HugeCacheType;
LargeCacheType largeCache;
HugeCacheType hugeCache;
/* Logical time, incremented on each put/get operation.
   To prevent starvation between pools, it is kept separately for each pool.
   Overflow is OK, as we only want the difference between
   its current value and some recent one.
   Both malloc and free should increment the logical time, as otherwise
   multiple cached blocks would have the same age,
   and the accuracy of the predictors would suffer.
*/
uintptr_t cacheCurrTime;
static int sizeToIdx(size_t size);
bool doRegularCleanup(Backend *backend, uintptr_t currTime);
public:
void put(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head);
LargeMemoryBlock *get(Backend *backend, size_t size);

void rollbackCacheState(size_t size);
void cleanupCacheIfNeeded(Backend *backend, uintptr_t currTime);
void cleanupCacheIfNeededOnRange(Backend *backend, uintptr_t range, uintptr_t currTime);
bool regularCleanup(Backend *backend) {
return doRegularCleanup(backend, FencedLoad((intptr_t&)cacheCurrTime));
}
bool cleanAll(Backend *backend);
void reset() {
largeCache.reset();
hugeCache.reset();
}
#if __TBB_MALLOC_LOCACHE_STAT
void reportStat(FILE *f);
#endif
#if __TBB_MALLOC_WHITEBOX_TEST
size_t getLOCSize() const;
size_t getUsedSize() const;
#endif
static size_t alignToBin(size_t size) {
return size<maxLargeSize? alignUp(size, largeBlockCacheStep)
: alignUp(size, hugeBlockCacheStep);
}
uintptr_t getCurrTime();
uintptr_t getCurrTimeRange(uintptr_t range);
};
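/* A hypothetical routing sketch (not the original implementation): a request is
   first rounded by alignToBin(), then served from largeCache for sizes in
   [8K, 8M) and from hugeCache for sizes in [8M, 128M), e.g.
       size_t sz = LargeObjectCache::alignToBin(requestSize); // round up to a bin boundary
       if (sz < maxLargeSize)
           block = largeCache.get(currTime, sz);              // 8K steps
       else
           block = hugeCache.get(currTime, sz);               // 512K steps
   where requestSize, block and currTime are placeholder names for this sketch. */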
class BackRefIdx { // composite index to backreference array
private:
uint16_t master;     // index in BackRefMaster
uint16_t largeObj:1; // is this object "large"?
uint16_t offset :15; // offset from beginning of BackRefBlock
public:
BackRefIdx() : master((uint16_t)-1) {}
bool isInvalid() const { return master == (uint16_t)-1; }

skipping to change at line 528
enum {
minBinnedSize = 8*1024UL,
/* If huge pages are available, maxBinned_HugePage is used.
   If not, maxBinned_SmallPage is the threshold.
   TODO: use the pool's granularity for setting the upper bound. */
maxBinned_SmallPage = 1024*1024UL,
maxBinned_HugePage = 4*1024*1024UL
};
public:
static const int freeBinsNum =
    (maxBinned_HugePage-minBinnedSize)/LargeObjectCache::largeBlockCacheStep + 1;
// if previous access missed per-thread slabs pool,
// allocate numOfSlabAllocOnMiss blocks in advance
static const int numOfSlabAllocOnMiss = 2;

enum {
NO_BIN = -1,
HUGE_BIN = freeBinsNum-1
};
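/* Worked example from the constants above: with maxBinned_HugePage = 4M,
   minBinnedSize = 8K and largeBlockCacheStep = 8K,
   freeBinsNum = (4M-8K)/8K + 1 = 512, so HUGE_BIN = freeBinsNum-1 = 511
   and the regular bins cover indices 0..510. */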
skipping to change at line 563
// array of bins accompanied by a bitmask for fast finding of non-empty bins
class IndexedBins {
BitMaskMin<Backend::freeBinsNum> bitMask;
Bin freeBins[Backend::freeBinsNum];
public:
FreeBlock *getBlock(int binIdx, BackendSync *sync, size_t size,
                    bool resSlabAligned, bool alignedBin, bool wait,
                    int *resLocked);
void lockRemoveBlock(int binIdx, FreeBlock *fBlock);
void addBlock(int binIdx, FreeBlock *fBlock, size_t blockSz, bool addToTail);
bool tryAddBlock(int binIdx, FreeBlock *fBlock, bool addToTail);
int getMinNonemptyBin(unsigned startBin) const {
int p = bitMask.getMinTrue(startBin);
return p == -1 ? Backend::freeBinsNum : p;
}
void verify();
#if __TBB_MALLOC_BACKEND_STAT
void reportStat(FILE *f);
#endif
void reset();

skipping to change at line 615

bool askMemFromOS(size_t totalReqSize, intptr_t startModifiedCnt,
                  int *lockedBinsThreshold,
                  int numOfLockedBins, bool *largeBinsUpdated);
FreeBlock *genericGetBlock(int num, size_t size, bool resSlabAligned);
void genericPutBlock(FreeBlock *fBlock, size_t blockSz);
FreeBlock *getFromAlignedSpace(int binIdx, int num, size_t size, bool resSlabAligned, bool wait, int *locked);
FreeBlock *getFromBin(int binIdx, int num, size_t size, bool resSlabAligned, int *locked);
FreeBlock *doCoalesc(FreeBlock *fBlock, MemRegion **memRegion);
void coalescAndPutList(FreeBlock *head, bool forceCoalescQDrop);
bool scanCoalescQ(bool forceCoalescQDrop);
void coalescAndPut(FreeBlock *fBlock, size_t blockSz);
void removeBlockFromBin(FreeBlock *fBlock);
void *getRawMem(size_t &size) const;
void freeRawMem(void *object, size_t size) const;
void putLargeBlock(LargeMemoryBlock *lmb);
public:
void verify();
#if __TBB_MALLOC_BACKEND_STAT
void reportStat(FILE *f);
#endif
bool bootstrap(ExtMemoryPool *extMemoryPool) {
extMemPool = extMemoryPool;
return addNewRegion(2*1024*1024, /*exact=*/false);
}
void reset();

skipping to change at line 652

}
void putSlabBlock(BlockI *block) {
genericPutBlock((FreeBlock *)block, slabSize);
}
void *getBackRefSpace(size_t size, bool *rawMemUsed);
void putBackRefSpace(void *b, size_t size, bool rawMemUsed);
bool inUserPool() const;
LargeMemoryBlock *getLargeBlock(size_t size);
void returnLargeObject(LargeMemoryBlock *lmb);
AskMemFromOSCounter askMemFromOSCounter;
private:
static int sizeToBin(size_t size) {
if (size >= maxBinned_HugePage)
return HUGE_BIN;
else if (size < minBinnedSize)
return NO_BIN;
int bin = (size - minBinnedSize)/LargeObjectCache::largeBlockCacheStep;
MALLOC_ASSERT(bin < HUGE_BIN, "Invalid size.");
return bin;
}
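/* Illustration of the mapping above: a 24KB request falls between minBinnedSize
   and maxBinned_HugePage, so bin = (24K-8K)/8K = 2; a 3KB request returns NO_BIN,
   and anything of 4MB or more returns HUGE_BIN. */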
#if __TBB_MALLOC_BACKEND_STAT
static size_t binToSize(int bin) {
MALLOC_ASSERT(bin < HUGE_BIN, "Invalid bin.");
return bin*largeBlockCacheStep + minBinnedSize;
}
#endif
static bool toAlignedBin(FreeBlock *block, size_t size) {
return isAligned((char*)block+size, slabSize)
       && size >= slabSize;
}
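/* Illustration with hypothetical addresses: for a FreeBlock at 0x10000 of size
   0x8000 (32KB), block+size = 0x18000 is a multiple of slabSize (16KB) and the
   size is at least one slab, so the block is eligible for the aligned bins;
   a block at 0x11000 of the same size ends at 0x19000 and is not. */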
inline size_t getMaxBinnedSize();

IndexedBins freeLargeBins,
            freeAlignedBins;
};

class AllLargeBlocksList {
MallocMutex largeObjLock;

skipping to change at line 728
size_t granularity, bool keepAllMemory, bool fixedPool);
void initTLS();
// i.e., not the system default pool for scalable_malloc/scalable_free
bool userPool() const { return rawAlloc; }
// true if something has been released
bool softCachesCleanup();
bool releaseTLCaches();
// TODO: release all threads' pools, not just the current thread's
bool hardCachesCleanup();
void reset() {
lmbList.removeAll(&backend);
loc.reset();
tlsPointerKey.~TLSKey();
backend.reset();
}
void destroy() {
// pthread_key_dtors must be disabled before memory unmapping
// TODO: race-free solution
tlsPointerKey.~TLSKey();

skipping to change at line 750

backend.destroy();
}
bool mustBeAddedToGlobalLargeBlockList() const { return userPool(); }
void delayRegionsReleasing(bool mode) { delayRegsReleasing = mode; }
inline bool regionsAreReleaseable() const;
LargeMemoryBlock *mallocLargeObject(size_t allocationSize);
void freeLargeObject(LargeMemoryBlock *lmb);
void freeLargeObjectList(LargeMemoryBlock *head);
void returnLargeObjectToBackend(LargeMemoryBlock *lmb);
static void reportHugePageStatus(bool available);
};

inline bool Backend::inUserPool() const { return extMemPool->userPool(); }

struct LargeObjectHdr {
LargeMemoryBlock *memoryBlock;
/* Backreference points to LargeObjectHdr.
   Duplicated in LargeMemoryBlock to reuse in subsequent allocations. */
BackRefIdx backRefIdx;