memblock.cc   memblock.cc 
/* /**************************************************************************
* Copyright (C) 1997 Todd Veldhuizen <tveldhui@oonumerics.org> *
* All rights reserved. Please see <blitz/blitz.h> for terms and * blitz/memblock.cc MemoryBlock<T> and MemoryBlockReference<T> method
* conditions of use. s
* *
*/ * $Id$
*
* Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
*
* This file is a part of Blitz.
*
* Blitz is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Blitz is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Blitz. If not, see <http://www.gnu.org/licenses/>.
*
* Suggestions: blitz-devel@lists.sourceforge.net
* Bugs: blitz-support@lists.sourceforge.net
*
* For more information, please see the Blitz++ Home Page:
* https://sourceforge.net/projects/blitz/
*
**************************************************************************
*/
#ifndef BZ_MEMBLOCK_CC #ifndef BZ_MEMBLOCK_CC
#define BZ_MEMBLOCK_CC #define BZ_MEMBLOCK_CC
#include <blitz/numtrait.h> #include <blitz/numtrait.h>
BZ_NAMESPACE(blitz) BZ_NAMESPACE(blitz)
// Null memory block for each (template) instantiation of MemoryBlockReference
template<typename P_type>
NullMemoryBlock<P_type> MemoryBlockReference<P_type>::nullBlock_;
template<typename P_type> template<typename P_type>
void MemoryBlock<P_type>::deallocate() void MemoryBlock<P_type>::deallocate()
{ {
#ifndef BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY const sizeType byteWidth= simdTypes<P_type>::byteWidth;
delete [] dataBlockAddress_; const int cacheLineSize = BZ_L1_CACHE_LINE_SIZE;
#ifdef BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
const sizeType minLengthToAlign =
BZ_CACHE_LINES_TO_ALIGN*cacheLineSize;
#else #else
if (!NumericTypeTraits<T_type>::hasTrivialCtor) { const sizeType minLengthToAlign = blitz::huge(size_t());
for (int i=0; i < length_; ++i) #endif
data_[i].~T_type();
delete [] reinterpret_cast<char*>(dataBlockAddress_); const sizeType numBytes = length_ * sizeof(T_type);
// depending on the situation, the block was allocated as any of
// three different types, so we need to take care to get it right
// when freeing
if (!allocatedByUs_)
// the block was allocated by someone else, so it should just be freed
delete [] dataBlockAddress_;
else {
// we allocated the block, so it may have been shifted
if ((numBytes < minLengthToAlign) || (cacheLineSize%byteWidth !=0)) {
// block was not cache line shifted
delete [] dBA_tv_;
} }
else { else {
delete [] dataBlockAddress_; // block was cache line shifted, manually call destructors
if (!NumericTypeTraits<T_type>::hasTrivialCtor)
for (sizeType i=0; i < length_; ++i)
data_[i].~T_type();
delete [] dBA_char_;
} }
#endif }
} }
/** Allocate a memory block. If we're allocating a large array it may
be desirable for performance reasons to force the array to start
on a cache line boundary. We do this by allocating a little more
memory than necessary, then shifting the pointer to the next cache
line boundary. Note that this only works if the SIMD byte width of
P_type fits evenly into the cache line size, otherwise we may have
to allocate a lot more memory to be sure to get to a common
multiple of both the cache line size and the size of the
object. This can easily be the case for multicomponent containers,
and in that case no cache-line shift is performed. */
template<typename P_type> template<typename P_type>
inline void MemoryBlock<P_type>::allocate(size_t length) inline void MemoryBlock<P_type>::allocate(sizeType length)
{ {
TAU_TYPE_STRING(p1, "MemoryBlock<T>::allocate() [T=" TAU_TYPE_STRING(p1, "MemoryBlock<T>::allocate() [T="
+ CT(P_type) + "]"); + CT(P_type) + "]");
TAU_PROFILE(p1, "void ()", TAU_BLITZ); TAU_PROFILE(p1, "void ()", TAU_BLITZ);
#ifndef BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY const sizeType vecWidth= simdTypes<P_type>::vecWidth;
dataBlockAddress_ = new T_type[length]; const sizeType byteWidth= simdTypes<P_type>::byteWidth;
data_ = dataBlockAddress_; const int cacheLineSize = BZ_L1_CACHE_LINE_SIZE;
BZASSERT(length%vecWidth==0);
#ifdef BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
const sizeType minLengthToAlign =
BZ_CACHE_LINES_TO_ALIGN*cacheLineSize;
#else #else
size_t numBytes = length * sizeof(T_type); const sizeType minLengthToAlign = blitz::huge(size_t());
#endif
const sizeType numBytes = length * sizeof(T_type);
if (numBytes < 1024) //cout << "Size: " << sizeof(P_type) << ", alignment " << __alignof__(P
{ _type) << endl;
dataBlockAddress_ = new T_type[length];
data_ = dataBlockAddress_; if ((numBytes < minLengthToAlign) || (cacheLineSize%byteWidth !=0)) {
// no shifting to cache line
dBA_tv_ =
new typename simdTypes<P_type>::vecType[length/vecWidth];
data_= dataBlockAddress_;
} }
else else {
{ // shift to cache line
// We're allocating a large array. For performance reasons,
// it's advantageous to force the array to start on a dBA_char_ = new char[numBytes + cacheLineSize + 1];
// cache line boundary. We do this by allocating a little
// more memory than necessary, then shifting the pointer // Shift to the next cache line boundary.
// to the next cache line boundary. diffType offsetToCacheLine =
diffType(dBA_char_) % cacheLineSize;
// Patches by Petter Urkedal to support types with nontrivial diffType shift = (offsetToCacheLine == 0) ? 0 :
// constructors. (cacheLineSize - offsetToCacheLine);
data_char_ = dBA_char_ + shift;
const int cacheBlockSize = 128; // Will work for 32, 16 also
// Use placement new to construct types with nontrivial ctors
dataBlockAddress_ = reinterpret_cast<T_type*> if (!NumericTypeTraits<T_type>::hasTrivialCtor) {
(new char[numBytes + cacheBlockSize - 1]); for (sizeType i=0; i < length; ++i)
new(&data_[i]) T_type;
// Shift to the next cache line boundary }
ptrdiff_t offset = ptrdiff_t(dataBlockAddress_) % cacheBlockSize;
ptrdiff_t shift = (offset == 0) ? 0 : (cacheBlockSize - offset);
data_ = reinterpret_cast<T_type*>
(reinterpret_cast<char*>(dataBlockAddress_) + shift);
// Use placement new to construct types with nontrivial ctors
if (!NumericTypeTraits<T_type>::hasTrivialCtor) {
for (int i=0; i < length; ++i)
new(&data_[i]) T_type;
}
} }
allocatedByUs_ = true;
#ifdef BZ_DEBUG_LOG_ALLOCATIONS
cout << "MemoryBlock: allocated " << setw(8) << length
<< " at " << ((void *)dataBlockAddress_) << endl;
#endif #endif
BZASSERT(isVectorAligned(data_));
} }
BZ_NAMESPACE_END BZ_NAMESPACE_END
#endif // BZ_MEMBLOCK_CC #endif // BZ_MEMBLOCK_CC
 End of changes. 15 change blocks. 
56 lines changed or deleted 118 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/