_tbb_windef.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 30
    library without restriction.  Specifically, if other files instantiate
    templates or use macros or inline functions from this file, or you compile
    this file and link it with other files to produce an executable, this
    file does not by itself cause the resulting executable to be covered by
    the GNU General Public License.  This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#ifndef __TBB_tbb_windef_H
-#error Do not #include this file directly. Use "#include tbb/tbb_stddef.h" instead.
+#error Do not #include this internal file directly; use public TBB headers instead.
#endif /* __TBB_tbb_windef_H */

// Check that the target Windows version has all API calls requried for TBB.
// Do not increase the version in condition beyond 0x0500 without prior discussion!
#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0400
#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0400 or greater.
#endif

#if !defined(_MT)
#error TBB requires linkage with multithreaded C/C++ runtime library. \
skipping to change at line 63
#ifdef TBB_USE_DEBUG
#    if TBB_USE_DEBUG
#        if !defined(_DEBUG)
#            pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0")
#        endif
#    else
#        if defined(_DEBUG)
#            pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0")
#        endif
#    endif
#else
#    ifdef _DEBUG
#        define TBB_USE_DEBUG 1
#    endif
#endif

#if __TBB_BUILD && !defined(__TBB_NO_IMPLICIT_LINKAGE)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif

#if _MSC_VER
    #if !__TBB_NO_IMPLICIT_LINKAGE
-        #ifdef _DEBUG
-            #pragma comment(lib, "tbb_debug.lib")
-        #else
-            #pragma comment(lib, "tbb.lib")
+        #ifdef __TBB_LIB_NAME
+            #pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
+        #else
+            #ifdef _DEBUG
+                #pragma comment(lib, "tbb_debug.lib")
+            #else
+                #pragma comment(lib, "tbb.lib")
+            #endif
        #endif
    #endif
#endif

End of changes. 5 change blocks. 9 lines changed or deleted, 9 lines changed or added.


aligned_space.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 47
//! Block of space aligned sufficiently to construct an array T with N elements.
/** The elements are not constructed or destroyed by this class.
    @ingroup memory_allocation */
template<typename T,size_t N>
class aligned_space {
private:
    typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
    element_type array[(sizeof(T)*N+sizeof(element_type)-1)/sizeof(element_type)];
public:
    //! Pointer to beginning of array
-    T* begin() {return reinterpret_cast<T*>(this);}
+    T* begin() {return internal::punned_cast<T*>(this);}
    //! Pointer to one past last element in array.
    T* end() {return begin()+N;}
};

} // namespace tbb

#endif /* __TBB_aligned_space_H */

End of changes. 2 change blocks. 2 lines changed or deleted, 2 lines changed or added.
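
A minimal usage sketch of the aligned_space interface shown above (not part of the diff): the class only supplies raw, suitably aligned storage, so the caller is responsible for constructing and destroying elements in it. The Widget type and example() function are illustrative names, not TBB API.

    #include "tbb/aligned_space.h"
    #include <new>

    struct Widget { int id; };   // illustrative element type

    void example() {
        tbb::aligned_space<Widget,4> space;          // uninitialized storage for 4 Widgets
        for( int i = 0; i < 4; ++i )
            new( space.begin()+i ) Widget();         // placement-construct each element
        // ... use the range [space.begin(), space.end()) ...
        for( Widget* w = space.begin(); w != space.end(); ++w )
            w->~Widget();                            // destroy manually; the class never does
    }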


atomic.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 53
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244 4267)
#endif

namespace tbb {

//! Specifies memory fencing.
enum memory_semantics {
-    //! For internal use only.
-    __TBB_full_fence,
+    //! Sequentially consistent fence.
+    full_fence,
    //! Acquire fence
    acquire,
    //! Release fence
-    release
+    release,
+    //! No ordering
+    relaxed
};

//! @cond INTERNAL
namespace internal {

-#if __GNUC__ || __SUNPRO_CC
+#if __TBB_ATTRIBUTE_ALIGNED_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f  __attribute__ ((aligned(a)));
-#elif defined(__INTEL_COMPILER)||_MSC_VER >= 1300
+#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f;
#else
#error Do not know syntax for forcing alignment.
-#endif /* __GNUC__ */
+#endif

template<size_t S>
struct atomic_rep;           // Primary template declared, but never defined.

template<>
struct atomic_rep<1> {       // Specialization
    typedef int8_t word;
    int8_t value;
};
template<>
skipping to change at line 95 (old) / 97 (new)
template<>
struct atomic_rep<4> {       // Specialization
#if _MSC_VER && __TBB_WORDSIZE==4
    // Work-around that avoids spurious /Wp64 warnings
    typedef intptr_t word;
#else
    typedef int32_t word;
#endif
    __TBB_DECL_ATOMIC_FIELD(int32_t,value,4)
};
+#if __TBB_64BIT_ATOMICS
template<>
struct atomic_rep<8> {       // Specialization
    typedef int64_t word;
    __TBB_DECL_ATOMIC_FIELD(int64_t,value,8)
};
+#endif

template<size_t Size, memory_semantics M>
struct atomic_traits;        // Primary template declared, but not defined.

#define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M)                                          \
    template<> struct atomic_traits<S,M> {                                                \
        typedef atomic_rep<S>::word word;                                                 \
        inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
-            return __TBB_CompareAndSwap##S##M(location,new_value,comparand);             \
+            return __TBB_machine_cmpswp##S##M(location,new_value,comparand);             \
        }                                                                                 \
        inline static word fetch_and_add( volatile void* location, word addend ) {        \
-            return __TBB_FetchAndAdd##S##M(location,addend);                              \
+            return __TBB_machine_fetchadd##S##M(location,addend);                         \
        }                                                                                 \
        inline static word fetch_and_store( volatile void* location, word value ) {       \
-            return __TBB_FetchAndStore##S##M(location,value);                             \
+            return __TBB_machine_fetchstore##S##M(location,value);                        \
        }                                                                                 \
    };

#define __TBB_DECL_ATOMIC_PRIMITIVES(S)                                                   \
    template<memory_semantics M>                                                          \
    struct atomic_traits<S,M> {                                                           \
        typedef atomic_rep<S>::word word;                                                 \
        inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
-            return __TBB_CompareAndSwap##S(location,new_value,comparand);                \
+            return __TBB_machine_cmpswp##S(location,new_value,comparand);                \
        }                                                                                 \
        inline static word fetch_and_add( volatile void* location, word addend ) {        \
-            return __TBB_FetchAndAdd##S(location,addend);                                 \
+            return __TBB_machine_fetchadd##S(location,addend);                            \
        }                                                                                 \
        inline static word fetch_and_store( volatile void* location, word value ) {       \
-            return __TBB_FetchAndStore##S(location,value);                                \
+            return __TBB_machine_fetchstore##S(location,value);                           \
        }                                                                                 \
    };

+template<memory_semantics M>
+struct atomic_load_store_traits;    // Primary template declaration
+
+#define __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(M)                     \
+    template<> struct atomic_load_store_traits<M> {                    \
+        template <typename T>                                          \
+        inline static T load( const volatile T& location ) {           \
+            return __TBB_load_##M( location );                         \
+        }                                                              \
+        template <typename T>                                          \
+        inline static void store( volatile T& location, T value ) {    \
+            __TBB_store_##M( location, value );                        \
+        }                                                              \
+    }

-#if __TBB_DECL_FENCED_ATOMICS
-__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,__TBB_full_fence)
-__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,__TBB_full_fence)
-__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,__TBB_full_fence)
-__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,__TBB_full_fence)
+#if __TBB_USE_FENCED_ATOMICS
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire)
-__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,relaxed)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,relaxed)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,relaxed)
+#if __TBB_64BIT_ATOMICS
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,full_fence)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release)
+__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,relaxed)
+#endif
-#else
+#else /* !__TBB_USE_FENCED_ATOMICS */
__TBB_DECL_ATOMIC_PRIMITIVES(1)
__TBB_DECL_ATOMIC_PRIMITIVES(2)
__TBB_DECL_ATOMIC_PRIMITIVES(4)
+#if __TBB_64BIT_ATOMICS
__TBB_DECL_ATOMIC_PRIMITIVES(8)
+#endif
-#endif
+#endif /* !__TBB_USE_FENCED_ATOMICS */

+__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(full_fence);
+__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire);
+__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(release);
+__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(relaxed);

//! Additive inverse of 1 for type T.
/** Various compilers issue various warnings if -1 is used with various integer types.
    The baroque expression below avoids all the warnings (we hope). */
#define __TBB_MINUS_ONE(T) (T(T(0)-T(1)))

//! Base class that provides basic functionality for atomic<T> without fetch_and_add.
/** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor,
    and can be copied/compared by memcpy/memcmp. */
template<typename T>
skipping to change at line 183 (old) / 215 (new)
    template<memory_semantics M>
    value_type fetch_and_store( value_type value ) {
        converter u, w;
        u.value = value;
        w.bits = internal::atomic_traits<sizeof(value_type),M>::fetch_and_store(&rep.value,u.bits);
        return w.value;
    }

    value_type fetch_and_store( value_type value ) {
-        return fetch_and_store<__TBB_full_fence>(value);
+        return fetch_and_store<full_fence>(value);
    }

    template<memory_semantics M>
    value_type compare_and_swap( value_type value, value_type comparand ) {
        converter u, v, w;
        u.value = value;
        v.value = comparand;
        w.bits = internal::atomic_traits<sizeof(value_type),M>::compare_and_swap(&rep.value,u.bits,v.bits);
        return w.value;
    }

    value_type compare_and_swap( value_type value, value_type comparand ) {
-        return compare_and_swap<__TBB_full_fence>(value,comparand);
+        return compare_and_swap<full_fence>(value,comparand);
    }

    operator value_type() const volatile {                // volatile qualifier here for backwards compatibility
        converter w;
        w.bits = __TBB_load_with_acquire( rep.value );
        return w.value;
    }

+    template<memory_semantics M>
+    value_type load () const {
+        converter u;
+        u.bits = internal::atomic_load_store_traits<M>::load( rep.value );
+        return u.value;
+    }
+
+    value_type load () const {
+        return load<acquire>();
+    }
+
+    template<memory_semantics M>
+    void store ( value_type value ) {
+        converter u;
+        u.value = value;
+        internal::atomic_load_store_traits<M>::store( rep.value, u.bits );
+    }
+
+    void store ( value_type value ) {
+        store<release>( value );
+    }
+
protected:
    value_type store_with_release( value_type rhs ) {
        converter u;
        u.value = rhs;
        __TBB_store_with_release(rep.value,u.bits);
        return rhs;
    }
};

//! Base class that provides basic functionality for atomic<T> with fetch_and_add.
skipping to change at line 229 (old) / 283 (new)
struct atomic_impl_with_arithmetic: atomic_impl<I> {
public:
    typedef I value_type;

    template<memory_semantics M>
    value_type fetch_and_add( D addend ) {
        return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->rep.value, addend*sizeof(StepType) ));
    }

    value_type fetch_and_add( D addend ) {
-        return fetch_and_add<__TBB_full_fence>(addend);
+        return fetch_and_add<full_fence>(addend);
    }

    template<memory_semantics M>
    value_type fetch_and_increment() {
        return fetch_and_add<M>(1);
    }

    value_type fetch_and_increment() {
        return fetch_and_add(1);
    }
skipping to change at line 278 (old) / 332 (new)
    value_type operator++(int) {
        return fetch_and_add(1);
    }

    value_type operator--(int) {
        return fetch_and_add(__TBB_MINUS_ONE(D));
    }
};

-#if __TBB_WORDSIZE == 4
-// Plaforms with 32-bit hardware require special effort for 64-bit loads and stores.
-#if defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400
-
-template<>
-inline atomic_impl<__TBB_LONG_LONG>::operator atomic_impl<__TBB_LONG_LONG>::value_type() const volatile {
-    return __TBB_Load8(&rep.value);
-}
-
-template<>
-inline atomic_impl<unsigned __TBB_LONG_LONG>::operator atomic_impl<unsigned __TBB_LONG_LONG>::value_type() const volatile {
-    return __TBB_Load8(&rep.value);
-}
-
-template<>
-inline atomic_impl<__TBB_LONG_LONG>::value_type atomic_impl<__TBB_LONG_LONG>::store_with_release( value_type rhs ) {
-    __TBB_Store8(&rep.value,rhs);
-    return rhs;
-}
-
-template<>
-inline atomic_impl<unsigned __TBB_LONG_LONG>::value_type atomic_impl<unsigned __TBB_LONG_LONG>::store_with_release( value_type rhs ) {
-    __TBB_Store8(&rep.value,rhs);
-    return rhs;
-}
-
-#endif /* defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 */
-#endif /* __TBB_WORDSIZE==4 */

} /* Internal */
//! @endcond

//! Primary template for atomic.
/** See the Reference for details.
    @ingroup synchronization */
template<typename T>
struct atomic: internal::atomic_impl<T> {
    T operator=( T rhs ) {
        // "this" required here in strict ISO C++ because store_with_release is a dependent name
skipping to change at line 328 (old) / 353 (new)
    }
    atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;}
};

#define __TBB_DECL_ATOMIC(T)                                                                  \
    template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> {            \
        T operator=( T rhs ) {return store_with_release(rhs);}                                \
        atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
    };

-#if defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400
+#if __TBB_64BIT_ATOMICS
__TBB_DECL_ATOMIC(__TBB_LONG_LONG)
__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
#else
-// Some old versions of MVSC cannot correctly compile templates with "long long".
-#endif /* defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 */
+// test_atomic will verify that sizeof(long long)==8
+#endif

__TBB_DECL_ATOMIC(long)
__TBB_DECL_ATOMIC(unsigned long)

#if defined(_MSC_VER) && __TBB_WORDSIZE==4
/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option.
   It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T)
   with an operator=(U) that explicitly converts the U to a T.  Types T and U should be
   type synonyms on the platform.  Type U should be the wider variant of T from the
   perspective of /Wp64. */
#define __TBB_DECL_ATOMIC_ALT(T,U) \
skipping to change at line 391 (old) / 415 (new)
template<> struct atomic<void*>: internal::atomic_impl<void*> {
    void* operator=( void* rhs ) {
        // "this" required here in strict ISO C++ because store_with_release is a dependent name
        return this->store_with_release(rhs);
    }
    atomic<void*>& operator=( const atomic<void*>& rhs ) {
        this->store_with_release(rhs); return *this;
    }
};

+// Helpers to workaround ugly syntax of calling template member function of a
+// template class with template argument dependent on template parameters.
+
+template <memory_semantics M, typename T>
+T load ( const atomic<T>& a ) { return a.template load<M>(); }
+
+template <memory_semantics M, typename T>
+void store ( atomic<T>& a, T value ) { return a.template store<M>(value); }
+
} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warnings 4244, 4267 are back

#endif /* __TBB_atomic_H */

End of changes. 23 change blocks. 102 lines changed or deleted, 138 lines changed or added.
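
A short usage sketch (not part of the diff) showing the additions visible above: the renamed memory_semantics values (full_fence, relaxed), the new atomic<T>::load<M>()/store<M>() members, and the free tbb::load<M>/tbb::store<M> helpers. The variable and function names are illustrative.

    #include "tbb/atomic.h"

    tbb::atomic<int> counter;                             // zero-initialized at namespace scope

    void example() {
        counter = 0;                                      // operator= has release semantics
        counter.fetch_and_add<tbb::relaxed>(1);           // explicitly relaxed read-modify-write
        int snapshot = counter.load<tbb::acquire>();      // explicit acquire load
        counter.store<tbb::release>( snapshot + 1 );      // explicit release store
        int also = tbb::load<tbb::full_fence>( counter ); // free-function helper from the diff
        tbb::store<tbb::relaxed>( counter, also );
        (void)snapshot; (void)also;
    }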


blocked_range.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

End of changes. 1 change blocks. 1 lines changed or deleted, 1 lines changed or added.


blocked_range2d.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 42
#include "tbb_stddef.h"
#include "blocked_range.h"

namespace tbb {

//! A 2-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename RowValue, typename ColValue=RowValue>
class blocked_range2d {
public:
-    //! Type for size of an iteation range
+    //! Type for size of an iteration range
    typedef blocked_range<RowValue> row_range_type;
    typedef blocked_range<ColValue> col_range_type;

private:
    row_range_type my_rows;
    col_range_type my_cols;

public:
    blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,

End of changes. 2 change blocks. 2 lines changed or deleted, 2 lines changed or added.
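
A brief usage sketch (not part of the diff) of how a blocked_range2d is typically consumed. tbb::parallel_for, the six-argument constructor, and the rows()/cols() members are the regular TBB API rather than hunks shown above, and the lambda assumes a C++11-capable compiler; zero_matrix is an illustrative name.

    #include "tbb/blocked_range2d.h"
    #include "tbb/parallel_for.h"

    void zero_matrix( float* m, size_t rows, size_t cols ) {
        tbb::parallel_for(
            tbb::blocked_range2d<size_t>( 0, rows, 16, 0, cols, 16 ),   // grainsize 16 per dimension
            [=]( const tbb::blocked_range2d<size_t>& r ) {
                for( size_t i = r.rows().begin(); i != r.rows().end(); ++i )
                    for( size_t j = r.cols().begin(); j != r.cols().end(); ++j )
                        m[i*cols + j] = 0.0f;        // rows() and cols() are one-dimensional blocked_ranges
            } );
    }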


blocked_range3d.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 42
#include "tbb_stddef.h"
#include "blocked_range.h"

namespace tbb {

//! A 3-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename PageValue, typename RowValue=PageValue, typename ColValue=RowValue>
class blocked_range3d {
public:
-    //! Type for size of an iteation range
+    //! Type for size of an iteration range
    typedef blocked_range<PageValue> page_range_type;
    typedef blocked_range<RowValue>  row_range_type;
    typedef blocked_range<ColValue>  col_range_type;

private:
    page_range_type my_pages;
    row_range_type  my_rows;
    col_range_type  my_cols;

public:

End of changes. 2 change blocks. 2 lines changed or deleted, 2 lines changed or added.


cache_aligned_allocator.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

End of changes. 1 change blocks. 1 lines changed or deleted, 1 lines changed or added.


combinable.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

End of changes. 1 change blocks. 1 lines changed or deleted, 1 lines changed or added.


concurrent_hash_map.h

/*
-    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.
+    Copyright 2005-2011 Intel Corporation.  All Rights Reserved.
    This file is part of Threading Building Blocks.
    Threading Building Blocks is free software; you can redistribute it
    and/or modify it under the terms of the GNU General Public License
    version 2 as published by the Free Software Foundation.
    Threading Building Blocks is distributed in the hope that it will be
    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
skipping to change at line 54
#if !TBB_USE_EXCEPTIONS && _MSC_VER
    #pragma warning (pop)
#endif

#include "cache_aligned_allocator.h"
#include "tbb_allocator.h"
#include "spin_rw_mutex.h"
#include "atomic.h"
#include "aligned_space.h"
#include "tbb_exception.h"
-#include "_concurrent_unordered_internal.h" // Need tbb_hasher
-#if TBB_USE_PERFORMANCE_WARNINGS
+#include "tbb_profiling.h"
+#include "internal/_concurrent_unordered_impl.h" // Need tbb_hasher
+#if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
#include <typeinfo>
#endif
+#if __TBB_STATISTICS
+#include <stdio.h>
+#endif

namespace tbb {

-//! @cond INTERNAL
-namespace internal {
-    //! ITT instrumented routine that loads pointer from location pointed to by src.
-    void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* src );
-    //! ITT instrumented routine that stores src into location pointed to by dst.
-    void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, void* src );
-    //! Routine that loads pointer from location pointed to by src without causing ITT to report a race.
-    void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );
-}
-//! @endcond

//! hash_compare that is default argument for concurrent_hash_map
template<typename Key>
struct tbb_hash_compare {
    static size_t hash( const Key& a ) { return tbb_hasher(a); }
    static bool equal( const Key& a, const Key& b ) { return a == b; }
};
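
A brief usage sketch (not part of the header text above): a user-supplied HashCompare only needs the two members shown in tbb_hash_compare. CaseInsensitiveCompare and WordCountTable are illustrative names, not TBB API, and the hash function is a deliberately simple placeholder.

    #include <cstddef>
    #include <cctype>
    #include <string>
    #include "tbb/concurrent_hash_map.h"

    struct CaseInsensitiveCompare {
        static size_t hash( const std::string& s ) {
            size_t h = 0;
            for( std::string::const_iterator i = s.begin(); i != s.end(); ++i )
                h = h*31 + std::tolower( (unsigned char)*i );   // simple illustrative hash
            return h;
        }
        static bool equal( const std::string& a, const std::string& b ) {
            if( a.size() != b.size() ) return false;
            for( size_t i = 0; i < a.size(); ++i )
                if( std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i]) )
                    return false;
            return true;
        }
    };

    typedef tbb::concurrent_hash_map<std::string,int,CaseInsensitiveCompare> WordCountTable;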
namespace interface4 { namespace interface5 {
template<typename Key, typename T, typename HashCompare = tbb_hash_comp are<Key>, typename A = tbb_allocator<std::pair<Key, T> > > template<typename Key, typename T, typename HashCompare = tbb_hash_comp are<Key>, typename A = tbb_allocator<std::pair<Key, T> > >
class concurrent_hash_map; class concurrent_hash_map;
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
//! Type of a hash code. //! Type of a hash code.
typedef size_t hashcode_t; typedef size_t hashcode_t;
//! Node base type //! Node base type
skipping to change at line 135 skipping to change at line 128
//! Count of segments in the first block //! Count of segments in the first block
static size_type const embedded_buckets = 1<<embedded_block; static size_type const embedded_buckets = 1<<embedded_block;
//! Count of segments in the first block //! Count of segments in the first block
static size_type const first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 static size_type const first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096
//! Size of a pointer / table size //! Size of a pointer / table size
static size_type const pointers_per_table = sizeof(segment_index_t) * 8; // one segment per bit static size_type const pointers_per_table = sizeof(segment_index_t) * 8; // one segment per bit
//! Segment pointer //! Segment pointer
typedef bucket *segment_ptr_t; typedef bucket *segment_ptr_t;
//! Segment pointers table type //! Segment pointers table type
typedef segment_ptr_t segments_table_t[pointers_per_table]; typedef segment_ptr_t segments_table_t[pointers_per_table];
//! Hash mask = sum of allocated segments sizes - 1 //! Hash mask = sum of allocated segment sizes - 1
atomic<hashcode_t> my_mask; atomic<hashcode_t> my_mask;
//! Segment pointers table. Also prevents false sharing between my_ mask and my_size //! Segment pointers table. Also prevents false sharing between my_ mask and my_size
segments_table_t my_table; segments_table_t my_table;
//! Size of container in stored items //! Size of container in stored items
atomic<size_type> my_size; // It must be in separate cache line fro m my_mask due to performance effects atomic<size_type> my_size; // It must be in separate cache line fro m my_mask due to performance effects
//! Zero segment //! Zero segment
bucket my_embedded_segment[embedded_buckets]; bucket my_embedded_segment[embedded_buckets];
#if __TBB_STATISTICS
atomic<unsigned> my_info_resizes; // concurrent ones
mutable atomic<unsigned> my_info_restarts; // race collisions
atomic<unsigned> my_info_rehashes; // invocations of rehash_bucket
#endif
//! Constructor //! Constructor
hash_map_base() { hash_map_base() {
std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128 or 64*8=512 std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128 or 64*8=512
+ sizeof(my_size) + sizeof(my_mask) // 4+4 or 8+8 + sizeof(my_size) + sizeof(my_mask) // 4+4 or 8+8
+ embedded_buckets*sizeof(bucket) ); // n*8 or n*16 + embedded_buckets*sizeof(bucket) ); // n*8 or n*16
for( size_type i = 0; i < embedded_block; i++ ) // fill the tab le for( size_type i = 0; i < embedded_block; i++ ) // fill the tab le
my_table[i] = my_embedded_segment + segment_base(i); my_table[i] = my_embedded_segment + segment_base(i);
my_mask = embedded_buckets - 1; my_mask = embedded_buckets - 1;
__TBB_ASSERT( embedded_block <= first_block, "The first block n umber must include embedded blocks"); __TBB_ASSERT( embedded_block <= first_block, "The first block n umber must include embedded blocks");
#if __TBB_STATISTICS
my_info_resizes = 0; // concurrent ones
my_info_restarts = 0; // race collisions
my_info_rehashes = 0; // invocations of rehash_bucket
#endif
} }
//! @return segment index of given index in the array //! @return segment index of given index in the array
static segment_index_t segment_index_of( size_type index ) { static segment_index_t segment_index_of( size_type index ) {
return segment_index_t( __TBB_Log2( index|1 ) ); return segment_index_t( __TBB_Log2( index|1 ) );
} }
//! @return the first array index of given segment //! @return the first array index of given segment
static segment_index_t segment_base( segment_index_t k ) { static segment_index_t segment_base( segment_index_t k ) {
return (segment_index_t(1)<<k & ~segment_index_t(1)); return (segment_index_t(1)<<k & ~segment_index_t(1));
skipping to change at line 211 skipping to change at line 213
void enable_segment( segment_index_t k, bool is_initial = false ) { void enable_segment( segment_index_t k, bool is_initial = false ) {
__TBB_ASSERT( k, "Zero segment must be embedded" ); __TBB_ASSERT( k, "Zero segment must be embedded" );
enable_segment_failsafe watchdog( my_table, k ); enable_segment_failsafe watchdog( my_table, k );
cache_aligned_allocator<bucket> alloc; cache_aligned_allocator<bucket> alloc;
size_type sz; size_type sz;
__TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignm ent"); __TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignm ent");
if( k >= first_block ) { if( k >= first_block ) {
sz = segment_size( k ); sz = segment_size( k );
segment_ptr_t ptr = alloc.allocate( sz ); segment_ptr_t ptr = alloc.allocate( sz );
init_buckets( ptr, sz, is_initial ); init_buckets( ptr, sz, is_initial );
#if TBB_USE_THREADING_TOOLS itt_hide_store_word( my_table[k], ptr );
// TODO: actually, fence and notification are unnecessary h
ere and below
itt_store_pointer_with_release_v3( my_table + k, ptr );
#else
my_table[k] = ptr;// my_mask has release fence
#endif
sz <<= 1;// double it to get entire capacity of the contain er sz <<= 1;// double it to get entire capacity of the contain er
} else { // the first block } else { // the first block
__TBB_ASSERT( k == embedded_block, "Wrong segment index" ); __TBB_ASSERT( k == embedded_block, "Wrong segment index" );
sz = segment_size( first_block ); sz = segment_size( first_block );
segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets ) ; segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets ) ;
init_buckets( ptr, sz - embedded_buckets, is_initial ); init_buckets( ptr, sz - embedded_buckets, is_initial );
ptr -= segment_base(embedded_block); ptr -= segment_base(embedded_block);
for(segment_index_t i = embedded_block; i < first_block; i+ +) // calc the offsets for(segment_index_t i = embedded_block; i < first_block; i+ +) // calc the offsets
#if TBB_USE_THREADING_TOOLS itt_hide_store_word( my_table[i], ptr + segment_base(i)
itt_store_pointer_with_release_v3( my_table + i, ptr + );
segment_base(i) );
#else
my_table[i] = ptr + segment_base(i);
#endif
} }
#if TBB_USE_THREADING_TOOLS itt_store_word_with_release( my_mask, sz-1 );
itt_store_pointer_with_release_v3( &my_mask, (void*)(sz-1) );
#else
my_mask = sz - 1;
#endif
watchdog.my_segment_ptr = 0; watchdog.my_segment_ptr = 0;
} }
//! Get bucket by (masked) hashcode //! Get bucket by (masked) hashcode
bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add thr ow() everywhere? bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add thr ow() everywhere?
segment_index_t s = segment_index_of( h ); segment_index_t s = segment_index_of( h );
h -= segment_base(s); h -= segment_base(s);
segment_ptr_t seg = my_table[s]; segment_ptr_t seg = my_table[s];
__TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mas k for allocated segments" ); __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mas k for allocated segments" );
return &seg[h]; return &seg[h];
} }
// internal serial rehashing helper // internal serial rehashing helper
void mark_rehashed_levels( hashcode_t h ) throw () { void mark_rehashed_levels( hashcode_t h ) throw () {
segment_index_t s = segment_index_of( h ); segment_index_t s = segment_index_of( h );
while( segment_ptr_t seg = my_table[++s] ) while( segment_ptr_t seg = my_table[++s] )
if( seg[h].node_list == rehash_req ) { if( seg[h].node_list == rehash_req ) {
seg[h].node_list = empty_rehashed; seg[h].node_list = empty_rehashed;
mark_rehashed_levels( h + segment_base(s) ); mark_rehashed_levels( h + ((hashcode_t)1<<s) ); // opti mized segment_base(s)
} }
} }
//! Check for mask race //! Check for mask race
// Splitting into two functions should help inlining // Splitting into two functions should help inlining
inline bool check_mask_race( const hashcode_t h, hashcode_t &m ) co nst { inline bool check_mask_race( const hashcode_t h, hashcode_t &m ) co nst {
hashcode_t m_now, m_old = m; hashcode_t m_now, m_old = m;
#if TBB_USE_THREADING_TOOLS m_now = (hashcode_t) itt_load_word_with_acquire( my_mask );
m_now = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask
);
#else
m_now = my_mask;
#endif
if( m_old != m_now ) if( m_old != m_now )
return check_rehashing_collision( h, m_old, m = m_now ); return check_rehashing_collision( h, m_old, m = m_now );
return false; return false;
} }
//! Process mask race, check for rehashing collision //! Process mask race, check for rehashing collision
bool check_rehashing_collision( const hashcode_t h, hashcode_t m_ol d, hashcode_t m ) const { bool check_rehashing_collision( const hashcode_t h, hashcode_t m_ol d, hashcode_t m ) const {
__TBB_ASSERT(m_old != m, NULL); // TODO?: m arg could be optimi zed out by passing h = h&m __TBB_ASSERT(m_old != m, NULL); // TODO?: m arg could be optimi zed out by passing h = h&m
if( (h & m_old) != (h & m) ) { // mask changed for this hashcod e, rare event if( (h & m_old) != (h & m) ) { // mask changed for this hashcod e, rare event
// condition above proves that 'h' has some other bits set beside 'm_old' // condition above proves that 'h' has some other bits set beside 'm_old'
// find next applicable mask after m_old //TODO: look at bsl instruction // find next applicable mask after m_old //TODO: look at bsl instruction
for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size
; ;
m_old = (m_old<<1) - 1; // get full mask from a bit m_old = (m_old<<1) - 1; // get full mask from a bit
__TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, NULL); __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, NULL);
// check whether it is rehashing/ed // check whether it is rehashing/ed
#if TBB_USE_THREADING_TOOLS if( itt_load_word_with_acquire(get_bucket(h & m_old)->node_
if( itt_load_pointer_with_acquire_v3(&( get_bucket(h & m_ol list) != rehash_req )
d)->node_list )) != rehash_req ) {
#else #if __TBB_STATISTICS
if( __TBB_load_with_acquire(get_bucket( h & m_old )->node_l my_info_restarts++; // race collisions
ist) != rehash_req )
#endif #endif
return true; return true;
}
} }
return false; return false;
} }
//! Insert a node and check for load factor. @return segment index to enable. //! Insert a node and check for load factor. @return segment index to enable.
segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_ t mask ) { segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_ t mask ) {
size_type sz = ++my_size; // prefix form is to enforce allocati on after the first item inserted size_type sz = ++my_size; // prefix form is to enforce allocati on after the first item inserted
add_to_bucket( b, n ); add_to_bucket( b, n );
// check load factor // check load factor
if( sz >= mask ) { // TODO: add custom load_factor if( sz >= mask ) { // TODO: add custom load_factor
segment_index_t new_seg = segment_index_of( mask+1 ); segment_index_t new_seg = __TBB_Log2( mask+1 ); //optimized segment_index_of
__TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocatio ns must not publish new mask until segment has allocated"); __TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocatio ns must not publish new mask until segment has allocated");
#if TBB_USE_THREADING_TOOLS if( !itt_hide_load_word(my_table[new_seg])
if( !itt_load_pointer_v3(my_table+new_seg)
#else
if( !my_table[new_seg]
#endif
&& __TBB_CompareAndSwapW(&my_table[new_seg], 2, 0) == 0 ) && __TBB_CompareAndSwapW(&my_table[new_seg], 2, 0) == 0 )
return new_seg; // The value must be processed return new_seg; // The value must be processed
} }
return 0; return 0;
} }
//! Prepare enough segments for number of buckets //! Prepare enough segments for number of buckets
void reserve(size_type buckets) { void reserve(size_type buckets) {
if( !buckets-- ) return; if( !buckets-- ) return;
bool is_initial = !my_size; bool is_initial = !my_size;
skipping to change at line 377 skipping to change at line 359
my_node = static_cast<node*>( my_bucket->node_list ); my_node = static_cast<node*>( my_bucket->node_list );
if( hash_map_base::is_valid(my_node) ) { if( hash_map_base::is_valid(my_node) ) {
my_index = k; return; my_index = k; return;
} }
++k; ++k;
} }
my_bucket = 0; my_node = 0; my_index = k; // the end my_bucket = 0; my_node = 0; my_index = k; // the end
} }
#if !defined(_MSC_VER) || defined(__INTEL_COMPILER) #if !defined(_MSC_VER) || defined(__INTEL_COMPILER)
template<typename Key, typename T, typename HashCompare, typename A > template<typename Key, typename T, typename HashCompare, typename A >
friend class interface4::concurrent_hash_map; friend class interface5::concurrent_hash_map;
#else #else
public: // workaround public: // workaround
#endif #endif
//! concurrent_hash_map over which we are iterating. //! concurrent_hash_map over which we are iterating.
const Container *my_map; const Container *my_map;
//! Index in hash table for current item //! Index in hash table for current item
size_t my_index; size_t my_index;
//! Pointer to bucket //! Pointer to bucket
skipping to change at line 412 skipping to change at line 394
my_node(other.my_node) my_node(other.my_node)
{} {}
Value& operator*() const { Value& operator*() const {
__TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator unini tialized or at end of container?" ); __TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator unini tialized or at end of container?" );
return my_node->item; return my_node->item;
} }
Value* operator->() const {return &operator*();} Value* operator->() const {return &operator*();}
hash_map_iterator& operator++(); hash_map_iterator& operator++();
//! Post increment //! Post increment
Value* operator++(int) { hash_map_iterator operator++(int) {
Value* result = &operator*(); hash_map_iterator old(*this);
operator++(); operator++();
return result; return old;
} }
}; };
template<typename Container, typename Value> template<typename Container, typename Value>
hash_map_iterator<Container,Value>::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) : hash_map_iterator<Container,Value>::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) :
my_map(&map), my_map(&map),
my_index(index), my_index(index),
my_bucket(b), my_bucket(b),
my_node( static_cast<node*>(n) ) my_node( static_cast<node*>(n) )
{ {
skipping to change at line 634 skipping to change at line 616
node *search_bucket( const key_type &key, bucket *b ) const { node *search_bucket( const key_type &key, bucket *b ) const {
node *n = static_cast<node*>( b->node_list ); node *n = static_cast<node*>( b->node_list );
while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) ) while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) )
n = static_cast<node*>( n->next ); n = static_cast<node*>( n->next );
__TBB_ASSERT(n != internal::rehash_req, "Search can be executed onl y for rehashed bucket"); __TBB_ASSERT(n != internal::rehash_req, "Search can be executed onl y for rehashed bucket");
return n; return n;
} }
//! bucket accessor is to find, rehash, acquire a lock, and access a bu cket //! bucket accessor is to find, rehash, acquire a lock, and access a bu cket
class bucket_accessor : public bucket::scoped_t { class bucket_accessor : public bucket::scoped_t {
bool my_is_writer; // TODO: use it from base type
bucket *my_b; bucket *my_b;
public: public:
bucket_accessor( concurrent_hash_map *base, const hashcode_t h, boo l writer = false ) { acquire( base, h, writer ); } bucket_accessor( concurrent_hash_map *base, const hashcode_t h, boo l writer = false ) { acquire( base, h, writer ); }
//! find a bucket by masked hashcode, optionally rehash, and acquir e the lock //! find a bucket by masked hashcode, optionally rehash, and acquir e the lock
inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) {
my_b = base->get_bucket( h ); my_b = base->get_bucket( h );
#if TBB_USE_THREADING_TOOLS
// TODO: actually, notification is unnecessary here, just hidin g double-check // TODO: actually, notification is unnecessary here, just hidin g double-check
if( itt_load_pointer_with_acquire_v3(&my_b->node_list) == inter if( itt_load_word_with_acquire(my_b->node_list) == internal::re
nal::rehash_req hash_req
#else
if( __TBB_load_with_acquire(my_b->node_list) == internal::rehas
h_req
#endif
&& try_acquire( my_b->mutex, /*write=*/true ) ) && try_acquire( my_b->mutex, /*write=*/true ) )
{ {
if( my_b->node_list == internal::rehash_req ) base->rehash_ bucket( my_b, h ); //recursive rehashing if( my_b->node_list == internal::rehash_req ) base->rehash_ bucket( my_b, h ); //recursive rehashing
my_is_writer = true;
} }
else bucket::scoped_t::acquire( my_b->mutex, /*write=*/my_is_wr iter = writer ); else bucket::scoped_t::acquire( my_b->mutex, writer );
__TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL); __TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL);
} }
//! check whether bucket is locked for write //! check whether bucket is locked for write
bool is_writer() { return my_is_writer; } bool is_writer() { return bucket::scoped_t::is_writer; }
//! get bucket pointer //! get bucket pointer
bucket *operator() () { return my_b; } bucket *operator() () { return my_b; }
// TODO: optimize out
bool upgrade_to_writer() { my_is_writer = true; return bucket::scop
ed_t::upgrade_to_writer(); }
}; };
// TODO refactor to hash_base // TODO refactor to hash_base
void rehash_bucket( bucket *b_new, const hashcode_t h ) { void rehash_bucket( bucket *b_new, const hashcode_t h ) {
__TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (f or write)"); __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (f or write)");
__TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
__TBB_store_with_release(b_new->node_list, internal::empty_rehashed ); // mark rehashed __TBB_store_with_release(b_new->node_list, internal::empty_rehashed ); // mark rehashed
hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask f rom the topmost bit hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask f rom the topmost bit
#if __TBB_STATISTICS
my_info_rehashes++; // invocations of rehash_bucket
#endif
bucket_accessor b_old( this, h & mask ); bucket_accessor b_old( this, h & mask );
mask = (mask<<1) | 1; // get full mask for new bucket mask = (mask<<1) | 1; // get full mask for new bucket
__TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL ); __TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL );
restart: restart:
for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acqu ire(*p); is_valid(n); n = *p ) { for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acqu ire(*p); is_valid(n); n = *p ) {
hashcode_t c = my_hash_compare.hash( static_cast<node*>(n)->ite m.first ); hashcode_t c = my_hash_compare.hash( static_cast<node*>(n)->ite m.first );
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
hashcode_t bmask = h & (mask>>1); hashcode_t bmask = h & (mask>>1);
skipping to change at line 697 skipping to change at line 674
*p = n->next; // exclude from b_old *p = n->next; // exclude from b_old
add_to_bucket( b_new, n ); add_to_bucket( b_new, n );
} else p = &n->next; // iterate to next item } else p = &n->next; // iterate to next item
} }
} }
public: public:
class accessor; class accessor;
//! Combines data access, locking, and garbage collection. //! Combines data access, locking, and garbage collection.
class const_accessor { class const_accessor : private node::scoped_t /*which derived from no_copy*/ {
friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; friend class concurrent_hash_map<Key,T,HashCompare,Allocator>;
friend class accessor; friend class accessor;
void operator=( const accessor & ) const; // Deny access
const_accessor( const accessor & ); // Deny access
public: public:
//! Type of value //! Type of value
typedef const typename concurrent_hash_map::value_type value_type; typedef const typename concurrent_hash_map::value_type value_type;
//! True if result is empty. //! True if result is empty.
bool empty() const {return !my_node;} bool empty() const {return !my_node;}
//! Set to null //! Set to null
void release() { void release() {
if( my_node ) { if( my_node ) {
my_lock.release(); node::scoped_t::release();
my_node = 0; my_node = 0;
} }
} }
//! Return reference to associated value in hash table. //! Return reference to associated value in hash table.
const_reference operator*() const { const_reference operator*() const {
__TBB_ASSERT( my_node, "attempt to dereference empty accessor" ); __TBB_ASSERT( my_node, "attempt to dereference empty accessor" );
return my_node->item; return my_node->item;
} }
//! Return pointer to associated value in hash table. //! Return pointer to associated value in hash table.
const_pointer operator->() const { const_pointer operator->() const {
return &operator*(); return &operator*();
} }
//! Create empty result //! Create empty result
const_accessor() : my_node(NULL) {} const_accessor() : my_node(NULL) {}
//! Destroy result after releasing the underlying reference. //! Destroy result after releasing the underlying reference.
~const_accessor() { ~const_accessor() {
my_node = NULL; // my_lock.release() is called in scoped_lock destructor my_node = NULL; // scoped lock's release() is called in its destructor
} }
private: protected:
bool is_writer() { return node::scoped_t::is_writer; }
node *my_node; node *my_node;
typename node::scoped_t my_lock;
hashcode_t my_hash; hashcode_t my_hash;
}; };
//! Allows write access to elements and combines data access, locking, and garbage collection. //! Allows write access to elements and combines data access, locking, and garbage collection.
class accessor: public const_accessor { class accessor: public const_accessor {
public: public:
//! Type of value //! Type of value
typedef typename concurrent_hash_map::value_type value_type; typedef typename concurrent_hash_map::value_type value_type;
//! Return reference to associated value in hash table. //! Return reference to associated value in hash table.
skipping to change at line 916 skipping to change at line 891
insert( *first ); insert( *first );
} }
//! Erase item. //! Erase item.
/** Return true if item was erased by particularly this call. */ /** Return true if item was erased by particularly this call. */
bool erase( const Key& key ); bool erase( const Key& key );
//! Erase item by const_accessor. //! Erase item by const_accessor.
/** Return true if item was erased by particularly this call. */ /** Return true if item was erased by particularly this call. */
bool erase( const_accessor& item_accessor ) { bool erase( const_accessor& item_accessor ) {
return exclude( item_accessor, /*readonly=*/ true ); return exclude( item_accessor );
} }
//! Erase item by accessor. //! Erase item by accessor.
/** Return true if item was erased by particularly this call. */ /** Return true if item was erased by particularly this call. */
bool erase( accessor& item_accessor ) { bool erase( accessor& item_accessor ) {
return exclude( item_accessor, /*readonly=*/ false ); return exclude( item_accessor );
} }
protected: protected:
//! Insert or find item and optionally acquire a lock on the item. //! Insert or find item and optionally acquire a lock on the item.
bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ); bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write );
//! delete item by accessor //! delete item by accessor
bool exclude( const_accessor &item_accessor, bool readonly ); bool exclude( const_accessor &item_accessor );
//! Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) //! Returns an iterator for an item defined by the key, or for the next item after it (if upper==true)
template<typename I> template<typename I>
std::pair<I, I> internal_equal_range( const Key& key, I end ) const; std::pair<I, I> internal_equal_range( const Key& key, I end ) const;
//! Copy "source" to *this, where *this must start out empty. //! Copy "source" to *this, where *this must start out empty.
void internal_copy( const concurrent_hash_map& source ); void internal_copy( const concurrent_hash_map& source );
template<typename I> template<typename I>
void internal_copy(I first, I last); void internal_copy(I first, I last);
//! Fast find when no concurrent erasure is used. For internal use inside TBB only! //! Fast find when no concurrent erasure is used. For internal use inside TBB only!
/** Return pointer to item with given key, or NULL if no such item exists. /** Return pointer to item with given key, or NULL if no such item exists.
Must not be called concurrently with erasure operations. */ Must not be called concurrently with erasure operations. */
const_pointer internal_fast_find( const Key& key ) const { const_pointer internal_fast_find( const Key& key ) const {
hashcode_t h = my_hash_compare.hash( key ); hashcode_t h = my_hash_compare.hash( key );
#if TBB_USE_THREADING_TOOLS hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask );
#else
hashcode_t m = my_mask;
#endif
node *n; node *n;
restart: restart:
__TBB_ASSERT((m&(m+1))==0, NULL); __TBB_ASSERT((m&(m+1))==0, NULL);
bucket *b = get_bucket( h & m ); bucket *b = get_bucket( h & m );
#if TBB_USE_THREADING_TOOLS
// TODO: actually, notification is unnecessary here, just hiding double-check // TODO: actually, notification is unnecessary here, just hiding double-check
if( itt_load_pointer_with_acquire_v3(&b->node_list) == internal::rehash_req ) if( itt_load_word_with_acquire(b->node_list) == internal::rehash_req )
#else
if( __TBB_load_with_acquire(b->node_list) == internal::rehash_req )
#endif
{ {
bucket::scoped_t lock; bucket::scoped_t lock;
if( lock.try_acquire( b->mutex, /*write=*/true ) ) { if( lock.try_acquire( b->mutex, /*write=*/true ) ) {
if( b->node_list == internal::rehash_req) if( b->node_list == internal::rehash_req)
const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing const_cast<concurrent_hash_map*>(this)->rehash_bucket( b, h & m ); //recursive rehashing
} }
else lock.acquire( b->mutex, /*write=*/false ); else lock.acquire( b->mutex, /*write=*/false );
__TBB_ASSERT(b->node_list!=internal::rehash_req,NULL); __TBB_ASSERT(b->node_list!=internal::rehash_req,NULL);
} }
n = search_bucket( key, b ); n = search_bucket( key, b );
skipping to change at line 989 skipping to change at line 956
#if _MSC_VER && !defined(__INTEL_COMPILER) #if _MSC_VER && !defined(__INTEL_COMPILER)
// Suppress "conditional expression is constant" warning. // Suppress "conditional expression is constant" warning.
#pragma warning( push ) #pragma warning( push )
#pragma warning( disable: 4127 ) #pragma warning( disable: 4127 )
#endif #endif
template<typename Key, typename T, typename HashCompare, typename A> template<typename Key, typename T, typename HashCompare, typename A>
bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ) { bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ) {
__TBB_ASSERT( !result || !result->my_node, NULL ); __TBB_ASSERT( !result || !result->my_node, NULL );
segment_index_t grow_segment;
bool return_value; bool return_value;
node *n, *tmp_n = 0;
hashcode_t const h = my_hash_compare.hash( key ); hashcode_t const h = my_hash_compare.hash( key );
#if TBB_USE_THREADING_TOOLS hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask ); segment_index_t grow_segment = 0;
node *n, *tmp_n = 0;
#else
hashcode_t m = my_mask;
#endif
restart: restart:
{//lock scope {//lock scope
__TBB_ASSERT((m&(m+1))==0, NULL); __TBB_ASSERT((m&(m+1))==0, NULL);
return_value = false; return_value = false;
// get bucket // get bucket
bucket_accessor b( this, h & m ); bucket_accessor b( this, h & m );
// find a node // find a node
n = search_bucket( key, b() ); n = search_bucket( key, b() );
if( op_insert ) { if( op_insert ) {
skipping to change at line 1028 skipping to change at line 991
b.downgrade_to_reader(); b.downgrade_to_reader();
goto exists; goto exists;
} }
} }
if( check_mask_race(h, m) ) if( check_mask_race(h, m) )
goto restart; // b.release() is done in ~b(). goto restart; // b.release() is done in ~b().
// insert and set flag to grow the container // insert and set flag to grow the container
grow_segment = insert_new_node( b(), n = tmp_n, m ); grow_segment = insert_new_node( b(), n = tmp_n, m );
tmp_n = 0; tmp_n = 0;
return_value = true; return_value = true;
} else {
exists: grow_segment = 0;
} }
} else { // find or count } else { // find or count
if( !n ) { if( !n ) {
if( check_mask_race( h, m ) ) if( check_mask_race( h, m ) )
goto restart; // b.release() is done in ~b(). TODO: replace by continue goto restart; // b.release() is done in ~b(). TODO: replace by continue
return false; return false;
} }
return_value = true; return_value = true;
grow_segment = 0;
} }
exists:
if( !result ) goto check_growth; if( !result ) goto check_growth;
// TODO: the following seems as generic/regular operation // TODO: the following seems as generic/regular operation
// acquire the item // acquire the item
if( !result->my_lock.try_acquire( n->mutex, write ) ) { if( !result->try_acquire( n->mutex, write ) ) {
// we are unlucky, prepare for longer wait // we are unlucky, prepare for longer wait
tbb::internal::atomic_backoff trials; tbb::internal::atomic_backoff trials;
do { do {
if( !trials.bounded_pause() ) { if( !trials.bounded_pause() ) {
// the wait takes really long, restart the operation // the wait takes really long, restart the operation
b.release(); b.release();
__TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" ); __TBB_ASSERT( !op_insert || !return_value, "Can't acquire new item in locked bucket?" );
__TBB_Yield(); __TBB_Yield();
#if TBB_USE_THREADING_TOOLS m = (hashcode_t) itt_load_word_with_acquire( my_mask );
m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask );
#else
m = my_mask;
#endif
goto restart; goto restart;
} }
} while( !result->my_lock.try_acquire( n->mutex, write ) ); } while( !result->try_acquire( n->mutex, write ) );
} }
}//lock scope }//lock scope
result->my_node = n; result->my_node = n;
result->my_hash = h; result->my_hash = h;
check_growth: check_growth:
// [opt] grow the container // [opt] grow the container
if( grow_segment ) if( grow_segment ) {
#if __TBB_STATISTICS
my_info_resizes++; // concurrent ones
#endif
enable_segment( grow_segment ); enable_segment( grow_segment );
}
if( tmp_n ) // if op_insert only if( tmp_n ) // if op_insert only
delete_node( tmp_n ); delete_node( tmp_n );
return return_value; return return_value;
} }
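On the item-lock path of lookup() above, a cheap try_acquire on the node mutex is tried first; under contention the code spins with atomic_backoff and, once the wait grows too long, releases the bucket lock, yields, reloads the mask and restarts the whole operation. The following is a hedged sketch of that bounded-spin-then-restart idiom written against public TBB headers only; acquire_or_restart and MAX_TRIALS are hypothetical names, not TBB internals.

#include "tbb/spin_rw_mutex.h"
#include "tbb/tbb_thread.h"

// Returns true if the lock was taken; false means the caller should release
// any outer locks (e.g. the bucket lock) and restart the operation.
inline bool acquire_or_restart( tbb::spin_rw_mutex& m,
                                tbb::spin_rw_mutex::scoped_lock& lock,
                                bool write ) {
    const int MAX_TRIALS = 16;                 // arbitrary bound for the sketch
    for( int trials = 0; trials < MAX_TRIALS; ++trials ) {
        if( lock.try_acquire( m, write ) )
            return true;
        tbb::this_tbb_thread::yield();         // give the lock holder a chance to run
    }
    return false;
}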
template<typename Key, typename T, typename HashCompare, typename A> template<typename Key, typename T, typename HashCompare, typename A>
template<typename I> template<typename I>
std::pair<I, I> concurrent_hash_map<Key,T,HashCompare,A>::internal_equal_range( const Key& key, I end_ ) const { std::pair<I, I> concurrent_hash_map<Key,T,HashCompare,A>::internal_equal_range( const Key& key, I end_ ) const {
hashcode_t h = my_hash_compare.hash( key ); hashcode_t h = my_hash_compare.hash( key );
hashcode_t m = my_mask; hashcode_t m = my_mask;
skipping to change at line 1093 skipping to change at line 1054
b = get_bucket( h &= m ); b = get_bucket( h &= m );
} }
node *n = search_bucket( key, b ); node *n = search_bucket( key, b );
if( !n ) if( !n )
return std::make_pair(end_, end_); return std::make_pair(end_, end_);
iterator lower(*this, h, b, n), upper(lower); iterator lower(*this, h, b, n), upper(lower);
return std::make_pair(lower, ++upper); return std::make_pair(lower, ++upper);
} }
template<typename Key, typename T, typename HashCompare, typename A> template<typename Key, typename T, typename HashCompare, typename A>
bool concurrent_hash_map<Key,T,HashCompare,A>::exclude( const_accessor &item_accessor, bool readonly ) { bool concurrent_hash_map<Key,T,HashCompare,A>::exclude( const_accessor &item_accessor ) {
__TBB_ASSERT( item_accessor.my_node, NULL ); __TBB_ASSERT( item_accessor.my_node, NULL );
node_base *const n = item_accessor.my_node; node_base *const n = item_accessor.my_node;
item_accessor.my_node = NULL; // we ought release accessor anyway
hashcode_t const h = item_accessor.my_hash; hashcode_t const h = item_accessor.my_hash;
#if TBB_USE_THREADING_TOOLS hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask );
#else
hashcode_t m = my_mask;
#endif
do { do {
// get bucket // get bucket
bucket_accessor b( this, h & m, /*writer=*/true ); bucket_accessor b( this, h & m, /*writer=*/true );
node_base **p = &b()->node_list; node_base **p = &b()->node_list;
while( *p && *p != n ) while( *p && *p != n )
p = &(*p)->next; p = &(*p)->next;
if( !*p ) { // someone else was the first if( !*p ) { // someone else was the first
if( check_mask_race( h, m ) ) if( check_mask_race( h, m ) )
continue; continue;
item_accessor.my_lock.release(); item_accessor.release();
return false; return false;
} }
__TBB_ASSERT( *p == n, NULL ); __TBB_ASSERT( *p == n, NULL );
*p = n->next; // remove from container *p = n->next; // remove from container
my_size--; my_size--;
break; break;
} while(true); } while(true);
if( readonly ) // need to get exclusive lock if( !item_accessor.is_writer() ) // need to get exclusive lock
item_accessor.my_lock.upgrade_to_writer(); // return value means nothing here item_accessor.upgrade_to_writer(); // return value means nothing here
item_accessor.my_lock.release(); item_accessor.release();
delete_node( n ); // Only one thread can delete it due to write lock on the chain_mutex delete_node( n ); // Only one thread can delete it
return true; return true;
} }
template<typename Key, typename T, typename HashCompare, typename A> template<typename Key, typename T, typename HashCompare, typename A>
bool concurrent_hash_map<Key,T,HashCompare,A>::erase( const Key &key ) { bool concurrent_hash_map<Key,T,HashCompare,A>::erase( const Key &key ) {
node_base *n; node_base *n;
hashcode_t const h = my_hash_compare.hash( key ); hashcode_t const h = my_hash_compare.hash( key );
#if TBB_USE_THREADING_TOOLS hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask );
#else
hashcode_t m = my_mask;
#endif
restart: restart:
{//lock scope {//lock scope
// get bucket // get bucket
bucket_accessor b( this, h & m ); bucket_accessor b( this, h & m );
search: search:
node_base **p = &b()->node_list; node_base **p = &b()->node_list;
n = *p; n = *p;
while( is_valid(n) && !my_hash_compare.equal(key, static_cast<node* >(n)->item.first ) ) { while( is_valid(n) && !my_hash_compare.equal(key, static_cast<node* >(n)->item.first ) ) {
p = &n->next; p = &n->next;
n = *p; n = *p;
skipping to change at line 1245 skipping to change at line 1197
typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets ); typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets );
reported = true; reported = true;
} }
#endif #endif
} }
template<typename Key, typename T, typename HashCompare, typename A> template<typename Key, typename T, typename HashCompare, typename A>
void concurrent_hash_map<Key,T,HashCompare,A>::clear() { void concurrent_hash_map<Key,T,HashCompare,A>::clear() {
hashcode_t m = my_mask; hashcode_t m = my_mask;
__TBB_ASSERT((m&(m+1))==0, NULL); __TBB_ASSERT((m&(m+1))==0, NULL);
#if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
#if TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0, overpopulated_buckets = 0; // usage statistics
static bool reported = false; static bool reported = false;
#endif #endif
bucket *bp = 0; bucket *bp = 0;
// check consistency // check consistency
for( segment_index_t b = 0; b <= m; b++ ) { for( segment_index_t b = 0; b <= m; b++ ) {
if( b & (b-2) ) ++bp; // not the beginning of a segment if( b & (b-2) ) ++bp; // not the beginning of a segment
else bp = get_bucket( b ); else bp = get_bucket( b );
node_base *n = bp->node_list; node_base *n = bp->node_list;
__TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" ); __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" );
__TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during clear() execution" ); __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during clear() execution" );
#if TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
if( n == internal::empty_rehashed ) empty_buckets++; if( n == internal::empty_rehashed ) empty_buckets++;
else if( n == internal::rehash_req ) buckets--; else if( n == internal::rehash_req ) buckets--;
else if( n->next ) overpopulated_buckets++; else if( n->next ) overpopulated_buckets++;
#endif #endif
#if __TBB_EXTRA_DEBUG #if __TBB_EXTRA_DEBUG
for(; is_valid(n); n = n->next ) { for(; is_valid(n); n = n->next ) {
hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->item.first ); hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->item.first );
h &= m; h &= m;
__TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::rehash_req, "hash() function changed for key in table or internal error" ); __TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::rehash_req, "hash() function changed for key in table or internal error" );
} }
#endif #endif
} }
#if TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
#if __TBB_STATISTICS
printf( "items=%d buckets: capacity=%d rehashed=%d empty=%d overpopulat
ed=%d"
" concurrent: resizes=%u rehashes=%u restarts=%u\n",
current_size, int(m+1), buckets, empty_buckets, overpopulated_bucke
ts,
unsigned(my_info_resizes), unsigned(my_info_rehashes), unsigned(my_
info_restarts) );
my_info_resizes = 0; // concurrent ones
my_info_restarts = 0; // race collisions
my_info_rehashes = 0; // invocations of rehash_bucket
#endif
if( buckets > current_size) empty_buckets -= buckets - current_size; if( buckets > current_size) empty_buckets -= buckets - current_size;
else overpopulated_buckets -= current_size - buckets; // TODO: load_factor? else overpopulated_buckets -= current_size - buckets; // TODO: load_factor?
if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) { if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size || 2*overpopulated_buckets > current_size ) ) {
tbb::internal::runtime_warning( tbb::internal::runtime_warning(
"Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d", "Performance is not optimal because the hash function produces bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d",
typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets ); typeid(*this).name(), current_size, empty_buckets, overpopulated_buckets );
reported = true; reported = true;
} }
#endif #endif
#endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS #endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS
my_size = 0; my_size = 0;
segment_index_t s = segment_index_of( m ); segment_index_t s = segment_index_of( m );
__TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask or concurrent grow" ); __TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask or concurrent grow" );
cache_aligned_allocator<bucket> alloc; cache_aligned_allocator<bucket> alloc;
do { do {
__TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent grow" ); __TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent grow" );
segment_ptr_t buckets_ptr = my_table[s]; segment_ptr_t buckets_ptr = my_table[s];
size_type sz = segment_size( s ? s : 1 ); size_type sz = segment_size( s ? s : 1 );
for( segment_index_t i = 0; i < sz; i++ ) for( segment_index_t i = 0; i < sz; i++ )
for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n = buckets_ptr[i].node_list ) { for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n = buckets_ptr[i].node_list ) {
skipping to change at line 1342 skipping to change at line 1303
for(; first != last; ++first) { for(; first != last; ++first) {
hashcode_t h = my_hash_compare.hash( first->first ); hashcode_t h = my_hash_compare.hash( first->first );
bucket *b = get_bucket( h & m ); bucket *b = get_bucket( h & m );
__TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket in destination table"); __TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket in destination table");
node *n = new( my_allocator ) node(first->first, first->second); node *n = new( my_allocator ) node(first->first, first->second);
add_to_bucket( b, n ); add_to_bucket( b, n );
++my_size; // TODO: replace by non-atomic op ++my_size; // TODO: replace by non-atomic op
} }
} }
} // namespace interface4 } // namespace interface5
using interface4::concurrent_hash_map; using interface5::concurrent_hash_map;
template<typename Key, typename T, typename HashCompare, typename A1, typename A2> template<typename Key, typename T, typename HashCompare, typename A1, typename A2>
inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) {
if(a.size() != b.size()) return false; if(a.size() != b.size()) return false;
typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end()); typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i(a.begin()), i_end(a.end());
typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end()); typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j, j_end(b.end());
for(; i != i_end; ++i) { for(; i != i_end; ++i) {
j = b.equal_range(i->first).first; j = b.equal_range(i->first).first;
if( j == j_end || !(i->second == j->second) ) return false; if( j == j_end || !(i->second == j->second) ) return false;
} }
 End of changes. 63 change blocks. 
147 lines changed or deleted 95 lines changed or added
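For context, the accessor machinery whose internals change above is used through the public concurrent_hash_map interface roughly as follows. This is an illustrative usage sketch, not part of the header; string_table, count_word and lookup_word are made-up names.

#include <string>
#include "tbb/concurrent_hash_map.h"

typedef tbb::concurrent_hash_map<std::string,int> string_table;

void count_word( string_table& table, const std::string& word ) {
    string_table::accessor a;            // holds a write lock on the element
    table.insert( a, word );             // inserts {word, 0} if the key is absent
    a->second += 1;                      // element stays locked until 'a' is released
}

bool lookup_word( const string_table& table, const std::string& word, int& out ) {
    string_table::const_accessor ca;     // holds a read lock on the element
    if( !table.find( ca, word ) )
        return false;
    out = ca->second;
    return true;
}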


 concurrent_queue.h   concurrent_queue.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 32 skipping to change at line 32
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_concurrent_queue_H #ifndef __TBB_concurrent_queue_H
#define __TBB_concurrent_queue_H #define __TBB_concurrent_queue_H
#include "_concurrent_queue_internal.h" #include "internal/_concurrent_queue_impl.h"
namespace tbb { namespace tbb {
namespace strict_ppl { namespace strict_ppl {
//! A high-performance thread-safe non-blocking concurrent queue. //! A high-performance thread-safe non-blocking concurrent queue.
/** Multiple threads may each push and pop concurrently. /** Multiple threads may each push and pop concurrently.
Assignment construction is not allowed. Assignment construction is not allowed.
@ingroup containers */ @ingroup containers */
template<typename T, typename A = cache_aligned_allocator<T> > template<typename T, typename A = cache_aligned_allocator<T> >
class concurrent_queue: public internal::concurrent_queue_base_v3<T> { class concurrent_queue: public internal::concurrent_queue_base_v3<T> {
template<typename Container, typename Value> friend class internal::concurrent_queue_iterator; template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
//! Allocator type //! Allocator type
typedef typename A::template rebind<char>::other page_allocator_type; typedef typename A::template rebind<char>::other page_allocator_type;
page_allocator_type my_allocator; page_allocator_type my_allocator;
//! Allocates a block of size n (bytes) //! Allocates a block of size n (bytes)
/*overide*/ virtual void *allocate_block( size_t n ) { /*override*/ virtual void *allocate_block( size_t n ) {
void *b = reinterpret_cast<void*>(my_allocator.allocate( n )); void *b = reinterpret_cast<void*>(my_allocator.allocate( n ));
if( !b ) if( !b )
internal::throw_exception(internal::eid_bad_alloc); internal::throw_exception(internal::eid_bad_alloc);
return b; return b;
} }
//! Deallocates block created by allocate_block. //! Deallocates block created by allocate_block.
/*override*/ virtual void deallocate_block( void *b, size_t n ) { /*override*/ virtual void deallocate_block( void *b, size_t n ) {
my_allocator.deallocate( reinterpret_cast<char*>(b), n ); my_allocator.deallocate( reinterpret_cast<char*>(b), n );
} }
skipping to change at line 94 skipping to change at line 94
my_allocator( a ) my_allocator( a )
{ {
} }
//! [begin,end) constructor //! [begin,end) constructor
template<typename InputIterator> template<typename InputIterator>
concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) : concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
my_allocator( a ) my_allocator( a )
{ {
for( ; begin != end; ++begin ) for( ; begin != end; ++begin )
internal_push(&*begin); this->internal_push(&*begin);
} }
//! Copy constructor //! Copy constructor
concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) : concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) :
internal::concurrent_queue_base_v3<T>(), my_allocator( a ) internal::concurrent_queue_base_v3<T>(), my_allocator( a )
{ {
assign( src ); this->assign( src );
} }
//! Destroy queue //! Destroy queue
~concurrent_queue(); ~concurrent_queue();
//! Enqueue an item at tail of queue. //! Enqueue an item at tail of queue.
void push( const T& source ) { void push( const T& source ) {
internal_push( &source ); this->internal_push( &source );
} }
//! Attempt to dequeue an item from head of queue. //! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available. /** Does not wait for item to become available.
Returns true if successful; false otherwise. */ Returns true if successful; false otherwise. */
bool try_pop( T& result ) { bool try_pop( T& result ) {
return internal_try_pop( &result ); return this->internal_try_pop( &result );
} }
//! Return the number of items in the queue; thread unsafe //! Return the number of items in the queue; thread unsafe
size_type unsafe_size() const {return this->internal_size();} size_type unsafe_size() const {return this->internal_size();}
//! Equivalent to size()==0. //! Equivalent to size()==0.
bool empty() const {return this->internal_empty();} bool empty() const {return this->internal_empty();}
//! Clear the queue. not thread-safe. //! Clear the queue. not thread-safe.
void clear() ; void clear() ;
skipping to change at line 202 skipping to change at line 202
/*override*/ virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) { /*override*/ virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) {
new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sindex ) ); new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sindex ) );
} }
/*override*/ virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) { /*override*/ virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) {
T& from = get_ref(src,index); T& from = get_ref(src,index);
destroyer d(from); destroyer d(from);
*static_cast<T*>(dst) = from; *static_cast<T*>(dst) = from;
} }
/*overide*/ virtual page *allocate_page() { /*override*/ virtual page *allocate_page() {
size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
page *p = reinterpret_cast<page*>(my_allocator.allocate( n )); page *p = reinterpret_cast<page*>(my_allocator.allocate( n ));
if( !p ) if( !p )
internal::throw_exception(internal::eid_bad_alloc); internal::throw_exception(internal::eid_bad_alloc);
return p; return p;
} }
/*override*/ virtual void deallocate_page( page *p ) { /*override*/ virtual void deallocate_page( page *p ) {
size_t n = sizeof(padded_page) + items_per_page*sizeof(T); size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
my_allocator.deallocate( reinterpret_cast<char*>(p), n ); my_allocator.deallocate( reinterpret_cast<char*>(p), n );
} }
public: public:
//! Element type in the queue. //! Element type in the queue.
typedef T value_type; typedef T value_type;
//! Allocator type //! Allocator type
typedef A allocator_type; typedef A allocator_type;
//! Reference type //! Reference type
typedef T& reference; typedef T& reference;
//! Const reference type //! Const reference type
typedef const T& const_reference; typedef const T& const_reference;
//! Integral type for representing size of the queue. //! Integral type for representing size of the queue.
/** Notice that the size_type is a signed integral type. /** Note that the size_type is a signed integral type.
This is because the size can be negative if there are pending pops without corresponding pushes. */ This is because the size can be negative if there are pending pops without corresponding pushes. */
typedef std::ptrdiff_t size_type; typedef std::ptrdiff_t size_type;
//! Difference type for iterator //! Difference type for iterator
typedef std::ptrdiff_t difference_type; typedef std::ptrdiff_t difference_type;
//! Construct empty queue //! Construct empty queue
explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) : explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) :
concurrent_queue_base_v3( sizeof(T) ), my_allocator( a ) concurrent_queue_base_v3( sizeof(T) ), my_allocator( a )
{ {
skipping to change at line 378 skipping to change at line 378
template<typename InputIterator> template<typename InputIterator>
concurrent_queue( InputIterator b /*begin*/, InputIterator e /*end*/, const A& a = A()) : concurrent_queue( InputIterator b /*begin*/, InputIterator e /*end*/, const A& a = A()) :
concurrent_bounded_queue<T,A>( b, e, a ) concurrent_bounded_queue<T,A>( b, e, a )
{ {
} }
//! Enqueue an item at tail of queue if queue is not already full. //! Enqueue an item at tail of queue if queue is not already full.
/** Does not wait for queue to become not full. /** Does not wait for queue to become not full.
Returns true if item is pushed; false if queue was already full. */ Returns true if item is pushed; false if queue was already full. */
bool push_if_not_full( const T& source ) { bool push_if_not_full( const T& source ) {
return try_push( source ); return this->try_push( source );
} }
//! Attempt to dequeue an item from head of queue. //! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available. /** Does not wait for item to become available.
Returns true if successful; false otherwise. Returns true if successful; false otherwise.
@deprecated Use try_pop() @deprecated Use try_pop()
*/ */
bool pop_if_present( T& destination ) { bool pop_if_present( T& destination ) {
return try_pop( destination ); return this->try_pop( destination );
} }
typedef typename concurrent_bounded_queue<T,A>::iterator iterator; typedef typename concurrent_bounded_queue<T,A>::iterator iterator;
typedef typename concurrent_bounded_queue<T,A>::const_iterator const_iterator; typedef typename concurrent_bounded_queue<T,A>::const_iterator const_iterator;
// //
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe. // The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------ //------------------------------------------------------------------------
iterator begin() {return this->unsafe_begin();} iterator begin() {return this->unsafe_begin();}
iterator end() {return this->unsafe_end();} iterator end() {return this->unsafe_end();}
 End of changes. 12 change blocks. 
12 lines changed or deleted 12 lines changed or added
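The push/try_pop methods and the signed size of the bounded queue documented above are used along these lines. This is an illustrative sketch only; queue_example is a made-up name.

#include "tbb/concurrent_queue.h"

void queue_example() {
    tbb::concurrent_queue<int> q;            // unbounded, non-blocking
    q.push( 1 );
    int v;
    if( q.try_pop( v ) ) {                   // returns false instead of waiting
        /* use v */
    }

    tbb::concurrent_bounded_queue<int> bq;   // bounded, blocking push/pop
    bq.set_capacity( 8 );
    bq.push( 2 );                            // blocks if the queue is full
    bq.pop( v );                             // blocks if the queue is empty
    // bq.size() is signed: it can be negative while pops wait for matching pushes.
}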


 concurrent_unordered_map.h   concurrent_unordered_map.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 35 skipping to change at line 35
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
/* Container implementations in this header are based on PPL implementations /* Container implementations in this header are based on PPL implementations
provided by Microsoft. */ provided by Microsoft. */
#ifndef __TBB_concurrent_unordered_map_H #ifndef __TBB_concurrent_unordered_map_H
#define __TBB_concurrent_unordered_map_H #define __TBB_concurrent_unordered_map_H
#include "_concurrent_unordered_internal.h" #include "internal/_concurrent_unordered_impl.h"
namespace tbb namespace tbb
{ {
// Template class for hash compare
template<typename Key>
class tbb_hash
{
public:
tbb_hash() {}
size_t operator()(const Key& key) const
{
return tbb_hasher(key);
}
};
namespace interface5 { namespace interface5 {
// Template class for hash map traits // Template class for hash map traits
template<typename Key, typename T, typename Hash_compare, typename Allocator, bool Allow_multimapping> template<typename Key, typename T, typename Hash_compare, typename Allocator, bool Allow_multimapping>
class concurrent_unordered_map_traits class concurrent_unordered_map_traits
{ {
protected: protected:
typedef std::pair<const Key, T> value_type; typedef std::pair<const Key, T> value_type;
typedef Key key_type; typedef Key key_type;
typedef Hash_compare hash_compare; typedef Hash_compare hash_compare;
skipping to change at line 93 skipping to change at line 80
}; };
template<class Type1, class Type2> template<class Type1, class Type2>
static const Key& get_key(const std::pair<Type1, Type2>& value) { static const Key& get_key(const std::pair<Type1, Type2>& value) {
return (value.first); return (value.first);
} }
hash_compare my_hash_compare; // the comparator predicate for keys hash_compare my_hash_compare; // the comparator predicate for keys
}; };
template <typename Key, typename T, typename Hasher = tbb_hash<Key>, typename Key_equality = std::equal_to<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > > template <typename Key, typename T, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>, typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
class concurrent_unordered_map : public internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> > class concurrent_unordered_map : public internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
{ {
// Base type definitions // Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare; typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, false> > base_type; typedef internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, false> > base_type;
typedef concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> traits_type; typedef concurrent_unordered_map_traits<Key, T, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> traits_type;
using traits_type::my_hash_compare; using traits_type::my_hash_compare;
#if __TBB_EXTRA_DEBUG #if __TBB_EXTRA_DEBUG
public: public:
#endif #endif
 End of changes. 4 change blocks. 
16 lines changed or deleted 3 lines changed or added
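With tbb::tbb_hash now the default hasher (see the template declaration above), typical use of the map looks roughly like this. An illustrative sketch only; name_map and record are made-up names, and note that, unlike concurrent_hash_map, elements are not locked on access.

#include <string>
#include <utility>
#include "tbb/concurrent_unordered_map.h"

typedef tbb::concurrent_unordered_map<std::string,int> name_map;

void record( name_map& m, const std::string& name ) {
    m.insert( std::make_pair( name, 0 ) );   // concurrent-safe insertion
    name_map::iterator it = m.find( name );  // concurrent-safe lookup
    if( it != m.end() )
        ++it->second;                        // no per-element lock is taken here
}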


 concurrent_vector.h   concurrent_vector.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 38 skipping to change at line 38
#ifndef __TBB_concurrent_vector_H #ifndef __TBB_concurrent_vector_H
#define __TBB_concurrent_vector_H #define __TBB_concurrent_vector_H
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_exception.h" #include "tbb_exception.h"
#include "atomic.h" #include "atomic.h"
#include "cache_aligned_allocator.h" #include "cache_aligned_allocator.h"
#include "blocked_range.h" #include "blocked_range.h"
#include "tbb_machine.h" #include "tbb_machine.h"
#include "tbb_profiling.h"
#include <new> #include <new>
#include <cstring> // for memset()
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers // Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4530) #pragma warning (disable: 4530)
#endif #endif
#include <algorithm> #include <algorithm>
#include <iterator> #include <iterator>
skipping to change at line 80 skipping to change at line 82
template<typename T, class A = cache_aligned_allocator<T> > template<typename T, class A = cache_aligned_allocator<T> >
class concurrent_vector; class concurrent_vector;
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
//! Bad allocation marker //! Bad allocation marker
static void *const vector_allocation_error_flag = reinterpret_cast<void *>(size_t(63)); static void *const vector_allocation_error_flag = reinterpret_cast<void *>(size_t(63));
//! Routine that loads pointer from location pointed to by src without any fence, without causing ITT to report a race.
void __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );
//! Base class of concurrent vector implementation. //! Base class of concurrent vector implementation.
/** @ingroup containers */ /** @ingroup containers */
class concurrent_vector_base_v3 { class concurrent_vector_base_v3 {
protected: protected:
// Basic types declarations // Basic types declarations
typedef size_t segment_index_t; typedef size_t segment_index_t;
typedef size_t size_type; typedef size_t size_type;
// Using enumerations due to Mac linking problems of static const v ariables // Using enumerations due to Mac linking problems of static const v ariables
skipping to change at line 901 skipping to change at line 900
void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(static_cast<const T*>(src)[i]); } void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(static_cast<const T*>(src)[i]); }
void assign(const void *src) { for(; i < n; ++i) array[i] = static_cast<const T*>(src)[i]; } void assign(const void *src) { for(; i < n; ++i) array[i] = static_cast<const T*>(src)[i]; }
template<class I> void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); } template<class I> void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); }
~internal_loop_guide() { ~internal_loop_guide() {
if(i < n) // if exception raised, do zerroing on the rest of items if(i < n) // if exception raised, do zerroing on the rest of items
std::memset(array+i, 0, (n-i)*sizeof(value_type)); std::memset(array+i, 0, (n-i)*sizeof(value_type));
} }
}; };
}; };
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (push)
#pragma warning (disable: 4701) // potentially uninitialized local variable "old"
#endif
template<typename T, class A> template<typename T, class A>
void concurrent_vector<T, A>::shrink_to_fit() { void concurrent_vector<T, A>::shrink_to_fit() {
internal_segments_table old; internal_segments_table old;
__TBB_TRY { __TBB_TRY {
if( internal_compact( sizeof(T), &old, &destroy_array, &copy_array ) ) if( internal_compact( sizeof(T), &old, &destroy_array, &copy_array ) )
internal_free_segments( old.table, pointers_per_long_table, old.first_block ); // free joined and unnecessary segments internal_free_segments( old.table, pointers_per_long_table, old.first_block ); // free joined and unnecessary segments
} __TBB_CATCH(...) { } __TBB_CATCH(...) {
if( old.first_block ) // free segment allocated for compacting. Only for support of exceptions in ctor of user T[ype] if( old.first_block ) // free segment allocated for compacting. Only for support of exceptions in ctor of user T[ype]
internal_free_segments( old.table, 1, old.first_block ); internal_free_segments( old.table, 1, old.first_block );
__TBB_RETHROW(); __TBB_RETHROW();
} }
} }
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4701 is back
template<typename T, class A> template<typename T, class A>
void concurrent_vector<T, A>::internal_free_segments(void *table[], segment_index_t k, segment_index_t first_block) { void concurrent_vector<T, A>::internal_free_segments(void *table[], segment_index_t k, segment_index_t first_block) {
// Free the arrays // Free the arrays
while( k > first_block ) { while( k > first_block ) {
--k; --k;
T* array = static_cast<T*>(table[k]); T* array = static_cast<T*>(table[k]);
table[k] = NULL; table[k] = NULL;
if( array > internal::vector_allocation_error_flag ) // check for correct segment pointer if( array > internal::vector_allocation_error_flag ) // check for correct segment pointer
this->my_allocator.deallocate( array, segment_size(k) ); this->my_allocator.deallocate( array, segment_size(k) );
skipping to change at line 939 skipping to change at line 945
} }
} }
template<typename T, class A> template<typename T, class A>
T& concurrent_vector<T, A>::internal_subscript( size_type index ) const { T& concurrent_vector<T, A>::internal_subscript( size_type index ) const {
__TBB_ASSERT( index < my_early_size, "index out of bounds" ); __TBB_ASSERT( index < my_early_size, "index out of bounds" );
size_type j = index; size_type j = index;
segment_index_t k = segment_base_index_of( j ); segment_index_t k = segment_base_index_of( j );
__TBB_ASSERT( (segment_t*)my_segment != my_storage || k < pointers_per_short_table, "index is being allocated" ); __TBB_ASSERT( (segment_t*)my_segment != my_storage || k < pointers_per_short_table, "index is being allocated" );
// no need in __TBB_load_with_acquire since thread works in own space o r gets // no need in __TBB_load_with_acquire since thread works in own space o r gets
#if TBB_USE_THREADING_TOOLS T* array = static_cast<T*>( tbb::internal::itt_hide_load_word(my_segment[k].array));
T* array = static_cast<T*>( tbb::internal::itt_load_pointer_v3(&my_segment[k].array));
#else
T* array = static_cast<T*>(my_segment[k].array);
#endif /* TBB_USE_THREADING_TOOLS */
__TBB_ASSERT( array != internal::vector_allocation_error_flag, "the instance is broken by bad allocation. Use at() instead" ); __TBB_ASSERT( array != internal::vector_allocation_error_flag, "the instance is broken by bad allocation. Use at() instead" );
__TBB_ASSERT( array, "index is being allocated" ); __TBB_ASSERT( array, "index is being allocated" );
return array[j]; return array[j];
} }
template<typename T, class A> template<typename T, class A>
T& concurrent_vector<T, A>::internal_subscript_with_exceptions( size_type index ) const { T& concurrent_vector<T, A>::internal_subscript_with_exceptions( size_type index ) const {
if( index >= my_early_size ) if( index >= my_early_size )
internal::throw_exception(internal::eid_out_of_range); // throw std::out_of_range internal::throw_exception(internal::eid_out_of_range); // throw std::out_of_range
size_type j = index; size_type j = index;
 End of changes. 7 change blocks. 
11 lines changed or deleted 13 lines changed or added
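A short usage sketch of the concurrent_vector interface whose internals appear above, including the shrink_to_fit() call around which the warning-suppression pragmas are added. Illustrative only; vector_example is a made-up name.

#include "tbb/concurrent_vector.h"

void vector_example() {
    tbb::concurrent_vector<int> v;
    for( int i = 0; i < 100; ++i )
        v.push_back( i );          // safe to call from many threads at once
    int x = v[10];                 // unchecked access
    int y = v.at( 10 );            // checked access; throws on out-of-range or broken elements
    (void)x; (void)y;
    v.shrink_to_fit();             // not thread-safe; compacts the segment table
}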


 condition_variable   condition_variable 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_condition_variable_H #ifndef __TBB_condition_variable_H
#define __TBB_condition_variable_H #define __TBB_condition_variable_H
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "../machine/windows_api.h"
namespace tbb { namespace tbb {
namespace interface5 { namespace interface5 {
namespace internal { namespace internal {
struct condition_variable_using_event struct condition_variable_using_event
{ {
//! Event for blocking waiting threads. //! Event for blocking waiting threads.
HANDLE event; HANDLE event;
//! Protects invariants involving n_waiters, release_count, and epoch. //! Protects invariants involving n_waiters, release_count, and epoch.
CRITICAL_SECTION mutex; CRITICAL_SECTION mutex;
skipping to change at line 59 skipping to change at line 59
unsigned epoch; unsigned epoch;
}; };
}}} // namespace tbb::interface5::internal }}} // namespace tbb::interface5::internal
#ifndef CONDITION_VARIABLE_INIT #ifndef CONDITION_VARIABLE_INIT
typedef void* CONDITION_VARIABLE; typedef void* CONDITION_VARIABLE;
typedef CONDITION_VARIABLE* PCONDITION_VARIABLE; typedef CONDITION_VARIABLE* PCONDITION_VARIABLE;
#endif #endif
#else /* if not _WIN32||_WIN64 */ #else /* if not _WIN32||_WIN64 */
#include <errno.h> // some systems need it for ETIMEDOUT
#include <pthread.h> #include <pthread.h>
#if __linux__
#include <ctime>
#else /* generic Unix */
#include <sys/time.h>
#endif
#endif /* _WIN32||_WIN64 */ #endif /* _WIN32||_WIN64 */
#include "../tbb_stddef.h" #include "../tbb_stddef.h"
#include "../mutex.h" #include "../mutex.h"
#include "../tbb_thread.h" #include "../tbb_thread.h"
#include "../tbb_exception.h" #include "../tbb_exception.h"
#include "../tbb_profiling.h" #include "../tbb_profiling.h"
namespace tbb { namespace tbb {
skipping to change at line 208 skipping to change at line 214
mutex_type* o_pm = pm; mutex_type* o_pm = pm;
pm = NULL; pm = NULL;
owns = false; owns = false;
return o_pm; return o_pm;
} }
// 30.4.3.2.4 observers // 30.4.3.2.4 observers
//! Does this lock own the mutex? //! Does this lock own the mutex?
bool owns_lock() const { return owns; } bool owns_lock() const { return owns; }
// TODO: Un-comment 'explicit' when the last non-C++0x compiler support is dropped
//! Does this lock own the mutex? //! Does this lock own the mutex?
/*explicit*/ operator bool() const { return owns; } /*explicit*/ operator bool() const { return owns; }
//! Return the mutex that this lock currently has. //! Return the mutex that this lock currently has.
mutex_type* mutex() const { return pm; } mutex_type* mutex() const { return pm; }
private: private:
mutex_type* pm; mutex_type* pm;
bool owns; bool owns;
}; };
skipping to change at line 248 skipping to change at line 255
template<typename M> template<typename M>
void swap(unique_lock<M>& x, unique_lock<M>& y) { x.swap( y ); } void swap(unique_lock<M>& x, unique_lock<M>& y) { x.swap( y ); }
namespace internal { namespace internal {
#if _WIN32||_WIN64 #if _WIN32||_WIN64
union condvar_impl_t { union condvar_impl_t {
condition_variable_using_event cv_event; condition_variable_using_event cv_event;
CONDITION_VARIABLE cv_native; CONDITION_VARIABLE cv_native;
}; };
void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv ); void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_destroy_condition_variable( condvar_impl_t& cv ); void __TBB_EXPORTED_FUNC internal_destroy_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv ); void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv ); void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv );
bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL ); bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL );
#else /* if !(_WIN32||_WIN64), i.e., POSIX threads */ #else /* if !(_WIN32||_WIN64), i.e., POSIX threads */
typedef pthread_cond_t condvar_impl_t; typedef pthread_cond_t condvar_impl_t;
#endif #endif
} // namespace internal } // namespace internal
//! cv_status //! cv_status
/** C++0x standard working draft 30.5 */ /** C++0x standard working draft 30.5 */
enum cv_status { no_timeout, timeout }; enum cv_status { no_timeout, timeout };
skipping to change at line 374 skipping to change at line 381
if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT ) if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT )
rc = timeout; rc = timeout;
else { else {
lock.owns = true; lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
} }
} }
lock.owns = true; lock.owns = true;
return rc; return rc;
} }
#else
#if __linux__
#include <ctime>
#else /* generic Unix */
#include <sys/time.h>
#include <errno.h>
#endif
#else /* !(_WIN32||_WIN64) */
inline void condition_variable::wait( unique_lock<mutex>& lock ) inline void condition_variable::wait( unique_lock<mutex>& lock )
{ {
__TBB_ASSERT( lock.owns, NULL ); __TBB_ASSERT( lock.owns, NULL );
lock.owns = false; lock.owns = false;
if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) { if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) {
lock.owns = true; lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed ); throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
} }
// upon successful return, the mutex has been locked and is owned by th e calling thread. // upon successful return, the mutex has been locked and is owned by th e calling thread.
lock.owns = true; lock.owns = true;
 End of changes. 9 change blocks. 
10 lines changed or deleted 11 lines changed or added
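The condition_variable and unique_lock shown above follow the C++0x working-draft shape, so the usual wait-in-a-loop pattern applies. The sketch below is illustrative only; mailbox, consumer and producer are made-up names, and the include path and tbb::interface5 qualification are assumptions about how this compat header is reached.

#include "tbb/mutex.h"
#include "tbb/compat/condition_variable"

struct mailbox {
    tbb::mutex mtx;
    tbb::interface5::condition_variable cv;
    bool ready;
    int payload;
    mailbox() : ready(false), payload(0) {}
};

void consumer( mailbox& m, int& out ) {
    tbb::interface5::unique_lock<tbb::mutex> lock( m.mtx );
    while( !m.ready )          // always re-check the predicate after a wakeup
        m.cv.wait( lock );     // atomically unlocks, sleeps, re-locks
    out = m.payload;
}

void producer( mailbox& m, int value ) {
    {
        tbb::interface5::unique_lock<tbb::mutex> lock( m.mtx );
        m.payload = value;
        m.ready = true;
    }
    m.cv.notify_one();         // wake one waiting consumer
}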


 critical_section.h   critical_section.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef _TBB_CRITICAL_SECTION_H_ #ifndef _TBB_CRITICAL_SECTION_H_
#define _TBB_CRITICAL_SECTION_H_ #define _TBB_CRITICAL_SECTION_H_
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#else #else
#include <pthread.h> #include <pthread.h>
#include <errno.h> #include <errno.h>
#endif // _WIN32||WIN64 #endif // _WIN32||WIN64
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_thread.h" #include "tbb_thread.h"
#include "tbb_exception.h" #include "tbb_exception.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 enumerable_thread_specific.h   enumerable_thread_specific.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_enumerable_thread_specific_H #ifndef __TBB_enumerable_thread_specific_H
#define __TBB_enumerable_thread_specific_H #define __TBB_enumerable_thread_specific_H
#include "concurrent_vector.h" #include "concurrent_vector.h"
#include "tbb_thread.h" #include "tbb_thread.h"
#include "tbb_allocator.h"
#include "cache_aligned_allocator.h" #include "cache_aligned_allocator.h"
#if __SUNPRO_CC #include "aligned_space.h"
#include <string.h> // for memcpy #include <string.h> // for memcpy
#endif
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#else #else
#include <pthread.h> #include <pthread.h>
#endif #endif
namespace tbb { namespace tbb {
//! enum for selecting between single key and key-per-instance versions //! enum for selecting between single key and key-per-instance versions
enum ets_key_usage_type { ets_key_per_instance, ets_no_key }; enum ets_key_usage_type { ets_key_per_instance, ets_no_key };
namespace interface5 { namespace interface6 {
//! @cond //! @cond
namespace internal { namespace internal {
template<ets_key_usage_type ETS_key_type> template<ets_key_usage_type ETS_key_type>
class ets_base: tbb::internal::no_copy { class ets_base: tbb::internal::no_copy {
protected: protected:
#if _WIN32||_WIN64 #if _WIN32||_WIN64
typedef DWORD key_type; typedef DWORD key_type;
#else #else
skipping to change at line 87 skipping to change at line 87
return h>>(8*sizeof(size_t)-lg_size); return h>>(8*sizeof(size_t)-lg_size);
} }
}; };
struct slot { struct slot {
key_type key; key_type key;
void* ptr; void* ptr;
bool empty() const {return !key;} bool empty() const {return !key;}
bool match( key_type k ) const {return key==k;} bool match( key_type k ) const {return key==k;}
bool claim( key_type k ) { bool claim( key_type k ) {
                    __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_type), NULL); __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_type), NULL);
                    __TBB_ASSERT(sizeof(void*)==sizeof(tbb::atomic<key_type>*), NULL);     return tbb::internal::punned_cast<tbb::atomic<key_type>*>(&key)->compare_and_swap(k,0)==0;
                    union { void* space; tbb::atomic<key_type>* key_atomic; } helper;
                    helper.space = &key;
                    return helper.key_atomic->compare_and_swap(k,0)==0;
                } }
}; };
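The change above swaps a union-based type pun for tbb::internal::punned_cast, but the claim logic is unchanged: an empty slot (key==0) is grabbed by compare-and-swapping the key from 0 to the caller's key. The same idea with an explicit tbb::atomic member, as an illustrative sketch only (not the header's code):

#include "tbb/atomic.h"

struct demo_slot {
    tbb::atomic<size_t> key;   // 0 means "this slot is empty"
    void* ptr;
    bool claim( size_t k ) {
        // compare_and_swap returns the previous value; we win only if it was 0
        return key.compare_and_swap( k, 0 ) == 0;
    }
};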
#if __TBB_GCC_3_3_PROTECTED_BROKEN #if __TBB_GCC_3_3_PROTECTED_BROKEN
protected: protected:
#endif #endif
static key_type key_of_current_thread() { static key_type key_of_current_thread() {
tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id(); tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id();
key_type k; key_type k;
memcpy( &k, &id, sizeof(k) ); memcpy( &k, &id, sizeof(k) );
skipping to change at line 235 skipping to change at line 232
size_t mask = ir->mask(); size_t mask = ir->mask();
for(size_t i = ir->start(h);;i=(i+1)&mask) { for(size_t i = ir->start(h);;i=(i+1)&mask) {
slot& s = ir->at(i); slot& s = ir->at(i);
if( s.empty() ) { if( s.empty() ) {
if( s.claim(k) ) { if( s.claim(k) ) {
s.ptr = found; s.ptr = found;
return found; return found;
} }
} }
} }
}; }
//! Specialization that exploits native TLS //! Specialization that exploits native TLS
template <> template <>
class ets_base<ets_key_per_instance>: protected ets_base<ets_no_key > { class ets_base<ets_key_per_instance>: protected ets_base<ets_no_key > {
typedef ets_base<ets_no_key> super; typedef ets_base<ets_no_key> super;
#if _WIN32||_WIN64 #if _WIN32||_WIN64
typedef DWORD tls_key_t; typedef DWORD tls_key_t;
void create_key() { my_key = TlsAlloc(); } void create_key() { my_key = TlsAlloc(); }
void destroy_key() { TlsFree(my_key); } void destroy_key() { TlsFree(my_key); }
void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value) ; } void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value) ; }
skipping to change at line 581 skipping to change at line 578
return i.inner_iter == j.inner_iter; return i.inner_iter == j.inner_iter;
} }
// != // !=
template<typename SegmentedContainer, typename T, typename U> template<typename SegmentedContainer, typename T, typename U>
bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, bool operator!=( const segmented_iterator<SegmentedContainer,T>& i,
const segmented_iterator<SegmentedContainer,U>& j ) { const segmented_iterator<SegmentedContainer,U>& j ) {
return !(i==j); return !(i==j);
} }
template<typename T>
struct destruct_only: tbb::internal::no_copy {
tbb::aligned_space<T,1> value;
~destruct_only() {value.begin()[0].~T();}
};
template<typename T>
struct construct_by_default: tbb::internal::no_assign {
            void construct(void*where) {new(where) T();} // C++ note: the () in T() ensures zero initialization.
construct_by_default( int ) {}
};
template<typename T>
struct construct_by_exemplar: tbb::internal::no_assign {
const T exemplar;
void construct(void*where) {new(where) T(exemplar);}
construct_by_exemplar( const T& t ) : exemplar(t) {}
};
template<typename T, typename Finit>
struct construct_by_finit: tbb::internal::no_assign {
Finit f;
void construct(void* where) {new(where) T(f());}
construct_by_finit( const Finit& f_ ) : f(f_) {}
};
// storage for initialization function pointer // storage for initialization function pointer
template<typename T> template<typename T>
struct callback_base { class callback_base {
virtual T apply( ) = 0; public:
virtual void destroy( ) = 0; // Clone *this
            // need to be able to create copies of callback_base for copy constructor     virtual callback_base* clone() = 0;
            // Destruct and free *this
            virtual callback_base* make_copy() = 0;                       virtual void destroy() = 0;
// need virtual destructor to satisfy GCC compiler warning // Need virtual destructor to satisfy GCC compiler warning
virtual ~callback_base() { } virtual ~callback_base() { }
// Construct T at where
virtual void construct(void* where) = 0;
}; };
        template <typename T, typename Functor>                      template <typename T, typename Constructor>
        struct callback_leaf : public callback_base<T>, public tbb::internal::no_copy {     class callback_leaf: public callback_base<T>, Constructor {
            typedef Functor my_callback_type;                             template<typename X> callback_leaf( const X& x ) : Constructor(x) {}
            typedef callback_leaf<T,Functor> my_type;
            typedef my_type* callback_pointer;                            typedef typename tbb::tbb_allocator<callback_leaf> my_allocator_type;
            typedef typename tbb::tbb_allocator<my_type> my_allocator_type;
            Functor f;
            callback_leaf( const Functor& f_) : f(f_) {                   /*override*/ callback_base<T>* clone() {
            }                                                                 void* where = my_allocator_type().allocate(1);
                                                                              return new(where) callback_leaf(*this);
            static callback_pointer new_callback(const Functor& f_ ) {   }
                void* new_void = my_allocator_type().allocate(1);
                callback_pointer new_cb = new (new_void) callback_leaf<T,Functor>(f_);  // placement new     /*override*/ void destroy() {
                                                                              my_allocator_type().destroy(this);
                return new_cb;                                                my_allocator_type().deallocate(this,1);
            }                                                             }
            /* override */ callback_pointer make_copy() {                 /*override*/ void construct(void* where) {
                return new_callback( f );                                     Constructor::construct(where);
            }                                                             }
                                                                      public:
            /* override */ void destroy( ) {                              template<typename X>
                callback_pointer my_ptr = this;                           static callback_base<T>* make( const X& x ) {
                my_allocator_type().destroy(my_ptr);                          void* where = my_allocator_type().allocate(1);
                my_allocator_type().deallocate(my_ptr,1);                     return new(where) callback_leaf(x);
            }                                                             }
            /* override */ T apply() { return f(); }  // does copy construction of returned value.
        };                                                            };
//! Template for adding padding in order to avoid false sharing //! Template for adding padding in order to avoid false sharing
/** ModularSize should be sizeof(U) modulo the cache line size. /** ModularSize should be sizeof(U) modulo the cache line size.
All maintenance of the space will be done explicitly on push_ba ck, All maintenance of the space will be done explicitly on push_ba ck,
and all thread local copies must be destroyed before the concur rent and all thread local copies must be destroyed before the concur rent
vector is deleted. vector is deleted.
*/ */
template<typename U, size_t ModularSize> template<typename U, size_t ModularSize>
struct ets_element { struct ets_element {
            char value[sizeof(U) + tbb::internal::NFS_MaxLineSize-ModularSize];     char value[ModularSize==0 ? sizeof(U) : sizeof(U)+(tbb::internal::NFS_MaxLineSize-ModularSize)];
void unconstruct() { void unconstruct() {
// "reinterpret_cast<U*>(&value)->~U();" causes type-punnin tbb::internal::punned_cast<U*>(&value)->~U();
g warning with gcc 4.4,
// "U* u = reinterpret_cast<U*>(&value); u->~U();" causes u
nused variable warning with VS2010.
// Thus another "casting via union" hack.
__TBB_ASSERT(sizeof(void*)==sizeof(U*),NULL);
union { void* space; U* val; } helper;
helper.space = &value;
helper.val->~U();
}
};
//! Partial specialization for case where no padding is needed.
template<typename U>
struct ets_element<U,0> {
char value[sizeof(U)];
void unconstruct() { // Same implementation as in general case
__TBB_ASSERT(sizeof(void*)==sizeof(U*),NULL);
union { void* space; U* val; } helper;
helper.space = &value;
helper.val->~U();
} }
}; };
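The new single ets_element above folds the old unpadded specialization into one array-size expression. A stand-alone restatement of that sizing rule, as a sketch; the 128-byte line size is an assumed stand-in for tbb::internal::NFS_MaxLineSize:

#include <cstddef>

const std::size_t cache_line = 128;          // assumed stand-in for NFS_MaxLineSize

template<typename U>
struct padded_demo {
    static const std::size_t modular = sizeof(U) % cache_line;
    // no padding when sizeof(U) is already a multiple of the line, otherwise round up
    char value[ modular==0 ? sizeof(U) : sizeof(U) + (cache_line - modular) ];
};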
} // namespace internal } // namespace internal
//! @endcond //! @endcond
//! The enumerable_thread_specific container //! The enumerable_thread_specific container
/** enumerable_thread_specific has the following properties: /** enumerable_thread_specific has the following properties:
- thread-local copies are lazily created, with default, exemplar or function initialization. - thread-local copies are lazily created, with default, exemplar or function initialization.
- thread-local copies do not move (during lifetime, and excepting c lear()) so the address of a copy is invariant. - thread-local copies do not move (during lifetime, and excepting c lear()) so the address of a copy is invariant.
skipping to change at line 702 skipping to change at line 709
typedef ptrdiff_t difference_type; typedef ptrdiff_t difference_type;
generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {} generic_range_type( I begin_, I end_, size_t grainsize_ = 1) : blocked_range<I>(begin_,end_,grainsize_) {}
template<typename U> template<typename U>
            generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {} generic_range_type( const generic_range_type<U>& r) : blocked_range<I>(r.begin(),r.end(),r.grainsize()) {}
            generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {} generic_range_type( generic_range_type& r, split ) : blocked_range<I>(r,split()) {}
}; };
        typedef typename Allocator::template rebind< padded_element >::other padded_allocator_type; typedef typename Allocator::template rebind< padded_element >::other padded_allocator_type;
        typedef tbb::concurrent_vector< padded_element, padded_allocator_type > internal_collection_type; typedef tbb::concurrent_vector< padded_element, padded_allocator_type > internal_collection_type;
        internal::callback_base<T> *my_finit_callback;                internal::callback_base<T> *my_construct_callback;
        // need to use a pointed-to exemplar because T may not be assignable.
        // using tbb_allocator instead of padded_element_allocator because we may be
        // copying an exemplar from one instantiation of ETS to another with a different
        // allocator.
        typedef typename tbb::tbb_allocator<padded_element > exemplar_allocator_type;
        static padded_element * create_exemplar(const T& my_value) {
            padded_element *new_exemplar = reinterpret_cast<padded_element *>(exemplar_allocator_type().allocate(1));
            new(new_exemplar->value) T(my_value);
            return new_exemplar;
        }
        static padded_element *create_exemplar( ) {
            padded_element *new_exemplar = reinterpret_cast<padded_element *>(exemplar_allocator_type().allocate(1));
            new(new_exemplar->value) T( );
            return new_exemplar;
        }
        static void free_exemplar(padded_element *my_ptr) {
            my_ptr->unconstruct();
            exemplar_allocator_type().destroy(my_ptr);
            exemplar_allocator_type().deallocate(my_ptr,1);
        }
        padded_element* my_exemplar_ptr;
internal_collection_type my_locals; internal_collection_type my_locals;
/*override*/ void* create_local() { /*override*/ void* create_local() {
#if TBB_DEPRECATED #if TBB_DEPRECATED
void* lref = &my_locals[my_locals.push_back(padded_element())]; void* lref = &my_locals[my_locals.push_back(padded_element())];
#else #else
void* lref = &*my_locals.push_back(padded_element()); void* lref = &*my_locals.push_back(padded_element());
#endif #endif
            if(my_finit_callback) {                                           my_construct_callback->construct(lref);
                new(lref) T(my_finit_callback->apply());
            } else if(my_exemplar_ptr) {
                pointer t_exemp = reinterpret_cast<T *>(&(my_exemplar_ptr->value));
                new(lref) T(*t_exemp);
            } else {
                new(lref) T();
            }
return lref; return lref;
} }
void unconstruct_locals() { void unconstruct_locals() {
            for(typename internal_collection_type::iterator cvi = my_locals.begin(); cvi != my_locals.end(); ++cvi) { for(typename internal_collection_type::iterator cvi = my_locals.begin(); cvi != my_locals.end(); ++cvi) {
cvi->unconstruct(); cvi->unconstruct();
} }
} }
        typedef typename Allocator::template rebind< uintptr_t >::other array_allocator_type; typedef typename Allocator::template rebind< uintptr_t >::other array_allocator_type;
skipping to change at line 787 skipping to change at line 762
        typedef typename internal_collection_type::difference_type difference_type; typedef typename internal_collection_type::difference_type difference_type;
// Iterator types // Iterator types
        typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, value_type > iterator; typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, value_type > iterator;
        typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, const value_type > const_iterator; typedef typename internal::enumerable_thread_specific_iterator< internal_collection_type, const value_type > const_iterator;
// Parallel range types // Parallel range types
typedef generic_range_type< iterator > range_type; typedef generic_range_type< iterator > range_type;
typedef generic_range_type< const_iterator > const_range_type; typedef generic_range_type< const_iterator > const_range_type;
        //! Default constructor, which leads to default construction of local copies      //! Default constructor.  Each local instance of T is default constructed.
        enumerable_thread_specific() : my_finit_callback(0) {            enumerable_thread_specific() :
            my_exemplar_ptr = 0;                                              my_construct_callback( internal::callback_leaf<T,internal::construct_by_default<T> >::make(/*dummy argument*/0) )
        }                                                                 {}
        //! construction with initializer method                          //! Constructor with initializer functor.  Each local instance of T is constructed by T(finit()).
        // Finit should be a function taking 0 parameters and returning a T
        template <typename Finit>                                         template <typename Finit>
        enumerable_thread_specific( Finit _finit )                        enumerable_thread_specific( Finit finit ) :
        {                                                                     my_construct_callback( internal::callback_leaf<T,internal::construct_by_finit<T,Finit> >::make( finit ) )
            my_finit_callback = internal::callback_leaf<T,Finit>::new_callback( _finit );     {}
            my_exemplar_ptr = 0; // don't need exemplar if function is provided     //! Constructor with exemplar.  Each local instance of T is copied-constructed from the exemplar.
        }                                                                 enumerable_thread_specific(const T& exemplar) :
        //! Construction with exemplar, which leads to copy construction of local copies     my_construct_callback( internal::callback_leaf<T,internal::construct_by_exemplar<T> >::make( exemplar ) )
        enumerable_thread_specific(const T &_exemplar) : my_finit_callback(0) {     {}
            my_exemplar_ptr = create_exemplar(_exemplar);
        }
        //! Destructor                                                    //! Destructor
        ~enumerable_thread_specific() {                                   ~enumerable_thread_specific() {
            if(my_finit_callback) {                                           my_construct_callback->destroy();
                my_finit_callback->destroy();
            }
            if(my_exemplar_ptr) {
                free_exemplar(my_exemplar_ptr);
            }
            this->clear();  // deallocation before the derived class is finished destructing     this->clear();  // deallocation before the derived class is finished destructing
                            // So free(array *) is still accessible                              // So free(array *) is still accessible
        }                                                                 }
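The three constructors map directly onto the construct_by_default, construct_by_exemplar and construct_by_finit helpers shown earlier. A short usage sketch with illustrative names (not from the header):

#include "tbb/enumerable_thread_specific.h"

int seed() { return 42; }

tbb::enumerable_thread_specific<int> ets_default;         // each local copy is int()
tbb::enumerable_thread_specific<int> ets_exemplar( 7 );   // each local copy is int(7)
tbb::enumerable_thread_specific<int> ets_finit( &seed );  // each local copy is int(seed())

void hit() {
    bool exists;
    int& mine = ets_exemplar.local( exists );   // exists==false on this thread's first call
    ++mine;                                     // no locking needed: the copy is thread-private
}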
//! returns reference to local, discarding exists //! returns reference to local, discarding exists
reference local() { reference local() {
bool exists; bool exists;
return local(exists); return local(exists);
} }
//! Returns reference to calling thread's local copy, creating one if necessary //! Returns reference to calling thread's local copy, creating one if necessary
reference local(bool& exists) { reference local(bool& exists) {
__TBB_ASSERT(ETS_key_type==ets_no_key,"ets_key_per_instance not yet implemented");
void* ptr = this->table_lookup(exists); void* ptr = this->table_lookup(exists);
return *(T*)ptr; return *(T*)ptr;
} }
//! Get the number of local copies //! Get the number of local copies
size_type size() const { return my_locals.size(); } size_type size() const { return my_locals.size(); }
//! true if there have been no local copies created //! true if there have been no local copies created
bool empty() const { return my_locals.empty(); } bool empty() const { return my_locals.empty(); }
skipping to change at line 888 skipping to change at line 854
internal_copy(other); internal_copy(other);
} }
private: private:
template<typename U, typename A2, ets_key_usage_type C2> template<typename U, typename A2, ets_key_usage_type C2>
enumerable_thread_specific & enumerable_thread_specific &
internal_assign(const enumerable_thread_specific<U, A2, C2>& other) { internal_assign(const enumerable_thread_specific<U, A2, C2>& other) {
if(static_cast<void *>( this ) != static_cast<const void *>( &o ther )) { if(static_cast<void *>( this ) != static_cast<const void *>( &o ther )) {
this->clear(); this->clear();
if(my_finit_callback) { my_construct_callback->destroy();
my_finit_callback->destroy(); my_construct_callback = 0;
my_finit_callback = 0;
}
if(my_exemplar_ptr) {
free_exemplar(my_exemplar_ptr);
my_exemplar_ptr = 0;
}
internal_copy( other ); internal_copy( other );
} }
return *this; return *this;
} }
public: public:
// assignment // assignment
        enumerable_thread_specific& operator=(const enumerable_thread_specific& other) { enumerable_thread_specific& operator=(const enumerable_thread_specific& other) {
return internal_assign(other); return internal_assign(other);
skipping to change at line 918 skipping to change at line 878
template<typename U, typename Alloc, ets_key_usage_type Cachetype> template<typename U, typename Alloc, ets_key_usage_type Cachetype>
        enumerable_thread_specific& operator=(const enumerable_thread_specific<U, Alloc, Cachetype>& other) enumerable_thread_specific& operator=(const enumerable_thread_specific<U, Alloc, Cachetype>& other)
{ {
return internal_assign(other); return internal_assign(other);
} }
// combine_func_t has signature T(T,T) or T(const T&, const T&) // combine_func_t has signature T(T,T) or T(const T&, const T&)
template <typename combine_func_t> template <typename combine_func_t>
T combine(combine_func_t f_combine) { T combine(combine_func_t f_combine) {
if(begin() == end()) { if(begin() == end()) {
if(my_finit_callback) { internal::destruct_only<T> location;
return my_finit_callback->apply(); my_construct_callback->construct(location.value.begin());
} return *location.value.begin();
                pointer local_ref = reinterpret_cast<T*>((my_exemplar_ptr->value));
return T(*local_ref);
} }
const_iterator ci = begin(); const_iterator ci = begin();
T my_result = *ci; T my_result = *ci;
while(++ci != end()) while(++ci != end())
my_result = f_combine( my_result, *ci ); my_result = f_combine( my_result, *ci );
return my_result; return my_result;
} }
// combine_func_t has signature void(T) or void(const T&) // combine_func_t has signature void(T) or void(const T&)
template <typename combine_func_t> template <typename combine_func_t>
skipping to change at line 944 skipping to change at line 902
for(const_iterator ci = begin(); ci != end(); ++ci) { for(const_iterator ci = begin(); ci != end(); ++ci) {
f_combine( *ci ); f_combine( *ci );
} }
} }
}; // enumerable_thread_specific }; // enumerable_thread_specific
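After a parallel phase, combine() folds the surviving copies with a binary functor and combine_each() only visits them; when no copy exists, combine() returns a value produced by the stored construct callback, as the destruct_only branch above shows. A usage sketch with illustrative names:

#include <cstdio>
#include <functional>
#include "tbb/enumerable_thread_specific.h"

tbb::enumerable_thread_specific<int> hits;

int total_hits() {
    return hits.combine( std::plus<int>() );   // combine_func_t with signature T(const T&, const T&)
}

void print_one( const int& v ) { std::printf( "%d\n", v ); }

void print_all() {
    hits.combine_each( &print_one );           // void(const T&); visits each copy, no reduction
}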
    template <typename T, typename Allocator, ets_key_usage_type ETS_key_type> template <typename T, typename Allocator, ets_key_usage_type ETS_key_type>
    template<typename U, typename A2, ets_key_usage_type C2> template<typename U, typename A2, ets_key_usage_type C2>
    void enumerable_thread_specific<T,Allocator,ETS_key_type>::internal_copy( const enumerable_thread_specific<U, A2, C2>& other) { void enumerable_thread_specific<T,Allocator,ETS_key_type>::internal_copy( const enumerable_thread_specific<U, A2, C2>& other) {
        // Initialize my_construct_callback first, so that it is valid even if rest of this routine throws an exception.
my_construct_callback = other.my_construct_callback->clone();
typedef internal::ets_base<ets_no_key> base; typedef internal::ets_base<ets_no_key> base;
__TBB_ASSERT(my_locals.size()==0,NULL); __TBB_ASSERT(my_locals.size()==0,NULL);
this->table_reserve_for_copy( other ); this->table_reserve_for_copy( other );
for( base::array* r=other.my_root; r; r=r->next ) { for( base::array* r=other.my_root; r; r=r->next ) {
for( size_t i=0; i<r->size(); ++i ) { for( size_t i=0; i<r->size(); ++i ) {
base::slot& s1 = r->at(i); base::slot& s1 = r->at(i);
if( !s1.empty() ) { if( !s1.empty() ) {
base::slot& s2 = this->table_find(s1.key); base::slot& s2 = this->table_find(s1.key);
if( s2.empty() ) { if( s2.empty() ) {
#if TBB_DEPRECATED #if TBB_DEPRECATED
skipping to change at line 966 skipping to change at line 927
void* lref = &*my_locals.push_back(padded_element() ); void* lref = &*my_locals.push_back(padded_element() );
#endif #endif
s2.ptr = new(lref) T(*(U*)s1.ptr); s2.ptr = new(lref) T(*(U*)s1.ptr);
s2.key = s1.key; s2.key = s1.key;
} else { } else {
// Skip the duplicate // Skip the duplicate
} }
} }
} }
} }
if(other.my_finit_callback) {
my_finit_callback = other.my_finit_callback->make_copy();
} else {
my_finit_callback = 0;
}
if(other.my_exemplar_ptr) {
            pointer local_ref = reinterpret_cast<U*>(other.my_exemplar_ptr->value);
my_exemplar_ptr = create_exemplar(*local_ref);
} else {
my_exemplar_ptr = 0;
}
} }
template< typename Container > template< typename Container >
class flattened2d { class flattened2d {
        // This intermediate typedef is to address issues with VC7.1 compilers // This intermediate typedef is to address issues with VC7.1 compilers
typedef typename Container::value_type conval_type; typedef typename Container::value_type conval_type;
public: public:
skipping to change at line 1037 skipping to change at line 987
template <typename Container> template <typename Container>
    flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) { flattened2d<Container> flatten2d(const Container &c, const typename Container::const_iterator b, const typename Container::const_iterator e) {
return flattened2d<Container>(c, b, e); return flattened2d<Container>(c, b, e);
} }
template <typename Container> template <typename Container>
flattened2d<Container> flatten2d(const Container &c) { flattened2d<Container> flatten2d(const Container &c) {
return flattened2d<Container>(c); return flattened2d<Container>(c);
} }
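flatten2d() is most useful when each thread accumulates into its own container and a later serial pass wants to see all elements as one sequence. A sketch, assuming only the begin()/end() iteration shown above:

#include <cstddef>
#include <vector>
#include "tbb/enumerable_thread_specific.h"

typedef tbb::enumerable_thread_specific< std::vector<int> > ets_vectors;
ets_vectors per_thread_results;

std::size_t count_all() {
    std::size_t n = 0;
    tbb::flattened2d<ets_vectors> flat = tbb::flatten2d( per_thread_results );
    for( tbb::flattened2d<ets_vectors>::iterator i = flat.begin(); i != flat.end(); ++i )
        ++n;        // visits every element of every thread-local vector
    return n;
}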
} // interface5 } // interface6
namespace internal { namespace internal {
using interface5::internal::segmented_iterator; using interface6::internal::segmented_iterator;
} }
using interface5::enumerable_thread_specific; using interface6::enumerable_thread_specific;
using interface5::flattened2d; using interface6::flattened2d;
using interface5::flatten2d; using interface6::flatten2d;
} // namespace tbb } // namespace tbb
#endif #endif
 End of changes. 28 change blocks. 
170 lines changed or deleted 109 lines changed or added


 ibm_aix51.h   ibm_aix51.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H // TODO: revise by comparing with mac_ppc.h
#error Do not include this file directly; include tbb_machine.h instead
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_ibm_aix51_H
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 1
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sched.h> #include <sched.h>
extern "C" { extern "C" {
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
#define __TBB_fence_for_acquire() __TBB_machine_flush () void __TBB_machine_flush ();
#define __TBB_fence_for_release() __TBB_machine_flush () void __TBB_machine_lwsync ();
void __TBB_machine_isync ();
} }
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cas_32(P,V,C) // Mapping of old entry point names retained for the sake of backward binary compatibility
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cas_64(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cas_64(P,V,C) #define __TBB_machine_cmpswp4 __TBB_machine_cas_32
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64
#define __TBB_Yield() sched_yield() #define __TBB_Yield() sched_yield()
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if __GNUC__
    #define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
    #define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
    #define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
    #define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")
#else
// IBM C++ Compiler does not support inline assembly
    // TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more
// lightweight implementation (like in mac_ppc.h)
#define __TBB_control_consistency_helper() __TBB_machine_isync ()
#define __TBB_acquire_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_release_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_full_memory_fence() __TBB_machine_flush ()
#endif
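With __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE set, tbb_machine.h builds acquire loads and release stores from plain accesses plus the helpers defined above. The rough shape of that composition on this PowerPC port, as a sketch rather than the actual generic code:

template<typename T>
inline T load_acquire_sketch( const volatile T& location ) {
    T value = location;                    // plain load
    __TBB_acquire_consistency_helper();    // lwsync: later accesses cannot move above the load
    return value;
}

template<typename T>
inline void store_release_sketch( volatile T& location, T value ) {
    __TBB_release_consistency_helper();    // lwsync: earlier accesses cannot move below the store
    location = value;                      // plain store
}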
 End of changes. 7 change blocks. 
10 lines changed or deleted 16 lines changed or added


 index.html   index.html 
skipping to change at line 16 skipping to change at line 16
<H2>Directories</H2> <H2>Directories</H2>
<DL> <DL>
<DT><A HREF="tbb/index.html">tbb</A> <DT><A HREF="tbb/index.html">tbb</A>
<DD>Include files for Threading Building Blocks classes and functions. <DD>Include files for Threading Building Blocks classes and functions.
</DL> </DL>
<HR> <HR>
<A HREF="../index.html">Up to parent directory</A> <A HREF="../index.html">Up to parent directory</A>
<p></p> <p></p>
Copyright &copy; 2005-2010 Intel Corporation. All Rights Reserved. Copyright &copy; 2005-2011 Intel Corporation. All Rights Reserved.
<p></p> <P></P>
Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are Intel is a registered trademark or trademark of Intel Corporation
registered trademarks or trademarks of Intel Corporation or its or its subsidiaries in the United States and other countries.
subsidiaries in the United States and other countries.
<p></p> <p></p>
* Other names and brands may be claimed as the property of others. * Other names and brands may be claimed as the property of others.
</BODY> </BODY>
</HTML> </HTML>
 End of changes. 1 change blocks. 
5 lines changed or deleted 4 lines changed or added


 linux_common.h   linux_common.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 30 skipping to change at line 30
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#include <stdint.h>
#include <unistd.h>
#include <sched.h> #include <sched.h>
// Definition of __TBB_Yield()
#ifndef __TBB_Yield
#define __TBB_Yield() sched_yield() #define __TBB_Yield() sched_yield()
#endif
/* Futex definitions */ /* Futex definitions */
#include <sys/syscall.h> #include <sys/syscall.h>
#if defined(SYS_futex) #if defined(SYS_futex)
#define __TBB_USE_FUTEX 1 #define __TBB_USE_FUTEX 1
#include <limits.h> #include <limits.h>
#include <errno.h> #include <errno.h>
// Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE. // Unfortunately, some versions of Linux do not have a header that defines FUTEX_WAIT and FUTEX_WAKE.
 End of changes. 5 change blocks. 
8 lines changed or deleted 2 lines changed or added


 linux_ia32.h   linux_ia32.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#if !__MINGW32__ #define __TBB_machine_linux_ia32_H
#include "linux_common.h"
#endif #include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE 4 #define __TBB_WORDSIZE 4
#define __TBB_BIG_ENDIAN 0 #define __TBB_BIG_ENDIAN 0
#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory") #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
inline void __TBB_rel_acq_fence() { __asm__ __volatile__("mfence": : :"memory"); } #define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#if __TBB_ICC_ASM_VOLATILE_BROKEN #if __TBB_ICC_ASM_VOLATILE_BROKEN
#define __TBB_VOLATILE #define __TBB_VOLATILE
#else #else
#define __TBB_VOLATILE volatile #define __TBB_VOLATILE volatile
#endif #endif
#define __MACHINE_DECL_ATOMICS(S,T,X) \ #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \ static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \ { \
T result; \ T result; \
\ \
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \ __asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \ : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(__TBB_VOLATIL E T*)ptr) \ : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATIL E T*)ptr) \
: "memory"); \ : "memory"); \
return result; \ return result; \
} \ } \
\ \
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \ static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \ { \
T result; \ T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \ __asm__ __volatile__("lock\nxadd" X " %0,%1" \
: "=r"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \ : R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \ : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \ : "memory"); \
return result; \ return result; \
} \ } \
\ \
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \ static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \ { \
T result; \ T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \ __asm__ __volatile__("lock\nxchg" X " %0,%1" \
: "=r"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \ : R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \ : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \ : "memory"); \
return result; \ return result; \
} \ } \
__MACHINE_DECL_ATOMICS(1,int8_t,"") __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
__MACHINE_DECL_ATOMICS(2,int16_t,"") __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
__MACHINE_DECL_ATOMICS(4,int32_t,"l") __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
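Each expansion of the size-parameterized macro above yields __TBB_machine_cmpswp##S, __TBB_machine_fetchadd##S and __TBB_machine_fetchstore##S for that operand width. A trivial use of the generated 4-byte fetch-add, as a sketch rather than code from the header:

// lock xadd returns the value the location held before the addition,
// so adding 1 to that result gives the post-increment value.
inline int32_t atomic_increment_sketch( volatile int32_t& counter ) {
    return __TBB_machine_fetchadd4( &counter, 1 ) + 1;
}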
#if __INTEL_COMPILER
#pragma warning( push )
// reference to EBX in a function requiring stack alignment
#pragma warning( disable: 998 )
#endif
static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{ {
int64_t result; int64_t result;
union {
int64_t i64;
int32_t i32[2];
};
i64 = value;
#if __PIC__ #if __PIC__
/* compiling position-independent code */ /* compiling position-independent code */
// EBX register preserved for compliance with position-independent code rules on IA32 // EBX register preserved for compliance with position-independent code rules on IA32
int32_t tmp;
__asm__ __volatile__ ( __asm__ __volatile__ (
"pushl %%ebx\n\t" "movl %%ebx,%2\n\t"
"movl (%%ecx),%%ebx\n\t" "movl %5,%%ebx\n\t"
"movl 4(%%ecx),%%ecx\n\t" #if __GNUC__==3
"lock\n\t cmpxchg8b %1\n\t" "lock\n\t cmpxchg8b %1\n\t"
"popl %%ebx" #else
: "=A"(result), "=m"(*(int64_t *)ptr) "lock\n\t cmpxchg8b (%3)\n\t"
: "m"(*(int64_t *)ptr) #endif
"movl %2,%%ebx"
: "=A"(result)
, "=m"(*(__TBB_VOLATILE int64_t *)ptr)
, "=m"(tmp)
#if __GNUC__==3
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
#else
: "SD"(ptr)
#endif
, "0"(comparand) , "0"(comparand)
, "c"(&value) , "m"(i32[0]), "c"(i32[1])
: "memory", "esp" : "memory"
#if __INTEL_COMPILER #if __INTEL_COMPILER
,"ebx" ,"ebx"
#endif #endif
); );
#else /* !__PIC__ */ #else /* !__PIC__ */
union {
int64_t i64;
int32_t i32[2];
};
i64 = value;
__asm__ __volatile__ ( __asm__ __volatile__ (
"lock\n\t cmpxchg8b %1\n\t" "lock\n\t cmpxchg8b %1\n\t"
: "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr) : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
: "m"(*(__TBB_VOLATILE int64_t *)ptr) : "m"(*(__TBB_VOLATILE int64_t *)ptr)
, "0"(comparand) , "0"(comparand)
, "b"(i32[0]), "c"(i32[1]) , "b"(i32[0]), "c"(i32[1])
: "memory" : "memory"
); );
#endif /* __PIC__ */ #endif /* __PIC__ */
return result; return result;
} }
#if __INTEL_COMPILER
#pragma warning( pop )
#endif // warning 998 is back
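On this 4-byte wordsize target, 8-byte operations with no direct instruction are derived from the cmpxchg8b-based CAS above (that is what the generic dword macros later in this file request). The usual retry-loop shape, as a sketch rather than the generic tbb_machine.h code:

inline int64_t fetch_and_add8_sketch( volatile void* ptr, int64_t addend ) {
    int64_t old_value, new_value;
    do {
        // The snapshot may be torn on IA-32, but the CAS below only succeeds
        // when memory still holds exactly old_value, so the loop stays correct.
        old_value = *reinterpret_cast<volatile int64_t*>(ptr);
        new_value = old_value + addend;
    } while( __TBB_machine_cmpswp8( ptr, new_value, old_value ) != old_value );
    return old_value;   // value observed before the addition
}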
static inline int32_t __TBB_machine_lg( uint32_t x ) { static inline int32_t __TBB_machine_lg( uint32_t x ) {
int32_t j; int32_t j;
__asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
return j; return j;
} }
static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) { static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
} }
skipping to change at line 178 skipping to change at line 201
"fistpq %0" : "=m"(*(__TBB_VOLATILE int64_t *)ptr) : "m"(value) : "memory" ); "fistpq %0" : "=m"(*(__TBB_VOLATILE int64_t *)ptr) : "m"(value) : "memory" );
} else { } else {
// Unaligned store // Unaligned store
#if TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_PERFORMANCE_WARNINGS
__TBB_machine_store8_slow_perf_warning(ptr); __TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */ #endif /* TBB_USE_PERFORMANCE_WARNINGS */
__TBB_machine_store8_slow(ptr,value); __TBB_machine_store8_slow(ptr,value);
} }
} }
template <typename T, size_t S>
struct __TBB_machine_load_store {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = location;
        __asm__ __volatile__("" : : : "memory" );   // Compiler fence to keep operations from migrating upwards
        return to_return;
    }
    static inline void store_with_release(volatile T &location, T value) {
        __asm__ __volatile__("" : : : "memory" );   // Compiler fence to keep operations from migrating upwards
        location = value;
    }
};
template <typename T>
struct __TBB_machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = __TBB_machine_load8((const volatile void *)&location);
        __asm__ __volatile__("" : : : "memory" );   // Compiler fence to keep operations from migrating upwards
        return to_return;
    }
    static inline void store_with_release(volatile T &location, T value) {
        __asm__ __volatile__("" : : : "memory" );   // Compiler fence to keep operations from migrating downwards
        __TBB_machine_store8((volatile void *)&location,(int64_t)value);
    }
};
#undef __TBB_VOLATILE
template<typename T>
inline T __TBB_machine_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
template<typename T, typename V>
inline void __TBB_machine_store_with_release(volatile T &location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,value);
}
#define __TBB_load_with_acquire(L) __TBB_machine_load_with_acquire((L))
#define __TBB_store_with_release(L,V) __TBB_machine_store_with_release((L),(V))
// Machine specific atomic operations // Machine specific atomic operations
#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
#define __TBB_Store8(P,V) __TBB_machine_store8(P,V)
#define __TBB_Load8(P) __TBB_machine_load8(P)
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Those we chose not to implement (they will be implemented generically using CMPSWP8)
#undef __TBB_FetchAndAdd8
#undef __TBB_FetchAndStore8
// Definition of other functions // Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
// Special atomic functions #define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) #define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1) #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
// Use generic definitions from tbb_machine.h
#undef __TBB_TryLockByte // API to retrieve/update FPU control setting
#undef __TBB_LockByte #define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t {
int mxcsr;
short x87cw;
};
inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(ctl->mxcsr), "=m"(ctl->x87cw)
);
}
inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
__asm__ __volatile__ (
"ldmxcsr %0\n\t"
"fldcw %1"
: : "m"(ctl->mxcsr), "m"(ctl->x87cw)
);
}
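The two helpers above capture and restore the SSE and x87 control words; TBB uses them to propagate floating-point settings to worker threads, and the same save/restore bracket works for any code that may change rounding or denormal modes. A sketch:

inline void run_with_restored_fp_settings_sketch( void (*work)() ) {
    __TBB_cpu_ctl_env_t saved;
    __TBB_get_cpu_ctl_env( &saved );   // stmxcsr + fstcw: capture current settings
    work();                            // may change rounding/denormal modes
    __TBB_set_cpu_ctl_env( &saved );   // ldmxcsr + fldcw: put them back
}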
 End of changes. 19 change blocks. 
107 lines changed or deleted 53 lines changed or added


 linux_ia64.h   linux_ia64.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#include "linux_common.h" #define __TBB_machine_linux_ia64_H
#include <stdint.h>
#include <unistd.h>
#include <ia64intrin.h> #include <ia64intrin.h>
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 0 #define __TBB_BIG_ENDIAN 0
#define __TBB_DECL_FENCED_ATOMICS 1
#if __INTEL_COMPILER
#define __TBB_compiler_fence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper()
#define __TBB_release_consistency_helper()
#define __TBB_full_memory_fence() __mf()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
    // Even though GCC imbues volatile loads with acquire semantics, it sometimes moves
    // loads over the acquire fence. The following helpers stop such incorrect code motion.
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
    #define __TBB_full_memory_fence()          __asm__ __volatile__("mf": : :"memory")
#endif /* !__INTEL_COMPILER */
// Most of the functions will be in a .s file // Most of the functions will be in a .s file
extern "C" { extern "C" {
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_ t value, int8_t comparand);
int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend); int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend); int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend); int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend); int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend); int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend); int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value); int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value); int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value); int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value); int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value); int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value); int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value); int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value); int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand); int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand); int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand); int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand); int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_lg(uint64_t value); int64_t __TBB_machine_lg(uint64_t value);
void __TBB_machine_pause(int32_t delay); void __TBB_machine_pause(int32_t delay);
bool __TBB_machine_trylockbyte( volatile unsigned char &ptr ); bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr ); int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
//! Retrieves the current RSE backing store pointer. IA64 specific. //! Retrieves the current RSE backing store pointer. IA64 specific.
void* __TBB_get_bsp(); void* __TBB_get_bsp();
}
#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1__TBB_full_fence(P,V,C) int32_t __TBB_machine_load1_relaxed(const void *ptr);
#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2__TBB_full_fence(P,V,C) int32_t __TBB_machine_load2_relaxed(const void *ptr);
int32_t __TBB_machine_load4_relaxed(const void *ptr);
int64_t __TBB_machine_load8_relaxed(const void *ptr);
void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
} // extern "C"
// Mapping old entry points to the names corresponding to the new full_fence identifier.
#define __TBB_machine_fetchadd1full_fence   __TBB_machine_fetchadd1__TBB_full_fence
#define __TBB_machine_fetchadd2full_fence   __TBB_machine_fetchadd2__TBB_full_fence
#define __TBB_machine_fetchadd4full_fence   __TBB_machine_fetchadd4__TBB_full_fence
#define __TBB_machine_fetchadd8full_fence   __TBB_machine_fetchadd8__TBB_full_fence
#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
#define __TBB_machine_cmpswp1full_fence     __TBB_machine_cmpswp1__TBB_full_fence
#define __TBB_machine_cmpswp2full_fence     __TBB_machine_cmpswp2__TBB_full_fence
#define __TBB_machine_cmpswp4full_fence     __TBB_machine_cmpswp4__TBB_full_fence
#define __TBB_machine_cmpswp8full_fence     __TBB_machine_cmpswp8__TBB_full_fence
// Mapping relaxed operations to the entry points implementing them.
/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
    actually have completely relaxed RMW operation here. **/
#define __TBB_machine_fetchadd1relaxed      __TBB_machine_fetchadd1acquire
#define __TBB_machine_fetchadd2relaxed      __TBB_machine_fetchadd2acquire
#define __TBB_machine_fetchadd4relaxed      __TBB_machine_fetchadd4acquire
#define __TBB_machine_fetchadd8relaxed      __TBB_machine_fetchadd8acquire
#define __TBB_machine_fetchstore1relaxed    __TBB_machine_fetchstore1acquire
#define __TBB_machine_fetchstore2relaxed    __TBB_machine_fetchstore2acquire
#define __TBB_machine_fetchstore4relaxed    __TBB_machine_fetchstore4acquire
#define __TBB_machine_fetchstore8relaxed    __TBB_machine_fetchstore8acquire
#define __TBB_machine_cmpswp1relaxed        __TBB_machine_cmpswp1acquire
#define __TBB_machine_cmpswp2relaxed        __TBB_machine_cmpswp2acquire
#define __TBB_machine_cmpswp4relaxed        __TBB_machine_cmpswp4acquire
#define __TBB_machine_cmpswp8relaxed        __TBB_machine_cmpswp8acquire
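Editor's note: the mapping above means a nominally relaxed RMW on this port still carries acquire semantics. A minimal, hedged illustration of the same idea using present-day C++11 atomics rather than TBB's internal entry points (the variable names are ours, not TBB's):

    #include <atomic>

    int main() {
        std::atomic<int> counter{0};
        // What a caller asks for: a relaxed increment...
        counter.fetch_add(1, std::memory_order_relaxed);
        // ...on IA64-style hardware the RMW instruction already behaves like this
        // acquire form, which is why the port maps "relaxed" entry points to "acquire".
        counter.fetch_add(1, std::memory_order_acquire);
        return counter.load() == 2 ? 0 : 1;
    }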
#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load ( const T& location ) { \
return (T)__TBB_machine_load##S##_relaxed(&location); \
} \
static inline void store ( T& location, T value ) { \
__TBB_machine_store##S##_relaxed(&location, (V)value); \
} \
}
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
}} // namespaces internal, tbb
#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1__TBB_full_fence(P,V) #undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_FetchAndAdd1acquire(P,V) __TBB_machine_fetchadd1acquire(P,V) #define __TBB_USE_FENCED_ATOMICS 1
#define __TBB_FetchAndAdd1release(P,V) __TBB_machine_fetchadd1release(P,V) #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2__TBB_full_fence(P,V)
#define __TBB_FetchAndAdd2acquire(P,V) __TBB_machine_fetchadd2acquire(P,V)
#define __TBB_FetchAndAdd2release(P,V) __TBB_machine_fetchadd2release(P,V)
#define __TBB_FetchAndAdd4acquire(P,V) __TBB_machine_fetchadd4acquire(P,V)
#define __TBB_FetchAndAdd4release(P,V) __TBB_machine_fetchadd4release(P,V)
#define __TBB_FetchAndAdd8acquire(P,V) __TBB_machine_fetchadd8acquire(P,V)
#define __TBB_FetchAndAdd8release(P,V) __TBB_machine_fetchadd8release(P,V)
#define __TBB_FetchAndStore1acquire(P,V) __TBB_machine_fetchstore1acquire(P,V)
#define __TBB_FetchAndStore1release(P,V) __TBB_machine_fetchstore1release(P,V)
#define __TBB_FetchAndStore2acquire(P,V) __TBB_machine_fetchstore2acquire(P,V)
#define __TBB_FetchAndStore2release(P,V) __TBB_machine_fetchstore2release(P,V)
#define __TBB_FetchAndStore4acquire(P,V) __TBB_machine_fetchstore4acquire(P,V)
#define __TBB_FetchAndStore4release(P,V) __TBB_machine_fetchstore4release(P,V)
#define __TBB_FetchAndStore8acquire(P,V) __TBB_machine_fetchstore8acquire(P,V)
#define __TBB_FetchAndStore8release(P,V) __TBB_machine_fetchstore8release(P,V)
#define __TBB_CompareAndSwap1acquire(P,V,C) __TBB_machine_cmpswp1acquire(P,V,C)
#define __TBB_CompareAndSwap1release(P,V,C) __TBB_machine_cmpswp1release(P,V,C)
#define __TBB_CompareAndSwap2acquire(P,V,C) __TBB_machine_cmpswp2acquire(P,V,C)
#define __TBB_CompareAndSwap2release(P,V,C) __TBB_machine_cmpswp2release(P,V,C)
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4__TBB_full_fence(P,V,C)
#define __TBB_CompareAndSwap4acquire(P,V,C) __TBB_machine_cmpswp4acquire(P,V,C)
#define __TBB_CompareAndSwap4release(P,V,C) __TBB_machine_cmpswp4release(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8__TBB_full_fence(P,V,C)
#define __TBB_CompareAndSwap8acquire(P,V,C) __TBB_machine_cmpswp8acquire(P,V,C)
#define __TBB_CompareAndSwap8release(P,V,C) __TBB_machine_cmpswp8release(P,V,C)
#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4__TBB_full_fence(P,V)
#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8__TBB_full_fence(P,V)
#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1__TBB_full_fence(P,V)
#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2__TBB_full_fence(P,V)
#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4__TBB_full_fence(P,V)
#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8__TBB_full_fence(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAdd8acquire(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAdd8release(P,-1)
#ifndef __INTEL_COMPILER
/* Even though GCC imbues volatile loads with acquire semantics,
   it sometimes moves loads over the acquire fence. The
   fences defined here stop such incorrect code motion. */
#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory")
#define __TBB_rel_acq_fence() __asm__ __volatile__("mf": : :"memory")
#else
#define __TBB_release_consistency_helper()
#define __TBB_rel_acq_fence() __mf()
#endif /* __INTEL_COMPILER */
// Special atomic functions
#define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap8(P,V,C)
#define __TBB_FetchAndStoreW(P,V) __TBB_FetchAndStore8(P,V)
#define __TBB_FetchAndAddW(P,V) __TBB_FetchAndAdd8(P,V)
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAdd8release(P,V)
// Not needed
#undef __TBB_Store8
#undef __TBB_Load8
// Definition of Lock functions // Definition of Lock functions
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P) #define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
// Definition of other utility functions // Definition of other utility functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
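Editor's note: __TBB_TryLockByte and __TBB_LockByte above expose a simple byte-flag lock protocol backed by the machine entry points. The sketch below illustrates that protocol with standard atomics; it is not TBB's implementation and every name in it is ours:

    #include <atomic>
    #include <cassert>

    // Illustrative only: a byte flag acquired by atomic exchange, mirroring the
    // try-lock / unlock behaviour the macros above provide.
    static std::atomic<unsigned char> flag{0};

    static bool try_lock_byte() { return flag.exchange(1, std::memory_order_acquire) == 0; }
    static void unlock_byte()   { flag.store(0, std::memory_order_release); }

    int main() {
        assert(try_lock_byte());   // first attempt succeeds
        assert(!try_lock_byte());  // second attempt fails while the lock is held
        unlock_byte();
        return 0;
    }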
 End of changes. 21 change blocks. 
118 lines changed or deleted 135 lines changed or added


 linux_intel64.h   linux_intel64.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#include "linux_common.h" #define __TBB_machine_linux_intel64_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 0 #define __TBB_BIG_ENDIAN 0
#define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"memory") #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#ifndef __TBB_rel_acq_fence #ifndef __TBB_full_memory_fence
inline void __TBB_rel_acq_fence() { __asm__ __volatile__("mfence": : :"memory"); } #define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif #endif
#define __MACHINE_DECL_ATOMICS(S,T,X) \ #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com parand ) \ static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com parand ) \
{ \ { \
T result; \ T result; \
\ \
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \ __asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(volatile T*)ptr) \ : "=a"(result), "=m"(*(volatile T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(volatile T*)p tr) \ : "q"(value), "0"(comparand), "m"(*(volatile T*)p tr) \
: "memory"); \ : "memory"); \
return result; \ return result; \
} \ } \
skipping to change at line 76 skipping to change at line 82
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \ static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \ { \
T result; \ T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \ __asm__ __volatile__("lock\nxchg" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \ : "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(value), "m"(*(volatile T*)ptr) \ : "0"(value), "m"(*(volatile T*)ptr) \
: "memory"); \ : "memory"); \
return result; \ return result; \
} \ } \
__MACHINE_DECL_ATOMICS(1,int8_t,"") __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
__MACHINE_DECL_ATOMICS(2,int16_t,"") __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
__MACHINE_DECL_ATOMICS(4,int32_t,"") __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
__MACHINE_DECL_ATOMICS(8,int64_t,"q") __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
#undef __TBB_MACHINE_DEFINE_ATOMICS
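Editor's note: to make the macro above concrete, here is roughly what the S=4, int32_t expansion of the compare-and-swap part looks like when written out by hand. The function name is ours and this is an illustrative sketch of the pattern, not a copy of TBB's generated code:

    #include <cstdint>
    #include <cassert>

    // Hand-expanded sketch of the size-4 case: lock cmpxchg leaves the previous
    // value in EAX, which is exactly the result the compare-and-swap returns.
    static inline int32_t my_cmpswp4(volatile void* ptr, int32_t value, int32_t comparand) {
        int32_t result;
        __asm__ __volatile__("lock\ncmpxchg %2,%1"
                             : "=a"(result), "=m"(*(volatile int32_t*)ptr)
                             : "q"(value), "0"(comparand), "m"(*(volatile int32_t*)ptr)
                             : "memory");
        return result;
    }

    int main() {
        int32_t x = 5;
        assert(my_cmpswp4(&x, 7, 5) == 5 && x == 7);  // CAS succeeds, returns old value
        assert(my_cmpswp4(&x, 9, 5) == 7 && x == 7);  // CAS fails, returns current value
        return 0;
    }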
static inline int64_t __TBB_machine_lg( uint64_t x ) { static inline int64_t __TBB_machine_lg( uint64_t x ) {
int64_t j; int64_t j;
__asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
return j; return j;
} }
static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) { static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) {
__asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");
} }
static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) { static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) {
__asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr ) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr ) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");
} }
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions
#ifndef __TBB_Pause
static inline void __TBB_machine_pause( int32_t delay ) { static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) { for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;"); __asm__ __volatile__("pause;");
} }
return; return;
} }
#define __TBB_Pause(V) __TBB_machine_pause(V)
#endif /* !__TBB_Pause */
// Machine specific atomic operations #define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#define __TBB_Store8(P,V) (*P = V)
#define __TBB_Load8(P) (*P)
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#ifndef __TBB_Pause #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
// API to retrieve/update FPU control setting
#ifndef __TBB_CPU_CTL_ENV_PRESENT
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t {
int mxcsr;
short x87cw;
};
inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
#if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
__TBB_cpu_ctl_env_t loc_ctl;
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
);
*ctl = loc_ctl;
#else
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(ctl->mxcsr), "=m"(ctl->x87cw)
);
#endif #endif
#define __TBB_Log2(V) __TBB_machine_lg(V) }
inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
// Special atomic functions __asm__ __volatile__ (
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) "ldmxcsr %0\n\t"
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) "fldcw %1"
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1) : : "m"(ctl->mxcsr), "m"(ctl->x87cw)
);
// Use generic definitions from tbb_machine.h }
#undef __TBB_TryLockByte #endif /* !__TBB_CPU_CTL_ENV_PRESENT */
#undef __TBB_LockByte
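Editor's note: the __TBB_cpu_ctl_env_t helpers above read and write the SSE MXCSR register and the x87 control word, presumably so floating-point settings can be captured and restored across threads. A hedged illustration of touching the same MXCSR register via the standard intrinsics rather than TBB's internal API (the FTZ/DAZ bits are our example, not something the header above prescribes):

    #include <xmmintrin.h>   // _mm_getcsr / _mm_setcsr
    #include <cstdio>

    int main() {
        unsigned int mxcsr = _mm_getcsr();   // capture the current SSE control/status word
        std::printf("MXCSR = 0x%08x\n", mxcsr);
        _mm_setcsr(mxcsr | 0x8040);          // example: set FTZ (bit 15) and DAZ (bit 6)
        _mm_setcsr(mxcsr);                   // restore the original environment
        return 0;
    }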
 End of changes. 12 change blocks. 
42 lines changed or deleted 58 lines changed or added


 mac_ppc.h   mac_ppc.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_gcc_power_H
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sched.h> // sched_yield // TODO: rename to gcc_power.h?
// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
#if __powerpc64__ || __ppc64__
    // IBM XL documents __powerpc64__ (and __PPC64__).
    // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
    #define __TBB_WORDSIZE 8
#else
    #define __TBB_WORDSIZE 4
#endif
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
#if __TBB_WORDSIZE==8
    // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
    #define __TBB_64BIT_ATOMICS 1
#elif __bgp__
    // Do not change the following definition on known 32-bit hardware.
    #define __TBB_64BIT_ATOMICS 0
#else
    // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
    // You must make certain that the program will only use them on actual 64-bit hardware
    // (which typically means that the entire program is only executed on such hardware),
    // because their implementation involves machine instructions that are illegal elsewhere.
    // The setting can be chosen independently per compilation unit,
    // which also means that TBB itself does not need to be rebuilt.
    // Alternatively (but only for the current architecture and TBB version),
    // override the default as a predefined macro when invoking the compiler.
    #ifndef __TBB_64BIT_ATOMICS
    #define __TBB_64BIT_ATOMICS 0
    #endif
#endif
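Editor's note: the comment block above says the 32-bit default can be overridden per compilation unit. A minimal sketch of what that might look like in user code, assuming a 32-bit Power build that is known to run only on 64-bit hardware (the file name and usage are ours):

    // hypothetical_tu.cpp -- built for a 32-bit Power target that executes only on 64-bit CPUs
    #define __TBB_64BIT_ATOMICS 1      // must appear before any TBB header is included
    #include "tbb/atomic.h"

    tbb::atomic<long long> big_counter; // per the comment above, now backed by 64-bit atomics

    int main() {
        big_counter = 0;
        ++big_counter;
        return static_cast<int>(big_counter);
    }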
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand ) inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
{ {
    int32_t result; int32_t result;
    __asm__ __volatile__("sync\n" __asm__ __volatile__("sync\n"
                         "0: lwarx %0,0,%2\n\t"  /* load w/ reservation */
                         "cmpw %0,%4\n\t"        /* compare against comparand */
                         "bne- 1f\n\t"           /* exit if not same */
                         "stwcx. %3,0,%2\n\t"    /* store new_value */
                         "bne- 0b\n"             /* retry if reservation lost */
                         "1: sync"               /* the exit */
                         : "=&r"(result), "=m"(* (int32_t*) ptr)
                         : "r"(ptr), "r"(value), "r"(comparand), "m"(* (int32_t*) ptr)
                         : "cr0");
                         "0:\n\t"
                         "lwarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
                         "cmpw %[res],%[cmp]\n\t"        /* compare against comparand */
                         "bne- 1f\n\t"                   /* exit if not same */
                         "stwcx. %[val],0,%[ptr]\n\t"    /* store new value */
                         "bne- 0b\n"                     /* retry if reservation lost */
                         "1:\n\t"                        /* the exit */
                         "isync"
                         : [res]"=&r"(result)
                         , "+m"(* (int32_t*) ptr)        /* redundant with "memory" */
                         : [ptr]"r"(ptr)
                         , [val]"r"(value)
                         , [cmp]"r"(comparand)
                         : "memory"                      /* compiler full fence */
                         , "cr0"                         /* clobbered by cmp and/or stwcx. */
                         );
    return result; return result;
} }
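Editor's note: since this port supplies compare-and-swap natively and leaves fetch-and-add to the generic layer (see __TBB_USE_GENERIC_FETCH_ADD further down), read-modify-write operations reduce to CAS retry loops. A hedged, self-contained sketch of that pattern, using the GCC __sync builtin as a stand-in for __TBB_machine_cmpswp4 (all names here are ours):

    #include <cstdint>
    #include <cassert>

    // Generic fetch-and-add built from compare-and-swap: keep retrying until the
    // value we read is still the value in memory when we try to replace it.
    static int32_t fetch_add_via_cas(volatile int32_t* p, int32_t addend) {
        int32_t old_val;
        do {
            old_val = *p;
        } while (__sync_val_compare_and_swap(p, old_val, old_val + addend) != old_val);
        return old_val;   // like fetch_add, returns the pre-increment value
    }

    int main() {
        volatile int32_t counter = 10;
        assert(fetch_add_via_cas(&counter, 5) == 10);
        assert(counter == 15);
        return 0;
    }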
#if __TBB_WORDSIZE==8
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, in t64_t comparand ) inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, in t64_t comparand )
{ {
int64_t result; int64_t result;
__asm__ __volatile__("sync\n" __asm__ __volatile__("sync\n"
"0: ldarx %0,0,%2\n\t" /* load w/ reservation */ "0:\n\t"
"cmpd %0,%4\n\t" /* compare against compara "ldarx %[res],0,%[ptr]\n\t" /* load w/ reserva
nd */ tion */
"bne- 1f\n\t" /* exit if not same */ "cmpd %[res],%[cmp]\n\t" /* compare against
"stdcx. %3,0,%2\n\t" /* store new_value */ comparand */
"bne- 0b\n" /* retry if reservation lo "bne- 1f\n\t" /* exit if not sam
st */ e */
"1: sync" /* the exit */ "stdcx. %[val],0,%[ptr]\n\t" /* store new value
: "=&b"(result), "=m"(* (int64_t*) ptr) */
: "r"(ptr), "r"(value), "r"(comparand), "m"(* (in "bne- 0b\n" /* retry if reserv
t64_t*) ptr) ation lost */
: "cr0"); "1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int64_t*) ptr) /* redundant with
"memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full f
ence */
, "cr0" /* clobbered by cm
p and/or stdcx. */
);
return result; return result;
} }
#define __TBB_BIG_ENDIAN 1 #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
    int64_t result;
    int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
    __asm__ __volatile__("sync\n\t"
                         "ld %[val],%[valm]\n\t"
                         "ld %[cmp],%[cmpm]\n"
                         "0:\n\t"
                         "ldarx %[res],0,%[ptr]\n\t"     /* load w/ reservation */
                         "cmpd %[res],%[cmp]\n\t"        /* compare against comparand */
                         "bne- 1f\n\t"                   /* exit if not same */
                         "stdcx. %[val],0,%[ptr]\n\t"    /* store new value */
                         "bne- 0b\n"                     /* retry if reservation lost */
                         "1:\n\t"                        /* the exit */
                         "std %[res],%[resm]\n\t"
                         "isync"
                         : [resm]"=m"(result)
                         , [res] "=&r"( result_register)
                         , [val] "=&r"( value_register)
                         , [cmp] "=&r"(comparand_register)
                         , "+m"(* (int64_t*) ptr)        /* redundant with "memory" */
                         : [ptr] "r"(ptr)
                         , [valm]"m"(value)
                         , [cmpm]"m"(comparand)
                         : "memory"                      /* compiler full fence */
                         , "cr0"                         /* clobbered by cmpd and/or stdcx. */
                         );
    return result;
}
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#if defined(powerpc64) || defined(__powerpc64__) || defined(__ppc64__) #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
#define __TBB_WORDSIZE 8     template <typename T> \
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)     struct machine_load_store<T,S> { \
        static inline T load_with_acquire(const volatile T& location) { \
            T result; \
            __asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
                                 "0:\n\t" \
                                 cmpx " %[res],%[res]\n\t" \
                                 "bne- 0b\n\t" \
                                 "isync" \
                                 : [res]"=r"(result) \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */ \
                                 , "m"(location)       /* redundant with "memory" */ \
                                 : "memory"            /* compiler acquire fence */ \
                                 , "cr0"               /* clobbered by cmpw/cmpd */); \
            return result; \
        } \
        static inline void store_with_release(volatile T &location, T value) { \
            __asm__ __volatile__("lwsync\n\t" \
                                 stx " %[val],0(%[ptr])" \
                                 : "=m"(location)      /* redundant with "memory" */ \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */ \
                                 , [val]"r"(value) \
                                 : "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
        } \
    }; \
    \
    template <typename T> \
    struct machine_load_store_relaxed<T,S> { \
        static inline T load (const __TBB_atomic T& location) { \
            T result; \
            __asm__ __volatile__(ldx " %[res],0(%[ptr])" \
                                 : [res]"=r"(result) \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */ \
                                 , "m"(location) \
                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
            return result; \
        } \
        static inline void store (__TBB_atomic T &location, T value) { \
            __asm__ __volatile__(stx " %[val],0(%[ptr])" \
                                 : "=m"(location) \
                                 : [ptr]"b"(&location) /* cannot use register 0 here */ \
                                 , [val]"r"(value) \
                                 ); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
        } \
    };
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
#if __TBB_WORDSIZE==8
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
template <typename T>
struct machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T result;
        T result_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
                             "std %[res],%[resm]\n"
                             "0:\n\t"
                             "cmpd %[res],%[res]\n\t"
                             "bne- 0b\n\t"
                             "isync"
                             : [resm]"=m"(result)
                             , [res]"=&r"(result_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , "m"(location)       /* redundant with "memory" */
                             : "memory"            /* compiler acquire fence */
                             , "cr0"               /* clobbered by cmpd */);
        return result;
    }
    static inline void store_with_release(volatile T &location, T value) {
        T value_register; // dummy variable to allocate a register
        __asm__ __volatile__("lwsync\n\t"
                             "ld %[val],%[valm]\n\t"
                             "std %[val],0(%[ptr])"
                             : "=m"(location)      /* redundant with "memory" */
                             , [val]"=&r"(value_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , [valm]"m"(value)
                             : "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
    }
};
struct machine_load_store_relaxed<T,8> {
    static inline T load (const volatile T& location) {
        T result;
        T result_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
                             "std %[res],%[resm]"
                             : [resm]"=m"(result)
                             , [res]"=&r"(result_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , "m"(location)
                             ); /*(no compiler fence)*/ /*(cr0 not affected)*/
        return result;
    }
    static inline void store (volatile T &location, T value) {
        T value_register; // dummy variable to allocate a register
        __asm__ __volatile__("ld %[val],%[valm]\n\t"
                             "std %[val],0(%[ptr])"
                             : "=m"(location)
                             , [val]"=&r"(value_register)
                             : [ptr]"b"(&location) /* cannot use register 0 here */
                             , [valm]"m"(value)
                             ); /*(no compiler fence)*/ /*(cr0 not affected)*/
    }
};
#define __TBB_machine_load_store_relaxed_8
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_LOAD_STORE
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
    // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
#if __TBB_WORDSIZE==8
    __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
    return 63-static_cast<intptr_t>(x);
#else #else
#define __TBB_WORDSIZE 4     __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)     return 31-static_cast<intptr_t>(x);
#endif #endif
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) // Assumes implicit alignment for any 32-bit value
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) typedef uint32_t __TBB_Flag;
#define __TBB_Yield() sched_yield() #define __TBB_Flag __TBB_Flag
#define __TBB_rel_acq_fence() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_release_consistency_helper() __TBB_rel_acq_fence() inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
return __TBB_machine_cmpswp4(&flag,1,0)==0;
}
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
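Editor's note: the load_with_acquire / store_with_release pairs defined above (isync after the load, lwsync before the store) implement the usual acquire/release message-passing discipline. A hedged, portable illustration of the same discipline with C++11 atomics, independent of TBB's internal templates (all names are ours):

    #include <atomic>
    #include <thread>
    #include <cassert>

    static int payload = 0;
    static std::atomic<bool> ready{false};

    int main() {
        std::thread producer([]{
            payload = 42;                                  // ordinary store
            ready.store(true, std::memory_order_release);  // "store_with_release"
        });
        while (!ready.load(std::memory_order_acquire)) {}  // "load_with_acquire"
        assert(payload == 42);                             // guaranteed to be visible
        producer.join();
        return 0;
    }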
 End of changes. 12 change blocks. 
34 lines changed or deleted 341 lines changed or added


 mutex.h   mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_mutex_H #ifndef __TBB_mutex_H
#define __TBB_mutex_H #define __TBB_mutex_H
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#if !defined(_WIN32_WINNT) #else
// The following Windows API function is declared explicitly; #include <pthread.h>
// otherwise any user would have to specify /D_WIN32_WINNT=0x0400
extern "C" BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
#endif
#else /* if not _WIN32||_WIN64 */
#include <pthread.h>
#endif /* _WIN32||_WIN64 */ #endif /* _WIN32||_WIN64 */
#include <new> #include <new>
#include "aligned_space.h" #include "aligned_space.h"
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
//! Wrapper around the platform's native reader-writer lock. //! Wrapper around the platform's native reader-writer lock.
 End of changes. 2 change blocks. 
9 lines changed or deleted 4 lines changed or added


 null_mutex.h   null_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 null_rw_mutex.h   null_rw_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 parallel_do.h   parallel_do.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 parallel_for.h   parallel_for.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 32 skipping to change at line 32
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_parallel_for_H #ifndef __TBB_parallel_for_H
#define __TBB_parallel_for_H #define __TBB_parallel_for_H
#include <new>
#include "task.h" #include "task.h"
#include "partitioner.h" #include "partitioner.h"
#include "blocked_range.h" #include "blocked_range.h"
#include <new>
#include "tbb_exception.h" #include "tbb_exception.h"
namespace tbb { namespace tbb {
namespace interface6 {
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
//! Task type used in parallel_for //! Task type used in parallel_for
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
class start_for: public task { class start_for: public task {
Range my_range; Range my_range;
const Body my_body; const Body my_body;
typename Partitioner::partition_type my_partition; typename Partitioner::task_partition_type my_partition;
/*override*/ task* execute(); /*override*/ task* execute();
public:
//! Constructor for root task. //! Constructor for root task.
start_for( const Range& range, const Body& body, Partitioner& parti tioner ) : start_for( const Range& range, const Body& body, Partitioner& parti tioner ) :
my_range(range), my_range(range),
my_body(body), my_body(body),
my_partition(partitioner) my_partition(partitioner)
{ {
} }
//! Splitting constructor used to generate children. //! Splitting constructor used to generate children.
/** this becomes left child. Newly constructed object is right child. */ /** parent_ becomes left child. Newly constructed object is right child. */
start_for( start_for& parent_, split ) : start_for( start_for& parent_, split ) :
my_range(parent_.my_range,split()), my_range(parent_.my_range,split()),
my_body(parent_.my_body), my_body(parent_.my_body),
my_partition(parent_.my_partition, split())
{
my_partition.set_affinity(*this);
}
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_for( start_for& parent_, const Range& r, depth_t d ) :
my_range(r),
my_body(parent_.my_body),
my_partition(parent_.my_partition,split()) my_partition(parent_.my_partition,split())
{ {
my_partition.set_affinity(*this); my_partition.set_affinity(*this);
my_partition.align_depth( d );
} }
//! Update affinity info, if any. //! Update affinity info, if any.
/*override*/ void note_affinity( affinity_id id ) { /*override*/ void note_affinity( affinity_id id ) {
my_partition.note_affinity( id ); my_partition.note_affinity( id );
} }
public:
static void run( const Range& range, const Body& body, const Parti tioner& partitioner ) { static void run( const Range& range, const Body& body, const Parti tioner& partitioner ) {
if( !range.empty() ) { if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
start_for& a = *new(task::allocate_root()) start_for(range, body,const_cast<Partitioner&>(partitioner)); start_for& a = *new(task::allocate_root()) start_for(range, body,const_cast<Partitioner&>(partitioner));
#else #else
// Bound context prevents exceptions from body to affect ne sting or sibling algorithms, // Bound context prevents exceptions from body to affect ne sting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_for in the try-block. // and allows users to handle exceptions safely by wrapping parallel_for in the try-block.
task_group_context context; task_group_context context;
start_for& a = *new(task::allocate_root(context)) start_for (range,body,const_cast<Partitioner&>(partitioner)); start_for& a = *new(task::allocate_root(context)) start_for (range,body,const_cast<Partitioner&>(partitioner));
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
skipping to change at line 94 skipping to change at line 105
} }
} }
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, const Body& body, const Parti tioner& partitioner, task_group_context& context ) { static void run( const Range& range, const Body& body, const Parti tioner& partitioner, task_group_context& context ) {
if( !range.empty() ) { if( !range.empty() ) {
start_for& a = *new(task::allocate_root(context)) start_for (range,body,const_cast<Partitioner&>(partitioner)); start_for& a = *new(task::allocate_root(context)) start_for (range,body,const_cast<Partitioner&>(partitioner));
task::spawn_root_and_wait(a); task::spawn_root_and_wait(a);
} }
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//! create a continuation task, serve as callback for partitioner
flag_task *create_continuation() {
return new( allocate_continuation() ) flag_task();
}
//! Run body for range
void run_body( Range &r ) { my_body( r ); }
}; };
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
task* start_for<Range,Body,Partitioner>::execute() { task* start_for<Range,Body,Partitioner>::execute() {
if( !my_range.is_divisible() || my_partition.should_execute_range(* my_partition.check_being_stolen( *this );
this) ) { my_partition.execute(*this, my_range);
my_body( my_range ); return NULL;
return my_partition.continue_after_execute_range();
} else {
empty_task& c = *new( this->allocate_continuation() ) empty_tas
k;
recycle_as_child_of(c);
c.set_ref_count(2);
bool delay = my_partition.decide_whether_to_delay();
start_for& b = *new( c.allocate_child() ) start_for(*this,split
());
my_partition.spawn_or_delay(delay,b);
return this;
}
} }
} // namespace internal } // namespace internal
//! @endcond //! @endcond
} // namespace interfaceX
//! @cond INTERNAL
namespace internal {
using interface6::internal::start_for;
//! Calls the function with values from range [begin, end) with a step provided
template<typename Function, typename Index>
class parallel_for_body : internal::no_assign {
const Function &my_func;
const Index my_begin;
const Index my_step;
public:
parallel_for_body( const Function& _func, Index& _begin, Index& _step)
: my_func(_func), my_begin(_begin), my_step(_step) {}
void operator()( tbb::blocked_range<Index>& r ) const {
for( Index i = r.begin(), k = my_begin + i * my_step; i < r.end(); i++, k = k + my_step)
my_func( k );
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h // Requirements on Range concept are documented in blocked_range.h
/** \page parallel_for_body_req Requirements on parallel_for body /** \page parallel_for_body_req Requirements on parallel_for body
Class \c Body implementing the concept of parallel_for body must define : Class \c Body implementing the concept of parallel_for body must define :
- \code Body::Body( const Body& ); \endcode Copy constr uctor - \code Body::Body( const Body& ); \endcode Copy constr uctor
- \code Body::~Body(); \endcode Destructor - \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ) const; \endcode Function ca ll operator applying the body to range \c r. - \code void Body::operator()( Range& r ) const; \endcode Function ca ll operator applying the body to range \c r.
**/ **/
skipping to change at line 179 skipping to change at line 210
//! Parallel iteration over range with affinity_partitioner and user-suppli ed context. //! Parallel iteration over range with affinity_partitioner and user-suppli ed context.
/** @ingroup algorithms **/ /** @ingroup algorithms **/
template<typename Range, typename Body> template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, affinity_partition er& partitioner, task_group_context& context ) { void parallel_for( const Range& range, const Body& body, affinity_partition er& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,affinity_partitioner>::run(range,body,pa rtitioner, context); internal::start_for<Range,Body,affinity_partitioner>::run(range,body,pa rtitioner, context);
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//@} //@}
//! @cond INTERNAL
namespace internal {
//! Calls the function with values from range [begin, end) with a step provided
template<typename Function, typename Index>
class parallel_for_body : internal::no_assign {
const Function &my_func;
const Index my_begin;
const Index my_step;
public:
parallel_for_body( const Function& _func, Index& _begin, Index& _step)
: my_func(_func), my_begin(_begin), my_step(_step) {}
void operator()( tbb::blocked_range<Index>& r ) const {
for( Index i = r.begin(), k = my_begin + i * my_step; i < r.end(); i++, k = k + my_step)
my_func( k );
}
};
} // namespace internal
//! @endcond
namespace strict_ppl { namespace strict_ppl {
//@{ //@{
//! Parallel iteration over a range of integers with a step provided //! Parallel iteration over a range of integers with a step provided
template <typename Index, typename Function> template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f) { void parallel_for(Index first, Index last, Index step, const Function& f) {
tbb::task_group_context context;
parallel_for(first, last, step, f, context);
}
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
    if (step <= 0 ) if (step <= 0 )
        internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
    else if (last > first) { else if (last > first) {
        // Above "else" is necessary to prevent "potential divide by zero" warning // Above "else" avoids "potential divide by zero" warning on some platforms
        Index end = (last - first) / step; Index end = (last - first - Index(1)) / step + Index(1);
        if (first + end * step < last) end++;
tbb::blocked_range<Index> range(static_cast<Index>(0), end); tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step); internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, tbb::auto_partitioner(), context); tbb::parallel_for(range, body, tbb::auto_partitioner());
} }
} }
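Editor's note: to see what the new end computation does, take first = 5, last = 14, step = 3: end = (14 - 5 - 1) / 3 + 1 = 3, so the blocked_range covers i = 0, 1, 2 and the body receives k = 5, 8, 11, i.e. exactly the half-open sequence [5, 14) with stride 3. A tiny self-contained check of that arithmetic (plain C++, no TBB required):

    #include <cassert>

    int main() {
        int first = 5, last = 14, step = 3;
        int end = (last - first - 1) / step + 1;   // iteration count used above
        assert(end == 3);
        for (int i = 0; i < end; ++i) {
            int k = first + i * step;              // value handed to the body
            assert(k == 5 + 3 * i && k < last);
        }
        return 0;
    }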
//! Parallel iteration over a range of integers with a default step value //! Parallel iteration over a range of integers with a default step value
template <typename Index, typename Function> template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f) { void parallel_for(Index first, Index last, const Function& f) {
tbb::task_group_context context; parallel_for(first, last, static_cast<Index>(1), f);
parallel_for(first, last, static_cast<Index>(1), f, context); }
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration over a range of integers with explicit step and task group context
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
    if (step <= 0 )
        internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
    else if (last > first) {
        // Above "else" avoids "potential divide by zero" warning on some platforms
        Index end = (last - first - Index(1)) / step + Index(1);
tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, tbb::auto_partitioner(), context);
}
} }
//! Parallel iteration over a range of integers with a default step value and explicit task group context
template <typename Index, typename Function> template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) { void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
parallel_for(first, last, static_cast<Index>(1), f, context); parallel_for(first, last, static_cast<Index>(1), f, context);
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@} //@}
} // namespace strict_ppl } // namespace strict_ppl
using strict_ppl::parallel_for; using strict_ppl::parallel_for;
} // namespace tbb } // namespace tbb
#if TBB_PREVIEW_SERIAL_SUBSET
#define __TBB_NORMAL_EXECUTION
#include "../serial/tbb/parallel_for.h"
#undef __TBB_NORMAL_EXECUTION
#endif
#endif /* __TBB_parallel_for_H */ #endif /* __TBB_parallel_for_H */
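Editor's note: for reference, the integer overloads declared in this header are used as below. A minimal sketch assuming TBB is installed and the program links against the TBB runtime; the functor is ours:

    #include <cstdio>
    #include "tbb/parallel_for.h"

    struct PrintOdd {
        void operator()(int k) const { std::printf("%d\n", k); }  // called once per index
    };

    int main() {
        // Applies the functor to 1, 3, 5, ..., 99 in parallel (first=1, last=100, step=2).
        tbb::parallel_for(1, 100, 2, PrintOdd());
        return 0;
    }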
 End of changes. 21 change blocks. 
56 lines changed or deleted 85 lines changed or added


 parallel_for_each.h   parallel_for_each.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 46 skipping to change at line 46
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
// The class calls user function in operator() // The class calls user function in operator()
template <typename Function, typename Iterator> template <typename Function, typename Iterator>
class parallel_for_each_body : internal::no_assign { class parallel_for_each_body : internal::no_assign {
const Function &my_func; const Function &my_func;
public: public:
parallel_for_each_body(const Function &_func) : my_func(_func) {} parallel_for_each_body(const Function &_func) : my_func(_func) {}
parallel_for_each_body(const parallel_for_each_body<Function, Itera tor> &_caller) : my_func(_caller.my_func) {} parallel_for_each_body(const parallel_for_each_body<Function, Itera tor> &_caller) : my_func(_caller.my_func) {}
void operator() ( typename std::iterator_traits<Iterator>::value_type& value ) const { void operator() ( typename std::iterator_traits<Iterator>::reference value ) const {
my_func(value); my_func(value);
} }
}; };
} // namespace internal } // namespace internal
//! @endcond //! @endcond
/** \name parallel_for_each /** \name parallel_for_each
**/ **/
//@{ //@{
//! Calls function f for all items from [first, last) interval using user-s upplied context //! Calls function f for all items from [first, last) interval using user-s upplied context
/** @ingroup algorithms */ /** @ingroup algorithms */
#if __TBB_TASK_GROUP_CONTEXT
template<typename InputIterator, typename Function> template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Funct ion& f, task_group_context &context) { void parallel_for_each(InputIterator first, InputIterator last, const Funct ion& f, task_group_context &context) {
internal::parallel_for_each_body<Function, InputIterator> body(f); internal::parallel_for_each_body<Function, InputIterator> body(f);
tbb::parallel_do (first, last, body, context); tbb::parallel_do (first, last, body, context);
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Uses default context //! Uses default context
template<typename InputIterator, typename Function> template<typename InputIterator, typename Function>
void parallel_for_each(InputIterator first, InputIterator last, const Funct ion& f) { void parallel_for_each(InputIterator first, InputIterator last, const Funct ion& f) {
internal::parallel_for_each_body<Function, InputIterator> body(f); internal::parallel_for_each_body<Function, InputIterator> body(f);
tbb::parallel_do (first, last, body); tbb::parallel_do (first, last, body);
} }
//@} //@}
} // namespace } // namespace
#endif /* __TBB_parallel_for_each_H */ #endif /* __TBB_parallel_for_each_H */
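For orientation, a minimal caller-side sketch of the parallel_for_each overloads declared above; the container, element type, and functor are illustrative only:

    #include "tbb/parallel_for_each.h"
    #include <vector>

    // Functor invoked once per element; doubling is an arbitrary example.
    struct DoubleIt {
        void operator()( int& x ) const { x *= 2; }
    };

    void double_all( std::vector<int>& v ) {
        tbb::parallel_for_each( v.begin(), v.end(), DoubleIt() );
    }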
 End of changes. 6 change blocks. 
4 lines changed or deleted 4 lines changed or added


 parallel_invoke.h   parallel_invoke.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 36 skipping to change at line 36
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_parallel_invoke_H #ifndef __TBB_parallel_invoke_H
#define __TBB_parallel_invoke_H #define __TBB_parallel_invoke_H
#include "task.h" #include "task.h"
namespace tbb { namespace tbb {
#if !__TBB_TASK_GROUP_CONTEXT
/** Dummy to avoid cluttering the bulk of the header with an enormous amount of ifdefs. **/
struct task_group_context {};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
// Simple task object, executing user method // Simple task object, executing user method
template<typename function> template<typename function>
class function_invoker : public task{ class function_invoker : public task{
public: public:
function_invoker(const function& _function) : my_function(_function ) {} function_invoker(const function& _function) : my_function(_function ) {}
private: private:
const function &my_function; const function &my_function;
/*override*/ /*override*/
skipping to change at line 140 skipping to change at line 145
void run_and_finish(const F0& f0) void run_and_finish(const F0& f0)
{ {
internal::function_invoker<F0>* invoker = new (allocate_child() ) internal::function_invoker<F0>(f0); internal::function_invoker<F0>* invoker = new (allocate_child() ) internal::function_invoker<F0>(f0);
__TBB_ASSERT(invoker, "Child task allocation failed"); __TBB_ASSERT(invoker, "Child task allocation failed");
spawn_and_wait_for_all(*invoker); spawn_and_wait_for_all(*invoker);
} }
}; };
// The class destroys root if an exception occurred as well as in the normal case // The class destroys root if an exception occurred as well as in the normal case
class parallel_invoke_cleaner: internal::no_copy { class parallel_invoke_cleaner: internal::no_copy {
public: public:
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context) : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
#if __TBB_TASK_GROUP_CONTEXT
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
    : root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
#else
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
    : root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
#endif /* !__TBB_TASK_GROUP_CONTEXT */
{} {}
~parallel_invoke_cleaner(){ ~parallel_invoke_cleaner(){
root.destroy(root); root.destroy(root);
} }
internal::parallel_invoke_helper& root; internal::parallel_invoke_helper& root;
}; };
} // namespace internal } // namespace internal
//! @endcond //! @endcond
/** \name parallel_invoke /** \name parallel_invoke
**/ **/
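As a usage sketch of the public parallel_invoke entry points that this machinery supports (f1, f2, f3 are placeholder functions supplied by the caller):

    #include "tbb/parallel_invoke.h"

    void f1();
    void f2();
    void f3();

    void run_all() {
        // Spawns the three callables as sibling tasks and waits for all of them.
        tbb::parallel_invoke( f1, f2, f3 );
    }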
 End of changes. 4 change blocks. 
4 lines changed or deleted 19 lines changed or added


 parallel_reduce.h   parallel_reduce.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 32 skipping to change at line 32
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_parallel_reduce_H #ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H #define __TBB_parallel_reduce_H
#include <new>
#include "task.h" #include "task.h"
#include "aligned_space.h" #include "aligned_space.h"
#include "partitioner.h" #include "partitioner.h"
#include <new> #include "tbb_profiling.h"
namespace tbb { namespace tbb {
namespace interface6 {
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
//! ITT instrumented routine that stores src into location pointed to by dst. using namespace tbb::internal;
void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, void* src );
//! ITT instrumented routine that loads pointer from location pointed to by src.
void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* src );
template<typename T> inline void parallel_reduce_store_body( T*& dst, T* src ) {
#if TBB_USE_THREADING_TOOLS
itt_store_pointer_with_release_v3(&dst,src);
#else
__TBB_store_with_release(dst,src);
#endif /* TBB_USE_THREADING_TOOLS */
}
template<typename T> inline T* parallel_reduce_load_body( T*& src ) {
#if TBB_USE_THREADING_TOOLS
return static_cast<T*>(itt_load_pointer_with_acquire_v3(&src));
#else
return __TBB_load_with_acquire(src);
#endif /* TBB_USE_THREADING_TOOLS */
}
//! 0 if root, 1 if a left child, 2 if a right child. //! 0 if root, 1 if a left child, 2 if a right child.
/** Represented as a char, not enum, for compactness. */ /** Represented as a char, not enum, for compactness. */
typedef char reduction_context; typedef char reduction_context;
//! Task type used to combine the partial results of parallel_reduce. //! Task type used to combine the partial results of parallel_reduce.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Body> template<typename Body>
class finish_reduce: public task { class finish_reduce: public flag_task {
//! Pointer to body, or NULL if the left child has not yet finished . //! Pointer to body, or NULL if the left child has not yet finished .
Body* my_body;
bool has_right_zombie; bool has_right_zombie;
const reduction_context my_context; const reduction_context my_context;
Body* my_body;
aligned_space<Body,1> zombie_space; aligned_space<Body,1> zombie_space;
finish_reduce( char context_ ) : finish_reduce( reduction_context context_ ) :
my_body(NULL), has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
has_right_zombie(false), my_context(context_),
my_context(context_) my_body(NULL)
{ {
} }
task* execute() { task* execute() {
if( has_right_zombie ) { if( has_right_zombie ) {
// Right child was stolen. // Right child was stolen.
Body* s = zombie_space.begin(); Body* s = zombie_space.begin();
my_body->join( *s ); my_body->join( *s );
s->~Body(); s->~Body();
} }
if( my_context==1 ) if( my_context==1 ) // left child
parallel_reduce_store_body( static_cast<finish_reduce*>(parent())->my_body, my_body ); itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
return NULL; return NULL;
} }
template<typename Range,typename Body_, typename Partitioner> template<typename Range,typename Body_, typename Partitioner>
friend class start_reduce; friend class start_reduce;
}; };
//! Task type used to split the work of parallel_reduce. //! Task type used to split the work of parallel_reduce.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
class start_reduce: public task { class start_reduce: public task {
typedef finish_reduce<Body> finish_type; typedef finish_reduce<Body> finish_type;
Body* my_body; Body* my_body;
Range my_range; Range my_range;
typename Partitioner::partition_type my_partition; typename Partitioner::task_partition_type my_partition;
reduction_context my_context; reduction_context my_context; // TODO: factor out into start_reduce_base
/*override*/ task* execute(); /*override*/ task* execute();
template<typename Body_> template<typename Body_>
friend class finish_reduce; friend class finish_reduce;
public:
//! Constructor used for root task //! Constructor used for root task
start_reduce( const Range& range, Body* body, Partitioner& partitio ner ) : start_reduce( const Range& range, Body* body, Partitioner& partitio ner ) :
my_body(body), my_body(body),
my_range(range), my_range(range),
my_partition(partitioner), my_partition(partitioner),
my_context(0) my_context(0)
{ {
} }
//! Splitting constructor used to generate children. //! Splitting constructor used to generate children.
/** this becomes left child. Newly constructed object is right child. */ /** parent_ becomes left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, split ) : start_reduce( start_reduce& parent_, split ) :
my_body(parent_.my_body), my_body(parent_.my_body),
my_range(parent_.my_range,split()), my_range(parent_.my_range,split()),
my_partition(parent_.my_partition,split()), my_partition(parent_.my_partition,split()),
my_context(2) my_context(2)
{ {
my_partition.set_affinity(*this); my_partition.set_affinity(*this);
parent_.my_context = 1; parent_.my_context = 1;
} }
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
my_body(parent_.my_body),
my_range(r),
my_partition(parent_.my_partition,split()),
my_context(2) // right leaf mark
{
my_partition.set_affinity(*this);
my_partition.align_depth( d );
parent_.my_context = 1; // left leaf mark
}
//! Update affinity info, if any //! Update affinity info, if any
/*override*/ void note_affinity( affinity_id id ) { /*override*/ void note_affinity( affinity_id id ) {
my_partition.note_affinity( id ); my_partition.note_affinity( id );
} }
public:
static void run( const Range& range, Body& body, Partitioner& parti tioner ) { static void run( const Range& range, Body& body, Partitioner& parti tioner ) {
if( !range.empty() ) { if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) star t_reduce(range,&body,partitioner) ); task::spawn_root_and_wait( *new(task::allocate_root()) star t_reduce(range,&body,partitioner) );
#else #else
// Bound context prevents exceptions thrown in the body from affecting nesting or sibling algorithms, // Bound context prevents exceptions thrown in the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block. // and allows users to handle exceptions safely by wrapping parallel_reduce in the try-block.
task_group_context context; task_group_context context;
task::spawn_root_and_wait( *new(task::allocate_root(context )) start_reduce(range,&body,partitioner) ); task::spawn_root_and_wait( *new(task::allocate_root(context )) start_reduce(range,&body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
} }
} }
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, Partitioner& parti tioner, task_group_context& context ) { static void run( const Range& range, Body& body, Partitioner& parti tioner, task_group_context& context ) {
if( !range.empty() ) if( !range.empty() )
task::spawn_root_and_wait( *new(task::allocate_root(context )) start_reduce(range,&body,partitioner) ); task::spawn_root_and_wait( *new(task::allocate_root(context )) start_reduce(range,&body,partitioner) );
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//! Create a continuation task; serves as a callback for the partitioner
finish_type *create_continuation() {
return new( allocate_continuation() ) finish_type(my_context);
}
//! Run body for range
void run_body( Range &r ) { (*my_body)( r ); }
}; };
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute() { task* start_reduce<Range,Body,Partitioner>::execute() {
if( my_context==2 ) { my_partition.check_being_stolen( *this );
finish_type* p = static_cast<finish_type*>(parent() ); if( my_context==2 ) { // right child
if( !parallel_reduce_load_body(p->my_body) ) { finish_type* parent_ptr = static_cast<finish_type*>(parent());
my_body = new( p->zombie_space.begin() ) Body(*my_body,split()); if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
p->has_right_zombie = true; my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
parent_ptr->has_right_zombie = true;
} }
} else __TBB_ASSERT(my_context==0,0); // because the left leaf spawns right leaves without recycling
my_partition.execute(*this, my_range);
if( my_context==1 ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent());
__TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),0);
itt_store_word_with_release(parent_ptr->my_body, my_body );
}
return NULL;
}
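The split/join protocol implemented above is what a user-defined reduction Body must follow. A minimal sum Body, shown only as a sketch of the documented parallel_reduce Body requirements (all names are illustrative):

    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"

    struct SumBody {
        const float* data;
        float total;
        SumBody( const float* d ) : data(d), total(0) {}
        SumBody( SumBody& other, tbb::split ) : data(other.data), total(0) {}  // splitting constructor
        void operator()( const tbb::blocked_range<size_t>& r ) {
            for( size_t i = r.begin(); i != r.end(); ++i )
                total += data[i];                                              // accumulate this subrange
        }
        void join( SumBody& rhs ) { total += rhs.total; }                      // merge right child's result
    };

    float parallel_sum( const float* d, size_t n ) {
        SumBody body(d);
        tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body );
        return body.total;
    }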
#if TBB_PREVIEW_DETERMINISTIC_REDUCE
//! Task type used to combine the partial results of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_deterministic_reduce: public task {
Body &my_left_body;
Body my_right_body;
finish_deterministic_reduce( Body &body ) :
my_left_body( body ),
my_right_body( body, split() )
{
}
task* execute() {
my_left_body.join( my_right_body );
return NULL;
} }
if( !my_range.is_divisible() || my_partition.should_execute_range(*this) ) { template<typename Range,typename Body_>
friend class start_deterministic_reduce;
(*my_body)( my_range ); };
if( my_context==1 )
parallel_reduce_store_body(static_cast<finish_type*>(parent())->my_body, my_body ); //! Task type used to split the work of parallel_deterministic_reduce.
/** @ingroup algorithms */
return my_partition.continue_after_execute_range(); template<typename Range, typename Body>
class start_deterministic_reduce: public task {
typedef finish_deterministic_reduce<Body> finish_type;
Body &my_body;
Range my_range;
/*override*/ task* execute();
//! Constructor used for root task
start_deterministic_reduce( const Range& range, Body& body ) :
my_body( body ),
my_range( range )
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c ) :
my_body( c.my_right_body ),
my_range( parent_.my_range, split() )
{
}
public:
static void run( const Range& range, Body& body ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,&body) );
#else
// Bound context prevents exceptions thrown in the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce in the try-block.
task_group_context context;
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, task_group_context& context ) {
    if( !range.empty() )
        task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body) );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
};
template<typename Range, typename Body>
task* start_deterministic_reduce<Range,Body>::execute() {
if( !my_range.is_divisible() ) {
my_body( my_range );
return NULL;
} else { } else {
finish_type& c = *new( allocate_continuation()) finish_type(my_context); finish_type& c = *new( allocate_continuation() ) finish_type( my_body );
recycle_as_child_of(c); recycle_as_child_of(c);
c.set_ref_count(2); c.set_ref_count(2);
bool delay = my_partition.decide_whether_to_delay(); start_deterministic_reduce& b = *new( c.allocate_child() ) start_deterministic_reduce( *this, c );
start_reduce& b = *new( c.allocate_child() ) start_reduce(*this,split()); task::spawn(b);
my_partition.spawn_or_delay(delay,b);
return this; return this;
} }
} }
#endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
} // namespace internal
//! @endcond
} //namespace interfaceX
//! @cond INTERNAL
namespace internal {
using interface6::internal::start_reduce;
#if TBB_PREVIEW_DETERMINISTIC_REDUCE
using interface6::internal::start_deterministic_reduce;
#endif
//! Auxiliary class for parallel_reduce; for internal use only. //! Auxiliary class for parallel_reduce; for internal use only.
/** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body" /** The adaptor class that implements \ref parallel_reduce_body_req "parallel_reduce Body"
using given \ref parallel_reduce_lambda_req "anonymous function objects". using given \ref parallel_reduce_lambda_req "anonymous function objects".
**/ **/
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Range, typename Value, typename RealBody, typename Reduction> template<typename Range, typename Value, typename RealBody, typename Reduction>
class lambda_reduce_body { class lambda_reduce_body {
//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced //FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
// (might require some performance measurements) // (might require some performance measurements)
skipping to change at line 382 skipping to change at line 464
/** @ingroup algorithms **/ /** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction> template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                       affinity_partitioner& partitioner, task_group_context& context ) { affinity_partitioner& partitioner, task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction); internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner> internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
                          ::run( range, body, partitioner, context ); ::run( range, body, partitioner, context );
return body.result(); return body.result();
} }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
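A caller-side sketch of the functional (lambda) form adapted by lambda_reduce_body above, assuming a compiler with C++11 lambda support:

    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"
    #include <vector>

    double sum( const std::vector<double>& v ) {
        return tbb::parallel_reduce(
            tbb::blocked_range<size_t>( 0, v.size() ),
            0.0,                                                    // identity element
            [&]( const tbb::blocked_range<size_t>& r, double acc ) {
                for( size_t i = r.begin(); i != r.end(); ++i )
                    acc += v[i];
                return acc;                                         // partial result for this subrange
            },
            []( double x, double y ) { return x + y; } );           // reduction of two partial results
    }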
#if TBB_PREVIEW_DETERMINISTIC_REDUCE
//! Parallel iteration with deterministic reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body ) {
    internal::start_deterministic_reduce<Range,Body>::run( range, body );
}

#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
    internal::start_deterministic_reduce<Range,Body>::run( range, body, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */

/** parallel_reduce overloads that work with anonymous function objects
    (see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/

//! Parallel iteration with deterministic reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
                          ::run(range, body);
    return body.result();
}

#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
                                     task_group_context& context ) {
    internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
    internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction> >
                          ::run( range, body, context );
    return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
#endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */
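The preview functions above can be exercised as follows; this is a sketch that assumes the preview macro is defined before any TBB header is included and that the compiler supports lambdas:

    #define TBB_PREVIEW_DETERMINISTIC_REDUCE 1
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"
    #include <functional>
    #include <vector>

    // Same-input runs produce bit-identical sums, because the split/join tree
    // does not depend on dynamic load-balancing decisions.
    float deterministic_sum( const std::vector<float>& v ) {
        return tbb::parallel_deterministic_reduce(
            tbb::blocked_range<size_t>( 0, v.size() ),
            0.0f,
            [&]( const tbb::blocked_range<size_t>& r, float acc ) {
                for( size_t i = r.begin(); i != r.end(); ++i )
                    acc += v[i];
                return acc;
            },
            std::plus<float>() );
    }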
//@} //@}
} // namespace tbb } // namespace tbb
#endif /* __TBB_parallel_reduce_H */ #endif /* __TBB_parallel_reduce_H */
 End of changes. 25 change blocks. 
61 lines changed or deleted 210 lines changed or added


 parallel_scan.h   parallel_scan.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 309 skipping to change at line 309
__TBB_ASSERT( !*return_slot, NULL ); __TBB_ASSERT( !*return_slot, NULL );
} }
return next_task; return next_task;
} }
} // namespace internal } // namespace internal
//! @endcond //! @endcond
// Requirements on Range concept are documented in blocked_range.h // Requirements on Range concept are documented in blocked_range.h
/** \page parallel_scan_body_req Requirements on parallel_scan body /** \page parallel_scan_body_req Requirements on parallel_scan body
Class \c Body implementing the concept of parallel_reduce body must define: Class \c Body implementing the concept of parallel_scan body must define:
- \code Body::Body( Body&, split ); \endcode Splitting constructor. - \code Body::Body( Body&, split ); \endcode Splitting constructor.
Split \c b so that \c this and \c b can accumulate separately Split \c b so that \c this and \c b can accumulate separately
- \code Body::~Body(); \endcode Destructor - \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode - \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
Preprocess iterations for range \c r Preprocess iterations for range \c r
- \code void Body::operator()( const Range& r, final_scan_tag ); \endcode - \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
Do final processing for iterations of range \c r Do final processing for iterations of range \c r
- \code void Body::reverse_join( Body& a ); \endcode - \code void Body::reverse_join( Body& a ); \endcode
Merge preprocessing state of \c a into \c this, where \c a was Merge preprocessing state of \c a into \c this, where \c a was
created earlier from \c b by b's splitting constructor created earlier from \c b by b's splitting constructor
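A minimal Body satisfying the requirements listed above, written as a sketch of a running (prefix) sum; the templated operator() relies on both scan tags providing is_final_scan(), and assign() is included since parallel_scan copies the final state back:

    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"

    class RunningSum {
        const float* const x;   // input
        float* const y;         // output: y[i] = x[0] + ... + x[i]
        float sum;
    public:
        RunningSum( const float x_[], float y_[] ) : x(x_), y(y_), sum(0) {}
        RunningSum( RunningSum& b, tbb::split ) : x(b.x), y(b.y), sum(0) {}
        template<typename Tag>
        void operator()( const tbb::blocked_range<int>& r, Tag ) {
            float temp = sum;
            for( int i = r.begin(); i < r.end(); ++i ) {
                temp += x[i];
                if( Tag::is_final_scan() )   // only the final pass writes output
                    y[i] = temp;
            }
            sum = temp;
        }
        void reverse_join( RunningSum& a ) { sum = a.sum + sum; }
        void assign( RunningSum& b ) { sum = b.sum; }
    };

    void prefix_sum( const float x[], float y[], int n ) {
        RunningSum body(x, y);
        tbb::parallel_scan( tbb::blocked_range<int>(0, n), body );
    }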
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 parallel_sort.h   parallel_sort.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 114 skipping to change at line 114
// array[l..j) is less or equal to key. // array[l..j) is less or equal to key.
// array(j..r) is greater or equal to key. // array(j..r) is greater or equal to key.
// array[j] is equal to key // array[j] is equal to key
i=j+1; i=j+1;
begin = array+i; begin = array+i;
size = range.size-i; size = range.size-i;
range.size = j; range.size = j;
} }
}; };
#if __TBB_TASK_GROUP_CONTEXT
//! Body class used to test if elements in a range are presorted //! Body class used to test if elements in a range are presorted
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare> template<typename RandomAccessIterator, typename Compare>
class quick_sort_pretest_body : internal::no_assign { class quick_sort_pretest_body : internal::no_assign {
const Compare &comp; const Compare &comp;
public: public:
quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {} quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
void operator()( const blocked_range<RandomAccessIterator>& range ) con st { void operator()( const blocked_range<RandomAccessIterator>& range ) con st {
skipping to change at line 140 skipping to change at line 141
// The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1 // The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
if ( comp( *(k), *(k-1) ) ) { if ( comp( *(k), *(k-1) ) ) {
my_task.cancel_group_execution(); my_task.cancel_group_execution();
break; break;
} }
} }
} }
}; };
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Body class used to sort elements in a range that is smaller than the grainsize. //! Body class used to sort elements in a range that is smaller than the grainsize.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare> template<typename RandomAccessIterator, typename Compare>
struct quick_sort_body { struct quick_sort_body {
void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const { void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
//SerialQuickSort( range.begin, range.size, range.comp ); //SerialQuickSort( range.begin, range.size, range.comp );
std::sort( range.begin, range.begin + range.size, range.comp ); std::sort( range.begin, range.begin + range.size, range.comp );
} }
}; };
//! Wrapper method to initiate the sort by calling parallel_for. //! Wrapper method to initiate the sort by calling parallel_for.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare> template<typename RandomAccessIterator, typename Compare>
void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) { void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
#if __TBB_TASK_GROUP_CONTEXT
task_group_context my_context; task_group_context my_context;
const int serial_cutoff = 9; const int serial_cutoff = 9;
__TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" ); __TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
RandomAccessIterator k; RandomAccessIterator k;
for ( k = begin ; k != begin + serial_cutoff; ++k ) { for ( k = begin ; k != begin + serial_cutoff; ++k ) {
if ( comp( *(k+1), *k ) ) { if ( comp( *(k+1), *k ) ) {
goto do_parallel_quick_sort; goto do_parallel_quick_sort;
} }
} }
parallel_for( blocked_range<RandomAccessIterator>(k+1, end), parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
quick_sort_pretest_body<RandomAccessIterator,Compare>(comp), quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
auto_partitioner(), auto_partitioner(),
my_context); my_context);
if (my_context.is_group_execution_cancelled()) if (my_context.is_group_execution_cancelled())
do_parallel_quick_sort: do_parallel_quick_sort:
#endif /* __TBB_TASK_GROUP_CONTEXT */
parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ), parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ),
quick_sort_body<RandomAccessIterator,Compare>(), quick_sort_body<RandomAccessIterator,Compare>(),
auto_partitioner() ); auto_partitioner() );
} }
} // namespace internal } // namespace internal
//! @endcond //! @endcond
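For reference, the public entry point that this internal machinery backs is used as follows (sorting in descending order with an explicit comparator as an arbitrary example):

    #include "tbb/parallel_sort.h"
    #include <functional>
    #include <vector>

    void sort_descending( std::vector<int>& v ) {
        // Without the third argument, parallel_sort uses std::less and sorts ascending.
        tbb::parallel_sort( v.begin(), v.end(), std::greater<int>() );
    }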
/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort /** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort: Requirements on value type \c T of \c RandomAccessIterator for \c parallel_sort:
 End of changes. 5 change blocks. 
1 lines changed or deleted 5 lines changed or added


 parallel_while.h   parallel_while.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 partitioner.h   partitioner.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 32 skipping to change at line 32
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_partitioner_H #ifndef __TBB_partitioner_H
#define __TBB_partitioner_H #define __TBB_partitioner_H
#ifndef __TBB_INITIAL_CHUNKS
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
#define __TBB_INIT_DEPTH 5
#endif
#include "task.h" #include "task.h"
#include "aligned_space.h"
#include "atomic.h"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings
#pragma warning (push)
#pragma warning (disable: 4244)
#endif
namespace tbb { namespace tbb {
class auto_partitioner;
class simple_partitioner;
class affinity_partitioner; class affinity_partitioner;
namespace interface6 {
namespace internal {
class affinity_partition_type;
}
}
//! @cond INTERNAL
namespace internal { namespace internal {
size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor(); size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
//! Defines entry points into tbb run-time library; //! Defines entry point for affinity partitioner into tbb run-time library.
/** The entry points are the constructor and destructor. */
class affinity_partitioner_base_v3: no_copy { class affinity_partitioner_base_v3: no_copy {
friend class tbb::affinity_partitioner; friend class tbb::affinity_partitioner;
friend class tbb::interface6::internal::affinity_partition_type;
//! Array that remembers affinities of tree positions to affinity_id. //! Array that remembers affinities of tree positions to affinity_id.
/** NULL if my_size==0. */ /** NULL if my_size==0. */
affinity_id* my_array; affinity_id* my_array;
//! Number of elements in my_array. //! Number of elements in my_array.
size_t my_size; size_t my_size;
//! Zeros the fields. //! Zeros the fields.
affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {} affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
//! Deallocates my_array. //! Deallocates my_array.
~affinity_partitioner_base_v3() {resize(0);} ~affinity_partitioner_base_v3() {resize(0);}
//! Resize my_array. //! Resize my_array.
/** Retains values if resulting size is the same. */ /** Retains values if resulting size is the same. */
void __TBB_EXPORTED_METHOD resize( unsigned factor ); void __TBB_EXPORTED_METHOD resize( unsigned factor );
friend class affinity_partition_type;
}; };
//! Provides default methods for partition objects without affinity. //! Provides backward-compatible methods for partition objects without affinity.
class partition_type_base { class partition_type_base {
public: public:
void set_affinity( task & ) {} void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {} void note_affinity( task::affinity_id ) {}
task* continue_after_execute_range() {return NULL;} task* continue_after_execute_range() {return NULL;}
bool decide_whether_to_delay() {return false;} bool decide_whether_to_delay() {return false;}
void spawn_or_delay( bool, task& b ) { void spawn_or_delay( bool, task& b ) {
task::spawn(b); task::spawn(b);
} }
}; };
class affinity_partition_type;
template<typename Range, typename Body, typename Partitioner> class start_for;
template<typename Range, typename Body, typename Partitioner> class start_reduce;
template<typename Range, typename Body> class start_reduce_with_affinity;
template<typename Range, typename Body, typename Partitioner> class start_scan; template<typename Range, typename Body, typename Partitioner> class start_scan;
} // namespace internal } // namespace internal
//! @endcond //! @endcond
//! A simple partitioner namespace serial {
/** Divides the range until the range is not divisible. namespace interface6 {
@ingroup algorithms */ template<typename Range, typename Body, typename Partitioner> class start_f
class simple_partitioner { or;
public: }
simple_partitioner() {} }
private:
template<typename Range, typename Body, typename Partitioner> friend class internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
class partition_type: public internal::partition_type_base { namespace interface6 {
public: //! @cond INTERNAL
bool should_execute_range(const task& ) {return false;} namespace internal {
partition_type( const simple_partitioner& ) {} using namespace tbb::internal;
partition_type( const partition_type&, split ) {} template<typename Range, typename Body, typename Partitioner> class start_f
}; or;
}; template<typename Range, typename Body, typename Partitioner> class start_r
educe;
//! An auto partitioner //! Join task node that contains shared flag for stealing feedback
/** The range is initial divided into several large chunks. class flag_task: public task {
Chunks are further subdivided into VICTIM_CHUNKS pieces if they are sto
len and divisible.
@ingroup algorithms */
class auto_partitioner {
public: public:
auto_partitioner() {} tbb::atomic<bool> child_stolen;
flag_task() { child_stolen = false; }
private: task* execute() { return NULL; }
template<typename Range, typename Body, typename Partitioner> friend cl };
ass internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend cl
ass internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend cl
ass internal::start_scan;
class partition_type: public internal::partition_type_base { //! Task to signal the demand without carrying the work
size_t num_chunks; class signal_task: public task {
static const size_t VICTIM_CHUNKS = 4;
public: public:
bool should_execute_range(const task &t) { task* execute() {
if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() ) if( is_stolen_task() ) {
num_chunks = VICTIM_CHUNKS; static_cast<flag_task*>(parent())->child_stolen = true;
return num_chunks==1;
}
partition_type( const auto_partitioner& ) : num_chunks(internal::get_initial_auto_partitioner_divisor()) {}
partition_type( partition_type& pt, split ) {
num_chunks = pt.num_chunks /= 2u;
} }
}; return NULL;
}
}; };
//! An affinity partitioner //! Depth is a relative depth of recursive division inside a range pool. Re
class affinity_partitioner: internal::affinity_partitioner_base_v3 { lative depth allows
public: //! infinite absolute depth of the recursion for heavily imbalanced workloa
affinity_partitioner() {} ds with range represented
//! by a number that cannot fit into machine word.
private: typedef unsigned char depth_t;
template<typename Range, typename Body, typename Partitioner> friend cl
ass internal::start_for; //! Range pool stores ranges of type T in a circular buffer with MaxCapacit
template<typename Range, typename Body, typename Partitioner> friend cl y
ass internal::start_reduce; template <typename T, depth_t MaxCapacity>
template<typename Range, typename Body> friend class internal::start_re class range_vector {
duce_with_affinity; depth_t my_head;
template<typename Range, typename Body, typename Partitioner> friend cl depth_t my_tail;
ass internal::start_scan; depth_t my_size;
depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
tbb::aligned_space<T, MaxCapacity> my_pool;
typedef internal::affinity_partition_type partition_type; public:
friend class internal::affinity_partition_type; //! initialize via first range in pool
range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
my_depth[0] = 0;
new( my_pool.begin() ) T(elem);//TODO: std::move?
}
~range_vector() {
while( !empty() ) pop_back();
}
bool empty() const { return my_size == 0; }
depth_t size() const { return my_size; }
//! Populates range pool via ranges up to max depth or while divisible
//! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up
to two 1/4 pieces
void split_to_fill(depth_t max_depth) {
while( my_size < MaxCapacity && my_depth[my_head] < max_depth
&& my_pool.begin()[my_head].is_divisible() ) {
depth_t prev = my_head;
my_head = (my_head + 1) % MaxCapacity;
new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy
TODO: std::move?
my_pool.begin()[prev].~T(); // instead of assignment
new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split());
// do 'inverse' split
my_depth[my_head] = ++my_depth[prev];
my_size++;
}
}
void pop_back() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size
");
my_pool.begin()[my_head].~T();
my_size--;
my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
}
void pop_front() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty siz
e");
my_pool.begin()[my_tail].~T();
my_size--;
my_tail = (my_tail + 1) % MaxCapacity;
}
T& back() {
__TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
return my_pool.begin()[my_head];
}
T& front() {
__TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
return my_pool.begin()[my_tail];
}
//! similarly to front(), returns depth of the first range in the pool
depth_t front_depth() {
__TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty s
ize");
return my_depth[my_tail];
}
}; };
//! @cond INTERNAL //! Provides default methods for partition objects and common algorithm blocks.
namespace internal {
template <typename Partition>
struct partition_type_base {
// decision makers
void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {}
bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
bool check_for_demand(task &) { return false; }
bool divisions_left() { return true; } // part of old should_execute_range()
bool should_create_trap() { return false; }
depth_t max_depth() { return 0; }
void align_depth(depth_t) { }
// common function blocks
Partition& derived() { return *static_cast<Partition*>(this); }
template<typename StartType>
flag_task* split_work(StartType &start) {
flag_task* parent_ptr = start.create_continuation(); // the type here is to express expectation
start.set_parent(parent_ptr);
parent_ptr->set_ref_count(2);
StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, split());
start.spawn(right_work);
return parent_ptr;
}
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
// The algorithm in a few words ([]-denotes calls to decision methods of the partitioner):
// [If this task is stolen, adjust depth and divisions if necessary, set flag].
// If range is divisible {
//    Spread the work while [initial divisions left];
//    Create trap task [if necessary];
// }
// If not divisible or [max depth is reached], execute, else do the range pool part
task* parent_ptr = start.parent();
if( range.is_divisible() ) {
if( derived().divisions_left() )
do parent_ptr = split_work(start); // split until divisions_left()
while( range.is_divisible() && derived().divisions_left() );
if( derived().should_create_trap() ) { // only for range pool
if( parent_ptr->ref_count() > 1 ) { // create new parent if necessary
parent_ptr = start.create_continuation();
start.set_parent(parent_ptr);
} else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL);
parent_ptr->set_ref_count(2); // safe because parent has only one reference
signal_task& right_signal = *new( parent_ptr->allocate_child() ) signal_task();
start.spawn(right_signal); // pure signal is to avoid deep recursion in the end
}
}
if( !range.is_divisible() || !derived().max_depth() )
start.run_body( range ); // simple partitioner goes always here
else { // do range pool
internal::range_vector<Range, Partition::range_pool_size> range_pool(range);
do {
    range_pool.split_to_fill(derived().max_depth()); // fill range pool
if( derived().check_for_demand( start ) ) {
if( range_pool.size() > 1 ) {
parent_ptr = start.create_continuation();
start.set_parent(parent_ptr);
parent_ptr->set_ref_count(2);
StartType& right_work = *new( parent_ptr->allocate_child() ) StartType(start, range_pool.front(), range_pool.front_depth());
start.spawn(right_work);
range_pool.pop_front();
continue;
}
if( range_pool.back().is_divisible() ) // was not enough depth to fork a task
    continue; // note: check_for_demand() should guarantee increasing max_depth() next time
}
start.run_body( range_pool.back() );
range_pool.pop_back();
} while( !range_pool.empty() && !start.is_cancelled() );
}
}
};
class affinity_partition_type: public no_copy { //! Provides default methods for auto (adaptive) partition objects.
//! Must be power of two template <typename Partition>
static const unsigned factor = 16; struct auto_partition_type_base : partition_type_base<Partition> {
static const size_t VICTIM_CHUNKS = 4; size_t my_divisor;
depth_t my_max_depth;
auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) {
my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4;
__TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partitioner_divisor() is not valid");
}
auto_partition_type_base(auto_partition_type_base &src, split) {
my_max_depth = src.my_max_depth;
#if __TBB_INITIAL_TASK_IMBALANCE
if( src.my_divisor <= 1 ) my_divisor = 0;
else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u;
#else
my_divisor = src.my_divisor / 2u;
src.my_divisor = src.my_divisor - my_divisor; // TODO: check the ef
fect separately
if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(
src.my_divisor/my_divisor));
#endif
}
bool check_being_stolen( task &t) { // part of old should_execute_range()
    if( !my_divisor ) {
        my_divisor = 1; // todo: replace by on-stack flag (partition_state's member)?
if( t.is_stolen_task() ) {
#if TBB_USE_EXCEPTIONS
// RTTI is available, check whether the cast is valid
__TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
// correctness of the cast relies on avoiding the root task, for which:
// - initial value of my_divisor != 0 (protected by separate assertion)
// - is_stolen_task() always returns false for the root task.
#endif
static_cast<flag_task*>(t.parent())->child_stolen = true;
my_max_depth++;
return true;
}
}
return false;
}
bool divisions_left() { // part of old should_execute_range()
if( my_divisor > 1 ) return true;
if( my_divisor && my_max_depth > 1 ) { // can split the task and on
ce more internally. TODO: on-stack flag instead
// keep same fragmentation while splitting for the local task p
ool
my_max_depth--;
my_divisor = 0;
return true;
} else return false;
}
bool should_create_trap() {
return my_divisor > 0;
}
bool check_for_demand(task &t) {
if( static_cast<flag_task*>(t.parent())->child_stolen ) {
my_max_depth++;
return true;
} else return false;
}
void align_depth(depth_t base) {
__TBB_ASSERT(base <= my_max_depth, 0);
my_max_depth -= base;
}
depth_t max_depth() { return my_max_depth; }
};
internal::affinity_id* my_array; //! Provides default methods for affinity (adaptive) partition objects.
task_list delay_list; class affinity_partition_type : public auto_partition_type_base<affinity_pa
unsigned map_begin, map_end; rtition_type> {
size_t num_chunks; static const unsigned factor_power = 4;
static const unsigned factor = 1<<factor_power;
bool my_delay;
unsigned map_begin, map_end, map_mid;
tbb::internal::affinity_id* my_array;
void set_mid() {
unsigned d = (map_end - map_begin)/2; // we could add 1 but it is r
ather for LIFO affinity
if( d > factor )
d &= 0u-factor;
map_mid = map_end - d;
}
public: public:
affinity_partition_type( affinity_partitioner& ap ) { affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap ) {
__TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
ap.resize(factor); ap.resize(factor);
my_array = ap.my_array; my_array = ap.my_array;
map_begin = 0; map_begin = 0;
map_end = unsigned(ap.my_size); map_end = unsigned(ap.my_size);
num_chunks = internal::get_initial_auto_partitioner_divisor(); set_mid();
my_delay = true;
my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks be distributed across workers
my_max_depth = factor_power+1; // the first factor_power ranges will be spawned, and >=1 ranges should be left
__TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
} }
affinity_partition_type(affinity_partition_type& p, split) : my_array(p affinity_partition_type(affinity_partition_type& p, split)
.my_array) { : auto_partition_type_base<affinity_partition_type>(p, split()), my
_array(p.my_array) {
__TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi n)%factor==0, NULL ); __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi n)%factor==0, NULL );
num_chunks = p.num_chunks /= 2; map_end = p.map_end;
unsigned e = p.map_end; map_begin = p.map_end = p.map_mid;
unsigned d = (e - p.map_begin)/2; set_mid(); p.set_mid();
if( d>factor ) my_delay = p.my_delay;
d &= 0u-factor;
map_end = e;
map_begin = p.map_end = e-d;
}
bool should_execute_range(const task &t) {
if( num_chunks < VICTIM_CHUNKS && t.is_stolen_task() )
num_chunks = VICTIM_CHUNKS;
return num_chunks == 1;
} }
void set_affinity( task &t ) { void set_affinity( task &t ) {
if( map_begin<map_end ) if( map_begin<map_end )
t.set_affinity( my_array[map_begin] ); t.set_affinity( my_array[map_begin] );
} }
void note_affinity( task::affinity_id id ) { void note_affinity( task::affinity_id id ) {
if( map_begin<map_end ) if( map_begin<map_end )
my_array[map_begin] = id; my_array[map_begin] = id;
} }
task* continue_after_execute_range() { bool check_for_demand( task &t ) {
task* first = NULL; if( !my_delay ) {
if( !delay_list.empty() ) { if( map_mid<map_end ) {
first = &delay_list.pop_front(); __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0);
while( !delay_list.empty() ) { return true;// do not do my_max_depth++ here, but be sure m
task::spawn(*first); y_max_depth is big enough
first = &delay_list.pop_front();
} }
} if( static_cast<flag_task*>(t.parent())->child_stolen ) {
return first; my_max_depth++;
return true;
}
} else my_delay = false;
return false;
} }
bool decide_whether_to_delay() { bool divisions_left() { // part of old should_execute_range()
// The possible underflow caused by "-1u" is deliberate return my_divisor > 1;
return (map_begin&(factor-1))==0 && map_end-map_begin-1u<factor;
}
void spawn_or_delay( bool delay, task& b ) {
if( delay )
delay_list.push_back(b);
else
task::spawn(b);
}
~affinity_partition_type() {
// The delay_list can be non-empty if an exception is thrown.
while( !delay_list.empty() ) {
task& t = delay_list.pop_front();
t.destroy(t);
}
} }
bool should_create_trap() {
return true; // TODO: rethink for the stage after memorizing level
}
static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
}; };
} // namespace internal class auto_partition_type: public auto_partition_type_base<auto_partition_type> {
public:
auto_partition_type( const auto_partitioner& ) {}
auto_partition_type( auto_partition_type& src, split)
: auto_partition_type_base<auto_partition_type>(src, split()) {}
static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
};
class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
simple_partition_type( const simple_partitioner& ) {}
simple_partition_type( const simple_partition_type&, split ) {}
//! simplified algorithm
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
while( range.is_divisible() )
split_work( start );
start.run_body( range );
}
//static const unsigned range_pool_size = 1; - not necessary because ex
ecute() is overridden
};
//! Backward-compatible partition for auto and affinity partition objects.
class old_auto_partition_type: public tbb::internal::partition_type_base {
size_t num_chunks;
static const size_t VICTIM_CHUNKS = 4;
public:
bool should_execute_range(const task &t) {
if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
num_chunks = VICTIM_CHUNKS;
return num_chunks==1;
}
old_auto_partition_type( const auto_partitioner& )
: num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( const affinity_partitioner& )
    : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( old_auto_partition_type& pt, split ) {
num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
}
};
} // namespace interfaceX::internal
//! @endcond //! @endcond
} // namespace interfaceX
//! A simple partitioner
/** Divides the range until the range is not divisible.
@ingroup algorithms */
class simple_partitioner {
public:
simple_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
class partition_type: public internal::partition_type_base {
public:
bool should_execute_range(const task& ) {return false;}
partition_type( const simple_partitioner& ) {}
partition_type( const partition_type&, split ) {}
};
// new implementation just extends existing interface
typedef interface6::internal::simple_partition_type task_partition_type;
};
//! An auto partitioner
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
@ingroup algorithms */
class auto_partitioner {
public:
auto_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
typedef interface6::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface6::internal::auto_partition_type task_partition_type;
};
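A short sketch of auto_partitioner usage (illustration only; headers as in the previous sketch): auto_partitioner is also what parallel_for uses when no partitioner is specified, so the two calls below behave the same, and chunk sizes adapt to observed stealing rather than to a fixed grainsize.

    template<typename Body>   // any callable taking a const tbb::blocked_range<size_t>&
    void apply_with_auto_partitioner( const Body& body, size_t n ) {
        tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body, tbb::auto_partitioner() );
        tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body );   // equivalent: auto_partitioner is the default
    }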
//! An affinity partitioner
class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
affinity_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend class serial::interface6::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_for;
    template<typename Range, typename Body, typename Partitioner> friend class interface6::internal::start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility - for parallel_scan only
typedef interface6::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
    typedef interface6::internal::affinity_partition_type task_partition_type;
};
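A sketch of the intended affinity_partitioner pattern (illustration only; headers as in the earlier sketch): the partitioner object is created once, outside the series of loops, and passed by reference to each invocation so that later sweeps over the same data replay the earlier thread-to-chunk assignment.

    template<typename Body>
    void repeated_sweeps( const Body& body, size_t n, int sweeps ) {
        tbb::affinity_partitioner ap;        // must live across the whole series of loops
        for( int s = 0; s < sweeps; ++s )
            tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body, ap );   // note: passed by non-const reference
    }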
} // namespace tbb } // namespace tbb
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4244 is back
#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH
#endif /* __TBB_partitioner_H */ #endif /* __TBB_partitioner_H */
 End of changes. 35 change blocks. 
132 lines changed or deleted 467 lines changed or added


 pipeline.h   pipeline.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_pipeline_H #ifndef __TBB_pipeline_H
#define __TBB_pipeline_H #define __TBB_pipeline_H
#include "atomic.h" #include "atomic.h"
#include "task.h" #include "task.h"
#include "tbb_allocator.h"
#include <cstddef> #include <cstddef>
namespace tbb { namespace tbb {
class pipeline; class pipeline;
class filter; class filter;
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
skipping to change at line 56 skipping to change at line 57
typedef unsigned long Token; typedef unsigned long Token;
typedef long tokendiff_t; typedef long tokendiff_t;
class stage_task; class stage_task;
class input_buffer; class input_buffer;
class pipeline_root_task; class pipeline_root_task;
class pipeline_cleaner; class pipeline_cleaner;
} // namespace internal } // namespace internal
namespace interface5 { namespace interface6 {
template<typename T, typename U> class filter_t; template<typename T, typename U> class filter_t;
namespace internal { namespace internal {
class pipeline_proxy; class pipeline_proxy;
} }
} }
//! @endcond //! @endcond
//! A stage in a pipeline. //! A stage in a pipeline.
/** @ingroup algorithms */ /** @ingroup algorithms */
class filter: internal::no_copy { class filter: internal::no_copy {
private: private:
//! Value used to mark "not in pipeline" //! Value used to mark "not in pipeline"
    static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intptr_t(-1));} static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intptr_t(-1));}
protected:
//! The lowest bit 0 is for parallel vs. serial //! The lowest bit 0 is for parallel vs. serial
static const unsigned char filter_is_serial = 0x1; static const unsigned char filter_is_serial = 0x1;
//! 4th bit distinguishes ordered vs unordered filters. //! 4th bit distinguishes ordered vs unordered filters.
/** The bit was not set for parallel filters in TBB 2.1 and earlier, /** The bit was not set for parallel filters in TBB 2.1 and earlier,
but is_ordered() function always treats parallel filters as out of order. */ but is_ordered() function always treats parallel filters as out of order. */
static const unsigned char filter_is_out_of_order = 0x1<<4; static const unsigned char filter_is_out_of_order = 0x1<<4;
//! 5th bit distinguishes thread-bound and regular filters. //! 5th bit distinguishes thread-bound and regular filters.
static const unsigned char filter_is_bound = 0x1<<5; static const unsigned char filter_is_bound = 0x1<<5;
//! 6th bit marks input filters emitting small objects
static const unsigned char filter_may_emit_null = 0x1<<6;
//! 7th bit defines exception propagation mode expected by the applicat ion. //! 7th bit defines exception propagation mode expected by the applicat ion.
static const unsigned char exact_exception_propagation = static const unsigned char exact_exception_propagation =
#if TBB_USE_CAPTURED_EXCEPTION #if TBB_USE_CAPTURED_EXCEPTION
0x0; 0x0;
#else #else
0x1<<7; 0x1<<7;
#endif /* TBB_USE_CAPTURED_EXCEPTION */ #endif /* TBB_USE_CAPTURED_EXCEPTION */
static const unsigned char current_version = __TBB_PIPELINE_VERSION(5); static const unsigned char current_version = __TBB_PIPELINE_VERSION(5);
static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for v ersion static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for v ersion
skipping to change at line 124 skipping to change at line 128
filter( mode filter_mode ) : filter( mode filter_mode ) :
next_filter_in_pipeline(not_in_pipeline()), next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(NULL), my_input_buffer(NULL),
my_filter_mode(static_cast<unsigned char>(filter_mode | exact_excep tion_propagation)), my_filter_mode(static_cast<unsigned char>(filter_mode | exact_excep tion_propagation)),
prev_filter_in_pipeline(not_in_pipeline()), prev_filter_in_pipeline(not_in_pipeline()),
my_pipeline(NULL), my_pipeline(NULL),
next_segment(NULL) next_segment(NULL)
{} {}
// signal end-of-input for concrete_filters
void __TBB_EXPORTED_METHOD set_end_of_input();
public: public:
//! True if filter is serial. //! True if filter is serial.
bool is_serial() const { bool is_serial() const {
return bool( my_filter_mode & filter_is_serial ); return bool( my_filter_mode & filter_is_serial );
} }
//! True if filter must receive stream in order. //! True if filter must receive stream in order.
bool is_ordered() const { bool is_ordered() const {
return (my_filter_mode & (filter_is_out_of_order|filter_is_serial)) ==filter_is_serial; return (my_filter_mode & (filter_is_out_of_order|filter_is_serial)) ==filter_is_serial;
} }
//! True if filter is thread-bound. //! True if filter is thread-bound.
bool is_bound() const { bool is_bound() const {
return ( my_filter_mode & filter_is_bound )==filter_is_bound; return ( my_filter_mode & filter_is_bound )==filter_is_bound;
} }
//! true if an input filter can emit null
bool object_may_be_null() {
        return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null;
}
    //! Operate on an item from the input stream, and return item for output stream. //! Operate on an item from the input stream, and return item for output stream.
/** Returns NULL if filter is a sink. */ /** Returns NULL if filter is a sink. */
virtual void* operator()( void* item ) = 0; virtual void* operator()( void* item ) = 0;
//! Destroy filter. //! Destroy filter.
    /** If the filter was added to a pipeline, the pipeline must be destroyed first. */ /** If the filter was added to a pipeline, the pipeline must be destroyed first. */
virtual __TBB_EXPORTED_METHOD ~filter(); virtual __TBB_EXPORTED_METHOD ~filter();
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
//! Destroys item if pipeline was cancelled. //! Destroys item if pipeline was cancelled.
/** Required to prevent memory leaks. /** Required to prevent memory leaks.
Note it can be called concurrently even for serial filters.*/ Note it can be called concurrently even for serial filters.*/
virtual void finalize( void* /*item*/ ) {}; virtual void finalize( void* /*item*/ ) {};
#endif #endif
private: private:
//! Pointer to next filter in the pipeline. //! Pointer to next filter in the pipeline.
filter* next_filter_in_pipeline; filter* next_filter_in_pipeline;
//! has the filter not yet processed all the tokens it will ever see?
    // (pipeline has not yet reached end_of_input or this filter has not yet
// seen the last token produced by input_filter)
bool has_more_work();
//! Buffer for incoming tokens, or NULL if not required. //! Buffer for incoming tokens, or NULL if not required.
    /** The buffer is required if the filter is serial or follows a thread-bound one. */ /** The buffer is required if the filter is serial or follows a thread-bound one. */
internal::input_buffer* my_input_buffer; internal::input_buffer* my_input_buffer;
friend class internal::stage_task; friend class internal::stage_task;
friend class internal::pipeline_root_task; friend class internal::pipeline_root_task;
friend class pipeline; friend class pipeline;
friend class thread_bound_filter; friend class thread_bound_filter;
//! Storage for filter mode and dynamically checked implementation vers ion. //! Storage for filter mode and dynamically checked implementation vers ion.
skipping to change at line 196 skipping to change at line 213
enum result_type { enum result_type {
// item was processed // item was processed
success, success,
// item is currently not available // item is currently not available
item_not_available, item_not_available,
// there are no more items to process // there are no more items to process
end_of_stream end_of_stream
}; };
protected: protected:
thread_bound_filter(mode filter_mode): thread_bound_filter(mode filter_mode):
filter(static_cast<mode>(filter_mode | filter::filter_is_bound | f ilter::exact_exception_propagation)) filter(static_cast<mode>(filter_mode | filter::filter_is_bound))
{} {}
public: public:
//! If a data item is available, invoke operator() on that item. //! If a data item is available, invoke operator() on that item.
/** This interface is non-blocking. /** This interface is non-blocking.
Returns 'success' if an item was processed. Returns 'success' if an item was processed.
Returns 'item_not_available' if no item can be processed now Returns 'item_not_available' if no item can be processed now
but more may arrive in the future, or if token limit is reached. but more may arrive in the future, or if token limit is reached.
Returns 'end_of_stream' if there are no more items to process. */ Returns 'end_of_stream' if there are no more items to process. */
result_type __TBB_EXPORTED_METHOD try_process_item(); result_type __TBB_EXPORTED_METHOD try_process_item();
skipping to change at line 219 skipping to change at line 236
Returns 'success' if an item was processed. Returns 'success' if an item was processed.
Returns 'end_of_stream' if there are no more items to process. Returns 'end_of_stream' if there are no more items to process.
        Never returns 'item_not_available', as it blocks until another return condition applies. */ Never returns 'item_not_available', as it blocks until another return condition applies. */
result_type __TBB_EXPORTED_METHOD process_item(); result_type __TBB_EXPORTED_METHOD process_item();
private: private:
//! Internal routine for item processing //! Internal routine for item processing
result_type internal_process_item(bool is_blocking); result_type internal_process_item(bool is_blocking);
}; };
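A minimal sketch of driving a thread_bound_filter (illustration only, not from this header): the thread that owns the filter calls process_item() until end_of_stream, while the pipeline itself is run by some other thread, since a thread-bound stage is never executed by the scheduler.

    #include "tbb/pipeline.h"

    // 'f' refers to a user-defined subclass of tbb::thread_bound_filter.
    void service_thread_bound_filter( tbb::thread_bound_filter& f ) {
        while( f.process_item() != tbb::thread_bound_filter::end_of_stream )
            continue;   // process_item() blocks until an item arrives or the stream ends
    }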
//! A processing pipeling that applies filters to items. //! A processing pipeline that applies filters to items.
/** @ingroup algorithms */ /** @ingroup algorithms */
class pipeline { class pipeline {
public: public:
//! Construct empty pipeline. //! Construct empty pipeline.
__TBB_EXPORTED_METHOD pipeline(); __TBB_EXPORTED_METHOD pipeline();
/** Though the current implementation declares the destructor virtual, do not rely on this /** Though the current implementation declares the destructor virtual, do not rely on this
detail. The virtualness is deprecated and may disappear in future versions of TBB. */ detail. The virtualness is deprecated and may disappear in future versions of TBB. */
virtual __TBB_EXPORTED_METHOD ~pipeline(); virtual __TBB_EXPORTED_METHOD ~pipeline();
skipping to change at line 250 skipping to change at line 267
//! Remove all filters from the pipeline. //! Remove all filters from the pipeline.
void __TBB_EXPORTED_METHOD clear(); void __TBB_EXPORTED_METHOD clear();
private: private:
friend class internal::stage_task; friend class internal::stage_task;
friend class internal::pipeline_root_task; friend class internal::pipeline_root_task;
friend class filter; friend class filter;
friend class thread_bound_filter; friend class thread_bound_filter;
friend class internal::pipeline_cleaner; friend class internal::pipeline_cleaner;
friend class tbb::interface5::internal::pipeline_proxy; friend class tbb::interface6::internal::pipeline_proxy;
//! Pointer to first filter in the pipeline. //! Pointer to first filter in the pipeline.
filter* filter_list; filter* filter_list;
//! Pointer to location where address of next filter to be added should be stored. //! Pointer to location where address of next filter to be added should be stored.
filter* filter_end; filter* filter_end;
    //! task whose reference count is used to determine when all stages are done. //! task whose reference count is used to determine when all stages are done.
task* end_counter; task* end_counter;
skipping to change at line 289 skipping to change at line 306
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
    //! Does clean up if pipeline is cancelled or an exception occurred //! Does clean up if pipeline is cancelled or an exception occurred
void clear_filters(); void clear_filters();
#endif #endif
}; };
//------------------------------------------------------------------------ //------------------------------------------------------------------------
// Support for lambda-friendly parallel_pipeline interface // Support for lambda-friendly parallel_pipeline interface
//------------------------------------------------------------------------ //------------------------------------------------------------------------
namespace interface5 { namespace interface6 {
namespace internal { namespace internal {
template<typename T, typename U, typename Body> class concrete_filter; template<typename T, typename U, typename Body> class concrete_filter;
} }
//! input_filter control to signal end-of-input for parallel_pipeline
class flow_control { class flow_control {
bool is_pipeline_stopped; bool is_pipeline_stopped;
flow_control() { is_pipeline_stopped = false; } flow_control() { is_pipeline_stopped = false; }
template<typename T, typename U, typename Body> friend class internal:: concrete_filter; template<typename T, typename U, typename Body> friend class internal:: concrete_filter;
public: public:
void stop() { is_pipeline_stopped = true; } void stop() { is_pipeline_stopped = true; }
}; };
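For orientation, a sketch of how an input-filter body for parallel_pipeline uses flow_control (illustration only; 'Item' and 'read_next' are hypothetical names): the body calls stop() when the source is exhausted, and the value returned after stop() is discarded.

    Item generate_item( tbb::flow_control& fc ) {
        Item item;
        if( !read_next(item) ) {   // hypothetical source of items
            fc.stop();             // no more items will be produced
            return Item();         // return value is ignored once stop() was called
        }
        return item;
    }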
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
template<typename T> struct is_large_object { enum { r = sizeof(T) > sizeof(void *) }; };
template<typename T, bool> class token_helper;
// large object helper (uses tbb_allocator)
template<typename T>
class token_helper<T, true> {
public:
typedef typename tbb::tbb_allocator<T> allocator;
typedef T* pointer;
typedef T value_type;
static pointer create_token(const value_type & source) {
pointer output_t = allocator().allocate(1);
return new (output_t) T(source);
}
static value_type & token(pointer & t) { return *t;}
static void * cast_to_void_ptr(pointer ref) { return (void *) ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token(pointer token) {
allocator().destroy(token);
allocator().deallocate(token,1);
}
};
// pointer specialization
template<typename T>
class token_helper<T*, false > {
public:
typedef T* pointer;
typedef T* value_type;
    static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t;}
static void * cast_to_void_ptr(pointer ref) { return (void *)ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token( pointer /*token*/) {}
};
// small object specialization (converts void* to the correct type, passes objects directly.)
template<typename T>
class token_helper<T, false> {
typedef union {
T actual_value;
void * void_overlay;
} type_to_void_ptr_map;
public:
typedef T pointer; // not really a pointer in this case.
typedef T value_type;
static pointer create_token(const value_type & source) {
return source; }
static value_type & token(pointer & t) { return t;}
static void * cast_to_void_ptr(pointer ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = NULL;
mymap.actual_value = ref;
return mymap.void_overlay;
}
static pointer cast_from_void_ptr(void * ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = ref;
return mymap.actual_value;
}
static void destroy_token( pointer /*token*/) {}
};
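The three token_helper specializations above decide how a user type travels through the pipeline's void* token slot: types no larger than a pointer are bit-copied into the slot, raw pointers pass through unchanged, and larger types are copied into a tbb_allocator block whose address becomes the token. A standalone sketch of the size test (local names only; the real trait is the internal is_large_object shown above):

    #include <cstdio>
    #include <string>

    template<typename T> struct packs_into_token { enum { value = sizeof(T) <= sizeof(void*) }; };

    int main() {
        std::printf( "int:    packed directly = %d\n", int(packs_into_token<int>::value) );
        std::printf( "double: packed directly = %d\n", int(packs_into_token<double>::value) );       // true on LP64
        std::printf( "string: packed directly = %d\n", int(packs_into_token<std::string>::value) );  // false: heap-allocated token
        return 0;
    }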
template<typename T, typename U, typename Body> template<typename T, typename U, typename Body>
class concrete_filter: public tbb::filter { class concrete_filter: public tbb::filter {
Body my_body; const Body& my_body;
typedef token_helper<T,is_large_object<T>::r > t_helper;
typedef typename t_helper::pointer t_pointer;
typedef token_helper<U,is_large_object<U>::r > u_helper;
typedef typename u_helper::pointer u_pointer;
/*override*/ void* operator()(void* input) { /*override*/ void* operator()(void* input) {
T* temp_input = (T*)input; t_pointer temp_input = t_helper::cast_from_void_ptr(input);
        // Call user's operator()() here u_pointer output_u = u_helper::create_token(my_body(t_helper::token(temp_input)));
        void* output = (void*) new U(my_body(*temp_input));
delete temp_input; t_helper::destroy_token(temp_input);
return output; return u_helper::cast_to_void_ptr(output_u);
} }
public: public:
    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
}; };
// input
template<typename U, typename Body> template<typename U, typename Body>
class concrete_filter<void,U,Body>: public filter { class concrete_filter<void,U,Body>: public filter {
Body my_body; const Body& my_body;
typedef token_helper<U, is_large_object<U>::r > u_helper;
typedef typename u_helper::pointer u_pointer;
/*override*/void* operator()(void*) { /*override*/void* operator()(void*) {
flow_control control; flow_control control;
U temp_output = my_body(control); u_pointer output_u = u_helper::create_token(my_body(control));
        void* output = control.is_pipeline_stopped ? NULL : (void*) new U(temp_output); if(control.is_pipeline_stopped) {
            u_helper::destroy_token(output_u);
return output; set_end_of_input();
return NULL;
}
return u_helper::cast_to_void_ptr(output_u);
} }
public: public:
    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} concrete_filter(tbb::filter::mode filter_mode, const Body& body) :
        filter(static_cast<tbb::filter::mode>(filter_mode | filter_may_emit_null)),
        my_body(body)
    {}
}; };
template<typename T, typename Body> template<typename T, typename Body>
class concrete_filter<T,void,Body>: public filter { class concrete_filter<T,void,Body>: public filter {
Body my_body; const Body& my_body;
typedef token_helper<T, is_large_object<T>::r > t_helper;
typedef typename t_helper::pointer t_pointer;
/*override*/ void* operator()(void* input) { /*override*/ void* operator()(void* input) {
T* temp_input = (T*)input; t_pointer temp_input = t_helper::cast_from_void_ptr(input);
my_body(*temp_input); my_body(t_helper::token(temp_input));
delete temp_input; t_helper::destroy_token(temp_input);
return NULL; return NULL;
} }
public: public:
    concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
}; };
template<typename Body> template<typename Body>
class concrete_filter<void,void,Body>: public filter { class concrete_filter<void,void,Body>: public filter {
Body my_body; const Body& my_body;
/** Override privately because it is always called virtually */ /** Override privately because it is always called virtually */
/*override*/ void* operator()(void*) { /*override*/ void* operator()(void*) {
flow_control control; flow_control control;
my_body(control); my_body(control);
        void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1; void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1;
return output; return output;
} }
public: public:
    concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {} concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
skipping to change at line 412 skipping to change at line 510
#ifdef __TBB_TEST_FILTER_NODE_COUNT #ifdef __TBB_TEST_FILTER_NODE_COUNT
--(__TBB_TEST_FILTER_NODE_COUNT); --(__TBB_TEST_FILTER_NODE_COUNT);
#endif #endif
} }
}; };
//! Node in parse tree representing result of make_filter. //! Node in parse tree representing result of make_filter.
template<typename T, typename U, typename Body> template<typename T, typename U, typename Body>
class filter_node_leaf: public filter_node { class filter_node_leaf: public filter_node {
const tbb::filter::mode mode; const tbb::filter::mode mode;
const Body& body; const Body body;
/*override*/void add_to( pipeline& p ) { /*override*/void add_to( pipeline& p ) {
concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,b ody); concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,b ody);
p.add_filter( *f ); p.add_filter( *f );
} }
public: public:
filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body( b) {} filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body( b) {}
}; };
//! Node in parse tree representing join of two filters. //! Node in parse tree representing join of two filters.
class filter_node_join: public filter_node { class filter_node_join: public filter_node {
skipping to change at line 444 skipping to change at line 542
public: public:
filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) { filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) {
left.add_ref(); left.add_ref();
right.add_ref(); right.add_ref();
} }
}; };
} // namespace internal } // namespace internal
//! @endcond //! @endcond
//! Create a filter to participate in parallel_pipeline
template<typename T, typename U, typename Body> template<typename T, typename U, typename Body>
filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) { filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) {
return new internal::filter_node_leaf<T,U,Body>(mode, body); return new internal::filter_node_leaf<T,U,Body>(mode, body);
} }
template<typename T, typename V, typename U> template<typename T, typename V, typename U>
filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& right) { filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& right) {
__TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'"); __TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'");
    __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'"); __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'");
return new internal::filter_node_join(*left.root,*right.root); return new internal::filter_node_join(*left.root,*right.root);
skipping to change at line 527 skipping to change at line 626
); );
} }
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) { inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) {
tbb::task_group_context context; tbb::task_group_context context;
parallel_pipeline(max_number_of_live_tokens, filter_chain, context); parallel_pipeline(max_number_of_live_tokens, filter_chain, context);
} }
#endif // __TBB_TASK_GROUP_CONTEXT #endif // __TBB_TASK_GROUP_CONTEXT
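For reference, a minimal end-to-end sketch of the lambda-friendly interface (illustration only; 'read_double' and the running 'total' are assumptions, and the lambdas presume a C++0x compiler, though function objects work equally well): three filters are built with make_filter, joined with operator&, and run with a bound on live tokens.

    #include "tbb/pipeline.h"
    #include <cmath>

    bool read_double( double& x );   // hypothetical input source
    double total = 0;                // accumulated by the serial output stage

    void run_sqrt_pipeline() {
        tbb::parallel_pipeline( /*max_number_of_live_tokens=*/16,
            tbb::make_filter<void,double>( tbb::filter::serial_in_order,
                []( tbb::flow_control& fc ) -> double {
                    double x;
                    if( !read_double(x) ) { fc.stop(); return 0; }
                    return x;
                } )
          & tbb::make_filter<double,double>( tbb::filter::parallel,
                []( double x ) { return std::sqrt(x); } )
          & tbb::make_filter<double,void>( tbb::filter::serial_in_order,
                []( double x ) { total += x; } ) );
    }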
} // interface5 } // interface6
using interface5::flow_control; using interface6::flow_control;
using interface5::filter_t; using interface6::filter_t;
using interface5::make_filter; using interface6::make_filter;
using interface5::parallel_pipeline; using interface6::parallel_pipeline;
} // tbb } // tbb
#endif /* __TBB_pipeline_H */ #endif /* __TBB_pipeline_H */
 End of changes. 28 change blocks. 
31 lines changed or deleted 135 lines changed or added


 ppl.h   ppl.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 43 skipping to change at line 43
#include "../parallel_invoke.h" #include "../parallel_invoke.h"
#include "../parallel_for_each.h" #include "../parallel_for_each.h"
#include "../parallel_for.h" #include "../parallel_for.h"
#include "../tbb_exception.h" #include "../tbb_exception.h"
#include "../critical_section.h" #include "../critical_section.h"
#include "../reader_writer_lock.h" #include "../reader_writer_lock.h"
#include "../combinable.h" #include "../combinable.h"
namespace Concurrency { namespace Concurrency {
#if __TBB_TASK_GROUP_CONTEXT
using tbb::task_handle; using tbb::task_handle;
using tbb::task_group_status; using tbb::task_group_status;
using tbb::task_group; using tbb::task_group;
using tbb::structured_task_group; using tbb::structured_task_group;
using tbb::invalid_multiple_scheduling; using tbb::invalid_multiple_scheduling;
using tbb::missing_wait; using tbb::missing_wait;
using tbb::make_task; using tbb::make_task;
using tbb::not_complete; using tbb::not_complete;
using tbb::complete; using tbb::complete;
using tbb::canceled; using tbb::canceled;
using tbb::is_current_task_group_canceling; using tbb::is_current_task_group_canceling;
#endif /* __TBB_TASK_GROUP_CONTEXT */
using tbb::parallel_invoke; using tbb::parallel_invoke;
using tbb::strict_ppl::parallel_for; using tbb::strict_ppl::parallel_for;
using tbb::parallel_for_each; using tbb::parallel_for_each;
using tbb::critical_section; using tbb::critical_section;
using tbb::reader_writer_lock; using tbb::reader_writer_lock;
using tbb::combinable; using tbb::combinable;
using tbb::improper_lock; using tbb::improper_lock;
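A sketch of what these aliases enable (illustration only; the callables are hypothetical and the include path may differ by layout): code written against the PPL-style names compiles against the TBB implementations.

    #include "tbb/compat/ppl.h"

    void background_work();        // hypothetical
    void handle_index( int i );    // hypothetical

    void ppl_style_example() {
        Concurrency::task_group tg;
        tg.run( &background_work );
        Concurrency::parallel_for( 0, 100, &handle_index );
        tg.wait();
    }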
 End of changes. 3 change blocks. 
1 lines changed or deleted 3 lines changed or added


 queuing_mutex.h   queuing_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 119 skipping to change at line 119
a byte seems to help performance slightly. */ a byte seems to help performance slightly. */
uintptr_t going; uintptr_t going;
}; };
void __TBB_EXPORTED_METHOD internal_construct(); void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits // Mutex traits
static const bool is_rw_mutex = false; static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false; static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true; static const bool is_fair_mutex = true;
friend class scoped_lock;
private: private:
//! The last competitor requesting the lock //! The last competitor requesting the lock
atomic<scoped_lock*> q_tail; atomic<scoped_lock*> q_tail;
}; };
__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex) __TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
} // namespace tbb } // namespace tbb
 End of changes. 2 change blocks. 
3 lines changed or deleted 1 lines changed or added


 queuing_rw_mutex.h   queuing_rw_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 81 skipping to change at line 81
class scoped_lock; class scoped_lock;
friend class scoped_lock; friend class scoped_lock;
//! The scoped locking pattern //! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release lock. /** It helps to avoid the common problem of forgetting to release lock.
It also nicely provides the "node" for queuing locks. */ It also nicely provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy { class scoped_lock: internal::no_copy {
//! Initialize fields //! Initialize fields
void initialize() { void initialize() {
mutex = NULL; my_mutex = NULL;
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
state = 0xFF; // Set to invalid state my_state = 0xFF; // Set to invalid state
internal::poison_pointer(next); internal::poison_pointer(my_next);
internal::poison_pointer(prev); internal::poison_pointer(my_prev);
#endif /* TBB_USE_ASSERT */ #endif /* TBB_USE_ASSERT */
} }
public: public:
//! Construct lock that has not acquired a mutex. //! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */ /** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();} scoped_lock() {initialize();}
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
scoped_lock( queuing_rw_mutex& m, bool write=true ) { scoped_lock( queuing_rw_mutex& m, bool write=true ) {
initialize(); initialize();
acquire(m,write); acquire(m,write);
} }
//! Release lock (if lock is held). //! Release lock (if lock is held).
~scoped_lock() { ~scoped_lock() {
if( mutex ) release(); if( my_mutex ) release();
} }
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
void acquire( queuing_rw_mutex& m, bool write=true ); void acquire( queuing_rw_mutex& m, bool write=true );
//! Try acquire lock on given mutex. //! Try acquire lock on given mutex.
bool try_acquire( queuing_rw_mutex& m, bool write=true ); bool try_acquire( queuing_rw_mutex& m, bool write=true );
//! Release lock. //! Release lock.
void release(); void release();
//! Upgrade reader to become a writer. //! Upgrade reader to become a writer.
/** Returns true if the upgrade happened without re-acquiring the l ock and false if opposite */ /** Returns true if the upgrade happened without re-acquiring the l ock and false if opposite */
bool upgrade_to_writer(); bool upgrade_to_writer();
//! Downgrade writer to become a reader. //! Downgrade writer to become a reader.
bool downgrade_to_reader(); bool downgrade_to_reader();
private: private:
//! The pointer to the current mutex to work //! The pointer to the current mutex to work
queuing_rw_mutex* mutex; queuing_rw_mutex* my_mutex;
//! The pointer to the previous and next competitors for a mutex //! The pointer to the previous and next competitors for a mutex
scoped_lock * prev, * next; scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next;
typedef unsigned char state_t; typedef unsigned char state_t;
            //! State of the request: reader, writer, active reader, other service states //! State of the request: reader, writer, active reader, other service states
atomic<state_t> state; atomic<state_t> my_state;
//! The local spin-wait variable //! The local spin-wait variable
            /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */ /** Corresponds to "spin" in the pseudocode but inverted for the sake of zero-initialization */
unsigned char going; unsigned char __TBB_atomic my_going;
//! A tiny internal lock //! A tiny internal lock
unsigned char internal_lock; unsigned char my_internal_lock;
//! Acquire the internal lock //! Acquire the internal lock
void acquire_internal_lock(); void acquire_internal_lock();
//! Try to acquire the internal lock //! Try to acquire the internal lock
/** Returns true if lock was successfully acquired. */ /** Returns true if lock was successfully acquired. */
bool try_acquire_internal_lock(); bool try_acquire_internal_lock();
//! Release the internal lock //! Release the internal lock
void release_internal_lock(); void release_internal_lock();
 End of changes. 9 change blocks. 
11 lines changed or deleted 11 lines changed or added


 reader_writer_lock.h   reader_writer_lock.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 96 skipping to change at line 96
internal_construct(); internal_construct();
} }
//! Destructs a reader_writer_lock object //! Destructs a reader_writer_lock object
~reader_writer_lock() { ~reader_writer_lock() {
internal_destroy(); internal_destroy();
} }
//! The scoped lock pattern for write locks //! The scoped lock pattern for write locks
/** Scoped locks help avoid the common problem of forgetting to release the lock. /** Scoped locks help avoid the common problem of forgetting to release the lock.
This type is also serves as the node for queuing locks. */ This type also serves as the node for queuing locks. */
class scoped_lock : tbb::internal::no_copy { class scoped_lock : tbb::internal::no_copy {
public: public:
friend class reader_writer_lock; friend class reader_writer_lock;
        //! Construct with blocking attempt to acquire write lock on the passed-in lock //! Construct with blocking attempt to acquire write lock on the passed-in lock
scoped_lock(reader_writer_lock& lock) { scoped_lock(reader_writer_lock& lock) {
internal_construct(lock); internal_construct(lock);
} }
//! Destructor, releases the write lock //! Destructor, releases the write lock
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 recursive_mutex.h   recursive_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_recursive_mutex_H #ifndef __TBB_recursive_mutex_H
#define __TBB_recursive_mutex_H #define __TBB_recursive_mutex_H
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#if !defined(_WIN32_WINNT) #else
// The following Windows API function is declared explicitly; #include <pthread.h>
// otherwise any user would have to specify /D_WIN32_WINNT=0x0400
extern "C" BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
#endif
#else /* if not _WIN32||_WIN64 */
#include <pthread.h>
#endif /* _WIN32||_WIN64 */ #endif /* _WIN32||_WIN64 */
#include <new> #include <new>
#include "aligned_space.h" #include "aligned_space.h"
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
//! Mutex that allows recursive mutex acquisition. //! Mutex that allows recursive mutex acquisition.
/** Mutex that allows recursive mutex acquisition. /** Mutex that allows recursive mutex acquisition.
 End of changes. 2 change blocks. 
9 lines changed or deleted 4 lines changed or added


 scalable_allocator.h   scalable_allocator.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_scalable_allocator_H #ifndef __TBB_scalable_allocator_H
#define __TBB_scalable_allocator_H #define __TBB_scalable_allocator_H
/** @file */ /** @file */
#include <stddef.h> /* Need ptrdiff_t and size_t from here. */ #include <stddef.h> /* Need ptrdiff_t and size_t from here. */
#if !_MSC_VER
#include <stdint.h> /* Need intptr_t from here. */
#endif
#if !defined(__cplusplus) && __ICC==1100 #if !defined(__cplusplus) && __ICC==1100
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 991) #pragma warning (disable: 991)
#endif #endif
#ifdef __cplusplus #ifdef __cplusplus
extern "C" { extern "C" {
#endif /* __cplusplus */ #endif /* __cplusplus */
skipping to change at line 94 skipping to change at line 97
or 0 (zero) if ptr does not point to such a block. or 0 (zero) if ptr does not point to such a block.
@ingroup memory_allocation */ @ingroup memory_allocation */
size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr); size_t __TBB_EXPORTED_FUNC scalable_msize (void* ptr);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */
#endif /* __cplusplus */ #endif /* __cplusplus */
#ifdef __cplusplus #ifdef __cplusplus
namespace rml {
class MemoryPool;
#define MEM_POLICY_DEFINED 1
typedef void *(*rawAllocType)(intptr_t pool_id, size_t &bytes);
typedef int (*rawFreeType)(intptr_t pool_id, void* raw_ptr, size_t raw_bytes);
struct MemPoolPolicy {
rawAllocType pAlloc;
rawFreeType pFree;
size_t granularity; // granularity of pAlloc allocations
void *pReserved; // reserved for future extensions
size_t szReserved; // size of pReserved data
};
MemoryPool *pool_create(intptr_t pool_id, const MemPoolPolicy* memPoolPolicy);
bool pool_destroy(MemoryPool* memPool);
void *pool_malloc(MemoryPool* memPool, size_t size);
void *pool_realloc(MemoryPool* memPool, void *object, size_t size);
bool pool_reset(MemoryPool* memPool);
bool pool_free(MemoryPool *memPool, void *object);
}
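A sketch of driving the memory-pool interface declared above, using plain malloc/free as the backing store (illustration only; it follows just the declarations shown here, error handling is omitted, and the exact contract of 'granularity' is not spelled out in this excerpt). It assumes scalable_allocator.h has been included.

    #include <cstdlib>

    // Backing-store callbacks built on plain malloc/free.
    static void* raw_alloc( intptr_t /*pool_id*/, size_t& bytes ) { return std::malloc( bytes ); }
    static int   raw_free ( intptr_t /*pool_id*/, void* raw_ptr, size_t /*raw_bytes*/ ) { std::free( raw_ptr ); return 0; }

    void pool_example() {
        rml::MemPoolPolicy policy = { raw_alloc, raw_free, /*granularity=*/4*1024*1024, /*pReserved=*/0, /*szReserved=*/0 };
        rml::MemoryPool* pool = rml::pool_create( /*pool_id=*/0, &policy );
        void* p = rml::pool_malloc( pool, 100 );
        rml::pool_free( pool, p );
        rml::pool_destroy( pool );
    }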
#include <new> /* To use new with the placement argument */ #include <new> /* To use new with the placement argument */
/* Ensure that including this header does not cause implicit linkage with T BB */ /* Ensure that including this header does not cause implicit linkage with T BB */
#ifndef __TBB_NO_IMPLICIT_LINKAGE #ifndef __TBB_NO_IMPLICIT_LINKAGE
#define __TBB_NO_IMPLICIT_LINKAGE 1 #define __TBB_NO_IMPLICIT_LINKAGE 1
#include "tbb_stddef.h" #include "tbb_stddef.h"
#undef __TBB_NO_IMPLICIT_LINKAGE #undef __TBB_NO_IMPLICIT_LINKAGE
#else #else
#include "tbb_stddef.h" #include "tbb_stddef.h"
#endif #endif
 End of changes. 3 change blocks. 
1 lines changed or deleted 29 lines changed or added


 spin_mutex.h   spin_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 44 skipping to change at line 44
#include "aligned_space.h" #include "aligned_space.h"
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_machine.h" #include "tbb_machine.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
//! A lock that occupies a single byte. //! A lock that occupies a single byte.
/** A spin_mutex is a spin mutex that fits in a single byte. /** A spin_mutex is a spin mutex that fits in a single byte.
It should be used only for locking short critical sections It should be used only for locking short critical sections
    (typically <20 instructions) when fairness is not an issue. (typically less than 20 instructions) when fairness is not an issue.
If zero-initialized, the mutex is considered unheld. If zero-initialized, the mutex is considered unheld.
@ingroup synchronization */ @ingroup synchronization */
class spin_mutex { class spin_mutex {
//! 0 if lock is released, 1 if lock is acquired. //! 0 if lock is released, 1 if lock is acquired.
unsigned char flag; __TBB_atomic_flag flag;
public: public:
//! Construct unacquired lock. //! Construct unacquired lock.
/** Equivalent to zero-initialization of *this. */ /** Equivalent to zero-initialization of *this. */
spin_mutex() : flag(0) { spin_mutex() : flag(0) {
#if TBB_USE_THREADING_TOOLS #if TBB_USE_THREADING_TOOLS
internal_construct(); internal_construct();
#endif #endif
} }
//! Represents acquisition of a mutex. //! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy { class scoped_lock : internal::no_copy {
private: private:
//! Points to currently held mutex, or NULL if no lock is held. //! Points to currently held mutex, or NULL if no lock is held.
spin_mutex* my_mutex; spin_mutex* my_mutex;
//! Value to store into spin_mutex::flag to unlock the mutex. //! Value to store into spin_mutex::flag to unlock the mutex.
uintptr_t my_unlock_value; __TBB_Flag my_unlock_value;
//! Like acquire, but with ITT instrumentation. //! Like acquire, but with ITT instrumentation.
void __TBB_EXPORTED_METHOD internal_acquire( spin_mutex& m ); void __TBB_EXPORTED_METHOD internal_acquire( spin_mutex& m );
//! Like try_acquire, but with ITT instrumentation. //! Like try_acquire, but with ITT instrumentation.
bool __TBB_EXPORTED_METHOD internal_try_acquire( spin_mutex& m ); bool __TBB_EXPORTED_METHOD internal_try_acquire( spin_mutex& m );
//! Like release, but with ITT instrumentation. //! Like release, but with ITT instrumentation.
void __TBB_EXPORTED_METHOD internal_release(); void __TBB_EXPORTED_METHOD internal_release();
skipping to change at line 125 skipping to change at line 125
} }
return result; return result;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT*/
} }
//! Release lock //! Release lock
void release() { void release() {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_release(); internal_release();
#else #else
            __TBB_store_with_release(my_mutex->flag, static_cast<unsigned char>(my_unlock_value)); __TBB_UnlockByte(my_mutex->flag, my_unlock_value);
my_mutex = NULL; my_mutex = NULL;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
} }
//! Destroy lock. If holding a lock, releases the lock first. //! Destroy lock. If holding a lock, releases the lock first.
~scoped_lock() { ~scoped_lock() {
if( my_mutex ) { if( my_mutex ) {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_release(); internal_release();
#else #else
                __TBB_store_with_release(my_mutex->flag, static_cast<unsigned char>(my_unlock_value)); __TBB_UnlockByte(my_mutex->flag, my_unlock_value);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
} }
} }
}; };
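For orientation, a sketch of the scoped-lock pattern this class implements (illustration only): the mutex is released automatically when the scoped_lock goes out of scope, including on exception, which suits the short critical sections spin_mutex is meant for.

    #include "tbb/spin_mutex.h"

    tbb::spin_mutex counter_mutex;   // zero-initialized == unheld
    long counter = 0;                // shared state guarded by counter_mutex

    void increment_counter() {
        tbb::spin_mutex::scoped_lock lock( counter_mutex );   // spins until acquired
        ++counter;                                            // keep the critical section short
    }                                                         // released by ~scoped_lock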
void __TBB_EXPORTED_METHOD internal_construct(); void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits // Mutex traits
static const bool is_rw_mutex = false; static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false; static const bool is_recursive_mutex = false;
 End of changes. 6 change blocks. 
6 lines changed or deleted 6 lines changed or added


 spin_rw_mutex.h   spin_rw_mutex.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 165 skipping to change at line 165
__TBB_ASSERT( !mutex, "holding mutex already" ); __TBB_ASSERT( !mutex, "holding mutex already" );
bool result; bool result;
is_writer = write; is_writer = write;
result = write? m.internal_try_acquire_writer() result = write? m.internal_try_acquire_writer()
: m.internal_try_acquire_reader(); : m.internal_try_acquire_reader();
if( result ) if( result )
mutex = &m; mutex = &m;
return result; return result;
} }
private: protected:
//! The pointer to the current mutex that is held, or NULL if no mu tex is held. //! The pointer to the current mutex that is held, or NULL if no mu tex is held.
spin_rw_mutex* mutex; spin_rw_mutex* mutex;
//! If mutex!=NULL, then is_writer is true if holding a writer lock , false if holding a reader lock. //! If mutex!=NULL, then is_writer is true if holding a writer lock , false if holding a reader lock.
/** Not defined if not holding a lock. */ /** Not defined if not holding a lock. */
bool is_writer; bool is_writer;
}; };
// Mutex traits // Mutex traits
static const bool is_rw_mutex = true; static const bool is_rw_mutex = true;
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 sunos_sparc.h   sunos_sparc.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free softwar e As a special exception, you may use this file as part of a free softwar e
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you comp ile templates or use macros or inline functions from this file, or you comp ile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_sunos_sparc_H
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sched.h> // sched_yield
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 1
#define __TBB_release_consistency_helper() __asm__ __volatile__ ("": : :"memory") /** To those working on SPARC hardware. Consider relaxing acquire and release consistency helpers to no-op (as this port covers TSO mode only). **/
inline void __TBB_rel_acq_fence() { __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory"); } #define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
//-------------------------------------------------- //--------------------------------------------------
// Compare and swap // Compare and swap
//-------------------------------------------------- //--------------------------------------------------
/** /**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, retur ns *ptr * Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, retur ns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr== comparand * @param ptr pointer to value in memory to be swapped with value if *ptr== comparand
* @param value value to assign *ptr to if *ptr==comparand * @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr * @param comparand value to compare with *ptr
skipping to change at line 188 skipping to change at line 192
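For orientation, the CAS contract documented above (the primitive returns the previous value of *ptr) is what retry loops are built on. A hedged sketch, assuming the elided __TBB_machine_cmpswp4 definition takes (pointer, new value, comparand) in that order, as the __TBB_CompareAndSwap4 mapping further down suggests; the helper name is an assumption:

    // Sketch only: an atomic add built from the CAS documented above.
    static inline int32_t fetch_and_add_via_cas( volatile int32_t* p, int32_t addend ) {
        int32_t snapshot;
        do {
            snapshot = *p;                                                   // read the current value
        } while( __TBB_machine_cmpswp4( p, snapshot + addend, snapshot ) != snapshot );  // retry if *p changed
        return snapshot;                                                     // value observed before the addition
    }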
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){ static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
unsigned char result; unsigned char result;
__asm__ __volatile__ ( __asm__ __volatile__ (
"ldstub\t [%2], %0\n" "ldstub\t [%2], %0\n"
: "=r"(result), "=m"(flag) : "=r"(result), "=m"(flag)
: "r"(&flag), "m"(flag) : "r"(&flag), "m"(flag)
: "memory"); : "memory");
return result == 0; return result == 0;
} }
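As the lock-related comments further down note, the generic __TBB_LockByte simply retries TryLockByte; a hedged sketch of that retry loop (the helper names are illustrative, not part of the port):

    static inline void lock_byte( unsigned char& flag ) {
        while( !__TBB_machine_trylockbyte(flag) )   // ldstub returned non-zero: the byte is already held
            __TBB_Pause(16);                        // brief back-off between attempts
    }

    static inline void unlock_byte( unsigned char& flag ) {
        __TBB_release_consistency_helper();         // make critical-section stores visible first
        flag = 0;                                   // clearing the byte releases the lock
    }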
// Machine specific atomic operations
//#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C) // use generic version in tbb_machine.h
//#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C) // use generic version in tbb_machine.h
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
//#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V) // use generic version in tbb_machine.h
//#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V) // use generic version in tbb_machine.h
#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
// use generic version in tbb_machine.h
//#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
//#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
//#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
//#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V)
//#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#define __TBB_Store8(P,V) (*P = V)
#define __TBB_Load8(P) (*P)
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions // Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
// Special atomic functions
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1)
// Definition of Lock functions
// Repeatedly runs TryLockByte, no need to implement
#undef __TBB_LockByte
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_Yield() sched_yield()
 End of changes. 8 change blocks. 
47 lines changed or deleted 22 lines changed or added


 task.h   task.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_task_H #ifndef __TBB_task_H
#define __TBB_task_H #define __TBB_task_H
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_machine.h" #include "tbb_machine.h"
#include <climits>
typedef struct ___itt_caller *__itt_caller; typedef struct ___itt_caller *__itt_caller;
namespace tbb { namespace tbb {
class task; class task;
class task_list; class task_list;
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
class task_group_context; class task_group_context;
skipping to change at line 120 skipping to change at line 121
//! For internal use only //! For internal use only
virtual void wait_for_all( task& parent, task* child ) = 0; virtual void wait_for_all( task& parent, task* child ) = 0;
//! For internal use only //! For internal use only
virtual void spawn_root_and_wait( task& first, task*& next ) = 0; virtual void spawn_root_and_wait( task& first, task*& next ) = 0;
//! Pure virtual destructor; //! Pure virtual destructor;
// Have to have it just to shut up overzealous compilation warnings // Have to have it just to shut up overzealous compilation warnings
virtual ~scheduler() = 0; virtual ~scheduler() = 0;
#if __TBB_ARENA_PER_MASTER
//! For internal use only //! For internal use only
virtual void enqueue( task& t, void* reserved ) = 0; virtual void enqueue( task& t, void* reserved ) = 0;
#endif /* __TBB_ARENA_PER_MASTER */
}; };
//! A reference count //! A reference count
/** Should always be non-negative. A signed type is used so that underflow can be detected. */ /** Should always be non-negative. A signed type is used so that underflow can be detected. */
typedef intptr_t reference_count; typedef intptr_t reference_count;
//! An id as used for specifying affinity. //! An id as used for specifying affinity.
typedef unsigned short affinity_id; typedef unsigned short affinity_id;
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
class generic_scheduler;
struct context_list_node_t { struct context_list_node_t {
context_list_node_t *my_prev, context_list_node_t *my_prev,
*my_next; *my_next;
}; };
class allocate_root_with_context_proxy: no_assign { class allocate_root_with_context_proxy: no_assign {
task_group_context& my_context; task_group_context& my_context;
public: public:
allocate_root_with_context_proxy ( task_group_context& ctx ) : my_c ontext(ctx) {} allocate_root_with_context_proxy ( task_group_context& ctx ) : my_c ontext(ctx) {}
task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
skipping to change at line 172 skipping to change at line 173
public: public:
task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; task& __TBB_EXPORTED_METHOD allocate( size_t size ) const;
void __TBB_EXPORTED_METHOD free( task& ) const; void __TBB_EXPORTED_METHOD free( task& ) const;
}; };
//! Memory prefix to a task object. //! Memory prefix to a task object.
/** This class is internal to the library. /** This class is internal to the library.
Do not reference it directly, except within the library itself. Do not reference it directly, except within the library itself.
Fields are ordered in a way that preserves backwards compatibility and yields Fields are ordered in a way that preserves backwards compatibility and yields
good packing on typical 32-bit and 64-bit platforms. good packing on typical 32-bit and 64-bit platforms.
        In case task prefix size exceeds 32 or 64 bytes on IA32 and Intel64
        architectures correspondingly, consider dynamic setting of task_alignment
        and task_prefix_reservation_size based on the maximal operand size supported
        by the current CPU.
@ingroup task_scheduling */ @ingroup task_scheduling */
class task_prefix { class task_prefix {
private: private:
friend class tbb::task; friend class tbb::task;
friend class tbb::interface5::internal::task_base; friend class tbb::interface5::internal::task_base;
friend class tbb::task_list; friend class tbb::task_list;
friend class internal::scheduler; friend class internal::scheduler;
friend class internal::allocate_root_proxy; friend class internal::allocate_root_proxy;
friend class internal::allocate_child_proxy; friend class internal::allocate_child_proxy;
friend class internal::allocate_continuation_proxy; friend class internal::allocate_continuation_proxy;
skipping to change at line 200 skipping to change at line 207
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//! The scheduler that allocated the task, or NULL if the task is big. //! The scheduler that allocated the task, or NULL if the task is big.
/** Small tasks are pooled by the scheduler that allocated the task. /** Small tasks are pooled by the scheduler that allocated the task.
If a scheduler needs to free a small task allocated by another scheduler, If a scheduler needs to free a small task allocated by another scheduler,
it returns the task to that other scheduler. This policy avoids it returns the task to that other scheduler. This policy avoids
memory space blowup issues for memory allocators that allocate from memory space blowup issues for memory allocators that allocate from
thread-specific pools. */ thread-specific pools. */
scheduler* origin; scheduler* origin;
        //! The scheduler that owns the task.
        scheduler* owner;
#if __TBB_TASK_PRIORITY
        union {
#endif /* __TBB_TASK_PRIORITY */
        //! Obsolete. The scheduler that owns the task.
        /** Retained only for the sake of backward binary compatibility.
            Still used by inline methods in the task.h header. **/
        scheduler* owner;
#if __TBB_TASK_PRIORITY
        //! Pointer to the next offloaded lower priority task.
        /** Used to maintain a list of offloaded tasks inside the scheduler. **/
        task* next_offloaded;
        };
#endif /* __TBB_TASK_PRIORITY */
//! The task whose reference count includes me. //! The task whose reference count includes me.
/** In the "blocking style" of programming, this field points to the parent task. /** In the "blocking style" of programming, this field points to the parent task.
In the "continuation-passing style" of programming, this field points to the In the "continuation-passing style" of programming, this field points to the
continuation of the parent. */ continuation of the parent. */
tbb::task* parent; tbb::task* parent;
//! Reference count used for synchronization. //! Reference count used for synchronization.
/** In the "continuation-passing style" of programming, this field is /** In the "continuation-passing style" of programming, this field is
the difference of the number of allocated children minus the the difference of the number of allocated children minus the
number of children that have completed. number of children that have completed.
In the "blocking style" of programming, this field is one more than the difference. */ In the "blocking style" of programming, this field is one more than the difference. */
reference_count ref_count; __TBB_atomic reference_count ref_count;
//! Obsolete. Used to be scheduling depth before TBB 2.2 //! Obsolete. Used to be scheduling depth before TBB 2.2
        /** Retained only for the sake of backward binary compatibility. **/
        /** Retained only for the sake of backward binary compatibility.
            Not used by TBB anymore. **/
int depth; int depth;
//! A task::state_type, stored as a byte for compactness. //! A task::state_type, stored as a byte for compactness.
/** This state is exposed to users via method task::state(). */ /** This state is exposed to users via method task::state(). */
unsigned char state; unsigned char state;
//! Miscellaneous state that is not directly visible to users, stored as a byte for compactness. //! Miscellaneous state that is not directly visible to users, stored as a byte for compactness.
/** 0x0 -> version 1.0 task /** 0x0 -> version 1.0 task
0x1 -> version >=2.1 task 0x1 -> version >=2.1 task
0x20 -> task_proxy 0x20 -> task_proxy
skipping to change at line 246 skipping to change at line 266
//! The task corresponding to this task_prefix. //! The task corresponding to this task_prefix.
tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);} tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);}
}; };
} // namespace internal } // namespace internal
//! @endcond //! @endcond
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
#if __TBB_TASK_PRIORITY
namespace internal {
static const int priority_stride_v4 = INT_MAX / 4;
}
enum priority_t {
priority_normal = internal::priority_stride_v4 * 2,
priority_low = priority_normal - internal::priority_stride_v4,
priority_high = priority_normal + internal::priority_stride_v4
};
#endif /* __TBB_TASK_PRIORITY */
#if TBB_USE_CAPTURED_EXCEPTION #if TBB_USE_CAPTURED_EXCEPTION
class tbb_exception; class tbb_exception;
#else #else
namespace internal { namespace internal {
class tbb_exception_ptr; class tbb_exception_ptr;
} }
#endif /* !TBB_USE_CAPTURED_EXCEPTION */ #endif /* !TBB_USE_CAPTURED_EXCEPTION */
class task_scheduler_init;
//! Used to form groups of tasks //! Used to form groups of tasks
/** @ingroup task_scheduling /** @ingroup task_scheduling
The context services explicit cancellation requests from user code, and unhandled The context services explicit cancellation requests from user code, and unhandled
exceptions intercepted during tasks execution. Intercepting an exception results exceptions intercepted during tasks execution. Intercepting an exception results
in generating internal cancellation requests (which are processed in exactly the in generating internal cancellation requests (which are processed in exactly the
same way as external ones). same way as external ones).
The context is associated with one or more root tasks and defines the cancellation The context is associated with one or more root tasks and defines the cancellation
group that includes all the descendants of the corresponding root task(s). Association group that includes all the descendants of the corresponding root task(s). Association
is established when a context object is passed as an argument to the task::allocate_root() is established when a context object is passed as an argument to the task::allocate_root()
skipping to change at line 277 skipping to change at line 312
forming a tree-like structure: parent -> this -> children. Arrows here designate forming a tree-like structure: parent -> this -> children. Arrows here designate
cancellation propagation direction. If a task in a cancellation group is canceled cancellation propagation direction. If a task in a cancellation group is canceled
all the other tasks in this group and groups bound to it (as children) get canceled too. all the other tasks in this group and groups bound to it (as children) get canceled too.
IMPLEMENTATION NOTE: IMPLEMENTATION NOTE:
When adding new members to task_group_context or changing types of existing ones, When adding new members to task_group_context or changing types of existing ones,
update the size of both padding buffers (_leading_padding and _trailing_padding) update the size of both padding buffers (_leading_padding and _trailing_padding)
appropriately. See also VERSIONING NOTE at the constructor definition below. **/ appropriately. See also VERSIONING NOTE at the constructor definition below. **/
class task_group_context : internal::no_copy { class task_group_context : internal::no_copy {
private: private:
friend class internal::generic_scheduler;
friend class task_scheduler_init;
#if TBB_USE_CAPTURED_EXCEPTION #if TBB_USE_CAPTURED_EXCEPTION
typedef tbb_exception exception_container_type; typedef tbb_exception exception_container_type;
#else #else
typedef internal::tbb_exception_ptr exception_container_type; typedef internal::tbb_exception_ptr exception_container_type;
#endif #endif
enum version_traits_word_layout { enum version_traits_word_layout {
traits_offset = 16, traits_offset = 16,
version_mask = 0xFFFF, version_mask = 0xFFFF,
traits_mask = 0xFFFFul << traits_offset traits_mask = 0xFFFFul << traits_offset
skipping to change at line 306 skipping to change at line 344
exact_exception = 0x0001ul << traits_offset, exact_exception = 0x0001ul << traits_offset,
concurrent_wait = 0x0004ul << traits_offset, concurrent_wait = 0x0004ul << traits_offset,
#if TBB_USE_CAPTURED_EXCEPTION #if TBB_USE_CAPTURED_EXCEPTION
default_traits = 0 default_traits = 0
#else #else
default_traits = exact_exception default_traits = exact_exception
#endif /* !TBB_USE_CAPTURED_EXCEPTION */ #endif /* !TBB_USE_CAPTURED_EXCEPTION */
}; };
private: private:
enum state {
may_have_children = 1
};
union { union {
//! Flavor of this context: bound or isolated. //! Flavor of this context: bound or isolated.
kind_type my_kind; kind_type my_kind;
uintptr_t _my_kind_aligner; uintptr_t _my_kind_aligner;
}; };
//! Pointer to the context of the parent cancellation group. NULL for i solated contexts. //! Pointer to the context of the parent cancellation group. NULL for i solated contexts.
task_group_context *my_parent; task_group_context *my_parent;
//! Used to form the thread specific list of contexts without additional memory allocation. //! Used to form the thread specific list of contexts without additional memory allocation.
skipping to change at line 327 skipping to change at line 369
its parent happens. Any context can be present in the list of one thread only. **/ its parent happens. Any context can be present in the list of one thread only. **/
internal::context_list_node_t my_node; internal::context_list_node_t my_node;
//! Used to set and maintain stack stitching point for Intel Performance Tools. //! Used to set and maintain stack stitching point for Intel Performance Tools.
__itt_caller itt_caller; __itt_caller itt_caller;
//! Leading padding protecting accesses to frequently used members from false sharing. //! Leading padding protecting accesses to frequently used members from false sharing.
/** Read accesses to the field my_cancellation_requested are on the hot path inside /** Read accesses to the field my_cancellation_requested are on the hot path inside
the scheduler. This padding ensures that this field never shares the same cache the scheduler. This padding ensures that this field never shares the same cache
line with a local variable that is frequently written to. **/ line with a local variable that is frequently written to. **/
    char _leading_padding[internal::NFS_MaxLineSize -
              2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(internal::context_list_node_t)
              - sizeof(__itt_caller)];
    char _leading_padding[internal::NFS_MaxLineSize
              - 2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(internal::context_list_node_t)
              - sizeof(__itt_caller)];
//! Specifies whether cancellation was requested for this task group. //! Specifies whether cancellation was requested for this task group.
uintptr_t my_cancellation_requested; uintptr_t my_cancellation_requested;
//! Version for run-time checks and behavioral traits of the context. //! Version for run-time checks and behavioral traits of the context.
/** Version occupies low 16 bits, and traits (zero or more ORed enumera tors /** Version occupies low 16 bits, and traits (zero or more ORed enumera tors
from the traits_type enumerations) take the next 16 bits. from the traits_type enumerations) take the next 16 bits.
Original (zeroth) version of the context did not support any traits . **/ Original (zeroth) version of the context did not support any traits . **/
uintptr_t my_version_and_traits; uintptr_t my_version_and_traits;
//! Pointer to the container storing exception being propagated across this task group. //! Pointer to the container storing exception being propagated across this task group.
exception_container_type *my_exception; exception_container_type *my_exception;
    //! Scheduler that registered this context in its thread specific list.
    /** This field is not terribly necessary, but it allows to get a small performance
        benefit by getting us rid of using thread local storage. We do not care
        about extra memory it takes since this data structure is excessively padded anyway. **/
    void *my_owner;
    //! Trailing padding protecting accesses to frequently used members from false sharing
    /** \sa _leading_padding **/
    char _trailing_padding[internal::NFS_MaxLineSize - sizeof(intptr_t) - 2 * sizeof(void*)];
    //! Scheduler instance that registered this context in its thread specific list.
    internal::generic_scheduler *my_owner;
    //! Internal state (combination of state flags).
    uintptr_t my_state;
#if __TBB_TASK_PRIORITY
    //! Priority level of the task group (in normalized representation)
    intptr_t my_priority;
#endif /* __TBB_TASK_PRIORITY */
    //! Trailing padding protecting accesses to frequently used members from false sharing
    /** \sa _leading_padding **/
    char _trailing_padding[internal::NFS_MaxLineSize - 2 * sizeof(uintptr_t) - 2 * sizeof(void*)
#if __TBB_TASK_PRIORITY
                           - sizeof(intptr_t)
#endif /* __TBB_TASK_PRIORITY */
                          ];
public: public:
//! Default & binding constructor. //! Default & binding constructor.
/** By default a bound context is created. That is this context will be bound /** By default a bound context is created. That is this context will be bound
(as child) to the context of the task calling task::allocate_root(this_context) (as child) to the context of the task calling task::allocate_root(this_context)
method. Cancellation requests passed to the parent context are propagated method. Cancellation requests passed to the parent context are propagated
to all the contexts bound to it. to all the contexts bound to it. Similarly priority change is propagated from the parent context to its children.
If task_group_context::isolated is used as the argument, then the tasks associated If task_group_context::isolated is used as the argument, then the tasks associated
with this context will never be affected by events in any other context. with this context will never be affected by events in any other context.
Creating isolated contexts involves much less overhead, but they have limited Creating isolated contexts involves much less overhead, but they have limited
utility. Normally when an exception occurs in an algorithm that has nested utility. Normally when an exception occurs in an algorithm that has nested
ones running, it is desirable to have all the nested algorithms canceled ones running, it is desirable to have all the nested algorithms canceled
as well. Such a behavior requires nested algorithms to use bound contexts. as well. Such a behavior requires nested algorithms to use bound contexts.
There is one good place where using isolated algorithms is beneficial. It is There is one good place where using isolated algorithms is beneficial. It is
a master thread. That is if a particular algorithm is invoked directly from a master thread. That is if a particular algorithm is invoked directly from
the master thread (not from a TBB task), supplying it with an explicitly the master thread (not from a TBB task), supplying it with an explicitly
created isolated context will result in a faster algorithm startup. created isolated context will result in a faster algorithm startup.
VERSIONING NOTE: VERSIONING NOTE:
Implementation(s) of task_group_context constructor(s) cannot be made Implementation(s) of task_group_context constructor(s) cannot be made
entirely out-of-line because the run-time version must be set by the user entirely out-of-line because the run-time version must be set by the user
code. This will become critically important for binary compatibility, if code. This will become critically important for binary compatibility, if
we ever have to change the size of the context object. we ever have to change the size of the context object.
        Boosting the runtime version will also be necessary whenever new fields
        are introduced in the currently unused padding areas or the meaning of
        the existing fields is changed or extended. **/
        Boosting the runtime version will also be necessary if new data fields are
        introduced in the currently unused padding areas and these fields are updated
        by inline methods. **/
task_group_context ( kind_type relation_with_parent = bound, task_group_context ( kind_type relation_with_parent = bound,
uintptr_t traits = default_traits ) uintptr_t traits = default_traits )
: my_kind(relation_with_parent) : my_kind(relation_with_parent)
, my_version_and_traits(1 | traits) , my_version_and_traits(1 | traits)
{ {
init(); init();
} }
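As a concrete illustration of the isolated/bound distinction discussed above, a hedged sketch (hypothetical function and functor names; assumes the parallel_for overload that accepts a partitioner and a task_group_context) of invoking an algorithm directly from the master thread with an explicitly created isolated context:

    #include "tbb/blocked_range.h"
    #include "tbb/parallel_for.h"
    #include "tbb/partitioner.h"
    #include "tbb/task.h"

    struct Scale {                                   // hypothetical body functor
        float* a;
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                a[i] *= 2.0f;
        }
    };

    void scale_all( float* a, size_t n ) {
        // Isolated: cancellation or exceptions in other contexts cannot affect this work,
        // and startup from the master thread is a bit cheaper than with a bound context.
        tbb::task_group_context ctx( tbb::task_group_context::isolated );
        Scale body = { a };
        tbb::parallel_for( tbb::blocked_range<size_t>(0, n), body, tbb::auto_partitioner(), ctx );
    }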
__TBB_EXPORTED_METHOD ~task_group_context (); __TBB_EXPORTED_METHOD ~task_group_context ();
skipping to change at line 424 skipping to change at line 476
//! Records the pending exception, and cancels the task group. //! Records the pending exception, and cancels the task group.
/** May be called only from inside a catch-block. If the context is already /** May be called only from inside a catch-block. If the context is already
canceled, does nothing. canceled, does nothing.
The method brings the task group associated with this context exactly into The method brings the task group associated with this context exactly into
the state it would be in, if one of its tasks threw the currently pending the state it would be in, if one of its tasks threw the currently pending
exception during its execution. In other words, it emulates the actions exception during its execution. In other words, it emulates the actions
of the scheduler's dispatch loop exception handler. **/ of the scheduler's dispatch loop exception handler. **/
void __TBB_EXPORTED_METHOD register_pending_exception (); void __TBB_EXPORTED_METHOD register_pending_exception ();
#if __TBB_TASK_PRIORITY
    //! Changes priority of the task group
void set_priority ( priority_t );
//! Retrieves current priority of the current task group
priority_t priority () const;
#endif /* __TBB_TASK_PRIORITY */
protected: protected:
//! Out-of-line part of the constructor. //! Out-of-line part of the constructor.
/** Singled out to ensure backward binary compatibility of the future versions. **/ /** Singled out to ensure backward binary compatibility of the future versions. **/
void __TBB_EXPORTED_METHOD init (); void __TBB_EXPORTED_METHOD init ();
private: private:
friend class task; friend class task;
friend class internal::allocate_root_with_context_proxy; friend class internal::allocate_root_with_context_proxy;
static const kind_type binding_required = bound; static const kind_type binding_required = bound;
static const kind_type binding_completed = kind_type(bound+1); static const kind_type binding_completed = kind_type(bound+1);
static const kind_type detached = kind_type(binding_completed+1); static const kind_type detached = kind_type(binding_completed+1);
static const kind_type dying = kind_type(detached+1); static const kind_type dying = kind_type(detached+1);
    //! Checks if any of the ancestors has a cancellation request outstanding,
    //! and propagates it back to descendants.
    void propagate_cancellation_from_ancestors ();
    //! For debugging purposes only.
    bool is_alive () {
#if TBB_USE_DEBUG
        return my_version_and_traits != 0xDeadBeef;
#else
        return true;
#endif /* TBB_USE_DEBUG */
    }
    //! Propagates state change (if any) from an ancestor
    /** Checks if one of this object's ancestors is in a new state, and propagates
        the new state to all its descendants in this object's heritage line. **/
    template <typename T>
    void propagate_state_from_ancestors ( T task_group_context::*mptr_state, T new_state );
    //! Makes sure that the context is registered with a scheduler instance.
    inline void finish_initialization ( internal::generic_scheduler *local_sched );
    //! Registers this context with the local scheduler and binds it to its parent context
    void bind_to ( internal::generic_scheduler *local_sched );
    //! Registers this context with the local scheduler
    void register_with ( internal::generic_scheduler *local_sched );
}; // class task_group_context }; // class task_group_context
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//! Base class for user-defined tasks. //! Base class for user-defined tasks.
/** @ingroup task_scheduling */ /** @ingroup task_scheduling */
class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base { class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base {
//! Set reference count //! Set reference count
void __TBB_EXPORTED_METHOD internal_set_ref_count( int count ); void __TBB_EXPORTED_METHOD internal_set_ref_count( int count );
skipping to change at line 551 skipping to change at line 614
cannot be made, use method recycle_as_safe_continuation instead. cannot be made, use method recycle_as_safe_continuation instead.
Because of the hazard, this method may be deprecated in the future. */ Because of the hazard, this method may be deprecated in the future. */
void recycle_as_continuation() { void recycle_as_continuation() {
__TBB_ASSERT( prefix().state==executing, "execute not running?" ); __TBB_ASSERT( prefix().state==executing, "execute not running?" );
prefix().state = allocated; prefix().state = allocated;
} }
//! Recommended to use, safe variant of recycle_as_continuation //! Recommended to use, safe variant of recycle_as_continuation
/** For safety, it requires additional increment of ref_count. /** For safety, it requires additional increment of ref_count.
With no decendants and ref_count of 1, it has the semantics of recycle_to_reexecute. */ With no descendants and ref_count of 1, it has the semantics of recycle_to_reexecute. */
void recycle_as_safe_continuation() { void recycle_as_safe_continuation() {
__TBB_ASSERT( prefix().state==executing, "execute not running?" ); __TBB_ASSERT( prefix().state==executing, "execute not running?" );
prefix().state = recycle; prefix().state = recycle;
} }
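A hedged sketch of the recycling pattern the two methods above describe (the task types are hypothetical; a flag keeps the recycled continuation from spawning again on its second run):

    #include "tbb/task.h"

    struct ChildWork : tbb::task {
        tbb::task* execute() { /* ...one piece of work... */ return NULL; }
    };

    struct ParentWork : tbb::task {
        bool children_spawned;
        ParentWork() : children_spawned(false) {}
        tbb::task* execute() {
            if( !children_spawned ) {
                children_spawned = true;
                recycle_as_safe_continuation();          // *this survives and runs again later
                set_ref_count(2);                        // one child + the extra count the safe variant requires
                spawn( *new( allocate_child() ) ChildWork );
            } else {
                // Second run: the child has completed; combine or publish results here.
            }
            return NULL;
        }
    };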
//! Change this to be a child of new_parent. //! Change this to be a child of new_parent.
void recycle_as_child_of( task& new_parent ) { void recycle_as_child_of( task& new_parent ) {
internal::task_prefix& p = prefix(); internal::task_prefix& p = prefix();
__TBB_ASSERT( prefix().state==executing||prefix().state==allocated, "execute not running, or already recycled" ); __TBB_ASSERT( prefix().state==executing||prefix().state==allocated, "execute not running, or already recycled" );
__TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled as a child" ); __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when recycled as a child" );
skipping to change at line 599 skipping to change at line 662
//! Set reference count //! Set reference count
void set_ref_count( int count ) { void set_ref_count( int count ) {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
internal_set_ref_count(count); internal_set_ref_count(count);
#else #else
prefix().ref_count = count; prefix().ref_count = count;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
} }
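For contrast with the recycling variants above, a hedged sketch (hypothetical task types) of the plain blocking style: ref_count is set to the number of children plus one, and the extra one is consumed by the wait:

    #include "tbb/task.h"

    struct Leaf : tbb::task {
        tbb::task* execute() { /* ...work... */ return NULL; }
    };

    struct BlockingParent : tbb::task {
        tbb::task* execute() {
            set_ref_count(3);                            // 2 children + 1 for the wait
            spawn( *new( allocate_child() ) Leaf );      // first child runs asynchronously
            tbb::task& last = *new( allocate_child() ) Leaf;
            spawn_and_wait_for_all( last );              // runs the last child and blocks until both finish
            return NULL;                                 // both children are guaranteed done here
        }
    };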
//! Atomically increment reference count. //! Atomically increment reference count and returns its old value.
/** Has acquire semantics */ /** Has acquire semantics */
void increment_ref_count() { void increment_ref_count() {
__TBB_FetchAndIncrementWacquire( &prefix().ref_count ); __TBB_FetchAndIncrementWacquire( &prefix().ref_count );
} }
//! Atomically decrement reference count. //! Atomically decrement reference count and returns its new value.
/** Has release semantics. */ /** Has release semantics. */
int decrement_ref_count() { int decrement_ref_count() {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
return int(internal_decrement_ref_count()); return int(internal_decrement_ref_count());
#else #else
return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))- 1; return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))- 1;
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
} }
//! Define recommended static forms via import from base class. //! Define recommended static forms via import from base class.
skipping to change at line 642 skipping to change at line 705
/** If there are more tasks than worker threads, the tasks are spawned in /** If there are more tasks than worker threads, the tasks are spawned in
order of front to back. */ order of front to back. */
static void spawn_root_and_wait( task_list& root_list ); static void spawn_root_and_wait( task_list& root_list );
//! Wait for reference count to become one, and set reference count to zero. //! Wait for reference count to become one, and set reference count to zero.
/** Works on tasks while waiting. */ /** Works on tasks while waiting. */
void wait_for_all() { void wait_for_all() {
prefix().owner->wait_for_all( *this, NULL ); prefix().owner->wait_for_all( *this, NULL );
} }
#if __TBB_ARENA_PER_MASTER
//! Enqueue task for starvation-resistant execution. //! Enqueue task for starvation-resistant execution.
#if __TBB_TASK_PRIORITY
    /** The task will be enqueued on the normal priority level disregarding the
        priority of its task group.
        The rationale of such semantics is that priority of an enqueued task is
        statically fixed at the moment of its enqueuing, while task group priority
        is dynamic. Thus automatic priority inheritance would generally be subject
        to a race, which may result in unexpected behavior.
        Use the enqueue() overload with explicit priority value and the task::group_priority()
        method to implement such priority inheritance when it is really necessary. **/
#endif /* __TBB_TASK_PRIORITY */
static void enqueue( task& t ) { static void enqueue( task& t ) {
t.prefix().owner->enqueue( t, NULL ); t.prefix().owner->enqueue( t, NULL );
} }
#endif /* __TBB_ARENA_PER_MASTER */ #if __TBB_TASK_PRIORITY
    //! Enqueue task for starvation-resistant execution on the specified priority level.
    static void enqueue( task& t, priority_t p ) {
        __TBB_ASSERT( p == priority_low || p == priority_normal || p == priority_high, "Invalid priority level value" );
        t.prefix().owner->enqueue( t, (void*)p );
    }
#endif /* __TBB_TASK_PRIORITY */
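A hedged sketch (hypothetical task type) of enqueuing background work at a fixed priority; as the guards above indicate, the priority overload exists only when __TBB_TASK_PRIORITY is enabled:

    #include "tbb/task.h"

    struct FlushLogs : tbb::task {
        tbb::task* execute() { /* ...flush buffered log records... */ return NULL; }
    };

    void schedule_flush() {
    #if __TBB_TASK_PRIORITY
        tbb::task::enqueue( *new( tbb::task::allocate_root() ) FlushLogs, tbb::priority_low );
    #else
        tbb::task::enqueue( *new( tbb::task::allocate_root() ) FlushLogs );
    #endif
    }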
//! The innermost task being executed or destroyed by the current threa d at the moment. //! The innermost task being executed or destroyed by the current threa d at the moment.
static task& __TBB_EXPORTED_FUNC self(); static task& __TBB_EXPORTED_FUNC self();
//! task on whose behalf this task is working, or NULL if this is a roo t. //! task on whose behalf this task is working, or NULL if this is a roo t.
task* parent() const {return prefix().parent;} task* parent() const {return prefix().parent;}
//! sets parent task pointer to specified value
void set_parent(task* p) {
#if __TBB_TASK_GROUP_CONTEXT
        __TBB_ASSERT(prefix().context == p->prefix().context, "The tasks must be in the same context");
#endif
prefix().parent = p;
}
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
    //! Shared context that is used to communicate asynchronous state changes
    //! This method is deprecated and will be removed in the future.
    /** Use method group() instead. **/
task_group_context* context() {return prefix().context;} task_group_context* context() {return prefix().context;}
//! Pointer to the task group descriptor.
task_group_context* group () { return prefix().context; }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
//! True if task was stolen from the task pool of another thread. //! True if task was stolen from the task pool of another thread.
bool is_stolen_task() const { bool is_stolen_task() const {
return (prefix().extra_state & 0x80)!=0; return (prefix().extra_state & 0x80)!=0;
} }
//--------------------------------------------------------------------- --- //--------------------------------------------------------------------- ---
// Debugging // Debugging
//--------------------------------------------------------------------- --- //--------------------------------------------------------------------- ---
skipping to change at line 706 skipping to change at line 799
affinity_id affinity() const {return prefix().affinity;} affinity_id affinity() const {return prefix().affinity;}
//! Invoked by scheduler to notify task that it ran on unexpected thread. //! Invoked by scheduler to notify task that it ran on unexpected thread.
/** Invoked before method execute() runs, if task is stolen, or task has /** Invoked before method execute() runs, if task is stolen, or task has
affinity but will be executed on another thread. affinity but will be executed on another thread.
The default action does nothing. */ The default action does nothing. */
virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id ); virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id );
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
//! Moves this task from its current group into another one.
/** Argument ctx specifies the new group.
        The primary purpose of this method is to associate a unique task group context
        with a task allocated for subsequent enqueuing. In contrast to spawned tasks,
        enqueued ones normally outlive the scope where they were created. This makes
        the traditional usage model, where task group contexts are allocated locally on
        the stack, inapplicable. Dynamic allocation of context objects is inefficient
        performance-wise. Method change_group() allows making a task group context object
        a member of the task class, and then associating it with its containing task
        object in the latter's constructor. **/
void __TBB_EXPORTED_METHOD change_group ( task_group_context& ctx );
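A hedged sketch (hypothetical task type) of the usage model the comment above describes: the context is a member of the enqueued task, so it outlives the scope that created the task:

    #include "tbb/task.h"

    class DetachedJob : public tbb::task {
        tbb::task_group_context my_ctx;          // lives exactly as long as the task itself
    public:
        DetachedJob() : my_ctx( tbb::task_group_context::isolated ) {
            change_group( my_ctx );              // re-associate *this with the member context
        }
        tbb::task* execute() { /* ...long-running work... */ return NULL; }
    };

    // The creating scope may return immediately after enqueuing.
    inline void start_detached_job() {
        tbb::task::enqueue( *new( tbb::task::allocate_root() ) DetachedJob );
    }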
//! Initiates cancellation of all tasks in this cancellation group and its subordinate groups. //! Initiates cancellation of all tasks in this cancellation group and its subordinate groups.
/** \return false if cancellation has already been requested, true otherwise. **/ /** \return false if cancellation has already been requested, true otherwise. **/
bool cancel_group_execution () { return prefix().context->cancel_group_execution(); } bool cancel_group_execution () { return prefix().context->cancel_group_execution(); }
//! Returns true if the context received cancellation request. //! Returns true if the context has received cancellation request.
bool is_cancelled () const { return prefix().context->is_group_execution_cancelled(); } bool is_cancelled () const { return prefix().context->is_group_execution_cancelled(); }
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
#if __TBB_TASK_PRIORITY
//! Changes priority of the task group this task belongs to.
    void set_group_priority ( priority_t p ) { prefix().context->set_priority(p); }
    //! Retrieves current priority of the task group this task belongs to.
    priority_t group_priority () const { return prefix().context->priority(); }
#endif /* __TBB_TASK_PRIORITY */
private: private:
friend class interface5::internal::task_base; friend class interface5::internal::task_base;
friend class task_list; friend class task_list;
friend class internal::scheduler; friend class internal::scheduler;
friend class internal::allocate_root_proxy; friend class internal::allocate_root_proxy;
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
friend class internal::allocate_root_with_context_proxy; friend class internal::allocate_root_with_context_proxy;
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
friend class internal::allocate_continuation_proxy; friend class internal::allocate_continuation_proxy;
friend class internal::allocate_child_proxy; friend class internal::allocate_child_proxy;
 End of changes. 33 change blocks. 
47 lines changed or deleted 185 lines changed or added


 task_group.h   task_group.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 35 skipping to change at line 35
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_task_group_H #ifndef __TBB_task_group_H
#define __TBB_task_group_H #define __TBB_task_group_H
#include "task.h" #include "task.h"
#include "tbb_exception.h" #include "tbb_exception.h"
#if __TBB_TASK_GROUP_CONTEXT
namespace tbb { namespace tbb {
namespace internal { namespace internal {
template<typename F> class task_handle_task; template<typename F> class task_handle_task;
} }
template<typename F> template<typename F>
class task_handle : internal::no_assign { class task_handle : internal::no_assign {
template<typename _F> friend class internal::task_handle_task; template<typename _F> friend class internal::task_handle_task;
skipping to change at line 126 skipping to change at line 128
} }
public: public:
task_group_base( uintptr_t traits = 0 ) task_group_base( uintptr_t traits = 0 )
: my_context(task_group_context::bound, task_group_context::default_traits | traits) : my_context(task_group_context::bound, task_group_context::default_traits | traits)
{ {
my_root = new( task::allocate_root(my_context) ) empty_task; my_root = new( task::allocate_root(my_context) ) empty_task;
my_root->set_ref_count(1); my_root->set_ref_count(1);
} }
~task_group_base() {
if( my_root->ref_count() > 1 ) {
bool stack_unwinding_in_progress = std::uncaught_exception();
            // Always attempt to do proper cleanup to avoid inevitable memory corruption
            // in case of missing wait (for the sake of better testability & debuggability)
if ( !is_canceling() )
cancel();
__TBB_TRY {
my_root->wait_for_all();
} __TBB_CATCH (...) {
task::destroy(*my_root);
__TBB_RETHROW();
}
task::destroy(*my_root);
if ( !stack_unwinding_in_progress )
internal::throw_exception( internal::eid_missing_wait );
}
else {
task::destroy(*my_root);
}
}
template<typename F> template<typename F>
void run( task_handle<F>& h ) { void run( task_handle<F>& h ) {
internal_run< task_handle<F>, internal::task_handle_task<F> >( h ); internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
} }
task_group_status wait() { task_group_status wait() {
__TBB_TRY { __TBB_TRY {
my_root->wait_for_all(); my_root->wait_for_all();
} __TBB_CATCH( ... ) { } __TBB_CATCH( ... ) {
my_context.reset(); my_context.reset();
skipping to change at line 160 skipping to change at line 184
my_context.cancel_group_execution(); my_context.cancel_group_execution();
} }
}; // class task_group_base }; // class task_group_base
} // namespace internal } // namespace internal
class task_group : public internal::task_group_base { class task_group : public internal::task_group_base {
public: public:
task_group () : task_group_base( task_group_context::concurrent_wait ) {} task_group () : task_group_base( task_group_context::concurrent_wait ) {}
#if TBB_DEPRECATED
~task_group() __TBB_TRY { ~task_group() __TBB_TRY {
__TBB_ASSERT( my_root->ref_count() != 0, NULL ); __TBB_ASSERT( my_root->ref_count() != 0, NULL );
if( my_root->ref_count() > 1 ) if( my_root->ref_count() > 1 )
my_root->wait_for_all(); my_root->wait_for_all();
owner().destroy(*my_root);
} }
#if TBB_USE_EXCEPTIONS #if TBB_USE_EXCEPTIONS
catch (...) { catch (...) {
        owner().destroy(*my_root);
        // Have to destroy my_root here as the base class destructor won't be called
        task::destroy(*my_root);
throw; throw;
} }
#endif /* TBB_USE_EXCEPTIONS */ #endif /* TBB_USE_EXCEPTIONS */
#endif /* TBB_DEPRECATED */
#if __SUNPRO_CC #if __SUNPRO_CC
template<typename F> template<typename F>
void run( task_handle<F>& h ) { void run( task_handle<F>& h ) {
internal_run< task_handle<F>, internal::task_handle_task<F> >( h ); internal_run< task_handle<F>, internal::task_handle_task<F> >( h );
} }
#else #else
using task_group_base::run; using task_group_base::run;
#endif #endif
skipping to change at line 200 skipping to change at line 226
} }
template<typename F> template<typename F>
task_group_status run_and_wait( task_handle<F>& h ) { task_group_status run_and_wait( task_handle<F>& h ) {
return internal_run_and_wait< task_handle<F> >( h ); return internal_run_and_wait< task_handle<F> >( h );
} }
}; // class task_group }; // class task_group
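A hedged sketch (hypothetical functor) of the intended task_group lifecycle: every run() is matched by a wait() before the group is destroyed, which is exactly what the missing-wait handling in ~task_group_base() above enforces:

    #include "tbb/task_group.h"

    struct Work {                       // hypothetical functor
        void operator()() const { /* ...an independent piece of work... */ }
    };

    void run_two_pieces() {
        tbb::task_group g;
        g.run( Work() );                // spawned asynchronously
        g.run( Work() );
        g.wait();                       // required before g goes out of scope
    }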
class structured_task_group : public internal::task_group_base { class structured_task_group : public internal::task_group_base {
public: public:
~structured_task_group() {
if( my_root->ref_count() > 1 ) {
bool stack_unwinding_in_progress = std::uncaught_exception();
            // Always attempt to do proper cleanup to avoid inevitable memory corruption
            // in case of missing wait (for the sake of better testability & debuggability)
if ( !is_canceling() )
cancel();
my_root->wait_for_all();
owner().destroy(*my_root);
if ( !stack_unwinding_in_progress )
internal::throw_exception( internal::eid_missing_wait );
}
else {
if( my_root->ref_count() == 1 )
my_root->set_ref_count(0);
owner().destroy(*my_root);
}
}
template<typename F> template<typename F>
task_group_status run_and_wait ( task_handle<F>& h ) { task_group_status run_and_wait ( task_handle<F>& h ) {
return internal_run_and_wait< task_handle<F> >( h ); return internal_run_and_wait< task_handle<F> >( h );
} }
task_group_status wait() { task_group_status wait() {
task_group_status res = task_group_base::wait(); task_group_status res = task_group_base::wait();
my_root->set_ref_count(1); my_root->set_ref_count(1);
return res; return res;
} }
skipping to change at line 243 skipping to change at line 250
return task::self().is_cancelled(); return task::self().is_cancelled();
} }
template<class F> template<class F>
task_handle<F> make_task( const F& f ) { task_handle<F> make_task( const F& f ) {
return task_handle<F>( f ); return task_handle<F>( f );
} }
} // namespace tbb } // namespace tbb
#endif /* __TBB_TASK_GROUP_CONTEXT */
#endif /* __TBB_task_group_H */ #endif /* __TBB_task_group_H */
 End of changes. 9 change blocks. 
24 lines changed or deleted 34 lines changed or added


 task_scheduler_init.h   task_scheduler_init.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_task_scheduler_init_H #ifndef __TBB_task_scheduler_init_H
#define __TBB_task_scheduler_init_H #define __TBB_task_scheduler_init_H
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "limits.h"
namespace tbb { namespace tbb {
typedef std::size_t stack_size_type; typedef std::size_t stack_size_type;
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
//! Internal to library. Should not be used by clients. //! Internal to library. Should not be used by clients.
/** @ingroup task_scheduling */ /** @ingroup task_scheduling */
class scheduler; class scheduler;
} // namespace internal } // namespace internal
//! @endcond //! @endcond
//! Class representing reference to tbb scheduler. //! Class delimiting the scope of task scheduler activity.
/** A thread must construct a task_scheduler_init, and keep it alive,
    during the time that it uses the services of class task.
/** A thread can construct a task_scheduler_init object and keep it alive
    while it uses TBB's tasking subsystem (including parallel algorithms).
    This class allows customizing properties of the TBB task pool to some extent.
    For example it can limit concurrency level of parallel work initiated by the
    given thread. It also can be used to specify stack size of the TBB worker threads,
    though this setting is not effective if the thread pool has already been created.
    If a parallel construct is used without a task_scheduler_init object previously
    created, the scheduler will be initialized automatically with default settings,
    and will persist until this thread exits. Default concurrency level is defined
    as described in task_scheduler_init::initialize().
@ingroup task_scheduling */ @ingroup task_scheduling */
class task_scheduler_init: internal::no_copy { class task_scheduler_init: internal::no_copy {
enum ExceptionPropagationMode {
propagation_mode_exact = 1u,
propagation_mode_captured = 2u,
        propagation_mode_mask = propagation_mode_exact | propagation_mode_captured
    };
};
/** NULL if not currently initialized. */ /** NULL if not currently initialized. */
internal::scheduler* my_scheduler; internal::scheduler* my_scheduler;
public: public:
//! Typedef for number of threads that is automatic. //! Typedef for number of threads that is automatic.
static const int automatic = -1; static const int automatic = -1;
//! Argument to initialize() or constructor that causes initialization to be deferred. //! Argument to initialize() or constructor that causes initialization to be deferred.
static const int deferred = -2; static const int deferred = -2;
//! Ensure that scheduler exists for this thread //! Ensure that scheduler exists for this thread
    /** A value of -1 lets tbb decide on the number of threads, which is typically
        the number of hardware threads. For production code, the default value of -1
        should be used, particularly if the client code is mixed with third party clients
        that might also use tbb.
    /** A value of -1 lets TBB decide on the number of threads, which is usually
        the maximal hardware concurrency for this process, that is the number of logical
        CPUs on the machine (possibly limited by the processor affinity mask of this
        process (Windows) or of this thread (Linux, FreeBSD)). It is the preferable option
        for production code because it helps to avoid nasty surprises when several
        TBB based components run side-by-side or in a nested fashion inside the same
        process.
        The number_of_threads is ignored if any other task_scheduler_inits The number_of_threads is ignored if any other task_scheduler_inits
        currently exist. A thread may construct multiple task_scheduler_inits. currently exist. A thread may construct multiple task_scheduler_inits.
        Doing so does no harm because the underlying scheduler is reference counted. */ Doing so does no harm because the underlying scheduler is reference counted. */
void __TBB_EXPORTED_METHOD initialize( int number_of_threads=automatic ); void __TBB_EXPORTED_METHOD initialize( int number_of_threads=automatic );
//! The overloaded method with stack size parameter //! The overloaded method with stack size parameter
/** Overloading is necessary to preserve ABI compatibility */ /** Overloading is necessary to preserve ABI compatibility */
void __TBB_EXPORTED_METHOD initialize( int number_of_threads, stack_siz e_type thread_stack_size ); void __TBB_EXPORTED_METHOD initialize( int number_of_threads, stack_siz e_type thread_stack_size );
//! Inverse of method initialize. //! Inverse of method initialize.
void __TBB_EXPORTED_METHOD terminate(); void __TBB_EXPORTED_METHOD terminate();
//! Shorthand for default constructor followed by call to intialize(num ber_of_threads). //! Shorthand for default constructor followed by call to initialize(nu mber_of_threads).
task_scheduler_init( int number_of_threads=automatic, stack_size_type t hread_stack_size=0 ) : my_scheduler(NULL) { task_scheduler_init( int number_of_threads=automatic, stack_size_type t hread_stack_size=0 ) : my_scheduler(NULL) {
        // Two lowest order bits of the stack size argument may be taken to communicate
        // default exception propagation mode of the client to be used when the
        // client manually creates tasks in the master thread and does not use
        // explicit task group context object. This is necessary because newer
        // TBB binaries with exact propagation enabled by default may be used
        // by older clients that expect tbb::captured_exception wrapper.
        // All zeros mean old client - no preference.
        __TBB_ASSERT( !(thread_stack_size & propagation_mode_mask), "Requested stack size is not aligned" );
#if TBB_USE_EXCEPTIONS
        thread_stack_size |= TBB_USE_CAPTURED_EXCEPTION ? propagation_mode_captured : propagation_mode_exact;
#endif /* TBB_USE_EXCEPTIONS */
initialize( number_of_threads, thread_stack_size ); initialize( number_of_threads, thread_stack_size );
} }
//! Destroy scheduler for this thread if thread has no other live task_scheduler_inits. //! Destroy scheduler for this thread if thread has no other live task_scheduler_inits.
~task_scheduler_init() { ~task_scheduler_init() {
if( my_scheduler ) if( my_scheduler )
terminate(); terminate();
internal::poison_pointer( my_scheduler ); internal::poison_pointer( my_scheduler );
} }
//! Returns the number of threads tbb scheduler would create if initial ized by default. //! Returns the number of threads TBB scheduler would create if initial ized by default.
    /** Result returned by this method does not depend on whether the scheduler /** Result returned by this method does not depend on whether the scheduler
        has already been initialized. has already been initialized.
        Because tbb 2.0 does not support blocking tasks yet, you may use this method Because tbb 2.0 does not support blocking tasks yet, you may use this method
        to boost the number of threads in the tbb's internal pool, if your tasks are to boost the number of threads in the tbb's internal pool, if your tasks are
        doing I/O operations. The optimal number of additional threads depends on how doing I/O operations. The optimal number of additional threads depends on how
        much time your tasks spend in the blocked state. */ much time your tasks spend in the blocked state.
        Before TBB 3.0 U4 this method returned the number of logical CPUs in the
        system. Currently on Windows, Linux and FreeBSD it returns the number of
        logical CPUs available to the current process in accordance with its affinity
        mask.
        NOTE: The return value of this method never changes after its first invocation.
        This means that changes in the process affinity mask that took place after
        this method was first invoked will not affect the number of worker threads
        in the TBB worker threads pool. */
static int __TBB_EXPORTED_FUNC default_num_threads (); static int __TBB_EXPORTED_FUNC default_num_threads ();
//! Returns true if scheduler is active (initialized); false otherwise //! Returns true if scheduler is active (initialized); false otherwise
bool is_active() const { return my_scheduler != NULL; } bool is_active() const { return my_scheduler != NULL; }
}; };
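A hedged sketch tying the notes above together: default_num_threads() is used to oversubscribe for I/O-bound work, and the scheduler reference is released when the object leaves scope:

    #include "tbb/task_scheduler_init.h"

    void run_io_bound_phase() {
        // One extra software thread per hardware thread to cover time spent blocked in I/O.
        int p = tbb::task_scheduler_init::default_num_threads();
        tbb::task_scheduler_init init( 2 * p );
        // ...run parallel algorithms here; the scheduler reference held by 'init'
        //    is released when it is destroyed at the end of this scope...
    }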
} // namespace tbb } // namespace tbb
#endif /* __TBB_task_scheduler_init_H */ #endif /* __TBB_task_scheduler_init_H */
 End of changes. 9 change blocks. 
14 lines changed or deleted 79 lines changed or added


 task_scheduler_observer.h   task_scheduler_observer.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 tbb.h   tbb.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 68 skipping to change at line 68
#include "parallel_for_each.h" #include "parallel_for_each.h"
#include "parallel_invoke.h" #include "parallel_invoke.h"
#include "parallel_reduce.h" #include "parallel_reduce.h"
#include "parallel_scan.h" #include "parallel_scan.h"
#include "parallel_sort.h" #include "parallel_sort.h"
#include "partitioner.h" #include "partitioner.h"
#include "pipeline.h" #include "pipeline.h"
#include "queuing_mutex.h" #include "queuing_mutex.h"
#include "queuing_rw_mutex.h" #include "queuing_rw_mutex.h"
#include "reader_writer_lock.h" #include "reader_writer_lock.h"
#if TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE
#include "concurrent_priority_queue.h"
#endif
#include "recursive_mutex.h" #include "recursive_mutex.h"
#include "spin_mutex.h" #include "spin_mutex.h"
#include "spin_rw_mutex.h" #include "spin_rw_mutex.h"
#include "task.h" #include "task.h"
#include "task_group.h" #include "task_group.h"
#include "task_scheduler_init.h" #include "task_scheduler_init.h"
#include "task_scheduler_observer.h" #include "task_scheduler_observer.h"
#include "tbb_allocator.h" #include "tbb_allocator.h"
#include "tbb_exception.h" #include "tbb_exception.h"
#include "tbb_thread.h" #include "tbb_thread.h"
 End of changes. 2 change blocks. 
1 lines changed or deleted 4 lines changed or added


 tbb_allocator.h   tbb_allocator.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 tbb_config.h   tbb_config.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_tbb_config_H #ifndef __TBB_tbb_config_H
#define __TBB_tbb_config_H #define __TBB_tbb_config_H
/** This header is supposed to contain macro definitions and C style comments only. /** This header is supposed to contain macro definitions and C style comments only.
The macros defined here are intended to control such aspects of TBB bui ld as The macros defined here are intended to control such aspects of TBB bui ld as
- presence of compiler features
- compilation modes - compilation modes
- feature sets - feature sets
- workarounds presence - known compiler/platform issues
**/ **/
/** Compilation modes **/ #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
/** Presence of compiler features **/
#if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER)
/** warning suppression pragmas available in GCC since 4.4 **/
#define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1
#endif
/* TODO: The following condition should be extended when new compilers/runtimes
   with std::exception_ptr support appear. */
#define __TBB_EXCEPTION_PTR_PRESENT ((_MSC_VER >= 1600 || (__GXX_EXPERIMENTAL_CXX0X__ && __GNUC__==4 && __GNUC_MINOR__>=4)) && !__INTEL_COMPILER)
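For instance, assuming a GCC 4.4.3 toolchain, __TBB_GCC_VERSION evaluates to 4*10000 + 4*100 + 3 == 40403, so a gate such as the 40400 check above admits every 4.4.x release; a hypothetical feature test in user code could follow the same pattern:

    // Illustrative only: gate a warning-suppression pragma on the encoded GCC version.
    #if defined(__GNUC__) && !defined(__INTEL_COMPILER)
        #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40400
            #pragma GCC diagnostic ignored "-Wunused-parameter"
        #endif
    #endif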
#if __GNUC__ || __SUNPRO_CC || __IBMCPP__
/* ICC defines __GNUC__ and so is covered */
#define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1
#elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER)
#define __TBB_DECLSPEC_ALIGN_PRESENT 1
#endif
#if (__TBB_GCC_VERSION >= 40102) && !defined(__INTEL_COMPILER)
/** built-in atomics available in GCC since 4.1.2 **/
#define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1
#endif
/** User controlled TBB features & modes **/
#ifndef TBB_USE_DEBUG #ifndef TBB_USE_DEBUG
#ifdef TBB_DO_ASSERT #ifdef TBB_DO_ASSERT
#define TBB_USE_DEBUG TBB_DO_ASSERT #define TBB_USE_DEBUG TBB_DO_ASSERT
#else #else
#ifdef _DEBUG
#define TBB_USE_DEBUG _DEBUG
#else
#define TBB_USE_DEBUG 0 #define TBB_USE_DEBUG 0
#endif
#endif /* TBB_DO_ASSERT */ #endif /* TBB_DO_ASSERT */
#else
#define TBB_DO_ASSERT TBB_USE_DEBUG
#endif /* TBB_USE_DEBUG */ #endif /* TBB_USE_DEBUG */
#ifndef TBB_USE_ASSERT #ifndef TBB_USE_ASSERT
#ifdef TBB_DO_ASSERT #ifdef TBB_DO_ASSERT
#define TBB_USE_ASSERT TBB_DO_ASSERT #define TBB_USE_ASSERT TBB_DO_ASSERT
#else #else
#define TBB_USE_ASSERT TBB_USE_DEBUG #define TBB_USE_ASSERT TBB_USE_DEBUG
#endif /* TBB_DO_ASSERT */ #endif /* TBB_DO_ASSERT */
#endif /* TBB_USE_ASSERT */ #endif /* TBB_USE_ASSERT */
skipping to change at line 86 skipping to change at line 114
#if TBB_USE_EXCEPTIONS #if TBB_USE_EXCEPTIONS
        #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0. #error Compilation settings do not support exception handling. Please do not set TBB_USE_EXCEPTIONS macro or set it to 0.
#elif !defined(TBB_USE_EXCEPTIONS) #elif !defined(TBB_USE_EXCEPTIONS)
#define TBB_USE_EXCEPTIONS 0 #define TBB_USE_EXCEPTIONS 0
#endif #endif
#elif !defined(TBB_USE_EXCEPTIONS) #elif !defined(TBB_USE_EXCEPTIONS)
#define TBB_USE_EXCEPTIONS 1 #define TBB_USE_EXCEPTIONS 1
#endif #endif
#ifndef TBB_IMPLEMENT_CPP0X #ifndef TBB_IMPLEMENT_CPP0X
/** By default, use C++0x classes if available **/ /** By default, use C++0x classes if available **/
#if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__ #if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__
#define TBB_IMPLEMENT_CPP0X 0 #define TBB_IMPLEMENT_CPP0X 0
#else #else
#define TBB_IMPLEMENT_CPP0X 1 #define TBB_IMPLEMENT_CPP0X 1
#endif #endif
#endif /* TBB_IMPLEMENT_CPP0X */ #endif /* TBB_IMPLEMENT_CPP0X */
/** Feature sets **/ #ifndef TBB_USE_CAPTURED_EXCEPTION
#if __TBB_EXCEPTION_PTR_PRESENT
#define TBB_USE_CAPTURED_EXCEPTION 0
#else
#define TBB_USE_CAPTURED_EXCEPTION 1
#endif
#else /* defined TBB_USE_CAPTURED_EXCEPTION */
#if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT
        #error Current runtime does not support std::exception_ptr. Set TBB_USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb::captured_exception.
#endif
#endif /* defined TBB_USE_CAPTURED_EXCEPTION */
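A sketch of what the two propagation modes mean to client code, assuming a C++11 compiler for the lambda; with exact propagation the original exception type crosses the thread boundary, otherwise only the strings carried by tbb::captured_exception do:

    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include "tbb/tbb_exception.h"
    #include <stdexcept>
    #include <iostream>

    int main() {
        try {
            tbb::parallel_for(tbb::blocked_range<int>(0, 100),
                              [](const tbb::blocked_range<int>&) {
                                  throw std::runtime_error("boom");
                              });
        } catch (std::runtime_error& e) {
            // Exact propagation (std::exception_ptr available): original type survives.
            std::cout << "exact: " << e.what() << "\n";
        } catch (tbb::captured_exception& e) {
            // Captured propagation: only name/what() strings cross the thread boundary.
            std::cout << "captured: " << e.name() << ": " << e.what() << "\n";
        }
        return 0;
    }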
/** Check whether the request to use GCC atomics can be satisfied **/
#if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#error "GCC atomic built-ins are not supported."
#endif
/** Internal TBB features & modes **/
#ifndef __TBB_DYNAMIC_LOAD_ENABLED
#define __TBB_DYNAMIC_LOAD_ENABLED !__TBB_TASK_CPP_DIRECTLY_INCLUDED
#elif !__TBB_DYNAMIC_LOAD_ENABLED
#if _WIN32||_WIN64
#define __TBB_NO_IMPLICIT_LINKAGE 1
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#else
#define __TBB_WEAK_SYMBOLS 1
#endif
#endif
#ifndef __TBB_COUNT_TASK_NODES #ifndef __TBB_COUNT_TASK_NODES
#define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT
#endif #endif
#ifndef __TBB_TASK_GROUP_CONTEXT #ifndef __TBB_TASK_GROUP_CONTEXT
#define __TBB_TASK_GROUP_CONTEXT 1 #define __TBB_TASK_GROUP_CONTEXT 1
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
#if TBB_USE_EXCEPTIONS && !__TBB_TASK_GROUP_CONTEXT
    #error TBB_USE_EXCEPTIONS requires __TBB_TASK_GROUP_CONTEXT to be enabled
#endif
#ifndef __TBB_SCHEDULER_OBSERVER #ifndef __TBB_SCHEDULER_OBSERVER
#define __TBB_SCHEDULER_OBSERVER 1 #define __TBB_SCHEDULER_OBSERVER 1
#endif /* __TBB_SCHEDULER_OBSERVER */ #endif /* __TBB_SCHEDULER_OBSERVER */
#ifndef __TBB_ARENA_PER_MASTER #ifndef __TBB_TASK_PRIORITY
#define __TBB_ARENA_PER_MASTER 1 #define __TBB_TASK_PRIORITY __TBB_TASK_GROUP_CONTEXT
#endif /* __TBB_ARENA_PER_MASTER */ #endif /* __TBB_TASK_PRIORITY */
/* TODO: The following condition should be extended as soon as new compilers/runtimes #if __TBB_TASK_PRIORITY && !__TBB_TASK_GROUP_CONTEXT
   with std::exception_ptr support appear. */ #error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enabled
#define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTAL_CXX0X__ && (__GNUC__==4 && __GNUC_MINOR__>=4)) #endif
#ifndef TBB_USE_CAPTURED_EXCEPTION #if !defined(__TBB_SURVIVE_THREAD_SWITCH) && (_WIN32 || _WIN64 || __linux__)
#if __TBB_EXCEPTION_PTR_PRESENT
#define TBB_USE_CAPTURED_EXCEPTION 0 #define __TBB_SURVIVE_THREAD_SWITCH 1
#else #endif /* __TBB_SURVIVE_THREAD_SWITCH */
#define TBB_USE_CAPTURED_EXCEPTION 1
#endif
#else /* defined TBB_USE_CAPTURED_EXCEPTION */
#if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT
#error Current runtime does not support std::exception_ptr. Set TBB_USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb::captured_exception.
#endif
#endif /* defined TBB_USE_CAPTURED_EXCEPTION */
#ifndef __TBB_DEFAULT_PARTITIONER #ifndef __TBB_DEFAULT_PARTITIONER
#if TBB_DEPRECATED #if TBB_DEPRECATED
/** Default partitioner for parallel loop templates in TBB 1.0-2.1 */ /** Default partitioner for parallel loop templates in TBB 1.0-2.1 */
#define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner #define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner
#else #else
/** Default partitioner for parallel loop templates in TBB 2.2 */ /** Default partitioner for parallel loop templates since TBB 2.2 */
#define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner
#endif /* TBB_DEFAULT_PARTITIONER */ #endif /* TBB_DEPRECATED */
#endif /* !defined(__TBB_DEFAULT_PARTITIONER) */ #endif /* !defined(__TBB_DEFAULT_PARTITIONER) */
/** Workarounds presence **/
#if __GNUC__==4 && __GNUC_MINOR__>=4 && !defined(__INTEL_COMPILER)
#define __TBB_GCC_WARNING_SUPPRESSION_ENABLED 1
#endif
/** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused by
the bugs in compilers, standard or OS specific libraries. They should b e the bugs in compilers, standard or OS specific libraries. They should b e
removed as soon as the corresponding bugs are fixed or the buggy OS/com piler removed as soon as the corresponding bugs are fixed or the buggy OS/com piler
versions go out of the support list. versions go out of the support list.
**/ **/
#if __GNUC__ && __TBB_x86_64 && __INTEL_COMPILER == 1200
#define __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN 1
#endif
#if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPILER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012) #if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPILER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012)
/** Necessary to avoid ICL error (or warning in non-strict mode): /** Necessary to avoid ICL error (or warning in non-strict mode):
"exception specification for implicitly declared virtual destructor is "exception specification for implicitly declared virtual destructor is
incompatible with that of overridden one". **/ incompatible with that of overridden one". **/
#define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1 #define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1
#endif #endif
#if defined(_MSC_VER) && _MSC_VER < 1500 && !defined(__INTEL_COMPILER) #if defined(_MSC_VER) && _MSC_VER < 1500 && !defined(__INTEL_COMPILER)
/** VS2005 and earlier do not allow declaring template class as a frien d /** VS2005 and earlier do not allow declaring template class as a frien d
of classes defined in other namespaces. **/ of classes defined in other namespaces. **/
#define __TBB_TEMPLATE_FRIENDS_BROKEN 1 #define __TBB_TEMPLATE_FRIENDS_BROKEN 1
#endif #endif
#if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ #if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ || (__APPLE__ && __INTEL_COMPILER==1200 && !TBB_USE_DEBUG)
//! Macro controlling EH usages in TBB tests //! Macro controlling EH usages in TBB tests
/** Some older versions of glibc crash when exception handling happens concurrently. **/ /** Some older versions of glibc crash when exception handling happens concurrently. **/
#define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1
#endif #endif
#if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110
    /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads to a worker thread crash on the thread's startup. **/ /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads to a worker thread crash on the thread's startup. **/
#define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1
#endif #endif
#if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER) #if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER)
/** A bug in GCC 3.3 with access to nested classes declared in protecte d area */ /** A bug in GCC 3.3 with access to nested classes declared in protecte d area */
#define __TBB_GCC_3_3_PROTECTED_BROKEN 1 #define __TBB_GCC_3_3_PROTECTED_BROKEN 1
#endif #endif
#if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2)
    /** MinGW has a bug with stack alignment for routines invoked from MS RTLs.
        Since GCC 4.2, the bug can be worked around via a special attribute. **/
#define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1
#endif
#if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0
// GCC of this version may rashly ignore control dependencies
#define __TBB_GCC_OPTIMIZER_ORDERING_BROKEN 1
#endif
#if __FreeBSD__ #if __FreeBSD__
    /** A bug in FreeBSD 8.0 results in kernel panic when there is contention /** A bug in FreeBSD 8.0 results in kernel panic when there is contention
on a mutex created with this attribute. **/ on a mutex created with this attribute. **/
#define __TBB_PRIO_INHERIT_BROKEN 1 #define __TBB_PRIO_INHERIT_BROKEN 1
    /** A bug in FreeBSD 8.0 results in test hanging when an exception occurs /** A bug in FreeBSD 8.0 results in test hanging when an exception occurs
during (concurrent?) object construction by means of placement new operator. **/ during (concurrent?) object construction by means of placement new operator. **/
#define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1 #define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1
#endif /* __FreeBSD__ */ #endif /* __FreeBSD__ */
#if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER) #if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER)
/** The Intel compiler for IA-32 (Linux|Mac OS X) crashes or generates /** The Intel compiler for IA-32 (Linux|Mac OS X) crashes or generates
incorrect code when __asm__ arguments have a cast to volatile. **/ incorrect code when __asm__ arguments have a cast to volatile. **/
#define __TBB_ICC_ASM_VOLATILE_BROKEN 1 #define __TBB_ICC_ASM_VOLATILE_BROKEN 1
#endif #endif
#if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2)
    /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __alignof(T)
        when T has not yet been instantiated. **/
#define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1
#endif
#endif /* __TBB_tbb_config_H */ #endif /* __TBB_tbb_config_H */
 End of changes. 22 change blocks. 
44 lines changed or deleted 118 lines changed or added


 tbb_exception.h   tbb_exception.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 41 skipping to change at line 41
#include "tbb_stddef.h" #include "tbb_stddef.h"
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers // Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4530) #pragma warning (disable: 4530)
#endif #endif
#include <stdexcept> #include <stdexcept>
#include <string> // required to construct std exception classes
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop) #pragma warning (pop)
#endif #endif
#if __SUNPRO_CC
#include <string> // required to construct std exception classes
#endif
namespace tbb { namespace tbb {
//! Exception for concurrent containers //! Exception for concurrent containers
class bad_last_alloc : public std::bad_alloc { class bad_last_alloc : public std::bad_alloc {
public: public:
/*override*/ const char* what() const throw(); /*override*/ const char* what() const throw();
#if __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN #if __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN
/*override*/ ~bad_last_alloc() throw() {} /*override*/ ~bad_last_alloc() throw() {}
#endif #endif
}; };
skipping to change at line 97 skipping to change at line 94
eid_out_of_range, eid_out_of_range,
eid_segment_range_error, eid_segment_range_error,
eid_index_range_error, eid_index_range_error,
eid_missing_wait, eid_missing_wait,
eid_invalid_multiple_scheduling, eid_invalid_multiple_scheduling,
eid_improper_lock, eid_improper_lock,
eid_possible_deadlock, eid_possible_deadlock,
eid_operation_not_permitted, eid_operation_not_permitted,
eid_condvar_wait_failed, eid_condvar_wait_failed,
eid_invalid_load_factor, eid_invalid_load_factor,
eid_invalid_buckets_number, eid_reserved, // free slot for backward compatibility, can be reused.
eid_invalid_swap, eid_invalid_swap,
eid_reservation_length_error, eid_reservation_length_error,
eid_invalid_key, eid_invalid_key,
    //! The last enumerator tracks the number of defined IDs. It must remain the last one. //! The last enumerator tracks the number of defined IDs. It must remain the last one.
    /** When adding new IDs, place them immediately _before_ this comment (that is /** When adding new IDs, place them immediately _before_ this comment (that is
        _after_ all the existing IDs. NEVER insert new IDs between the existing ones. **/ _after_ all the existing IDs. NEVER insert new IDs between the existing ones. **/
eid_max eid_max
}; };
//! Gathers all throw operators in one place. //! Gathers all throw operators in one place.
skipping to change at line 208 skipping to change at line 205
{ {
set(src.my_exception_name, src.my_exception_info); set(src.my_exception_name, src.my_exception_info);
} }
captured_exception ( const char* name_, const char* info ) captured_exception ( const char* name_, const char* info )
: my_dynamic(false) : my_dynamic(false)
{ {
set(name_, info); set(name_, info);
} }
__TBB_EXPORTED_METHOD ~captured_exception () throw() { __TBB_EXPORTED_METHOD ~captured_exception () throw();
clear();
}
captured_exception& operator= ( const captured_exception& src ) { captured_exception& operator= ( const captured_exception& src ) {
if ( this != &src ) { if ( this != &src ) {
clear(); clear();
set(src.my_exception_name, src.my_exception_info); set(src.my_exception_name, src.my_exception_info);
} }
return *this; return *this;
} }
/*override*/ /*override*/
 End of changes. 5 change blocks. 
9 lines changed or deleted 4 lines changed or added


 tbb_machine.h   tbb_machine.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 32 skipping to change at line 32
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #ifndef __TBB_machine_H
#define __TBB_machine_H #define __TBB_machine_H
/** This header provides basic platform abstraction layer by hooking up appropriate
    architecture/OS/compiler specific headers from the /include/tbb/machine directory.
    If a plug-in header does not implement all the required APIs, it must specify
    the missing ones by setting one or more of the following macros:
    __TBB_USE_GENERIC_PART_WORD_CAS
    __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
    __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
    __TBB_USE_GENERIC_FETCH_ADD
    __TBB_USE_GENERIC_FETCH_STORE
    __TBB_USE_GENERIC_DWORD_FETCH_ADD
    __TBB_USE_GENERIC_DWORD_FETCH_STORE
    __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
    __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE
    __TBB_USE_GENERIC_RELAXED_LOAD_STORE
    __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
    In this case tbb_machine.h will add missing functionality based on a minimal set
    of APIs that are required to be implemented by all plug-in headers as described
    further.
    Note that these generic implementations may be sub-optimal for a particular
    architecture, and thus should be relied upon only after careful evaluation
    or as the last resort.
    Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to
    indicate that the port is not going to support double word atomics. It may also
    be set to 1 explicitly, though normally this is not necessary as tbb_machine.h
    will set it automatically.

    Prerequisites for each architecture port
    ----------------------------------------
    The following functions have no generic implementation. Therefore they must be
    implemented in each machine architecture specific header either as a conventional
    function or as a functional macro.
    __TBB_Yield()
        Signals OS that the current thread is willing to relinquish the remainder
        of its time quantum.
    __TBB_full_memory_fence()
        Must prevent all memory operations from being reordered across it (both
        by hardware and compiler). All such fences must be totally ordered (or
        sequentially consistent).
    __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand )
        Must be provided if __TBB_USE_FENCED_ATOMICS is not set.
    __TBB_machine_cmpswp8( volatile void *ptr, int32_t value, int64_t comparand )
        Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMICS is not set,
        and for 32-bit architectures if __TBB_64BIT_ATOMICS is set
    __TBB_machine_<op><S><fence>(...), where
        <op> = {cmpswp, fetchadd, fetchstore}
        <S> = {1, 2, 4, 8}
        <fence> = {full_fence, acquire, release, relaxed}
        Must be provided if __TBB_USE_FENCED_ATOMICS is set.
    __TBB_control_consistency_helper()
        Bridges the memory-semantics gap between architectures providing only
        implicit C++0x "consume" semantics (like Power Architecture) and those
        also implicitly obeying control dependencies (like Itanium).
        It must be used only in conditional code where the condition is itself
        data-dependent, and will then make subsequent code behave as if the
        original data dependency were acquired.
        It needs only an empty definition where implied by the architecture
        either specifically (Itanium) or because generally stronger C++0x "acquire"
        semantics are enforced (like x86).
    __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper()
        Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set.
        Enforce acquire and release semantics in generic implementations of fenced
        store and load operations. Depending on the particular architecture/compiler
        combination they may be a hardware fence, a compiler fence, both or nothing.
 **/
#include "tbb_stddef.h" #include "tbb_stddef.h"
namespace tbb {
namespace internal {
////////////////////////////////////////////////////////////////////////////////
// Overridable helpers declarations
//
// A machine/*.h file may choose to define these templates, otherwise it must
// request default implementation by setting appropriate __TBB_USE_GENERIC_XXX macro(s).
//
template <typename T, std::size_t S>
struct machine_load_store;
template <typename T, std::size_t S>
struct machine_load_store_relaxed;
template <typename T, std::size_t S>
struct machine_load_store_seq_cst;
//
// End of overridable helpers declarations
////////////////////////////////////////////////////////////////////////////////
template<size_t S> struct atomic_selector;
template<> struct atomic_selector<1> {
typedef int8_t word;
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<2> {
typedef int16_t word;
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<4> {
#if _MSC_VER && !_WIN64
// Work-around that avoids spurious /Wp64 warnings
typedef intptr_t word;
#else
typedef int32_t word;
#endif
inline static word fetch_store ( volatile void* location, word value );
};
template<> struct atomic_selector<8> {
typedef int64_t word;
inline static word fetch_store ( volatile void* location, word value );
};
}} // namespaces internal, tbb
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#ifdef _MANAGED #ifdef _MANAGED
#pragma managed(push, off) #pragma managed(push, off)
#endif #endif
#if __MINGW32__ #if __MINGW64__ || __MINGW32__
#include "machine/linux_ia32.h" extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); );
#define __TBB_Yield() SwitchToThread() #define __TBB_Yield() SwitchToThread()
#elif defined(_M_IX86) #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/windows_ia32.h" #include "machine/gcc_generic.h"
#elif defined(_M_AMD64) #elif __MINGW64__
#include "machine/windows_intel64.h" #include "machine/linux_intel64.h"
#elif _XBOX #elif __MINGW32__
#include "machine/xbox360_ppc.h" #include "machine/linux_ia32.h"
#else #endif
#error Unsupported platform #elif defined(_M_IX86)
#endif #include "machine/windows_ia32.h"
#elif defined(_M_X64)
#include "machine/windows_intel64.h"
#elif _XBOX
#include "machine/xbox360_ppc.h"
#endif
#ifdef _MANAGED #ifdef _MANAGED
#pragma managed(pop) #pragma managed(pop)
#endif #endif
#elif __linux__ || __FreeBSD__ #elif __linux__ || __FreeBSD__ || __NetBSD__
#if __i386__ #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#include "machine/linux_ia32.h" #include "machine/gcc_generic.h"
#elif __x86_64__ #elif __i386__
#include "machine/linux_intel64.h" #include "machine/linux_ia32.h"
#elif __ia64__ #elif __x86_64__
#include "machine/linux_ia64.h" #include "machine/linux_intel64.h"
#endif #elif __ia64__
#include "machine/linux_ia64.h"
#elif __powerpc__
#include "machine/mac_ppc.h"
#elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
#include "machine/gcc_generic.h"
#endif
#include "machine/linux_common.h"
#elif __APPLE__ #elif __APPLE__
#if __i386__ #if __i386__
#include "machine/linux_ia32.h" #include "machine/linux_ia32.h"
#elif __x86_64__ #elif __x86_64__
#include "machine/linux_intel64.h" #include "machine/linux_intel64.h"
#elif __POWERPC__ #elif __POWERPC__
#include "machine/mac_ppc.h" #include "machine/mac_ppc.h"
#endif #endif
#include "machine/macos_common.h"
#elif _AIX #elif _AIX
#include "machine/ibm_aix51.h" #include "machine/ibm_aix51.h"
#elif __sun || __SUNPRO_CC #elif __sun || __SUNPRO_CC
#define __asm__ asm #define __asm__ asm
#define __volatile__ volatile #define __volatile__ volatile
#if __i386 || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#endif #if __i386 || __i386__
#include "machine/linux_ia32.h"
#elif __x86_64__
#include "machine/linux_intel64.h"
#elif __sparc
#include "machine/sunos_sparc.h"
#endif
#include <sched.h>
#if !defined(__TBB_CompareAndSwap4) \ #define __TBB_Yield() sched_yield()
|| !defined(__TBB_CompareAndSwap8) \
|| !defined(__TBB_Yield) \
|| !defined(__TBB_release_consistency_helper)
#error Minimal requirements for tbb_machine.h not satisfied
#endif
#ifndef __TBB_load_with_acquire
    //! Load with acquire semantics; i.e., no following memory operation can move above the load.
template<typename T>
inline T __TBB_load_with_acquire(const volatile T& location) {
T temp = location;
__TBB_release_consistency_helper();
return temp;
}
#endif
#ifndef __TBB_store_with_release #endif /* OS selection */
//! Store with release semantics; i.e., no prior memory operation can move below the store. #ifndef __TBB_64BIT_ATOMICS
template<typename T, typename V> #define __TBB_64BIT_ATOMICS 1
inline void __TBB_store_with_release(volatile T& location, V value) {
__TBB_release_consistency_helper();
location = T(value);
}
#endif #endif
// Special atomic functions
#if __TBB_USE_FENCED_ATOMICS
#define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence
#define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence
#define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence
#if __TBB_WORDSIZE==8
        #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
        #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
        #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
        #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
        #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
    #else
        #error Define macros for 4-byte word, similarly to the above __TBB_WORDSIZE==8 branch.
#endif /* __TBB_WORDSIZE==4 */
#else /* !__TBB_USE_FENCED_ATOMICS */
#define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V)
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif /* !__TBB_USE_FENCED_ATOMICS */
#if __TBB_WORDSIZE==4
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V)
#elif __TBB_WORDSIZE==8
    #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#error These macros should only be used on 32-bit platforms.
#endif
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V)
#else /* __TBB_WORDSIZE != 8 */
#error Unsupported machine word size.
#endif /* __TBB_WORDSIZE */
#ifndef __TBB_Pause #ifndef __TBB_Pause
inline void __TBB_Pause(int32_t) { inline void __TBB_Pause(int32_t) {
__TBB_Yield(); __TBB_Yield();
} }
#endif #endif
namespace tbb { namespace tbb {
//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }
namespace internal { namespace internal {
//! Class that implements exponential backoff. //! Class that implements exponential backoff.
/** See implementation of spin_wait_while_eq for an example. */ /** See implementation of spin_wait_while_eq for an example. */
class atomic_backoff { class atomic_backoff : no_copy {
//! Time delay, in units of "pause" instructions. //! Time delay, in units of "pause" instructions.
/** Should be equal to approximately the number of "pause" instructions /** Should be equal to approximately the number of "pause" instructions
        that take the same time as a context switch. */ that take the same time as a context switch. */
static const int32_t LOOPS_BEFORE_YIELD = 16; static const int32_t LOOPS_BEFORE_YIELD = 16;
int32_t count; int32_t count;
public: public:
atomic_backoff() : count(1) {} atomic_backoff() : count(1) {}
//! Pause for a while. //! Pause for a while.
void pause() { void pause() {
skipping to change at line 206 skipping to change at line 367
const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) ); const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif #endif
const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset; const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
atomic_backoff b; atomic_backoff b;
uint32_t result; uint32_t result;
for(;;) { for(;;) {
result = *base; // reload the base value which might change during the pause result = *base; // reload the base value which might change during the pause
uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset ); uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
// __TBB_CompareAndSwap4 presumed to have full fence. // __TBB_CompareAndSwap4 presumed to have full fence.
result = __TBB_CompareAndSwap4( base, new_value, old_value ); // Cast shuts up /Wp64 warning
result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_value );
if( result==old_value // CAS succeeded if( result==old_value // CAS succeeded
|| ((result^old_value)&mask)!=0 ) // CAS failed and the bits of interest have changed || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of interest have changed
break; break;
else // CAS failed but the bits of interest left unchanged else // CAS failed but the bits of interest left unchanged
b.pause(); b.pause();
} }
return T((result & mask) >> bitoffset); return T((result & mask) >> bitoffset);
} }
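The masking arithmetic used above can be checked in isolation; the sketch below substitutes a plain, non-atomic stand-in for the 4-byte CAS just to show how the byte of interest is spliced into and out of the containing word:

    #include <cstdint>
    #include <cassert>

    // Non-atomic stand-in for a 4-byte CAS primitive, used only to exercise the masking math.
    static uint32_t cas4(uint32_t* p, uint32_t new_val, uint32_t comparand) {
        uint32_t old = *p;
        if (old == comparand) *p = new_val;
        return old;
    }

    int main() {
        // Word containing four bytes; CAS only byte #1 (little-endian), 0x22 -> 0xAA.
        uint32_t word = 0x44332211u;
        const unsigned bitoffset = 8 * 1;
        const uint32_t mask = 0xFFu << bitoffset;
        uint32_t old_value = (word & ~mask) | (0x22u << bitoffset); // expected full word
        uint32_t new_value = (word & ~mask) | (0xAAu << bitoffset); // desired full word
        uint32_t result = cas4(&word, new_value, old_value);
        assert(result == old_value && word == 0x4433AA11u);
        return 0;
    }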
template<size_t S, typename T> template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) { inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}
template<> template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) { inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1 #if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,va lue,comparand); return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,va lue,comparand);
#else
return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif #endif
} }
template<> template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt r, uint16_t value, uint16_t comparand ) { inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt r, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2 #if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr, value,comparand); return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr, value,comparand);
#else
return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif #endif
} }
template<> template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt r, uint32_t value, uint32_t comparand ) { inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt r, uint32_t value, uint32_t comparand ) {
return __TBB_CompareAndSwap4(ptr,value,comparand); // Cast shuts up /Wp64 warning
return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
} }
#if __TBB_64BIT_ATOMICS
template<> template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *pt r, uint64_t value, uint64_t comparand ) { inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *pt r, uint64_t value, uint64_t comparand ) {
return __TBB_CompareAndSwap8(ptr,value,comparand); return __TBB_machine_cmpswp8(ptr,value,comparand);
} }
#endif
template<size_t S, typename T> template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
atomic_backoff b; atomic_backoff b;
T result; T result;
for(;;) { for(;;) {
result = *reinterpret_cast<volatile T *>(ptr); result = *reinterpret_cast<volatile T *>(ptr);
// __TBB_CompareAndSwapGeneric presumed to have full fence. // __TBB_CompareAndSwapGeneric presumed to have full fence.
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result ) ==result ) if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result ) ==result )
break; break;
skipping to change at line 277 skipping to change at line 440
for(;;) { for(;;) {
result = *reinterpret_cast<volatile T *>(ptr); result = *reinterpret_cast<volatile T *>(ptr);
// __TBB_CompareAndSwapGeneric presumed to have full fence. // __TBB_CompareAndSwapGeneric presumed to have full fence.
if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result ) if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
break; break;
b.pause(); b.pause();
} }
return result; return result;
} }
#if __TBB_USE_GENERIC_PART_WORD_CAS
#define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
#define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD
#define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif
#if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
#define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
#define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE
#define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif
#if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
#define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
#define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                            \
    atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) { \
        return __TBB_machine_fetchstore##S( location, value );                                         \
    }
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
__TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
#undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
#endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
#if __TBB_USE_GENERIC_DWORD_LOAD_STORE
inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
for(;;) {
int64_t result = *(int64_t *)ptr;
if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
}
}
inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
    // Comparand and new value may be anything, they only must be equal, and
    // the value should have a low probability to be actually found in 'location'.
    const int64_t anyvalue = 2305843009213693951;
    return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
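The "anyvalue" trick above (an atomic load synthesized from nothing but compare-and-swap) can be demonstrated with a hypothetical, single-threaded stand-in for the 8-byte CAS primitive:

    #include <cstdint>
    #include <cassert>

    // Hypothetical stand-in for an 8-byte CAS; single-threaded, for illustration only.
    static int64_t cas8(volatile void* ptr, int64_t new_val, int64_t comparand) {
        volatile int64_t* p = (volatile int64_t*)ptr;
        int64_t old = *p;
        if (old == comparand) *p = new_val;
        return old;
    }

    // A 64-bit load built from CAS alone: compare-and-swap an arbitrary value against
    // itself; whether it "succeeds" or "fails", the return value is the current contents.
    static int64_t load8(const volatile void* ptr) {
        const int64_t anyvalue = 2305843009213693951LL; // unlikely to match real data
        return cas8(const_cast<volatile void*>(ptr), anyvalue, anyvalue);
    }

    int main() {
        volatile int64_t x = 0x0123456789ABCDEFLL;
        assert(load8(&x) == 0x0123456789ABCDEFLL);
        return 0;
    }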
#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
/** Fenced operations use volatile qualifier to prevent compiler from optimizing
    them out, and on architectures with weak memory ordering to induce compiler
    to generate code with appropriate acquire/release semantics.
    On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has
    no effect on code gen, and consistency helpers serve as a compiler fence (the
    latter being true for IA64/gcc as well to fix a bug in some gcc versions). **/
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
T to_return = location;
__TBB_acquire_consistency_helper();
return to_return;
}
static void store_with_release ( volatile T &location, T value ) {
__TBB_release_consistency_helper();
location = value;
}
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location );
}
static void store_with_release ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value );
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
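The acquire/release pairing these helpers implement corresponds to the standard producer/consumer handoff; a small sketch using C++11 std::atomic (not TBB's own helpers) shows the guarantee being relied upon:

    #include <atomic>
    #include <thread>
    #include <cassert>

    static int payload = 0;
    static std::atomic<bool> ready(false);

    void producer() {
        payload = 42;                                   // ordinary write
        ready.store(true, std::memory_order_release);   // "store with release"
    }

    void consumer() {
        while (!ready.load(std::memory_order_acquire))  // "load with acquire"
            ;                                           // spin
        assert(payload == 42);                          // write is visible after the acquire
    }

    int main() {
        std::thread t1(producer), t2(consumer);
        t1.join(); t2.join();
        return 0;
    }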
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
__TBB_full_memory_fence();
return machine_load_store<T,S>::load_with_acquire( location );
}
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
static void store ( volatile T &location, T value ) {
atomic_selector<S>::fetch_store( (volatile void*)&location, (typena
me atomic_selector<S>::word)value );
}
#else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
static void store ( volatile T &location, T value ) {
machine_load_store<T,S>::store_with_release( location, value );
__TBB_full_memory_fence();
}
#endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
/** The implementation does not use functions __TBB_machine_load8/store8 as they
    are not required to be sequentially consistent. **/
template <typename T>
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
        // Comparand and new value may be anything, they only must be equal, and
        // the value should have a low probability to be actually found in 'location'.
        const int64_t anyvalue = 2305843009213693951ll;
        return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
}
static void store ( volatile T &location, T value ) {
int64_t result = (volatile int64_t&)location;
while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)v
alue, result) != result )
result = (volatile int64_t&)location;
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed operations add volatile qualifier to prevent compiler from optimizing them out.
/** Volatile should not incur any additional cost on IA32, Intel64, and Sparc TSO
    architectures. However on architectures with weak memory ordering compiler may
    generate code with acquire/release semantics for operations on volatile data. **/
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
return location;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T>
struct machine_load_store_relaxed<T,8> {
static inline T load ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location );
}
static inline void store ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value );
}
};
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
}
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
machine_load_store<T,sizeof(T)>::store_with_release( location, T(value)
);
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t valu
e) {
machine_load_store<size_t,sizeof(size_t)>::store_with_release( location
, value );
}
template<typename T>
inline T __TBB_load_full_fence(const volatile T &location) {
return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
}
template<typename T, typename V>
inline void __TBB_store_full_fence(volatile T& location, V value) {
machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_full_fence(volatile size_t& location, size_t value)
{
machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, val
ue );
}
template<typename T>
inline T __TBB_load_relaxed (const volatile T& location) {
return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(lo
cation) );
}
template<typename T, typename V>
inline void __TBB_store_relaxed ( volatile T& location, V value ) {
machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location
), T(value) );
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_relaxed ( volatile size_t& location, size_t value )
{
machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<si
ze_t&>(location), value );
}
// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T. Type type should have a trivial default constructor and destructor, so that // strict as type T. The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers. // arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T. // to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the // The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16. // strictest alignment is 64.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
#if __GNUC__ || __SUNPRO_CC #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
struct __TBB_machine_type_with_strictest_alignment {
int member[4]; #define __TBB_DefineTypeWithAlignment(PowerOf2) \
} __attribute__((aligned(16))); struct __TBB_machine_type_with_alignment_##PowerOf2 { \
#elif _MSC_VER uint32_t member[PowerOf2/sizeof(uint32_t)]; \
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment { } __attribute__((aligned(PowerOf2)));
int member[4]; #define __TBB_alignof(T) __alignof__(T)
#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DefineTypeWithAlignment(PowerOf2) \
__declspec(align(PowerOf2)) \
struct __TBB_machine_type_with_alignment_##PowerOf2 { \
uint32_t member[PowerOf2/sizeof(uint32_t)]; \
}; };
#else #define __TBB_alignof(T) __alignof(T)
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machi
ne_type_with_strictest_alignment #else /* A compiler with unknown syntax for data alignment */
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
#endif #endif
template<size_t N> struct type_with_alignment {__TBB_machine_type_with_stri /* Now declare types aligned to useful powers of two */
ctest_alignment member;}; // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
__TBB_DefineTypeWithAlignment(16)
__TBB_DefineTypeWithAlignment(32)
__TBB_DefineTypeWithAlignment(64)
typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strict
est_alignment;
// Primary template is a declaration of incomplete type so that it fails wi
th unknown alignments
template<size_t N> struct type_with_alignment;
// Specializations for allowed alignments
template<> struct type_with_alignment<1> { char member; }; template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; }; template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; }; template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; }; template<> struct type_with_alignment<8> { uint64_t member; };
template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignmen
t_16 member; };
template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignmen
t_32 member; };
template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignmen
t_64 member; };
#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
//! Work around for bug in GNU 3.2 and MSVC compilers. //! Work around for bug in GNU 3.2 and MSVC compilers.
/** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated. /** Bug is that compiler sometimes returns 0 for __alignof(T) when T has not yet been instantiated.
    The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */ The work-around forces instantiation by forcing computation of sizeof(T) before __alignof(T). */
template<size_t Size, typename T> template<size_t Size, typename T>
struct work_around_alignment_bug { struct work_around_alignment_bug {
#if _MSC_VER static const size_t alignment = __TBB_alignof(T);
static const size_t alignment = __alignof(T);
#else
static const size_t alignment = __alignof__(T);
#endif
}; };
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_ alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment> #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_ alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_
alignment<__alignof__(T)>
#else #else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_s #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_
trictest_alignment alignment<__TBB_alignof(T)>
#endif #endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
#endif /* ____TBB_TypeWithAlignmentAtLeastAsStrict */
#endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */
// Template class here is to avoid instantiation of the static data for modules that don't use it // Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T> template<typename T>
struct reverse { struct reverse {
static const T byte_table[256]; static const T byte_table[256];
}; };
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost. // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T> template<typename T>
const T reverse<T>::byte_table[256] = { const T reverse<T>::byte_table[256] = {
skipping to change at line 354 skipping to change at line 733
0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB, 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7, 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
}; };
} // namespace internal } // namespace internal
} // namespace tbb } // namespace tbb
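The comment above notes that the table entries can also be computed on the fly; a minimal sketch of that computation for one byte follows (not part of TBB, the function name is hypothetical).

// Sketch only: bit-reverse a single byte, producing the same value that
// reverse<T>::byte_table stores at index b.
inline unsigned char reverse_byte_on_the_fly( unsigned char b ) {
    unsigned char r = 0;
    for( int i = 0; i < 8; ++i ) {
        r = (unsigned char)((r << 1) | (b & 1));  // append the lowest remaining bit of b
        b >>= 1;
    }
    return r;
}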
#ifndef __TBB_CompareAndSwap1 // Preserving access to legacy APIs
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t> using tbb::internal::__TBB_load_with_acquire;
#endif using tbb::internal::__TBB_store_with_release;
#ifndef __TBB_CompareAndSwap2 // Mapping historically used names to the ones expected by atomic_load_store_traits
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t> #define __TBB_load_acquire __TBB_load_with_acquire
#endif #define __TBB_store_release __TBB_store_with_release
#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif
#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif
#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif
#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif
#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif
#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif
#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif
#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif
#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif
#if __TBB_DECL_FENCED_ATOMICS
#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif
#endif // __TBB_DECL_FENCED_ATOMICS
// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif
#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif
#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif
#if __TBB_WORDSIZE==4
// On 32-bit platforms, "atomic.h" requires definition of __TBB_Store8 and __TBB_Load8
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
tbb::internal::atomic_backoff b;
for(;;) {
int64_t result = *(int64_t *)ptr;
if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
b.pause();
}
}
#endif
#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
int64_t result = *(int64_t *)ptr;
result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result);
return result;
}
#endif
#endif /* __TBB_WORDSIZE==4 */
#ifndef __TBB_Log2 #ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) { inline intptr_t __TBB_Log2( uintptr_t x ) {
if( x==0 ) return -1; if( x==0 ) return -1;
intptr_t result = 0; intptr_t result = 0;
uintptr_t tmp; uintptr_t tmp;
#if __TBB_WORDSIZE>=8 #if __TBB_WORDSIZE>=8
if( (tmp = x>>32) ) { x=tmp; result += 32; } if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif #endif
if( (tmp = x>>16) ) { x=tmp; result += 16; } if( (tmp = x>>16) ) { x=tmp; result += 16; }
skipping to change at line 605 skipping to change at line 781
tbb::internal::atomic_backoff b; tbb::internal::atomic_backoff b;
for(;;) { for(;;) {
uintptr_t tmp = *(volatile uintptr_t *)operand; uintptr_t tmp = *(volatile uintptr_t *)operand;
uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
if( result==tmp ) break; if( result==tmp ) break;
b.pause(); b.pause();
} }
} }
#endif #endif
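The __TBB_AtomicOR/__TBB_AtomicAND fallbacks above use a standard compare-and-swap retry loop. A hedged sketch of the same pattern applied to a different update is shown below; it is not part of TBB and the name fetch_and_max_sketch is hypothetical.

// Sketch only: atomically raise a word to at least `value`, returning the previous contents.
inline uintptr_t fetch_and_max_sketch( volatile void* operand, uintptr_t value ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t snapshot = *(volatile uintptr_t*)operand;
        uintptr_t desired  = snapshot < value ? value : snapshot;        // what we want to publish
        if( (uintptr_t)__TBB_CompareAndSwapW( operand, desired, snapshot ) == snapshot )
            return snapshot;                                             // CAS succeeded
        b.pause();                                                       // contention: back off and retry
    }
}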
#ifndef __TBB_Flag
typedef unsigned char __TBB_Flag;
#endif
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
#ifndef __TBB_TryLockByte #ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( unsigned char &flag ) { inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
return __TBB_CompareAndSwap1(&flag,1,0)==0; return __TBB_machine_cmpswp1(&flag,1,0)==0;
} }
#endif #endif
#ifndef __TBB_LockByte #ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( unsigned char& flag ) { inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
if ( !__TBB_TryLockByte(flag) ) { if ( !__TBB_TryLockByte(flag) ) {
tbb::internal::atomic_backoff b; tbb::internal::atomic_backoff b;
do { do {
b.pause(); b.pause();
} while ( !__TBB_TryLockByte(flag) ); } while ( !__TBB_TryLockByte(flag) );
} }
return 0; return 0;
} }
#endif #endif
#define __TBB_UnlockByte __TBB_store_with_release
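A minimal sketch (not part of the header; the class name is made up) of how the three byte-lock primitives above combine into a scoped lock, assuming the newer __TBB_atomic_flag declarations are in effect.

// Sketch only: RAII guard built from __TBB_LockByte / __TBB_UnlockByte.
class scoped_byte_lock_sketch {
    __TBB_atomic_flag& my_flag;
public:
    scoped_byte_lock_sketch( __TBB_atomic_flag& f ) : my_flag(f) {
        __TBB_LockByte( my_flag );        // spins (with backoff) until the byte is acquired
    }
    ~scoped_byte_lock_sketch() {
        __TBB_UnlockByte( my_flag, 0 );   // release-store 0 to mark the lock free again
    }
};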
#ifndef __TBB_ReverseByte #ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) { inline unsigned char __TBB_ReverseByte(unsigned char src) {
return tbb::internal::reverse<unsigned char>::byte_table[src]; return tbb::internal::reverse<unsigned char>::byte_table[src];
} }
#endif #endif
template<typename T> template<typename T>
T __TBB_ReverseBits(T src) T __TBB_ReverseBits(T src) {
{
T dst; T dst;
unsigned char *original = (unsigned char *) &src; unsigned char *original = (unsigned char *) &src;
unsigned char *reversed = (unsigned char *) &dst; unsigned char *reversed = (unsigned char *) &dst;
for( int i = sizeof(T)-1; i >= 0; i-- ) for( int i = sizeof(T)-1; i >= 0; i-- )
reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
return dst; return dst;
} }
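A small usage sketch for the template above (not from the headers; the function name is hypothetical).

// Sketch only: full bit reversal of a 32-bit value.
inline unsigned int reverse_bits_example_sketch() {
    // Bytes are swapped end-for-end and each byte's bits are reversed,
    // so 0x00000001u becomes 0x80000000u.
    return __TBB_ReverseBits( 0x00000001u );
}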
 End of changes. 42 change blocks. 
335 lines changed or deleted 584 lines changed or added


 tbb_profiling.h   tbb_profiling.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 105 skipping to change at line 105
} }
#else /* !WIN */ #else /* !WIN */
#define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \ #define __TBB_DEFINE_PROFILING_SET_NAME(sync_object_type) \
namespace profiling { \ namespace profiling { \
inline void set_name( sync_object_type&, const char* ) {} \ inline void set_name( sync_object_type&, const char* ) {} \
} }
#endif /* !WIN */ #endif /* !WIN */
#endif /* no tools support */ #endif /* no tools support */
#include "atomic.h"
// Need these to work regardless of tools support
namespace tbb {
namespace internal {
enum notify_type {prepare=0, cancel, acquired, releasing};
        const uintptr_t NUM_NOTIFY_TYPES = 4; // set to # elements in enum above

        void __TBB_EXPORTED_FUNC call_itt_notify_v5(int t, void *ptr);
        void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3(void *dst, void *src);
        void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3(const void *src);
        void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src );

        // two template arguments are to workaround /Wp64 warning with tbb::atomic specialized for unsigned type
        template <typename T, typename U>
        inline void itt_store_word_with_release(tbb::atomic<T>& dst, U src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
            itt_store_pointer_with_release_v3(&dst, (void *)uintptr_t(src));
#else
            dst = src;
#endif // TBB_USE_THREADING_TOOLS
        }

        template <typename T>
        inline T itt_load_word_with_acquire(const tbb::atomic<T>& src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
            // Workaround for overzealous compiler warnings
            #pragma warning (push)
            #pragma warning (disable: 4311)
#endif
            T result = (T)itt_load_pointer_with_acquire_v3(&src);
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
            #pragma warning (pop)
#endif
            return result;
#else
            return src;
#endif // TBB_USE_THREADING_TOOLS
        }

        template <typename T>
        inline void itt_store_word_with_release(T& dst, T src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
            itt_store_pointer_with_release_v3(&dst, (void *)src);
#else
            __TBB_store_with_release(dst, src);
#endif // TBB_USE_THREADING_TOOLS
        }

        template <typename T>
        inline T itt_load_word_with_acquire(const T& src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
            return (T)itt_load_pointer_with_acquire_v3(&src);
#else
            return __TBB_load_with_acquire(src);
#endif // TBB_USE_THREADING_TOOLS
        }

        template <typename T>
        inline void itt_hide_store_word(T& dst, T src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized");
            itt_store_pointer_with_release_v3(&dst, (void *)src);
#else
            dst = src;
#endif
        }

        template <typename T>
        inline T itt_hide_load_word(const T& src) {
#if TBB_USE_THREADING_TOOLS
            // This assertion should be replaced with static_assert
            __TBB_ASSERT(sizeof(T) == sizeof(void *), "Type must be word-sized.");
            return (T)itt_load_pointer_v3(&src);
#else
            return src;
#endif
        }
#if TBB_USE_THREADING_TOOLS
inline void call_itt_notify(notify_type t, void *ptr) {
call_itt_notify_v5((int)t, ptr);
}
#else
inline void call_itt_notify(notify_type /*t*/, void * /*ptr*/) {}
#endif // TBB_USE_THREADING_TOOLS
} // namespace internal
} // namespace tbb
#endif /* __TBB_profiling_H */ #endif /* __TBB_profiling_H */
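A hedged sketch of how the word-sized helpers added above are meant to be used (the variable and function names here are made up): a flag published with release semantics on one thread is read with acquire semantics on another, and the same call sites also notify threading tools when TBB_USE_THREADING_TOOLS is enabled.

// Sketch only: publish/consume a word-sized flag through the itt_* helpers.
tbb::atomic<uintptr_t> ready_flag_sketch;   // hypothetical shared state

inline void producer_sketch() {
    // release store; also reported to threading tools when they are enabled
    tbb::internal::itt_store_word_with_release( ready_flag_sketch, uintptr_t(1) );
}

inline bool consumer_sketch() {
    // acquire load that pairs with the release store above
    return tbb::internal::itt_load_word_with_acquire( ready_flag_sketch ) != 0;
}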
 End of changes. 2 change blocks. 
1 lines changed or deleted 113 lines changed or added


 tbb_stddef.h   tbb_stddef.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_tbb_stddef_H #ifndef __TBB_tbb_stddef_H
#define __TBB_tbb_stddef_H #define __TBB_tbb_stddef_H
// Marketing-driven product version // Marketing-driven product version
#define TBB_VERSION_MAJOR 3 #define TBB_VERSION_MAJOR 4
#define TBB_VERSION_MINOR 0 #define TBB_VERSION_MINOR 0
// Engineering-focused interface version // Engineering-focused interface version
#define TBB_INTERFACE_VERSION 5000 #define TBB_INTERFACE_VERSION 6000
#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
// The oldest major interface version still supported // The oldest major interface version still supported
// To be used in SONAME, manifests, etc. // To be used in SONAME, manifests, etc.
#define TBB_COMPATIBLE_INTERFACE_VERSION 2 #define TBB_COMPATIBLE_INTERFACE_VERSION 2
#define __TBB_STRING_AUX(x) #x #define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x) #define __TBB_STRING(x) __TBB_STRING_AUX(x)
// We do not need defines below for resource processing on windows // We do not need defines below for resource processing on windows
skipping to change at line 104 skipping to change at line 104
- \subpage range_req - \subpage range_req
- \subpage parallel_do_body_req - \subpage parallel_do_body_req
- \subpage parallel_for_body_req - \subpage parallel_for_body_req
- \subpage parallel_reduce_body_req - \subpage parallel_reduce_body_req
- \subpage parallel_scan_body_req - \subpage parallel_scan_body_req
- \subpage parallel_sort_iter_req - \subpage parallel_sort_iter_req
**/ **/
// Define preprocessor symbols used to determine architecture // Define preprocessor symbols used to determine architecture
#if _WIN32||_WIN64 #if _WIN32||_WIN64
# if defined(_M_AMD64) # if defined(_M_X64)||defined(__x86_64__) // the latter for MinGW support
# define __TBB_x86_64 1 # define __TBB_x86_64 1
# elif defined(_M_IA64) # elif defined(_M_IA64)
# define __TBB_ipf 1 # define __TBB_ipf 1
# elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support # elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support
# define __TBB_x86_32 1 # define __TBB_x86_32 1
# endif # endif
#else /* Assume generic Unix */ #else /* Assume generic Unix */
# if !__linux__ && !__APPLE__ # if !__linux__ && !__APPLE__
# define __TBB_generic_os 1 # define __TBB_generic_os 1
# endif # endif
skipping to change at line 126 skipping to change at line 126
# define __TBB_x86_64 1 # define __TBB_x86_64 1
# elif __ia64__ # elif __ia64__
# define __TBB_ipf 1 # define __TBB_ipf 1
# elif __i386__||__i386 // __i386 is for Sun OS # elif __i386__||__i386 // __i386 is for Sun OS
# define __TBB_x86_32 1 # define __TBB_x86_32 1
# else # else
# define __TBB_generic_arch 1 # define __TBB_generic_arch 1
# endif # endif
#endif #endif
#if _MSC_VER // tbb_config.h should be included the first since it contains macro definitions used in other headers
// define the parts of stdint.h that are needed, but put them inside tbb::internal #include "tbb_config.h"
namespace tbb {
namespace internal {
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
} // namespace internal
} // namespace tbb
#else
#include <stdint.h>
#endif /* _MSC_VER */
#if _MSC_VER >=1400 #if _MSC_VER >=1400
#define __TBB_EXPORTED_FUNC __cdecl #define __TBB_EXPORTED_FUNC __cdecl
#define __TBB_EXPORTED_METHOD __thiscall #define __TBB_EXPORTED_METHOD __thiscall
#else #else
#define __TBB_EXPORTED_FUNC #define __TBB_EXPORTED_FUNC
#define __TBB_EXPORTED_METHOD #define __TBB_EXPORTED_METHOD
#endif #endif
#include <cstddef> /* Need size_t and ptrdiff_t (the latter on Windows only) from here. */ #include <cstddef> /* Need size_t and ptrdiff_t */
#if _MSC_VER #if _MSC_VER
#define __TBB_tbb_windef_H #define __TBB_tbb_windef_H
#include "_tbb_windef.h" #include "internal/_tbb_windef.h"
#undef __TBB_tbb_windef_H #undef __TBB_tbb_windef_H
#else
#include <stdint.h>
#endif #endif
#include "tbb_config.h"
//! The namespace tbb contains all components of the library. //! The namespace tbb contains all components of the library.
namespace tbb { namespace tbb {
#if _MSC_VER
namespace internal {
typedef __int8 int8_t;
typedef __int16 int16_t;
typedef __int32 int32_t;
typedef __int64 int64_t;
typedef unsigned __int8 uint8_t;
typedef unsigned __int16 uint16_t;
typedef unsigned __int32 uint32_t;
typedef unsigned __int64 uint64_t;
} // namespace internal
#else /* Posix */
namespace internal {
using ::int8_t;
using ::int16_t;
using ::int32_t;
using ::int64_t;
using ::uint8_t;
using ::uint16_t;
using ::uint32_t;
using ::uint64_t;
} // namespace internal
#endif /* Posix */
using std::size_t;
using std::ptrdiff_t;
//! Type for an assertion handler //! Type for an assertion handler
typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment ); typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment );
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
//! Assert that x is true. //! Assert that x is true.
/** If x is false, print assertion failure message. /** If x is false, print assertion failure message.
    If the comment argument is not NULL, it is printed as part of the failure message. If the comment argument is not NULL, it is printed as part of the failure message.
    The comment argument has no other effect. */ The comment argument has no other effect. */
#define __TBB_ASSERT(predicate,message) ((predicate)?((void)0):tbb::assertion_failure(__FILE__,__LINE__,#predicate,message)) #define __TBB_ASSERT(predicate,message) ((predicate)?((void)0):tbb::assertion_failure(__FILE__,__LINE__,#predicate,message))
#define __TBB_ASSERT_EX __TBB_ASSERT #define __TBB_ASSERT_EX __TBB_ASSERT
//! Set assertion handler and return previous value of it. //! Set assertion handler and return previous value of it.
assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( asser tion_handler_type new_handler ); assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( asser tion_handler_type new_handler );
//! Process an assertion failure. //! Process an assertion failure.
/** Normally called from __TBB_ASSERT macro. /** Normally called from __TBB_ASSERT macro.
If assertion handler is null, print message for assertion failure a nd abort. If assertion handler is null, print message for assertion failure a nd abort.
Otherwise call the assertion handler. */ Otherwise call the assertion handler. */
void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int l ine, const char* expression, const char* comment ); void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int l ine, const char* expression, const char* comment );
#else #else /* !TBB_USE_ASSERT */
//! No-op version of __TBB_ASSERT. //! No-op version of __TBB_ASSERT.
#define __TBB_ASSERT(predicate,comment) ((void)0) #define __TBB_ASSERT(predicate,comment) ((void)0)
//! "Extended" version is useful to suppress warnings if a variable is only used with an assert //! "Extended" version is useful to suppress warnings if a variable is only used with an assert
#define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate)))
#endif /* TBB_USE_ASSERT */ #endif /* !TBB_USE_ASSERT */
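A hedged sketch of how the assertion hooks declared above can be used, assuming a build with TBB_USE_ASSERT enabled; the handler and function names are made up for this example.

// Sketch only: report assertion failures through a custom handler instead of aborting.
#include <cstdio>

void my_assertion_handler_sketch( const char* filename, int line,
                                  const char* expression, const char* comment ) {
    std::fprintf( stderr, "%s(%d): assertion '%s' failed%s%s\n",
                  filename, line, expression,
                  comment ? ": " : "", comment ? comment : "" );
}

void install_assertion_handler_sketch() {
    // set_assertion_handler returns the previously installed handler
    tbb::assertion_handler_type previous =
        tbb::set_assertion_handler( my_assertion_handler_sketch );
    (void)previous;
}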
//! The function returns the interface version of the TBB shared library be ing used. //! The function returns the interface version of the TBB shared library be ing used.
/** /**
* The version it returns is determined at runtime, not at compile/link tim e. * The version it returns is determined at runtime, not at compile/link tim e.
* So it can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. * So it can be different than the value of TBB_INTERFACE_VERSION obtained at compile time.
*/ */
extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version();
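A short sketch of the intent described in the comment above (not from the header; the function name is hypothetical): the runtime interface version can differ from the compile-time constant when an older TBB shared library is loaded.

// Sketch only: detect a mismatch between the headers used at compile time
// and the TBB runtime that was actually loaded.
#include <cstdio>
inline void check_tbb_version_sketch() {
    int runtime = tbb::TBB_runtime_interface_version();
    if( runtime < TBB_INTERFACE_VERSION )
        std::printf( "TBB runtime (%d) is older than the headers (%d)\n",
                     runtime, (int)TBB_INTERFACE_VERSION );
}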
//! Dummy type that distinguishes splitting constructor from copy constructor. //! Dummy type that distinguishes splitting constructor from copy constructor.
/** /**
skipping to change at line 215 skipping to change at line 228
*/ */
class split { class split {
}; };
/** /**
* @cond INTERNAL * @cond INTERNAL
* @brief Identifiers declared inside namespace internal should never be used directly by client code. * @brief Identifiers declared inside namespace internal should never be used directly by client code.
*/ */
namespace internal { namespace internal {
using std::size_t;
//! Compile-time constant that is upper bound on cache line/sector size. //! Compile-time constant that is upper bound on cache line/sector size.
/** It should be used only in situations where having a compile-time upper /** It should be used only in situations where having a compile-time upper
bound is more useful than a run-time exact answer. bound is more useful than a run-time exact answer.
@ingroup memory_allocation */ @ingroup memory_allocation */
const size_t NFS_MaxLineSize = 128; const size_t NFS_MaxLineSize = 128;
/** Label for data that may be accessed from different threads, and that may eventually become wrapped
    in a formal atomic type.
    Note that no problems have yet been observed relating to the definition currently being empty,
    even if at least "volatile" would seem to be in order to avoid data sometimes temporarily hiding
    in a register (although "volatile" as a "poor man's atomic" lacks several other features of a proper
    atomic, some of which are now provided instead through specialized functions).
    Note that usage is intentionally compatible with a definition as qualifier "volatile",
    both as a way to have the compiler help enforce use of the label and to quickly rule out
    one potential issue.
    Note however that, with some architecture/compiler combinations, e.g. on Itanium, "volatile"
    also has non-portable memory semantics that are needlessly expensive for "relaxed" operations.
    Note that this must only be applied to data that will not change bit patterns when cast to/from
    an integral type of the same length; tbb::atomic must be used instead for, e.g., floating-point types.
    TODO: apply wherever relevant **/
#define __TBB_atomic // intentionally empty, see above
template<class T, int S> template<class T, int S>
struct padded_base : T { struct padded_base : T {
char pad[NFS_MaxLineSize - sizeof(T) % NFS_MaxLineSize]; char pad[NFS_MaxLineSize - sizeof(T) % NFS_MaxLineSize];
}; };
template<class T> struct padded_base<T, 0> : T {}; template<class T> struct padded_base<T, 0> : T {};
//! Pads type T to fill out to a multiple of cache line size. //! Pads type T to fill out to a multiple of cache line size.
template<class T> template<class T>
struct padded : padded_base<T, sizeof(T)> {}; struct padded : padded_base<T, sizeof(T)> {};
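A hedged usage sketch (not from the header; the names are made up) of why padded exists: adjacent per-thread slots are rounded up toward NFS_MaxLineSize so they cannot false-share a cache line.

// Sketch only: one padded counter per thread; neighbouring slots do not share a cache line.
struct counter_sketch {
    long hits;
};
static tbb::internal::padded<counter_sketch> per_thread_hits_sketch[8];

inline void record_hit_sketch( int thread_index ) {
    ++per_thread_hits_sketch[thread_index].hits;   // no false sharing with other indices
}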
skipping to change at line 263 skipping to change at line 295
#define __TBB_TRY #define __TBB_TRY
#define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() ) #define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() )
#define __TBB_THROW(e) ((void)0) #define __TBB_THROW(e) ((void)0)
#define __TBB_RETHROW() ((void)0) #define __TBB_RETHROW() ((void)0)
#endif /* !TBB_USE_EXCEPTIONS */ #endif /* !TBB_USE_EXCEPTIONS */
//! Report a runtime warning. //! Report a runtime warning.
void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... ); void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... );
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
static void* const poisoned_ptr = reinterpret_cast<void*>(-1);
//! Set p to invalid pointer value. //! Set p to invalid pointer value.
template<typename T> template<typename T>
inline void poison_pointer( T* & p ) { inline void poison_pointer( T*& p ) { p = reinterpret_cast<T*>(poisoned_ptr); }
    p = reinterpret_cast<T*>(-1);
}
/** Expected to be used in assertions only, thus no empty form is defined. **/
template<typename T>
inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); }
#else #else
template<typename T> template<typename T>
inline void poison_pointer( T* ) {/*do nothing*/} inline void poison_pointer( T* ) {/*do nothing*/}
#endif /* TBB_USE_ASSERT */ #endif /* !TBB_USE_ASSERT */
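A minimal sketch (not part of TBB, assuming a debug build with TBB_USE_ASSERT so that is_poisoned is available) of how pointer poisoning is typically combined with an assertion; node_sketch is a made-up type.

// Sketch only: poison a link on teardown so accidental reuse trips an assertion.
struct node_sketch {
    node_sketch* next;
    ~node_sketch() { tbb::internal::poison_pointer( next ); }  // next becomes an invalid value
};

inline node_sketch* follow_sketch( node_sketch* n ) {
    __TBB_ASSERT( !tbb::internal::is_poisoned( n->next ), "node used after destruction" );
    return n->next;
}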
//! Cast pointer from U* to T.
/** This method should be used sparingly as a last resort for dealing with
situations that inherently break strict ISO C++ aliasing rules. */
template<typename T, typename U>
inline T punned_cast( U* ptr ) {
uintptr_t x = reinterpret_cast<uintptr_t>(ptr);
return reinterpret_cast<T>(x);
}
//! Base class for types that should not be assigned. //! Base class for types that should not be assigned.
class no_assign { class no_assign {
// Deny assignment // Deny assignment
void operator=( const no_assign& ); void operator=( const no_assign& );
public: public:
#if __GNUC__ #if __GNUC__
//! Explicitly define default construction, because otherwise gcc issues gratuitous warning. //! Explicitly define default construction, because otherwise gcc issues gratuitous warning.
no_assign() {} no_assign() {}
#endif /* __GNUC__ */ #endif /* __GNUC__ */
 End of changes. 20 change blocks. 
53 lines changed or deleted 112 lines changed or added


 tbb_thread.h   tbb_thread.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 33 skipping to change at line 33
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_tbb_thread_H #ifndef __TBB_tbb_thread_H
#define __TBB_tbb_thread_H #define __TBB_tbb_thread_H
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#define __TBB_NATIVE_THREAD_ROUTINE unsigned WINAPI #define __TBB_NATIVE_THREAD_ROUTINE unsigned WINAPI
#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) unsigned (WINAPI* r)( void* ) #define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) unsigned (WINAPI* r)( void* )
#else #else
#define __TBB_NATIVE_THREAD_ROUTINE void* #define __TBB_NATIVE_THREAD_ROUTINE void*
#define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) void* (*r)( void* ) #define __TBB_NATIVE_THREAD_ROUTINE_PTR(r) void* (*r)( void* )
#include <pthread.h> #include <pthread.h>
#endif // _WIN32||_WIN64 #endif // _WIN32||_WIN64
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tick_count.h" #include "tick_count.h"
#include <exception> // Need std::terminate from here.
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers // Suppress "C++ exception handler used, but unwind semantics are not enabled" warning in STL headers
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4530) #pragma warning (disable: 4530)
#endif #endif
#include <iosfwd> #include <iosfwd>
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
skipping to change at line 67 skipping to change at line 66
namespace tbb { namespace tbb {
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
class tbb_thread_v3; class tbb_thread_v3;
} // namespace internal } // namespace internal
void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 ); inline void swap( internal::tbb_thread_v3& t1, internal::tbb_thread_v3& t2 );
namespace internal { namespace internal {
//! Allocate a closure //! Allocate a closure
void* __TBB_EXPORTED_FUNC allocate_closure_v3( size_t size ); void* __TBB_EXPORTED_FUNC allocate_closure_v3( size_t size );
//! Free a closure allocated by allocate_closure_v3 //! Free a closure allocated by allocate_closure_v3
void __TBB_EXPORTED_FUNC free_closure_v3( void* ); void __TBB_EXPORTED_FUNC free_closure_v3( void* );
struct thread_closure_base { struct thread_closure_base {
void* operator new( size_t size ) {return allocate_closure_v3(size) ;} void* operator new( size_t size ) {return allocate_closure_v3(size) ;}
void operator delete( void* ptr ) {free_closure_v3(ptr);} void operator delete( void* ptr ) {free_closure_v3(ptr);}
}; };
template<class F> struct thread_closure_0: thread_closure_base { template<class F> struct thread_closure_0: thread_closure_base {
F function; F function;
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_0 *self = static_cast<thread_closure_0*>(c); thread_closure_0 *self = static_cast<thread_closure_0*>(c);
__TBB_TRY { self->function();
self->function();
} __TBB_CATCH( ... ) {
std::terminate();
}
delete self; delete self;
return 0; return 0;
} }
thread_closure_0( const F& f ) : function(f) {} thread_closure_0( const F& f ) : function(f) {}
}; };
//! Structure used to pass user function with 1 argument to thread. //! Structure used to pass user function with 1 argument to thread.
template<class F, class X> struct thread_closure_1: thread_closure_base { template<class F, class X> struct thread_closure_1: thread_closure_base {
F function; F function;
X arg1; X arg1;
//! Routine passed to Windows's _beginthreadex by thread::internal_ start() inside tbb.dll //! Routine passed to Windows's _beginthreadex by thread::internal_ start() inside tbb.dll
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_1 *self = static_cast<thread_closure_1*>(c); thread_closure_1 *self = static_cast<thread_closure_1*>(c);
__TBB_TRY { self->function(self->arg1);
self->function(self->arg1);
} __TBB_CATCH( ... ) {
std::terminate();
}
delete self; delete self;
return 0; return 0;
} }
thread_closure_1( const F& f, const X& x ) : function(f), arg1(x) { } thread_closure_1( const F& f, const X& x ) : function(f), arg1(x) { }
}; };
template<class F, class X, class Y> struct thread_closure_2: thread_clo sure_base { template<class F, class X, class Y> struct thread_closure_2: thread_clo sure_base {
F function; F function;
X arg1; X arg1;
Y arg2; Y arg2;
//! Routine passed to Windows's _beginthreadex by thread::internal_ start() inside tbb.dll //! Routine passed to Windows's _beginthreadex by thread::internal_ start() inside tbb.dll
static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) { static __TBB_NATIVE_THREAD_ROUTINE start_routine( void* c ) {
thread_closure_2 *self = static_cast<thread_closure_2*>(c); thread_closure_2 *self = static_cast<thread_closure_2*>(c);
__TBB_TRY { self->function(self->arg1, self->arg2);
self->function(self->arg1, self->arg2);
} __TBB_CATCH( ... ) {
std::terminate();
}
delete self; delete self;
return 0; return 0;
} }
thread_closure_2( const F& f, const X& x, const Y& y ) : function(f ), arg1(x), arg2(y) {} thread_closure_2( const F& f, const X& x, const Y& y ) : function(f ), arg1(x), arg2(y) {}
}; };
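A hedged sketch of the user-facing side of the closures above (this code is not shown in the diff; the worker function name is made up): a callable and its arguments are what ultimately get packaged into one of the thread_closure_N types and run via start_routine on the new thread.

// Sketch only: tbb::tbb_thread wraps the callable in a closure internally.
#include "tbb/tbb_thread.h"

void worker_sketch( int id ) {
    /* ... do work for this id ... */
    (void)id;
}

void spawn_and_wait_sketch() {
    tbb::tbb_thread t( worker_sketch, 42 );   // runs worker_sketch(42) on a new thread
    t.join();                                 // completion happens-before join() returns
}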
//! Versioned thread class. //! Versioned thread class.
class tbb_thread_v3 { class tbb_thread_v3 {
tbb_thread_v3(const tbb_thread_v3&); // = delete; // Deny access tbb_thread_v3(const tbb_thread_v3&); // = delete; // Deny access
public: public:
skipping to change at line 186 skipping to change at line 173
bool joinable() const {return my_handle!=0; } bool joinable() const {return my_handle!=0; }
//! The completion of the thread represented by *this happens befor e join() returns. //! The completion of the thread represented by *this happens befor e join() returns.
void __TBB_EXPORTED_METHOD join(); void __TBB_EXPORTED_METHOD join();
//! When detach() returns, *this no longer represents the possibly continuing thread of execution. //! When detach() returns, *this no longer represents the possibly continuing thread of execution.
void __TBB_EXPORTED_METHOD detach(); void __TBB_EXPORTED_METHOD detach();
~tbb_thread_v3() {if( joinable() ) detach();} ~tbb_thread_v3() {if( joinable() ) detach();}
inline id get_id() const; inline id get_id() const;
native_handle_type native_handle() { return my_handle; } native_handle_type native_handle() { return my_handle; }
//! The number of hardware thread contexts. //! The number of hardware thread contexts.
/** Before TBB 3.0 U4 this methods returned the number of logical C
PU in
the system. Currently on Windows, Linux and FreeBSD it returns
the
number of logical CPUs available to the current process in acco
rdance
with its affinity mask.
NOTE: The return value of this method never changes after its f
irst
invocation. This means that changes in the process affinity mas
k that
took place after this method was first invoked will not affect
the
number of worker threads in the TBB worker threads pool. **/
static unsigned __TBB_EXPORTED_FUNC hardware_concurrency(); static unsigned __TBB_EXPORTED_FUNC hardware_concurrency();
private: private:
native_handle_type my_handle; native_handle_type my_handle;
#if _WIN32||_WIN64 #if _WIN32||_WIN64
DWORD my_thread_id; DWORD my_thread_id;
#endif // _WIN32||_WIN64 #endif // _WIN32||_WIN64
/** Runs start_routine(closure) on another thread and sets my_handl e to the handle of the created thread. */ /** Runs start_routine(closure) on another thread and sets my_handl e to the handle of the created thread. */
void __TBB_EXPORTED_METHOD internal_start( __TBB_NATIVE_THREAD_ROUT INE_PTR(start_routine), void __TBB_EXPORTED_METHOD internal_start( __TBB_NATIVE_THREAD_ROUT INE_PTR(start_routine),
void* closure ); void* closure );
 End of changes. 8 change blocks. 
19 lines changed or deleted 21 lines changed or added


 tbbmalloc_proxy.h   tbbmalloc_proxy.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 thread   thread 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added


 tick_count.h   tick_count.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 35 skipping to change at line 35
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_tick_count_H #ifndef __TBB_tick_count_H
#define __TBB_tick_count_H #define __TBB_tick_count_H
#include "tbb_stddef.h" #include "tbb_stddef.h"
#if _WIN32||_WIN64 #if _WIN32||_WIN64
#include <windows.h> #include "machine/windows_api.h"
#elif __linux__ #elif __linux__
#include <ctime> #include <ctime>
#else /* generic Unix */ #else /* generic Unix */
#include <sys/time.h> #include <sys/time.h>
#endif /* (choice of OS) */ #endif /* (choice of OS) */
namespace tbb { namespace tbb {
//! Absolute timestamp //! Absolute timestamp
/** @ingroup timing */ /** @ingroup timing */
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 windows_ia32.h   windows_ia32.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free softwar e As a special exception, you may use this file as part of a free softwar e
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you comp ile templates or use macros or inline functions from this file, or you comp ile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#if defined(__INTEL_COMPILER) #define __TBB_machine_windows_ia32_H
#define __TBB_release_consistency_helper() __asm { __asm nop }
#define __TBB_WORDSIZE 4
#define __TBB_BIG_ENDIAN 0
#if __INTEL_COMPILER
#define __TBB_compiler_fence() __asm { __asm nop }
#elif _MSC_VER >= 1300 #elif _MSC_VER >= 1300
extern "C" void _ReadWriteBarrier(); extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier) #pragma intrinsic(_ReadWriteBarrier)
#define __TBB_release_consistency_helper() _ReadWriteBarrier() #define __TBB_compiler_fence() _ReadWriteBarrier()
#else #else
#error Unsupported compiler - need to define __TBB_release_consistency_helper to support it #error Unsupported compiler - need to define __TBB_{control,acquire,release}_consistency_helper to support it
#endif #endif
inline void __TBB_rel_acq_fence() { __asm { __asm mfence } } #define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_WORDSIZE 4 #define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_BIG_ENDIAN 0 #define __TBB_full_memory_fence() __asm { __asm mfence }
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings in /Wp64 mode // Workaround for overzealous compiler warnings in /Wp64 mode
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4244 4267) #pragma warning (disable: 4244 4267)
#endif #endif
extern "C" { extern "C" {
__int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ); __int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr , __int64 addend ); __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr , __int64 addend );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *p tr, __int64 value ); __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *p tr, __int64 value );
void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __in t64 value ); void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __in t64 value );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *p tr); __int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *p tr);
} }
template <typename T, size_t S> #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \
struct __TBB_machine_load_store {
static inline T load_with_acquire(const volatile T& location) {
T to_return = location;
__TBB_release_consistency_helper();
return to_return;
}
static inline void store_with_release(volatile T &location, T value) {
__TBB_release_consistency_helper();
location = value;
}
};
template <typename T>
struct __TBB_machine_load_store<T,8> {
static inline T load_with_acquire(const volatile T& location) {
return __TBB_machine_load8((volatile void *)&location);
}
static inline void store_with_release(T &location, T value) {
__TBB_machine_store8((volatile void *)&location,(__int64)value);
}
};
template<typename T>
inline T __TBB_machine_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
template<typename T, typename V>
inline void __TBB_machine_store_with_release(T& location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,value);
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_machine_store_with_release(size_t& location, size_t value) {
    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
}
#define __TBB_load_with_acquire(L) __TBB_machine_load_with_acquire((L))
#define __TBB_store_with_release(L,V) __TBB_machine_store_with_release((L),(V))
#define __TBB_DEFINE_ATOMICS(S,T,U,A,C) \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U c omparand ) { \ static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U c omparand ) { \
T result; \ T result; \
volatile T *p = (T *)ptr; \ volatile T *p = (T *)ptr; \
__TBB_release_consistency_helper(); \
__asm \ __asm \
{ \ { \
__asm mov edx, p \ __asm mov edx, p \
__asm mov C , value \ __asm mov C , value \
__asm mov A , comparand \ __asm mov A , comparand \
__asm lock cmpxchg [edx], C \ __asm lock cmpxchg [edx], C \
__asm mov result, A \ __asm mov result, A \
} \ } \
__TBB_release_consistency_helper(); \
return result; \ return result; \
} \ } \
\ \
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \ static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
T result; \ T result; \
volatile T *p = (T *)ptr; \ volatile T *p = (T *)ptr; \
__TBB_release_consistency_helper(); \
__asm \ __asm \
{ \ { \
__asm mov edx, p \ __asm mov edx, p \
__asm mov A, addend \ __asm mov A, addend \
__asm lock xadd [edx], A \ __asm lock xadd [edx], A \
__asm mov result, A \ __asm mov result, A \
} \ } \
__TBB_release_consistency_helper(); \
return result; \ return result; \
}\ }\
\ \
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \ static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
T result; \ T result; \
volatile T *p = (T *)ptr; \ volatile T *p = (T *)ptr; \
__TBB_release_consistency_helper(); \
__asm \ __asm \
{ \ { \
__asm mov edx, p \ __asm mov edx, p \
__asm mov A, value \ __asm mov A, value \
__asm lock xchg [edx], A \ __asm lock xchg [edx], A \
__asm mov result, A \ __asm mov result, A \
} \ } \
__TBB_release_consistency_helper(); \
return result; \ return result; \
} }
__TBB_DEFINE_ATOMICS(1, __int8, __int8, al, cl) __TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
__TBB_DEFINE_ATOMICS(2, __int16, __int16, ax, cx) __TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
__TBB_DEFINE_ATOMICS(4, __int32, __int32, eax, ecx) __TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
__TBB_DEFINE_ATOMICS(W, ptrdiff_t, ptrdiff_t, eax, ecx)
#undef __TBB_MACHINE_DEFINE_ATOMICS
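A hedged sketch (not part of the header; the function name is made up) of how the 4-byte compare-and-swap generated by the macro above is typically consumed, assuming the ia32 definitions are in effect.

// Sketch only: a saturating increment built on __TBB_machine_cmpswp4.
inline ptrdiff_t bounded_increment_sketch( volatile ptrdiff_t& counter, ptrdiff_t limit ) {
    for(;;) {
        ptrdiff_t snapshot = counter;
        if( snapshot >= limit ) return snapshot;                        // already saturated
        if( __TBB_machine_cmpswp4( &counter, snapshot+1, snapshot ) == snapshot )
            return snapshot+1;                                          // CAS succeeded
        // otherwise another thread intervened; retry with a fresh snapshot
    }
}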
static inline __int32 __TBB_machine_lg( unsigned __int64 i ) { static inline __int32 __TBB_machine_lg( unsigned __int64 i ) {
unsigned __int32 j; unsigned __int32 j;
__asm __asm
{ {
bsr eax, i bsr eax, i
mov j, eax mov j, eax
} }
return j; return j;
} }
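A few worked values for the function above (a sketch, not part of the header): the bsr instruction yields the index of the highest set bit, i.e. floor(log2(x)), which is what __TBB_Log2 exposes.

// Sketch only: expected results of __TBB_machine_lg for small inputs.
#include <cassert>
inline void lg_examples_sketch() {
    assert( __TBB_machine_lg(1)  == 0 );
    assert( __TBB_machine_lg(40) == 5 );        // highest set bit of 0b101000 is bit 5
    assert( __TBB_machine_lg(1u<<20) == 20 );
}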
skipping to change at line 197 skipping to change at line 154
{ {
mov eax, delay mov eax, delay
L1: L1:
pause pause
add eax, -1 add eax, -1
jne L1 jne L1
} }
return; return;
} }
#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswpW(P,V,C)
#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
#define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V)
#define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V)
#define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchaddW(P,V)
#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
#define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V)
#define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V)
#define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstoreW(P,V)
// Should define this:
#define __TBB_Store8(P,V) __TBB_machine_store8(P,V)
#define __TBB_Load8(P) __TBB_machine_load8(P)
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
// Definition of other functions // Definition of other functions
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread() #define __TBB_Yield() SwitchToThread()
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
// Use generic definitions from tbb_machine.h
#undef __TBB_TryLockByte
#undef __TBB_LockByte
#if defined(_MSC_VER)&&_MSC_VER<1400 #if defined(_MSC_VER)&&_MSC_VER<1400
static inline void* __TBB_machine_get_current_teb () { static inline void* __TBB_machine_get_current_teb () {
void* pteb; void* pteb;
__asm mov eax, fs:[0x18] __asm mov eax, fs:[0x18]
__asm mov pteb, eax __asm mov pteb, eax
return pteb; return pteb;
} }
#endif #endif
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop) #pragma warning (pop)
#endif // warnings 4244, 4267 are back #endif // warnings 4244, 4267 are back
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t {
int mxcsr;
short x87cw;
};
inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
__asm {
__asm mov eax, ctl
__asm stmxcsr [eax]
__asm fstcw [eax+4]
}
}
inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) {
__asm {
__asm mov eax, ctl
__asm ldmxcsr [eax]
__asm fldcw [eax+4]
}
}
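A brief sketch of how the two routines above are intended to be paired (not part of the header; the function name is made up): capture the FPU control state in one context and re-apply it in another, so that rounding and exception settings stay consistent.

// Sketch only: save and later restore MXCSR and the x87 control word.
inline void propagate_fp_settings_sketch() {
    __TBB_cpu_ctl_env_t saved;
    __TBB_get_cpu_ctl_env( &saved );   // read MXCSR and the x87 control word
    // ... switch to another execution context ...
    __TBB_set_cpu_ctl_env( &saved );   // re-apply the captured settings
}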
 End of changes. 18 change blocks. 
98 lines changed or deleted 30 lines changed or added


 windows_intel64.h   windows_intel64.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H #if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H)
#error Do not include this file directly; include tbb_machine.h instead #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_windows_intel64_H
#define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 0
#include <intrin.h> #include <intrin.h>
#if !defined(__INTEL_COMPILER)
#pragma intrinsic(_InterlockedOr64) #if !__INTEL_COMPILER
#pragma intrinsic(_InterlockedAnd64) #pragma intrinsic(_InterlockedOr64)
#pragma intrinsic(_InterlockedCompareExchange) #pragma intrinsic(_InterlockedAnd64)
#pragma intrinsic(_InterlockedCompareExchange64) #pragma intrinsic(_InterlockedCompareExchange)
#pragma intrinsic(_InterlockedExchangeAdd) #pragma intrinsic(_InterlockedCompareExchange64)
#pragma intrinsic(_InterlockedExchangeAdd64) #pragma intrinsic(_InterlockedExchangeAdd)
#pragma intrinsic(_InterlockedExchange) #pragma intrinsic(_InterlockedExchangeAdd64)
#pragma intrinsic(_InterlockedExchange64) #pragma intrinsic(_InterlockedExchange)
#pragma intrinsic(_InterlockedExchange64)
#endif /* !defined(__INTEL_COMPILER) */ #endif /* !defined(__INTEL_COMPILER) */
#if defined(__INTEL_COMPILER) #if __INTEL_COMPILER
#define __TBB_release_consistency_helper() __asm { __asm nop } #define __TBB_compiler_fence() __asm { __asm nop }
inline void __TBB_rel_acq_fence() { __asm { __asm mfence } } #define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 #elif _MSC_VER >= 1300
extern "C" void _ReadWriteBarrier(); extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier) #pragma intrinsic(_ReadWriteBarrier)
#define __TBB_release_consistency_helper() _ReadWriteBarrier() #pragma intrinsic(_mm_mfence)
#pragma intrinsic(_mm_mfence) #define __TBB_compiler_fence() _ReadWriteBarrier()
inline void __TBB_rel_acq_fence() { _mm_mfence(); } #define __TBB_full_memory_fence() _mm_mfence()
#endif #endif
#define __TBB_WORDSIZE 8 #define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_BIG_ENDIAN 0 #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
// ATTENTION: if you ever change argument types in machine-specific primitives, // ATTENTION: if you ever change argument types in machine-specific primitives,
// please take care of atomic_word<> specializations in tbb/atomic.h // please take care of atomic_word<> specializations in tbb/atomic.h
extern "C" { extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay ); void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay );
} }
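All of the exported primitives declared above share one contract: they return the value the location held before the operation, and a compare-and-swap stores its new value only when that old value equals the comparand. A minimal sketch of a retry loop built on the 1-byte variant (the helper name is illustrative, not part of TBB):
inline __int8 fetch_and_increment1( volatile void* counter ) {   // hypothetical helper
    __int8 old_val, new_val;
    do {
        old_val = *(volatile __int8*)counter;                    // read the current value
        new_val = __int8( old_val + 1 );                         // compute the desired value
    } while ( __TBB_machine_cmpswp1( counter, new_val, old_val ) != old_val );  // retry if another thread intervened
    return old_val;                                               // value observed before the increment
}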
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) {
    return _InterlockedCompareExchange( (long*)ptr, value, comparand );
}
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
    return _InterlockedExchangeAdd( (long*)ptr, addend );
}
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
    return _InterlockedExchange( (long*)ptr, value );
}
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) {
    return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand );
}
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
    return _InterlockedExchangeAdd64( (__int64*)ptr, addend );
}
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
    return _InterlockedExchange64( (__int64*)ptr, value );
}
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if !__INTEL_COMPILER #if !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in t64 w ); extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in t64 w );
#pragma intrinsic(_BitScanReverse64) #pragma intrinsic(_BitScanReverse64)
#endif #endif
inline __int64 __TBB_machine_lg( unsigned __int64 i ) { inline __int64 __TBB_machine_lg( unsigned __int64 i ) {
#if __INTEL_COMPILER #if __INTEL_COMPILER
unsigned __int64 j; unsigned __int64 j;
__asm __asm
{ {
skipping to change at line 99 skipping to change at line 130
} }
inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
_InterlockedOr64((__int64*)operand, addend); _InterlockedOr64((__int64*)operand, addend);
} }
inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
_InterlockedAnd64((__int64*)operand, addend); _InterlockedAnd64((__int64*)operand, addend);
} }
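__TBB_AtomicOR and __TBB_AtomicAND below map onto these interlocked helpers and are typically used to set or clear flag bits in a word shared between threads. An illustrative sketch (the flag constant and function names are not part of the header):
const intptr_t BUSY_BIT = 1;                                                                 // illustrative flag
inline void set_busy  ( volatile void* flags ) { __TBB_machine_OR ( flags,  BUSY_BIT ); }   // atomically set bit 0
inline void clear_busy( volatile void* flags ) { __TBB_machine_AND( flags, ~BUSY_BIT ); }   // atomically clear bit 0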
#define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C)
#define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C)
#define __TBB_CompareAndSwap4(P,V,C) _InterlockedCompareExchange( (long*) P , V , C )
#define __TBB_CompareAndSwap8(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C )
#define __TBB_CompareAndSwapW(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C )
#define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V)
#define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V)
#define __TBB_FetchAndAdd4(P,V) _InterlockedExchangeAdd((long*) P , V )
#define __TBB_FetchAndAdd8(P,V) _InterlockedExchangeAdd64((__int64*) P , V )
#define __TBB_FetchAndAddW(P,V) _InterlockedExchangeAdd64((__int64*) P , V )
#define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V)
#define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V)
#define __TBB_FetchAndStore4(P,V) _InterlockedExchange((long*) P , V )
#define __TBB_FetchAndStore8(P,V) _InterlockedExchange64((__int64*) P , V )
#define __TBB_FetchAndStoreW(P,V) _InterlockedExchange64((__int64*) P , V )
// Not used if wordsize == 8
#undef __TBB_Store8
#undef __TBB_Load8
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread() #define __TBB_Yield() SwitchToThread()
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t {
int mxcsr;
short x87cw;
};
// Use generic definitions from tbb_machine.h extern "C" {
#undef __TBB_TryLockByte void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* );
#undef __TBB_LockByte void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* );
}


 xbox360_ppc.h   xbox360_ppc.h 
/* /*
Copyright 2005-2010 Intel Corporation. All Rights Reserved. Copyright 2005-2011 Intel Corporation. All Rights Reserved.
This file is part of Threading Building Blocks. This file is part of Threading Building Blocks.
Threading Building Blocks is free software; you can redistribute it Threading Building Blocks is free software; you can redistribute it
and/or modify it under the terms of the GNU General Public License and/or modify it under the terms of the GNU General Public License
version 2 as published by the Free Software Foundation. version 2 as published by the Free Software Foundation.
Threading Building Blocks is distributed in the hope that it will be Threading Building Blocks is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty useful, but WITHOUT ANY WARRANTY; without even the implied warranty
of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
skipping to change at line 29 skipping to change at line 29
As a special exception, you may use this file as part of a free software As a special exception, you may use this file as part of a free software
library without restriction. Specifically, if other files instantiate library without restriction. Specifically, if other files instantiate
templates or use macros or inline functions from this file, or you compile templates or use macros or inline functions from this file, or you compile
this file and link it with other files to produce an executable, this this file and link it with other files to produce an executable, this
file does not by itself cause the resulting executable to be covered by file does not by itself cause the resulting executable to be covered by
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered by invalidate any other reasons why the executable file might be covered by
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_machine_H // TODO: revise by comparing with mac_ppc.h
#error Do not include this file directly; include tbb_machine.h instead
#if !defined(__TBB_machine_H) || defined(__TBB_machine_xbox360_ppc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_xbox360_ppc_H
#define NONET #define NONET
#define NOD3D #define NOD3D
#include "xtl.h" #include "xtl.h"
#include "ppcintrinsics.h" #include "ppcintrinsics.h"
#if _MSC_VER >= 1300 #if _MSC_VER >= 1300
extern "C" void _ReadWriteBarrier(); extern "C" void _MemoryBarrier();
#pragma intrinsic(_ReadWriteBarrier) #pragma intrinsic(_MemoryBarrier)
#define __TBB_release_consistency_helper() _ReadWriteBarrier() #define __TBB_control_consistency_helper() __isync()
#define __TBB_acquire_consistency_helper() _MemoryBarrier()
#define __TBB_release_consistency_helper() _MemoryBarrier()
#endif #endif
inline void __TBB_rel_acq_fence() { __lwsync(); } #define __TBB_full_memory_fence() __sync()
#define __TBB_WORDSIZE 4 #define __TBB_WORDSIZE 4
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 1
//todo: define __TBB_DECL_FENCED_ATOMICS and define acquire/release primitives to maximize performance //todo: define __TBB_USE_FENCED_ATOMICS and define acquire/release primitives to maximize performance
typedef __int64 int64_t; //required for definition of Store8/Load8 in atomic.h inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand ) {
typedef unsigned char uint8_t; //same reason __sync();
inline __int32 __TBB_machine_cmpswp4(volatile void *ptr, __int32 value, __int32 comparand )
{
__lwsync();
__int32 result = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand); __int32 result = InterlockedCompareExchange((volatile LONG*)ptr, value, comparand);
__lwsync(); __isync();
return result; return result;
} }
inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand ) inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __int64 comparand )
{ {
__lwsync(); __sync();
__int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand); __int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value, comparand);
__lwsync(); __isync();
return result; return result;
} }
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#pragma optimize( "", off ) #pragma optimize( "", off )
inline void __TBB_machine_pause (__int32 delay ) inline void __TBB_machine_pause (__int32 delay )
{ {
for (__int32 i=0; i<delay; i++) {;}; for (__int32 i=0; i<delay; i++) {;};
} }
#pragma optimize( "", on ) #pragma optimize( "", on )
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_Yield() Sleep(0) #define __TBB_Yield() Sleep(0)
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_fence_for_acquire() __lwsync()
#define __TBB_fence_for_release() __lwsync() // This port uses only 2 hardware threads for TBB on XBOX 360.
// Others are left to sound etc.
// Change the following mask to allow TBB to use more HW threads.
static const int __TBB_XBOX360_HARDWARE_THREAD_MASK = 0x0C;
static inline int __TBB_XBOX360_DetectNumberOfWorkers()
{
char a[__TBB_XBOX360_HARDWARE_THREAD_MASK]; //compile time assert - at least one bit should be set always
a[0]=0;
return ((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 0) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 1) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 2) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 3) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 4) & 1) +
((__TBB_XBOX360_HARDWARE_THREAD_MASK >> 5) & 1) + 1; // +1 accommodates for the master thread
}
static inline int __TBB_XBOX360_GetHardwareThreadIndex(int workerThreadIndex)
{
workerThreadIndex %= __TBB_XBOX360_DetectNumberOfWorkers()-1;
int m = __TBB_XBOX360_HARDWARE_THREAD_MASK;
int index = 0;
int skipcount = workerThreadIndex;
while (true)
{
if ((m & 1)!=0)
{
if (skipcount==0) break;
skipcount--;
}
m >>= 1;
index++;
}
return index;
}
#define __TBB_HardwareConcurrency() __TBB_XBOX360_DetectNumberOfWorkers()
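With the default mask 0x0C (binary 1100) only hardware threads 2 and 3 are made available to TBB, so the helpers above resolve as follows; the wrapper function is illustrative only and not part of the header:
inline void xbox360_mapping_example() {
    int workers = __TBB_XBOX360_DetectNumberOfWorkers();    // 2 set bits + 1 for the master thread == 3
    int hw0 = __TBB_XBOX360_GetHardwareThreadIndex( 0 );    // first enabled bit  -> hardware thread 2
    int hw1 = __TBB_XBOX360_GetHardwareThreadIndex( 1 );    // second enabled bit -> hardware thread 3
    (void)workers; (void)hw0; (void)hw1;                    // silence unused-variable warnings
}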
