| atomic.h | | atomic.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 53 | | skipping to change at line 53 | |
| #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| // Workaround for overzealous compiler warnings | | // Workaround for overzealous compiler warnings | |
| #pragma warning (push) | | #pragma warning (push) | |
| #pragma warning (disable: 4244 4267) | | #pragma warning (disable: 4244 4267) | |
| #endif | | #endif | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| //! Specifies memory fencing. | | //! Specifies memory fencing. | |
| enum memory_semantics { | | enum memory_semantics { | |
|
| //! For internal use only. | | //! Sequentially consistent fence. | |
| __TBB_full_fence, | | full_fence, | |
| //! Acquire fence | | //! Acquire fence | |
| acquire, | | acquire, | |
| //! Release fence | | //! Release fence | |
|
| release | | release, | |
| | | //! No ordering | |
| | | relaxed | |
| }; | | }; | |
| | | | |
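A quick usage sketch of the renamed/added enumerators (the counter and helper names below are illustrative, not part of the header):

    #include "tbb/atomic.h"

    tbb::atomic<int> ref_count;

    void retain() {
        // relaxed: atomicity only, no ordering constraint (new in this version)
        ref_count.fetch_and_add<tbb::relaxed>(1);
    }

    bool release_ref() {
        // release ordering on the decrement; omitting the template argument
        // selects full_fence (formerly spelled __TBB_full_fence)
        return ref_count.fetch_and_add<tbb::release>(-1) == 1;
    }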
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
|
| #if __GNUC__ || __SUNPRO_CC | | #if __TBB_ATTRIBUTE_ALIGNED_PRESENT | |
| #define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a))); | | #define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a))); | |
| #elif defined(__INTEL_COMPILER)||_MSC_VER >= 1300 | | #elif __TBB_DECLSPEC_ALIGN_PRESENT | |
| #define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f; | | #define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f; | |
| #else | | #else | |
| #error Do not know syntax for forcing alignment. | | #error Do not know syntax for forcing alignment. | |
| #endif /* __GNUC__ */ | | #endif | |
| | | | |
| template<size_t S> | | template<size_t S> | |
| struct atomic_rep; // Primary template declared, but never defined. | | struct atomic_rep; // Primary template declared, but never defined. | |
| | | | |
| template<> | | template<> | |
| struct atomic_rep<1> { // Specialization | | struct atomic_rep<1> { // Specialization | |
| typedef int8_t word; | | typedef int8_t word; | |
| int8_t value; | | int8_t value; | |
| }; | | }; | |
| template<> | | template<> | |
| | | | |
| skipping to change at line 95 | | skipping to change at line 97 | |
| template<> | | template<> | |
| struct atomic_rep<4> { // Specialization | | struct atomic_rep<4> { // Specialization | |
| #if _MSC_VER && __TBB_WORDSIZE==4 | | #if _MSC_VER && __TBB_WORDSIZE==4 | |
| // Work-around that avoids spurious /Wp64 warnings | | // Work-around that avoids spurious /Wp64 warnings | |
| typedef intptr_t word; | | typedef intptr_t word; | |
| #else | | #else | |
| typedef int32_t word; | | typedef int32_t word; | |
| #endif | | #endif | |
| __TBB_DECL_ATOMIC_FIELD(int32_t,value,4) | | __TBB_DECL_ATOMIC_FIELD(int32_t,value,4) | |
| }; | | }; | |
|
| | | #if __TBB_64BIT_ATOMICS | |
| template<> | | template<> | |
| struct atomic_rep<8> { // Specialization | | struct atomic_rep<8> { // Specialization | |
| typedef int64_t word; | | typedef int64_t word; | |
| __TBB_DECL_ATOMIC_FIELD(int64_t,value,8) | | __TBB_DECL_ATOMIC_FIELD(int64_t,value,8) | |
| }; | | }; | |
|
| | | #endif | |
| | | | |
| template<size_t Size, memory_semantics M> | | template<size_t Size, memory_semantics M> | |
| struct atomic_traits; // Primary template declared, but not defined. | | struct atomic_traits; // Primary template declared, but not defined. | |
| | | | |
|
| #define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M) \ | | #define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M) \ | |
| template<> struct atomic_traits<S,M> { \ | | template<> struct atomic_traits<S,M> { \ | |
| typedef atomic_rep<S>::word word; \ | | typedef atomic_rep<S>::word word; \ | |
| inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) {\ | | inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \ | |
| return __TBB_CompareAndSwap##S##M(location,new_value,comparand); \ | | return __TBB_machine_cmpswp##S##M(location,new_value,comparand); \ | |
| } \ | | } \ | |
| inline static word fetch_and_add( volatile void* location, word addend ) { \ | | inline static word fetch_and_add( volatile void* location, word addend ) { \ | |
| return __TBB_FetchAndAdd##S##M(location,addend); \ | | return __TBB_machine_fetchadd##S##M(location,addend); \ | |
| } \ | | } \ | |
| inline static word fetch_and_store( volatile void* location, word value ) {\ | | inline static word fetch_and_store( volatile void* location, word value ) { \ | |
| return __TBB_FetchAndStore##S##M(location,value); \ | | return __TBB_machine_fetchstore##S##M(location,value); \ | |
| } \ | | } \ | |
| }; | | }; | |
| | | | |
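For a concrete instance, __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire) pastes the operand size and the memory_semantics name into the machine-layer entry points. A rough sketch of what the new-side expansion looks like (the __TBB_machine_* calls themselves come from the per-platform machine headers):

    template<> struct atomic_traits<4,acquire> {
        typedef atomic_rep<4>::word word;
        inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) {
            // was __TBB_CompareAndSwap4acquire in the previous release
            return __TBB_machine_cmpswp4acquire(location,new_value,comparand);
        }
        // fetch_and_add / fetch_and_store expand analogously to
        // __TBB_machine_fetchadd4acquire / __TBB_machine_fetchstore4acquire
    };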
|
| #define __TBB_DECL_ATOMIC_PRIMITIVES(S) \ | | #define __TBB_DECL_ATOMIC_PRIMITIVES(S) \ | |
| template<memory_semantics M> \ | | template<memory_semantics M> \ | |
| struct atomic_traits<S,M> { \ | | struct atomic_traits<S,M> { \ | |
| typedef atomic_rep<S>::word word; \ | | typedef atomic_rep<S>::word word; \ | |
| inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) {\ | | inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \ | |
| return __TBB_CompareAndSwap##S(location,new_value,comparand); \ | | return __TBB_machine_cmpswp##S(location,new_value,comparand); \ | |
| } \ | | } \ | |
| inline static word fetch_and_add( volatile void* location, word addend ) { \ | | inline static word fetch_and_add( volatile void* location, word addend ) { \ | |
| return __TBB_FetchAndAdd##S(location,addend); \ | | return __TBB_machine_fetchadd##S(location,addend); \ | |
| } \ | | } \ | |
| inline static word fetch_and_store( volatile void* location, word value ) {\ | | inline static word fetch_and_store( volatile void* location, word value ) { \ | |
| return __TBB_FetchAndStore##S(location,value); \ | | return __TBB_machine_fetchstore##S(location,value); \ | |
| } \ | | } \ | |
| }; | | }; | |
| | | | |
|
| #if __TBB_DECL_FENCED_ATOMICS | | template<memory_semantics M> | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,__TBB_full_fence) | | struct atomic_load_store_traits; // Primary template declaration | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,__TBB_full_fence) | | | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,__TBB_full_fence) | | #define __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(M) \ | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,__TBB_full_fence) | | template<> struct atomic_load_store_traits<M> { \ | |
| | | template <typename T> \ | |
| | | inline static T load( const volatile T& location ) { \ | |
| | | return __TBB_load_##M( location ); \ | |
| | | } \ | |
| | | template <typename T> \ | |
| | | inline static void store( volatile T& location, T value ) { \ | |
| | | __TBB_store_##M( location, value ); \ | |
| | | } \ | |
| | | } | |
| | | | |
| | | #if __TBB_USE_FENCED_ATOMICS | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,full_fence) | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,full_fence) | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,full_fence) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire) | |
|
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire) | | | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release) | |
|
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,relaxed) | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,relaxed) | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,relaxed) | |
| | | #if __TBB_64BIT_ATOMICS | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,full_fence) | |
| | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire) | |
| __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release) | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release) | |
|
| #else | | __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,relaxed) | |
| | | #endif | |
| | | #else /* !__TBB_USE_FENCED_ATOMICS */ | |
| __TBB_DECL_ATOMIC_PRIMITIVES(1) | | __TBB_DECL_ATOMIC_PRIMITIVES(1) | |
| __TBB_DECL_ATOMIC_PRIMITIVES(2) | | __TBB_DECL_ATOMIC_PRIMITIVES(2) | |
| __TBB_DECL_ATOMIC_PRIMITIVES(4) | | __TBB_DECL_ATOMIC_PRIMITIVES(4) | |
|
| | | #if __TBB_64BIT_ATOMICS | |
| __TBB_DECL_ATOMIC_PRIMITIVES(8) | | __TBB_DECL_ATOMIC_PRIMITIVES(8) | |
| #endif | | #endif | |
|
| | | #endif /* !__TBB_USE_FENCED_ATOMICS */ | |
| | | | |
| | | __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(full_fence); | |
| | | __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire); | |
| | | __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(release); | |
| | | __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(relaxed); | |
| | | | |
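The load/store traits follow the same token-pasting pattern; an approximate expansion of __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire) (the pasted __TBB_load_acquire/__TBB_store_acquire helpers are supplied by the machine headers):

    template<> struct atomic_load_store_traits<acquire> {
        template <typename T>
        inline static T load( const volatile T& location ) {
            return __TBB_load_acquire( location );
        }
        template <typename T>
        inline static void store( volatile T& location, T value ) {
            __TBB_store_acquire( location, value );
        }
    };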
| //! Additive inverse of 1 for type T. | | //! Additive inverse of 1 for type T. | |
| /** Various compilers issue various warnings if -1 is used with various integer types. | | /** Various compilers issue various warnings if -1 is used with various integer types. | |
| The baroque expression below avoids all the warnings (we hope). */ | | The baroque expression below avoids all the warnings (we hope). */ | |
| #define __TBB_MINUS_ONE(T) (T(T(0)-T(1))) | | #define __TBB_MINUS_ONE(T) (T(T(0)-T(1))) | |
| | | | |
| //! Base class that provides basic functionality for atomic<T> without fetch_and_add. | | //! Base class that provides basic functionality for atomic<T> without fetch_and_add. | |
| /** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor, | | /** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor, | |
| and can be copied/compared by memcpy/memcmp. */ | | and can be copied/compared by memcpy/memcmp. */ | |
| template<typename T> | | template<typename T> | |
| | | | |
| skipping to change at line 183 | | skipping to change at line 215 | |
| | | | |
| template<memory_semantics M> | | template<memory_semantics M> | |
| value_type fetch_and_store( value_type value ) { | | value_type fetch_and_store( value_type value ) { | |
| converter u, w; | | converter u, w; | |
| u.value = value; | | u.value = value; | |
| w.bits = internal::atomic_traits<sizeof(value_type),M>::fetch_and_store(&rep.value,u.bits); | | w.bits = internal::atomic_traits<sizeof(value_type),M>::fetch_and_store(&rep.value,u.bits); | |
| return w.value; | | return w.value; | |
| } | | } | |
| | | | |
| value_type fetch_and_store( value_type value ) { | | value_type fetch_and_store( value_type value ) { | |
|
| return fetch_and_store<__TBB_full_fence>(value); | | return fetch_and_store<full_fence>(value); | |
| } | | } | |
| | | | |
| template<memory_semantics M> | | template<memory_semantics M> | |
| value_type compare_and_swap( value_type value, value_type comparand ) { | | value_type compare_and_swap( value_type value, value_type comparand ) { | |
| converter u, v, w; | | converter u, v, w; | |
| u.value = value; | | u.value = value; | |
| v.value = comparand; | | v.value = comparand; | |
| w.bits = internal::atomic_traits<sizeof(value_type),M>::compare_and_swap(&rep.value,u.bits,v.bits); | | w.bits = internal::atomic_traits<sizeof(value_type),M>::compare_and_swap(&rep.value,u.bits,v.bits); | |
| return w.value; | | return w.value; | |
| } | | } | |
| | | | |
| value_type compare_and_swap( value_type value, value_type comparand ) { | | value_type compare_and_swap( value_type value, value_type comparand ) { | |
|
| return compare_and_swap<__TBB_full_fence>(value,comparand); | | return compare_and_swap<full_fence>(value,comparand); | |
| } | | } | |
| | | | |
| operator value_type() const volatile { // volatile qualifier here for backwards compatibility | | operator value_type() const volatile { // volatile qualifier here for backwards compatibility | |
| converter w; | | converter w; | |
| w.bits = __TBB_load_with_acquire( rep.value ); | | w.bits = __TBB_load_with_acquire( rep.value ); | |
| return w.value; | | return w.value; | |
| } | | } | |
| | | | |
|
| | | template<memory_semantics M> | |
| | | value_type load () const { | |
| | | converter u; | |
| | | u.bits = internal::atomic_load_store_traits<M>::load( rep.value ); | |
| | | return u.value; | |
| | | } | |
| | | | |
| | | value_type load () const { | |
| | | return load<acquire>(); | |
| | | } | |
| | | | |
| | | template<memory_semantics M> | |
| | | void store ( value_type value ) { | |
| | | converter u; | |
| | | u.value = value; | |
| | | internal::atomic_load_store_traits<M>::store( rep.value, u.bits ); | |
| | | } | |
| | | | |
| | | void store ( value_type value ) { | |
| | | store<release>( value ); | |
| | | } | |
| | | | |
| protected: | | protected: | |
| value_type store_with_release( value_type rhs ) { | | value_type store_with_release( value_type rhs ) { | |
| converter u; | | converter u; | |
| u.value = rhs; | | u.value = rhs; | |
| __TBB_store_with_release(rep.value,u.bits); | | __TBB_store_with_release(rep.value,u.bits); | |
| return rhs; | | return rhs; | |
| } | | } | |
| }; | | }; | |
| | | | |
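The new load()/store() members take an explicit memory_semantics argument and default to acquire and release respectively, matching the older conversion operator and store_with_release(). A small sketch of the intended pairing (the flag/payload names are illustrative):

    #include "tbb/atomic.h"

    tbb::atomic<bool> ready;
    int payload;

    void producer() {
        payload = 42;
        ready.store<tbb::release>(true);     // plain ready.store(true) also uses release
    }

    int consumer() {
        while( !ready.load<tbb::acquire>() ) // plain ready.load() also uses acquire
            continue;
        return payload;
    }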
| //! Base class that provides basic functionality for atomic<T> with fetch_and_add. | | //! Base class that provides basic functionality for atomic<T> with fetch_and_add. | |
| | | | |
| skipping to change at line 229 | | skipping to change at line 283 | |
| struct atomic_impl_with_arithmetic: atomic_impl<I> { | | struct atomic_impl_with_arithmetic: atomic_impl<I> { | |
| public: | | public: | |
| typedef I value_type; | | typedef I value_type; | |
| | | | |
| template<memory_semantics M> | | template<memory_semantics M> | |
| value_type fetch_and_add( D addend ) { | | value_type fetch_and_add( D addend ) { | |
| return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->rep.value, addend*sizeof(StepType) )); | | return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->rep.value, addend*sizeof(StepType) )); | |
| } | | } | |
| | | | |
| value_type fetch_and_add( D addend ) { | | value_type fetch_and_add( D addend ) { | |
|
| return fetch_and_add<__TBB_full_fence>(addend); | | return fetch_and_add<full_fence>(addend); | |
| } | | } | |
| | | | |
| template<memory_semantics M> | | template<memory_semantics M> | |
| value_type fetch_and_increment() { | | value_type fetch_and_increment() { | |
| return fetch_and_add<M>(1); | | return fetch_and_add<M>(1); | |
| } | | } | |
| | | | |
| value_type fetch_and_increment() { | | value_type fetch_and_increment() { | |
| return fetch_and_add(1); | | return fetch_and_add(1); | |
| } | | } | |
| | | | |
| skipping to change at line 278 | | skipping to change at line 332 | |
| | | | |
| value_type operator++(int) { | | value_type operator++(int) { | |
| return fetch_and_add(1); | | return fetch_and_add(1); | |
| } | | } | |
| | | | |
| value_type operator--(int) { | | value_type operator--(int) { | |
| return fetch_and_add(__TBB_MINUS_ONE(D)); | | return fetch_and_add(__TBB_MINUS_ONE(D)); | |
| } | | } | |
| }; | | }; | |
| | | | |
|
| #if __TBB_WORDSIZE == 4 | | | |
| // Plaforms with 32-bit hardware require special effort for 64-bit loads and stores. | | | |
| #if defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 | | | |
| | | | |
| template<> | | | |
| inline atomic_impl<__TBB_LONG_LONG>::operator atomic_impl<__TBB_LONG_LONG>::value_type() const volatile { | | | |
| return __TBB_Load8(&rep.value); | | | |
| } | | | |
| | | | |
| template<> | | | |
| inline atomic_impl<unsigned __TBB_LONG_LONG>::operator atomic_impl<unsigned __TBB_LONG_LONG>::value_type() const volatile { | | | |
| return __TBB_Load8(&rep.value); | | | |
| } | | | |
| | | | |
| template<> | | | |
| inline atomic_impl<__TBB_LONG_LONG>::value_type atomic_impl<__TBB_LONG_LONG>::store_with_release( value_type rhs ) { | | | |
| __TBB_Store8(&rep.value,rhs); | | | |
| return rhs; | | | |
| } | | | |
| | | | |
| template<> | | | |
| inline atomic_impl<unsigned __TBB_LONG_LONG>::value_type atomic_impl<unsigned __TBB_LONG_LONG>::store_with_release( value_type rhs ) { | | | |
| __TBB_Store8(&rep.value,rhs); | | | |
| return rhs; | | | |
| } | | | |
| | | | |
| #endif /* defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 */ | | | |
| #endif /* __TBB_WORDSIZE==4 */ | | | |
| | | | |
| } /* Internal */ | | } /* Internal */ | |
| //! @endcond | | //! @endcond | |
| | | | |
| //! Primary template for atomic. | | //! Primary template for atomic. | |
| /** See the Reference for details. | | /** See the Reference for details. | |
| @ingroup synchronization */ | | @ingroup synchronization */ | |
| template<typename T> | | template<typename T> | |
| struct atomic: internal::atomic_impl<T> { | | struct atomic: internal::atomic_impl<T> { | |
| T operator=( T rhs ) { | | T operator=( T rhs ) { | |
| // "this" required here in strict ISO C++ because store_with_releas
e is a dependent name | | // "this" required here in strict ISO C++ because store_with_releas
e is a dependent name | |
| | | | |
| skipping to change at line 328 | | skipping to change at line 353 | |
| } | | } | |
| atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;} | | atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;} | |
| }; | | }; | |
| | | | |
| #define __TBB_DECL_ATOMIC(T) \ | | #define __TBB_DECL_ATOMIC(T) \ | |
| template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \ | | template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \ | |
| T operator=( T rhs ) {return store_with_release(rhs);} \ | | T operator=( T rhs ) {return store_with_release(rhs);} \ | |
| atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \ | | atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \ | |
| }; | | }; | |
| | | | |
|
| #if defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 | | #if __TBB_64BIT_ATOMICS | |
| __TBB_DECL_ATOMIC(__TBB_LONG_LONG) | | __TBB_DECL_ATOMIC(__TBB_LONG_LONG) | |
| __TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG) | | __TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG) | |
| #else | | #else | |
|
| // Some old versions of MVSC cannot correctly compile templates with "long long". | | // test_atomic will verify that sizeof(long long)==8 | |
| #endif /* defined(__INTEL_COMPILER)||!defined(_MSC_VER)||_MSC_VER>=1400 */ | | #endif | |
| | | | |
| __TBB_DECL_ATOMIC(long) | | __TBB_DECL_ATOMIC(long) | |
| __TBB_DECL_ATOMIC(unsigned long) | | __TBB_DECL_ATOMIC(unsigned long) | |
| | | | |
| #if defined(_MSC_VER) && __TBB_WORDSIZE==4 | | #if defined(_MSC_VER) && __TBB_WORDSIZE==4 | |
| /* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option. | | /* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option. | |
| It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T) | | It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T) | |
| with an operator=(U) that explicitly converts the U to a T. Types T and U should be | | with an operator=(U) that explicitly converts the U to a T. Types T and U should be | |
| type synonyms on the platform. Type U should be the wider variant of T from the | | type synonyms on the platform. Type U should be the wider variant of T from the | |
| perspective of /Wp64. */ | | perspective of /Wp64. */ | |
| #define __TBB_DECL_ATOMIC_ALT(T,U) \ | | #define __TBB_DECL_ATOMIC_ALT(T,U) \ | |
| | | | |
| skipping to change at line 391 | | skipping to change at line 415 | |
| template<> struct atomic<void*>: internal::atomic_impl<void*> { | | template<> struct atomic<void*>: internal::atomic_impl<void*> { | |
| void* operator=( void* rhs ) { | | void* operator=( void* rhs ) { | |
| // "this" required here in strict ISO C++ because store_with_releas
e is a dependent name | | // "this" required here in strict ISO C++ because store_with_releas
e is a dependent name | |
| return this->store_with_release(rhs); | | return this->store_with_release(rhs); | |
| } | | } | |
| atomic<void*>& operator=( const atomic<void*>& rhs ) { | | atomic<void*>& operator=( const atomic<void*>& rhs ) { | |
| this->store_with_release(rhs); return *this; | | this->store_with_release(rhs); return *this; | |
| } | | } | |
| }; | | }; | |
| | | | |
|
| | | // Helpers to workaround ugly syntax of calling template member function of | |
| | | a | |
| | | // template class with template argument dependent on template parameters. | |
| | | | |
| | | template <memory_semantics M, typename T> | |
| | | T load ( const atomic<T>& a ) { return a.template load<M>(); } | |
| | | | |
| | | template <memory_semantics M, typename T> | |
| | | void store ( atomic<T>& a, T value ) { return a.template store<M>(value); } | |
| | | | |
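These helpers avoid the awkward a.template load<M>() spelling inside templates; a short sketch of the equivalent call forms (the flag name is illustrative):

    tbb::atomic<int> flag;

    void signal() {
        tbb::store<tbb::release>( flag, 1 );    // same effect as flag.store<tbb::release>(1)
    }

    int poll() {
        return tbb::load<tbb::acquire>( flag ); // same effect as flag.load<tbb::acquire>()
    }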
| } // namespace tbb | | } // namespace tbb | |
| | | | |
| #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| #pragma warning (pop) | | #pragma warning (pop) | |
| #endif // warnings 4244, 4267 are back | | #endif // warnings 4244, 4267 are back | |
| | | | |
| #endif /* __TBB_atomic_H */ | | #endif /* __TBB_atomic_H */ | |
| | | | |
End of changes. 23 change blocks. 102 lines changed or deleted, 138 lines changed or added.
| concurrent_hash_map.h | | concurrent_hash_map.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 54 | | skipping to change at line 54 | |
| #if !TBB_USE_EXCEPTIONS && _MSC_VER | | #if !TBB_USE_EXCEPTIONS && _MSC_VER | |
| #pragma warning (pop) | | #pragma warning (pop) | |
| #endif | | #endif | |
| | | | |
| #include "cache_aligned_allocator.h" | | #include "cache_aligned_allocator.h" | |
| #include "tbb_allocator.h" | | #include "tbb_allocator.h" | |
| #include "spin_rw_mutex.h" | | #include "spin_rw_mutex.h" | |
| #include "atomic.h" | | #include "atomic.h" | |
| #include "aligned_space.h" | | #include "aligned_space.h" | |
| #include "tbb_exception.h" | | #include "tbb_exception.h" | |
|
| #include "_concurrent_unordered_internal.h" // Need tbb_hasher | | #include "tbb_profiling.h" | |
| #if TBB_USE_PERFORMANCE_WARNINGS | | #include "internal/_concurrent_unordered_impl.h" // Need tbb_hasher | |
| | | #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| #include <typeinfo> | | #include <typeinfo> | |
| #endif | | #endif | |
|
| | | #if __TBB_STATISTICS | |
| | | #include <stdio.h> | |
| | | #endif | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
|
| //! @cond INTERNAL | | | |
| namespace internal { | | | |
| //! ITT instrumented routine that loads pointer from location pointed to by src. | | | |
| void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* src ); | | | |
| //! ITT instrumented routine that stores src into location pointed to by dst. | | | |
| void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, void* src ); | | | |
| //! Routine that loads pointer from location pointed to by src without causing ITT to report a race. | | | |
| void* __TBB_EXPORTED_FUNC itt_load_pointer_v3( const void* src ); | | | |
| } | | | |
| //! @endcond | | | |
| | | | |
| //! hash_compare that is default argument for concurrent_hash_map | | //! hash_compare that is default argument for concurrent_hash_map | |
| template<typename Key> | | template<typename Key> | |
| struct tbb_hash_compare { | | struct tbb_hash_compare { | |
| static size_t hash( const Key& a ) { return tbb_hasher(a); } | | static size_t hash( const Key& a ) { return tbb_hasher(a); } | |
| static bool equal( const Key& a, const Key& b ) { return a == b; } | | static bool equal( const Key& a, const Key& b ) { return a == b; } | |
| }; | | }; | |
| | | | |
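Any type that provides the same two static members can be passed as the HashCompare parameter of concurrent_hash_map; a sketch of a user-defined comparator for std::string keys (the struct and typedef names are illustrative):

    #include <string>
    #include "tbb/concurrent_hash_map.h"

    struct string_hash_compare {
        static size_t hash( const std::string& s ) { return tbb::tbb_hasher(s); }
        static bool equal( const std::string& a, const std::string& b ) { return a == b; }
    };

    typedef tbb::concurrent_hash_map<std::string, int, string_hash_compare> name_table;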
|
| namespace interface4 { | | namespace interface5 { | |
| | | | |
| template<typename Key, typename T, typename HashCompare = tbb_hash_compare<Key>, typename A = tbb_allocator<std::pair<Key, T> > > | | template<typename Key, typename T, typename HashCompare = tbb_hash_compare<Key>, typename A = tbb_allocator<std::pair<Key, T> > > | |
| class concurrent_hash_map; | | class concurrent_hash_map; | |
| | | | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
| //! Type of a hash code. | | //! Type of a hash code. | |
| typedef size_t hashcode_t; | | typedef size_t hashcode_t; | |
| //! Node base type | | //! Node base type | |
| | | | |
| skipping to change at line 135 | | skipping to change at line 128 | |
| //! Count of segments in the first block | | //! Count of segments in the first block | |
| static size_type const embedded_buckets = 1<<embedded_block; | | static size_type const embedded_buckets = 1<<embedded_block; | |
| //! Count of segments in the first block | | //! Count of segments in the first block | |
| static size_type const first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 | | static size_type const first_block = 8; //including embedded_block. perfect with bucket size 16, so the allocations are power of 4096 | |
| //! Size of a pointer / table size | | //! Size of a pointer / table size | |
| static size_type const pointers_per_table = sizeof(segment_index_t) * 8; // one segment per bit | | static size_type const pointers_per_table = sizeof(segment_index_t) * 8; // one segment per bit | |
| //! Segment pointer | | //! Segment pointer | |
| typedef bucket *segment_ptr_t; | | typedef bucket *segment_ptr_t; | |
| //! Segment pointers table type | | //! Segment pointers table type | |
| typedef segment_ptr_t segments_table_t[pointers_per_table]; | | typedef segment_ptr_t segments_table_t[pointers_per_table]; | |
|
| //! Hash mask = sum of allocated segments sizes - 1 | | //! Hash mask = sum of allocated segment sizes - 1 | |
| atomic<hashcode_t> my_mask; | | atomic<hashcode_t> my_mask; | |
| //! Segment pointers table. Also prevents false sharing between my_mask and my_size | | //! Segment pointers table. Also prevents false sharing between my_mask and my_size | |
| segments_table_t my_table; | | segments_table_t my_table; | |
| //! Size of container in stored items | | //! Size of container in stored items | |
| atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects | | atomic<size_type> my_size; // It must be in separate cache line from my_mask due to performance effects | |
| //! Zero segment | | //! Zero segment | |
| bucket my_embedded_segment[embedded_buckets]; | | bucket my_embedded_segment[embedded_buckets]; | |
|
| | | #if __TBB_STATISTICS | |
| | | atomic<unsigned> my_info_resizes; // concurrent ones | |
| | | mutable atomic<unsigned> my_info_restarts; // race collisions | |
| | | atomic<unsigned> my_info_rehashes; // invocations of rehash_bucket | |
| | | #endif | |
| //! Constructor | | //! Constructor | |
| hash_map_base() { | | hash_map_base() { | |
| std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128 or 64*8=512 | | std::memset( this, 0, pointers_per_table*sizeof(segment_ptr_t) // 32*4=128 or 64*8=512 | |
| + sizeof(my_size) + sizeof(my_mask) // 4+4 or 8+8 | | + sizeof(my_size) + sizeof(my_mask) // 4+4 or 8+8 | |
| + embedded_buckets*sizeof(bucket) ); // n*8 or n*16 | | + embedded_buckets*sizeof(bucket) ); // n*8 or n*16 | |
| for( size_type i = 0; i < embedded_block; i++ ) // fill the table | | for( size_type i = 0; i < embedded_block; i++ ) // fill the table | |
| my_table[i] = my_embedded_segment + segment_base(i); | | my_table[i] = my_embedded_segment + segment_base(i); | |
| my_mask = embedded_buckets - 1; | | my_mask = embedded_buckets - 1; | |
| __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); | | __TBB_ASSERT( embedded_block <= first_block, "The first block number must include embedded blocks"); | |
|
| | | #if __TBB_STATISTICS | |
| | | my_info_resizes = 0; // concurrent ones | |
| | | my_info_restarts = 0; // race collisions | |
| | | my_info_rehashes = 0; // invocations of rehash_bucket | |
| | | #endif | |
| } | | } | |
| | | | |
| //! @return segment index of given index in the array | | //! @return segment index of given index in the array | |
| static segment_index_t segment_index_of( size_type index ) { | | static segment_index_t segment_index_of( size_type index ) { | |
| return segment_index_t( __TBB_Log2( index|1 ) ); | | return segment_index_t( __TBB_Log2( index|1 ) ); | |
| } | | } | |
| | | | |
| //! @return the first array index of given segment | | //! @return the first array index of given segment | |
| static segment_index_t segment_base( segment_index_t k ) { | | static segment_index_t segment_base( segment_index_t k ) { | |
| return (segment_index_t(1)<<k & ~segment_index_t(1)); | | return (segment_index_t(1)<<k & ~segment_index_t(1)); | |
| | | | |
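These two helpers define the segmented bucket layout: bucket i lives in segment segment_index_of(i), and segment k starts at bucket segment_base(k). A self-contained sketch that restates the arithmetic and checks a few values (log2_floor stands in for __TBB_Log2 here):

    #include <cassert>
    #include <cstddef>

    static size_t log2_floor( size_t x ) { size_t r = 0; while( x >>= 1 ) ++r; return r; }
    static size_t segment_index_of( size_t index ) { return log2_floor( index|1 ); }
    static size_t segment_base( size_t k ) { return (size_t(1)<<k) & ~size_t(1); }

    int main() {
        assert( segment_index_of(0) == 0 && segment_index_of(1) == 0 ); // buckets 0,1 -> segment 0
        assert( segment_index_of(2) == 1 && segment_index_of(3) == 1 ); // buckets 2,3 -> segment 1
        assert( segment_index_of(7) == 2 );                             // buckets 4..7 -> segment 2
        assert( segment_base(2) == 4 );                                 // segment 2 starts at bucket 4
        return 0;
    }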
| skipping to change at line 211 | | skipping to change at line 213 | |
| void enable_segment( segment_index_t k, bool is_initial = false ) { | | void enable_segment( segment_index_t k, bool is_initial = false ) { | |
| __TBB_ASSERT( k, "Zero segment must be embedded" ); | | __TBB_ASSERT( k, "Zero segment must be embedded" ); | |
| enable_segment_failsafe watchdog( my_table, k ); | | enable_segment_failsafe watchdog( my_table, k ); | |
| cache_aligned_allocator<bucket> alloc; | | cache_aligned_allocator<bucket> alloc; | |
| size_type sz; | | size_type sz; | |
| __TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignment"); | | __TBB_ASSERT( !is_valid(my_table[k]), "Wrong concurrent assignment"); | |
| if( k >= first_block ) { | | if( k >= first_block ) { | |
| sz = segment_size( k ); | | sz = segment_size( k ); | |
| segment_ptr_t ptr = alloc.allocate( sz ); | | segment_ptr_t ptr = alloc.allocate( sz ); | |
| init_buckets( ptr, sz, is_initial ); | | init_buckets( ptr, sz, is_initial ); | |
|
| #if TBB_USE_THREADING_TOOLS | | itt_hide_store_word( my_table[k], ptr ); | |
| // TODO: actually, fence and notification are unnecessary here and below | | | |
| itt_store_pointer_with_release_v3( my_table + k, ptr ); | | | |
| #else | | | |
| my_table[k] = ptr;// my_mask has release fence | | | |
| #endif | | | |
| sz <<= 1;// double it to get entire capacity of the container | | sz <<= 1;// double it to get entire capacity of the container | |
| } else { // the first block | | } else { // the first block | |
| __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); | | __TBB_ASSERT( k == embedded_block, "Wrong segment index" ); | |
| sz = segment_size( first_block ); | | sz = segment_size( first_block ); | |
| segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets ); | | segment_ptr_t ptr = alloc.allocate( sz - embedded_buckets ); | |
| init_buckets( ptr, sz - embedded_buckets, is_initial ); | | init_buckets( ptr, sz - embedded_buckets, is_initial ); | |
| ptr -= segment_base(embedded_block); | | ptr -= segment_base(embedded_block); | |
| for(segment_index_t i = embedded_block; i < first_block; i++) // calc the offsets | | for(segment_index_t i = embedded_block; i < first_block; i++) // calc the offsets | |
| #if TBB_USE_THREADING_TOOLS | | itt_hide_store_word( my_table[i], ptr + segment_base(i) ); | |
| itt_store_pointer_with_release_v3( my_table + i, ptr + segment_base(i) ); | | | |
| #else | | | |
| my_table[i] = ptr + segment_base(i); | | | |
| #endif | | | |
| } | | } | |
|
| #if TBB_USE_THREADING_TOOLS | | itt_store_word_with_release( my_mask, sz-1 ); | |
| itt_store_pointer_with_release_v3( &my_mask, (void*)(sz-1) ); | | | |
| #else | | | |
| my_mask = sz - 1; | | | |
| #endif | | | |
| watchdog.my_segment_ptr = 0; | | watchdog.my_segment_ptr = 0; | |
| } | | } | |
| | | | |
| //! Get bucket by (masked) hashcode | | //! Get bucket by (masked) hashcode | |
| bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add throw() everywhere? | | bucket *get_bucket( hashcode_t h ) const throw() { // TODO: add throw() everywhere? | |
| segment_index_t s = segment_index_of( h ); | | segment_index_t s = segment_index_of( h ); | |
| h -= segment_base(s); | | h -= segment_base(s); | |
| segment_ptr_t seg = my_table[s]; | | segment_ptr_t seg = my_table[s]; | |
| __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); | | __TBB_ASSERT( is_valid(seg), "hashcode must be cut by valid mask for allocated segments" ); | |
| return &seg[h]; | | return &seg[h]; | |
| } | | } | |
| | | | |
| // internal serial rehashing helper | | // internal serial rehashing helper | |
| void mark_rehashed_levels( hashcode_t h ) throw () { | | void mark_rehashed_levels( hashcode_t h ) throw () { | |
| segment_index_t s = segment_index_of( h ); | | segment_index_t s = segment_index_of( h ); | |
| while( segment_ptr_t seg = my_table[++s] ) | | while( segment_ptr_t seg = my_table[++s] ) | |
| if( seg[h].node_list == rehash_req ) { | | if( seg[h].node_list == rehash_req ) { | |
| seg[h].node_list = empty_rehashed; | | seg[h].node_list = empty_rehashed; | |
|
| mark_rehashed_levels( h + segment_base(s) ); | | mark_rehashed_levels( h + ((hashcode_t)1<<s) ); // optimized segment_base(s) | |
| } | | } | |
| } | | } | |
| | | | |
| //! Check for mask race | | //! Check for mask race | |
| // Splitting into two functions should help inlining | | // Splitting into two functions should help inlining | |
| inline bool check_mask_race( const hashcode_t h, hashcode_t &m ) co
nst { | | inline bool check_mask_race( const hashcode_t h, hashcode_t &m ) co
nst { | |
| hashcode_t m_now, m_old = m; | | hashcode_t m_now, m_old = m; | |
|
| #if TBB_USE_THREADING_TOOLS | | m_now = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| m_now = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask ); | | | |
| #else | | | |
| m_now = my_mask; | | | |
| #endif | | | |
| if( m_old != m_now ) | | if( m_old != m_now ) | |
| return check_rehashing_collision( h, m_old, m = m_now ); | | return check_rehashing_collision( h, m_old, m = m_now ); | |
| return false; | | return false; | |
| } | | } | |
| | | | |
| //! Process mask race, check for rehashing collision | | //! Process mask race, check for rehashing collision | |
| bool check_rehashing_collision( const hashcode_t h, hashcode_t m_old, hashcode_t m ) const { | | bool check_rehashing_collision( const hashcode_t h, hashcode_t m_old, hashcode_t m ) const { | |
| __TBB_ASSERT(m_old != m, NULL); // TODO?: m arg could be optimized out by passing h = h&m | | __TBB_ASSERT(m_old != m, NULL); // TODO?: m arg could be optimized out by passing h = h&m | |
| if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event | | if( (h & m_old) != (h & m) ) { // mask changed for this hashcode, rare event | |
| // condition above proves that 'h' has some other bits set beside 'm_old' | | // condition above proves that 'h' has some other bits set beside 'm_old' | |
| // find next applicable mask after m_old //TODO: look at bsl instruction | | // find next applicable mask after m_old //TODO: look at bsl instruction | |
| for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size | | for( ++m_old; !(h & m_old); m_old <<= 1 ) // at maximum few rounds depending on the first block size | |
| ; | | ; | |
| m_old = (m_old<<1) - 1; // get full mask from a bit | | m_old = (m_old<<1) - 1; // get full mask from a bit | |
| __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, NULL); | | __TBB_ASSERT((m_old&(m_old+1))==0 && m_old <= m, NULL); | |
| // check whether it is rehashing/ed | | // check whether it is rehashing/ed | |
| #if TBB_USE_THREADING_TOOLS | | if( itt_load_word_with_acquire(get_bucket(h & m_old)->node_list) != rehash_req ) | |
| if( itt_load_pointer_with_acquire_v3(&( get_bucket(h & m_old)->node_list )) != rehash_req ) | | { | |
| #else | | #if __TBB_STATISTICS | |
| if( __TBB_load_with_acquire(get_bucket( h & m_old )->node_list) != rehash_req ) | | my_info_restarts++; // race collisions | |
| #endif | | #endif | |
| return true; | | return true; | |
|
| | | } | |
| } | | } | |
| return false; | | return false; | |
| } | | } | |
| | | | |
| //! Insert a node and check for load factor. @return segment index to enable. | | //! Insert a node and check for load factor. @return segment index to enable. | |
| segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_t mask ) { | | segment_index_t insert_new_node( bucket *b, node_base *n, hashcode_t mask ) { | |
| size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted | | size_type sz = ++my_size; // prefix form is to enforce allocation after the first item inserted | |
| add_to_bucket( b, n ); | | add_to_bucket( b, n ); | |
| // check load factor | | // check load factor | |
| if( sz >= mask ) { // TODO: add custom load_factor | | if( sz >= mask ) { // TODO: add custom load_factor | |
|
| segment_index_t new_seg = segment_index_of( mask+1 ); | | segment_index_t new_seg = __TBB_Log2( mask+1 ); //optimized segment_index_of | |
| __TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocations must not publish new mask until segment has allocated"); | | __TBB_ASSERT( is_valid(my_table[new_seg-1]), "new allocations must not publish new mask until segment has allocated"); | |
| #if TBB_USE_THREADING_TOOLS | | if( !itt_hide_load_word(my_table[new_seg]) | |
| if( !itt_load_pointer_v3(my_table+new_seg) | | | |
| #else | | | |
| if( !my_table[new_seg] | | | |
| #endif | | | |
| && __TBB_CompareAndSwapW(&my_table[new_seg], 2, 0) == 0 ) | | && __TBB_CompareAndSwapW(&my_table[new_seg], 2, 0) == 0 ) | |
| return new_seg; // The value must be processed | | return new_seg; // The value must be processed | |
| } | | } | |
| return 0; | | return 0; | |
| } | | } | |
| | | | |
| //! Prepare enough segments for number of buckets | | //! Prepare enough segments for number of buckets | |
| void reserve(size_type buckets) { | | void reserve(size_type buckets) { | |
| if( !buckets-- ) return; | | if( !buckets-- ) return; | |
| bool is_initial = !my_size; | | bool is_initial = !my_size; | |
| | | | |
| skipping to change at line 377 | | skipping to change at line 359 | |
| my_node = static_cast<node*>( my_bucket->node_list ); | | my_node = static_cast<node*>( my_bucket->node_list ); | |
| if( hash_map_base::is_valid(my_node) ) { | | if( hash_map_base::is_valid(my_node) ) { | |
| my_index = k; return; | | my_index = k; return; | |
| } | | } | |
| ++k; | | ++k; | |
| } | | } | |
| my_bucket = 0; my_node = 0; my_index = k; // the end | | my_bucket = 0; my_node = 0; my_index = k; // the end | |
| } | | } | |
| #if !defined(_MSC_VER) || defined(__INTEL_COMPILER) | | #if !defined(_MSC_VER) || defined(__INTEL_COMPILER) | |
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
|
| friend class interface4::concurrent_hash_map; | | friend class interface5::concurrent_hash_map; | |
| #else | | #else | |
| public: // workaround | | public: // workaround | |
| #endif | | #endif | |
| //! concurrent_hash_map over which we are iterating. | | //! concurrent_hash_map over which we are iterating. | |
| const Container *my_map; | | const Container *my_map; | |
| | | | |
| //! Index in hash table for current item | | //! Index in hash table for current item | |
| size_t my_index; | | size_t my_index; | |
| | | | |
| //! Pointer to bucket | | //! Pointer to bucket | |
| | | | |
| skipping to change at line 412 | | skipping to change at line 394 | |
| my_node(other.my_node) | | my_node(other.my_node) | |
| {} | | {} | |
| Value& operator*() const { | | Value& operator*() const { | |
| __TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); | | __TBB_ASSERT( hash_map_base::is_valid(my_node), "iterator uninitialized or at end of container?" ); | |
| return my_node->item; | | return my_node->item; | |
| } | | } | |
| Value* operator->() const {return &operator*();} | | Value* operator->() const {return &operator*();} | |
| hash_map_iterator& operator++(); | | hash_map_iterator& operator++(); | |
| | | | |
| //! Post increment | | //! Post increment | |
|
| Value* operator++(int) { | | hash_map_iterator operator++(int) { | |
| Value* result = &operator*(); | | hash_map_iterator old(*this); | |
| operator++(); | | operator++(); | |
|
| return result; | | return old; | |
| } | | } | |
| }; | | }; | |
| | | | |
| template<typename Container, typename Value> | | template<typename Container, typename Value> | |
| hash_map_iterator<Container,Value>::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) : | | hash_map_iterator<Container,Value>::hash_map_iterator( const Container &map, size_t index, const bucket *b, node_base *n ) : | |
| my_map(&map), | | my_map(&map), | |
| my_index(index), | | my_index(index), | |
| my_bucket(b), | | my_bucket(b), | |
| my_node( static_cast<node*>(n) ) | | my_node( static_cast<node*>(n) ) | |
| { | | { | |
| | | | |
| skipping to change at line 634 | | skipping to change at line 616 | |
| node *search_bucket( const key_type &key, bucket *b ) const { | | node *search_bucket( const key_type &key, bucket *b ) const { | |
| node *n = static_cast<node*>( b->node_list ); | | node *n = static_cast<node*>( b->node_list ); | |
| while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) ) | | while( is_valid(n) && !my_hash_compare.equal(key, n->item.first) ) | |
| n = static_cast<node*>( n->next ); | | n = static_cast<node*>( n->next ); | |
| __TBB_ASSERT(n != internal::rehash_req, "Search can be executed only for rehashed bucket"); | | __TBB_ASSERT(n != internal::rehash_req, "Search can be executed only for rehashed bucket"); | |
| return n; | | return n; | |
| } | | } | |
| | | | |
| //! bucket accessor is to find, rehash, acquire a lock, and access a bucket | | //! bucket accessor is to find, rehash, acquire a lock, and access a bucket | |
| class bucket_accessor : public bucket::scoped_t { | | class bucket_accessor : public bucket::scoped_t { | |
|
| bool my_is_writer; // TODO: use it from base type | | | |
| bucket *my_b; | | bucket *my_b; | |
| public: | | public: | |
| bucket_accessor( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { acquire( base, h, writer ); } | | bucket_accessor( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { acquire( base, h, writer ); } | |
| //! find a bucket by masked hashcode, optionally rehash, and acquire the lock | | //! find a bucket by masked hashcode, optionally rehash, and acquire the lock | |
| inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { | | inline void acquire( concurrent_hash_map *base, const hashcode_t h, bool writer = false ) { | |
| my_b = base->get_bucket( h ); | | my_b = base->get_bucket( h ); | |
| #if TBB_USE_THREADING_TOOLS | | | |
| // TODO: actually, notification is unnecessary here, just hiding double-check | | // TODO: actually, notification is unnecessary here, just hiding double-check | |
| if( itt_load_pointer_with_acquire_v3(&my_b->node_list) == internal::rehash_req | | if( itt_load_word_with_acquire(my_b->node_list) == internal::rehash_req | |
| #else | | | |
| if( __TBB_load_with_acquire(my_b->node_list) == internal::rehash_req | | | |
| #endif | | | |
| && try_acquire( my_b->mutex, /*write=*/true ) ) | | && try_acquire( my_b->mutex, /*write=*/true ) ) | |
| { | | { | |
| if( my_b->node_list == internal::rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing | | if( my_b->node_list == internal::rehash_req ) base->rehash_bucket( my_b, h ); //recursive rehashing | |
| my_is_writer = true; | | | |
| } | | } | |
| else bucket::scoped_t::acquire( my_b->mutex, /*write=*/my_is_writer = writer ); | | else bucket::scoped_t::acquire( my_b->mutex, writer ); | |
| __TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL); | | __TBB_ASSERT( my_b->node_list != internal::rehash_req, NULL); | |
| } | | } | |
| //! check whether bucket is locked for write | | //! check whether bucket is locked for write | |
| bool is_writer() { return my_is_writer; } | | bool is_writer() { return bucket::scoped_t::is_writer; } | |
| //! get bucket pointer | | //! get bucket pointer | |
| bucket *operator() () { return my_b; } | | bucket *operator() () { return my_b; } | |
| // TODO: optimize out | | | |
| bool upgrade_to_writer() { my_is_writer = true; return bucket::scoped_t::upgrade_to_writer(); } | | | |
| }; | | }; | |
| | | | |
| // TODO refactor to hash_base | | // TODO refactor to hash_base | |
| void rehash_bucket( bucket *b_new, const hashcode_t h ) { | | void rehash_bucket( bucket *b_new, const hashcode_t h ) { | |
| __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); | | __TBB_ASSERT( *(intptr_t*)(&b_new->mutex), "b_new must be locked (for write)"); | |
| __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); | | __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" ); | |
| __TBB_store_with_release(b_new->node_list, internal::empty_rehashed); // mark rehashed | | __TBB_store_with_release(b_new->node_list, internal::empty_rehashed); // mark rehashed | |
| hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit | | hashcode_t mask = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit | |
|
| | | #if __TBB_STATISTICS | |
| | | my_info_rehashes++; // invocations of rehash_bucket | |
| | | #endif | |
| | | | |
| bucket_accessor b_old( this, h & mask ); | | bucket_accessor b_old( this, h & mask ); | |
| | | | |
| mask = (mask<<1) | 1; // get full mask for new bucket | | mask = (mask<<1) | 1; // get full mask for new bucket | |
| __TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL ); | | __TBB_ASSERT( (mask&(mask+1))==0 && (h & mask) == h, NULL ); | |
| restart: | | restart: | |
| for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acquire(*p); is_valid(n); n = *p ) { | | for( node_base **p = &b_old()->node_list, *n = __TBB_load_with_acquire(*p); is_valid(n); n = *p ) { | |
| hashcode_t c = my_hash_compare.hash( static_cast<node*>(n)->item.first ); | | hashcode_t c = my_hash_compare.hash( static_cast<node*>(n)->item.first ); | |
| #if TBB_USE_ASSERT | | #if TBB_USE_ASSERT | |
| hashcode_t bmask = h & (mask>>1); | | hashcode_t bmask = h & (mask>>1); | |
| | | | |
| skipping to change at line 697 | | skipping to change at line 674 | |
| *p = n->next; // exclude from b_old | | *p = n->next; // exclude from b_old | |
| add_to_bucket( b_new, n ); | | add_to_bucket( b_new, n ); | |
| } else p = &n->next; // iterate to next item | | } else p = &n->next; // iterate to next item | |
| } | | } | |
| } | | } | |
| | | | |
| public: | | public: | |
| | | | |
| class accessor; | | class accessor; | |
| //! Combines data access, locking, and garbage collection. | | //! Combines data access, locking, and garbage collection. | |
|
| class const_accessor { | | class const_accessor : private node::scoped_t /*which derived from no_copy*/ { | |
| friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; | | friend class concurrent_hash_map<Key,T,HashCompare,Allocator>; | |
| friend class accessor; | | friend class accessor; | |
| void operator=( const accessor & ) const; // Deny access | | | |
| const_accessor( const accessor & ); // Deny access | | | |
| public: | | public: | |
| //! Type of value | | //! Type of value | |
| typedef const typename concurrent_hash_map::value_type value_type; | | typedef const typename concurrent_hash_map::value_type value_type; | |
| | | | |
| //! True if result is empty. | | //! True if result is empty. | |
| bool empty() const {return !my_node;} | | bool empty() const {return !my_node;} | |
| | | | |
| //! Set to null | | //! Set to null | |
| void release() { | | void release() { | |
| if( my_node ) { | | if( my_node ) { | |
|
| my_lock.release(); | | node::scoped_t::release(); | |
| my_node = 0; | | my_node = 0; | |
| } | | } | |
| } | | } | |
| | | | |
| //! Return reference to associated value in hash table. | | //! Return reference to associated value in hash table. | |
| const_reference operator*() const { | | const_reference operator*() const { | |
| __TBB_ASSERT( my_node, "attempt to dereference empty accessor"
); | | __TBB_ASSERT( my_node, "attempt to dereference empty accessor"
); | |
| return my_node->item; | | return my_node->item; | |
| } | | } | |
| | | | |
| //! Return pointer to associated value in hash table. | | //! Return pointer to associated value in hash table. | |
| const_pointer operator->() const { | | const_pointer operator->() const { | |
| return &operator*(); | | return &operator*(); | |
| } | | } | |
| | | | |
| //! Create empty result | | //! Create empty result | |
| const_accessor() : my_node(NULL) {} | | const_accessor() : my_node(NULL) {} | |
| | | | |
| //! Destroy result after releasing the underlying reference. | | //! Destroy result after releasing the underlying reference. | |
| ~const_accessor() { | | ~const_accessor() { | |
|
| my_node = NULL; // my_lock.release() is called in scoped_lock destructor | | my_node = NULL; // scoped lock's release() is called in its destructor | |
| } | | } | |
|
| private: | | protected: | |
| | | bool is_writer() { return node::scoped_t::is_writer; } | |
| node *my_node; | | node *my_node; | |
|
| typename node::scoped_t my_lock; | | | |
| hashcode_t my_hash; | | hashcode_t my_hash; | |
| }; | | }; | |
| | | | |
| //! Allows write access to elements and combines data access, locking, and garbage collection. | | //! Allows write access to elements and combines data access, locking, and garbage collection. | |
| class accessor: public const_accessor { | | class accessor: public const_accessor { | |
| public: | | public: | |
| //! Type of value | | //! Type of value | |
| typedef typename concurrent_hash_map::value_type value_type; | | typedef typename concurrent_hash_map::value_type value_type; | |
| | | | |
| //! Return reference to associated value in hash table. | | //! Return reference to associated value in hash table. | |
| | | | |
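accessor and const_accessor act as smart locks over a single element: the element stays write- or read-locked for as long as the accessor is alive. A usage sketch against the public API (the map typedef and function names are illustrative):

    #include <string>
    #include "tbb/concurrent_hash_map.h"

    typedef tbb::concurrent_hash_map<std::string,int> counter_map;

    void bump( counter_map& m, const std::string& key ) {
        counter_map::accessor a;        // exclusive lock while 'a' is alive
        m.insert( a, key );             // inserts {key, 0} if the key is absent
        ++a->second;
    }                                   // lock released when 'a' is destroyed

    bool peek( const counter_map& m, const std::string& key, int& out ) {
        counter_map::const_accessor a;  // shared (read) lock
        if( !m.find( a, key ) ) return false;
        out = a->second;
        return true;
    }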
| skipping to change at line 916 | | skipping to change at line 891 | |
| insert( *first ); | | insert( *first ); | |
| } | | } | |
| | | | |
| //! Erase item. | | //! Erase item. | |
| /** Return true if item was erased by particularly this call. */ | | /** Return true if item was erased by particularly this call. */ | |
| bool erase( const Key& key ); | | bool erase( const Key& key ); | |
| | | | |
| //! Erase item by const_accessor. | | //! Erase item by const_accessor. | |
| /** Return true if item was erased by particularly this call. */ | | /** Return true if item was erased by particularly this call. */ | |
| bool erase( const_accessor& item_accessor ) { | | bool erase( const_accessor& item_accessor ) { | |
|
| return exclude( item_accessor, /*readonly=*/ true ); | | return exclude( item_accessor ); | |
| } | | } | |
| | | | |
| //! Erase item by accessor. | | //! Erase item by accessor. | |
| /** Return true if item was erased by particularly this call. */ | | /** Return true if item was erased by particularly this call. */ | |
| bool erase( accessor& item_accessor ) { | | bool erase( accessor& item_accessor ) { | |
|
| return exclude( item_accessor, /*readonly=*/ false ); | | return exclude( item_accessor ); | |
| } | | } | |
| | | | |
| protected: | | protected: | |
| //! Insert or find item and optionally acquire a lock on the item. | | //! Insert or find item and optionally acquire a lock on the item. | |
| bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ); | | bool lookup( bool op_insert, const Key &key, const T *t, const_accessor *result, bool write ); | |
| | | | |
| //! delete item by accessor | | //! delete item by accessor | |
|
| bool exclude( const_accessor &item_accessor, bool readonly ); | | bool exclude( const_accessor &item_accessor ); | |
| | | | |
| //! Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) | | //! Returns an iterator for an item defined by the key, or for the next item after it (if upper==true) | |
| template<typename I> | | template<typename I> | |
| std::pair<I, I> internal_equal_range( const Key& key, I end ) const; | | std::pair<I, I> internal_equal_range( const Key& key, I end ) const; | |
| | | | |
| //! Copy "source" to *this, where *this must start out empty. | | //! Copy "source" to *this, where *this must start out empty. | |
| void internal_copy( const concurrent_hash_map& source ); | | void internal_copy( const concurrent_hash_map& source ); | |
| | | | |
| template<typename I> | | template<typename I> | |
| void internal_copy(I first, I last); | | void internal_copy(I first, I last); | |
| | | | |
| //! Fast find when no concurrent erasure is used. For internal use inside TBB only! | | //! Fast find when no concurrent erasure is used. For internal use inside TBB only! | |
| /** Return pointer to item with given key, or NULL if no such item exists. | | /** Return pointer to item with given key, or NULL if no such item exists. | |
| Must not be called concurrently with erasure operations. */ | | Must not be called concurrently with erasure operations. */ | |
| const_pointer internal_fast_find( const Key& key ) const { | | const_pointer internal_fast_find( const Key& key ) const { | |
| hashcode_t h = my_hash_compare.hash( key ); | | hashcode_t h = my_hash_compare.hash( key ); | |
|
| #if TBB_USE_THREADING_TOOLS | | hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask ); | | | |
| #else | | | |
| hashcode_t m = my_mask; | | | |
| #endif | | | |
| node *n; | | node *n; | |
| restart: | | restart: | |
| __TBB_ASSERT((m&(m+1))==0, NULL); | | __TBB_ASSERT((m&(m+1))==0, NULL); | |
| bucket *b = get_bucket( h & m ); | | bucket *b = get_bucket( h & m ); | |
| #if TBB_USE_THREADING_TOOLS | | | |
| // TODO: actually, notification is unnecessary here, just hiding double-check | | // TODO: actually, notification is unnecessary here, just hiding double-check | |
| if( itt_load_pointer_with_acquire_v3(&b->node_list) == internal::rehash_req ) | | if( itt_load_word_with_acquire(b->node_list) == internal::rehash_req ) | |
| #else | | | |
| if( __TBB_load_with_acquire(b->node_list) == internal::rehash_req ) | | | |
| #endif | | | |
| { | | { | |
| bucket::scoped_t lock; | | bucket::scoped_t lock; | |
| if( lock.try_acquire( b->mutex, /*write=*/true ) ) { | | if( lock.try_acquire( b->mutex, /*write=*/true ) ) { | |
| if( b->node_list == internal::rehash_req) | | if( b->node_list == internal::rehash_req) | |
| const_cast<concurrent_hash_map*>(this)->rehash_bucket(
b, h & m ); //recursive rehashing | | const_cast<concurrent_hash_map*>(this)->rehash_bucket(
b, h & m ); //recursive rehashing | |
| } | | } | |
| else lock.acquire( b->mutex, /*write=*/false ); | | else lock.acquire( b->mutex, /*write=*/false ); | |
| __TBB_ASSERT(b->node_list!=internal::rehash_req,NULL); | | __TBB_ASSERT(b->node_list!=internal::rehash_req,NULL); | |
| } | | } | |
| n = search_bucket( key, b ); | | n = search_bucket( key, b ); | |
| | | | |
| skipping to change at line 989 | | skipping to change at line 956 | |
| | | | |
| #if _MSC_VER && !defined(__INTEL_COMPILER) | | #if _MSC_VER && !defined(__INTEL_COMPILER) | |
| // Suppress "conditional expression is constant" warning. | | // Suppress "conditional expression is constant" warning. | |
| #pragma warning( push ) | | #pragma warning( push ) | |
| #pragma warning( disable: 4127 ) | | #pragma warning( disable: 4127 ) | |
| #endif | | #endif | |
| | | | |
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
| bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, cons
t Key &key, const T *t, const_accessor *result, bool write ) { | | bool concurrent_hash_map<Key,T,HashCompare,A>::lookup( bool op_insert, cons
t Key &key, const T *t, const_accessor *result, bool write ) { | |
| __TBB_ASSERT( !result || !result->my_node, NULL ); | | __TBB_ASSERT( !result || !result->my_node, NULL ); | |
|
| segment_index_t grow_segment; | | | |
| bool return_value; | | bool return_value; | |
|
| node *n, *tmp_n = 0; | | | |
| hashcode_t const h = my_hash_compare.hash( key ); | | hashcode_t const h = my_hash_compare.hash( key ); | |
|
| #if TBB_USE_THREADING_TOOLS | | hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask | | segment_index_t grow_segment = 0; | |
| ); | | node *n, *tmp_n = 0; | |
| #else | | | |
| hashcode_t m = my_mask; | | | |
| #endif | | | |
| restart: | | restart: | |
| {//lock scope | | {//lock scope | |
| __TBB_ASSERT((m&(m+1))==0, NULL); | | __TBB_ASSERT((m&(m+1))==0, NULL); | |
| return_value = false; | | return_value = false; | |
| // get bucket | | // get bucket | |
| bucket_accessor b( this, h & m ); | | bucket_accessor b( this, h & m ); | |
| | | | |
| // find a node | | // find a node | |
| n = search_bucket( key, b() ); | | n = search_bucket( key, b() ); | |
| if( op_insert ) { | | if( op_insert ) { | |
| | | | |
| skipping to change at line 1028 | | skipping to change at line 991 | |
| b.downgrade_to_reader(); | | b.downgrade_to_reader(); | |
| goto exists; | | goto exists; | |
| } | | } | |
| } | | } | |
| if( check_mask_race(h, m) ) | | if( check_mask_race(h, m) ) | |
| goto restart; // b.release() is done in ~b(). | | goto restart; // b.release() is done in ~b(). | |
| // insert and set flag to grow the container | | // insert and set flag to grow the container | |
| grow_segment = insert_new_node( b(), n = tmp_n, m ); | | grow_segment = insert_new_node( b(), n = tmp_n, m ); | |
| tmp_n = 0; | | tmp_n = 0; | |
| return_value = true; | | return_value = true; | |
|
| } else { | | | |
| exists: grow_segment = 0; | | | |
| } | | } | |
| } else { // find or count | | } else { // find or count | |
| if( !n ) { | | if( !n ) { | |
| if( check_mask_race( h, m ) ) | | if( check_mask_race( h, m ) ) | |
| goto restart; // b.release() is done in ~b(). TODO: rep
lace by continue | | goto restart; // b.release() is done in ~b(). TODO: rep
lace by continue | |
| return false; | | return false; | |
| } | | } | |
| return_value = true; | | return_value = true; | |
|
| grow_segment = 0; | | | |
| } | | } | |
|
| | | exists: | |
| if( !result ) goto check_growth; | | if( !result ) goto check_growth; | |
| // TODO: the following seems like a generic/regular operation | | // TODO: the following seems like a generic/regular operation | |
| // acquire the item | | // acquire the item | |
|
| if( !result->my_lock.try_acquire( n->mutex, write ) ) { | | if( !result->try_acquire( n->mutex, write ) ) { | |
| // we are unlucky, prepare for longer wait | | // we are unlucky, prepare for longer wait | |
| tbb::internal::atomic_backoff trials; | | tbb::internal::atomic_backoff trials; | |
| do { | | do { | |
| if( !trials.bounded_pause() ) { | | if( !trials.bounded_pause() ) { | |
| // the wait takes really long, restart the operation | | // the wait takes really long, restart the operation | |
| b.release(); | | b.release(); | |
| __TBB_ASSERT( !op_insert || !return_value, "Can't acqui
re new item in locked bucket?" ); | | __TBB_ASSERT( !op_insert || !return_value, "Can't acqui
re new item in locked bucket?" ); | |
| __TBB_Yield(); | | __TBB_Yield(); | |
|
| #if TBB_USE_THREADING_TOOLS | | m = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_ | | | |
| mask ); | | | |
| #else | | | |
| m = my_mask; | | | |
| #endif | | | |
| goto restart; | | goto restart; | |
| } | | } | |
|
| } while( !result->my_lock.try_acquire( n->mutex, write ) ); | | } while( !result->try_acquire( n->mutex, write ) ); | |
| } | | } | |
| }//lock scope | | }//lock scope | |
| result->my_node = n; | | result->my_node = n; | |
| result->my_hash = h; | | result->my_hash = h; | |
| check_growth: | | check_growth: | |
| // [opt] grow the container | | // [opt] grow the container | |
|
| if( grow_segment ) | | if( grow_segment ) { | |
| | | #if __TBB_STATISTICS | |
| | | my_info_resizes++; // concurrent ones | |
| | | #endif | |
| enable_segment( grow_segment ); | | enable_segment( grow_segment ); | |
|
| | | } | |
| if( tmp_n ) // if op_insert only | | if( tmp_n ) // if op_insert only | |
| delete_node( tmp_n ); | | delete_node( tmp_n ); | |
| return return_value; | | return return_value; | |
| } | | } | |
| | | | |
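The lookup() routine above is the common engine behind the public find/insert accessor operations (op_insert selects insertion, write selects the lock mode). A hedged usage sketch of those callers follows; the counter map and function names are illustrative, not part of this header.

    #include <string>
    #include "tbb/concurrent_hash_map.h"

    typedef tbb::concurrent_hash_map<std::string,int> counters_t;

    void bump( counters_t& counters, const std::string& key ) {
        counters_t::accessor a;            // write lock on the item while held
        if( counters.insert( a, key ) )    // true if a new element was created
            a->second = 0;                 // make the initial value explicit
        ++a->second;                       // item stays locked until 'a' is destroyed
    }

    int read_count( const counters_t& counters, const std::string& key ) {
        counters_t::const_accessor ca;     // read (shared) lock
        return counters.find( ca, key ) ? ca->second : 0;
    }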
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
| template<typename I> | | template<typename I> | |
| std::pair<I, I> concurrent_hash_map<Key,T,HashCompare,A>::internal_equal_ra
nge( const Key& key, I end_ ) const { | | std::pair<I, I> concurrent_hash_map<Key,T,HashCompare,A>::internal_equal_ra
nge( const Key& key, I end_ ) const { | |
| hashcode_t h = my_hash_compare.hash( key ); | | hashcode_t h = my_hash_compare.hash( key ); | |
| hashcode_t m = my_mask; | | hashcode_t m = my_mask; | |
| | | | |
| skipping to change at line 1093 | | skipping to change at line 1054 | |
| b = get_bucket( h &= m ); | | b = get_bucket( h &= m ); | |
| } | | } | |
| node *n = search_bucket( key, b ); | | node *n = search_bucket( key, b ); | |
| if( !n ) | | if( !n ) | |
| return std::make_pair(end_, end_); | | return std::make_pair(end_, end_); | |
| iterator lower(*this, h, b, n), upper(lower); | | iterator lower(*this, h, b, n), upper(lower); | |
| return std::make_pair(lower, ++upper); | | return std::make_pair(lower, ++upper); | |
| } | | } | |
| | | | |
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
|
| bool concurrent_hash_map<Key,T,HashCompare,A>::exclude( const_accessor &ite
m_accessor, bool readonly ) { | | bool concurrent_hash_map<Key,T,HashCompare,A>::exclude( const_accessor &ite
m_accessor ) { | |
| __TBB_ASSERT( item_accessor.my_node, NULL ); | | __TBB_ASSERT( item_accessor.my_node, NULL ); | |
| node_base *const n = item_accessor.my_node; | | node_base *const n = item_accessor.my_node; | |
|
| item_accessor.my_node = NULL; // we ought release accessor anyway | | | |
| hashcode_t const h = item_accessor.my_hash; | | hashcode_t const h = item_accessor.my_hash; | |
|
| #if TBB_USE_THREADING_TOOLS | | hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask | | | |
| ); | | | |
| #else | | | |
| hashcode_t m = my_mask; | | | |
| #endif | | | |
| do { | | do { | |
| // get bucket | | // get bucket | |
| bucket_accessor b( this, h & m, /*writer=*/true ); | | bucket_accessor b( this, h & m, /*writer=*/true ); | |
| node_base **p = &b()->node_list; | | node_base **p = &b()->node_list; | |
| while( *p && *p != n ) | | while( *p && *p != n ) | |
| p = &(*p)->next; | | p = &(*p)->next; | |
| if( !*p ) { // someone else was the first | | if( !*p ) { // someone else was the first | |
| if( check_mask_race( h, m ) ) | | if( check_mask_race( h, m ) ) | |
| continue; | | continue; | |
|
| item_accessor.my_lock.release(); | | item_accessor.release(); | |
| return false; | | return false; | |
| } | | } | |
| __TBB_ASSERT( *p == n, NULL ); | | __TBB_ASSERT( *p == n, NULL ); | |
| *p = n->next; // remove from container | | *p = n->next; // remove from container | |
| my_size--; | | my_size--; | |
| break; | | break; | |
| } while(true); | | } while(true); | |
|
| if( readonly ) // need to get exclusive lock | | if( !item_accessor.is_writer() ) // need to get exclusive lock | |
| item_accessor.my_lock.upgrade_to_writer(); // return value means no | | item_accessor.upgrade_to_writer(); // return value means nothing he | |
| thing here | | re | |
| item_accessor.my_lock.release(); | | item_accessor.release(); | |
| delete_node( n ); // Only one thread can delete it due to write lock on | | delete_node( n ); // Only one thread can delete it | |
| the chain_mutex | | | |
| return true; | | return true; | |
| } | | } | |
| | | | |
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
| bool concurrent_hash_map<Key,T,HashCompare,A>::erase( const Key &key ) { | | bool concurrent_hash_map<Key,T,HashCompare,A>::erase( const Key &key ) { | |
| node_base *n; | | node_base *n; | |
| hashcode_t const h = my_hash_compare.hash( key ); | | hashcode_t const h = my_hash_compare.hash( key ); | |
|
| #if TBB_USE_THREADING_TOOLS | | hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask ); | |
| hashcode_t m = (hashcode_t) itt_load_pointer_with_acquire_v3( &my_mask | | | |
| ); | | | |
| #else | | | |
| hashcode_t m = my_mask; | | | |
| #endif | | | |
| restart: | | restart: | |
| {//lock scope | | {//lock scope | |
| // get bucket | | // get bucket | |
| bucket_accessor b( this, h & m ); | | bucket_accessor b( this, h & m ); | |
| search: | | search: | |
| node_base **p = &b()->node_list; | | node_base **p = &b()->node_list; | |
| n = *p; | | n = *p; | |
| while( is_valid(n) && !my_hash_compare.equal(key, static_cast<node*
>(n)->item.first ) ) { | | while( is_valid(n) && !my_hash_compare.equal(key, static_cast<node*
>(n)->item.first ) ) { | |
| p = &n->next; | | p = &n->next; | |
| n = *p; | | n = *p; | |
| | | | |
| skipping to change at line 1245 | | skipping to change at line 1197 | |
| typeid(*this).name(), current_size, empty_buckets, overpopulate
d_buckets ); | | typeid(*this).name(), current_size, empty_buckets, overpopulate
d_buckets ); | |
| reported = true; | | reported = true; | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<typename Key, typename T, typename HashCompare, typename A> | | template<typename Key, typename T, typename HashCompare, typename A> | |
| void concurrent_hash_map<Key,T,HashCompare,A>::clear() { | | void concurrent_hash_map<Key,T,HashCompare,A>::clear() { | |
| hashcode_t m = my_mask; | | hashcode_t m = my_mask; | |
| __TBB_ASSERT((m&(m+1))==0, NULL); | | __TBB_ASSERT((m&(m+1))==0, NULL); | |
|
| #if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS | | #if TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| #if TBB_USE_PERFORMANCE_WARNINGS | | #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0,
overpopulated_buckets = 0; // usage statistics | | int current_size = int(my_size), buckets = int(m)+1, empty_buckets = 0,
overpopulated_buckets = 0; // usage statistics | |
| static bool reported = false; | | static bool reported = false; | |
| #endif | | #endif | |
| bucket *bp = 0; | | bucket *bp = 0; | |
| // check consistency | | // check consistency | |
| for( segment_index_t b = 0; b <= m; b++ ) { | | for( segment_index_t b = 0; b <= m; b++ ) { | |
| if( b & (b-2) ) ++bp; // not the beginning of a segment | | if( b & (b-2) ) ++bp; // not the beginning of a segment | |
| else bp = get_bucket( b ); | | else bp = get_bucket( b ); | |
| node_base *n = bp->node_list; | | node_base *n = bp->node_list; | |
| __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n ==
internal::rehash_req, "Broken internal structure" ); | | __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n ==
internal::rehash_req, "Broken internal structure" ); | |
| __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concu
rrent or unexpectedly terminated operation during clear() execution" ); | | __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concu
rrent or unexpectedly terminated operation during clear() execution" ); | |
|
| #if TBB_USE_PERFORMANCE_WARNINGS | | #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| if( n == internal::empty_rehashed ) empty_buckets++; | | if( n == internal::empty_rehashed ) empty_buckets++; | |
| else if( n == internal::rehash_req ) buckets--; | | else if( n == internal::rehash_req ) buckets--; | |
| else if( n->next ) overpopulated_buckets++; | | else if( n->next ) overpopulated_buckets++; | |
| #endif | | #endif | |
| #if __TBB_EXTRA_DEBUG | | #if __TBB_EXTRA_DEBUG | |
| for(; is_valid(n); n = n->next ) { | | for(; is_valid(n); n = n->next ) { | |
| hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->ite
m.first ); | | hashcode_t h = my_hash_compare.hash( static_cast<node*>(n)->ite
m.first ); | |
| h &= m; | | h &= m; | |
| __TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::r
ehash_req, "hash() function changed for key in table or internal error" ); | | __TBB_ASSERT( h == b || get_bucket(h)->node_list == internal::r
ehash_req, "hash() function changed for key in table or internal error" ); | |
| } | | } | |
| #endif | | #endif | |
| } | | } | |
|
| #if TBB_USE_PERFORMANCE_WARNINGS | | #if TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| | | #if __TBB_STATISTICS | |
| | | printf( "items=%d buckets: capacity=%d rehashed=%d empty=%d overpopulat | |
| | | ed=%d" | |
| | | " concurrent: resizes=%u rehashes=%u restarts=%u\n", | |
| | | current_size, int(m+1), buckets, empty_buckets, overpopulated_bucke | |
| | | ts, | |
| | | unsigned(my_info_resizes), unsigned(my_info_rehashes), unsigned(my_ | |
| | | info_restarts) ); | |
| | | my_info_resizes = 0; // concurrent ones | |
| | | my_info_restarts = 0; // race collisions | |
| | | my_info_rehashes = 0; // invocations of rehash_bucket | |
| | | #endif | |
| if( buckets > current_size) empty_buckets -= buckets - current_size; | | if( buckets > current_size) empty_buckets -= buckets - current_size; | |
| else overpopulated_buckets -= current_size - buckets; // TODO: load_fac
tor? | | else overpopulated_buckets -= current_size - buckets; // TODO: load_fac
tor? | |
| if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size ||
2*overpopulated_buckets > current_size ) ) { | | if( !reported && buckets >= 512 && ( 2*empty_buckets > current_size ||
2*overpopulated_buckets > current_size ) ) { | |
| tbb::internal::runtime_warning( | | tbb::internal::runtime_warning( | |
| "Performance is not optimal because the hash function produces
bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d", | | "Performance is not optimal because the hash function produces
bad randomness in lower bits in %s.\nSize: %d Empties: %d Overlaps: %d", | |
| typeid(*this).name(), current_size, empty_buckets, overpopulate
d_buckets ); | | typeid(*this).name(), current_size, empty_buckets, overpopulate
d_buckets ); | |
| reported = true; | | reported = true; | |
| } | | } | |
| #endif | | #endif | |
|
| #endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS | | #endif//TBB_USE_ASSERT || TBB_USE_PERFORMANCE_WARNINGS || __TBB_STATISTICS | |
| my_size = 0; | | my_size = 0; | |
| segment_index_t s = segment_index_of( m ); | | segment_index_t s = segment_index_of( m ); | |
| __TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask
or concurrent grow" ); | | __TBB_ASSERT( s+1 == pointers_per_table || !my_table[s+1], "wrong mask
or concurrent grow" ); | |
| cache_aligned_allocator<bucket> alloc; | | cache_aligned_allocator<bucket> alloc; | |
| do { | | do { | |
| __TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent gr
ow" ); | | __TBB_ASSERT( is_valid( my_table[s] ), "wrong mask or concurrent gr
ow" ); | |
| segment_ptr_t buckets_ptr = my_table[s]; | | segment_ptr_t buckets_ptr = my_table[s]; | |
| size_type sz = segment_size( s ? s : 1 ); | | size_type sz = segment_size( s ? s : 1 ); | |
| for( segment_index_t i = 0; i < sz; i++ ) | | for( segment_index_t i = 0; i < sz; i++ ) | |
| for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n =
buckets_ptr[i].node_list ) { | | for( node_base *n = buckets_ptr[i].node_list; is_valid(n); n =
buckets_ptr[i].node_list ) { | |
| | | | |
| skipping to change at line 1342 | | skipping to change at line 1303 | |
| for(; first != last; ++first) { | | for(; first != last; ++first) { | |
| hashcode_t h = my_hash_compare.hash( first->first ); | | hashcode_t h = my_hash_compare.hash( first->first ); | |
| bucket *b = get_bucket( h & m ); | | bucket *b = get_bucket( h & m ); | |
| __TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket
in destination table"); | | __TBB_ASSERT( b->node_list != internal::rehash_req, "Invalid bucket
in destination table"); | |
| node *n = new( my_allocator ) node(first->first, first->second); | | node *n = new( my_allocator ) node(first->first, first->second); | |
| add_to_bucket( b, n ); | | add_to_bucket( b, n ); | |
| ++my_size; // TODO: replace by non-atomic op | | ++my_size; // TODO: replace by non-atomic op | |
| } | | } | |
| } | | } | |
| | | | |
|
| } // namespace interface4 | | } // namespace interface5 | |
| | | | |
|
| using interface4::concurrent_hash_map; | | using interface5::concurrent_hash_map; | |
| | | | |
| template<typename Key, typename T, typename HashCompare, typename A1, typen
ame A2> | | template<typename Key, typename T, typename HashCompare, typename A1, typen
ame A2> | |
| inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &
a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { | | inline bool operator==(const concurrent_hash_map<Key, T, HashCompare, A1> &
a, const concurrent_hash_map<Key, T, HashCompare, A2> &b) { | |
| if(a.size() != b.size()) return false; | | if(a.size() != b.size()) return false; | |
| typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i
(a.begin()), i_end(a.end()); | | typename concurrent_hash_map<Key, T, HashCompare, A1>::const_iterator i
(a.begin()), i_end(a.end()); | |
| typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j
, j_end(b.end()); | | typename concurrent_hash_map<Key, T, HashCompare, A2>::const_iterator j
, j_end(b.end()); | |
| for(; i != i_end; ++i) { | | for(; i != i_end; ++i) { | |
| j = b.equal_range(i->first).first; | | j = b.equal_range(i->first).first; | |
| if( j == j_end || !(i->second == j->second) ) return false; | | if( j == j_end || !(i->second == j->second) ) return false; | |
| } | | } | |
| | | | |
End of changes. 63 change blocks. |
| 147 lines changed or deleted | | 95 lines changed or added | |
|
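Before moving on to the next header: the operator== and internal_equal_range shown above compare and probe tables element-wise. A short sketch of the caller-side view, with illustrative names and the caveat that the iterators of this container are not concurrency-safe.

    #include <utility>
    #include "tbb/concurrent_hash_map.h"

    typedef tbb::concurrent_hash_map<int,int> map_t;

    // Equal when both maps hold the same keys with equal mapped values;
    // the allocators of the two maps may differ, as in the template above.
    bool same_contents( const map_t& a, const map_t& b ) {
        return a == b;
    }

    // equal_range() yields an iterator pair; for this unique-key container it
    // designates at most one element. Use only while the table is quiescent.
    bool contains( const map_t& m, int key ) {
        std::pair<map_t::const_iterator, map_t::const_iterator> r = m.equal_range( key );
        return r.first != r.second;
    }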
| concurrent_queue.h | | concurrent_queue.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 32 | | skipping to change at line 32 | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_concurrent_queue_H | | #ifndef __TBB_concurrent_queue_H | |
| #define __TBB_concurrent_queue_H | | #define __TBB_concurrent_queue_H | |
| | | | |
|
| #include "_concurrent_queue_internal.h" | | #include "internal/_concurrent_queue_impl.h" | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| namespace strict_ppl { | | namespace strict_ppl { | |
| | | | |
| //! A high-performance thread-safe non-blocking concurrent queue. | | //! A high-performance thread-safe non-blocking concurrent queue. | |
| /** Multiple threads may each push and pop concurrently. | | /** Multiple threads may each push and pop concurrently. | |
| Assignment construction is not allowed. | | Assignment construction is not allowed. | |
| @ingroup containers */ | | @ingroup containers */ | |
| template<typename T, typename A = cache_aligned_allocator<T> > | | template<typename T, typename A = cache_aligned_allocator<T> > | |
| class concurrent_queue: public internal::concurrent_queue_base_v3<T> { | | class concurrent_queue: public internal::concurrent_queue_base_v3<T> { | |
| template<typename Container, typename Value> friend class internal::con
current_queue_iterator; | | template<typename Container, typename Value> friend class internal::con
current_queue_iterator; | |
| | | | |
| //! Allocator type | | //! Allocator type | |
| typedef typename A::template rebind<char>::other page_allocator_type; | | typedef typename A::template rebind<char>::other page_allocator_type; | |
| page_allocator_type my_allocator; | | page_allocator_type my_allocator; | |
| | | | |
| //! Allocates a block of size n (bytes) | | //! Allocates a block of size n (bytes) | |
|
| /*overide*/ virtual void *allocate_block( size_t n ) { | | /*override*/ virtual void *allocate_block( size_t n ) { | |
| void *b = reinterpret_cast<void*>(my_allocator.allocate( n )); | | void *b = reinterpret_cast<void*>(my_allocator.allocate( n )); | |
| if( !b ) | | if( !b ) | |
| internal::throw_exception(internal::eid_bad_alloc); | | internal::throw_exception(internal::eid_bad_alloc); | |
| return b; | | return b; | |
| } | | } | |
| | | | |
| //! Deallocates block created by allocate_block. | | //! Deallocates block created by allocate_block. | |
| /*override*/ virtual void deallocate_block( void *b, size_t n ) { | | /*override*/ virtual void deallocate_block( void *b, size_t n ) { | |
| my_allocator.deallocate( reinterpret_cast<char*>(b), n ); | | my_allocator.deallocate( reinterpret_cast<char*>(b), n ); | |
| } | | } | |
| | | | |
| skipping to change at line 94 | | skipping to change at line 94 | |
| my_allocator( a ) | | my_allocator( a ) | |
| { | | { | |
| } | | } | |
| | | | |
| //! [begin,end) constructor | | //! [begin,end) constructor | |
| template<typename InputIterator> | | template<typename InputIterator> | |
| concurrent_queue( InputIterator begin, InputIterator end, const allocat
or_type& a = allocator_type()) : | | concurrent_queue( InputIterator begin, InputIterator end, const allocat
or_type& a = allocator_type()) : | |
| my_allocator( a ) | | my_allocator( a ) | |
| { | | { | |
| for( ; begin != end; ++begin ) | | for( ; begin != end; ++begin ) | |
|
| internal_push(&*begin); | | this->internal_push(&*begin); | |
| } | | } | |
| | | | |
| //! Copy constructor | | //! Copy constructor | |
| concurrent_queue( const concurrent_queue& src, const allocator_type& a
= allocator_type()) : | | concurrent_queue( const concurrent_queue& src, const allocator_type& a
= allocator_type()) : | |
| internal::concurrent_queue_base_v3<T>(), my_allocator( a ) | | internal::concurrent_queue_base_v3<T>(), my_allocator( a ) | |
| { | | { | |
|
| assign( src ); | | this->assign( src ); | |
| } | | } | |
| | | | |
| //! Destroy queue | | //! Destroy queue | |
| ~concurrent_queue(); | | ~concurrent_queue(); | |
| | | | |
| //! Enqueue an item at tail of queue. | | //! Enqueue an item at tail of queue. | |
| void push( const T& source ) { | | void push( const T& source ) { | |
|
| internal_push( &source ); | | this->internal_push( &source ); | |
| } | | } | |
| | | | |
| //! Attempt to dequeue an item from head of queue. | | //! Attempt to dequeue an item from head of queue. | |
| /** Does not wait for item to become available. | | /** Does not wait for item to become available. | |
| Returns true if successful; false otherwise. */ | | Returns true if successful; false otherwise. */ | |
| bool try_pop( T& result ) { | | bool try_pop( T& result ) { | |
|
| return internal_try_pop( &result ); | | return this->internal_try_pop( &result ); | |
| } | | } | |
| | | | |
| //! Return the number of items in the queue; thread unsafe | | //! Return the number of items in the queue; thread unsafe | |
| size_type unsafe_size() const {return this->internal_size();} | | size_type unsafe_size() const {return this->internal_size();} | |
| | | | |
| //! Equivalent to size()==0. | | //! Equivalent to size()==0. | |
| bool empty() const {return this->internal_empty();} | | bool empty() const {return this->internal_empty();} | |
| | | | |
| //! Clear the queue. Not thread-safe. | | //! Clear the queue. Not thread-safe. | |
| void clear() ; | | void clear() ; | |
| | | | |
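A minimal usage sketch of the unbounded queue interface declared above: push() never blocks and try_pop() never waits. The producer/consumer split and the element type are illustrative only.

    #include "tbb/concurrent_queue.h"

    void producer( tbb::concurrent_queue<int>& q ) {
        for( int i = 0; i < 100; ++i )
            q.push( i );                 // enqueue at tail; capacity grows as needed
    }

    void consumer( tbb::concurrent_queue<int>& q ) {
        int item;
        while( q.try_pop( item ) ) {     // returns false immediately when empty
            // ... process item ...
        }
    }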
| skipping to change at line 202 | | skipping to change at line 202 | |
| /*override*/ virtual void copy_page_item( page& dst, size_t dindex, con
st page& src, size_t sindex ) { | | /*override*/ virtual void copy_page_item( page& dst, size_t dindex, con
st page& src, size_t sindex ) { | |
| new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sin
dex ) ); | | new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sin
dex ) ); | |
| } | | } | |
| | | | |
| /*override*/ virtual void assign_and_destroy_item( void* dst, page& src
, size_t index ) { | | /*override*/ virtual void assign_and_destroy_item( void* dst, page& src
, size_t index ) { | |
| T& from = get_ref(src,index); | | T& from = get_ref(src,index); | |
| destroyer d(from); | | destroyer d(from); | |
| *static_cast<T*>(dst) = from; | | *static_cast<T*>(dst) = from; | |
| } | | } | |
| | | | |
|
| /*overide*/ virtual page *allocate_page() { | | /*override*/ virtual page *allocate_page() { | |
| size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); | | size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); | |
| page *p = reinterpret_cast<page*>(my_allocator.allocate( n )); | | page *p = reinterpret_cast<page*>(my_allocator.allocate( n )); | |
| if( !p ) | | if( !p ) | |
| internal::throw_exception(internal::eid_bad_alloc); | | internal::throw_exception(internal::eid_bad_alloc); | |
| return p; | | return p; | |
| } | | } | |
| | | | |
| /*override*/ virtual void deallocate_page( page *p ) { | | /*override*/ virtual void deallocate_page( page *p ) { | |
|
| size_t n = sizeof(padded_page) + items_per_page*sizeof(T); | | size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T); | |
| my_allocator.deallocate( reinterpret_cast<char*>(p), n ); | | my_allocator.deallocate( reinterpret_cast<char*>(p), n ); | |
| } | | } | |
| | | | |
| public: | | public: | |
| //! Element type in the queue. | | //! Element type in the queue. | |
| typedef T value_type; | | typedef T value_type; | |
| | | | |
| //! Allocator type | | //! Allocator type | |
| typedef A allocator_type; | | typedef A allocator_type; | |
| | | | |
| //! Reference type | | //! Reference type | |
| typedef T& reference; | | typedef T& reference; | |
| | | | |
| //! Const reference type | | //! Const reference type | |
| typedef const T& const_reference; | | typedef const T& const_reference; | |
| | | | |
| //! Integral type for representing size of the queue. | | //! Integral type for representing size of the queue. | |
|
| /** Notice that the size_type is a signed integral type. | | /** Note that the size_type is a signed integral type. | |
| This is because the size can be negative if there are pending pops
without corresponding pushes. */ | | This is because the size can be negative if there are pending pops
without corresponding pushes. */ | |
| typedef std::ptrdiff_t size_type; | | typedef std::ptrdiff_t size_type; | |
| | | | |
| //! Difference type for iterator | | //! Difference type for iterator | |
| typedef std::ptrdiff_t difference_type; | | typedef std::ptrdiff_t difference_type; | |
| | | | |
| //! Construct empty queue | | //! Construct empty queue | |
| explicit concurrent_bounded_queue(const allocator_type& a = allocator_t
ype()) : | | explicit concurrent_bounded_queue(const allocator_type& a = allocator_t
ype()) : | |
| concurrent_queue_base_v3( sizeof(T) ), my_allocator( a ) | | concurrent_queue_base_v3( sizeof(T) ), my_allocator( a ) | |
| { | | { | |
| | | | |
| skipping to change at line 378 | | skipping to change at line 378 | |
| template<typename InputIterator> | | template<typename InputIterator> | |
| concurrent_queue( InputIterator b /*begin*/, InputIterator e /*end*/, c
onst A& a = A()) : | | concurrent_queue( InputIterator b /*begin*/, InputIterator e /*end*/, c
onst A& a = A()) : | |
| concurrent_bounded_queue<T,A>( b, e, a ) | | concurrent_bounded_queue<T,A>( b, e, a ) | |
| { | | { | |
| } | | } | |
| | | | |
| //! Enqueue an item at tail of queue if queue is not already full. | | //! Enqueue an item at tail of queue if queue is not already full. | |
| /** Does not wait for queue to become not full. | | /** Does not wait for queue to become not full. | |
| Returns true if item is pushed; false if queue was already full. */ | | Returns true if item is pushed; false if queue was already full. */ | |
| bool push_if_not_full( const T& source ) { | | bool push_if_not_full( const T& source ) { | |
|
| return try_push( source ); | | return this->try_push( source ); | |
| } | | } | |
| | | | |
| //! Attempt to dequeue an item from head of queue. | | //! Attempt to dequeue an item from head of queue. | |
| /** Does not wait for item to become available. | | /** Does not wait for item to become available. | |
| Returns true if successful; false otherwise. | | Returns true if successful; false otherwise. | |
| @deprecated Use try_pop() | | @deprecated Use try_pop() | |
| */ | | */ | |
| bool pop_if_present( T& destination ) { | | bool pop_if_present( T& destination ) { | |
|
| return try_pop( destination ); | | return this->try_pop( destination ); | |
| } | | } | |
| | | | |
| typedef typename concurrent_bounded_queue<T,A>::iterator iterator; | | typedef typename concurrent_bounded_queue<T,A>::iterator iterator; | |
| typedef typename concurrent_bounded_queue<T,A>::const_iterator const_it
erator; | | typedef typename concurrent_bounded_queue<T,A>::const_iterator const_it
erator; | |
| // | | // | |
| //---------------------------------------------------------------------
--- | | //---------------------------------------------------------------------
--- | |
| // The iterators are intended only for debugging. They are slow and no
t thread safe. | | // The iterators are intended only for debugging. They are slow and no
t thread safe. | |
| //---------------------------------------------------------------------
--- | | //---------------------------------------------------------------------
--- | |
| iterator begin() {return this->unsafe_begin();} | | iterator begin() {return this->unsafe_begin();} | |
| iterator end() {return this->unsafe_end();} | | iterator end() {return this->unsafe_end();} | |
| | | | |
End of changes. 12 change blocks. |
| 12 lines changed or deleted | | 12 lines changed or added | |
|
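For the bounded variant and the deprecated wrappers above (push_if_not_full/pop_if_present forwarding to try_push/try_pop), a hedged sketch; set_capacity() is part of concurrent_bounded_queue's public interface even though it does not appear in this excerpt, and the capacity value is arbitrary.

    #include "tbb/concurrent_queue.h"

    void bounded_example() {
        tbb::concurrent_bounded_queue<int> q;
        q.set_capacity( 4 );             // pushes now respect a bound of 4 items

        bool accepted = q.try_push( 1 ); // preferred over deprecated push_if_not_full()
        int value;
        bool got = q.try_pop( value );   // preferred over deprecated pop_if_present()
        (void)accepted; (void)got;
    }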
| enumerable_thread_specific.h | | enumerable_thread_specific.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 34 | | skipping to change at line 34 | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_enumerable_thread_specific_H | | #ifndef __TBB_enumerable_thread_specific_H | |
| #define __TBB_enumerable_thread_specific_H | | #define __TBB_enumerable_thread_specific_H | |
| | | | |
| #include "concurrent_vector.h" | | #include "concurrent_vector.h" | |
| #include "tbb_thread.h" | | #include "tbb_thread.h" | |
|
| | | #include "tbb_allocator.h" | |
| #include "cache_aligned_allocator.h" | | #include "cache_aligned_allocator.h" | |
|
| #if __SUNPRO_CC | | #include "aligned_space.h" | |
| #include <string.h> // for memcpy | | #include <string.h> // for memcpy | |
|
| #endif | | | |
| | | | |
| #if _WIN32||_WIN64 | | #if _WIN32||_WIN64 | |
|
| #include <windows.h> | | #include "machine/windows_api.h" | |
| #else | | #else | |
| #include <pthread.h> | | #include <pthread.h> | |
| #endif | | #endif | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| //! enum for selecting between single key and key-per-instance versions | | //! enum for selecting between single key and key-per-instance versions | |
| enum ets_key_usage_type { ets_key_per_instance, ets_no_key }; | | enum ets_key_usage_type { ets_key_per_instance, ets_no_key }; | |
| | | | |
|
| namespace interface5 { | | namespace interface6 { | |
| | | | |
| //! @cond | | //! @cond | |
| namespace internal { | | namespace internal { | |
| | | | |
| template<ets_key_usage_type ETS_key_type> | | template<ets_key_usage_type ETS_key_type> | |
| class ets_base: tbb::internal::no_copy { | | class ets_base: tbb::internal::no_copy { | |
| protected: | | protected: | |
| #if _WIN32||_WIN64 | | #if _WIN32||_WIN64 | |
| typedef DWORD key_type; | | typedef DWORD key_type; | |
| #else | | #else | |
| | | | |
| skipping to change at line 87 | | skipping to change at line 87 | |
| return h>>(8*sizeof(size_t)-lg_size); | | return h>>(8*sizeof(size_t)-lg_size); | |
| } | | } | |
| }; | | }; | |
| struct slot { | | struct slot { | |
| key_type key; | | key_type key; | |
| void* ptr; | | void* ptr; | |
| bool empty() const {return !key;} | | bool empty() const {return !key;} | |
| bool match( key_type k ) const {return key==k;} | | bool match( key_type k ) const {return key==k;} | |
| bool claim( key_type k ) { | | bool claim( key_type k ) { | |
| __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_
type), NULL); | | __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_
type), NULL); | |
|
| __TBB_ASSERT(sizeof(void*)==sizeof(tbb::atomic<key_type | | return tbb::internal::punned_cast<tbb::atomic<key_type> | |
| >*), NULL); | | *>(&key)->compare_and_swap(k,0)==0; | |
| union { void* space; tbb::atomic<key_type>* key_atomic; | | | |
| } helper; | | | |
| helper.space = &key; | | | |
| return helper.key_atomic->compare_and_swap(k,0)==0; | | | |
| } | | } | |
| }; | | }; | |
| #if __TBB_GCC_3_3_PROTECTED_BROKEN | | #if __TBB_GCC_3_3_PROTECTED_BROKEN | |
| protected: | | protected: | |
| #endif | | #endif | |
| | | | |
| static key_type key_of_current_thread() { | | static key_type key_of_current_thread() { | |
| tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id(); | | tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id(); | |
| key_type k; | | key_type k; | |
| memcpy( &k, &id, sizeof(k) ); | | memcpy( &k, &id, sizeof(k) ); | |
| | | | |
| skipping to change at line 235 | | skipping to change at line 232 | |
| size_t mask = ir->mask(); | | size_t mask = ir->mask(); | |
| for(size_t i = ir->start(h);;i=(i+1)&mask) { | | for(size_t i = ir->start(h);;i=(i+1)&mask) { | |
| slot& s = ir->at(i); | | slot& s = ir->at(i); | |
| if( s.empty() ) { | | if( s.empty() ) { | |
| if( s.claim(k) ) { | | if( s.claim(k) ) { | |
| s.ptr = found; | | s.ptr = found; | |
| return found; | | return found; | |
| } | | } | |
| } | | } | |
| } | | } | |
|
| }; | | } | |
| | | | |
| //! Specialization that exploits native TLS | | //! Specialization that exploits native TLS | |
| template <> | | template <> | |
| class ets_base<ets_key_per_instance>: protected ets_base<ets_no_key
> { | | class ets_base<ets_key_per_instance>: protected ets_base<ets_no_key
> { | |
| typedef ets_base<ets_no_key> super; | | typedef ets_base<ets_no_key> super; | |
| #if _WIN32||_WIN64 | | #if _WIN32||_WIN64 | |
| typedef DWORD tls_key_t; | | typedef DWORD tls_key_t; | |
| void create_key() { my_key = TlsAlloc(); } | | void create_key() { my_key = TlsAlloc(); } | |
| void destroy_key() { TlsFree(my_key); } | | void destroy_key() { TlsFree(my_key); } | |
| void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value)
; } | | void set_tls(void * value) { TlsSetValue(my_key, (LPVOID)value)
; } | |
| | | | |
| skipping to change at line 581 | | skipping to change at line 578 | |
| return i.inner_iter == j.inner_iter; | | return i.inner_iter == j.inner_iter; | |
| } | | } | |
| | | | |
| // != | | // != | |
| template<typename SegmentedContainer, typename T, typename U> | | template<typename SegmentedContainer, typename T, typename U> | |
| bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, | | bool operator!=( const segmented_iterator<SegmentedContainer,T>& i, | |
| const segmented_iterator<SegmentedContainer,U>& j
) { | | const segmented_iterator<SegmentedContainer,U>& j
) { | |
| return !(i==j); | | return !(i==j); | |
| } | | } | |
| | | | |
|
| | | template<typename T> | |
| | | struct destruct_only: tbb::internal::no_copy { | |
| | | tbb::aligned_space<T,1> value; | |
| | | ~destruct_only() {value.begin()[0].~T();} | |
| | | }; | |
| | | | |
| | | template<typename T> | |
| | | struct construct_by_default: tbb::internal::no_assign { | |
| | | void construct(void*where) {new(where) T();} // C++ note: the ( | |
| | | ) in T() ensure zero initialization. | |
| | | construct_by_default( int ) {} | |
| | | }; | |
| | | | |
| | | template<typename T> | |
| | | struct construct_by_exemplar: tbb::internal::no_assign { | |
| | | const T exemplar; | |
| | | void construct(void*where) {new(where) T(exemplar);} | |
| | | construct_by_exemplar( const T& t ) : exemplar(t) {} | |
| | | }; | |
| | | | |
| | | template<typename T, typename Finit> | |
| | | struct construct_by_finit: tbb::internal::no_assign { | |
| | | Finit f; | |
| | | void construct(void* where) {new(where) T(f());} | |
| | | construct_by_finit( const Finit& f_ ) : f(f_) {} | |
| | | }; | |
| | | | |
| // storage for initialization function pointer | | // storage for initialization function pointer | |
| template<typename T> | | template<typename T> | |
|
| struct callback_base { | | class callback_base { | |
| virtual T apply( ) = 0; | | public: | |
| virtual void destroy( ) = 0; | | // Clone *this | |
| // need to be able to create copies of callback_base for copy c | | virtual callback_base* clone() = 0; | |
| onstructor | | // Destruct and free *this | |
| virtual callback_base* make_copy() = 0; | | virtual void destroy() = 0; | |
| // need virtual destructor to satisfy GCC compiler warning | | // Need virtual destructor to satisfy GCC compiler warning | |
| virtual ~callback_base() { } | | virtual ~callback_base() { } | |
|
| | | // Construct T at where | |
| | | virtual void construct(void* where) = 0; | |
| }; | | }; | |
| | | | |
|
| template <typename T, typename Functor> | | template <typename T, typename Constructor> | |
| struct callback_leaf : public callback_base<T>, public tbb::interna | | class callback_leaf: public callback_base<T>, Constructor { | |
| l::no_copy { | | template<typename X> callback_leaf( const X& x ) : Constructor( | |
| typedef Functor my_callback_type; | | x) {} | |
| typedef callback_leaf<T,Functor> my_type; | | | |
| typedef my_type* callback_pointer; | | typedef typename tbb::tbb_allocator<callback_leaf> my_allocator | |
| typedef typename tbb::tbb_allocator<my_type> my_allocator_type; | | _type; | |
| Functor f; | | | |
| callback_leaf( const Functor& f_) : f(f_) { | | /*override*/ callback_base<T>* clone() { | |
| } | | void* where = my_allocator_type().allocate(1); | |
| | | return new(where) callback_leaf(*this); | |
| static callback_pointer new_callback(const Functor& f_ ) { | | } | |
| void* new_void = my_allocator_type().allocate(1); | | | |
| callback_pointer new_cb = new (new_void) callback_leaf<T,Fu | | /*override*/ void destroy() { | |
| nctor>(f_); // placement new | | my_allocator_type().destroy(this); | |
| return new_cb; | | my_allocator_type().deallocate(this,1); | |
| } | | } | |
| | | | |
| /* override */ callback_pointer make_copy() { | | /*override*/ void construct(void* where) { | |
| return new_callback( f ); | | Constructor::construct(where); | |
| } | | } | |
| | | public: | |
| /* override */ void destroy( ) { | | template<typename X> | |
| callback_pointer my_ptr = this; | | static callback_base<T>* make( const X& x ) { | |
| my_allocator_type().destroy(my_ptr); | | void* where = my_allocator_type().allocate(1); | |
| my_allocator_type().deallocate(my_ptr,1); | | return new(where) callback_leaf(x); | |
| } | | } | |
| /* override */ T apply() { return f(); } // does copy construc | | | |
| tion of returned value. | | | |
| }; | | }; | |
| | | | |
| //! Template for adding padding in order to avoid false sharing | | //! Template for adding padding in order to avoid false sharing | |
| /** ModularSize should be sizeof(U) modulo the cache line size. | | /** ModularSize should be sizeof(U) modulo the cache line size. | |
| All maintenance of the space will be done explicitly on push_ba
ck, | | All maintenance of the space will be done explicitly on push_ba
ck, | |
| and all thread local copies must be destroyed before the concur
rent | | and all thread local copies must be destroyed before the concur
rent | |
| vector is deleted. | | vector is deleted. | |
| */ | | */ | |
| template<typename U, size_t ModularSize> | | template<typename U, size_t ModularSize> | |
| struct ets_element { | | struct ets_element { | |
|
| char value[sizeof(U) + tbb::internal::NFS_MaxLineSize-ModularSi
ze]; | | char value[ModularSize==0 ? sizeof(U) : sizeof(U)+(tbb::interna
l::NFS_MaxLineSize-ModularSize)]; | |
| void unconstruct() { | | void unconstruct() { | |
|
| // "reinterpret_cast<U*>(&value)->~U();" causes type-punnin | | tbb::internal::punned_cast<U*>(&value)->~U(); | |
| g warning with gcc 4.4, | | | |
| // "U* u = reinterpret_cast<U*>(&value); u->~U();" causes u | | | |
| nused variable warning with VS2010. | | | |
| // Thus another "casting via union" hack. | | | |
| __TBB_ASSERT(sizeof(void*)==sizeof(U*),NULL); | | | |
| union { void* space; U* val; } helper; | | | |
| helper.space = &value; | | | |
| helper.val->~U(); | | | |
| } | | | |
| }; | | | |
| | | | |
| //! Partial specialization for case where no padding is needed. | | | |
| template<typename U> | | | |
| struct ets_element<U,0> { | | | |
| char value[sizeof(U)]; | | | |
| void unconstruct() { // Same implementation as in general case | | | |
| __TBB_ASSERT(sizeof(void*)==sizeof(U*),NULL); | | | |
| union { void* space; U* val; } helper; | | | |
| helper.space = &value; | | | |
| helper.val->~U(); | | | |
| } | | } | |
| }; | | }; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| //! @endcond | | //! @endcond | |
| | | | |
| //! The enumerable_thread_specific container | | //! The enumerable_thread_specific container | |
| /** enumerable_thread_specific has the following properties: | | /** enumerable_thread_specific has the following properties: | |
| - thread-local copies are lazily created, with default, exemplar or
function initialization. | | - thread-local copies are lazily created, with default, exemplar or
function initialization. | |
| - thread-local copies do not move (during lifetime, and excepting c
lear()) so the address of a copy is invariant. | | - thread-local copies do not move (during lifetime, and excepting c
lear()) so the address of a copy is invariant. | |
| | | | |
| skipping to change at line 702 | | skipping to change at line 709 | |
| typedef ptrdiff_t difference_type; | | typedef ptrdiff_t difference_type; | |
| generic_range_type( I begin_, I end_, size_t grainsize_ = 1) :
blocked_range<I>(begin_,end_,grainsize_) {} | | generic_range_type( I begin_, I end_, size_t grainsize_ = 1) :
blocked_range<I>(begin_,end_,grainsize_) {} | |
| template<typename U> | | template<typename U> | |
| generic_range_type( const generic_range_type<U>& r) : blocked_r
ange<I>(r.begin(),r.end(),r.grainsize()) {} | | generic_range_type( const generic_range_type<U>& r) : blocked_r
ange<I>(r.begin(),r.end(),r.grainsize()) {} | |
| generic_range_type( generic_range_type& r, split ) : blocked_ra
nge<I>(r,split()) {} | | generic_range_type( generic_range_type& r, split ) : blocked_ra
nge<I>(r,split()) {} | |
| }; | | }; | |
| | | | |
| typedef typename Allocator::template rebind< padded_element >::othe
r padded_allocator_type; | | typedef typename Allocator::template rebind< padded_element >::othe
r padded_allocator_type; | |
| typedef tbb::concurrent_vector< padded_element, padded_allocator_ty
pe > internal_collection_type; | | typedef tbb::concurrent_vector< padded_element, padded_allocator_ty
pe > internal_collection_type; | |
| | | | |
|
| internal::callback_base<T> *my_finit_callback; | | internal::callback_base<T> *my_construct_callback; | |
| | | | |
| // need to use a pointed-to exemplar because T may not be assignabl | | | |
| e. | | | |
| // using tbb_allocator instead of padded_element_allocator because | | | |
| we may be | | | |
| // copying an exemplar from one instantiation of ETS to another wit | | | |
| h a different | | | |
| // allocator. | | | |
| typedef typename tbb::tbb_allocator<padded_element > exemplar_alloc | | | |
| ator_type; | | | |
| static padded_element * create_exemplar(const T& my_value) { | | | |
| padded_element *new_exemplar = reinterpret_cast<padded_element | | | |
| *>(exemplar_allocator_type().allocate(1)); | | | |
| new(new_exemplar->value) T(my_value); | | | |
| return new_exemplar; | | | |
| } | | | |
| | | | |
| static padded_element *create_exemplar( ) { | | | |
| padded_element *new_exemplar = reinterpret_cast<padded_element | | | |
| *>(exemplar_allocator_type().allocate(1)); | | | |
| new(new_exemplar->value) T( ); | | | |
| return new_exemplar; | | | |
| } | | | |
| | | | |
| static void free_exemplar(padded_element *my_ptr) { | | | |
| my_ptr->unconstruct(); | | | |
| exemplar_allocator_type().destroy(my_ptr); | | | |
| exemplar_allocator_type().deallocate(my_ptr,1); | | | |
| } | | | |
| | | | |
| padded_element* my_exemplar_ptr; | | | |
| | | | |
| internal_collection_type my_locals; | | internal_collection_type my_locals; | |
| | | | |
| /*override*/ void* create_local() { | | /*override*/ void* create_local() { | |
| #if TBB_DEPRECATED | | #if TBB_DEPRECATED | |
| void* lref = &my_locals[my_locals.push_back(padded_element())]; | | void* lref = &my_locals[my_locals.push_back(padded_element())]; | |
| #else | | #else | |
| void* lref = &*my_locals.push_back(padded_element()); | | void* lref = &*my_locals.push_back(padded_element()); | |
| #endif | | #endif | |
|
| if(my_finit_callback) { | | my_construct_callback->construct(lref); | |
| new(lref) T(my_finit_callback->apply()); | | | |
| } else if(my_exemplar_ptr) { | | | |
| pointer t_exemp = reinterpret_cast<T *>(&(my_exemplar_ptr-> | | | |
| value)); | | | |
| new(lref) T(*t_exemp); | | | |
| } else { | | | |
| new(lref) T(); | | | |
| } | | | |
| return lref; | | return lref; | |
| } | | } | |
| | | | |
| void unconstruct_locals() { | | void unconstruct_locals() { | |
| for(typename internal_collection_type::iterator cvi = my_locals
.begin(); cvi != my_locals.end(); ++cvi) { | | for(typename internal_collection_type::iterator cvi = my_locals
.begin(); cvi != my_locals.end(); ++cvi) { | |
| cvi->unconstruct(); | | cvi->unconstruct(); | |
| } | | } | |
| } | | } | |
| | | | |
| typedef typename Allocator::template rebind< uintptr_t >::other arr
ay_allocator_type; | | typedef typename Allocator::template rebind< uintptr_t >::other arr
ay_allocator_type; | |
| | | | |
| skipping to change at line 787 | | skipping to change at line 762 | |
| typedef typename internal_collection_type::difference_type differen
ce_type; | | typedef typename internal_collection_type::difference_type differen
ce_type; | |
| | | | |
| // Iterator types | | // Iterator types | |
| typedef typename internal::enumerable_thread_specific_iterator< int
ernal_collection_type, value_type > iterator; | | typedef typename internal::enumerable_thread_specific_iterator< int
ernal_collection_type, value_type > iterator; | |
| typedef typename internal::enumerable_thread_specific_iterator< int
ernal_collection_type, const value_type > const_iterator; | | typedef typename internal::enumerable_thread_specific_iterator< int
ernal_collection_type, const value_type > const_iterator; | |
| | | | |
| // Parallel range types | | // Parallel range types | |
| typedef generic_range_type< iterator > range_type; | | typedef generic_range_type< iterator > range_type; | |
| typedef generic_range_type< const_iterator > const_range_type; | | typedef generic_range_type< const_iterator > const_range_type; | |
| | | | |
|
| //! Default constructor, which leads to default construction of loc | | //! Default constructor. Each local instance of T is default const | |
| al copies | | ructed. | |
| enumerable_thread_specific() : my_finit_callback(0) { | | enumerable_thread_specific() : | |
| my_exemplar_ptr = 0; | | my_construct_callback( internal::callback_leaf<T,internal::cons | |
| } | | truct_by_default<T> >::make(/*dummy argument*/0) ) | |
| | | {} | |
| | | | |
|
| //! construction with initializer method | | //! Constructor with initializer functor. Each local instance of T | |
| // Finit should be a function taking 0 parameters and returning a T | | is constructed by T(finit()). | |
| template <typename Finit> | | template <typename Finit> | |
|
| enumerable_thread_specific( Finit _finit ) | | enumerable_thread_specific( Finit finit ) : | |
| { | | my_construct_callback( internal::callback_leaf<T,internal::cons | |
| my_finit_callback = internal::callback_leaf<T,Finit>::new_callb | | truct_by_finit<T,Finit> >::make( finit ) ) | |
| ack( _finit ); | | {} | |
| my_exemplar_ptr = 0; // don't need exemplar if function is prov | | | |
| ided | | //! Constuctor with exemplar. Each local instance of T is copied-c | |
| } | | onstructed from the exemplar. | |
| | | enumerable_thread_specific(const T& exemplar) : | |
| //! Constuction with exemplar, which leads to copy construction of | | my_construct_callback( internal::callback_leaf<T,internal::cons | |
| local copies | | truct_by_exemplar<T> >::make( exemplar ) ) | |
| enumerable_thread_specific(const T &_exemplar) : my_finit_callback( | | {} | |
| 0) { | | | |
| my_exemplar_ptr = create_exemplar(_exemplar); | | | |
| } | | | |
| | | | |
| //! Destructor | | //! Destructor | |
| ~enumerable_thread_specific() { | | ~enumerable_thread_specific() { | |
|
| if(my_finit_callback) { | | my_construct_callback->destroy(); | |
| my_finit_callback->destroy(); | | | |
| } | | | |
| if(my_exemplar_ptr) { | | | |
| free_exemplar(my_exemplar_ptr); | | | |
| } | | | |
| this->clear(); // deallocation before the derived class is fin
ished destructing | | this->clear(); // deallocation before the derived class is fin
ished destructing | |
| // So free(array *) is still accessible | | // So free(array *) is still accessible | |
| } | | } | |
| | | | |
| //! Returns reference to the local copy, discarding the exists flag | | //! Returns reference to the local copy, discarding the exists flag | |
| reference local() { | | reference local() { | |
| bool exists; | | bool exists; | |
| return local(exists); | | return local(exists); | |
| } | | } | |
| | | | |
| //! Returns reference to calling thread's local copy, creating one
if necessary | | //! Returns reference to calling thread's local copy, creating one
if necessary | |
| reference local(bool& exists) { | | reference local(bool& exists) { | |
|
| __TBB_ASSERT(ETS_key_type==ets_no_key,"ets_key_per_instance not
yet implemented"); | | | |
| void* ptr = this->table_lookup(exists); | | void* ptr = this->table_lookup(exists); | |
| return *(T*)ptr; | | return *(T*)ptr; | |
| } | | } | |
| | | | |
| //! Get the number of local copies | | //! Get the number of local copies | |
| size_type size() const { return my_locals.size(); } | | size_type size() const { return my_locals.size(); } | |
| | | | |
| //! true if there have been no local copies created | | //! true if there have been no local copies created | |
| bool empty() const { return my_locals.empty(); } | | bool empty() const { return my_locals.empty(); } | |
| | | | |
| | | | |
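A hedged sketch of the three construction modes and of local() declared above; the functor, variable names, and values are illustrative, and only members visible in this header are used.

    #include "tbb/enumerable_thread_specific.h"

    struct start_at_ten {
        int operator()() const { return 10; }     // Finit: takes no arguments, returns T
    };

    typedef tbb::enumerable_thread_specific<int> counter_t;

    void construction_modes() {
        counter_t by_default;                      // each local copy is int()
        counter_t by_exemplar( 5 );                // each copy is copy-constructed from 5
        start_at_ten init;
        counter_t by_finit( init );                // each copy is int( init() )

        bool exists;
        int& mine = by_exemplar.local( exists );   // lazily creates this thread's copy
        if( !exists )
            mine += 1;                             // first touch by the calling thread
    }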
| skipping to change at line 888 | | skipping to change at line 854 | |
| internal_copy(other); | | internal_copy(other); | |
| } | | } | |
| | | | |
| private: | | private: | |
| | | | |
| template<typename U, typename A2, ets_key_usage_type C2> | | template<typename U, typename A2, ets_key_usage_type C2> | |
| enumerable_thread_specific & | | enumerable_thread_specific & | |
| internal_assign(const enumerable_thread_specific<U, A2, C2>& other)
{ | | internal_assign(const enumerable_thread_specific<U, A2, C2>& other)
{ | |
| if(static_cast<void *>( this ) != static_cast<const void *>( &o
ther )) { | | if(static_cast<void *>( this ) != static_cast<const void *>( &o
ther )) { | |
| this->clear(); | | this->clear(); | |
|
| if(my_finit_callback) { | | my_construct_callback->destroy(); | |
| my_finit_callback->destroy(); | | my_construct_callback = 0; | |
| my_finit_callback = 0; | | | |
| } | | | |
| if(my_exemplar_ptr) { | | | |
| free_exemplar(my_exemplar_ptr); | | | |
| my_exemplar_ptr = 0; | | | |
| } | | | |
| internal_copy( other ); | | internal_copy( other ); | |
| } | | } | |
| return *this; | | return *this; | |
| } | | } | |
| | | | |
| public: | | public: | |
| | | | |
| // assignment | | // assignment | |
| enumerable_thread_specific& operator=(const enumerable_thread_speci
fic& other) { | | enumerable_thread_specific& operator=(const enumerable_thread_speci
fic& other) { | |
| return internal_assign(other); | | return internal_assign(other); | |
| | | | |
| skipping to change at line 918 | | skipping to change at line 878 | |
| template<typename U, typename Alloc, ets_key_usage_type Cachetype> | | template<typename U, typename Alloc, ets_key_usage_type Cachetype> | |
| enumerable_thread_specific& operator=(const enumerable_thread_speci
fic<U, Alloc, Cachetype>& other) | | enumerable_thread_specific& operator=(const enumerable_thread_speci
fic<U, Alloc, Cachetype>& other) | |
| { | | { | |
| return internal_assign(other); | | return internal_assign(other); | |
| } | | } | |
| | | | |
| // combine_func_t has signature T(T,T) or T(const T&, const T&) | | // combine_func_t has signature T(T,T) or T(const T&, const T&) | |
| template <typename combine_func_t> | | template <typename combine_func_t> | |
| T combine(combine_func_t f_combine) { | | T combine(combine_func_t f_combine) { | |
| if(begin() == end()) { | | if(begin() == end()) { | |
|
| if(my_finit_callback) { | | internal::destruct_only<T> location; | |
| return my_finit_callback->apply(); | | my_construct_callback->construct(location.value.begin()); | |
| } | | return *location.value.begin(); | |
| pointer local_ref = reinterpret_cast<T*>((my_exemplar_ptr-> | | | |
| value)); | | | |
| return T(*local_ref); | | | |
| } | | } | |
| const_iterator ci = begin(); | | const_iterator ci = begin(); | |
| T my_result = *ci; | | T my_result = *ci; | |
| while(++ci != end()) | | while(++ci != end()) | |
| my_result = f_combine( my_result, *ci ); | | my_result = f_combine( my_result, *ci ); | |
| return my_result; | | return my_result; | |
| } | | } | |
| | | | |
| // combine_func_t has signature void(T) or void(const T&) | | // combine_func_t has signature void(T) or void(const T&) | |
| template <typename combine_func_t> | | template <typename combine_func_t> | |
| | | | |
| skipping to change at line 944 | | skipping to change at line 902 | |
| for(const_iterator ci = begin(); ci != end(); ++ci) { | | for(const_iterator ci = begin(); ci != end(); ++ci) { | |
| f_combine( *ci ); | | f_combine( *ci ); | |
| } | | } | |
| } | | } | |
| | | | |
| }; // enumerable_thread_specific | | }; // enumerable_thread_specific | |
| | | | |
| template <typename T, typename Allocator, ets_key_usage_type ETS_key_ty
pe> | | template <typename T, typename Allocator, ets_key_usage_type ETS_key_ty
pe> | |
| template<typename U, typename A2, ets_key_usage_type C2> | | template<typename U, typename A2, ets_key_usage_type C2> | |
| void enumerable_thread_specific<T,Allocator,ETS_key_type>::internal_cop
y( const enumerable_thread_specific<U, A2, C2>& other) { | | void enumerable_thread_specific<T,Allocator,ETS_key_type>::internal_cop
y( const enumerable_thread_specific<U, A2, C2>& other) { | |
|
| | | // Initialize my_construct_callback first, so that it is valid even | |
| | | if rest of this routine throws an exception. | |
| | | my_construct_callback = other.my_construct_callback->clone(); | |
| | | | |
| typedef internal::ets_base<ets_no_key> base; | | typedef internal::ets_base<ets_no_key> base; | |
| __TBB_ASSERT(my_locals.size()==0,NULL); | | __TBB_ASSERT(my_locals.size()==0,NULL); | |
| this->table_reserve_for_copy( other ); | | this->table_reserve_for_copy( other ); | |
| for( base::array* r=other.my_root; r; r=r->next ) { | | for( base::array* r=other.my_root; r; r=r->next ) { | |
| for( size_t i=0; i<r->size(); ++i ) { | | for( size_t i=0; i<r->size(); ++i ) { | |
| base::slot& s1 = r->at(i); | | base::slot& s1 = r->at(i); | |
| if( !s1.empty() ) { | | if( !s1.empty() ) { | |
| base::slot& s2 = this->table_find(s1.key); | | base::slot& s2 = this->table_find(s1.key); | |
| if( s2.empty() ) { | | if( s2.empty() ) { | |
| #if TBB_DEPRECATED | | #if TBB_DEPRECATED | |
| | | | |
| skipping to change at line 966 | | skipping to change at line 927 | |
| void* lref = &*my_locals.push_back(padded_element()
); | | void* lref = &*my_locals.push_back(padded_element()
); | |
| #endif | | #endif | |
| s2.ptr = new(lref) T(*(U*)s1.ptr); | | s2.ptr = new(lref) T(*(U*)s1.ptr); | |
| s2.key = s1.key; | | s2.key = s1.key; | |
| } else { | | } else { | |
| // Skip the duplicate | | // Skip the duplicate | |
| } | | } | |
| } | | } | |
| } | | } | |
| } | | } | |
|
| if(other.my_finit_callback) { | | | |
| my_finit_callback = other.my_finit_callback->make_copy(); | | | |
| } else { | | | |
| my_finit_callback = 0; | | | |
| } | | | |
| if(other.my_exemplar_ptr) { | | | |
| pointer local_ref = reinterpret_cast<U*>(other.my_exemplar_ptr- | | | |
| >value); | | | |
| my_exemplar_ptr = create_exemplar(*local_ref); | | | |
| } else { | | | |
| my_exemplar_ptr = 0; | | | |
| } | | | |
| } | | } | |
| | | | |
| template< typename Container > | | template< typename Container > | |
| class flattened2d { | | class flattened2d { | |
| | | | |
| // This intermediate typedef is to address issues with VC7.1 compil
ers | | // This intermediate typedef is to address issues with VC7.1 compil
ers | |
| typedef typename Container::value_type conval_type; | | typedef typename Container::value_type conval_type; | |
| | | | |
| public: | | public: | |
| | | | |
| | | | |
| skipping to change at line 1037 | | skipping to change at line 987 | |
| template <typename Container> | | template <typename Container> | |
| flattened2d<Container> flatten2d(const Container &c, const typename Con
tainer::const_iterator b, const typename Container::const_iterator e) { | | flattened2d<Container> flatten2d(const Container &c, const typename Con
tainer::const_iterator b, const typename Container::const_iterator e) { | |
| return flattened2d<Container>(c, b, e); | | return flattened2d<Container>(c, b, e); | |
| } | | } | |
| | | | |
| template <typename Container> | | template <typename Container> | |
| flattened2d<Container> flatten2d(const Container &c) { | | flattened2d<Container> flatten2d(const Container &c) { | |
| return flattened2d<Container>(c); | | return flattened2d<Container>(c); | |
| } | | } | |
| | | | |
|
| } // interface5 | | } // interface6 | |
| | | | |
| namespace internal { | | namespace internal { | |
|
| using interface5::internal::segmented_iterator; | | using interface6::internal::segmented_iterator; | |
| } | | } | |
| | | | |
|
| using interface5::enumerable_thread_specific; | | using interface6::enumerable_thread_specific; | |
| using interface5::flattened2d; | | using interface6::flattened2d; | |
| using interface5::flatten2d; | | using interface6::flatten2d; | |
| | | | |
| } // namespace tbb | | } // namespace tbb | |
| | | | |
| #endif | | #endif | |
| | | | |
End of changes. 28 change blocks. |
| 170 lines changed or deleted | | 109 lines changed or added | |
|
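For context on the constructors being diffed above: the new code funnels default, exemplar and finit initialization through a single my_construct_callback, but the public usage stays the same. A minimal usage sketch, assuming TBB headers of this vintage and plain C++03 (all non-TBB names below are illustrative only):

    #include <functional>                        // std::plus
    #include "tbb/enumerable_thread_specific.h"
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"

    typedef tbb::enumerable_thread_specific<long> Counters;

    Counters by_exemplar(0L);            // each local copy is copy-constructed from the exemplar 0L
    struct StartAt100 { long operator()() const { return 100L; } };
    StartAt100 start_at_100;
    Counters by_finit(start_at_100);     // each local copy is built as T(finit()), i.e. long(100)

    struct CountBody {
        void operator()( const tbb::blocked_range<int>& r ) const {
            by_exemplar.local() += long(r.size());   // first call from a thread creates its copy
        }
    };

    long total() {
        tbb::parallel_for( tbb::blocked_range<int>(0, 100000), CountBody() );
        return by_exemplar.combine( std::plus<long>() );   // sums all thread-local copies
    }
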
| linux_ia32.h | | linux_ia32.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free softwar
e | | As a special exception, you may use this file as part of a free softwar
e | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you comp
ile | | templates or use macros or inline functions from this file, or you comp
ile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers | |
| | | instead. | |
| #endif | | #endif | |
| | | | |
|
| #if !__MINGW32__ | | #define __TBB_machine_linux_ia32_H | |
| #include "linux_common.h" | | | |
| #endif | | #include <stdint.h> | |
| | | #include <unistd.h> | |
| | | | |
| #define __TBB_WORDSIZE 4 | | #define __TBB_WORDSIZE 4 | |
| #define __TBB_BIG_ENDIAN 0 | | #define __TBB_BIG_ENDIAN 0 | |
| | | | |
|
| #define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"mem | | #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") | |
| ory") | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| inline void __TBB_rel_acq_fence() { __asm__ __volatile__("mfence": : :"memo | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| ry"); } | | #define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : | |
| | | :"memory") | |
| | | | |
| #if __TBB_ICC_ASM_VOLATILE_BROKEN | | #if __TBB_ICC_ASM_VOLATILE_BROKEN | |
| #define __TBB_VOLATILE | | #define __TBB_VOLATILE | |
| #else | | #else | |
| #define __TBB_VOLATILE volatile | | #define __TBB_VOLATILE volatile | |
| #endif | | #endif | |
| | | | |
|
| #define __MACHINE_DECL_ATOMICS(S,T,X) \ | | #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R)
\ | |
| static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com
parand ) \ | | static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com
parand ) \ | |
| {
\ | | {
\ | |
| T result;
\ | | T result;
\ | |
|
\ | |
\ | |
| __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"
\ | | __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"
\ | |
| : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | | : "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | |
| : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATIL
E T*)ptr) \ | | : "q"(value), "0"(comparand), "m"(*(__TBB_VOLATIL
E T*)ptr) \ | |
| : "memory");
\ | | : "memory");
\ | |
| return result;
\ | | return result;
\ | |
| }
\ | | }
\ | |
|
\ | |
\ | |
| static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend)
\ | | static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend)
\ | |
| {
\ | | {
\ | |
| T result;
\ | | T result;
\ | |
| __asm__ __volatile__("lock\nxadd" X " %0,%1"
\ | | __asm__ __volatile__("lock\nxadd" X " %0,%1"
\ | |
|
| : "=r"(result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | | : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | |
| : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr)
\ | | : "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr)
\ | |
| : "memory");
\ | | : "memory");
\ | |
| return result;
\ | | return result;
\ | |
| }
\ | | }
\ | |
|
\ | |
\ | |
| static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value)
\ | | static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value)
\ | |
| {
\ | | {
\ | |
| T result;
\ | | T result;
\ | |
| __asm__ __volatile__("lock\nxchg" X " %0,%1"
\ | | __asm__ __volatile__("lock\nxchg" X " %0,%1"
\ | |
|
| : "=r"(result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | | : R (result), "=m"(*(__TBB_VOLATILE T*)ptr)
\ | |
| : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr)
\ | | : "0"(value), "m"(*(__TBB_VOLATILE T*)ptr)
\ | |
| : "memory");
\ | | : "memory");
\ | |
| return result;
\ | | return result;
\ | |
| }
\ | | }
\ | |
| | | | |
|
| __MACHINE_DECL_ATOMICS(1,int8_t,"") | | __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q") | |
| __MACHINE_DECL_ATOMICS(2,int16_t,"") | | __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r") | |
| __MACHINE_DECL_ATOMICS(4,int32_t,"l") | | __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r") | |
| | | | |
| | | #if __INTEL_COMPILER | |
| | | #pragma warning( push ) | |
| | | // reference to EBX in a function requiring stack alignment | |
| | | #pragma warning( disable: 998 ) | |
| | | #endif | |
| | | | |
| static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t va
lue, int64_t comparand ) | | static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t va
lue, int64_t comparand ) | |
| { | | { | |
| int64_t result; | | int64_t result; | |
|
| | | union { | |
| | | int64_t i64; | |
| | | int32_t i32[2]; | |
| | | }; | |
| | | i64 = value; | |
| #if __PIC__ | | #if __PIC__ | |
| /* compiling position-independent code */ | | /* compiling position-independent code */ | |
| // EBX register preserved for compliance with position-independent code
rules on IA32 | | // EBX register preserved for compliance with position-independent code
rules on IA32 | |
|
| | | int32_t tmp; | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
|
| "pushl %%ebx\n\t" | | "movl %%ebx,%2\n\t" | |
| "movl (%%ecx),%%ebx\n\t" | | "movl %5,%%ebx\n\t" | |
| "movl 4(%%ecx),%%ecx\n\t" | | #if __GNUC__==3 | |
| "lock\n\t cmpxchg8b %1\n\t" | | "lock\n\t cmpxchg8b %1\n\t" | |
|
| "popl %%ebx" | | #else | |
| : "=A"(result), "=m"(*(int64_t *)ptr) | | "lock\n\t cmpxchg8b (%3)\n\t" | |
| : "m"(*(int64_t *)ptr) | | #endif | |
| | | "movl %2,%%ebx" | |
| | | : "=A"(result) | |
| | | , "=m"(*(__TBB_VOLATILE int64_t *)ptr) | |
| | | , "=m"(tmp) | |
| | | #if __GNUC__==3 | |
| | | : "m"(*(__TBB_VOLATILE int64_t *)ptr) | |
| | | #else | |
| | | : "SD"(ptr) | |
| | | #endif | |
| , "0"(comparand) | | , "0"(comparand) | |
|
| , "c"(&value) | | , "m"(i32[0]), "c"(i32[1]) | |
| : "memory", "esp" | | : "memory" | |
| #if __INTEL_COMPILER | | #if __INTEL_COMPILER | |
| ,"ebx" | | ,"ebx" | |
| #endif | | #endif | |
| ); | | ); | |
| #else /* !__PIC__ */ | | #else /* !__PIC__ */ | |
|
| union { | | | |
| int64_t i64; | | | |
| int32_t i32[2]; | | | |
| }; | | | |
| i64 = value; | | | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
| "lock\n\t cmpxchg8b %1\n\t" | | "lock\n\t cmpxchg8b %1\n\t" | |
| : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr) | | : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr) | |
| : "m"(*(__TBB_VOLATILE int64_t *)ptr) | | : "m"(*(__TBB_VOLATILE int64_t *)ptr) | |
| , "0"(comparand) | | , "0"(comparand) | |
| , "b"(i32[0]), "c"(i32[1]) | | , "b"(i32[0]), "c"(i32[1]) | |
| : "memory" | | : "memory" | |
| ); | | ); | |
| #endif /* __PIC__ */ | | #endif /* __PIC__ */ | |
| return result; | | return result; | |
| } | | } | |
| | | | |
|
| | | #if __INTEL_COMPILER | |
| | | #pragma warning( pop ) | |
| | | #endif // warning 998 is back | |
| | | | |
| static inline int32_t __TBB_machine_lg( uint32_t x ) { | | static inline int32_t __TBB_machine_lg( uint32_t x ) { | |
| int32_t j; | | int32_t j; | |
| __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); | | __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); | |
| return j; | | return j; | |
| } | | } | |
| | | | |
| static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend )
{ | | static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend )
{ | |
| __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t
*)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); | | __asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t
*)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); | |
| } | | } | |
| | | | |
| | | | |
| skipping to change at line 178 | | skipping to change at line 201 | |
| "fistpq %0" : "=m"(*(__TBB_VOLATILE int64_t
*)ptr) : "m"(value) : "memory" ); | | "fistpq %0" : "=m"(*(__TBB_VOLATILE int64_t
*)ptr) : "m"(value) : "memory" ); | |
| } else { | | } else { | |
| // Unaligned store | | // Unaligned store | |
| #if TBB_USE_PERFORMANCE_WARNINGS | | #if TBB_USE_PERFORMANCE_WARNINGS | |
| __TBB_machine_store8_slow_perf_warning(ptr); | | __TBB_machine_store8_slow_perf_warning(ptr); | |
| #endif /* TBB_USE_PERFORMANCE_WARNINGS */ | | #endif /* TBB_USE_PERFORMANCE_WARNINGS */ | |
| __TBB_machine_store8_slow(ptr,value); | | __TBB_machine_store8_slow(ptr,value); | |
| } | | } | |
| } | | } | |
| | | | |
|
| template <typename T, size_t S> | | | |
| struct __TBB_machine_load_store { | | | |
| static inline T load_with_acquire(const volatile T& location) { | | | |
| T to_return = location; | | | |
| __asm__ __volatile__("" : : : "memory" ); // Compiler fence to ke | | | |
| ep operations from migrating upwards | | | |
| return to_return; | | | |
| } | | | |
| | | | |
| static inline void store_with_release(volatile T &location, T value) { | | | |
| __asm__ __volatile__("" : : : "memory" ); // Compiler fence to ke | | | |
| ep operations from migrating upwards | | | |
| location = value; | | | |
| } | | | |
| }; | | | |
| | | | |
| template <typename T> | | | |
| struct __TBB_machine_load_store<T,8> { | | | |
| static inline T load_with_acquire(const volatile T& location) { | | | |
| T to_return = __TBB_machine_load8((const volatile void *)&location) | | | |
| ; | | | |
| __asm__ __volatile__("" : : : "memory" ); // Compiler fence to ke | | | |
| ep operations from migrating upwards | | | |
| return to_return; | | | |
| } | | | |
| | | | |
| static inline void store_with_release(volatile T &location, T value) { | | | |
| __asm__ __volatile__("" : : : "memory" ); // Compiler fence to ke | | | |
| ep operations from migrating downwards | | | |
| __TBB_machine_store8((volatile void *)&location,(int64_t)value); | | | |
| } | | | |
| }; | | | |
| | | | |
| #undef __TBB_VOLATILE | | | |
| | | | |
| template<typename T> | | | |
| inline T __TBB_machine_load_with_acquire(const volatile T &location) { | | | |
| return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(locatio | | | |
| n); | | | |
| } | | | |
| | | | |
| template<typename T, typename V> | | | |
| inline void __TBB_machine_store_with_release(volatile T &location, V value) | | | |
| { | | | |
| __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,valu | | | |
| e); | | | |
| } | | | |
| | | | |
| #define __TBB_load_with_acquire(L) __TBB_machine_load_with_acquire((L)) | | | |
| #define __TBB_store_with_release(L,V) __TBB_machine_store_with_release((L), | | | |
| (V)) | | | |
| | | | |
| // Machine specific atomic operations | | // Machine specific atomic operations | |
|
| | | | |
| #define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C) | | | |
| #define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C) | | | |
| #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | | |
| #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | | |
| | | | |
| #define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V) | | | |
| #define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V) | | | |
| #define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V) | | | |
| #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V) | | | |
| | | | |
| #define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V) | | | |
| #define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V) | | | |
| #define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V) | | | |
| #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V) | | | |
| | | | |
| #define __TBB_Store8(P,V) __TBB_machine_store8(P,V) | | | |
| #define __TBB_Load8(P) __TBB_machine_load8(P) | | | |
| | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | | #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | | #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | |
| | | | |
|
| // Those we chose not to implement (they will be implemented generically us | | | |
| ing CMPSWP8) | | | |
| #undef __TBB_FetchAndAdd8 | | | |
| #undef __TBB_FetchAndStore8 | | | |
| | | | |
| // Definition of other functions | | // Definition of other functions | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
|
| // Special atomic functions | | #define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1 | |
| #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) | | #define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1 | |
| #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) | | #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | |
| #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1) | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| // Use generic definitions from tbb_machine.h | | | |
| #undef __TBB_TryLockByte | | // API to retrieve/update FPU control setting | |
| #undef __TBB_LockByte | | #define __TBB_CPU_CTL_ENV_PRESENT 1 | |
| | | | |
| | | struct __TBB_cpu_ctl_env_t { | |
| | | int mxcsr; | |
| | | short x87cw; | |
| | | }; | |
| | | | |
| | | inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) { | |
| | | __asm__ __volatile__ ( | |
| | | "stmxcsr %0\n\t" | |
| | | "fstcw %1" | |
| | | : "=m"(ctl->mxcsr), "=m"(ctl->x87cw) | |
| | | ); | |
| | | } | |
| | | inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) { | |
| | | __asm__ __volatile__ ( | |
| | | "ldmxcsr %0\n\t" | |
| | | "fldcw %1" | |
| | | : : "m"(ctl->mxcsr), "m"(ctl->x87cw) | |
| | | ); | |
| | | } | |
| | | | |
End of changes. 19 change blocks. |
| 107 lines changed or deleted | | 53 lines changed or added | |
|
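To make the macro above easier to follow, this is roughly what __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r") generates for the compare-and-swap entry point, written out as a standalone function (a sketch only, GCC-style inline assembly on IA-32; the real macro also emits the fetch-and-add and fetch-and-store variants shown in the diff):

    #include <stdint.h>

    // Returns the value observed at *ptr; the swap took effect iff that value == comparand.
    static inline int32_t cas4_sketch( volatile void* ptr, int32_t value, int32_t comparand ) {
        int32_t result;
        __asm__ __volatile__( "lock\ncmpxchgl %2,%1"
                              : "=a"(result), "=m"(*(volatile int32_t*)ptr)
                              : "q"(value), "0"(comparand), "m"(*(volatile int32_t*)ptr)
                              : "memory" );
        return result;
    }

    // Typical use: retry loop that atomically increments a counter.
    static inline void increment_sketch( volatile int32_t* counter ) {
        int32_t old_value, seen;
        do {
            old_value = *counter;
            seen = cas4_sketch( counter, old_value + 1, old_value );
        } while ( seen != old_value );
    }
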
| linux_ia64.h | | linux_ia64.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free softwar
e | | As a special exception, you may use this file as part of a free softwar
e | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you comp
ile | | templates or use macros or inline functions from this file, or you comp
ile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers | |
| | | instead. | |
| #endif | | #endif | |
| | | | |
|
| #include "linux_common.h" | | #define __TBB_machine_linux_ia64_H | |
| | | | |
| | | #include <stdint.h> | |
| | | #include <unistd.h> | |
| #include <ia64intrin.h> | | #include <ia64intrin.h> | |
| | | | |
| #define __TBB_WORDSIZE 8 | | #define __TBB_WORDSIZE 8 | |
| #define __TBB_BIG_ENDIAN 0 | | #define __TBB_BIG_ENDIAN 0 | |
|
| #define __TBB_DECL_FENCED_ATOMICS 1 | | | |
| | | #if __INTEL_COMPILER | |
| | | #define __TBB_compiler_fence() | |
| | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_acquire_consistency_helper() | |
| | | #define __TBB_release_consistency_helper() | |
| | | #define __TBB_full_memory_fence() __mf() | |
| | | #else | |
| | | #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") | |
| | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| | | // Even though GCC imbues volatile loads with acquire semantics, it som | |
| | | etimes moves | |
| | | // loads over the acquire fence. The following helpers stop such incorr | |
| | | ect code motion. | |
| | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_full_memory_fence() __asm__ __volatile__("mf": : | |
| | | :"memory") | |
| | | #endif /* !__INTEL_COMPILER */ | |
| | | | |
| // Most of the functions will be in a .s file | | // Most of the functions will be in a .s file | |
| | | | |
| extern "C" { | | extern "C" { | |
|
| int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_
t value, int8_t comparand); | | | |
| int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int
8_t addend); | | int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int
8_t addend); | |
| int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend
); | | int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend
); | |
| int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend
); | | int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend
); | |
|
| int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t valu | | | |
| e); | | | |
| int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t valu | | | |
| e); | | | |
| | | | |
|
| int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int1
6_t value, int16_t comparand); | | | |
| int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, in
t16_t addend); | | int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, in
t16_t addend); | |
| int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t adde
nd); | | int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t adde
nd); | |
| int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t adde
nd); | | int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t adde
nd); | |
|
| | | | |
| | | int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, in | |
| | | t32_t value); | |
| | | int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t adde | |
| | | nd); | |
| | | int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t adde | |
| | | nd); | |
| | | | |
| | | int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, in | |
| | | t64_t value); | |
| | | int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t adde | |
| | | nd); | |
| | | int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t adde | |
| | | nd); | |
| | | | |
| | | int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, i | |
| | | nt8_t value); | |
| | | int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t valu | |
| | | e); | |
| | | int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t valu | |
| | | e); | |
| | | | |
| | | int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, | |
| | | int16_t value); | |
| int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t va
lue); | | int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t va
lue); | |
| int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t va
lue); | | int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t va
lue); | |
| | | | |
| int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr,
int32_t value); | | int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr,
int32_t value); | |
| int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t va
lue); | | int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t va
lue); | |
| int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t va
lue); | | int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t va
lue); | |
|
| int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t adde | | | |
| nd); | | | |
| int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t adde | | | |
| nd); | | | |
| | | | |
|
| int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int6
4_t value, int64_t comparand); | | | |
| int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr,
int64_t value); | | int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr,
int64_t value); | |
| int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t va
lue); | | int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t va
lue); | |
| int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t va
lue); | | int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t va
lue); | |
|
| int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t adde | | | |
| nd); | | | |
| int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t adde | | | |
| nd); | | | |
| | | | |
|
| | | int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_
t value, int8_t comparand); | |
| int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, i
nt8_t comparand); | | int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, i
nt8_t comparand); | |
| int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, i
nt8_t comparand); | | int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, i
nt8_t comparand); | |
|
| int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, i
nt8_t value); | | | |
| | | | |
|
| | | int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int1
6_t value, int16_t comparand); | |
| int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value,
int16_t comparand); | | int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value,
int16_t comparand); | |
| int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value,
int16_t comparand); | | int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value,
int16_t comparand); | |
|
| int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr,
int16_t value); | | | |
| | | | |
| int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int3
2_t value, int32_t comparand); | | int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int3
2_t value, int32_t comparand); | |
| int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value,
int32_t comparand); | | int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value,
int32_t comparand); | |
| int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value,
int32_t comparand); | | int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value,
int32_t comparand); | |
|
| int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, in
t32_t value); | | | |
| | | | |
|
| | | int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int6
4_t value, int64_t comparand); | |
| int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value,
int64_t comparand); | | int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value,
int64_t comparand); | |
| int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value,
int64_t comparand); | | int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value,
int64_t comparand); | |
|
| int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, in
t64_t value); | | | |
| | | | |
| int64_t __TBB_machine_lg(uint64_t value); | | int64_t __TBB_machine_lg(uint64_t value); | |
| void __TBB_machine_pause(int32_t delay); | | void __TBB_machine_pause(int32_t delay); | |
| bool __TBB_machine_trylockbyte( volatile unsigned char &ptr ); | | bool __TBB_machine_trylockbyte( volatile unsigned char &ptr ); | |
| int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr ); | | int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr ); | |
| | | | |
| //! Retrieves the current RSE backing store pointer. IA64 specific. | | //! Retrieves the current RSE backing store pointer. IA64 specific. | |
| void* __TBB_get_bsp(); | | void* __TBB_get_bsp(); | |
|
| } | | | |
| | | | |
|
| #define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1__TBB_full_fence( | | int32_t __TBB_machine_load1_relaxed(const void *ptr); | |
| P,V,C) | | int32_t __TBB_machine_load2_relaxed(const void *ptr); | |
| #define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2__TBB_full_fence( | | int32_t __TBB_machine_load4_relaxed(const void *ptr); | |
| P,V,C) | | int64_t __TBB_machine_load8_relaxed(const void *ptr); | |
| | | | |
| | | void __TBB_machine_store1_relaxed(void *ptr, int32_t value); | |
| | | void __TBB_machine_store2_relaxed(void *ptr, int32_t value); | |
| | | void __TBB_machine_store4_relaxed(void *ptr, int32_t value); | |
| | | void __TBB_machine_store8_relaxed(void *ptr, int64_t value); | |
| | | } // extern "C" | |
| | | | |
| | | // Mapping old entry points to the names corresponding to the new full_fenc | |
| | | e identifier. | |
| | | #define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_fu | |
| | | ll_fence | |
| | | #define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_fu | |
| | | ll_fence | |
| | | #define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_fu | |
| | | ll_fence | |
| | | #define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_fu | |
| | | ll_fence | |
| | | #define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_ | |
| | | full_fence | |
| | | #define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_ | |
| | | full_fence | |
| | | #define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_ | |
| | | full_fence | |
| | | #define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_ | |
| | | full_fence | |
| | | #define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full | |
| | | _fence | |
| | | #define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full | |
| | | _fence | |
| | | #define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full | |
| | | _fence | |
| | | #define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full | |
| | | _fence | |
| | | | |
| | | // Mapping relaxed operations to the entry points implementing them. | |
| | | /** On IA64 RMW operations implicitly have acquire semantics. Thus one cann | |
| | | ot | |
| | | actually have completely relaxed RMW operation here. **/ | |
| | | #define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire | |
| | | #define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire | |
| | | #define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire | |
| | | #define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire | |
| | | #define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquir | |
| | | e | |
| | | #define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquir | |
| | | e | |
| | | #define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquir | |
| | | e | |
| | | #define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquir | |
| | | e | |
| | | #define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire | |
| | | #define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire | |
| | | #define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire | |
| | | #define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire | |
| | | | |
| | | #define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \ | |
| | | template <typename T> \ | |
| | | struct machine_load_store_relaxed<T,S> { \ | |
| | | static inline T load ( const T& location ) { \ | |
| | | return (T)__TBB_machine_load##S##_relaxed(&location); \ | |
| | | } \ | |
| | | static inline void store ( T& location, T value ) { \ | |
| | | __TBB_machine_store##S##_relaxed(&location, (V)value); \ | |
| | | } \ | |
| | | } | |
| | | | |
| | | namespace tbb { | |
| | | namespace internal { | |
| | | __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t); | |
| | | __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t); | |
| | | __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t); | |
| | | __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t); | |
| | | }} // namespaces internal, tbb | |
| | | | |
|
| #define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1__TBB_full_fe | | #undef __TBB_MACHINE_DEFINE_ATOMICS | |
| nce(P,V) | | | |
| #define __TBB_FetchAndAdd1acquire(P,V) __TBB_machine_fetchadd1acquire(P,V) | | #define __TBB_USE_FENCED_ATOMICS 1 | |
| #define __TBB_FetchAndAdd1release(P,V) __TBB_machine_fetchadd1release(P,V) | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| #define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2__TBB_full_fe | | | |
| nce(P,V) | | | |
| #define __TBB_FetchAndAdd2acquire(P,V) __TBB_machine_fetchadd2acquire(P,V) | | | |
| #define __TBB_FetchAndAdd2release(P,V) __TBB_machine_fetchadd2release(P,V) | | | |
| #define __TBB_FetchAndAdd4acquire(P,V) __TBB_machine_fetchadd4acquire(P,V) | | | |
| #define __TBB_FetchAndAdd4release(P,V) __TBB_machine_fetchadd4release(P,V) | | | |
| #define __TBB_FetchAndAdd8acquire(P,V) __TBB_machine_fetchadd8acquire(P,V) | | | |
| #define __TBB_FetchAndAdd8release(P,V) __TBB_machine_fetchadd8release(P,V) | | | |
| | | | |
| #define __TBB_FetchAndStore1acquire(P,V) __TBB_machine_fetchstore1acquire(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore1release(P,V) __TBB_machine_fetchstore1release(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore2acquire(P,V) __TBB_machine_fetchstore2acquire(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore2release(P,V) __TBB_machine_fetchstore2release(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore4acquire(P,V) __TBB_machine_fetchstore4acquire(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore4release(P,V) __TBB_machine_fetchstore4release(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore8acquire(P,V) __TBB_machine_fetchstore8acquire(P | | | |
| ,V) | | | |
| #define __TBB_FetchAndStore8release(P,V) __TBB_machine_fetchstore8release(P | | | |
| ,V) | | | |
| | | | |
| #define __TBB_CompareAndSwap1acquire(P,V,C) __TBB_machine_cmpswp1acquire(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap1release(P,V,C) __TBB_machine_cmpswp1release(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap2acquire(P,V,C) __TBB_machine_cmpswp2acquire(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap2release(P,V,C) __TBB_machine_cmpswp2release(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4__TBB_full | | | |
| _fence(P,V,C) | | | |
| #define __TBB_CompareAndSwap4acquire(P,V,C) __TBB_machine_cmpswp4acquire(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap4release(P,V,C) __TBB_machine_cmpswp4release(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8__TBB_full | | | |
| _fence(P,V,C) | | | |
| #define __TBB_CompareAndSwap8acquire(P,V,C) __TBB_machine_cmpswp8acquire(P, | | | |
| V,C) | | | |
| #define __TBB_CompareAndSwap8release(P,V,C) __TBB_machine_cmpswp8release(P, | | | |
| V,C) | | | |
| | | | |
| #define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4__TBB_full_fence(P,V | | | |
| ) | | | |
| #define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8__TBB_full_fence(P,V | | | |
| ) | | | |
| | | | |
| #define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1__TBB_full_fence | | | |
| (P,V) | | | |
| #define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2__TBB_full_fence | | | |
| (P,V) | | | |
| #define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4__TBB_full_fence | | | |
| (P,V) | | | |
| #define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8__TBB_full_fence | | | |
| (P,V) | | | |
| | | | |
| #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAdd8acquire(P,1) | | | |
| #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAdd8release(P,-1) | | | |
| | | | |
| #ifndef __INTEL_COMPILER | | | |
| /* Even though GCC imbues volatile loads with acquire semantics, | | | |
| it sometimes moves loads over the acquire fence. The | | | |
| fences defined here stop such incorrect code motion. */ | | | |
| #define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"mem | | | |
| ory") | | | |
| #define __TBB_rel_acq_fence() __asm__ __volatile__("mf": : :"memory") | | | |
| #else | | | |
| #define __TBB_release_consistency_helper() | | | |
| #define __TBB_rel_acq_fence() __mf() | | | |
| #endif /* __INTEL_COMPILER */ | | | |
| | | | |
| // Special atomic functions | | | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_CompareAndSwap8(P,V,C) | | | |
| #define __TBB_FetchAndStoreW(P,V) __TBB_FetchAndStore8(P,V) | | | |
| #define __TBB_FetchAndAddW(P,V) __TBB_FetchAndAdd8(P,V) | | | |
| #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAdd8release(P,V) | | | |
| | | | |
| // Not needed | | | |
| #undef __TBB_Store8 | | | |
| #undef __TBB_Load8 | | | |
| | | | |
| // Definition of Lock functions | | // Definition of Lock functions | |
| #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) | | #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) | |
| #define __TBB_LockByte(P) __TBB_machine_lockbyte(P) | | #define __TBB_LockByte(P) __TBB_machine_lockbyte(P) | |
| | | | |
| // Definition of other utility functions | | // Definition of other utility functions | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
End of changes. 21 change blocks. |
| 118 lines changed or deleted | | 135 lines changed or added | |
|
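For readability, the __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t) invocation above expands to roughly the following partial specialization (a sketch: the primary template machine_load_store_relaxed is assumed to be declared in tbb_machine.h, and the __TBB_machine_load4_relaxed/__TBB_machine_store4_relaxed entry points come from the IA-64 assembly sources):

    namespace tbb {
    namespace internal {
        // 4-byte relaxed loads/stores are forwarded to hand-written IA-64 entry
        // points instead of relying on plain C++ volatile accesses.
        template <typename T>
        struct machine_load_store_relaxed<T,4> {
            static inline T load ( const T& location ) {
                return (T)__TBB_machine_load4_relaxed(&location);
            }
            static inline void store ( T& location, T value ) {
                __TBB_machine_store4_relaxed(&location, (int32_t)value);
            }
        };
    }} // namespaces internal, tbb
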
| linux_intel64.h | | linux_intel64.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free softwar
e | | As a special exception, you may use this file as part of a free softwar
e | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you comp
ile | | templates or use macros or inline functions from this file, or you comp
ile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers | |
| | | instead. | |
| #endif | | #endif | |
| | | | |
|
| #include "linux_common.h" | | #define __TBB_machine_linux_intel64_H | |
| | | | |
| | | #include <stdint.h> | |
| | | #include <unistd.h> | |
| | | | |
| #define __TBB_WORDSIZE 8 | | #define __TBB_WORDSIZE 8 | |
| #define __TBB_BIG_ENDIAN 0 | | #define __TBB_BIG_ENDIAN 0 | |
| | | | |
|
| #define __TBB_release_consistency_helper() __asm__ __volatile__("": : :"mem | | #define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory") | |
| ory") | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| | | | |
|
| #ifndef __TBB_rel_acq_fence | | #ifndef __TBB_full_memory_fence | |
| inline void __TBB_rel_acq_fence() { __asm__ __volatile__("mfence": : :"memo | | #define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory | |
| ry"); } | | ") | |
| #endif | | #endif | |
| | | | |
|
| #define __MACHINE_DECL_ATOMICS(S,T,X) \ | | #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X)
\ | |
| static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com
parand ) \ | | static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T com
parand ) \ | |
| {
\ | | {
\ | |
| T result;
\ | | T result;
\ | |
|
\ | |
\ | |
| __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"
\ | | __asm__ __volatile__("lock\ncmpxchg" X " %2,%1"
\ | |
| : "=a"(result), "=m"(*(volatile T*)ptr)
\ | | : "=a"(result), "=m"(*(volatile T*)ptr)
\ | |
| : "q"(value), "0"(comparand), "m"(*(volatile T*)p
tr) \ | | : "q"(value), "0"(comparand), "m"(*(volatile T*)p
tr) \ | |
| : "memory");
\ | | : "memory");
\ | |
| return result;
\ | | return result;
\ | |
| }
\ | | }
\ | |
| | | | |
| skipping to change at line 76 | | skipping to change at line 82 | |
| static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value)
\ | | static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value)
\ | |
| {
\ | | {
\ | |
| T result;
\ | | T result;
\ | |
| __asm__ __volatile__("lock\nxchg" X " %0,%1"
\ | | __asm__ __volatile__("lock\nxchg" X " %0,%1"
\ | |
| : "=r"(result),"=m"(*(volatile T*)ptr)
\ | | : "=r"(result),"=m"(*(volatile T*)ptr)
\ | |
| : "0"(value), "m"(*(volatile T*)ptr)
\ | | : "0"(value), "m"(*(volatile T*)ptr)
\ | |
| : "memory");
\ | | : "memory");
\ | |
| return result;
\ | | return result;
\ | |
| }
\ | | }
\ | |
| | | | |
|
| __MACHINE_DECL_ATOMICS(1,int8_t,"") | | __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"") | |
| __MACHINE_DECL_ATOMICS(2,int16_t,"") | | __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"") | |
| __MACHINE_DECL_ATOMICS(4,int32_t,"") | | __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"") | |
| __MACHINE_DECL_ATOMICS(8,int64_t,"q") | | __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q") | |
| | | | |
| | | #undef __TBB_MACHINE_DEFINE_ATOMICS | |
| | | | |
| static inline int64_t __TBB_machine_lg( uint64_t x ) { | | static inline int64_t __TBB_machine_lg( uint64_t x ) { | |
| int64_t j; | | int64_t j; | |
| __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); | | __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); | |
| return j; | | return j; | |
| } | | } | |
| | | | |
| static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend )
{ | | static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend )
{ | |
| __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr)
: "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); | | __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr)
: "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); | |
| } | | } | |
| | | | |
| static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend )
{ | | static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend )
{ | |
| __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr
) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); | | __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr
) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory"); | |
| } | | } | |
| | | | |
|
| | | #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | |
| | | #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | |
| | | | |
| | | // Definition of other functions | |
| | | #ifndef __TBB_Pause | |
| static inline void __TBB_machine_pause( int32_t delay ) { | | static inline void __TBB_machine_pause( int32_t delay ) { | |
| for (int32_t i = 0; i < delay; i++) { | | for (int32_t i = 0; i < delay; i++) { | |
| __asm__ __volatile__("pause;"); | | __asm__ __volatile__("pause;"); | |
| } | | } | |
| return; | | return; | |
| } | | } | |
|
| | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
| | | #endif /* !__TBB_Pause */ | |
| | | | |
|
| // Machine specific atomic operations | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
| #define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C) | | | |
| #define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C) | | | |
| #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | | |
| #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | | |
| | | | |
| #define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V) | | | |
| #define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V) | | | |
| #define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V) | | | |
| #define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V) | | | |
| #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) | | | |
| | | | |
| #define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V) | | | |
| #define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V) | | | |
| #define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V) | | | |
| #define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V) | | | |
| #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V) | | | |
| | | | |
| #define __TBB_Store8(P,V) (*P = V) | | | |
| #define __TBB_Load8(P) (*P) | | | |
| | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | | | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | | | |
| | | | |
|
| // Definition of other functions | | #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | |
| #ifndef __TBB_Pause | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| | | | |
| | | // API to retrieve/update FPU control setting | |
| | | #ifndef __TBB_CPU_CTL_ENV_PRESENT | |
| | | #define __TBB_CPU_CTL_ENV_PRESENT 1 | |
| | | | |
| | | struct __TBB_cpu_ctl_env_t { | |
| | | int mxcsr; | |
| | | short x87cw; | |
| | | }; | |
| | | | |
| | | inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) { | |
| | | #if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN | |
| | | __TBB_cpu_ctl_env_t loc_ctl; | |
| | | __asm__ __volatile__ ( | |
| | | "stmxcsr %0\n\t" | |
| | | "fstcw %1" | |
| | | : "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw) | |
| | | ); | |
| | | *ctl = loc_ctl; | |
| | | #else | |
| | | __asm__ __volatile__ ( | |
| | | "stmxcsr %0\n\t" | |
| | | "fstcw %1" | |
| | | : "=m"(ctl->mxcsr), "=m"(ctl->x87cw) | |
| | | ); | |
| #endif | | #endif | |
|
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | } | |
| | | inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) { | |
| // Special atomic functions | | __asm__ __volatile__ ( | |
| #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) | | "ldmxcsr %0\n\t" | |
| #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) | | "fldcw %1" | |
| #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,-1) | | : : "m"(ctl->mxcsr), "m"(ctl->x87cw) | |
| | | ); | |
| // Use generic definitions from tbb_machine.h | | } | |
| #undef __TBB_TryLockByte | | #endif /* !__TBB_CPU_CTL_ENV_PRESENT */ | |
| #undef __TBB_LockByte | | | |
| | | | |
End of changes. 12 change blocks. |
| 42 lines changed or deleted | | 58 lines changed or added | |
|
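The __TBB_get_cpu_ctl_env/__TBB_set_cpu_ctl_env pair defined above snapshots and restores the SSE (MXCSR) and x87 control words. A minimal save/run/restore sketch built on those helpers (the wrapper function itself is illustrative, not part of TBB):

    // Run 'work' and guarantee that any changes it makes to rounding mode or
    // FP exception masks do not leak into the caller's floating-point state.
    template <typename F>
    void run_with_saved_fp_state( F work ) {
        __TBB_cpu_ctl_env_t saved;
        __TBB_get_cpu_ctl_env( &saved );   // stmxcsr + fstcw
        work();
        __TBB_set_cpu_ctl_env( &saved );   // ldmxcsr + fldcw
    }
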
| mac_ppc.h | | mac_ppc.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free softwar
e | | As a special exception, you may use this file as part of a free softwar
e | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you comp
ile | | templates or use macros or inline functions from this file, or you comp
ile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers | |
| | | instead. | |
| #endif | | #endif | |
| | | | |
|
| | | #define __TBB_machine_gcc_power_H | |
| | | | |
| #include <stdint.h> | | #include <stdint.h> | |
| #include <unistd.h> | | #include <unistd.h> | |
| | | | |
|
| #include <sched.h> // sched_yield | | // TODO: rename to gcc_power.h? | |
| | | // This file is for Power Architecture with compilers supporting GNU inline | |
| | | -assembler syntax (currently GNU g++ and IBM XL). | |
| | | // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/ | |
| | | or clobber lists, so they should be avoided. | |
| | | | |
| | | #if __powerpc64__ || __ppc64__ | |
| | | // IBM XL documents __powerpc64__ (and __PPC64__). | |
| | | // Apple documents __ppc64__ (with __ppc__ only on 32-bit). | |
| | | #define __TBB_WORDSIZE 8 | |
| | | #else | |
| | | #define __TBB_WORDSIZE 4 | |
| | | #endif | |
| | | | |
| | | // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardwar | |
| | | e: | |
| | | #if __TBB_WORDSIZE==8 | |
| | | // Do not change the following definition, because TBB itself will use | |
| | | 64-bit atomics in 64-bit builds. | |
| | | #define __TBB_64BIT_ATOMICS 1 | |
| | | #elif __bgp__ | |
| | | // Do not change the following definition on known 32-bit hardware. | |
| | | #define __TBB_64BIT_ATOMICS 0 | |
| | | #else | |
| | | // To enable 64-bit atomics in 32-bit builds, set the value below to 1 | |
| | | instead of 0. | |
| | | // You must make certain that the program will only use them on actual | |
| | | 64-bit hardware | |
| | | // (which typically means that the entire program is only executed on s | |
| | | uch hardware), | |
| | | // because their implementation involves machine instructions that are | |
| | | illegal elsewhere. | |
| | | // The setting can be chosen independently per compilation unit, | |
| | | // which also means that TBB itself does not need to be rebuilt. | |
| | | // Alternatively (but only for the current architecture and TBB version | |
| | | ), | |
| | | // override the default as a predefined macro when invoking the compile | |
| | | r. | |
| | | #ifndef __TBB_64BIT_ATOMICS | |
| | | #define __TBB_64BIT_ATOMICS 0 | |
| | | #endif | |
| | | #endif | |
| | | | |
| inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, in
t32_t comparand ) | | inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, in
t32_t comparand ) | |
| { | | { | |
| int32_t result; | | int32_t result; | |
| | | | |
| __asm__ __volatile__("sync\n" | | __asm__ __volatile__("sync\n" | |
|
| "0: lwarx %0,0,%2\n\t" /* load w/ reservation */ | | "0:\n\t" | |
| "cmpw %0,%4\n\t" /* compare against compara | | "lwarx %[res],0,%[ptr]\n\t" /* load w/ reserva | |
| nd */ | | tion */ | |
| "bne- 1f\n\t" /* exit if not same */ | | "cmpw %[res],%[cmp]\n\t" /* compare against | |
| "stwcx. %3,0,%2\n\t" /* store new_value */ | | comparand */ | |
| "bne- 0b\n" /* retry if reservation lo | | "bne- 1f\n\t" /* exit if not sam | |
| st */ | | e */ | |
| "1: sync" /* the exit */ | | "stwcx. %[val],0,%[ptr]\n\t" /* store new value | |
| : "=&r"(result), "=m"(* (int32_t*) ptr) | | */ | |
| : "r"(ptr), "r"(value), "r"(comparand), "m"(* (in | | "bne- 0b\n" /* retry if reserv | |
| t32_t*) ptr) | | ation lost */ | |
| : "cr0"); | | "1:\n\t" /* the exit */ | |
| | | "isync" | |
| | | : [res]"=&r"(result) | |
| | | , "+m"(* (int32_t*) ptr) /* redundant with | |
| | | "memory" */ | |
| | | : [ptr]"r"(ptr) | |
| | | , [val]"r"(value) | |
| | | , [cmp]"r"(comparand) | |
| | | : "memory" /* compiler full f | |
| | | ence */ | |
| | | , "cr0" /* clobbered by cm | |
| | | p and/or stwcx. */ | |
| | | ); | |
| return result; | | return result; | |
| } | | } | |
| | | | |
|
| | | #if __TBB_WORDSIZE==8 | |
| | | | |
| inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, in
t64_t comparand ) | | inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, in
t64_t comparand ) | |
| { | | { | |
| int64_t result; | | int64_t result; | |
| __asm__ __volatile__("sync\n" | | __asm__ __volatile__("sync\n" | |
|
| "0: ldarx %0,0,%2\n\t" /* load w/ reservation */ | | "0:\n\t" | |
| "cmpd %0,%4\n\t" /* compare against compara | | "ldarx %[res],0,%[ptr]\n\t" /* load w/ reserva | |
| nd */ | | tion */ | |
| "bne- 1f\n\t" /* exit if not same */ | | "cmpd %[res],%[cmp]\n\t" /* compare against | |
| "stdcx. %3,0,%2\n\t" /* store new_value */ | | comparand */ | |
| "bne- 0b\n" /* retry if reservation lo | | "bne- 1f\n\t" /* exit if not sam | |
| st */ | | e */ | |
| "1: sync" /* the exit */ | | "stdcx. %[val],0,%[ptr]\n\t" /* store new value | |
| : "=&b"(result), "=m"(* (int64_t*) ptr) | | */ | |
| : "r"(ptr), "r"(value), "r"(comparand), "m"(* (in | | "bne- 0b\n" /* retry if reserv | |
| t64_t*) ptr) | | ation lost */ | |
| : "cr0"); | | "1:\n\t" /* the exit */ | |
| | | "isync" | |
| | | : [res]"=&r"(result) | |
| | | , "+m"(* (int64_t*) ptr) /* redundant with | |
| | | "memory" */ | |
| | | : [ptr]"r"(ptr) | |
| | | , [val]"r"(value) | |
| | | , [cmp]"r"(comparand) | |
| | | : "memory" /* compiler full f | |
| | | ence */ | |
| | | , "cr0" /* clobbered by cm | |
| | | p and/or stdcx. */ | |
| | | ); | |
| return result; | | return result; | |
| } | | } | |
| | | | |
|
| #define __TBB_BIG_ENDIAN 1 | | #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */ | |
| | | | |
| | | inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, in | |
| | | t64_t comparand ) | |
| | | { | |
| | | int64_t result; | |
| | | int64_t value_register, comparand_register, result_register; // dummy v | |
| | | ariables to allocate registers | |
| | | __asm__ __volatile__("sync\n\t" | |
| | | "ld %[val],%[valm]\n\t" | |
| | | "ld %[cmp],%[cmpm]\n" | |
| | | "0:\n\t" | |
| | | "ldarx %[res],0,%[ptr]\n\t" /* load w/ reserva | |
| | | tion */ | |
| | | "cmpd %[res],%[cmp]\n\t" /* compare against | |
| | | comparand */ | |
| | | "bne- 1f\n\t" /* exit if not sam | |
| | | e */ | |
| | | "stdcx. %[val],0,%[ptr]\n\t" /* store new value | |
| | | */ | |
| | | "bne- 0b\n" /* retry if reserv | |
| | | ation lost */ | |
| | | "1:\n\t" /* the exit */ | |
| | | "std %[res],%[resm]\n\t" | |
| | | "isync" | |
| | | : [resm]"=m"(result) | |
| | | , [res] "=&r"( result_register) | |
| | | , [val] "=&r"( value_register) | |
| | | , [cmp] "=&r"(comparand_register) | |
| | | , "+m"(* (int64_t*) ptr) /* redundant with | |
| | | "memory" */ | |
| | | : [ptr] "r"(ptr) | |
| | | , [valm]"m"(value) | |
| | | , [cmpm]"m"(comparand) | |
| | | : "memory" /* compiler full f | |
| | | ence */ | |
| | | , "cr0" /* clobbered by cm | |
| | | pd and/or stdcx. */ | |
| | | ); | |
| | | return result; | |
| | | } | |
| | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | | |
|
| #if defined(powerpc64) || defined(__powerpc64__) || defined(__ppc64__) | | #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) | |
| #define __TBB_WORDSIZE 8 | | \ | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | template <typename T> | |
| | | \ | |
| | | struct machine_load_store<T,S> { | |
| | | \ | |
| | | static inline T load_with_acquire(const volatile T& location) { | |
| | | \ | |
| | | T result; | |
| | | \ | |
| | | __asm__ __volatile__(ldx " %[res],0(%[ptr])\n" | |
| | | \ | |
| | | "0:\n\t" | |
| | | \ | |
| | | cmpx " %[res],%[res]\n\t" | |
| | | \ | |
| | | "bne- 0b\n\t" | |
| | | \ | |
| | | "isync" | |
| | | \ | |
| | | : [res]"=r"(result) | |
| | | \ | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ \ | |
| | | , "m"(location) /* redundant with "m | |
| | | emory" */ \ | |
| | | : "memory" /* compiler acquire | |
| | | fence */ \ | |
| | | , "cr0" /* clobbered by cmpw | |
| | | /cmpd */); \ | |
| | | return result; | |
| | | \ | |
| | | } | |
| | | \ | |
| | | static inline void store_with_release(volatile T &location, T value | |
| | | ) { \ | |
| | | __asm__ __volatile__("lwsync\n\t" | |
| | | \ | |
| | | stx " %[val],0(%[ptr])" | |
| | | \ | |
| | | : "=m"(location) /* redundant with "m | |
| | | emory" */ \ | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ \ | |
| | | , [val]"r"(value) | |
| | | \ | |
| | | : "memory"/*compiler release fence*/ /*(cr | |
| | | 0 not affected)*/); \ | |
| | | } | |
| | | \ | |
| | | }; | |
| | | \ | |
| | | | |
| | | \ | |
| | | template <typename T> | |
| | | \ | |
| | | struct machine_load_store_relaxed<T,S> { | |
| | | \ | |
| | | static inline T load (const __TBB_atomic T& location) { | |
| | | \ | |
| | | T result; | |
| | | \ | |
| | | __asm__ __volatile__(ldx " %[res],0(%[ptr])" | |
| | | \ | |
| | | : [res]"=r"(result) | |
| | | \ | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ \ | |
| | | , "m"(location) | |
| | | \ | |
| | | ); /*(no compiler fence)*/ /*(cr0 not affe | |
| | | cted)*/ \ | |
| | | return result; | |
| | | \ | |
| | | } | |
| | | \ | |
| | | static inline void store (__TBB_atomic T &location, T value) { | |
| | | \ | |
| | | __asm__ __volatile__(stx " %[val],0(%[ptr])" | |
| | | \ | |
| | | : "=m"(location) | |
| | | \ | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ \ | |
| | | , [val]"r"(value) | |
| | | \ | |
| | | ); /*(no compiler fence)*/ /*(cr0 not affe | |
| | | cted)*/ \ | |
| | | } | |
| | | \ | |
| | | }; | |
| | | | |
| | | namespace tbb { | |
| | | namespace internal { | |
| | | __TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw") | |
| | | __TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw") | |
| | | __TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw") | |
| | | | |
| | | #if __TBB_WORDSIZE==8 | |
| | | | |
| | | __TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd") | |
| | | | |
| | | #elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */ | |
| | | | |
| | | template <typename T> | |
| | | struct machine_load_store<T,8> { | |
| | | static inline T load_with_acquire(const volatile T& location) { | |
| | | T result; | |
| | | T result_register; // dummy variable to allocate a register | |
| | | __asm__ __volatile__("ld %[res],0(%[ptr])\n\t" | |
| | | "std %[res],%[resm]\n" | |
| | | "0:\n\t" | |
| | | "cmpd %[res],%[res]\n\t" | |
| | | "bne- 0b\n\t" | |
| | | "isync" | |
| | | : [resm]"=m"(result) | |
| | | , [res]"=&r"(result_register) | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ | |
| | | , "m"(location) /* redundant with "m | |
| | | emory" */ | |
| | | : "memory" /* compiler acquire | |
| | | fence */ | |
| | | , "cr0" /* clobbered by cmpd | |
| | | */); | |
| | | return result; | |
| | | } | |
| | | | |
| | | static inline void store_with_release(volatile T &location, T value | |
| | | ) { | |
| | | T value_register; // dummy variable to allocate a register | |
| | | __asm__ __volatile__("lwsync\n\t" | |
| | | "ld %[val],%[valm]\n\t" | |
| | | "std %[val],0(%[ptr])" | |
| | | : "=m"(location) /* redundant with "m | |
| | | emory" */ | |
| | | , [val]"=&r"(value_register) | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ | |
| | | , [valm]"m"(value) | |
| | | : "memory"/*compiler release fence*/ /*(cr | |
| | | 0 not affected)*/); | |
| | | } | |
| | | }; | |
| | | | |
| | | template <typename T> | |
| | | struct machine_load_store_relaxed<T,8> { | |
| | | static inline T load (const volatile T& location) { | |
| | | T result; | |
| | | T result_register; // dummy variable to allocate a register | |
| | | __asm__ __volatile__("ld %[res],0(%[ptr])\n\t" | |
| | | "std %[res],%[resm]" | |
| | | : [resm]"=m"(result) | |
| | | , [res]"=&r"(result_register) | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ | |
| | | , "m"(location) | |
| | | ); /*(no compiler fence)*/ /*(cr0 not affe | |
| | | cted)*/ | |
| | | return result; | |
| | | } | |
| | | | |
| | | static inline void store (volatile T &location, T value) { | |
| | | T value_register; // dummy variable to allocate a register | |
| | | __asm__ __volatile__("ld %[val],%[valm]\n\t" | |
| | | "std %[val],0(%[ptr])" | |
| | | : "=m"(location) | |
| | | , [val]"=&r"(value_register) | |
| | | : [ptr]"b"(&location) /* cannot use regist | |
| | | er 0 here */ | |
| | | , [valm]"m"(value) | |
| | | ); /*(no compiler fence)*/ /*(cr0 not affe | |
| | | cted)*/ | |
| | | } | |
| | | }; | |
| | | #define __TBB_machine_load_store_relaxed_8 | |
| | | | |
| | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | | |
| | | }} // namespaces internal, tbb | |
| | | | |
| | | #undef __TBB_MACHINE_DEFINE_LOAD_STORE | |
| | | | |
| | | #define __TBB_USE_GENERIC_PART_WORD_CAS 1 | |
| | | #define __TBB_USE_GENERIC_FETCH_ADD 1 | |
| | | #define __TBB_USE_GENERIC_FETCH_STORE 1 | |
| | | | |
| | | #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : | |
| | | :"memory") | |
| | | #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : | |
| | | :"memory") | |
| | | | |
| | | static inline intptr_t __TBB_machine_lg( uintptr_t x ) { | |
| | | // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-orde | |
| | | r bits), and does not affect cr0 | |
| | | #if __TBB_WORDSIZE==8 | |
| | | __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); | |
| | | return 63-static_cast<intptr_t>(x); | |
| #else | | #else | |
|
| #define __TBB_WORDSIZE 4 | | __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | return 31-static_cast<intptr_t>(x); | |
| #endif | | #endif | |
|
| | | } | |
| | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
|
| #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | // Assumes implicit alignment for any 32-bit value | |
| #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | typedef uint32_t __TBB_Flag; | |
| #define __TBB_Yield() sched_yield() | | #define __TBB_Flag __TBB_Flag | |
| #define __TBB_rel_acq_fence() __asm__ __volatile__("lwsync": : :"memory") | | | |
| #define __TBB_release_consistency_helper() __TBB_rel_acq_fence() | | inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) { | |
| | | return __TBB_machine_cmpswp4(&flag,1,0)==0; | |
| | | } | |
| | | #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) | |
| | | | |
End of changes. 12 change blocks. 34 lines changed or deleted, 341 lines changed or added.
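Since gcc_power.h sets __TBB_USE_GENERIC_FETCH_ADD and __TBB_USE_GENERIC_FETCH_STORE, read-modify-write operations on Power are synthesized from the compare-and-swap primitives defined above. A minimal sketch of that retry loop, built only on the __TBB_machine_cmpswp4 shown in this file (the helper name is illustrative; the real generic implementation lives in tbb_machine.h):

    // Sketch: fetch-and-add expressed as a CAS retry loop.
    // __TBB_machine_cmpswp4 returns the value it observed at *ptr, so the
    // exchange succeeded exactly when that observation equals our snapshot.
    inline int32_t fetch_and_add4_sketch( volatile void* ptr, int32_t addend ) {
        volatile int32_t* p = reinterpret_cast<volatile int32_t*>(ptr);
        int32_t snapshot;
        do {
            snapshot = *p;                 // read the current value
        } while ( __TBB_machine_cmpswp4( ptr, snapshot + addend, snapshot ) != snapshot );
        return snapshot;                   // value before the addition
    }

As the comments above note, 64-bit atomics stay disabled in 32-bit Power builds unless __TBB_64BIT_ATOMICS is predefined to 1 (for example on the compiler command line), and such builds must then run only on 64-bit hardware.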
|
| parallel_for.h | | parallel_for.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 32 | | skipping to change at line 32 | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_parallel_for_H | | #ifndef __TBB_parallel_for_H | |
| #define __TBB_parallel_for_H | | #define __TBB_parallel_for_H | |
| | | | |
|
| | | #include <new> | |
| #include "task.h" | | #include "task.h" | |
| #include "partitioner.h" | | #include "partitioner.h" | |
| #include "blocked_range.h" | | #include "blocked_range.h" | |
|
| #include <new> | | | |
| #include "tbb_exception.h" | | #include "tbb_exception.h" | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
|
| | | namespace interface6 { | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
| //! Task type used in parallel_for | | //! Task type used in parallel_for | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| class start_for: public task { | | class start_for: public task { | |
| Range my_range; | | Range my_range; | |
| const Body my_body; | | const Body my_body; | |
|
| typename Partitioner::partition_type my_partition; | | typename Partitioner::task_partition_type my_partition; | |
| /*override*/ task* execute(); | | /*override*/ task* execute(); | |
| | | | |
|
| | | public: | |
| //! Constructor for root task. | | //! Constructor for root task. | |
| start_for( const Range& range, const Body& body, Partitioner& parti
tioner ) : | | start_for( const Range& range, const Body& body, Partitioner& parti
tioner ) : | |
| my_range(range), | | my_range(range), | |
| my_body(body), | | my_body(body), | |
| my_partition(partitioner) | | my_partition(partitioner) | |
| { | | { | |
| } | | } | |
| //! Splitting constructor used to generate children. | | //! Splitting constructor used to generate children. | |
|
| /** this becomes left child. Newly constructed object is right chi
ld. */ | | /** parent_ becomes left child. Newly constructed object is right
child. */ | |
| start_for( start_for& parent_, split ) : | | start_for( start_for& parent_, split ) : | |
| my_range(parent_.my_range,split()), | | my_range(parent_.my_range,split()), | |
| my_body(parent_.my_body), | | my_body(parent_.my_body), | |
|
| | | my_partition(parent_.my_partition, split()) | |
| | | { | |
| | | my_partition.set_affinity(*this); | |
| | | } | |
| | | //! Construct right child from the given range as response to the d | |
| | | emand. | |
| | | /** parent_ remains left child. Newly constructed object is right | |
| | | child. */ | |
| | | start_for( start_for& parent_, const Range& r, depth_t d ) : | |
| | | my_range(r), | |
| | | my_body(parent_.my_body), | |
| my_partition(parent_.my_partition,split()) | | my_partition(parent_.my_partition,split()) | |
| { | | { | |
| my_partition.set_affinity(*this); | | my_partition.set_affinity(*this); | |
|
| | | my_partition.align_depth( d ); | |
| } | | } | |
| //! Update affinity info, if any. | | //! Update affinity info, if any. | |
| /*override*/ void note_affinity( affinity_id id ) { | | /*override*/ void note_affinity( affinity_id id ) { | |
| my_partition.note_affinity( id ); | | my_partition.note_affinity( id ); | |
| } | | } | |
|
| public: | | | |
| static void run( const Range& range, const Body& body, const Parti
tioner& partitioner ) { | | static void run( const Range& range, const Body& body, const Parti
tioner& partitioner ) { | |
| if( !range.empty() ) { | | if( !range.empty() ) { | |
| #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | |
| start_for& a = *new(task::allocate_root()) start_for(range,
body,const_cast<Partitioner&>(partitioner)); | | start_for& a = *new(task::allocate_root()) start_for(range,
body,const_cast<Partitioner&>(partitioner)); | |
| #else | | #else | |
| // Bound context prevents exceptions from body to affect ne
sting or sibling algorithms, | | // Bound context prevents exceptions from body to affect ne
sting or sibling algorithms, | |
| // and allows users to handle exceptions safely by wrapping
parallel_for in the try-block. | | // and allows users to handle exceptions safely by wrapping
parallel_for in the try-block. | |
| task_group_context context; | | task_group_context context; | |
| start_for& a = *new(task::allocate_root(context)) start_for
(range,body,const_cast<Partitioner&>(partitioner)); | | start_for& a = *new(task::allocate_root(context)) start_for
(range,body,const_cast<Partitioner&>(partitioner)); | |
| #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ | | #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ | |
| | | | |
| skipping to change at line 94 | | skipping to change at line 105 | |
| } | | } | |
| } | | } | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| static void run( const Range& range, const Body& body, const Parti
tioner& partitioner, task_group_context& context ) { | | static void run( const Range& range, const Body& body, const Parti
tioner& partitioner, task_group_context& context ) { | |
| if( !range.empty() ) { | | if( !range.empty() ) { | |
| start_for& a = *new(task::allocate_root(context)) start_for
(range,body,const_cast<Partitioner&>(partitioner)); | | start_for& a = *new(task::allocate_root(context)) start_for
(range,body,const_cast<Partitioner&>(partitioner)); | |
| task::spawn_root_and_wait(a); | | task::spawn_root_and_wait(a); | |
| } | | } | |
| } | | } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
|
| | | //! create a continuation task, serve as callback for partitioner | |
| | | flag_task *create_continuation() { | |
| | | return new( allocate_continuation() ) flag_task(); | |
| | | } | |
| | | //! Run body for range | |
| | | void run_body( Range &r ) { my_body( r ); } | |
| }; | | }; | |
| | | | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| task* start_for<Range,Body,Partitioner>::execute() { | | task* start_for<Range,Body,Partitioner>::execute() { | |
|
| if( !my_range.is_divisible() || my_partition.should_execute_range(* | | my_partition.check_being_stolen( *this ); | |
| this) ) { | | my_partition.execute(*this, my_range); | |
| my_body( my_range ); | | return NULL; | |
| return my_partition.continue_after_execute_range(); | | | |
| } else { | | | |
| empty_task& c = *new( this->allocate_continuation() ) empty_tas | | | |
| k; | | | |
| recycle_as_child_of(c); | | | |
| c.set_ref_count(2); | | | |
| bool delay = my_partition.decide_whether_to_delay(); | | | |
| start_for& b = *new( c.allocate_child() ) start_for(*this,split | | | |
| ()); | | | |
| my_partition.spawn_or_delay(delay,b); | | | |
| return this; | | | |
| } | | | |
| } | | } | |
| } // namespace internal | | } // namespace internal | |
| //! @endcond | | //! @endcond | |
|
| | | } // namespace interfaceX | |
| | | | |
| | | //! @cond INTERNAL | |
| | | namespace internal { | |
| | | using interface6::internal::start_for; | |
| | | | |
| | | //! Calls the function with values from range [begin, end) with a step | |
| | | provided | |
| | | template<typename Function, typename Index> | |
| | | class parallel_for_body : internal::no_assign { | |
| | | const Function &my_func; | |
| | | const Index my_begin; | |
| | | const Index my_step; | |
| | | public: | |
| | | parallel_for_body( const Function& _func, Index& _begin, Index& _st | |
| | | ep) | |
| | | : my_func(_func), my_begin(_begin), my_step(_step) {} | |
| | | | |
| | | void operator()( tbb::blocked_range<Index>& r ) const { | |
| | | for( Index i = r.begin(), k = my_begin + i * my_step; i < r.en | |
| | | d(); i++, k = k + my_step) | |
| | | my_func( k ); | |
| | | } | |
| | | }; | |
| | | } // namespace internal | |
| | | //! @endcond | |
| | | | |
| // Requirements on Range concept are documented in blocked_range.h | | // Requirements on Range concept are documented in blocked_range.h | |
| | | | |
| /** \page parallel_for_body_req Requirements on parallel_for body | | /** \page parallel_for_body_req Requirements on parallel_for body | |
| Class \c Body implementing the concept of parallel_for body must define
: | | Class \c Body implementing the concept of parallel_for body must define
: | |
| - \code Body::Body( const Body& ); \endcode Copy constr
uctor | | - \code Body::Body( const Body& ); \endcode Copy constr
uctor | |
| - \code Body::~Body(); \endcode Destructor | | - \code Body::~Body(); \endcode Destructor | |
| - \code void Body::operator()( Range& r ) const; \endcode Function ca
ll operator applying the body to range \c r. | | - \code void Body::operator()( Range& r ) const; \endcode Function ca
ll operator applying the body to range \c r. | |
| **/ | | **/ | |
| | | | |
| | | | |
| skipping to change at line 179 | | skipping to change at line 210 | |
| | | | |
| //! Parallel iteration over range with affinity_partitioner and user-suppli
ed context. | | //! Parallel iteration over range with affinity_partitioner and user-suppli
ed context. | |
| /** @ingroup algorithms **/ | | /** @ingroup algorithms **/ | |
| template<typename Range, typename Body> | | template<typename Range, typename Body> | |
| void parallel_for( const Range& range, const Body& body, affinity_partition
er& partitioner, task_group_context& context ) { | | void parallel_for( const Range& range, const Body& body, affinity_partition
er& partitioner, task_group_context& context ) { | |
| internal::start_for<Range,Body,affinity_partitioner>::run(range,body,pa
rtitioner, context); | | internal::start_for<Range,Body,affinity_partitioner>::run(range,body,pa
rtitioner, context); | |
| } | | } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| //@} | | //@} | |
| | | | |
|
| //! @cond INTERNAL | | | |
| namespace internal { | | | |
| //! Calls the function with values from range [begin, end) with a step | | | |
| provided | | | |
| template<typename Function, typename Index> | | | |
| class parallel_for_body : internal::no_assign { | | | |
| const Function &my_func; | | | |
| const Index my_begin; | | | |
| const Index my_step; | | | |
| public: | | | |
| parallel_for_body( const Function& _func, Index& _begin, Index& _step) | | | |
| : my_func(_func), my_begin(_begin), my_step(_step) {} | | | |
| | | | |
| void operator()( tbb::blocked_range<Index>& r ) const { | | | |
| for( Index i = r.begin(), k = my_begin + i * my_step; i < r.end(); | | | |
| i++, k = k + my_step) | | | |
| my_func( k ); | | | |
| } | | | |
| }; | | | |
| } // namespace internal | | | |
| //! @endcond | | | |
| | | | |
| namespace strict_ppl { | | namespace strict_ppl { | |
| | | | |
| //@{ | | //@{ | |
| //! Parallel iteration over a range of integers with a step provided | | //! Parallel iteration over a range of integers with a step provided | |
| template <typename Index, typename Function> | | template <typename Index, typename Function> | |
| void parallel_for(Index first, Index last, Index step, const Function& f) { | | void parallel_for(Index first, Index last, Index step, const Function& f) { | |
|
| tbb::task_group_context context; | | | |
| parallel_for(first, last, step, f, context); | | | |
| } | | | |
| template <typename Index, typename Function> | | | |
| void parallel_for(Index first, Index last, Index step, const Function& f, t | | | |
| bb::task_group_context &context) { | | | |
| if (step <= 0 ) | | if (step <= 0 ) | |
| internal::throw_exception(internal::eid_nonpositive_step); // throw
s std::invalid_argument | | internal::throw_exception(internal::eid_nonpositive_step); // throw
s std::invalid_argument | |
| else if (last > first) { | | else if (last > first) { | |
|
| // Above "else" is necessary to prevent "potential divide by zero" | | // Above "else" avoids "potential divide by zero" warning on some p | |
| warning | | latforms | |
| Index end = (last - first) / step; | | Index end = (last - first - Index(1)) / step + Index(1); | |
| if (first + end * step < last) end++; | | | |
| tbb::blocked_range<Index> range(static_cast<Index>(0), end); | | tbb::blocked_range<Index> range(static_cast<Index>(0), end); | |
| internal::parallel_for_body<Function, Index> body(f, first, step); | | internal::parallel_for_body<Function, Index> body(f, first, step); | |
|
| tbb::parallel_for(range, body, tbb::auto_partitioner(), context); | | tbb::parallel_for(range, body, tbb::auto_partitioner()); | |
| } | | } | |
| } | | } | |
| //! Parallel iteration over a range of integers with a default step value | | //! Parallel iteration over a range of integers with a default step value | |
| template <typename Index, typename Function> | | template <typename Index, typename Function> | |
| void parallel_for(Index first, Index last, const Function& f) { | | void parallel_for(Index first, Index last, const Function& f) { | |
|
| tbb::task_group_context context; | | parallel_for(first, last, static_cast<Index>(1), f); | |
| parallel_for(first, last, static_cast<Index>(1), f, context); | | } | |
| | | | |
| | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | //! Parallel iteration over a range of integers with explicit step and task | |
| | | group context | |
| | | template <typename Index, typename Function> | |
| | | void parallel_for(Index first, Index last, Index step, const Function& f, t | |
| | | bb::task_group_context &context) { | |
| | | if (step <= 0 ) | |
| | | internal::throw_exception(internal::eid_nonpositive_step); // throw | |
| | | s std::invalid_argument | |
| | | else if (last > first) { | |
| | | // Above "else" avoids "potential divide by zero" warning on some p | |
| | | latforms | |
| | | Index end = (last - first - Index(1)) / step + Index(1); | |
| | | tbb::blocked_range<Index> range(static_cast<Index>(0), end); | |
| | | internal::parallel_for_body<Function, Index> body(f, first, step); | |
| | | tbb::parallel_for(range, body, tbb::auto_partitioner(), context); | |
| | | } | |
| } | | } | |
|
| | | //! Parallel iteration over a range of integers with a default step value a
nd explicit task group context | |
| template <typename Index, typename Function> | | template <typename Index, typename Function> | |
| void parallel_for(Index first, Index last, const Function& f, tbb::task_gro
up_context &context) { | | void parallel_for(Index first, Index last, const Function& f, tbb::task_gro
up_context &context) { | |
| parallel_for(first, last, static_cast<Index>(1), f, context); | | parallel_for(first, last, static_cast<Index>(1), f, context); | |
| } | | } | |
|
| | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| //@} | | //@} | |
| | | | |
| } // namespace strict_ppl | | } // namespace strict_ppl | |
| | | | |
| using strict_ppl::parallel_for; | | using strict_ppl::parallel_for; | |
| | | | |
| } // namespace tbb | | } // namespace tbb | |
| | | | |
|
| | | #if TBB_PREVIEW_SERIAL_SUBSET | |
| | | #define __TBB_NORMAL_EXECUTION | |
| | | #include "../serial/tbb/parallel_for.h" | |
| | | #undef __TBB_NORMAL_EXECUTION | |
| | | #endif | |
| | | | |
| #endif /* __TBB_parallel_for_H */ | | #endif /* __TBB_parallel_for_H */ | |
| | | | |
End of changes. 21 change blocks. 56 lines changed or deleted, 85 lines changed or added.
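The reworked strict_ppl::parallel_for overloads above map the index space onto a blocked_range of (last - first - 1)/step + 1 chunk indices, so the user function is invoked once for each k = first, first+step, ... below last. A minimal usage sketch with a plain functor, to avoid assuming C++0x lambda support; the functor and names below are illustrative:

    #include "tbb/parallel_for.h"

    struct ScaleEveryThird {
        float* a;
        void operator()( int k ) const { a[k] *= 2.0f; }  // called once per generated index k
    };

    void scale_every_third( float a[], int n ) {
        ScaleEveryThird body = { a };
        tbb::parallel_for( 0, n, 3, body );               // k = 0, 3, 6, ... while k < n
    }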
|
| parallel_reduce.h | | parallel_reduce.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 32 | | skipping to change at line 32 | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_parallel_reduce_H | | #ifndef __TBB_parallel_reduce_H | |
| #define __TBB_parallel_reduce_H | | #define __TBB_parallel_reduce_H | |
| | | | |
|
| | | #include <new> | |
| #include "task.h" | | #include "task.h" | |
| #include "aligned_space.h" | | #include "aligned_space.h" | |
| #include "partitioner.h" | | #include "partitioner.h" | |
|
| #include <new> | | #include "tbb_profiling.h" | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
|
| | | namespace interface6 { | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
|
| //! ITT instrumented routine that stores src into location pointed to b | | using namespace tbb::internal; | |
| y dst. | | | |
| void __TBB_EXPORTED_FUNC itt_store_pointer_with_release_v3( void* dst, | | | |
| void* src ); | | | |
| | | | |
| //! ITT instrumented routine that loads pointer from location pointed t | | | |
| o by src. | | | |
| void* __TBB_EXPORTED_FUNC itt_load_pointer_with_acquire_v3( const void* | | | |
| src ); | | | |
| | | | |
| template<typename T> inline void parallel_reduce_store_body( T*& dst, T | | | |
| * src ) { | | | |
| #if TBB_USE_THREADING_TOOLS | | | |
| itt_store_pointer_with_release_v3(&dst,src); | | | |
| #else | | | |
| __TBB_store_with_release(dst,src); | | | |
| #endif /* TBB_USE_THREADING_TOOLS */ | | | |
| } | | | |
| | | | |
| template<typename T> inline T* parallel_reduce_load_body( T*& src ) { | | | |
| #if TBB_USE_THREADING_TOOLS | | | |
| return static_cast<T*>(itt_load_pointer_with_acquire_v3(&src)); | | | |
| #else | | | |
| return __TBB_load_with_acquire(src); | | | |
| #endif /* TBB_USE_THREADING_TOOLS */ | | | |
| } | | | |
| | | | |
| //! 0 if root, 1 if a left child, 2 if a right child. | | //! 0 if root, 1 if a left child, 2 if a right child. | |
| /** Represented as a char, not enum, for compactness. */ | | /** Represented as a char, not enum, for compactness. */ | |
| typedef char reduction_context; | | typedef char reduction_context; | |
| | | | |
| //! Task type used to combine the partial results of parallel_reduce. | | //! Task type used to combine the partial results of parallel_reduce. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Body> | | template<typename Body> | |
|
| class finish_reduce: public task { | | class finish_reduce: public flag_task { | |
| //! Pointer to body, or NULL if the left child has not yet finished
. | | //! Pointer to body, or NULL if the left child has not yet finished
. | |
|
| Body* my_body; | | | |
| bool has_right_zombie; | | bool has_right_zombie; | |
| const reduction_context my_context; | | const reduction_context my_context; | |
|
| | | Body* my_body; | |
| aligned_space<Body,1> zombie_space; | | aligned_space<Body,1> zombie_space; | |
|
| finish_reduce( char context_ ) : | | finish_reduce( reduction_context context_ ) : | |
| my_body(NULL), | | has_right_zombie(false), // TODO: substitute by flag_task::chil | |
| has_right_zombie(false), | | d_stolen? | |
| my_context(context_) | | my_context(context_), | |
| | | my_body(NULL) | |
| { | | { | |
| } | | } | |
| task* execute() { | | task* execute() { | |
| if( has_right_zombie ) { | | if( has_right_zombie ) { | |
| // Right child was stolen. | | // Right child was stolen. | |
| Body* s = zombie_space.begin(); | | Body* s = zombie_space.begin(); | |
| my_body->join( *s ); | | my_body->join( *s ); | |
| s->~Body(); | | s->~Body(); | |
| } | | } | |
|
| if( my_context==1 ) | | if( my_context==1 ) // left child | |
| parallel_reduce_store_body( static_cast<finish_reduce*>(par | | itt_store_word_with_release( static_cast<finish_reduce*>(pa | |
| ent())->my_body, my_body ); | | rent())->my_body, my_body ); | |
| return NULL; | | return NULL; | |
| } | | } | |
| template<typename Range,typename Body_, typename Partitioner> | | template<typename Range,typename Body_, typename Partitioner> | |
| friend class start_reduce; | | friend class start_reduce; | |
| }; | | }; | |
| | | | |
| //! Task type used to split the work of parallel_reduce. | | //! Task type used to split the work of parallel_reduce. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| class start_reduce: public task { | | class start_reduce: public task { | |
| typedef finish_reduce<Body> finish_type; | | typedef finish_reduce<Body> finish_type; | |
| Body* my_body; | | Body* my_body; | |
| Range my_range; | | Range my_range; | |
|
| typename Partitioner::partition_type my_partition; | | typename Partitioner::task_partition_type my_partition; | |
| reduction_context my_context; | | reduction_context my_context; // TODO: factor out into start_reduce | |
| | | _base | |
| /*override*/ task* execute(); | | /*override*/ task* execute(); | |
| template<typename Body_> | | template<typename Body_> | |
| friend class finish_reduce; | | friend class finish_reduce; | |
| | | | |
|
| | | public: | |
| //! Constructor used for root task | | //! Constructor used for root task | |
| start_reduce( const Range& range, Body* body, Partitioner& partitio
ner ) : | | start_reduce( const Range& range, Body* body, Partitioner& partitio
ner ) : | |
| my_body(body), | | my_body(body), | |
| my_range(range), | | my_range(range), | |
| my_partition(partitioner), | | my_partition(partitioner), | |
| my_context(0) | | my_context(0) | |
| { | | { | |
| } | | } | |
| //! Splitting constructor used to generate children. | | //! Splitting constructor used to generate children. | |
|
| /** this becomes left child. Newly constructed object is right chi
ld. */ | | /** parent_ becomes left child. Newly constructed object is right
child. */ | |
| start_reduce( start_reduce& parent_, split ) : | | start_reduce( start_reduce& parent_, split ) : | |
| my_body(parent_.my_body), | | my_body(parent_.my_body), | |
| my_range(parent_.my_range,split()), | | my_range(parent_.my_range,split()), | |
| my_partition(parent_.my_partition,split()), | | my_partition(parent_.my_partition,split()), | |
| my_context(2) | | my_context(2) | |
| { | | { | |
| my_partition.set_affinity(*this); | | my_partition.set_affinity(*this); | |
| parent_.my_context = 1; | | parent_.my_context = 1; | |
| } | | } | |
|
| | | //! Construct right child from the given range as response to the d | |
| | | emand. | |
| | | /** parent_ remains left child. Newly constructed object is right | |
| | | child. */ | |
| | | start_reduce( start_reduce& parent_, const Range& r, depth_t d ) : | |
| | | my_body(parent_.my_body), | |
| | | my_range(r), | |
| | | my_partition(parent_.my_partition,split()), | |
| | | my_context(2) // right leaf mark | |
| | | { | |
| | | my_partition.set_affinity(*this); | |
| | | my_partition.align_depth( d ); | |
| | | parent_.my_context = 1; // left leaf mark | |
| | | } | |
| //! Update affinity info, if any | | //! Update affinity info, if any | |
| /*override*/ void note_affinity( affinity_id id ) { | | /*override*/ void note_affinity( affinity_id id ) { | |
| my_partition.note_affinity( id ); | | my_partition.note_affinity( id ); | |
| } | | } | |
|
| | | | |
| public: | | | |
| static void run( const Range& range, Body& body, Partitioner& parti
tioner ) { | | static void run( const Range& range, Body& body, Partitioner& parti
tioner ) { | |
| if( !range.empty() ) { | | if( !range.empty() ) { | |
| #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | |
| task::spawn_root_and_wait( *new(task::allocate_root()) star
t_reduce(range,&body,partitioner) ); | | task::spawn_root_and_wait( *new(task::allocate_root()) star
t_reduce(range,&body,partitioner) ); | |
| #else | | #else | |
| // Bound context prevents exceptions from body to affect ne
sting or sibling algorithms, | | // Bound context prevents exceptions from body to affect ne
sting or sibling algorithms, | |
| // and allows users to handle exceptions safely by wrapping
parallel_for in the try-block. | | // and allows users to handle exceptions safely by wrapping
parallel_for in the try-block. | |
| task_group_context context; | | task_group_context context; | |
| task::spawn_root_and_wait( *new(task::allocate_root(context
)) start_reduce(range,&body,partitioner) ); | | task::spawn_root_and_wait( *new(task::allocate_root(context
)) start_reduce(range,&body,partitioner) ); | |
| #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ | | #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ | |
| } | | } | |
| } | | } | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| static void run( const Range& range, Body& body, Partitioner& parti
tioner, task_group_context& context ) { | | static void run( const Range& range, Body& body, Partitioner& parti
tioner, task_group_context& context ) { | |
| if( !range.empty() ) | | if( !range.empty() ) | |
| task::spawn_root_and_wait( *new(task::allocate_root(context
)) start_reduce(range,&body,partitioner) ); | | task::spawn_root_and_wait( *new(task::allocate_root(context
)) start_reduce(range,&body,partitioner) ); | |
| } | | } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
|
| | | //! create a continuation task, serve as callback for partitioner | |
| | | finish_type *create_continuation() { | |
| | | return new( allocate_continuation() ) finish_type(my_context); | |
| | | } | |
| | | //! Run body for range | |
| | | void run_body( Range &r ) { (*my_body)( r ); } | |
| }; | | }; | |
|
| | | | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| task* start_reduce<Range,Body,Partitioner>::execute() { | | task* start_reduce<Range,Body,Partitioner>::execute() { | |
|
| if( my_context==2 ) { | | my_partition.check_being_stolen( *this ); | |
| finish_type* p = static_cast<finish_type*>(parent() ); | | if( my_context==2 ) { // right child | |
| if( !parallel_reduce_load_body(p->my_body) ) { | | finish_type* parent_ptr = static_cast<finish_type*>(parent()); | |
| my_body = new( p->zombie_space.begin() ) Body(*my_body,spli | | if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TOD | |
| t()); | | O: replace by is_stolen_task() or by parent_ptr->ref_count() == 2??? | |
| p->has_right_zombie = true; | | my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_ | |
| | | body,split()); | |
| | | parent_ptr->has_right_zombie = true; | |
| } | | } | |
|
| | | } else __TBB_ASSERT(my_context==0,0);// because left leaf spawns ri | |
| | | ght leafs without recycling | |
| | | my_partition.execute(*this, my_range); | |
| | | if( my_context==1 ) { | |
| | | finish_type* parent_ptr = static_cast<finish_type*>(parent()); | |
| | | __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),0); | |
| | | itt_store_word_with_release(parent_ptr->my_body, my_body ); | |
| | | } | |
| | | return NULL; | |
| | | } | |
| | | | |
| | | #if TBB_PREVIEW_DETERMINISTIC_REDUCE | |
| | | //! Task type used to combine the partial results of parallel_determinis | |
| | | tic_reduce. | |
| | | /** @ingroup algorithms */ | |
| | | template<typename Body> | |
| | | class finish_deterministic_reduce: public task { | |
| | | Body &my_left_body; | |
| | | Body my_right_body; | |
| | | | |
| | | finish_deterministic_reduce( Body &body ) : | |
| | | my_left_body( body ), | |
| | | my_right_body( body, split() ) | |
| | | { | |
| | | } | |
| | | task* execute() { | |
| | | my_left_body.join( my_right_body ); | |
| | | return NULL; | |
| } | | } | |
|
| if( !my_range.is_divisible() || my_partition.should_execute_range(* | | template<typename Range,typename Body_> | |
| this) ) { | | friend class start_deterministic_reduce; | |
| (*my_body)( my_range ); | | }; | |
| if( my_context==1 ) | | | |
| parallel_reduce_store_body(static_cast<finish_type*>(parent | | //! Task type used to split the work of parallel_deterministic_reduce. | |
| ())->my_body, my_body ); | | /** @ingroup algorithms */ | |
| return my_partition.continue_after_execute_range(); | | template<typename Range, typename Body> | |
| | | class start_deterministic_reduce: public task { | |
| | | typedef finish_deterministic_reduce<Body> finish_type; | |
| | | Body &my_body; | |
| | | Range my_range; | |
| | | /*override*/ task* execute(); | |
| | | | |
| | | //! Constructor used for root task | |
| | | start_deterministic_reduce( const Range& range, Body& body ) : | |
| | | my_body( body ), | |
| | | my_range( range ) | |
| | | { | |
| | | } | |
| | | //! Splitting constructor used to generate children. | |
| | | /** parent_ becomes left child. Newly constructed object is right | |
| | | child. */ | |
| | | start_deterministic_reduce( start_deterministic_reduce& parent_, fi | |
| | | nish_type& c ) : | |
| | | my_body( c.my_right_body ), | |
| | | my_range( parent_.my_range, split() ) | |
| | | { | |
| | | } | |
| | | | |
| | | public: | |
| | | static void run( const Range& range, Body& body ) { | |
| | | if( !range.empty() ) { | |
| | | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | |
| | | task::spawn_root_and_wait( *new(task::allocate_root()) star | |
| | | t_deterministic_reduce(range,body) ); | |
| | | #else | |
| | | // Bound context prevents exceptions from body to affect ne | |
| | | sting or sibling algorithms, | |
| | | // and allows users to handle exceptions safely by wrapping | |
| | | parallel_for in the try-block. | |
| | | task_group_context context; | |
| | | task::spawn_root_and_wait( *new(task::allocate_root(context | |
| | | )) start_deterministic_reduce(range,body) ); | |
| | | #endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */ | |
| | | } | |
| | | } | |
| | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | static void run( const Range& range, Body& body, task_group_context | |
| | | & context ) { | |
| | | if( !range.empty() ) | |
| | | task::spawn_root_and_wait( *new(task::allocate_root(context | |
| | | )) start_deterministic_reduce(range,body) ); | |
| | | } | |
| | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | }; | |
| | | | |
| | | template<typename Range, typename Body> | |
| | | task* start_deterministic_reduce<Range,Body>::execute() { | |
| | | if( !my_range.is_divisible() ) { | |
| | | my_body( my_range ); | |
| | | return NULL; | |
| } else { | | } else { | |
|
| finish_type& c = *new( allocate_continuation()) finish_type(my_
context); | | finish_type& c = *new( allocate_continuation() ) finish_type( m
y_body ); | |
| recycle_as_child_of(c); | | recycle_as_child_of(c); | |
| c.set_ref_count(2); | | c.set_ref_count(2); | |
|
| bool delay = my_partition.decide_whether_to_delay(); | | start_deterministic_reduce& b = *new( c.allocate_child() ) star | |
| start_reduce& b = *new( c.allocate_child() ) start_reduce(*this | | t_deterministic_reduce( *this, c ); | |
| ,split()); | | task::spawn(b); | |
| my_partition.spawn_or_delay(delay,b); | | | |
| return this; | | return this; | |
| } | | } | |
| } | | } | |
|
| | | #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */ | |
| | | } // namespace internal | |
| | | //! @endcond | |
| | | } //namespace interfaceX | |
| | | | |
|
| | | //! @cond INTERNAL | |
| | | namespace internal { | |
| | | using interface6::internal::start_reduce; | |
| | | #if TBB_PREVIEW_DETERMINISTIC_REDUCE | |
| | | using interface6::internal::start_deterministic_reduce; | |
| | | #endif | |
| //! Auxiliary class for parallel_reduce; for internal use only. | | //! Auxiliary class for parallel_reduce; for internal use only. | |
| /** The adaptor class that implements \ref parallel_reduce_body_req "pa
rallel_reduce Body" | | /** The adaptor class that implements \ref parallel_reduce_body_req "pa
rallel_reduce Body" | |
| using given \ref parallel_reduce_lambda_req "anonymous function obj
ects". | | using given \ref parallel_reduce_lambda_req "anonymous function obj
ects". | |
| **/ | | **/ | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Range, typename Value, typename RealBody, typename Re
duction> | | template<typename Range, typename Value, typename RealBody, typename Re
duction> | |
| class lambda_reduce_body { | | class lambda_reduce_body { | |
| | | | |
| //FIXME: decide if my_real_body, my_reduction, and identity_element should
be copied or referenced | | //FIXME: decide if my_real_body, my_reduction, and identity_element should
be copied or referenced | |
| // (might require some performance measurements) | | // (might require some performance measurements) | |
| | | | |
| skipping to change at line 382 | | skipping to change at line 464 | |
| /** @ingroup algorithms **/ | | /** @ingroup algorithms **/ | |
| template<typename Range, typename Value, typename RealBody, typename Reduct
ion> | | template<typename Range, typename Value, typename RealBody, typename Reduct
ion> | |
| Value parallel_reduce( const Range& range, const Value& identity, const Rea
lBody& real_body, const Reduction& reduction, | | Value parallel_reduce( const Range& range, const Value& identity, const Rea
lBody& real_body, const Reduction& reduction, | |
| affinity_partitioner& partitioner, task_group_contex
t& context ) { | | affinity_partitioner& partitioner, task_group_contex
t& context ) { | |
| internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(ident
ity, real_body, reduction); | | internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(ident
ity, real_body, reduction); | |
| internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,R
ealBody,Reduction>,affinity_partitioner> | | internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,R
ealBody,Reduction>,affinity_partitioner> | |
| ::run( range, body, partitioner, co
ntext ); | | ::run( range, body, partitioner, co
ntext ); | |
| return body.result(); | | return body.result(); | |
| } | | } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
|
| | | | |
| | | #if TBB_PREVIEW_DETERMINISTIC_REDUCE | |
| | | //! Parallel iteration with deterministic reduction and default partitioner | |
| | | . | |
| | | /** @ingroup algorithms **/ | |
| | | template<typename Range, typename Body> | |
| | | void parallel_deterministic_reduce( const Range& range, Body& body ) { | |
| | | internal::start_deterministic_reduce<Range,Body>::run( range, body ); | |
| | | } | |
| | | | |
| | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | //! Parallel iteration with deterministic reduction, simple partitioner and | |
| | | user-supplied context. | |
| | | /** @ingroup algorithms **/ | |
| | | template<typename Range, typename Body> | |
| | | void parallel_deterministic_reduce( const Range& range, Body& body, task_gr | |
| | | oup_context& context ) { | |
| | | internal::start_deterministic_reduce<Range,Body>::run( range, body, con | |
| | | text ); | |
| | | } | |
| | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
| | | /** parallel_reduce overloads that work with anonymous function objects | |
| | | (see also \ref parallel_reduce_lambda_req "requirements on parallel_red | |
| | | uce anonymous function objects"). **/ | |
| | | | |
| | | //! Parallel iteration with deterministic reduction and default partitioner | |
| | | . | |
| | | /** @ingroup algorithms **/ | |
| | | template<typename Range, typename Value, typename RealBody, typename Reduct | |
| | | ion> | |
| | | Value parallel_deterministic_reduce( const Range& range, const Value& ident | |
| | | ity, const RealBody& real_body, const Reduction& reduction ) { | |
| | | internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(ident | |
| | | ity, real_body, reduction); | |
| | | internal::start_deterministic_reduce<Range,internal::lambda_reduce_body | |
| | | <Range,Value,RealBody,Reduction> > | |
| | | ::run(range, body); | |
| | | return body.result(); | |
| | | } | |
| | | | |
| | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | //! Parallel iteration with deterministic reduction, simple partitioner and | |
| | | user-supplied context. | |
| | | /** @ingroup algorithms **/ | |
| | | template<typename Range, typename Value, typename RealBody, typename Reduct | |
| | | ion> | |
| | | Value parallel_deterministic_reduce( const Range& range, const Value& ident | |
| | | ity, const RealBody& real_body, const Reduction& reduction, | |
| | | task_group_context& context ) { | |
| | | internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(ident | |
| | | ity, real_body, reduction); | |
| | | internal::start_deterministic_reduce<Range,internal::lambda_reduce_body | |
| | | <Range,Value,RealBody,Reduction> > | |
| | | ::run( range, body, context ); | |
| | | return body.result(); | |
| | | } | |
| | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | #endif /* TBB_PREVIEW_DETERMINISTIC_REDUCE */ | |
| //@} | | //@} | |
| | | | |
| } // namespace tbb | | } // namespace tbb | |
| | | | |
| #endif /* __TBB_parallel_reduce_H */ | | #endif /* __TBB_parallel_reduce_H */ | |
| | | | |
End of changes. 25 change blocks. 61 lines changed or deleted, 210 lines changed or added.
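The preview parallel_deterministic_reduce entry points added above accept the same Body concept as parallel_reduce but always split and join in the same tree shape, which is intended to make reductions (notably floating-point sums) reproducible from run to run. A minimal sketch, assuming the preview macro is defined before inclusion; SumBody and deterministic_sum are illustrative names:

    #define TBB_PREVIEW_DETERMINISTIC_REDUCE 1
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"
    #include <cstddef>

    // Body concept: splitting constructor, operator() accumulating a subrange, join().
    struct SumBody {
        const float* a;
        float value;
        SumBody( const float* a_ ) : a(a_), value(0) {}
        SumBody( SumBody& other, tbb::split ) : a(other.a), value(0) {}
        void operator()( const tbb::blocked_range<std::size_t>& r ) {
            for( std::size_t i=r.begin(); i!=r.end(); ++i ) value += a[i];
        }
        void join( SumBody& rhs ) { value += rhs.value; }
    };

    float deterministic_sum( const float* a, std::size_t n ) {
        SumBody body(a);
        tbb::parallel_deterministic_reduce( tbb::blocked_range<std::size_t>(0,n), body );
        return body.value;   // same result for the same input on every run
    }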
|
| partitioner.h | | partitioner.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 32 | | skipping to change at line 32 | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_partitioner_H | | #ifndef __TBB_partitioner_H | |
| #define __TBB_partitioner_H | | #define __TBB_partitioner_H | |
| | | | |
|
| | | #ifndef __TBB_INITIAL_CHUNKS | |
| | | #define __TBB_INITIAL_CHUNKS 2 | |
| | | #endif | |
| | | #ifndef __TBB_RANGE_POOL_CAPACITY | |
| | | #define __TBB_RANGE_POOL_CAPACITY 8 | |
| | | #endif | |
| | | #ifndef __TBB_INIT_DEPTH | |
| | | #define __TBB_INIT_DEPTH 5 | |
| | | #endif | |
| | | | |
| #include "task.h" | | #include "task.h" | |
|
| | | #include "aligned_space.h" | |
| | | #include "atomic.h" | |
| | | | |
| | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| | | // Workaround for overzealous compiler warnings | |
| | | #pragma warning (push) | |
| | | #pragma warning (disable: 4244) | |
| | | #endif | |
| | | | |
| namespace tbb { | | namespace tbb { | |
|
| | | | |
| | | class auto_partitioner; | |
| | | class simple_partitioner; | |
| class affinity_partitioner; | | class affinity_partitioner; | |
|
| | | namespace interface6 { | |
| | | namespace internal { | |
| | | class affinity_partition_type; | |
| | | } | |
| | | } | |
| | | | |
|
| //! @cond INTERNAL | | | |
| namespace internal { | | namespace internal { | |
| size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor(); | | size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor(); | |
| | | | |
|
| //! Defines entry points into tbb run-time library; | | //! Defines entry point for affinity partitioner into tbb run-time library. | |
| /** The entry points are the constructor and destructor. */ | | | |
| class affinity_partitioner_base_v3: no_copy { | | class affinity_partitioner_base_v3: no_copy { | |
| friend class tbb::affinity_partitioner; | | friend class tbb::affinity_partitioner; | |
|
| | | friend class tbb::interface6::internal::affinity_partition_type; | |
| //! Array that remembers affinities of tree positions to affinity_id. | | //! Array that remembers affinities of tree positions to affinity_id. | |
| /** NULL if my_size==0. */ | | /** NULL if my_size==0. */ | |
| affinity_id* my_array; | | affinity_id* my_array; | |
| //! Number of elements in my_array. | | //! Number of elements in my_array. | |
| size_t my_size; | | size_t my_size; | |
| //! Zeros the fields. | | //! Zeros the fields. | |
| affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {} | | affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {} | |
| //! Deallocates my_array. | | //! Deallocates my_array. | |
| ~affinity_partitioner_base_v3() {resize(0);} | | ~affinity_partitioner_base_v3() {resize(0);} | |
| //! Resize my_array. | | //! Resize my_array. | |
| /** Retains values if resulting size is the same. */ | | /** Retains values if resulting size is the same. */ | |
| void __TBB_EXPORTED_METHOD resize( unsigned factor ); | | void __TBB_EXPORTED_METHOD resize( unsigned factor ); | |
|
| friend class affinity_partition_type; | | | |
| }; | | }; | |
| | | | |
|
| //! Provides default methods for partition objects without affinity. | | //! Provides backward-compatible methods for partition objects without affi
nity. | |
| class partition_type_base { | | class partition_type_base { | |
| public: | | public: | |
| void set_affinity( task & ) {} | | void set_affinity( task & ) {} | |
| void note_affinity( task::affinity_id ) {} | | void note_affinity( task::affinity_id ) {} | |
| task* continue_after_execute_range() {return NULL;} | | task* continue_after_execute_range() {return NULL;} | |
| bool decide_whether_to_delay() {return false;} | | bool decide_whether_to_delay() {return false;} | |
| void spawn_or_delay( bool, task& b ) { | | void spawn_or_delay( bool, task& b ) { | |
| task::spawn(b); | | task::spawn(b); | |
| } | | } | |
| }; | | }; | |
| | | | |
|
| class affinity_partition_type; | | | |
| | | | |
| template<typename Range, typename Body, typename Partitioner> class start_f | | | |
| or; | | | |
| template<typename Range, typename Body, typename Partitioner> class start_r | | | |
| educe; | | | |
| template<typename Range, typename Body> class start_reduce_with_affinity; | | | |
| template<typename Range, typename Body, typename Partitioner> class start_s
can; | | template<typename Range, typename Body, typename Partitioner> class start_s
can; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| //! @endcond | | //! @endcond | |
| | | | |
|
| //! A simple partitioner | | namespace serial { | |
| /** Divides the range until the range is not divisible. | | namespace interface6 { | |
| @ingroup algorithms */ | | template<typename Range, typename Body, typename Partitioner> class start_f | |
| class simple_partitioner { | | or; | |
| public: | | } | |
| simple_partitioner() {} | | } | |
| private: | | | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_for; | | | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_reduce; | | | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_scan; | | | |
| | | | |
|
| class partition_type: public internal::partition_type_base { | | namespace interface6 { | |
| public: | | //! @cond INTERNAL | |
| bool should_execute_range(const task& ) {return false;} | | namespace internal { | |
| partition_type( const simple_partitioner& ) {} | | using namespace tbb::internal; | |
| partition_type( const partition_type&, split ) {} | | template<typename Range, typename Body, typename Partitioner> class start_f | |
| }; | | or; | |
| }; | | template<typename Range, typename Body, typename Partitioner> class start_r | |
| | | educe; | |
| | | | |
|
| //! An auto partitioner | | //! Join task node that contains shared flag for stealing feedback | |
| /** The range is initial divided into several large chunks. | | class flag_task: public task { | |
| Chunks are further subdivided into VICTIM_CHUNKS pieces if they are sto | | | |
| len and divisible. | | | |
| @ingroup algorithms */ | | | |
| class auto_partitioner { | | | |
| public: | | public: | |
|
| auto_partitioner() {} | | tbb::atomic<bool> child_stolen; | |
| | | flag_task() { child_stolen = false; } | |
| private: | | task* execute() { return NULL; } | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | }; | |
| ass internal::start_for; | | | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_reduce; | | | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_scan; | | | |
| | | | |
|
| class partition_type: public internal::partition_type_base { | | //! Task to signal the demand without carrying the work | |
| size_t num_chunks; | | class signal_task: public task { | |
| static const size_t VICTIM_CHUNKS = 4; | | | |
| public: | | public: | |
|
| bool should_execute_range(const task &t) { | | task* execute() { | |
| if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() ) | | if( is_stolen_task() ) { | |
| num_chunks = VICTIM_CHUNKS; | | static_cast<flag_task*>(parent())->child_stolen = true; | |
| return num_chunks==1; | | | |
| } | | | |
| partition_type( const auto_partitioner& ) : num_chunks(internal::ge | | | |
| t_initial_auto_partitioner_divisor()) {} | | | |
| partition_type( partition_type& pt, split ) { | | | |
| num_chunks = pt.num_chunks /= 2u; | | | |
| } | | } | |
|
| }; | | return NULL; | |
| | | } | |
| }; | | }; | |
| | | | |
|
| //! An affinity partitioner | | //! Depth is a relative depth of recursive division inside a range pool. Re | |
| class affinity_partitioner: internal::affinity_partitioner_base_v3 { | | lative depth allows | |
| public: | | //! infinite absolute depth of the recursion for heavily imbalanced workloa | |
| affinity_partitioner() {} | | ds whose range is represented | |
| | | //! by a number that cannot fit into a machine word. | |
| private: | | typedef unsigned char depth_t; | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | | |
| ass internal::start_for; | | //! Range pool stores ranges of type T in a circular buffer with MaxCapacit | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | y | |
| ass internal::start_reduce; | | template <typename T, depth_t MaxCapacity> | |
| template<typename Range, typename Body> friend class internal::start_re | | class range_vector { | |
| duce_with_affinity; | | depth_t my_head; | |
| template<typename Range, typename Body, typename Partitioner> friend cl | | depth_t my_tail; | |
| ass internal::start_scan; | | depth_t my_size; | |
| | | depth_t my_depth[MaxCapacity]; // relative depths of stored ranges | |
| | | tbb::aligned_space<T, MaxCapacity> my_pool; | |
| | | | |
|
| typedef internal::affinity_partition_type partition_type; | | public: | |
| friend class internal::affinity_partition_type; | | //! initialize via first range in pool | |
| | | range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) { | |
| | | my_depth[0] = 0; | |
| | | new( my_pool.begin() ) T(elem);//TODO: std::move? | |
| | | } | |
| | | ~range_vector() { | |
| | | while( !empty() ) pop_back(); | |
| | | } | |
| | | bool empty() const { return my_size == 0; } | |
| | | depth_t size() const { return my_size; } | |
| | | //! Populates range pool via ranges up to max depth or while divisible | |
| | | //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up | |
| | | to two 1/4 pieces | |
| | | void split_to_fill(depth_t max_depth) { | |
| | | while( my_size < MaxCapacity && my_depth[my_head] < max_depth | |
| | | && my_pool.begin()[my_head].is_divisible() ) { | |
| | | depth_t prev = my_head; | |
| | | my_head = (my_head + 1) % MaxCapacity; | |
| | | new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy | |
| | | TODO: std::move? | |
| | | my_pool.begin()[prev].~T(); // instead of assignment | |
| | | new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); | |
| | | // do 'inverse' split | |
| | | my_depth[my_head] = ++my_depth[prev]; | |
| | | my_size++; | |
| | | } | |
| | | } | |
| | | void pop_back() { | |
| | | __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size | |
| | | "); | |
| | | my_pool.begin()[my_head].~T(); | |
| | | my_size--; | |
| | | my_head = (my_head + MaxCapacity - 1) % MaxCapacity; | |
| | | } | |
| | | void pop_front() { | |
| | | __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty siz | |
| | | e"); | |
| | | my_pool.begin()[my_tail].~T(); | |
| | | my_size--; | |
| | | my_tail = (my_tail + 1) % MaxCapacity; | |
| | | } | |
| | | T& back() { | |
| | | __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size"); | |
| | | return my_pool.begin()[my_head]; | |
| | | } | |
| | | T& front() { | |
| | | __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size"); | |
| | | return my_pool.begin()[my_tail]; | |
| | | } | |
| | | //! similarly to front(), returns depth of the first range in the pool | |
| | | depth_t front_depth() { | |
| | | __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty s | |
| | | ize"); | |
| | | return my_depth[my_tail]; | |
| | | } | |
| }; | | }; | |
| | | | |
|
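To make the behaviour of the range pool above concrete, a small illustration-only sketch follows; range_vector sits in an internal namespace and is not a public interface, and the capacity (8) and depth limit (3) below are arbitrary choices for the example:

    // Illustration only: fills and drains the internal range pool shown above.
    #include "tbb/partitioner.h"
    #include "tbb/blocked_range.h"

    void drain_range_pool() {
        tbb::blocked_range<int> whole(0, 1024);   // divisible range, grainsize 1
        tbb::interface6::internal::range_vector<tbb::blocked_range<int>, 8> pool(whole);
        pool.split_to_fill(3);                    // split the newest piece until relative depth 3
        while( !pool.empty() ) {
            tbb::blocked_range<int>& piece = pool.back();  // deepest piece comes out first
            // ... a body would run on `piece` here ...
            pool.pop_back();
        }
    }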
| //! @cond INTERNAL | | //! Provides default methods for partition objects and common algorithm blo | |
| namespace internal { | | cks. | |
| | | template <typename Partition> | |
| | | struct partition_type_base { | |
| | | // decision makers | |
| | | void set_affinity( task & ) {} | |
| | | void note_affinity( task::affinity_id ) {} | |
| | | bool check_being_stolen(task &) { return false; } // part of old should | |
| | | _execute_range() | |
| | | bool check_for_demand(task &) { return false; } | |
| | | bool divisions_left() { return true; } // part of old should_execute_ra | |
| | | nge() | |
| | | bool should_create_trap() { return false; } | |
| | | depth_t max_depth() { return 0; } | |
| | | void align_depth(depth_t) { } | |
| | | // common function blocks | |
| | | Partition& derived() { return *static_cast<Partition*>(this); } | |
| | | template<typename StartType> | |
| | | flag_task* split_work(StartType &start) { | |
| | | flag_task* parent_ptr = start.create_continuation(); // the type he | |
| | | re is to express expectation | |
| | | start.set_parent(parent_ptr); | |
| | | parent_ptr->set_ref_count(2); | |
| | | StartType& right_work = *new( parent_ptr->allocate_child() ) StartT | |
| | | ype(start, split()); | |
| | | start.spawn(right_work); | |
| | | return parent_ptr; | |
| | | } | |
| | | template<typename StartType, typename Range> | |
| | | void execute(StartType &start, Range &range) { | |
| | | // The algorithm in a few words ([] denotes calls to decision metho | |
| | | ds of the partitioner): | |
| | | // [If this task is stolen, adjust depth and divisions if necessary | |
| | | , set flag]. | |
| | | // If range is divisible { | |
| | | // Spread the work while [initial divisions left]; | |
| | | // Create trap task [if necessary]; | |
| | | // } | |
| | | // If not divisible or [max depth is reached], execute, else do the | |
| | | range pool part | |
| | | task* parent_ptr = start.parent(); | |
| | | if( range.is_divisible() ) { | |
| | | if( derived().divisions_left() ) | |
| | | do parent_ptr = split_work(start); // split until divisions | |
| | | _left() | |
| | | while( range.is_divisible() && derived().divisions_left() ) | |
| | | ; | |
| | | if( derived().should_create_trap() ) { // only for range pool | |
| | | if( parent_ptr->ref_count() > 1 ) { // create new parent if | |
| | | necessary | |
| | | parent_ptr = start.create_continuation(); | |
| | | start.set_parent(parent_ptr); | |
| | | } else __TBB_ASSERT(parent_ptr->ref_count() == 1, NULL); | |
| | | parent_ptr->set_ref_count(2); // safe because parent has on | |
| | | ly one reference | |
| | | signal_task& right_signal = *new( parent_ptr->allocate_chil | |
| | | d() ) signal_task(); | |
| | | start.spawn(right_signal); // pure signal is to avoid deep | |
| | | recursion in the end | |
| | | } | |
| | | } | |
| | | if( !range.is_divisible() || !derived().max_depth() ) | |
| | | start.run_body( range ); // simple partitioner goes always here | |
| | | else { // do range pool | |
| | | internal::range_vector<Range, Partition::range_pool_size> range | |
| | | _pool(range); | |
| | | do { | |
| | | range_pool.split_to_fill(derived().max_depth()); // fill ra | |
| | | nge pool | |
| | | if( derived().check_for_demand( start ) ) { | |
| | | if( range_pool.size() > 1 ) { | |
| | | parent_ptr = start.create_continuation(); | |
| | | start.set_parent(parent_ptr); | |
| | | parent_ptr->set_ref_count(2); | |
| | | StartType& right_work = *new( parent_ptr->allocate_ | |
| | | child() ) StartType(start, range_pool.front(), range_pool.front_depth()); | |
| | | start.spawn(right_work); | |
| | | range_pool.pop_front(); | |
| | | continue; | |
| | | } | |
| | | if( range_pool.back().is_divisible() ) // was not enoug | |
| | | h depth to fork a task | |
| | | continue; // note: check_for_demand() should guaran | |
| | | tee increasing max_depth() next time | |
| | | } | |
| | | start.run_body( range_pool.back() ); | |
| | | range_pool.pop_back(); | |
| | | } while( !range_pool.empty() && !start.is_cancelled() ); | |
| | | } | |
| | | } | |
| | | }; | |
| | | | |
|
| class affinity_partition_type: public no_copy { | | //! Provides default methods for auto (adaptive) partition objects. | |
| //! Must be power of two | | template <typename Partition> | |
| static const unsigned factor = 16; | | struct auto_partition_type_base : partition_type_base<Partition> { | |
| static const size_t VICTIM_CHUNKS = 4; | | size_t my_divisor; | |
| | | depth_t my_max_depth; | |
| | | auto_partition_type_base() : my_max_depth(__TBB_INIT_DEPTH) { | |
| | | my_divisor = tbb::internal::get_initial_auto_partitioner_divisor()* | |
| | | __TBB_INITIAL_CHUNKS/4; | |
| | | __TBB_ASSERT(my_divisor, "initial value of get_initial_auto_partiti | |
| | | oner_divisor() is not valid"); | |
| | | } | |
| | | auto_partition_type_base(auto_partition_type_base &src, split) { | |
| | | my_max_depth = src.my_max_depth; | |
| | | #if __TBB_INITIAL_TASK_IMBALANCE | |
| | | if( src.my_divisor <= 1 ) my_divisor = 0; | |
| | | else my_divisor = src.my_divisor = (src.my_divisor+1u) / 2u; | |
| | | #else | |
| | | my_divisor = src.my_divisor / 2u; | |
| | | src.my_divisor = src.my_divisor - my_divisor; // TODO: check the ef | |
| | | fect separately | |
| | | if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2( | |
| | | src.my_divisor/my_divisor)); | |
| | | #endif | |
| | | } | |
| | | bool check_being_stolen( task &t) { // part of old should_execute_range | |
| | | () | |
| | | if( !my_divisor ) { | |
| | | my_divisor = 1; // todo: replace by on-stack flag (partition_st | |
| | | ate's member)? | |
| | | if( t.is_stolen_task() ) { | |
| | | #if TBB_USE_EXCEPTIONS | |
| | | // RTTI is available, check whether the cast is valid | |
| | | __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0); | |
| | | // correctness of the cast relies on avoiding the root task | |
| | | for which: | |
| | | // - initial value of my_divisor != 0 (protected by separat | |
| | | e assertion) | |
| | | // - is_stolen_task() always returns false for the root tas | |
| | | k. | |
| | | #endif | |
| | | static_cast<flag_task*>(t.parent())->child_stolen = true; | |
| | | my_max_depth++; | |
| | | return true; | |
| | | } | |
| | | } | |
| | | return false; | |
| | | } | |
| | | bool divisions_left() { // part of old should_execute_range() | |
| | | if( my_divisor > 1 ) return true; | |
| | | if( my_divisor && my_max_depth > 1 ) { // can split the task and on | |
| | | ce more internally. TODO: on-stack flag instead | |
| | | // keep same fragmentation while splitting for the local task p | |
| | | ool | |
| | | my_max_depth--; | |
| | | my_divisor = 0; | |
| | | return true; | |
| | | } else return false; | |
| | | } | |
| | | bool should_create_trap() { | |
| | | return my_divisor > 0; | |
| | | } | |
| | | bool check_for_demand(task &t) { | |
| | | if( static_cast<flag_task*>(t.parent())->child_stolen ) { | |
| | | my_max_depth++; | |
| | | return true; | |
| | | } else return false; | |
| | | } | |
| | | void align_depth(depth_t base) { | |
| | | __TBB_ASSERT(base <= my_max_depth, 0); | |
| | | my_max_depth -= base; | |
| | | } | |
| | | depth_t max_depth() { return my_max_depth; } | |
| | | }; | |
| | | | |
|
| internal::affinity_id* my_array; | | //! Provides default methods for affinity (adaptive) partition objects. | |
| task_list delay_list; | | class affinity_partition_type : public auto_partition_type_base<affinity_pa | |
| unsigned map_begin, map_end; | | rtition_type> { | |
| size_t num_chunks; | | static const unsigned factor_power = 4; | |
| | | static const unsigned factor = 1<<factor_power; | |
| | | bool my_delay; | |
| | | unsigned map_begin, map_end, map_mid; | |
| | | tbb::internal::affinity_id* my_array; | |
| | | void set_mid() { | |
| | | unsigned d = (map_end - map_begin)/2; // we could add 1 but it is r | |
| | | ather for LIFO affinity | |
| | | if( d > factor ) | |
| | | d &= 0u-factor; | |
| | | map_mid = map_end - d; | |
| | | } | |
| public: | | public: | |
|
| affinity_partition_type( affinity_partitioner& ap ) { | | affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& a
p ) { | |
| __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two"
); | | __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two"
); | |
| ap.resize(factor); | | ap.resize(factor); | |
| my_array = ap.my_array; | | my_array = ap.my_array; | |
| map_begin = 0; | | map_begin = 0; | |
| map_end = unsigned(ap.my_size); | | map_end = unsigned(ap.my_size); | |
|
| num_chunks = internal::get_initial_auto_partitioner_divisor(); | | set_mid(); | |
| | | my_delay = true; | |
| | | my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks be distr | |
| | | ibuted across workers | |
| | | my_max_depth = factor_power+1; // the first factor_power ranges wil | |
| | | l be spawned, and >=1 ranges should be left | |
| | | __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); | |
| } | | } | |
|
| affinity_partition_type(affinity_partition_type& p, split) : my_array(p | | affinity_partition_type(affinity_partition_type& p, split) | |
| .my_array) { | | : auto_partition_type_base<affinity_partition_type>(p, split()), my | |
| | | _array(p.my_array) { | |
| __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi
n)%factor==0, NULL ); | | __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi
n)%factor==0, NULL ); | |
|
| num_chunks = p.num_chunks /= 2; | | map_end = p.map_end; | |
| unsigned e = p.map_end; | | map_begin = p.map_end = p.map_mid; | |
| unsigned d = (e - p.map_begin)/2; | | set_mid(); p.set_mid(); | |
| if( d>factor ) | | my_delay = p.my_delay; | |
| d &= 0u-factor; | | | |
| map_end = e; | | | |
| map_begin = p.map_end = e-d; | | | |
| } | | | |
| | | | |
| bool should_execute_range(const task &t) { | | | |
| if( num_chunks < VICTIM_CHUNKS && t.is_stolen_task() ) | | | |
| num_chunks = VICTIM_CHUNKS; | | | |
| return num_chunks == 1; | | | |
| } | | } | |
|
| | | | |
| void set_affinity( task &t ) { | | void set_affinity( task &t ) { | |
| if( map_begin<map_end ) | | if( map_begin<map_end ) | |
| t.set_affinity( my_array[map_begin] ); | | t.set_affinity( my_array[map_begin] ); | |
| } | | } | |
| void note_affinity( task::affinity_id id ) { | | void note_affinity( task::affinity_id id ) { | |
| if( map_begin<map_end ) | | if( map_begin<map_end ) | |
| my_array[map_begin] = id; | | my_array[map_begin] = id; | |
| } | | } | |
|
| task* continue_after_execute_range() { | | bool check_for_demand( task &t ) { | |
| task* first = NULL; | | if( !my_delay ) { | |
| if( !delay_list.empty() ) { | | if( map_mid<map_end ) { | |
| first = &delay_list.pop_front(); | | __TBB_ASSERT(my_max_depth>__TBB_Log2(map_end-map_mid), 0); | |
| while( !delay_list.empty() ) { | | return true;// do not do my_max_depth++ here, but be sure m | |
| task::spawn(*first); | | y_max_depth is big enough | |
| first = &delay_list.pop_front(); | | | |
| } | | } | |
|
| } | | if( static_cast<flag_task*>(t.parent())->child_stolen ) { | |
| return first; | | my_max_depth++; | |
| | | return true; | |
| | | } | |
| | | } else my_delay = false; | |
| | | return false; | |
| } | | } | |
|
| bool decide_whether_to_delay() { | | bool divisions_left() { // part of old should_execute_range() | |
| // The possible underflow caused by "-1u" is deliberate | | return my_divisor > 1; | |
| return (map_begin&(factor-1))==0 && map_end-map_begin-1u<factor; | | | |
| } | | | |
| void spawn_or_delay( bool delay, task& b ) { | | | |
| if( delay ) | | | |
| delay_list.push_back(b); | | | |
| else | | | |
| task::spawn(b); | | | |
| } | | | |
| | | | |
| ~affinity_partition_type() { | | | |
| // The delay_list can be non-empty if an exception is thrown. | | | |
| while( !delay_list.empty() ) { | | | |
| task& t = delay_list.pop_front(); | | | |
| t.destroy(t); | | | |
| } | | | |
| } | | } | |
|
| | | bool should_create_trap() { | |
| | | return true; // TODO: rethink for the stage after memorizing level | |
| | | } | |
| | | static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; | |
| }; | | }; | |
| | | | |
|
| } // namespace internal | | class auto_partition_type: public auto_partition_type_base<auto_partition_t | |
| | | ype> { | |
| | | public: | |
| | | auto_partition_type( const auto_partitioner& ) {} | |
| | | auto_partition_type( auto_partition_type& src, split) | |
| | | : auto_partition_type_base<auto_partition_type>(src, split()) {} | |
| | | static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY; | |
| | | }; | |
| | | | |
| | | class simple_partition_type: public partition_type_base<simple_partition_ty | |
| | | pe> { | |
| | | public: | |
| | | simple_partition_type( const simple_partitioner& ) {} | |
| | | simple_partition_type( const simple_partition_type&, split ) {} | |
| | | //! simplified algorithm | |
| | | template<typename StartType, typename Range> | |
| | | void execute(StartType &start, Range &range) { | |
| | | while( range.is_divisible() ) | |
| | | split_work( start ); | |
| | | start.run_body( range ); | |
| | | } | |
| | | //static const unsigned range_pool_size = 1; - not necessary because ex | |
| | | ecute() is overridden | |
| | | }; | |
| | | | |
| | | //! Backward-compatible partition for auto and affinity partition objects. | |
| | | class old_auto_partition_type: public tbb::internal::partition_type_base { | |
| | | size_t num_chunks; | |
| | | static const size_t VICTIM_CHUNKS = 4; | |
| | | public: | |
| | | bool should_execute_range(const task &t) { | |
| | | if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() ) | |
| | | num_chunks = VICTIM_CHUNKS; | |
| | | return num_chunks==1; | |
| | | } | |
| | | old_auto_partition_type( const auto_partitioner& ) | |
| | | : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_I | |
| | | NITIAL_CHUNKS/4) {} | |
| | | old_auto_partition_type( const affinity_partitioner& ) | |
| | | : num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_I | |
| | | NITIAL_CHUNKS/4) {} | |
| | | old_auto_partition_type( old_auto_partition_type& pt, split ) { | |
| | | num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u; | |
| | | } | |
| | | }; | |
| | | | |
| | | } // namespace interfaceX::internal | |
| //! @endcond | | //! @endcond | |
|
| | | } // namespace interfaceX | |
| | | | |
| | | //! A simple partitioner | |
| | | /** Divides the range until the range is not divisible. | |
| | | @ingroup algorithms */ | |
| | | class simple_partitioner { | |
| | | public: | |
| | | simple_partitioner() {} | |
| | | private: | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass serial::interface6::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_reduce; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass internal::start_scan; | |
| | | // backward compatibility | |
| | | class partition_type: public internal::partition_type_base { | |
| | | public: | |
| | | bool should_execute_range(const task& ) {return false;} | |
| | | partition_type( const simple_partitioner& ) {} | |
| | | partition_type( const partition_type&, split ) {} | |
| | | }; | |
| | | // new implementation just extends existing interface | |
| | | typedef interface6::internal::simple_partition_type task_partition_type | |
| | | ; | |
| | | }; | |
| | | | |
| | | //! An auto partitioner | |
| | | /** The range is initially divided into several large chunks. | |
| | | Chunks are further subdivided into smaller pieces if demand is detecte | |
| | | d and they are divisible. | |
| | | @ingroup algorithms */ | |
| | | class auto_partitioner { | |
| | | public: | |
| | | auto_partitioner() {} | |
| | | | |
| | | private: | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass serial::interface6::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_reduce; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass internal::start_scan; | |
| | | // backward compatibility | |
| | | typedef interface6::internal::old_auto_partition_type partition_type; | |
| | | // new implementation just extends existing interface | |
| | | typedef interface6::internal::auto_partition_type task_partition_type; | |
| | | }; | |
| | | | |
| | | //! An affinity partitioner | |
| | | class affinity_partitioner: internal::affinity_partitioner_base_v3 { | |
| | | public: | |
| | | affinity_partitioner() {} | |
| | | | |
| | | private: | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass serial::interface6::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_for; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass interface6::internal::start_reduce; | |
| | | template<typename Range, typename Body, typename Partitioner> friend cl | |
| | | ass internal::start_scan; | |
| | | // backward compatibility - for parallel_scan only | |
| | | typedef interface6::internal::old_auto_partition_type partition_type; | |
| | | // new implementation just extends existing interface | |
| | | typedef interface6::internal::affinity_partition_type task_partition_ty | |
| | | pe; | |
| | | }; | |
| | | | |
| } // namespace tbb | | } // namespace tbb | |
| | | | |
|
| | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| | | #pragma warning (pop) | |
| | | #endif // warning 4244 is back | |
| | | #undef __TBB_INITIAL_CHUNKS | |
| | | #undef __TBB_RANGE_POOL_CAPACITY | |
| | | #undef __TBB_INIT_DEPTH | |
| #endif /* __TBB_partitioner_H */ | | #endif /* __TBB_partitioner_H */ | |
| | | | |
End of changes. 35 change blocks. |
| 132 lines changed or deleted | | 467 lines changed or added | |
|
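For context, a short sketch of how the public partitioners above are selected from user code; the partitioner classes themselves are only tags, while the partition_type/task_partition_type typedefs do the work. The function and data names are illustrative, and lambda support is assumed:

    // Sketch: the partitioner is passed as the last argument of the loop template.
    // affinity_partitioner should live across repeated calls so the recorded
    // affinity can be replayed; auto_partitioner is the adaptive default.
    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include "tbb/partitioner.h"

    void relax( float* a, size_t n, int passes ) {
        tbb::affinity_partitioner ap;                  // reused on every pass
        for( int p = 0; p < passes; ++p )
            tbb::parallel_for(
                tbb::blocked_range<size_t>(0, n),
                [=]( const tbb::blocked_range<size_t>& r ) {
                    for( size_t i = r.begin(); i != r.end(); ++i )
                        a[i] *= 0.5f;
                },
                ap );                                  // replays affinity recorded earlier
        tbb::parallel_for(                             // unconditional splitting down to grainsize
            tbb::blocked_range<size_t>(0, n, 1024),
            [=]( const tbb::blocked_range<size_t>& r ) {
                for( size_t i = r.begin(); i != r.end(); ++i )
                    a[i] += 1.0f;
            },
            tbb::simple_partitioner() );
    }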
| pipeline.h | | pipeline.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 34 | | skipping to change at line 34 | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_pipeline_H | | #ifndef __TBB_pipeline_H | |
| #define __TBB_pipeline_H | | #define __TBB_pipeline_H | |
| | | | |
| #include "atomic.h" | | #include "atomic.h" | |
| #include "task.h" | | #include "task.h" | |
|
| | | #include "tbb_allocator.h" | |
| #include <cstddef> | | #include <cstddef> | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| class pipeline; | | class pipeline; | |
| class filter; | | class filter; | |
| | | | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
| | | | |
| skipping to change at line 56 | | skipping to change at line 57 | |
| | | | |
| typedef unsigned long Token; | | typedef unsigned long Token; | |
| typedef long tokendiff_t; | | typedef long tokendiff_t; | |
| class stage_task; | | class stage_task; | |
| class input_buffer; | | class input_buffer; | |
| class pipeline_root_task; | | class pipeline_root_task; | |
| class pipeline_cleaner; | | class pipeline_cleaner; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| | | | |
|
| namespace interface5 { | | namespace interface6 { | |
| template<typename T, typename U> class filter_t; | | template<typename T, typename U> class filter_t; | |
| | | | |
| namespace internal { | | namespace internal { | |
| class pipeline_proxy; | | class pipeline_proxy; | |
| } | | } | |
| } | | } | |
| | | | |
| //! @endcond | | //! @endcond | |
| | | | |
| //! A stage in a pipeline. | | //! A stage in a pipeline. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| class filter: internal::no_copy { | | class filter: internal::no_copy { | |
| private: | | private: | |
| //! Value used to mark "not in pipeline" | | //! Value used to mark "not in pipeline" | |
| static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intp
tr_t(-1));} | | static filter* not_in_pipeline() {return reinterpret_cast<filter*>(intp
tr_t(-1));} | |
|
| | | protected: | |
| //! The lowest bit 0 is for parallel vs. serial | | //! The lowest bit 0 is for parallel vs. serial | |
| static const unsigned char filter_is_serial = 0x1; | | static const unsigned char filter_is_serial = 0x1; | |
| | | | |
| //! 4th bit distinguishes ordered vs unordered filters. | | //! 4th bit distinguishes ordered vs unordered filters. | |
| /** The bit was not set for parallel filters in TBB 2.1 and earlier, | | /** The bit was not set for parallel filters in TBB 2.1 and earlier, | |
| but is_ordered() function always treats parallel filters as out of
order. */ | | but is_ordered() function always treats parallel filters as out of
order. */ | |
| static const unsigned char filter_is_out_of_order = 0x1<<4; | | static const unsigned char filter_is_out_of_order = 0x1<<4; | |
| | | | |
| //! 5th bit distinguishes thread-bound and regular filters. | | //! 5th bit distinguishes thread-bound and regular filters. | |
| static const unsigned char filter_is_bound = 0x1<<5; | | static const unsigned char filter_is_bound = 0x1<<5; | |
| | | | |
|
| | | //! 6th bit marks input filters emitting small objects | |
| | | static const unsigned char filter_may_emit_null = 0x1<<6; | |
| | | | |
| //! 7th bit defines exception propagation mode expected by the applicat
ion. | | //! 7th bit defines exception propagation mode expected by the applicat
ion. | |
| static const unsigned char exact_exception_propagation = | | static const unsigned char exact_exception_propagation = | |
| #if TBB_USE_CAPTURED_EXCEPTION | | #if TBB_USE_CAPTURED_EXCEPTION | |
| 0x0; | | 0x0; | |
| #else | | #else | |
| 0x1<<7; | | 0x1<<7; | |
| #endif /* TBB_USE_CAPTURED_EXCEPTION */ | | #endif /* TBB_USE_CAPTURED_EXCEPTION */ | |
| | | | |
| static const unsigned char current_version = __TBB_PIPELINE_VERSION(5); | | static const unsigned char current_version = __TBB_PIPELINE_VERSION(5); | |
| static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for v
ersion | | static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for v
ersion | |
| | | | |
| skipping to change at line 124 | | skipping to change at line 128 | |
| | | | |
| filter( mode filter_mode ) : | | filter( mode filter_mode ) : | |
| next_filter_in_pipeline(not_in_pipeline()), | | next_filter_in_pipeline(not_in_pipeline()), | |
| my_input_buffer(NULL), | | my_input_buffer(NULL), | |
| my_filter_mode(static_cast<unsigned char>(filter_mode | exact_excep
tion_propagation)), | | my_filter_mode(static_cast<unsigned char>(filter_mode | exact_excep
tion_propagation)), | |
| prev_filter_in_pipeline(not_in_pipeline()), | | prev_filter_in_pipeline(not_in_pipeline()), | |
| my_pipeline(NULL), | | my_pipeline(NULL), | |
| next_segment(NULL) | | next_segment(NULL) | |
| {} | | {} | |
| | | | |
|
| | | // signal end-of-input for concrete_filters | |
| | | void __TBB_EXPORTED_METHOD set_end_of_input(); | |
| | | | |
| public: | | public: | |
| //! True if filter is serial. | | //! True if filter is serial. | |
| bool is_serial() const { | | bool is_serial() const { | |
| return bool( my_filter_mode & filter_is_serial ); | | return bool( my_filter_mode & filter_is_serial ); | |
| } | | } | |
| | | | |
| //! True if filter must receive stream in order. | | //! True if filter must receive stream in order. | |
| bool is_ordered() const { | | bool is_ordered() const { | |
| return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))
==filter_is_serial; | | return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))
==filter_is_serial; | |
| } | | } | |
| | | | |
| //! True if filter is thread-bound. | | //! True if filter is thread-bound. | |
| bool is_bound() const { | | bool is_bound() const { | |
| return ( my_filter_mode & filter_is_bound )==filter_is_bound; | | return ( my_filter_mode & filter_is_bound )==filter_is_bound; | |
| } | | } | |
| | | | |
|
| | | //! true if an input filter can emit null | |
| | | bool object_may_be_null() { | |
| | | return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit | |
| | | _null; | |
| | | } | |
| | | | |
| //! Operate on an item from the input stream, and return item for outpu
t stream. | | //! Operate on an item from the input stream, and return item for outpu
t stream. | |
| /** Returns NULL if filter is a sink. */ | | /** Returns NULL if filter is a sink. */ | |
| virtual void* operator()( void* item ) = 0; | | virtual void* operator()( void* item ) = 0; | |
| | | | |
| //! Destroy filter. | | //! Destroy filter. | |
| /** If the filter was added to a pipeline, the pipeline must be destroy
ed first. */ | | /** If the filter was added to a pipeline, the pipeline must be destroy
ed first. */ | |
| virtual __TBB_EXPORTED_METHOD ~filter(); | | virtual __TBB_EXPORTED_METHOD ~filter(); | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| //! Destroys item if pipeline was cancelled. | | //! Destroys item if pipeline was cancelled. | |
| /** Required to prevent memory leaks. | | /** Required to prevent memory leaks. | |
| Note it can be called concurrently even for serial filters.*/ | | Note it can be called concurrently even for serial filters.*/ | |
| virtual void finalize( void* /*item*/ ) {}; | | virtual void finalize( void* /*item*/ ) {}; | |
| #endif | | #endif | |
| | | | |
| private: | | private: | |
| //! Pointer to next filter in the pipeline. | | //! Pointer to next filter in the pipeline. | |
| filter* next_filter_in_pipeline; | | filter* next_filter_in_pipeline; | |
| | | | |
|
| | | //! has the filter not yet processed all the tokens it will ever see? | |
| | | // (pipeline has not yet reached end_of_input or this filter has not y | |
| | | et | |
| | | // seen the last token produced by input_filter) | |
| | | bool has_more_work(); | |
| | | | |
| //! Buffer for incoming tokens, or NULL if not required. | | //! Buffer for incoming tokens, or NULL if not required. | |
| /** The buffer is required if the filter is serial or follows a thread-
bound one. */ | | /** The buffer is required if the filter is serial or follows a thread-
bound one. */ | |
| internal::input_buffer* my_input_buffer; | | internal::input_buffer* my_input_buffer; | |
| | | | |
| friend class internal::stage_task; | | friend class internal::stage_task; | |
| friend class internal::pipeline_root_task; | | friend class internal::pipeline_root_task; | |
| friend class pipeline; | | friend class pipeline; | |
| friend class thread_bound_filter; | | friend class thread_bound_filter; | |
| | | | |
| //! Storage for filter mode and dynamically checked implementation vers
ion. | | //! Storage for filter mode and dynamically checked implementation vers
ion. | |
| | | | |
| skipping to change at line 196 | | skipping to change at line 213 | |
| enum result_type { | | enum result_type { | |
| // item was processed | | // item was processed | |
| success, | | success, | |
| // item is currently not available | | // item is currently not available | |
| item_not_available, | | item_not_available, | |
| // there are no more items to process | | // there are no more items to process | |
| end_of_stream | | end_of_stream | |
| }; | | }; | |
| protected: | | protected: | |
| thread_bound_filter(mode filter_mode): | | thread_bound_filter(mode filter_mode): | |
|
| filter(static_cast<mode>(filter_mode | filter::filter_is_bound | f
ilter::exact_exception_propagation)) | | filter(static_cast<mode>(filter_mode | filter::filter_is_bound)) | |
| {} | | {} | |
| public: | | public: | |
| //! If a data item is available, invoke operator() on that item. | | //! If a data item is available, invoke operator() on that item. | |
| /** This interface is non-blocking. | | /** This interface is non-blocking. | |
| Returns 'success' if an item was processed. | | Returns 'success' if an item was processed. | |
| Returns 'item_not_available' if no item can be processed now | | Returns 'item_not_available' if no item can be processed now | |
| but more may arrive in the future, or if token limit is reached. | | but more may arrive in the future, or if token limit is reached. | |
| Returns 'end_of_stream' if there are no more items to process. */ | | Returns 'end_of_stream' if there are no more items to process. */ | |
| result_type __TBB_EXPORTED_METHOD try_process_item(); | | result_type __TBB_EXPORTED_METHOD try_process_item(); | |
| | | | |
| | | | |
| skipping to change at line 219 | | skipping to change at line 236 | |
| Returns 'success' if an item was processed. | | Returns 'success' if an item was processed. | |
| Returns 'end_of_stream' if there are no more items to process. | | Returns 'end_of_stream' if there are no more items to process. | |
| Never returns 'item_not_available', as it blocks until another retu
rn condition applies. */ | | Never returns 'item_not_available', as it blocks until another retu
rn condition applies. */ | |
| result_type __TBB_EXPORTED_METHOD process_item(); | | result_type __TBB_EXPORTED_METHOD process_item(); | |
| | | | |
| private: | | private: | |
| //! Internal routine for item processing | | //! Internal routine for item processing | |
| result_type internal_process_item(bool is_blocking); | | result_type internal_process_item(bool is_blocking); | |
| }; | | }; | |
| | | | |
|
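A thread_bound_filter such as the one declared above is serviced by the thread that owns it rather than by TBB worker threads, so pipeline::run() must be started on a different thread while the owning thread drives process_item() until end_of_stream. A hedged sketch of the consuming side (class and function names are illustrative):

    // Sketch: an ordered sink bound to the calling thread. pipeline::run()
    // is assumed to be executing on another thread; this thread only feeds
    // the bound filter until the stream ends.
    #include "tbb/pipeline.h"
    #include <cstdio>

    class output_filter: public tbb::thread_bound_filter {
    public:
        output_filter() : tbb::thread_bound_filter(serial_in_order) {}
        /*override*/ void* operator()( void* item ) {
            int* value = static_cast<int*>(item);  // token produced by the previous stage
            std::printf( "%d\n", *value );
            delete value;
            return NULL;                           // sink: nothing is passed downstream
        }
    };

    void drain( output_filter& sink ) {
        while( sink.process_item() != tbb::thread_bound_filter::end_of_stream )
            continue;
    }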
| //! A processing pipeling that applies filters to items. | | //! A processing pipeline that applies filters to items. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| class pipeline { | | class pipeline { | |
| public: | | public: | |
| //! Construct empty pipeline. | | //! Construct empty pipeline. | |
| __TBB_EXPORTED_METHOD pipeline(); | | __TBB_EXPORTED_METHOD pipeline(); | |
| | | | |
| /** Though the current implementation declares the destructor virtual,
do not rely on this | | /** Though the current implementation declares the destructor virtual,
do not rely on this | |
| detail. The virtualness is deprecated and may disappear in future
versions of TBB. */ | | detail. The virtualness is deprecated and may disappear in future
versions of TBB. */ | |
| virtual __TBB_EXPORTED_METHOD ~pipeline(); | | virtual __TBB_EXPORTED_METHOD ~pipeline(); | |
| | | | |
| | | | |
| skipping to change at line 250 | | skipping to change at line 267 | |
| | | | |
| //! Remove all filters from the pipeline. | | //! Remove all filters from the pipeline. | |
| void __TBB_EXPORTED_METHOD clear(); | | void __TBB_EXPORTED_METHOD clear(); | |
| | | | |
| private: | | private: | |
| friend class internal::stage_task; | | friend class internal::stage_task; | |
| friend class internal::pipeline_root_task; | | friend class internal::pipeline_root_task; | |
| friend class filter; | | friend class filter; | |
| friend class thread_bound_filter; | | friend class thread_bound_filter; | |
| friend class internal::pipeline_cleaner; | | friend class internal::pipeline_cleaner; | |
|
| friend class tbb::interface5::internal::pipeline_proxy; | | friend class tbb::interface6::internal::pipeline_proxy; | |
| | | | |
| //! Pointer to first filter in the pipeline. | | //! Pointer to first filter in the pipeline. | |
| filter* filter_list; | | filter* filter_list; | |
| | | | |
| //! Pointer to location where address of next filter to be added should
be stored. | | //! Pointer to location where address of next filter to be added should
be stored. | |
| filter* filter_end; | | filter* filter_end; | |
| | | | |
| //! task who's reference count is used to determine when all stages are
done. | | //! task who's reference count is used to determine when all stages are
done. | |
| //! task whose reference count is used to determine when all stages are | | //! task whose reference count is used to determine when all stages are | |
| | | | |
| | | | |
| skipping to change at line 289 | | skipping to change at line 306 | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| //! Does cleanup if the pipeline is cancelled or an exception occurred | | //! Does cleanup if the pipeline is cancelled or an exception occurred | |
| void clear_filters(); | | void clear_filters(); | |
| #endif | | #endif | |
| }; | | }; | |
| | | | |
| //------------------------------------------------------------------------ | | //------------------------------------------------------------------------ | |
| // Support for lambda-friendly parallel_pipeline interface | | // Support for lambda-friendly parallel_pipeline interface | |
| //------------------------------------------------------------------------ | | //------------------------------------------------------------------------ | |
| | | | |
|
| namespace interface5 { | | namespace interface6 { | |
| | | | |
| namespace internal { | | namespace internal { | |
| template<typename T, typename U, typename Body> class concrete_filter; | | template<typename T, typename U, typename Body> class concrete_filter; | |
| } | | } | |
| | | | |
|
| | | //! input_filter control to signal end-of-input for parallel_pipeline | |
| class flow_control { | | class flow_control { | |
| bool is_pipeline_stopped; | | bool is_pipeline_stopped; | |
| flow_control() { is_pipeline_stopped = false; } | | flow_control() { is_pipeline_stopped = false; } | |
| template<typename T, typename U, typename Body> friend class internal::
concrete_filter; | | template<typename T, typename U, typename Body> friend class internal::
concrete_filter; | |
| public: | | public: | |
| void stop() { is_pipeline_stopped = true; } | | void stop() { is_pipeline_stopped = true; } | |
| }; | | }; | |
| | | | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
|
| | | template<typename T> struct is_large_object { enum { r = sizeof(T) > sizeof | |
| | | (void *) }; }; | |
| | | | |
| | | template<typename T, bool> class token_helper; | |
| | | | |
| | | // large object helper (uses tbb_allocator) | |
| | | template<typename T> | |
| | | class token_helper<T, true> { | |
| | | public: | |
| | | typedef typename tbb::tbb_allocator<T> allocator; | |
| | | typedef T* pointer; | |
| | | typedef T value_type; | |
| | | static pointer create_token(const value_type & source) { | |
| | | pointer output_t = allocator().allocate(1); | |
| | | return new (output_t) T(source); | |
| | | } | |
| | | static value_type & token(pointer & t) { return *t;} | |
| | | static void * cast_to_void_ptr(pointer ref) { return (void *) ref; } | |
| | | static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; } | |
| | | static void destroy_token(pointer token) { | |
| | | allocator().destroy(token); | |
| | | allocator().deallocate(token,1); | |
| | | } | |
| | | }; | |
| | | | |
| | | // pointer specialization | |
| | | template<typename T> | |
| | | class token_helper<T*, false > { | |
| | | public: | |
| | | typedef T* pointer; | |
| | | typedef T* value_type; | |
| | | static pointer create_token(const value_type & source) { return source; | |
| | | } | |
| | | static value_type & token(pointer & t) { return t;} | |
| | | static void * cast_to_void_ptr(pointer ref) { return (void *)ref; } | |
| | | static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; } | |
| | | static void destroy_token( pointer /*token*/) {} | |
| | | }; | |
| | | | |
| | | // small object specialization (converts void* to the correct type, passes | |
| | | objects directly.) | |
| | | template<typename T> | |
| | | class token_helper<T, false> { | |
| | | typedef union { | |
| | | T actual_value; | |
| | | void * void_overlay; | |
| | | } type_to_void_ptr_map; | |
| | | public: | |
| | | typedef T pointer; // not really a pointer in this case. | |
| | | typedef T value_type; | |
| | | static pointer create_token(const value_type & source) { | |
| | | return source; } | |
| | | static value_type & token(pointer & t) { return t;} | |
| | | static void * cast_to_void_ptr(pointer ref) { | |
| | | type_to_void_ptr_map mymap; | |
| | | mymap.void_overlay = NULL; | |
| | | mymap.actual_value = ref; | |
| | | return mymap.void_overlay; | |
| | | } | |
| | | static pointer cast_from_void_ptr(void * ref) { | |
| | | type_to_void_ptr_map mymap; | |
| | | mymap.void_overlay = ref; | |
| | | return mymap.actual_value; | |
| | | } | |
| | | static void destroy_token( pointer /*token*/) {} | |
| | | }; | |
| | | | |
| template<typename T, typename U, typename Body> | | template<typename T, typename U, typename Body> | |
| class concrete_filter: public tbb::filter { | | class concrete_filter: public tbb::filter { | |
|
| Body my_body; | | const Body& my_body; | |
| | | typedef token_helper<T,is_large_object<T>::r > t_helper; | |
| | | typedef typename t_helper::pointer t_pointer; | |
| | | typedef token_helper<U,is_large_object<U>::r > u_helper; | |
| | | typedef typename u_helper::pointer u_pointer; | |
| | | | |
| /*override*/ void* operator()(void* input) { | | /*override*/ void* operator()(void* input) { | |
|
| T* temp_input = (T*)input; | | t_pointer temp_input = t_helper::cast_from_void_ptr(input); | |
| // Call user's operator()() here | | u_pointer output_u = u_helper::create_token(my_body(t_helper::token | |
| void* output = (void*) new U(my_body(*temp_input)); | | (temp_input))); | |
| delete temp_input; | | t_helper::destroy_token(temp_input); | |
| return output; | | return u_helper::cast_to_void_ptr(output_u); | |
| } | | } | |
| | | | |
| public: | | public: | |
| concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filt
er(filter_mode), my_body(body) {} | | concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filt
er(filter_mode), my_body(body) {} | |
| }; | | }; | |
| | | | |
|
| | | // input | |
| template<typename U, typename Body> | | template<typename U, typename Body> | |
| class concrete_filter<void,U,Body>: public filter { | | class concrete_filter<void,U,Body>: public filter { | |
|
| Body my_body; | | const Body& my_body; | |
| | | typedef token_helper<U, is_large_object<U>::r > u_helper; | |
| | | typedef typename u_helper::pointer u_pointer; | |
| | | | |
| /*override*/void* operator()(void*) { | | /*override*/void* operator()(void*) { | |
| flow_control control; | | flow_control control; | |
|
| U temp_output = my_body(control); | | u_pointer output_u = u_helper::create_token(my_body(control)); | |
| void* output = control.is_pipeline_stopped ? NULL : (void*) new U(t | | if(control.is_pipeline_stopped) { | |
| emp_output); | | u_helper::destroy_token(output_u); | |
| return output; | | set_end_of_input(); | |
| | | return NULL; | |
| | | } | |
| | | return u_helper::cast_to_void_ptr(output_u); | |
| } | | } | |
|
| | | | |
| public: | | public: | |
|
| concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filt | | concrete_filter(tbb::filter::mode filter_mode, const Body& body) : | |
| er(filter_mode), my_body(body) {} | | filter(static_cast<tbb::filter::mode>(filter_mode | filter_may_emit | |
| | | _null)), | |
| | | my_body(body) | |
| | | {} | |
| }; | | }; | |
| | | | |
| template<typename T, typename Body> | | template<typename T, typename Body> | |
| class concrete_filter<T,void,Body>: public filter { | | class concrete_filter<T,void,Body>: public filter { | |
|
| Body my_body; | | const Body& my_body; | |
| | | typedef token_helper<T, is_large_object<T>::r > t_helper; | |
| | | typedef typename t_helper::pointer t_pointer; | |
| | | | |
| /*override*/ void* operator()(void* input) { | | /*override*/ void* operator()(void* input) { | |
|
| T* temp_input = (T*)input; | | t_pointer temp_input = t_helper::cast_from_void_ptr(input); | |
| my_body(*temp_input); | | my_body(t_helper::token(temp_input)); | |
| delete temp_input; | | t_helper::destroy_token(temp_input); | |
| return NULL; | | return NULL; | |
| } | | } | |
| public: | | public: | |
| concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filt
er(filter_mode), my_body(body) {} | | concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filt
er(filter_mode), my_body(body) {} | |
| }; | | }; | |
| | | | |
| template<typename Body> | | template<typename Body> | |
| class concrete_filter<void,void,Body>: public filter { | | class concrete_filter<void,void,Body>: public filter { | |
|
| Body my_body; | | const Body& my_body; | |
| | | | |
| /** Override privately because it is always called virtually */ | | /** Override privately because it is always called virtually */ | |
| /*override*/ void* operator()(void*) { | | /*override*/ void* operator()(void*) { | |
| flow_control control; | | flow_control control; | |
| my_body(control); | | my_body(control); | |
| void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_
t)-1; | | void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_
t)-1; | |
| return output; | | return output; | |
| } | | } | |
| public: | | public: | |
| concrete_filter(filter::mode filter_mode, const Body& body) : filter(fi
lter_mode), my_body(body) {} | | concrete_filter(filter::mode filter_mode, const Body& body) : filter(fi
lter_mode), my_body(body) {} | |
| | | | |
| skipping to change at line 412 | | skipping to change at line 510 | |
| #ifdef __TBB_TEST_FILTER_NODE_COUNT | | #ifdef __TBB_TEST_FILTER_NODE_COUNT | |
| --(__TBB_TEST_FILTER_NODE_COUNT); | | --(__TBB_TEST_FILTER_NODE_COUNT); | |
| #endif | | #endif | |
| } | | } | |
| }; | | }; | |
| | | | |
| //! Node in parse tree representing result of make_filter. | | //! Node in parse tree representing result of make_filter. | |
| template<typename T, typename U, typename Body> | | template<typename T, typename U, typename Body> | |
| class filter_node_leaf: public filter_node { | | class filter_node_leaf: public filter_node { | |
| const tbb::filter::mode mode; | | const tbb::filter::mode mode; | |
|
| const Body& body; | | const Body body; | |
| /*override*/void add_to( pipeline& p ) { | | /*override*/void add_to( pipeline& p ) { | |
| concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,b
ody); | | concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,b
ody); | |
| p.add_filter( *f ); | | p.add_filter( *f ); | |
| } | | } | |
| public: | | public: | |
| filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(
b) {} | | filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(
b) {} | |
| }; | | }; | |
| | | | |
| //! Node in parse tree representing join of two filters. | | //! Node in parse tree representing join of two filters. | |
| class filter_node_join: public filter_node { | | class filter_node_join: public filter_node { | |
| | | | |
| skipping to change at line 444 | | skipping to change at line 542 | |
| public: | | public: | |
| filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y)
{ | | filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y)
{ | |
| left.add_ref(); | | left.add_ref(); | |
| right.add_ref(); | | right.add_ref(); | |
| } | | } | |
| }; | | }; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| //! @endcond | | //! @endcond | |
| | | | |
|
| | | //! Create a filter to participate in parallel_pipeline | |
| template<typename T, typename U, typename Body> | | template<typename T, typename U, typename Body> | |
| filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) { | | filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) { | |
| return new internal::filter_node_leaf<T,U,Body>(mode, body); | | return new internal::filter_node_leaf<T,U,Body>(mode, body); | |
| } | | } | |
| | | | |
| template<typename T, typename V, typename U> | | template<typename T, typename V, typename U> | |
| filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& ri
ght) { | | filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& ri
ght) { | |
| __TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left
argument of '&'"); | | __TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left
argument of '&'"); | |
| __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as rig
ht argument of '&'"); | | __TBB_ASSERT(right.root,"cannot use default-constructed filter_t as rig
ht argument of '&'"); | |
| return new internal::filter_node_join(*left.root,*right.root); | | return new internal::filter_node_join(*left.root,*right.root); | |
| | | | |
| skipping to change at line 527 | | skipping to change at line 626 | |
| ); | | ); | |
| } | | } | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| inline void parallel_pipeline(size_t max_number_of_live_tokens, const filte
r_t<void,void>& filter_chain) { | | inline void parallel_pipeline(size_t max_number_of_live_tokens, const filte
r_t<void,void>& filter_chain) { | |
| tbb::task_group_context context; | | tbb::task_group_context context; | |
| parallel_pipeline(max_number_of_live_tokens, filter_chain, context); | | parallel_pipeline(max_number_of_live_tokens, filter_chain, context); | |
| } | | } | |
| #endif // __TBB_TASK_GROUP_CONTEXT | | #endif // __TBB_TASK_GROUP_CONTEXT | |
| | | | |
|
| } // interface5 | | } // interface6 | |
| | | | |
|
| using interface5::flow_control; | | using interface6::flow_control; | |
| using interface5::filter_t; | | using interface6::filter_t; | |
| using interface5::make_filter; | | using interface6::make_filter; | |
| using interface5::parallel_pipeline; | | using interface6::parallel_pipeline; | |
| | | | |
| } // tbb | | } // tbb | |
| | | | |
| #endif /* __TBB_pipeline_H */ | | #endif /* __TBB_pipeline_H */ | |
| | | | |
End of changes. 28 change blocks. |
| 31 lines changed or deleted | | 135 lines changed or added | |
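For orientation, here is a minimal usage sketch of the interface6 pipeline API declared above (make_filter, operator& and parallel_pipeline). It is illustrative code, not part of the diffed header: it assumes a C++0x-capable compiler so lambdas can serve as filter bodies (ordinary function objects work as well), and it relies on __TBB_TASK_GROUP_CONTEXT being enabled (the default) so the two-argument parallel_pipeline overload shown above is available. The filter modes serial_in_order/parallel and flow_control::stop() belong to the same header generation but are not visible in this excerpt.

#include "tbb/pipeline.h"
#include <cstdio>

int main() {
    int item = 0;
    tbb::parallel_pipeline(
        /*max_number_of_live_tokens=*/ 8,
        // Input stage: produces the integers 0..9, then stops the pipeline.
        tbb::make_filter<void,int>( tbb::filter::serial_in_order,
            [&]( tbb::flow_control& fc ) -> int {
                if( item == 10 ) { fc.stop(); return 0; }
                return item++;
            } ) &
        // Processing stage: consumes each integer; items may be handled concurrently.
        tbb::make_filter<int,void>( tbb::filter::parallel,
            []( int value ) { std::printf( "%d\n", value ); } )
    );
    return 0;
}

The token limit (8 here) bounds how many items are in flight at once, and the first stage ends the input stream by calling flow_control::stop().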
|
| task.h | | task.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 34 | | skipping to change at line 34 | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_task_H | | #ifndef __TBB_task_H | |
| #define __TBB_task_H | | #define __TBB_task_H | |
| | | | |
| #include "tbb_stddef.h" | | #include "tbb_stddef.h" | |
| #include "tbb_machine.h" | | #include "tbb_machine.h" | |
|
| | | #include <climits> | |
| | | | |
| typedef struct ___itt_caller *__itt_caller; | | typedef struct ___itt_caller *__itt_caller; | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| class task; | | class task; | |
| class task_list; | | class task_list; | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| class task_group_context; | | class task_group_context; | |
| | | | |
| skipping to change at line 120 | | skipping to change at line 121 | |
| | | | |
| //! For internal use only | | //! For internal use only | |
| virtual void wait_for_all( task& parent, task* child ) = 0; | | virtual void wait_for_all( task& parent, task* child ) = 0; | |
| | | | |
| //! For internal use only | | //! For internal use only | |
| virtual void spawn_root_and_wait( task& first, task*& next ) = 0; | | virtual void spawn_root_and_wait( task& first, task*& next ) = 0; | |
| | | | |
| //! Pure virtual destructor; | | //! Pure virtual destructor; | |
| // Have to have it just to shut up overzealous compilation warning
s | | // Have to have it just to shut up overzealous compilation warning
s | |
| virtual ~scheduler() = 0; | | virtual ~scheduler() = 0; | |
|
| #if __TBB_ARENA_PER_MASTER | | | |
| | | | |
| //! For internal use only | | //! For internal use only | |
| virtual void enqueue( task& t, void* reserved ) = 0; | | virtual void enqueue( task& t, void* reserved ) = 0; | |
|
| #endif /* __TBB_ARENA_PER_MASTER */ | | | |
| }; | | }; | |
| | | | |
| //! A reference count | | //! A reference count | |
| /** Should always be non-negative. A signed type is used so that under
flow can be detected. */ | | /** Should always be non-negative. A signed type is used so that under
flow can be detected. */ | |
| typedef intptr_t reference_count; | | typedef intptr_t reference_count; | |
| | | | |
| //! An id as used for specifying affinity. | | //! An id as used for specifying affinity. | |
| typedef unsigned short affinity_id; | | typedef unsigned short affinity_id; | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
|
| | | class generic_scheduler; | |
| | | | |
| struct context_list_node_t { | | struct context_list_node_t { | |
| context_list_node_t *my_prev, | | context_list_node_t *my_prev, | |
| *my_next; | | *my_next; | |
| }; | | }; | |
| | | | |
| class allocate_root_with_context_proxy: no_assign { | | class allocate_root_with_context_proxy: no_assign { | |
| task_group_context& my_context; | | task_group_context& my_context; | |
| public: | | public: | |
| allocate_root_with_context_proxy ( task_group_context& ctx ) : my_c
ontext(ctx) {} | | allocate_root_with_context_proxy ( task_group_context& ctx ) : my_c
ontext(ctx) {} | |
| task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; | | task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; | |
| | | | |
| skipping to change at line 172 | | skipping to change at line 173 | |
| public: | | public: | |
| task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; | | task& __TBB_EXPORTED_METHOD allocate( size_t size ) const; | |
| void __TBB_EXPORTED_METHOD free( task& ) const; | | void __TBB_EXPORTED_METHOD free( task& ) const; | |
| }; | | }; | |
| | | | |
| //! Memory prefix to a task object. | | //! Memory prefix to a task object. | |
| /** This class is internal to the library. | | /** This class is internal to the library. | |
| Do not reference it directly, except within the library itself. | | Do not reference it directly, except within the library itself. | |
| Fields are ordered in way that preserves backwards compatibility an
d yields | | Fields are ordered in way that preserves backwards compatibility an
d yields | |
| good packing on typical 32-bit and 64-bit platforms. | | good packing on typical 32-bit and 64-bit platforms. | |
|
| | | | |
| | | In case task prefix size exceeds 32 or 64 bytes on IA32 and Intel64 | |
| | | architectures correspondingly, consider dynamic setting of task_ali | |
| | | gnment | |
| | | and task_prefix_reservation_size based on the maximal operand size | |
| | | supported | |
| | | by the current CPU. | |
| | | | |
| @ingroup task_scheduling */ | | @ingroup task_scheduling */ | |
| class task_prefix { | | class task_prefix { | |
| private: | | private: | |
| friend class tbb::task; | | friend class tbb::task; | |
| friend class tbb::interface5::internal::task_base; | | friend class tbb::interface5::internal::task_base; | |
| friend class tbb::task_list; | | friend class tbb::task_list; | |
| friend class internal::scheduler; | | friend class internal::scheduler; | |
| friend class internal::allocate_root_proxy; | | friend class internal::allocate_root_proxy; | |
| friend class internal::allocate_child_proxy; | | friend class internal::allocate_child_proxy; | |
| friend class internal::allocate_continuation_proxy; | | friend class internal::allocate_continuation_proxy; | |
| | | | |
| skipping to change at line 200 | | skipping to change at line 207 | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
| //! The scheduler that allocated the task, or NULL if the task is b
ig. | | //! The scheduler that allocated the task, or NULL if the task is b
ig. | |
| /** Small tasks are pooled by the scheduler that allocated the task
. | | /** Small tasks are pooled by the scheduler that allocated the task
. | |
| If a scheduler needs to free a small task allocated by another
scheduler, | | If a scheduler needs to free a small task allocated by another
scheduler, | |
| it returns the task to that other scheduler. This policy avoid
s | | it returns the task to that other scheduler. This policy avoid
s | |
| memory space blowup issues for memory allocators that allocate
from | | memory space blowup issues for memory allocators that allocate
from | |
| thread-specific pools. */ | | thread-specific pools. */ | |
| scheduler* origin; | | scheduler* origin; | |
| | | | |
|
| //! The scheduler that owns the task. | | #if __TBB_TASK_PRIORITY | |
| | | union { | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | //! Obsolete. The scheduler that owns the task. | |
| | | /** Retained only for the sake of backward binary compatibility. | |
| | | Still used by inline methods in the task.h header. **/ | |
| scheduler* owner; | | scheduler* owner; | |
| | | | |
|
| | | #if __TBB_TASK_PRIORITY | |
| | | //! Pointer to the next offloaded lower priority task. | |
| | | /** Used to maintain a list of offloaded tasks inside the scheduler | |
| | | . **/ | |
| | | task* next_offloaded; | |
| | | }; | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| //! The task whose reference count includes me. | | //! The task whose reference count includes me. | |
| /** In the "blocking style" of programming, this field points to th
e parent task. | | /** In the "blocking style" of programming, this field points to th
e parent task. | |
| In the "continuation-passing style" of programming, this field
points to the | | In the "continuation-passing style" of programming, this field
points to the | |
| continuation of the parent. */ | | continuation of the parent. */ | |
| tbb::task* parent; | | tbb::task* parent; | |
| | | | |
| //! Reference count used for synchronization. | | //! Reference count used for synchronization. | |
| /** In the "continuation-passing style" of programming, this field
is | | /** In the "continuation-passing style" of programming, this field
is | |
| the difference of the number of allocated children minus the | | the difference of the number of allocated children minus the | |
| number of children that have completed. | | number of children that have completed. | |
| In the "blocking style" of programming, this field is one more
than the difference. */ | | In the "blocking style" of programming, this field is one more
than the difference. */ | |
|
| reference_count ref_count; | | __TBB_atomic reference_count ref_count; | |
| | | | |
| //! Obsolete. Used to be scheduling depth before TBB 2.2 | | //! Obsolete. Used to be scheduling depth before TBB 2.2 | |
|
| /** Retained only for the sake of backward binary compatibility. ** | | /** Retained only for the sake of backward binary compatibility. | |
| / | | Not used by TBB anymore. **/ | |
| int depth; | | int depth; | |
| | | | |
| //! A task::state_type, stored as a byte for compactness. | | //! A task::state_type, stored as a byte for compactness. | |
| /** This state is exposed to users via method task::state(). */ | | /** This state is exposed to users via method task::state(). */ | |
| unsigned char state; | | unsigned char state; | |
| | | | |
| //! Miscellaneous state that is not directly visible to users, stor
ed as a byte for compactness. | | //! Miscellaneous state that is not directly visible to users, stor
ed as a byte for compactness. | |
| /** 0x0 -> version 1.0 task | | /** 0x0 -> version 1.0 task | |
| 0x1 -> version >=2.1 task | | 0x1 -> version >=2.1 task | |
| 0x20 -> task_proxy | | 0x20 -> task_proxy | |
| | | | |
| skipping to change at line 246 | | skipping to change at line 266 | |
| | | | |
| //! The task corresponding to this task_prefix. | | //! The task corresponding to this task_prefix. | |
| tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);} | | tbb::task& task() {return *reinterpret_cast<tbb::task*>(this+1);} | |
| }; | | }; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| //! @endcond | | //! @endcond | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | | |
|
| | | #if __TBB_TASK_PRIORITY | |
| | | namespace internal { | |
| | | static const int priority_stride_v4 = INT_MAX / 4; | |
| | | } | |
| | | | |
| | | enum priority_t { | |
| | | priority_normal = internal::priority_stride_v4 * 2, | |
| | | priority_low = priority_normal - internal::priority_stride_v4, | |
| | | priority_high = priority_normal + internal::priority_stride_v4 | |
| | | }; | |
| | | | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| #if TBB_USE_CAPTURED_EXCEPTION | | #if TBB_USE_CAPTURED_EXCEPTION | |
| class tbb_exception; | | class tbb_exception; | |
| #else | | #else | |
| namespace internal { | | namespace internal { | |
| class tbb_exception_ptr; | | class tbb_exception_ptr; | |
| } | | } | |
| #endif /* !TBB_USE_CAPTURED_EXCEPTION */ | | #endif /* !TBB_USE_CAPTURED_EXCEPTION */ | |
| | | | |
|
| | | class task_scheduler_init; | |
| | | | |
| //! Used to form groups of tasks | | //! Used to form groups of tasks | |
| /** @ingroup task_scheduling | | /** @ingroup task_scheduling | |
| The context services explicit cancellation requests from user code, and
unhandled | | The context services explicit cancellation requests from user code, and
unhandled | |
| exceptions intercepted during tasks execution. Intercepting an exceptio
n results | | exceptions intercepted during tasks execution. Intercepting an exceptio
n results | |
| in generating internal cancellation requests (which is processed in exa
ctly the | | in generating internal cancellation requests (which is processed in exa
ctly the | |
| same way as external ones). | | same way as external ones). | |
| | | | |
| The context is associated with one or more root tasks and defines the c
ancellation | | The context is associated with one or more root tasks and defines the c
ancellation | |
| group that includes all the descendants of the corresponding root task(
s). Association | | group that includes all the descendants of the corresponding root task(
s). Association | |
| is established when a context object is passed as an argument to the ta
sk::allocate_root() | | is established when a context object is passed as an argument to the ta
sk::allocate_root() | |
| | | | |
| skipping to change at line 277 | | skipping to change at line 312 | |
| forming a tree-like structure: parent -> this -> children. Arrows here
designate | | forming a tree-like structure: parent -> this -> children. Arrows here
designate | |
| cancellation propagation direction. If a task in a cancellation group i
s canceled | | cancellation propagation direction. If a task in a cancellation group i
s canceled | |
| all the other tasks in this group and groups bound to it (as children)
get canceled too. | | all the other tasks in this group and groups bound to it (as children)
get canceled too. | |
| | | | |
| IMPLEMENTATION NOTE: | | IMPLEMENTATION NOTE: | |
| When adding new members to task_group_context or changing types of exis
ting ones, | | When adding new members to task_group_context or changing types of exis
ting ones, | |
| update the size of both padding buffers (_leading_padding and _trailing
_padding) | | update the size of both padding buffers (_leading_padding and _trailing
_padding) | |
| appropriately. See also VERSIONING NOTE at the constructor definition b
elow. **/ | | appropriately. See also VERSIONING NOTE at the constructor definition b
elow. **/ | |
| class task_group_context : internal::no_copy { | | class task_group_context : internal::no_copy { | |
| private: | | private: | |
|
| | | friend class internal::generic_scheduler; | |
| | | friend class task_scheduler_init; | |
| | | | |
| #if TBB_USE_CAPTURED_EXCEPTION | | #if TBB_USE_CAPTURED_EXCEPTION | |
| typedef tbb_exception exception_container_type; | | typedef tbb_exception exception_container_type; | |
| #else | | #else | |
| typedef internal::tbb_exception_ptr exception_container_type; | | typedef internal::tbb_exception_ptr exception_container_type; | |
| #endif | | #endif | |
| | | | |
| enum version_traits_word_layout { | | enum version_traits_word_layout { | |
| traits_offset = 16, | | traits_offset = 16, | |
| version_mask = 0xFFFF, | | version_mask = 0xFFFF, | |
| traits_mask = 0xFFFFul << traits_offset | | traits_mask = 0xFFFFul << traits_offset | |
| | | | |
| skipping to change at line 306 | | skipping to change at line 344 | |
| exact_exception = 0x0001ul << traits_offset, | | exact_exception = 0x0001ul << traits_offset, | |
| concurrent_wait = 0x0004ul << traits_offset, | | concurrent_wait = 0x0004ul << traits_offset, | |
| #if TBB_USE_CAPTURED_EXCEPTION | | #if TBB_USE_CAPTURED_EXCEPTION | |
| default_traits = 0 | | default_traits = 0 | |
| #else | | #else | |
| default_traits = exact_exception | | default_traits = exact_exception | |
| #endif /* !TBB_USE_CAPTURED_EXCEPTION */ | | #endif /* !TBB_USE_CAPTURED_EXCEPTION */ | |
| }; | | }; | |
| | | | |
| private: | | private: | |
|
| | | enum state { | |
| | | may_have_children = 1 | |
| | | }; | |
| | | | |
| union { | | union { | |
| //! Flavor of this context: bound or isolated. | | //! Flavor of this context: bound or isolated. | |
| kind_type my_kind; | | kind_type my_kind; | |
| uintptr_t _my_kind_aligner; | | uintptr_t _my_kind_aligner; | |
| }; | | }; | |
| | | | |
| //! Pointer to the context of the parent cancellation group. NULL for i
solated contexts. | | //! Pointer to the context of the parent cancellation group. NULL for i
solated contexts. | |
| task_group_context *my_parent; | | task_group_context *my_parent; | |
| | | | |
| //! Used to form the thread specific list of contexts without additiona
l memory allocation. | | //! Used to form the thread specific list of contexts without additiona
l memory allocation. | |
| | | | |
| skipping to change at line 327 | | skipping to change at line 369 | |
| its parent happens. Any context can be present in the list of one t
hread only. **/ | | its parent happens. Any context can be present in the list of one t
hread only. **/ | |
| internal::context_list_node_t my_node; | | internal::context_list_node_t my_node; | |
| | | | |
| //! Used to set and maintain stack stitching point for Intel Performanc
e Tools. | | //! Used to set and maintain stack stitching point for Intel Performanc
e Tools. | |
| __itt_caller itt_caller; | | __itt_caller itt_caller; | |
| | | | |
| //! Leading padding protecting accesses to frequently used members from
false sharing. | | //! Leading padding protecting accesses to frequently used members from
false sharing. | |
| /** Read accesses to the field my_cancellation_requested are on the hot
path inside | | /** Read accesses to the field my_cancellation_requested are on the hot
path inside | |
| the scheduler. This padding ensures that this field never shares th
e same cache | | the scheduler. This padding ensures that this field never shares th
e same cache | |
| line with a local variable that is frequently written to. **/ | | line with a local variable that is frequently written to. **/ | |
|
| char _leading_padding[internal::NFS_MaxLineSize - | | char _leading_padding[internal::NFS_MaxLineSize | |
| 2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(internal: | | - 2 * sizeof(uintptr_t)- sizeof(void*) - sizeof(i | |
| :context_list_node_t) | | nternal::context_list_node_t) | |
| - sizeof(__itt_caller)]; | | - sizeof(__itt_caller)]; | |
| | | | |
| //! Specifies whether cancellation was requested for this task group. | | //! Specifies whether cancellation was requested for this task group. | |
| uintptr_t my_cancellation_requested; | | uintptr_t my_cancellation_requested; | |
| | | | |
| //! Version for run-time checks and behavioral traits of the context. | | //! Version for run-time checks and behavioral traits of the context. | |
| /** Version occupies low 16 bits, and traits (zero or more ORed enumera
tors | | /** Version occupies low 16 bits, and traits (zero or more ORed enumera
tors | |
| from the traits_type enumerations) take the next 16 bits. | | from the traits_type enumerations) take the next 16 bits. | |
| Original (zeroth) version of the context did not support any traits
. **/ | | Original (zeroth) version of the context did not support any traits
. **/ | |
| uintptr_t my_version_and_traits; | | uintptr_t my_version_and_traits; | |
| | | | |
| //! Pointer to the container storing exception being propagated across
this task group. | | //! Pointer to the container storing exception being propagated across
this task group. | |
| exception_container_type *my_exception; | | exception_container_type *my_exception; | |
| | | | |
|
| //! Scheduler that registered this context in its thread specific list. | | //! Scheduler instance that registered this context in its thread speci | |
| /** This field is not terribly necessary, but it allows to get a small | | fic list. | |
| performance | | internal::generic_scheduler *my_owner; | |
| benefit by getting us rid of using thread local storage. We do not | | | |
| care | | //! Internal state (combination of state flags). | |
| about extra memory it takes since this data structure is excessivel | | uintptr_t my_state; | |
| y padded anyway. **/ | | | |
| void *my_owner; | | #if __TBB_TASK_PRIORITY | |
| | | //! Priority level of the task group (in normalized representation) | |
| | | intptr_t my_priority; | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| //! Trailing padding protecting accesses to frequently used members fro
m false sharing | | //! Trailing padding protecting accesses to frequently used members fro
m false sharing | |
| /** \sa _leading_padding **/ | | /** \sa _leading_padding **/ | |
|
| char _trailing_padding[internal::NFS_MaxLineSize - sizeof(intptr_t) - 2 | | char _trailing_padding[internal::NFS_MaxLineSize - 2 * sizeof(uintptr_t | |
| * sizeof(void*)]; | | ) - 2 * sizeof(void*) | |
| | | #if __TBB_TASK_PRIORITY | |
| | | - sizeof(intptr_t) | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | ]; | |
| | | | |
| public: | | public: | |
| //! Default & binding constructor. | | //! Default & binding constructor. | |
| /** By default a bound context is created. That is this context will be
bound | | /** By default a bound context is created. That is this context will be
bound | |
| (as child) to the context of the task calling task::allocate_root(t
his_context) | | (as child) to the context of the task calling task::allocate_root(t
his_context) | |
| method. Cancellation requests passed to the parent context are prop
agated | | method. Cancellation requests passed to the parent context are prop
agated | |
|
| to all the contexts bound to it. | | to all the contexts bound to it. Similarly priority change is propa | |
| | | gated | |
| | | from the parent context to its children. | |
| | | | |
| If task_group_context::isolated is used as the argument, then the t
asks associated | | If task_group_context::isolated is used as the argument, then the t
asks associated | |
| with this context will never be affected by events in any other con
text. | | with this context will never be affected by events in any other con
text. | |
| | | | |
| Creating isolated contexts involve much less overhead, but they hav
e limited | | Creating isolated contexts involve much less overhead, but they hav
e limited | |
| utility. Normally when an exception occurs in an algorithm that has
nested | | utility. Normally when an exception occurs in an algorithm that has
nested | |
| ones running, it is desirably to have all the nested algorithms can
celed | | ones running, it is desirably to have all the nested algorithms can
celed | |
| as well. Such a behavior requires nested algorithms to use bound co
ntexts. | | as well. Such a behavior requires nested algorithms to use bound co
ntexts. | |
| | | | |
| There is one good place where using isolated algorithms is benefici
al. It is | | There is one good place where using isolated algorithms is benefici
al. It is | |
| a master thread. That is if a particular algorithm is invoked direc
tly from | | a master thread. That is if a particular algorithm is invoked direc
tly from | |
| the master thread (not from a TBB task), supplying it with explicit
ly | | the master thread (not from a TBB task), supplying it with explicit
ly | |
| created isolated context will result in a faster algorithm startup. | | created isolated context will result in a faster algorithm startup. | |
| | | | |
| VERSIONING NOTE: | | VERSIONING NOTE: | |
| Implementation(s) of task_group_context constructor(s) cannot be ma
de | | Implementation(s) of task_group_context constructor(s) cannot be ma
de | |
| entirely out-of-line because the run-time version must be set by th
e user | | entirely out-of-line because the run-time version must be set by th
e user | |
| code. This will become critically important for binary compatibilit
y, if | | code. This will become critically important for binary compatibilit
y, if | |
| we ever have to change the size of the context object. | | we ever have to change the size of the context object. | |
| | | | |
|
| Boosting the runtime version will also be necessary whenever new fi | | Boosting the runtime version will also be necessary if new data fie | |
| elds | | lds are | |
| are introduced in the currently unused padding areas or the meaning | | introduced in the currently unused padding areas and these fields a | |
| of | | re updated | |
| the existing fields is changed or extended. **/ | | by inline methods. **/ | |
| task_group_context ( kind_type relation_with_parent = bound, | | task_group_context ( kind_type relation_with_parent = bound, | |
| uintptr_t traits = default_traits ) | | uintptr_t traits = default_traits ) | |
| : my_kind(relation_with_parent) | | : my_kind(relation_with_parent) | |
| , my_version_and_traits(1 | traits) | | , my_version_and_traits(1 | traits) | |
| { | | { | |
| init(); | | init(); | |
| } | | } | |
| | | | |
| __TBB_EXPORTED_METHOD ~task_group_context (); | | __TBB_EXPORTED_METHOD ~task_group_context (); | |
| | | | |
| | | | |
| skipping to change at line 424 | | skipping to change at line 476 | |
| | | | |
| //! Records the pending exception, and cancels the task group. | | //! Records the pending exception, and cancels the task group. | |
| /** May be called only from inside a catch-block. If the context is alr
eady | | /** May be called only from inside a catch-block. If the context is alr
eady | |
| canceled, does nothing. | | canceled, does nothing. | |
| The method brings the task group associated with this context exact
ly into | | The method brings the task group associated with this context exact
ly into | |
| the state it would be in, if one of its tasks threw the currently p
ending | | the state it would be in, if one of its tasks threw the currently p
ending | |
| exception during its execution. In other words, it emulates the act
ions | | exception during its execution. In other words, it emulates the act
ions | |
| of the scheduler's dispatch loop exception handler. **/ | | of the scheduler's dispatch loop exception handler. **/ | |
| void __TBB_EXPORTED_METHOD register_pending_exception (); | | void __TBB_EXPORTED_METHOD register_pending_exception (); | |
| | | | |
|
| | | #if __TBB_TASK_PRIORITY | |
| | | //! Changes priority of the task group | |
| | | void set_priority ( priority_t ); | |
| | | | |
| | | //! Retrieves current priority of the current task group | |
| | | priority_t priority () const; | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| protected: | | protected: | |
| //! Out-of-line part of the constructor. | | //! Out-of-line part of the constructor. | |
| /** Singled out to ensure backward binary compatibility of the future v
ersions. **/ | | /** Singled out to ensure backward binary compatibility of the future v
ersions. **/ | |
| void __TBB_EXPORTED_METHOD init (); | | void __TBB_EXPORTED_METHOD init (); | |
| | | | |
| private: | | private: | |
| friend class task; | | friend class task; | |
| friend class internal::allocate_root_with_context_proxy; | | friend class internal::allocate_root_with_context_proxy; | |
| | | | |
| static const kind_type binding_required = bound; | | static const kind_type binding_required = bound; | |
| static const kind_type binding_completed = kind_type(bound+1); | | static const kind_type binding_completed = kind_type(bound+1); | |
| static const kind_type detached = kind_type(binding_completed+1); | | static const kind_type detached = kind_type(binding_completed+1); | |
| static const kind_type dying = kind_type(detached+1); | | static const kind_type dying = kind_type(detached+1); | |
| | | | |
|
| //! Checks if any of the ancestors has a cancellation request outstandi | | //! Propagates state change (if any) from an ancestor | |
| ng, | | /** Checks if one of this object's ancestors is in a new state, and pro | |
| //! and propagates it back to descendants. | | pagates | |
| void propagate_cancellation_from_ancestors (); | | the new state to all its descendants in this object's heritage line | |
| | | . **/ | |
| //! For debugging purposes only. | | template <typename T> | |
| bool is_alive () { | | void propagate_state_from_ancestors ( T task_group_context::*mptr_state | |
| #if TBB_USE_DEBUG | | , T new_state ); | |
| return my_version_and_traits != 0xDeadBeef; | | | |
| #else | | //! Makes sure that the context is registered with a scheduler instance | |
| return true; | | . | |
| #endif /* TBB_USE_DEBUG */ | | inline void finish_initialization ( internal::generic_scheduler *local_ | |
| } | | sched ); | |
| | | | |
| | | //! Registers this context with the local scheduler and binds it to its | |
| | | parent context | |
| | | void bind_to ( internal::generic_scheduler *local_sched ); | |
| | | | |
| | | //! Registers this context with the local scheduler | |
| | | void register_with ( internal::generic_scheduler *local_sched ); | |
| | | | |
| }; // class task_group_context | | }; // class task_group_context | |
| | | | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
| //! Base class for user-defined tasks. | | //! Base class for user-defined tasks. | |
| /** @ingroup task_scheduling */ | | /** @ingroup task_scheduling */ | |
| class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base { | | class task: __TBB_TASK_BASE_ACCESS interface5::internal::task_base { | |
| | | | |
| //! Set reference count | | //! Set reference count | |
| void __TBB_EXPORTED_METHOD internal_set_ref_count( int count ); | | void __TBB_EXPORTED_METHOD internal_set_ref_count( int count ); | |
| | | | |
| skipping to change at line 551 | | skipping to change at line 614 | |
| cannot be made, use method recycle_as_safe_continuation instead. | | cannot be made, use method recycle_as_safe_continuation instead. | |
| | | | |
| Because of the hazard, this method may be deprecated in the future.
*/ | | Because of the hazard, this method may be deprecated in the future.
*/ | |
| void recycle_as_continuation() { | | void recycle_as_continuation() { | |
| __TBB_ASSERT( prefix().state==executing, "execute not running?" ); | | __TBB_ASSERT( prefix().state==executing, "execute not running?" ); | |
| prefix().state = allocated; | | prefix().state = allocated; | |
| } | | } | |
| | | | |
| //! Recommended to use, safe variant of recycle_as_continuation | | //! Recommended to use, safe variant of recycle_as_continuation | |
| /** For safety, it requires additional increment of ref_count. | | /** For safety, it requires additional increment of ref_count. | |
|
| With no decendants and ref_count of 1, it has the semantics of recy
cle_to_reexecute. */ | | With no descendants and ref_count of 1, it has the semantics of rec
ycle_to_reexecute. */ | |
| void recycle_as_safe_continuation() { | | void recycle_as_safe_continuation() { | |
| __TBB_ASSERT( prefix().state==executing, "execute not running?" ); | | __TBB_ASSERT( prefix().state==executing, "execute not running?" ); | |
| prefix().state = recycle; | | prefix().state = recycle; | |
| } | | } | |
| | | | |
| //! Change this to be a child of new_parent. | | //! Change this to be a child of new_parent. | |
| void recycle_as_child_of( task& new_parent ) { | | void recycle_as_child_of( task& new_parent ) { | |
| internal::task_prefix& p = prefix(); | | internal::task_prefix& p = prefix(); | |
| __TBB_ASSERT( prefix().state==executing||prefix().state==allocated,
"execute not running, or already recycled" ); | | __TBB_ASSERT( prefix().state==executing||prefix().state==allocated,
"execute not running, or already recycled" ); | |
| __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when r
ecycled as a child" ); | | __TBB_ASSERT( prefix().ref_count==0, "no child tasks allowed when r
ecycled as a child" ); | |
| | | | |
| skipping to change at line 599 | | skipping to change at line 662 | |
| | | | |
| //! Set reference count | | //! Set reference count | |
| void set_ref_count( int count ) { | | void set_ref_count( int count ) { | |
| #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | | #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | |
| internal_set_ref_count(count); | | internal_set_ref_count(count); | |
| #else | | #else | |
| prefix().ref_count = count; | | prefix().ref_count = count; | |
| #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | | #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | |
| } | | } | |
| | | | |
|
| //! Atomically increment reference count. | | //! Atomically increment reference count and returns its old value. | |
| /** Has acquire semantics */ | | /** Has acquire semantics */ | |
| void increment_ref_count() { | | void increment_ref_count() { | |
| __TBB_FetchAndIncrementWacquire( &prefix().ref_count ); | | __TBB_FetchAndIncrementWacquire( &prefix().ref_count ); | |
| } | | } | |
| | | | |
|
| //! Atomically decrement reference count. | | //! Atomically decrement reference count and returns its new value. | |
| /** Has release semantics. */ | | /** Has release semantics. */ | |
| int decrement_ref_count() { | | int decrement_ref_count() { | |
| #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | | #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | |
| return int(internal_decrement_ref_count()); | | return int(internal_decrement_ref_count()); | |
| #else | | #else | |
| return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))-
1; | | return int(__TBB_FetchAndDecrementWrelease( &prefix().ref_count ))-
1; | |
| #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | | #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | |
| } | | } | |
| | | | |
| //! Define recommended static forms via import from base class. | | //! Define recommended static forms via import from base class. | |
| | | | |
| skipping to change at line 642 | | skipping to change at line 705 | |
| /** If there are more tasks than worker threads, the tasks are spawned
in | | /** If there are more tasks than worker threads, the tasks are spawned
in | |
| order of front to back. */ | | order of front to back. */ | |
| static void spawn_root_and_wait( task_list& root_list ); | | static void spawn_root_and_wait( task_list& root_list ); | |
| | | | |
| //! Wait for reference count to become one, and set reference count to
zero. | | //! Wait for reference count to become one, and set reference count to
zero. | |
| /** Works on tasks while waiting. */ | | /** Works on tasks while waiting. */ | |
| void wait_for_all() { | | void wait_for_all() { | |
| prefix().owner->wait_for_all( *this, NULL ); | | prefix().owner->wait_for_all( *this, NULL ); | |
| } | | } | |
| | | | |
|
| #if __TBB_ARENA_PER_MASTER | | | |
| //! Enqueue task for starvation-resistant execution. | | //! Enqueue task for starvation-resistant execution. | |
|
| | | #if __TBB_TASK_PRIORITY | |
| | | /** The task will be enqueued on the normal priority level disregarding | |
| | | the | |
| | | priority of its task group. | |
| | | | |
| | | The rationale of such semantics is that priority of an enqueued tas | |
| | | k is | |
| | | statically fixed at the moment of its enqueuing, while task group p | |
| | | riority | |
| | | is dynamic. Thus automatic priority inheritance would be generally | |
| | | a subject | |
| | | to the race, which may result in unexpected behavior. | |
| | | | |
| | | Use enqueue() overload with explicit priority value and task::group | |
| | | _priority() | |
| | | method to implement such priority inheritance when it is really nec | |
| | | essary. **/ | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| static void enqueue( task& t ) { | | static void enqueue( task& t ) { | |
| t.prefix().owner->enqueue( t, NULL ); | | t.prefix().owner->enqueue( t, NULL ); | |
| } | | } | |
| | | | |
|
| #endif /* __TBB_ARENA_PER_MASTER */ | | #if __TBB_TASK_PRIORITY | |
| | | //! Enqueue task for starvation-resistant execution on the specified pr | |
| | | iority level. | |
| | | static void enqueue( task& t, priority_t p ) { | |
| | | __TBB_ASSERT( p == priority_low || p == priority_normal || p == pri | |
| | | ority_high, "Invalid priority level value" ); | |
| | | t.prefix().owner->enqueue( t, (void*)p ); | |
| | | } | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| //! The innermost task being executed or destroyed by the current threa
d at the moment. | | //! The innermost task being executed or destroyed by the current threa
d at the moment. | |
| static task& __TBB_EXPORTED_FUNC self(); | | static task& __TBB_EXPORTED_FUNC self(); | |
| | | | |
| //! task on whose behalf this task is working, or NULL if this is a roo
t. | | //! task on whose behalf this task is working, or NULL if this is a roo
t. | |
| task* parent() const {return prefix().parent;} | | task* parent() const {return prefix().parent;} | |
| | | | |
|
| | | //! sets parent task pointer to specified value | |
| | | void set_parent(task* p) { | |
| | | #if __TBB_TASK_GROUP_CONTEXT | |
| | | __TBB_ASSERT(prefix().context == p->prefix().context, "The tasks mu | |
| | | st be in the same context"); | |
| | | #endif | |
| | | prefix().parent = p; | |
| | | } | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
|
| //! Shared context that is used to communicate asynchronous state chang | | //! This method is deprecated and will be removed in the future. | |
| es | | /** Use method group() instead. **/ | |
| task_group_context* context() {return prefix().context;} | | task_group_context* context() {return prefix().context;} | |
|
| | | | |
| | | //! Pointer to the task group descriptor. | |
| | | task_group_context* group () { return prefix().context; } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
| //! True if task was stolen from the task pool of another thread. | | //! True if task was stolen from the task pool of another thread. | |
| bool is_stolen_task() const { | | bool is_stolen_task() const { | |
| return (prefix().extra_state & 0x80)!=0; | | return (prefix().extra_state & 0x80)!=0; | |
| } | | } | |
| | | | |
| //---------------------------------------------------------------------
--- | | //---------------------------------------------------------------------
--- | |
| // Debugging | | // Debugging | |
| //---------------------------------------------------------------------
--- | | //---------------------------------------------------------------------
--- | |
| | | | |
| skipping to change at line 706 | | skipping to change at line 799 | |
| affinity_id affinity() const {return prefix().affinity;} | | affinity_id affinity() const {return prefix().affinity;} | |
| | | | |
| //! Invoked by scheduler to notify task that it ran on unexpected threa
d. | | //! Invoked by scheduler to notify task that it ran on unexpected threa
d. | |
| /** Invoked before method execute() runs, if task is stolen, or task ha
s | | /** Invoked before method execute() runs, if task is stolen, or task ha
s | |
| affinity but will be executed on another thread. | | affinity but will be executed on another thread. | |
| | | | |
| The default action does nothing. */ | | The default action does nothing. */ | |
| virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id ); | | virtual void __TBB_EXPORTED_METHOD note_affinity( affinity_id id ); | |
| | | | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
|
| | | //! Moves this task from its current group into another one. | |
| | | /** Argument ctx specifies the new group. | |
| | | | |
| | | The primary purpose of this method is to associate unique task grou | |
| | | p context | |
| | | with a task allocated for subsequent enqueuing. In contrast to spaw | |
| | | ned tasks | |
| | | enqueued ones normally outlive the scope where they were created. T | |
| | | his makes | |
| | | traditional usage model where task group context are allocated loca | |
| | | lly on | |
| | | the stack inapplicable. Dynamic allocation of context objects is pe | |
| | | rformance | |
| | | inefficient. Method change_group() allows to make task group contex | |
| | | t object | |
| | | a member of the task class, and then associate it with its containi | |
| | | ng task | |
| | | object in the latter's constructor. **/ | |
| | | void __TBB_EXPORTED_METHOD change_group ( task_group_context& ctx ); | |
| | | | |
| //! Initiates cancellation of all tasks in this cancellation group and
its subordinate groups. | | //! Initiates cancellation of all tasks in this cancellation group and
its subordinate groups. | |
| /** \return false if cancellation has already been requested, true othe
rwise. **/ | | /** \return false if cancellation has already been requested, true othe
rwise. **/ | |
| bool cancel_group_execution () { return prefix().context->cancel_group_
execution(); } | | bool cancel_group_execution () { return prefix().context->cancel_group_
execution(); } | |
| | | | |
|
| //! Returns true if the context received cancellation request. | | //! Returns true if the context has received cancellation request. | |
| bool is_cancelled () const { return prefix().context->is_group_executio
n_cancelled(); } | | bool is_cancelled () const { return prefix().context->is_group_executio
n_cancelled(); } | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
|
| | | #if __TBB_TASK_PRIORITY | |
| | | //! Changes priority of the task group this task belongs to. | |
| | | void set_group_priority ( priority_t p ) { prefix().context->set_prior | |
| | | ity(p); } | |
| | | | |
| | | //! Retrieves current priority of the task group this task belongs to. | |
| | | priority_t group_priority () const { return prefix().context->priority( | |
| | | ); } | |
| | | | |
| | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
| private: | | private: | |
| friend class interface5::internal::task_base; | | friend class interface5::internal::task_base; | |
| friend class task_list; | | friend class task_list; | |
| friend class internal::scheduler; | | friend class internal::scheduler; | |
| friend class internal::allocate_root_proxy; | | friend class internal::allocate_root_proxy; | |
| #if __TBB_TASK_GROUP_CONTEXT | | #if __TBB_TASK_GROUP_CONTEXT | |
| friend class internal::allocate_root_with_context_proxy; | | friend class internal::allocate_root_with_context_proxy; | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| friend class internal::allocate_continuation_proxy; | | friend class internal::allocate_continuation_proxy; | |
| friend class internal::allocate_child_proxy; | | friend class internal::allocate_child_proxy; | |
| | | | |
End of changes. 33 change blocks. |
| 47 lines changed or deleted | | 185 lines changed or added | |
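As a usage illustration of the new task.h interfaces added above (priority_t, the task::enqueue overload taking an explicit priority, and change_group()), here is a minimal sketch. It is illustrative code rather than part of TBB: background_task and submit_background_work are made-up names, the sketch assumes __TBB_TASK_GROUP_CONTEXT and __TBB_TASK_PRIORITY are enabled and the scheduler is initialized, and allocate_root()/execute() belong to the same task API even though they are not visible in this excerpt.

#include "tbb/task.h"
#include <cstdio>

// Following the pattern described in the change_group() comment above, the
// task_group_context is a member of the task and is attached in the
// constructor, so an enqueued task that outlives the scope which created it
// still owns a valid context.
class background_task : public tbb::task {
    tbb::task_group_context my_context;
public:
    background_task() : my_context(tbb::task_group_context::isolated) {
        change_group(my_context);
    }
    /*override*/ tbb::task* execute() {
        if( !is_cancelled() )
            std::printf("background work running\n");
        return NULL;
    }
};

void submit_background_work() {
    // Fire-and-forget: the producer does not call wait_for_all() on enqueued tasks.
    background_task& t = *new( tbb::task::allocate_root() ) background_task;
    tbb::task::enqueue( t, tbb::priority_low );  // explicit, statically fixed priority
}

As the comment on the priority-less enqueue() overload above explains, the priority of an enqueued task is fixed at enqueue time while a group's priority is dynamic, so passing the level explicitly avoids a race on the effective priority.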
|
| tbb_config.h | | tbb_config.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 34 | | skipping to change at line 34 | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_tbb_config_H | | #ifndef __TBB_tbb_config_H | |
| #define __TBB_tbb_config_H | | #define __TBB_tbb_config_H | |
| | | | |
| /** This header is supposed to contain macro definitions and C style commen
ts only. | | /** This header is supposed to contain macro definitions and C style commen
ts only. | |
| The macros defined here are intended to control such aspects of TBB bui
ld as | | The macros defined here are intended to control such aspects of TBB bui
ld as | |
|
| | | - presence of compiler features | |
| - compilation modes | | - compilation modes | |
| - feature sets | | - feature sets | |
|
| - workarounds presence | | - known compiler/platform issues | |
| **/ | | **/ | |
| | | | |
|
| /** Compilation modes **/ | | #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC | |
| | | _PATCHLEVEL__) | |
| | | | |
| | | /** Presence of compiler features **/ | |
| | | | |
| | | #if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER) | |
| | | /** warning suppression pragmas available in GCC since 4.4 **/ | |
| | | #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1 | |
| | | #endif | |
| | | | |
| | | /* TODO: The following condition should be extended when new compilers/runt | |
| | | imes | |
| | | with std::exception_ptr support appear. */ | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT ((_MSC_VER >= 1600 || (__GXX_EXPERIMEN | |
| | | TAL_CXX0X__ && __GNUC__==4 && __GNUC_MINOR__>=4)) && !__INTEL_COMPILER) | |
| | | | |
| | | #if __GNUC__ || __SUNPRO_CC || __IBMCPP__ | |
| | | /* ICC defines __GNUC__ and so is covered */ | |
| | | #define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1 | |
| | | #elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER) | |
| | | #define __TBB_DECLSPEC_ALIGN_PRESENT 1 | |
| | | #endif | |
| | | | |
| | | #if (__TBB_GCC_VERSION >= 40102) && !defined(__INTEL_COMPILER) | |
| | | /** built-in atomics available in GCC since 4.1.2 **/ | |
| | | #define __TBB_GCC_BUILTIN_ATOMICS_PRESENT 1 | |
| | | #endif | |
| | | | |
| | | /** User controlled TBB features & modes **/ | |
| | | | |
| #ifndef TBB_USE_DEBUG | | #ifndef TBB_USE_DEBUG | |
| #ifdef TBB_DO_ASSERT | | #ifdef TBB_DO_ASSERT | |
| #define TBB_USE_DEBUG TBB_DO_ASSERT | | #define TBB_USE_DEBUG TBB_DO_ASSERT | |
| #else | | #else | |
|
| | | #ifdef _DEBUG | |
| | | #define TBB_USE_DEBUG _DEBUG | |
| | | #else | |
| #define TBB_USE_DEBUG 0 | | #define TBB_USE_DEBUG 0 | |
|
| | | #endif | |
| #endif /* TBB_DO_ASSERT */ | | #endif /* TBB_DO_ASSERT */ | |
|
| #else | | | |
| #define TBB_DO_ASSERT TBB_USE_DEBUG | | | |
| #endif /* TBB_USE_DEBUG */ | | #endif /* TBB_USE_DEBUG */ | |
| | | | |
| #ifndef TBB_USE_ASSERT | | #ifndef TBB_USE_ASSERT | |
| #ifdef TBB_DO_ASSERT | | #ifdef TBB_DO_ASSERT | |
| #define TBB_USE_ASSERT TBB_DO_ASSERT | | #define TBB_USE_ASSERT TBB_DO_ASSERT | |
| #else | | #else | |
| #define TBB_USE_ASSERT TBB_USE_DEBUG | | #define TBB_USE_ASSERT TBB_USE_DEBUG | |
| #endif /* TBB_DO_ASSERT */ | | #endif /* TBB_DO_ASSERT */ | |
| #endif /* TBB_USE_ASSERT */ | | #endif /* TBB_USE_ASSERT */ | |
| | | | |
| | | | |
| skipping to change at line 86 | | skipping to change at line 114 | |
| #if TBB_USE_EXCEPTIONS | | #if TBB_USE_EXCEPTIONS | |
| #error Compilation settings do not support exception handling. Plea
se do not set TBB_USE_EXCEPTIONS macro or set it to 0. | | #error Compilation settings do not support exception handling. Plea
se do not set TBB_USE_EXCEPTIONS macro or set it to 0. | |
| #elif !defined(TBB_USE_EXCEPTIONS) | | #elif !defined(TBB_USE_EXCEPTIONS) | |
| #define TBB_USE_EXCEPTIONS 0 | | #define TBB_USE_EXCEPTIONS 0 | |
| #endif | | #endif | |
| #elif !defined(TBB_USE_EXCEPTIONS) | | #elif !defined(TBB_USE_EXCEPTIONS) | |
| #define TBB_USE_EXCEPTIONS 1 | | #define TBB_USE_EXCEPTIONS 1 | |
| #endif | | #endif | |
| | | | |
| #ifndef TBB_IMPLEMENT_CPP0X | | #ifndef TBB_IMPLEMENT_CPP0X | |
|
| /** By default, use C++0x classes if available **/ | | /** By default, use C++0x classes if available **/ | |
| #if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__ | | #if __GNUC__==4 && __GNUC_MINOR__>=4 && __GXX_EXPERIMENTAL_CXX0X__ | |
| #define TBB_IMPLEMENT_CPP0X 0 | | #define TBB_IMPLEMENT_CPP0X 0 | |
| #else | | #else | |
| #define TBB_IMPLEMENT_CPP0X 1 | | #define TBB_IMPLEMENT_CPP0X 1 | |
| #endif | | #endif | |
| #endif /* TBB_IMPLEMENT_CPP0X */ | | #endif /* TBB_IMPLEMENT_CPP0X */ | |
| | | | |
|
| /** Feature sets **/ | | #ifndef TBB_USE_CAPTURED_EXCEPTION | |
| | | #if __TBB_EXCEPTION_PTR_PRESENT | |
| | | #define TBB_USE_CAPTURED_EXCEPTION 0 | |
| | | #else | |
| | | #define TBB_USE_CAPTURED_EXCEPTION 1 | |
| | | #endif | |
| | | #else /* defined TBB_USE_CAPTURED_EXCEPTION */ | |
| | | #if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT | |
| | | #error Current runtime does not support std::exception_ptr. Set TBB | |
| | | _USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb: | |
| | | :captured_exception. | |
| | | #endif | |
| | | #endif /* defined TBB_USE_CAPTURED_EXCEPTION */ | |
| | | | |
| | | /** Check whether the request to use GCC atomics can be satisfied **/ | |
| | | #if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT) | |
| | | #error "GCC atomic built-ins are not supported." | |
| | | #endif | |
| | | | |
| | | /** Internal TBB features & modes **/ | |
| | | | |
| | | #ifndef __TBB_DYNAMIC_LOAD_ENABLED | |
| | | #define __TBB_DYNAMIC_LOAD_ENABLED !__TBB_TASK_CPP_DIRECTLY_INCLUDED | |
| | | #elif !__TBB_DYNAMIC_LOAD_ENABLED | |
| | | #if _WIN32||_WIN64 | |
| | | #define __TBB_NO_IMPLICIT_LINKAGE 1 | |
| | | #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 | |
| | | #else | |
| | | #define __TBB_WEAK_SYMBOLS 1 | |
| | | #endif | |
| | | #endif | |
| | | | |
| #ifndef __TBB_COUNT_TASK_NODES | | #ifndef __TBB_COUNT_TASK_NODES | |
| #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT | | #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT | |
| #endif | | #endif | |
| | | | |
| #ifndef __TBB_TASK_GROUP_CONTEXT | | #ifndef __TBB_TASK_GROUP_CONTEXT | |
|
| #define __TBB_TASK_GROUP_CONTEXT 1 | | #define __TBB_TASK_GROUP_CONTEXT 1 | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
|
| | | #if TBB_USE_EXCEPTIONS && !__TBB_TASK_GROUP_CONTEXT | |
| | | #error TBB_USE_EXCEPTIONS requires __TBB_TASK_GROUP_CONTEXT to be enabl | |
| | | ed | |
| | | #endif | |
| | | | |
| #ifndef __TBB_SCHEDULER_OBSERVER | | #ifndef __TBB_SCHEDULER_OBSERVER | |
|
| #define __TBB_SCHEDULER_OBSERVER 1 | | #define __TBB_SCHEDULER_OBSERVER 1 | |
| #endif /* __TBB_SCHEDULER_OBSERVER */ | | #endif /* __TBB_SCHEDULER_OBSERVER */ | |
| | | | |
|
| #ifndef __TBB_ARENA_PER_MASTER | | #ifndef __TBB_TASK_PRIORITY | |
| #define __TBB_ARENA_PER_MASTER 1 | | #define __TBB_TASK_PRIORITY __TBB_TASK_GROUP_CONTEXT | |
| #endif /* __TBB_ARENA_PER_MASTER */ | | #endif /* __TBB_TASK_PRIORITY */ | |
| | | | |
|
| /* TODO: The following condition should be extended as soon as new compiler | | #if __TBB_TASK_PRIORITY && !__TBB_TASK_GROUP_CONTEXT | |
| s/runtimes | | #error __TBB_TASK_PRIORITY requires __TBB_TASK_GROUP_CONTEXT to be enab | |
| with std::exception_ptr support appear. */ | | led | |
| #define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600 || __GXX_EXPERIMENTA | | #endif | |
| L_CXX0X__ && (__GNUC__==4 && __GNUC_MINOR__>=4)) | | | |
| | | | |
|
| #ifndef TBB_USE_CAPTURED_EXCEPTION | | #if !defined(__TBB_SURVIVE_THREAD_SWITCH) && (_WIN32 || _WIN64 || __linux__ | |
| #if __TBB_EXCEPTION_PTR_PRESENT | | ) | |
| #define TBB_USE_CAPTURED_EXCEPTION 0 | | #define __TBB_SURVIVE_THREAD_SWITCH 1 | |
| #else | | #endif /* __TBB_SURVIVE_THREAD_SWITCH */ | |
| #define TBB_USE_CAPTURED_EXCEPTION 1 | | | |
| #endif | | | |
| #else /* defined TBB_USE_CAPTURED_EXCEPTION */ | | | |
| #if !TBB_USE_CAPTURED_EXCEPTION && !__TBB_EXCEPTION_PTR_PRESENT | | | |
| #error Current runtime does not support std::exception_ptr. Set TBB | | | |
| _USE_CAPTURED_EXCEPTION and make sure that your code is ready to catch tbb: | | | |
| :captured_exception. | | | |
| #endif | | | |
| #endif /* defined TBB_USE_CAPTURED_EXCEPTION */ | | | |
| | | | |
| #ifndef __TBB_DEFAULT_PARTITIONER | | #ifndef __TBB_DEFAULT_PARTITIONER | |
| #if TBB_DEPRECATED | | #if TBB_DEPRECATED | |
| /** Default partitioner for parallel loop templates in TBB 1.0-2.1 */ | | /** Default partitioner for parallel loop templates in TBB 1.0-2.1 */ | |
| #define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner | | #define __TBB_DEFAULT_PARTITIONER tbb::simple_partitioner | |
| #else | | #else | |
|
| /** Default partitioner for parallel loop templates in TBB 2.2 */ | | /** Default partitioner for parallel loop templates since TBB 2.2 */ | |
| #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner | | #define __TBB_DEFAULT_PARTITIONER tbb::auto_partitioner | |
|
| #endif /* TBB_DEFAULT_PARTITIONER */ | | #endif /* TBB_DEPRECATED */ | |
| #endif /* !defined(__TBB_DEFAULT_PARTITIONER) */ | | #endif /* !defined(__TBB_DEFAULT_PARTITIONER) */ | |
| | | | |
|
| /** Workarounds presence **/ | | | |
| | | | |
| #if __GNUC__==4 && __GNUC_MINOR__>=4 && !defined(__INTEL_COMPILER) | | | |
| #define __TBB_GCC_WARNING_SUPPRESSION_ENABLED 1 | | | |
| #endif | | | |
| | | | |
| /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused
by | | /** Macros of the form __TBB_XXX_BROKEN denote known issues that are caused
by | |
| the bugs in compilers, standard or OS specific libraries. They should b
e | | the bugs in compilers, standard or OS specific libraries. They should b
e | |
| removed as soon as the corresponding bugs are fixed or the buggy OS/com
piler | | removed as soon as the corresponding bugs are fixed or the buggy OS/com
piler | |
| versions go out of the support list. | | versions go out of the support list. | |
| **/ | | **/ | |
| | | | |
|
| | | #if __GNUC__ && __TBB_x86_64 && __INTEL_COMPILER == 1200 | |
| | | #define __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN 1 | |
| | | #endif | |
| | | | |
| #if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPI
LER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012) | | #if _MSC_VER && __INTEL_COMPILER && (__INTEL_COMPILER<1110 || __INTEL_COMPI
LER==1110 && __INTEL_COMPILER_BUILD_DATE < 20091012) | |
| /** Necessary to avoid ICL error (or warning in non-strict mode): | | /** Necessary to avoid ICL error (or warning in non-strict mode): | |
| "exception specification for implicitly declared virtual destructor
is | | "exception specification for implicitly declared virtual destructor
is | |
| incompatible with that of overridden one". **/ | | incompatible with that of overridden one". **/ | |
| #define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1 | | #define __TBB_DEFAULT_DTOR_THROW_SPEC_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if defined(_MSC_VER) && _MSC_VER < 1500 && !defined(__INTEL_COMPILER) | | #if defined(_MSC_VER) && _MSC_VER < 1500 && !defined(__INTEL_COMPILER) | |
| /** VS2005 and earlier do not allow declaring template class as a frien
d | | /** VS2005 and earlier do not allow declaring template class as a frien
d | |
| of classes defined in other namespaces. **/ | | of classes defined in other namespaces. **/ | |
| #define __TBB_TEMPLATE_FRIENDS_BROKEN 1 | | #define __TBB_TEMPLATE_FRIENDS_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
|
| #if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ | | #if __GLIBC__==2 && __GLIBC_MINOR__==3 || __MINGW32__ || (__APPLE__ && __IN
TEL_COMPILER==1200 && !TBB_USE_DEBUG) | |
| //! Macro controlling EH usages in TBB tests | | //! Macro controlling EH usages in TBB tests | |
| /** Some older versions of glibc crash when exception handling happens
concurrently. **/ | | /** Some older versions of glibc crash when exception handling happens
concurrently. **/ | |
| #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 | | #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 | | #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 | |
| /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads t
o a worker thread crash on the thread's startup. **/ | | /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads t
o a worker thread crash on the thread's startup. **/ | |
| #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 | | #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER) | | #if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER) | |
| /** A bug in GCC 3.3 with access to nested classes declared in protecte
d area */ | | /** A bug in GCC 3.3 with access to nested classes declared in protecte
d area */ | |
| #define __TBB_GCC_3_3_PROTECTED_BROKEN 1 | | #define __TBB_GCC_3_3_PROTECTED_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
|
| | | #if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2) | |
| | | /** MinGW has a bug with stack alignment for routines invoked from MS R | |
| | | TLs. | |
| | | Since GCC 4.2, the bug can be worked around via a special attribute | |
| | | . **/ | |
| | | #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1 | |
| | | #endif | |
| | | | |
| | | #if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0 | |
| | | // GCC of this version may rashly ignore control dependencies | |
| | | #define __TBB_GCC_OPTIMIZER_ORDERING_BROKEN 1 | |
| | | #endif | |
| | | | |
| #if __FreeBSD__ | | #if __FreeBSD__ | |
| /** A bug in FreeBSD 8.0 results in kernel panic when there is contenti
on | | /** A bug in FreeBSD 8.0 results in kernel panic when there is contenti
on | |
| on a mutex created with this attribute. **/ | | on a mutex created with this attribute. **/ | |
| #define __TBB_PRIO_INHERIT_BROKEN 1 | | #define __TBB_PRIO_INHERIT_BROKEN 1 | |
| | | | |
| /** A bug in FreeBSD 8.0 results in test hanging when an exception occu
rs | | /** A bug in FreeBSD 8.0 results in test hanging when an exception occu
rs | |
| during (concurrent?) object construction by means of placement new
operator. **/ | | during (concurrent?) object construction by means of placement new
operator. **/ | |
| #define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1 | | #define __TBB_PLACEMENT_NEW_EXCEPTION_SAFETY_BROKEN 1 | |
| #endif /* __FreeBSD__ */ | | #endif /* __FreeBSD__ */ | |
| | | | |
| #if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER) | | #if (__linux__ || __APPLE__) && __i386__ && defined(__INTEL_COMPILER) | |
| /** The Intel compiler for IA-32 (Linux|Mac OS X) crashes or generates | | /** The Intel compiler for IA-32 (Linux|Mac OS X) crashes or generates | |
| incorrect code when __asm__ arguments have a cast to volatile. **/ | | incorrect code when __asm__ arguments have a cast to volatile. **/ | |
| #define __TBB_ICC_ASM_VOLATILE_BROKEN 1 | | #define __TBB_ICC_ASM_VOLATILE_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
|
| | | #if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2) | |
| | | /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __ali | |
| | | gnof(T) | |
| | | when T has not yet been instantiated. **/ | |
| | | #define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1 | |
| | | #endif | |
| | | | |
| #endif /* __TBB_tbb_config_H */ | | #endif /* __TBB_tbb_config_H */ | |
| | | | |
End of changes. 22 change blocks. |
| 44 lines changed or deleted | | 118 lines changed or added | |
|
| tbb_machine.h | | tbb_machine.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 32 | | skipping to change at line 32 | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered b
y | | invalidate any other reasons why the executable file might be covered b
y | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_machine_H | | #ifndef __TBB_machine_H | |
| #define __TBB_machine_H | | #define __TBB_machine_H | |
| | | | |
|
| | | /** This header provides a basic platform abstraction layer by hooking up | |
| | |     appropriate architecture/OS/compiler specific headers from the | |
| | |     /include/tbb/machine directory. | |
| | | If a plug-in header does not implement all the required APIs, it must s | |
| | | pecify | |
| | | the missing ones by setting one or more of the following macros: | |
| | | | |
| | | __TBB_USE_GENERIC_PART_WORD_CAS | |
| | | __TBB_USE_GENERIC_PART_WORD_FETCH_ADD | |
| | | __TBB_USE_GENERIC_PART_WORD_FETCH_STORE | |
| | | __TBB_USE_GENERIC_FETCH_ADD | |
| | | __TBB_USE_GENERIC_FETCH_STORE | |
| | | __TBB_USE_GENERIC_DWORD_FETCH_ADD | |
| | | __TBB_USE_GENERIC_DWORD_FETCH_STORE | |
| | | __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | |
| | | __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE | |
| | | __TBB_USE_GENERIC_RELAXED_LOAD_STORE | |
| | | __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | |
| | | | |
| | | In this case tbb_machine.h will add missing functionality based on a mi | |
| | | nimal set | |
| | |     of APIs that are required to be implemented by all plug-in headers, | |
| | |     as described further. | |
| | | Note that these generic implementations may be sub-optimal for a partic | |
| | | ular | |
| | | architecture, and thus should be relied upon only after careful evaluat | |
| | | ion | |
| | | or as the last resort. | |
| | | | |
| | | Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architectu | |
| | | re to | |
| | | indicate that the port is not going to support double word atomics. It | |
| | | may also | |
| | | be set to 1 explicitly, though normally this is not necessary as tbb_ma | |
| | | chine.h | |
| | | will set it automatically. | |
| | | | |
| | | Prerequisites for each architecture port | |
| | | ---------------------------------------- | |
| | | The following functions have no generic implementation. Therefore they | |
| | | must be | |
| | | implemented in each machine architecture specific header either as a co | |
| | | nventional | |
| | | function or as a functional macro. | |
| | | | |
| | | __TBB_Yield() | |
| | | Signals OS that the current thread is willing to relinquish the rem | |
| | | ainder | |
| | | of its time quantum. | |
| | | | |
| | | __TBB_full_memory_fence() | |
| | | Must prevent all memory operations from being reordered across it ( | |
| | | both | |
| | | by hardware and compiler). All such fences must be totally ordered | |
| | | (or | |
| | | sequentially consistent). | |
| | | | |
| | | __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t compa | |
| | | rand ) | |
| | | Must be provided if __TBB_USE_FENCED_ATOMICS is not set. | |
| | | | |
| | |     __TBB_machine_cmpswp8( volatile void *ptr, int64_t value, int64_t | |
| | |     comparand ) | |
| | | Must be provided for 64-bit architectures if __TBB_USE_FENCED_ATOMI | |
| | | CS is not set, | |
| | | and for 32-bit architectures if __TBB_64BIT_ATOMICS is set | |
| | | | |
| | | __TBB_machine_<op><S><fence>(...), where | |
| | | <op> = {cmpswp, fetchadd, fetchstore} | |
| | | <S> = {1, 2, 4, 8} | |
| | | <fence> = {full_fence, acquire, release, relaxed} | |
| | | Must be provided if __TBB_USE_FENCED_ATOMICS is set. | |
| | | | |
| | | __TBB_control_consistency_helper() | |
| | | Bridges the memory-semantics gap between architectures providing on | |
| | | ly | |
| | | implicit C++0x "consume" semantics (like Power Architecture) and th | |
| | | ose | |
| | | also implicitly obeying control dependencies (like Itanium). | |
| | | It must be used only in conditional code where the condition is its | |
| | | elf | |
| | | data-dependent, and will then make subsequent code behave as if the | |
| | | original data dependency were acquired. | |
| | | It needs only an empty definition where implied by the architecture | |
| | | either specifically (Itanium) or because generally stronger C++0x " | |
| | | acquire" | |
| | | semantics are enforced (like x86). | |
| | | | |
| | | __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() | |
| | | Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set | |
| | | . | |
| | | Enforce acquire and release semantics in generic implementations of | |
| | | fenced | |
| | | store and load operations. Depending on the particular architecture | |
| | | /compiler | |
| | | combination they may be a hardware fence, a compiler fence, both or | |
| | | nothing. | |
| | | **/ | |
| | | | |
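To make the requirements above concrete, here is a rough sketch of a minimal plug-in header. It is not a real TBB port: the file name machine/myarch_generic.h and the use of GCC __sync builtins are assumptions chosen purely for illustration. The port supplies only the mandatory yield, full-fence and 4-byte CAS primitives, treats the consistency helpers as full fences, and requests generic implementations of everything else.

    // machine/myarch_generic.h -- hypothetical port sketch, not shipped with TBB
    #ifndef __TBB_machine_myarch_H
    #define __TBB_machine_myarch_H

    #include <stdint.h>
    #include <sched.h>               // sched_yield

    #define __TBB_WORDSIZE      4
    #define __TBB_64BIT_ATOMICS 0    // this port provides no 8-byte atomics

    // Mandatory primitives (expressed with GCC atomic builtins for illustration)
    #define __TBB_full_memory_fence()          __sync_synchronize()
    #define __TBB_control_consistency_helper() __TBB_full_memory_fence()
    #define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
    #define __TBB_release_consistency_helper() __TBB_full_memory_fence()
    #define __TBB_Yield()                      sched_yield()

    static inline int32_t __TBB_machine_cmpswp4( volatile void *ptr,
                                                 int32_t value, int32_t comparand ) {
        // Returns the old contents of *ptr; stores value only if *ptr==comparand.
        return __sync_val_compare_and_swap( (volatile int32_t*)ptr, comparand, value );
    }

    // Ask tbb_machine.h to synthesize everything else from the CAS above.
    #define __TBB_USE_GENERIC_PART_WORD_CAS          1
    #define __TBB_USE_GENERIC_FETCH_ADD              1
    #define __TBB_USE_GENERIC_FETCH_STORE            1
    #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
    #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE     1

    #endif /* __TBB_machine_myarch_H */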
| #include "tbb_stddef.h" | | #include "tbb_stddef.h" | |
| | | | |
|
| | | namespace tbb { | |
| | | namespace internal { | |
| | | | |
| | | /////////////////////////////////////////////////////////////////////////// | |
| | | ///// | |
| | | // Overridable helpers declarations | |
| | | // | |
| | | // A machine/*.h file may choose to define these templates, otherwise it mu | |
| | | st | |
| | | // request default implementation by setting appropriate __TBB_USE_GENERIC_ | |
| | | XXX macro(s). | |
| | | // | |
| | | template <typename T, std::size_t S> | |
| | | struct machine_load_store; | |
| | | | |
| | | template <typename T, std::size_t S> | |
| | | struct machine_load_store_relaxed; | |
| | | | |
| | | template <typename T, std::size_t S> | |
| | | struct machine_load_store_seq_cst; | |
| | | // | |
| | | // End of overridable helpers declarations | |
| | | /////////////////////////////////////////////////////////////////////////// | |
| | | ///// | |
| | | | |
| | | template<size_t S> struct atomic_selector; | |
| | | | |
| | | template<> struct atomic_selector<1> { | |
| | | typedef int8_t word; | |
| | | inline static word fetch_store ( volatile void* location, word value ); | |
| | | }; | |
| | | | |
| | | template<> struct atomic_selector<2> { | |
| | | typedef int16_t word; | |
| | | inline static word fetch_store ( volatile void* location, word value ); | |
| | | }; | |
| | | | |
| | | template<> struct atomic_selector<4> { | |
| | | #if _MSC_VER && !_WIN64 | |
| | | // Work-around that avoids spurious /Wp64 warnings | |
| | | typedef intptr_t word; | |
| | | #else | |
| | | typedef int32_t word; | |
| | | #endif | |
| | | inline static word fetch_store ( volatile void* location, word value ); | |
| | | }; | |
| | | | |
| | | template<> struct atomic_selector<8> { | |
| | | typedef int64_t word; | |
| | | inline static word fetch_store ( volatile void* location, word value ); | |
| | | }; | |
| | | | |
| | | }} // namespaces internal, tbb | |
| | | | |
| #if _WIN32||_WIN64 | | #if _WIN32||_WIN64 | |
| | | | |
| #ifdef _MANAGED | | #ifdef _MANAGED | |
| #pragma managed(push, off) | | #pragma managed(push, off) | |
| #endif | | #endif | |
| | | | |
|
| #if __MINGW32__ | | #if __MINGW64__ || __MINGW32__ | |
| #include "machine/linux_ia32.h" | | extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void | |
| extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | | ); | |
| #define __TBB_Yield() SwitchToThread() | | #define __TBB_Yield() SwitchToThread() | |
| #elif defined(_M_IX86) | | #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) | |
| #include "machine/windows_ia32.h" | | #include "machine/gcc_generic.h" | |
| #elif defined(_M_AMD64) | | #elif __MINGW64__ | |
| #include "machine/windows_intel64.h" | | #include "machine/linux_intel64.h" | |
| #elif _XBOX | | #elif __MINGW32__ | |
| #include "machine/xbox360_ppc.h" | | #include "machine/linux_ia32.h" | |
| #else | | #endif | |
| #error Unsupported platform | | #elif defined(_M_IX86) | |
| #endif | | #include "machine/windows_ia32.h" | |
| | | #elif defined(_M_X64) | |
| | | #include "machine/windows_intel64.h" | |
| | | #elif _XBOX | |
| | | #include "machine/xbox360_ppc.h" | |
| | | #endif | |
| | | | |
| #ifdef _MANAGED | | #ifdef _MANAGED | |
| #pragma managed(pop) | | #pragma managed(pop) | |
| #endif | | #endif | |
| | | | |
|
| #elif __linux__ || __FreeBSD__ | | #elif __linux__ || __FreeBSD__ || __NetBSD__ | |
| | | | |
|
| #if __i386__ | | #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT) | |
| #include "machine/linux_ia32.h" | | #include "machine/gcc_generic.h" | |
| #elif __x86_64__ | | #elif __i386__ | |
| #include "machine/linux_intel64.h" | | #include "machine/linux_ia32.h" | |
| #elif __ia64__ | | #elif __x86_64__ | |
| #include "machine/linux_ia64.h" | | #include "machine/linux_intel64.h" | |
| #endif | | #elif __ia64__ | |
| | | #include "machine/linux_ia64.h" | |
| | | #elif __powerpc__ | |
| | | #include "machine/mac_ppc.h" | |
| | | #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT | |
| | | #include "machine/gcc_generic.h" | |
| | | #endif | |
| | | #include "machine/linux_common.h" | |
| | | | |
| #elif __APPLE__ | | #elif __APPLE__ | |
| | | | |
|
| #if __i386__ | | #if __i386__ | |
| #include "machine/linux_ia32.h" | | #include "machine/linux_ia32.h" | |
| #elif __x86_64__ | | #elif __x86_64__ | |
| #include "machine/linux_intel64.h" | | #include "machine/linux_intel64.h" | |
| #elif __POWERPC__ | | #elif __POWERPC__ | |
| #include "machine/mac_ppc.h" | | #include "machine/mac_ppc.h" | |
| #endif | | #endif | |
| | | #include "machine/macos_common.h" | |
| | | | |
| #elif _AIX | | #elif _AIX | |
| | | | |
|
| #include "machine/ibm_aix51.h" | | #include "machine/ibm_aix51.h" | |
| | | | |
| #elif __sun || __SUNPRO_CC | | #elif __sun || __SUNPRO_CC | |
| | | | |
|
| #define __asm__ asm | | #define __asm__ asm | |
| #define __volatile__ volatile | | #define __volatile__ volatile | |
| #if __i386 || __i386__ | | | |
| #include "machine/linux_ia32.h" | | | |
| #elif __x86_64__ | | | |
| #include "machine/linux_intel64.h" | | | |
| #elif __sparc | | | |
| #include "machine/sunos_sparc.h" | | | |
| #endif | | | |
| | | | |
|
| #endif | | #if __i386 || __i386__ | |
| | | #include "machine/linux_ia32.h" | |
| | | #elif __x86_64__ | |
| | | #include "machine/linux_intel64.h" | |
| | | #elif __sparc | |
| | | #include "machine/sunos_sparc.h" | |
| | | #endif | |
| | | #include <sched.h> | |
| | | | |
|
| #if !defined(__TBB_CompareAndSwap4) \ | | #define __TBB_Yield() sched_yield() | |
| || !defined(__TBB_CompareAndSwap8) \ | | | |
| || !defined(__TBB_Yield) \ | | | |
| || !defined(__TBB_release_consistency_helper) | | | |
| #error Minimal requirements for tbb_machine.h not satisfied | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_load_with_acquire | | | |
| //! Load with acquire semantics; i.e., no following memory operation ca | | | |
| n move above the load. | | | |
| template<typename T> | | | |
| inline T __TBB_load_with_acquire(const volatile T& location) { | | | |
| T temp = location; | | | |
| __TBB_release_consistency_helper(); | | | |
| return temp; | | | |
| } | | | |
| #endif | | | |
| | | | |
|
| #ifndef __TBB_store_with_release | | #endif /* OS selection */ | |
| //! Store with release semantics; i.e., no prior memory operation can m | | | |
| ove below the store. | | #ifndef __TBB_64BIT_ATOMICS | |
| template<typename T, typename V> | | #define __TBB_64BIT_ATOMICS 1 | |
| inline void __TBB_store_with_release(volatile T& location, V value) { | | | |
| __TBB_release_consistency_helper(); | | | |
| location = T(value); | | | |
| } | | | |
| #endif | | #endif | |
| | | | |
|
| | | // Special atomic functions | |
| | | #if __TBB_USE_FENCED_ATOMICS | |
| | | #define __TBB_machine_cmpswp1 __TBB_machine_cmpswp1full_fence | |
| | | #define __TBB_machine_cmpswp2 __TBB_machine_cmpswp2full_fence | |
| | | #define __TBB_machine_cmpswp4 __TBB_machine_cmpswp4full_fence | |
| | | #define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8full_fence | |
| | | | |
| | | #if __TBB_WORDSIZE==8 | |
| | | #define __TBB_machine_fetchadd8 __TBB_machine_fetchadd8 | |
| | | full_fence | |
| | | #define __TBB_machine_fetchstore8 __TBB_machine_fetchstor | |
| | | e8full_fence | |
| | | #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8 | |
| | | release(P,V) | |
| | | #define __TBB_FetchAndIncrementWacquire(P) __TBB_machine_fetchadd8 | |
| | | acquire(P,1) | |
| | | #define __TBB_FetchAndDecrementWrelease(P) __TBB_machine_fetchadd8 | |
| | | release(P,(-1)) | |
| | | #else | |
| | | #error Define macros for 4-byte word, similarly to the above __TBB_ | |
| | | WORDSIZE==8 branch. | |
 | | |     #endif /* __TBB_WORDSIZE==8 */ | |
| | | #else /* !__TBB_USE_FENCED_ATOMICS */ | |
| | | #define __TBB_FetchAndAddWrelease(P,V) __TBB_FetchAndAddW(P,V) | |
| | | #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) | |
| | | #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) | |
| | | #endif /* !__TBB_USE_FENCED_ATOMICS */ | |
| | | | |
| | | #if __TBB_WORDSIZE==4 | |
| | | #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp4(P,V,C) | |
| | | #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd4(P,V) | |
| | | #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore4(P,V) | |
| | | #elif __TBB_WORDSIZE==8 | |
| | | #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH | |
| | | _ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE | |
| | | #error These macros should only be used on 32-bit platforms. | |
| | | #endif | |
| | | | |
| | | #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) | |
| | | #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) | |
| | | #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstore8(P,V) | |
| | | #else /* __TBB_WORDSIZE != 8 */ | |
| | | #error Unsupported machine word size. | |
| | | #endif /* __TBB_WORDSIZE */ | |
| | | | |
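As a small, hypothetical illustration of how the word-sized macros above are meant to be used (simple_counter below is not a TBB type), a counter built directly on them resolves to the 4-byte machine operations in a 32-bit build and to the 8-byte ones in a 64-bit build; both macros are presumed to carry full-fence semantics.

    #include <stddef.h>    // ptrdiff_t (normally pulled in via tbb_stddef.h)

    struct simple_counter {                        // illustrative only
        volatile ptrdiff_t value;
        ptrdiff_t fetch_and_increment() {          // returns the previous value
            return (ptrdiff_t)__TBB_FetchAndAddW( &value, 1 );
        }
        ptrdiff_t fetch_and_store( ptrdiff_t x ) { // atomically swaps x in
            return (ptrdiff_t)__TBB_FetchAndStoreW( &value, x );
        }
    };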
| #ifndef __TBB_Pause | | #ifndef __TBB_Pause | |
| inline void __TBB_Pause(int32_t) { | | inline void __TBB_Pause(int32_t) { | |
| __TBB_Yield(); | | __TBB_Yield(); | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| namespace tbb { | | namespace tbb { | |
|
| | | | |
| | | //! Sequentially consistent full memory fence. | |
| | | inline void atomic_fence () { __TBB_full_memory_fence(); } | |
| | | | |
| namespace internal { | | namespace internal { | |
| | | | |
| //! Class that implements exponential backoff. | | //! Class that implements exponential backoff. | |
| /** See implementation of spin_wait_while_eq for an example. */ | | /** See implementation of spin_wait_while_eq for an example. */ | |
|
| class atomic_backoff { | | class atomic_backoff : no_copy { | |
| //! Time delay, in units of "pause" instructions. | | //! Time delay, in units of "pause" instructions. | |
| /** Should be equal to approximately the number of "pause" instructions | | /** Should be equal to approximately the number of "pause" instructions | |
| that take the same time as a context switch. */ | | that take the same time as a context switch. */ | |
| static const int32_t LOOPS_BEFORE_YIELD = 16; | | static const int32_t LOOPS_BEFORE_YIELD = 16; | |
| int32_t count; | | int32_t count; | |
| public: | | public: | |
| atomic_backoff() : count(1) {} | | atomic_backoff() : count(1) {} | |
| | | | |
| //! Pause for a while. | | //! Pause for a while. | |
| void pause() { | | void pause() { | |
| | | | |
| skipping to change at line 206 | | skipping to change at line 367 | |
| const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) ); | | const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) ); | |
| #endif | | #endif | |
| const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset; | | const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset; | |
| atomic_backoff b; | | atomic_backoff b; | |
| uint32_t result; | | uint32_t result; | |
| for(;;) { | | for(;;) { | |
| result = *base; // reload the base value which might change during
the pause | | result = *base; // reload the base value which might change during
the pause | |
| uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset
); | | uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset
); | |
| uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); | | uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); | |
| // __TBB_CompareAndSwap4 presumed to have full fence. | | // __TBB_CompareAndSwap4 presumed to have full fence. | |
|
| result = __TBB_CompareAndSwap4( base, new_value, old_value ); | | // Cast shuts up /Wp64 warning | |
| | | result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_valu | |
| | | e ); | |
| if( result==old_value // CAS succeeded | | if( result==old_value // CAS succeeded | |
| || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of
interest have changed | | || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of
interest have changed | |
| break; | | break; | |
| else // CAS failed but the bits of
interest left unchanged | | else // CAS failed but the bits of
interest left unchanged | |
| b.pause(); | | b.pause(); | |
| } | | } | |
| return T((result & mask) >> bitoffset); | | return T((result & mask) >> bitoffset); | |
| } | | } | |
| | | | |
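Both the masked compare-and-swap above and the read-modify-write helpers that follow use atomic_backoff the same way: reread the location, retry, and pause with exponentially growing delays until the backoff finally yields the processor. A stripped-down sketch of that pattern is shown below; the function name is made up, but the spin_wait_while_eq referred to in the atomic_backoff comment follows the same shape.

    // Illustrative only: wait until 'location' no longer equals 'value',
    // spinning with exponential backoff and eventually yielding.
    template<typename T, typename U>
    void spin_until_not_equal( const volatile T& location, U value ) {
        tbb::internal::atomic_backoff backoff;
        while( location==value )   // reread on every iteration
            backoff.pause();
    }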
| template<size_t S, typename T> | | template<size_t S, typename T> | |
|
| inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T compar | | inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T compar | |
| and ) { | | and ); | |
| return __TBB_CompareAndSwapW((T *)ptr,value,comparand); | | | |
| } | | | |
| | | | |
| template<> | | template<> | |
| inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr,
uint8_t value, uint8_t comparand ) { | | inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr,
uint8_t value, uint8_t comparand ) { | |
|
| #ifdef __TBB_CompareAndSwap1 | | #if __TBB_USE_GENERIC_PART_WORD_CAS | |
| return __TBB_CompareAndSwap1(ptr,value,comparand); | | | |
| #else | | | |
| return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,va
lue,comparand); | | return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,va
lue,comparand); | |
|
| | | #else | |
| | | return __TBB_machine_cmpswp1(ptr,value,comparand); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<> | | template<> | |
| inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt
r, uint16_t value, uint16_t comparand ) { | | inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt
r, uint16_t value, uint16_t comparand ) { | |
|
| #ifdef __TBB_CompareAndSwap2 | | #if __TBB_USE_GENERIC_PART_WORD_CAS | |
| return __TBB_CompareAndSwap2(ptr,value,comparand); | | | |
| #else | | | |
| return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,
value,comparand); | | return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,
value,comparand); | |
|
| | | #else | |
| | | return __TBB_machine_cmpswp2(ptr,value,comparand); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<> | | template<> | |
| inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt
r, uint32_t value, uint32_t comparand ) { | | inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt
r, uint32_t value, uint32_t comparand ) { | |
|
| return __TBB_CompareAndSwap4(ptr,value,comparand); | | // Cast shuts up /Wp64 warning | |
| | | return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand); | |
| } | | } | |
| | | | |
|
| | | #if __TBB_64BIT_ATOMICS | |
| template<> | | template<> | |
| inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *pt
r, uint64_t value, uint64_t comparand ) { | | inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *pt
r, uint64_t value, uint64_t comparand ) { | |
|
| return __TBB_CompareAndSwap8(ptr,value,comparand); | | return __TBB_machine_cmpswp8(ptr,value,comparand); | |
| } | | } | |
|
| | | #endif | |
| | | | |
| template<size_t S, typename T> | | template<size_t S, typename T> | |
| inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { | | inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) { | |
| atomic_backoff b; | | atomic_backoff b; | |
| T result; | | T result; | |
| for(;;) { | | for(;;) { | |
| result = *reinterpret_cast<volatile T *>(ptr); | | result = *reinterpret_cast<volatile T *>(ptr); | |
| // __TBB_CompareAndSwapGeneric presumed to have full fence. | | // __TBB_CompareAndSwapGeneric presumed to have full fence. | |
| if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )
==result ) | | if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )
==result ) | |
| break; | | break; | |
| | | | |
| skipping to change at line 277 | | skipping to change at line 440 | |
| for(;;) { | | for(;;) { | |
| result = *reinterpret_cast<volatile T *>(ptr); | | result = *reinterpret_cast<volatile T *>(ptr); | |
| // __TBB_CompareAndSwapGeneric presumed to have full fence. | | // __TBB_CompareAndSwapGeneric presumed to have full fence. | |
| if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result
) | | if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result
) | |
| break; | | break; | |
| b.pause(); | | b.pause(); | |
| } | | } | |
| return result; | | return result; | |
| } | | } | |
| | | | |
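The two helpers above follow one recipe: read the current value, compute the desired new value, and retry with a full-fence CAS until no other thread has intervened. The same loop extends to any read-modify-write operation; for example, a hypothetical fetch-and-or (not part of TBB) could be written as:

    // Hypothetical generic fetch-and-or built on the same CAS retry loop.
    template<size_t S, typename T>
    inline T fetch_and_or_generic (volatile void *ptr, T bits) {
        tbb::internal::atomic_backoff b;
        T result;
        for(;;) {
            result = *reinterpret_cast<volatile T *>(ptr);
            // __TBB_CompareAndSwapGeneric presumed to have full fence.
            if( tbb::internal::__TBB_CompareAndSwapGeneric<S,T>( ptr, result|bits, result )==result )
                break;
            b.pause();
        }
        return result;
    }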
|
| | | #if __TBB_USE_GENERIC_PART_WORD_CAS | |
| | | #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1, | |
| | | uint8_t> | |
| | | #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2, | |
| | | uint16_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD | |
| | | #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,u | |
| | | int8_t> | |
| | | #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,u | |
| | | int16_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_ADD | |
| | | #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,u | |
| | | int32_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD | |
| | | #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,u | |
| | | int64_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STOR | |
| | | E | |
| | | #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric | |
| | | <1,uint8_t> | |
| | | #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric | |
| | | <2,uint16_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_STORE | |
| | | #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric | |
| | | <4,uint32_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE | |
| | | #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric | |
| | | <8,uint64_t> | |
| | | #endif | |
| | | | |
| | | #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | |
| | | #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S) | |
| | | \ | |
| | | atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile voi | |
| | | d* location, word value ) { \ | |
| | | return __TBB_machine_fetchstore##S( location, value ); | |
| | | \ | |
| | | } | |
| | | | |
| | | __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1) | |
| | | __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2) | |
| | | __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4) | |
| | | __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8) | |
| | | | |
| | | #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE | |
| | | #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ | |
| | | | |
| | | #if __TBB_USE_GENERIC_DWORD_LOAD_STORE | |
| | | inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) { | |
| | | for(;;) { | |
| | | int64_t result = *(int64_t *)ptr; | |
| | | if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break; | |
| | | } | |
| | | } | |
| | | | |
| | | inline int64_t __TBB_machine_load8 (const volatile void *ptr) { | |
 | | |     // Comparand and new value may be anything; they only need to be equal, | |
 | | |     // and the value should be unlikely to actually occur in 'location'. | |
| | | const int64_t anyvalue = 2305843009213693951; | |
| | | return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue, | |
| | | anyvalue); | |
| | | } | |
| | | #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ | |
| | | | |
| | | #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | |
| | | /** Fenced operations use volatile qualifier to prevent compiler from optim | |
| | | izing | |
 | | |     them out, and on architectures with weak memory ordering to induce the | |
 | | |     compiler to generate code with appropriate acquire/release semantics. | |
 | | |     On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has | |
| | | no effect on code gen, and consistency helpers serve as a compiler fenc | |
| | | e (the | |
| | | latter being true for IA64/gcc as well to fix a bug in some gcc version | |
| | | s). **/ | |
| | | template <typename T, size_t S> | |
| | | struct machine_load_store { | |
| | | static T load_with_acquire ( const volatile T& location ) { | |
| | | T to_return = location; | |
| | | __TBB_acquire_consistency_helper(); | |
| | | return to_return; | |
| | | } | |
| | | static void store_with_release ( volatile T &location, T value ) { | |
| | | __TBB_release_consistency_helper(); | |
| | | location = value; | |
| | | } | |
| | | }; | |
| | | | |
| | | #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | |
| | | template <typename T> | |
| | | struct machine_load_store<T,8> { | |
| | | static T load_with_acquire ( const volatile T& location ) { | |
| | | return (T)__TBB_machine_load8( (const volatile void*)&location ); | |
| | | } | |
| | | static void store_with_release ( volatile T& location, T value ) { | |
| | | __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | |
| | | } | |
| | | }; | |
| | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ | |
| | | | |
| | | template <typename T, size_t S> | |
| | | struct machine_load_store_seq_cst { | |
| | | static T load ( const volatile T& location ) { | |
| | | __TBB_full_memory_fence(); | |
| | | return machine_load_store<T,S>::load_with_acquire( location ); | |
| | | } | |
| | | #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | |
| | | static void store ( volatile T &location, T value ) { | |
| | | atomic_selector<S>::fetch_store( (volatile void*)&location, (typena | |
| | | me atomic_selector<S>::word)value ); | |
| | | } | |
| | | #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ | |
| | | static void store ( volatile T &location, T value ) { | |
| | | machine_load_store<T,S>::store_with_release( location, value ); | |
| | | __TBB_full_memory_fence(); | |
| | | } | |
| | | #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */ | |
| | | }; | |
| | | | |
| | | #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | |
| | | /** The implementation does not use functions __TBB_machine_load8/store8 as | |
| | | they | |
| | | are not required to be sequentially consistent. **/ | |
| | | template <typename T> | |
| | | struct machine_load_store_seq_cst<T,8> { | |
| | | static T load ( const volatile T& location ) { | |
 | | |         // Comparand and new value may be anything; they only need to be | |
 | | |         // equal, and the value should be unlikely to occur in 'location'. | |
| | | const int64_t anyvalue = 2305843009213693951ll; | |
| | | return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T | |
| | | *>(&location), anyvalue, anyvalue ); | |
| | | } | |
| | | static void store ( volatile T &location, T value ) { | |
| | | int64_t result = (volatile int64_t&)location; | |
| | | while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)v | |
| | | alue, result) != result ) | |
| | | result = (volatile int64_t&)location; | |
| | | } | |
| | | }; | |
| | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | | |
| | | #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE | |
| | | // Relaxed operations add volatile qualifier to prevent compiler from optim | |
| | | izing them out. | |
| | | /** Volatile should not incur any additional cost on IA32, Intel64, and Spa | |
| | | rc TSO | |
| | | architectures. However on architectures with weak memory ordering compi | |
| | | ler may | |
| | | generate code with acquire/release semantics for operations on volatile | |
| | | data. **/ | |
| | | template <typename T, size_t S> | |
| | | struct machine_load_store_relaxed { | |
| | | static inline T load ( const volatile T& location ) { | |
| | | return location; | |
| | | } | |
| | | static inline void store ( volatile T& location, T value ) { | |
| | | location = value; | |
| | | } | |
| | | }; | |
| | | | |
| | | #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | |
| | | template <typename T> | |
| | | struct machine_load_store_relaxed<T,8> { | |
| | | static inline T load ( const volatile T& location ) { | |
| | | return (T)__TBB_machine_load8( (const volatile void*)&location ); | |
| | | } | |
| | | static inline void store ( volatile T& location, T value ) { | |
| | | __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | |
| | | } | |
| | | }; | |
| | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ | |
| | | | |
| | | template<typename T> | |
| | | inline T __TBB_load_with_acquire(const volatile T &location) { | |
| | | return machine_load_store<T,sizeof(T)>::load_with_acquire( location ); | |
| | | } | |
| | | template<typename T, typename V> | |
| | | inline void __TBB_store_with_release(volatile T& location, V value) { | |
| | | machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) | |
| | | ); | |
| | | } | |
| | | //! Overload that exists solely to avoid /Wp64 warnings. | |
| | | inline void __TBB_store_with_release(volatile size_t& location, size_t valu | |
| | | e) { | |
| | | machine_load_store<size_t,sizeof(size_t)>::store_with_release( location | |
| | | , value ); | |
| | | } | |
| | | | |
| | | template<typename T> | |
| | | inline T __TBB_load_full_fence(const volatile T &location) { | |
| | | return machine_load_store_seq_cst<T,sizeof(T)>::load( location ); | |
| | | } | |
| | | template<typename T, typename V> | |
| | | inline void __TBB_store_full_fence(volatile T& location, V value) { | |
| | | machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) ); | |
| | | } | |
| | | //! Overload that exists solely to avoid /Wp64 warnings. | |
| | | inline void __TBB_store_full_fence(volatile size_t& location, size_t value) | |
| | | { | |
| | | machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, val | |
| | | ue ); | |
| | | } | |
| | | | |
| | | template<typename T> | |
| | | inline T __TBB_load_relaxed (const volatile T& location) { | |
| | | return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(lo | |
| | | cation) ); | |
| | | } | |
| | | template<typename T, typename V> | |
| | | inline void __TBB_store_relaxed ( volatile T& location, V value ) { | |
| | | machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location | |
| | | ), T(value) ); | |
| | | } | |
| | | //! Overload that exists solely to avoid /Wp64 warnings. | |
| | | inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) | |
| | | { | |
| | | machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<si | |
| | | ze_t&>(location), value ); | |
| | | } | |
| | | | |
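The acquire/release wrappers above are what publication idioms in TBB rely on. A minimal sketch of the intended usage follows; the message type and the two functions are illustrative, not TBB APIs. The release store keeps the payload write from moving below the flag store, and the acquire load keeps the payload read from moving above the flag load.

    struct message {        // hypothetical example type
        int  payload;
        bool ready;         // publication flag: written last, read first
    };

    void publish( message& m, int data ) {
        m.payload = data;                                           // plain store
        tbb::internal::__TBB_store_with_release( m.ready, true );   // release the flag
    }

    bool try_consume( const message& m, int& out ) {
        if( tbb::internal::__TBB_load_with_acquire( m.ready ) ) {
            out = m.payload;    // ordered after the acquiring load of the flag
            return true;
        }
        return false;
    }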
| // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with al
ignment at least as | | // Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with al
ignment at least as | |
|
| // strict as type T. Type type should have a trivial default constructor a
nd destructor, so that | | // strict as type T. The type should have a trivial default constructor an
d destructor, so that | |
| // arrays of that type can be declared without initializers. | | // arrays of that type can be declared without initializers. | |
| // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentA
tLeastAsStrict(T) expands | | // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentA
tLeastAsStrict(T) expands | |
| // to a type bigger than T. | | // to a type bigger than T. | |
| // The default definition here works on machines where integers are natural
ly aligned and the | | // The default definition here works on machines where integers are natural
ly aligned and the | |
|
| // strictest alignment is 16. | | // strictest alignment is 64. | |
| #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict | | #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict | |
| | | | |
|
| #if __GNUC__ || __SUNPRO_CC | | #if __TBB_ATTRIBUTE_ALIGNED_PRESENT | |
| struct __TBB_machine_type_with_strictest_alignment { | | | |
| int member[4]; | | #define __TBB_DefineTypeWithAlignment(PowerOf2) \ | |
| } __attribute__((aligned(16))); | | struct __TBB_machine_type_with_alignment_##PowerOf2 { \ | |
| #elif _MSC_VER | | uint32_t member[PowerOf2/sizeof(uint32_t)]; \ | |
| __declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment { | | } __attribute__((aligned(PowerOf2))); | |
| int member[4]; | | #define __TBB_alignof(T) __alignof__(T) | |
| | | | |
| | | #elif __TBB_DECLSPEC_ALIGN_PRESENT | |
| | | | |
| | | #define __TBB_DefineTypeWithAlignment(PowerOf2) \ | |
| | | __declspec(align(PowerOf2)) \ | |
| | | struct __TBB_machine_type_with_alignment_##PowerOf2 { \ | |
| | | uint32_t member[PowerOf2/sizeof(uint32_t)]; \ | |
| }; | | }; | |
|
| #else | | #define __TBB_alignof(T) __alignof(T) | |
| #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machi | | | |
| ne_type_with_strictest_alignment | | #else /* A compiler with unknown syntax for data alignment */ | |
| | | #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) | |
| #endif | | #endif | |
| | | | |
|
| template<size_t N> struct type_with_alignment {__TBB_machine_type_with_stri | | /* Now declare types aligned to useful powers of two */ | |
| ctest_alignment member;}; | | // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms? | |
| | | __TBB_DefineTypeWithAlignment(16) | |
| | | __TBB_DefineTypeWithAlignment(32) | |
| | | __TBB_DefineTypeWithAlignment(64) | |
| | | | |
| | | typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strict | |
| | | est_alignment; | |
| | | | |
| | | // Primary template is a declaration of incomplete type so that it fails wi | |
| | | th unknown alignments | |
| | | template<size_t N> struct type_with_alignment; | |
| | | | |
| | | // Specializations for allowed alignments | |
| template<> struct type_with_alignment<1> { char member; }; | | template<> struct type_with_alignment<1> { char member; }; | |
| template<> struct type_with_alignment<2> { uint16_t member; }; | | template<> struct type_with_alignment<2> { uint16_t member; }; | |
| template<> struct type_with_alignment<4> { uint32_t member; }; | | template<> struct type_with_alignment<4> { uint32_t member; }; | |
| template<> struct type_with_alignment<8> { uint64_t member; }; | | template<> struct type_with_alignment<8> { uint64_t member; }; | |
|
| | | template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignmen | |
| | | t_16 member; }; | |
| | | template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignmen | |
| | | t_32 member; }; | |
| | | template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignmen | |
| | | t_64 member; }; | |
| | | | |
|
| #if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2 | | #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN | |
| //! Work around for bug in GNU 3.2 and MSVC compilers. | | //! Work around for bug in GNU 3.2 and MSVC compilers. | |
| /** Bug is that compiler sometimes returns 0 for __alignof(T) when T has no
t yet been instantiated. | | /** Bug is that compiler sometimes returns 0 for __alignof(T) when T has no
t yet been instantiated. | |
| The work-around forces instantiation by forcing computation of sizeof(T
) before __alignof(T). */ | | The work-around forces instantiation by forcing computation of sizeof(T
) before __alignof(T). */ | |
| template<size_t Size, typename T> | | template<size_t Size, typename T> | |
| struct work_around_alignment_bug { | | struct work_around_alignment_bug { | |
|
| #if _MSC_VER | | static const size_t alignment = __TBB_alignof(T); | |
| static const size_t alignment = __alignof(T); | | | |
| #else | | | |
| static const size_t alignment = __alignof__(T); | | | |
| #endif | | | |
| }; | | }; | |
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_
alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment> | | #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_
alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment> | |
|
| #elif __GNUC__ || __SUNPRO_CC | | | |
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_ | | | |
| alignment<__alignof__(T)> | | | |
| #else | | #else | |
|
| #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_s | | #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_ | |
| trictest_alignment | | alignment<__TBB_alignof(T)> | |
| #endif | | #endif /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */ | |
| #endif /* ____TBB_TypeWithAlignmentAtLeastAsStrict */ | | | |
| | | #endif /* __TBB_TypeWithAlignmentAtLeastAsStrict */ | |
| | | | |
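A typical use of __TBB_TypeWithAlignmentAtLeastAsStrict is to obtain raw, suitably aligned storage for deferred construction; this is roughly the pattern behind tbb::aligned_space. The raw_slot class below is a simplified, hypothetical illustration rather than a TBB class.

    // Needs <new> for placement new; raw_slot is illustrative only.
    template<typename T>
    class raw_slot {
        union {
            __TBB_TypeWithAlignmentAtLeastAsStrict(T) aligner;  // forces alignment
            char bytes[sizeof(T)];                              // forces minimum size
        } storage;
    public:
        T*   address()   { return reinterpret_cast<T*>(&storage); }
        void construct() { new( address() ) T; }                // build T in place
        void destroy()   { address()->~T(); }
    };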
| // Template class here is to avoid instantiation of the static data for mod
ules that don't use it | | // Template class here is to avoid instantiation of the static data for mod
ules that don't use it | |
| template<typename T> | | template<typename T> | |
| struct reverse { | | struct reverse { | |
| static const T byte_table[256]; | | static const T byte_table[256]; | |
| }; | | }; | |
| // An efficient implementation of the reverse function utilizes a 2^8 looku
p table holding the bit-reversed | | // An efficient implementation of the reverse function utilizes a 2^8 looku
p table holding the bit-reversed | |
| // values of [0..2^8 - 1]. Those values can also be computed on the fly at
a slightly higher cost. | | // values of [0..2^8 - 1]. Those values can also be computed on the fly at
a slightly higher cost. | |
| template<typename T> | | template<typename T> | |
| const T reverse<T>::byte_table[256] = { | | const T reverse<T>::byte_table[256] = { | |
| | | | |
| skipping to change at line 354 | | skipping to change at line 733 | |
| 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD,
0x3D, 0xBD, 0x7D, 0xFD, | | 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD,
0x3D, 0xBD, 0x7D, 0xFD, | |
| 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
0x33, 0xB3, 0x73, 0xF3, | | 0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3,
0x33, 0xB3, 0x73, 0xF3, | |
| 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB,
0x3B, 0xBB, 0x7B, 0xFB, | | 0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB,
0x3B, 0xBB, 0x7B, 0xFB, | |
| 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7,
0x37, 0xB7, 0x77, 0xF7, | | 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7,
0x37, 0xB7, 0x77, 0xF7, | |
| 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
0x3F, 0xBF, 0x7F, 0xFF | | 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
0x3F, 0xBF, 0x7F, 0xFF | |
| }; | | }; | |
| | | | |
| } // namespace internal | | } // namespace internal | |
| } // namespace tbb | | } // namespace tbb | |
| | | | |
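The comment above mentions that the bit-reversed byte values can also be computed on the fly. For reference, here is a sketch of that alternative, a classic shift-and-mask trick rather than code taken from TBB:

    inline unsigned char reverse_byte_on_the_fly( unsigned char b ) {
        b = (unsigned char)( ((b & 0xF0) >> 4) | ((b & 0x0F) << 4) );  // swap nibbles
        b = (unsigned char)( ((b & 0xCC) >> 2) | ((b & 0x33) << 2) );  // swap bit pairs
        b = (unsigned char)( ((b & 0xAA) >> 1) | ((b & 0x55) << 1) );  // swap adjacent bits
        return b;
    }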
|
| #ifndef __TBB_CompareAndSwap1 | | // Preserving access to legacy APIs | |
| #define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1, | | using tbb::internal::__TBB_load_with_acquire; | |
| uint8_t> | | using tbb::internal::__TBB_store_with_release; | |
| #endif | | | |
| | | // Mapping historically used names to the ones expected by atomic_load_stor | |
| #ifndef __TBB_CompareAndSwap2 | | e_traits | |
| #define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2, | | #define __TBB_load_acquire __TBB_load_with_acquire | |
| uint16_t> | | #define __TBB_store_release __TBB_store_with_release | |
| #endif | | | |
| | | | |
| #ifndef __TBB_CompareAndSwapW | | | |
| #define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<si | | | |
| zeof(ptrdiff_t),ptrdiff_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd1 | | | |
| #define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_ | | | |
| t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd2 | | | |
| #define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16 | | | |
| _t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd4 | | | |
| #define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32 | | | |
| _t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd8 | | | |
| #define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64 | | | |
| _t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAddW | | | |
| #define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(p | | | |
| trdiff_t),ptrdiff_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore1 | | | |
| #define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,ui | | | |
| nt8_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore2 | | | |
| #define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,ui | | | |
| nt16_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore4 | | | |
| #define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,ui | | | |
| nt32_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore8 | | | |
| #define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,ui | | | |
| nt64_t> | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStoreW | | | |
| #define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<size | | | |
| of(ptrdiff_t),ptrdiff_t> | | | |
| #endif | | | |
| | | | |
| #if __TBB_DECL_FENCED_ATOMICS | | | |
| | | | |
| #ifndef __TBB_CompareAndSwap1__TBB_full_fence | | | |
| #define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1 | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap1acquire | | | |
| #define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap1release | | | |
| #define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_CompareAndSwap2__TBB_full_fence | | | |
| #define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2 | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap2acquire | | | |
| #define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap2release | | | |
| #define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_CompareAndSwap4__TBB_full_fence | | | |
| #define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4 | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap4acquire | | | |
| #define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap4release | | | |
| #define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_CompareAndSwap8__TBB_full_fence | | | |
| #define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8 | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap8acquire | | | |
| #define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_CompareAndSwap8release | | | |
| #define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd1__TBB_full_fence | | | |
| #define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd1acquire | | | |
| #define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd1release | | | |
| #define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd2__TBB_full_fence | | | |
| #define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd2acquire | | | |
| #define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd2release | | | |
| #define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd4__TBB_full_fence | | | |
| #define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd4acquire | | | |
| #define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd4release | | | |
| #define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndAdd8__TBB_full_fence | | | |
| #define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd8acquire | | | |
| #define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndAdd8release | | | |
| #define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore1__TBB_full_fence | | | |
| #define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore1acquire | | | |
| #define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore1release | | | |
| #define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore2__TBB_full_fence | | | |
| #define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore2acquire | | | |
| #define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore2release | | | |
| #define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore4__TBB_full_fence | | | |
| #define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore4acquire | | | |
| #define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore4release | | | |
| #define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndStore8__TBB_full_fence | | | |
| #define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8 | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore8acquire | | | |
| #define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence | | | |
| #endif | | | |
| #ifndef __TBB_FetchAndStore8release | | | |
| #define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence | | | |
| #endif | | | |
| | | | |
| #endif // __TBB_DECL_FENCED_ATOMICS | | | |
| | | | |
| // Special atomic functions | | | |
| #ifndef __TBB_FetchAndAddWrelease | | | |
| #define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndIncrementWacquire | | | |
| #define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1) | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_FetchAndDecrementWrelease | | | |
| #define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1)) | | | |
| #endif | | | |
| | | | |
| #if __TBB_WORDSIZE==4 | | | |
| // On 32-bit platforms, "atomic.h" requires definition of __TBB_Store8 and __TBB_Load8 | | | |
| #ifndef __TBB_Store8 | | | |
| inline void __TBB_Store8 (volatile void *ptr, int64_t value) { | | | |
| tbb::internal::atomic_backoff b; | | | |
| for(;;) { | | | |
| int64_t result = *(int64_t *)ptr; | | | |
| if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break; | | | |
| b.pause(); | | | |
| } | | | |
| } | | | |
| #endif | | | |
| | | | |
| #ifndef __TBB_Load8 | | | |
| inline int64_t __TBB_Load8 (const volatile void *ptr) { | | | |
| int64_t result = *(int64_t *)ptr; | | | |
| result = __TBB_CompareAndSwap8((volatile void *)ptr,result,result); | | | |
| return result; | | | |
| } | | | |
| #endif | | | |
| #endif /* __TBB_WORDSIZE==4 */ | | | |
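
The deleted 32-bit fallback above emulates an 8-byte store with a compare-and-swap retry loop. A self-contained sketch of the same idea using C++11 atomics rather than TBB's internal macros (names here are illustrative, not TBB's):

    #include <atomic>
    #include <cstdint>

    // Emulate a plain 64-bit store on hardware that only offers a 64-bit CAS:
    // keep retrying until the CAS succeeds against the value we last observed.
    inline void store8_via_cas( std::atomic<std::int64_t>& location, std::int64_t value ) {
        std::int64_t observed = location.load( std::memory_order_relaxed );
        while( !location.compare_exchange_weak( observed, value ) ) {
            // compare_exchange_weak refreshed 'observed' with the current contents; retry.
        }
    }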
| | | | |
| #ifndef __TBB_Log2 | | #ifndef __TBB_Log2 | |
| inline intptr_t __TBB_Log2( uintptr_t x ) { | | inline intptr_t __TBB_Log2( uintptr_t x ) { | |
| if( x==0 ) return -1; | | if( x==0 ) return -1; | |
| intptr_t result = 0; | | intptr_t result = 0; | |
| uintptr_t tmp; | | uintptr_t tmp; | |
| #if __TBB_WORDSIZE>=8 | | #if __TBB_WORDSIZE>=8 | |
| if( (tmp = x>>32) ) { x=tmp; result += 32; } | | if( (tmp = x>>32) ) { x=tmp; result += 32; } | |
| #endif | | #endif | |
| if( (tmp = x>>16) ) { x=tmp; result += 16; } | | if( (tmp = x>>16) ) { x=tmp; result += 16; } | |
| | | | |
| skipping to change at line 605 | | skipping to change at line 781 | |
| tbb::internal::atomic_backoff b; | | tbb::internal::atomic_backoff b; | |
| for(;;) { | | for(;;) { | |
| uintptr_t tmp = *(volatile uintptr_t *)operand; | | uintptr_t tmp = *(volatile uintptr_t *)operand; | |
| uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); | | uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); | |
| if( result==tmp ) break; | | if( result==tmp ) break; | |
| b.pause(); | | b.pause(); | |
| } | | } | |
| } | | } | |
| #endif | | #endif | |
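
The generic fallback above builds an atomic AND out of a wide compare-and-swap plus backoff. The same retry pattern in portable C++11 form (a sketch, not TBB code):

    #include <atomic>
    #include <cstdint>

    // Returns the value held before the AND, i.e. behaves like a fetch_and operation.
    inline std::uintptr_t fetch_and_via_cas( std::atomic<std::uintptr_t>& word, std::uintptr_t mask ) {
        std::uintptr_t old_value = word.load( std::memory_order_relaxed );
        while( !word.compare_exchange_weak( old_value, old_value & mask ) ) {
            // 'old_value' now holds the freshly observed contents; recompute and retry.
        }
        return old_value;
    }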
| | | | |
|
| | | #ifndef __TBB_Flag | |
| | | typedef unsigned char __TBB_Flag; | |
| | | #endif | |
| | | typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag; | |
| | | | |
| #ifndef __TBB_TryLockByte | | #ifndef __TBB_TryLockByte | |
|
| inline bool __TBB_TryLockByte( unsigned char &flag ) { | | inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) { | |
| return __TBB_CompareAndSwap1(&flag,1,0)==0; | | return __TBB_machine_cmpswp1(&flag,1,0)==0; | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| #ifndef __TBB_LockByte | | #ifndef __TBB_LockByte | |
|
| inline uintptr_t __TBB_LockByte( unsigned char& flag ) { | | inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) { | |
| if ( !__TBB_TryLockByte(flag) ) { | | if ( !__TBB_TryLockByte(flag) ) { | |
| tbb::internal::atomic_backoff b; | | tbb::internal::atomic_backoff b; | |
| do { | | do { | |
| b.pause(); | | b.pause(); | |
| } while ( !__TBB_TryLockByte(flag) ); | | } while ( !__TBB_TryLockByte(flag) ); | |
| } | | } | |
| return 0; | | return 0; | |
| } | | } | |
| #endif | | #endif | |
| | | | |
|
| | | #define __TBB_UnlockByte __TBB_store_with_release | |
| | | | |
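
Taken together, __TBB_TryLockByte, __TBB_LockByte and the new __TBB_UnlockByte mapping form a one-byte test-and-set spin lock. A hedged usage sketch (internal machinery, shown only to illustrate the protocol):

    // __TBB_atomic_flag guard = 0;     // 0 = unlocked, 1 = locked
    // __TBB_LockByte( guard );         // spins with backoff until the CAS from 0 to 1 succeeds
    // /* ...critical section... */
    // __TBB_UnlockByte( guard, 0 );    // expands to __TBB_store_with_release( guard, 0 )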
| #ifndef __TBB_ReverseByte | | #ifndef __TBB_ReverseByte | |
| inline unsigned char __TBB_ReverseByte(unsigned char src) { | | inline unsigned char __TBB_ReverseByte(unsigned char src) { | |
| return tbb::internal::reverse<unsigned char>::byte_table[src]; | | return tbb::internal::reverse<unsigned char>::byte_table[src]; | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| template<typename T> | | template<typename T> | |
|
| T __TBB_ReverseBits(T src) | | T __TBB_ReverseBits(T src) { | |
| { | | | |
| T dst; | | T dst; | |
| unsigned char *original = (unsigned char *) &src; | | unsigned char *original = (unsigned char *) &src; | |
| unsigned char *reversed = (unsigned char *) &dst; | | unsigned char *reversed = (unsigned char *) &dst; | |
| | | | |
| for( int i = sizeof(T)-1; i >= 0; i-- ) | | for( int i = sizeof(T)-1; i >= 0; i-- ) | |
| reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); | | reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] ); | |
| | | | |
| return dst; | | return dst; | |
| } | | } | |
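
__TBB_ReverseBits flips the byte order and reverses the bits inside each byte, so it reverses the whole bit pattern of the value's representation. A standalone sketch of the same technique without TBB's lookup table (illustrative names, plain loop in place of byte_table):

    #include <cstddef>
    #include <cstring>

    // Reverse the bits of a single byte by moving bit i to bit 7-i.
    inline unsigned char reverse_byte( unsigned char b ) {
        unsigned char r = 0;
        for( int i = 0; i < 8; ++i )
            r |= (unsigned char)(((b >> i) & 1u) << (7 - i));
        return r;
    }

    // Reverse the bit pattern of a trivially-copyable T, byte by byte.
    template<typename T>
    T reverse_bits( T src ) {
        unsigned char in[sizeof(T)], out[sizeof(T)];
        std::memcpy( in, &src, sizeof(T) );
        for( std::size_t i = 0; i < sizeof(T); ++i )
            out[i] = reverse_byte( in[sizeof(T) - i - 1] );   // swap byte order, flip each byte
        T dst;
        std::memcpy( &dst, out, sizeof(T) );
        return dst;
    }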
| | | | |
| | | | |
End of changes. 42 change blocks. |
| 335 lines changed or deleted | | 584 lines changed or added | |
|
| tbb_stddef.h | | tbb_stddef.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 33 | | skipping to change at line 33 | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered by | | invalidate any other reasons why the executable file might be covered by | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_tbb_stddef_H | | #ifndef __TBB_tbb_stddef_H | |
| #define __TBB_tbb_stddef_H | | #define __TBB_tbb_stddef_H | |
| | | | |
| // Marketing-driven product version | | // Marketing-driven product version | |
|
| #define TBB_VERSION_MAJOR 3 | | #define TBB_VERSION_MAJOR 4 | |
| #define TBB_VERSION_MINOR 0 | | #define TBB_VERSION_MINOR 0 | |
| | | | |
| // Engineering-focused interface version | | // Engineering-focused interface version | |
|
| #define TBB_INTERFACE_VERSION 5000 | | #define TBB_INTERFACE_VERSION 6000 | |
| #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 | | #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 | |
| | | | |
| // The oldest major interface version still supported | | // The oldest major interface version still supported | |
| // To be used in SONAME, manifests, etc. | | // To be used in SONAME, manifests, etc. | |
| #define TBB_COMPATIBLE_INTERFACE_VERSION 2 | | #define TBB_COMPATIBLE_INTERFACE_VERSION 2 | |
| | | | |
| #define __TBB_STRING_AUX(x) #x | | #define __TBB_STRING_AUX(x) #x | |
| #define __TBB_STRING(x) __TBB_STRING_AUX(x) | | #define __TBB_STRING(x) __TBB_STRING_AUX(x) | |
| | | | |
| // We do not need defines below for resource processing on windows | | // We do not need defines below for resource processing on windows | |
| | | | |
| skipping to change at line 104 | | skipping to change at line 104 | |
| - \subpage range_req | | - \subpage range_req | |
| - \subpage parallel_do_body_req | | - \subpage parallel_do_body_req | |
| - \subpage parallel_for_body_req | | - \subpage parallel_for_body_req | |
| - \subpage parallel_reduce_body_req | | - \subpage parallel_reduce_body_req | |
| - \subpage parallel_scan_body_req | | - \subpage parallel_scan_body_req | |
| - \subpage parallel_sort_iter_req | | - \subpage parallel_sort_iter_req | |
| **/ | | **/ | |
| | | | |
| // Define preprocessor symbols used to determine architecture | | // Define preprocessor symbols used to determine architecture | |
| #if _WIN32||_WIN64 | | #if _WIN32||_WIN64 | |
|
| # if defined(_M_AMD64) | | # if defined(_M_X64)||defined(__x86_64__) // the latter for MinGW support | |
| # define __TBB_x86_64 1 | | # define __TBB_x86_64 1 | |
| # elif defined(_M_IA64) | | # elif defined(_M_IA64) | |
| # define __TBB_ipf 1 | | # define __TBB_ipf 1 | |
| # elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support | | # elif defined(_M_IX86)||defined(__i386__) // the latter for MinGW support | |
| # define __TBB_x86_32 1 | | # define __TBB_x86_32 1 | |
| # endif | | # endif | |
| #else /* Assume generic Unix */ | | #else /* Assume generic Unix */ | |
| # if !__linux__ && !__APPLE__ | | # if !__linux__ && !__APPLE__ | |
| # define __TBB_generic_os 1 | | # define __TBB_generic_os 1 | |
| # endif | | # endif | |
| | | | |
| skipping to change at line 126 | | skipping to change at line 126 | |
| # define __TBB_x86_64 1 | | # define __TBB_x86_64 1 | |
| # elif __ia64__ | | # elif __ia64__ | |
| # define __TBB_ipf 1 | | # define __TBB_ipf 1 | |
| # elif __i386__||__i386 // __i386 is for Sun OS | | # elif __i386__||__i386 // __i386 is for Sun OS | |
| # define __TBB_x86_32 1 | | # define __TBB_x86_32 1 | |
| # else | | # else | |
| # define __TBB_generic_arch 1 | | # define __TBB_generic_arch 1 | |
| # endif | | # endif | |
| #endif | | #endif | |
| | | | |
|
| #if _MSC_VER | | // tbb_config.h should be included the first since it contains macro definitions used in other headers | |
| // define the parts of stdint.h that are needed, but put them inside tbb::internal | | #include "tbb_config.h" | |
| namespace tbb { | | | |
| namespace internal { | | | |
| typedef __int8 int8_t; | | | |
| typedef __int16 int16_t; | | | |
| typedef __int32 int32_t; | | | |
| typedef __int64 int64_t; | | | |
| typedef unsigned __int8 uint8_t; | | | |
| typedef unsigned __int16 uint16_t; | | | |
| typedef unsigned __int32 uint32_t; | | | |
| typedef unsigned __int64 uint64_t; | | | |
| } // namespace internal | | | |
| } // namespace tbb | | | |
| #else | | | |
| #include <stdint.h> | | | |
| #endif /* _MSC_VER */ | | | |
| | | | |
| #if _MSC_VER >=1400 | | #if _MSC_VER >=1400 | |
|
| #define __TBB_EXPORTED_FUNC __cdecl | | #define __TBB_EXPORTED_FUNC __cdecl | |
| #define __TBB_EXPORTED_METHOD __thiscall | | #define __TBB_EXPORTED_METHOD __thiscall | |
| #else | | #else | |
|
| #define __TBB_EXPORTED_FUNC | | #define __TBB_EXPORTED_FUNC | |
| #define __TBB_EXPORTED_METHOD | | #define __TBB_EXPORTED_METHOD | |
| #endif | | #endif | |
| | | | |
|
| #include <cstddef> /* Need size_t and ptrdiff_t (the latter on Windows only) from here. */ | | #include <cstddef> /* Need size_t and ptrdiff_t */ | |
| | | | |
| #if _MSC_VER | | #if _MSC_VER | |
|
| #define __TBB_tbb_windef_H | | #define __TBB_tbb_windef_H | |
| #include "_tbb_windef.h" | | #include "internal/_tbb_windef.h" | |
| #undef __TBB_tbb_windef_H | | #undef __TBB_tbb_windef_H | |
| | | #else | |
| | | #include <stdint.h> | |
| #endif | | #endif | |
| | | | |
|
| #include "tbb_config.h" | | | |
| | | | |
| //! The namespace tbb contains all components of the library. | | //! The namespace tbb contains all components of the library. | |
| namespace tbb { | | namespace tbb { | |
|
| | | | |
| | | #if _MSC_VER | |
| | | namespace internal { | |
| | | typedef __int8 int8_t; | |
| | | typedef __int16 int16_t; | |
| | | typedef __int32 int32_t; | |
| | | typedef __int64 int64_t; | |
| | | typedef unsigned __int8 uint8_t; | |
| | | typedef unsigned __int16 uint16_t; | |
| | | typedef unsigned __int32 uint32_t; | |
| | | typedef unsigned __int64 uint64_t; | |
| | | } // namespace internal | |
| | | #else /* Posix */ | |
| | | namespace internal { | |
| | | using ::int8_t; | |
| | | using ::int16_t; | |
| | | using ::int32_t; | |
| | | using ::int64_t; | |
| | | using ::uint8_t; | |
| | | using ::uint16_t; | |
| | | using ::uint32_t; | |
| | | using ::uint64_t; | |
| | | } // namespace internal | |
| | | #endif /* Posix */ | |
| | | | |
| | | using std::size_t; | |
| | | using std::ptrdiff_t; | |
| | | | |
| //! Type for an assertion handler | | //! Type for an assertion handler | |
| typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment ); | | typedef void(*assertion_handler_type)( const char* filename, int line, const char* expression, const char * comment ); | |
| | | | |
| #if TBB_USE_ASSERT | | #if TBB_USE_ASSERT | |
| | | | |
|
| //! Assert that x is true. | | //! Assert that x is true. | |
| /** If x is false, print assertion failure message. | | /** If x is false, print assertion failure message. | |
|     If the comment argument is not NULL, it is printed as part of the failure message. | |     If the comment argument is not NULL, it is printed as part of the failure message. | |
|     The comment argument has no other effect. */ | |     The comment argument has no other effect. */ | |
| #define __TBB_ASSERT(predicate,message) ((predicate)?((void)0):tbb::assertion_failure(__FILE__,__LINE__,#predicate,message)) | | #define __TBB_ASSERT(predicate,message) ((predicate)?((void)0):tbb::assertion_failure(__FILE__,__LINE__,#predicate,message)) | |
| #define __TBB_ASSERT_EX __TBB_ASSERT | | #define __TBB_ASSERT_EX __TBB_ASSERT | |
| | | | |
| //! Set assertion handler and return previous value of it. | | //! Set assertion handler and return previous value of it. | |
|     assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( assertion_handler_type new_handler ); | |     assertion_handler_type __TBB_EXPORTED_FUNC set_assertion_handler( assertion_handler_type new_handler ); | |
| | | | |
| //! Process an assertion failure. | | //! Process an assertion failure. | |
| /** Normally called from __TBB_ASSERT macro. | | /** Normally called from __TBB_ASSERT macro. | |
|         If assertion handler is null, print message for assertion failure and abort. | |         If assertion handler is null, print message for assertion failure and abort. | |
|         Otherwise call the assertion handler. */ | |         Otherwise call the assertion handler. */ | |
|     void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int line, const char* expression, const char* comment ); | |     void __TBB_EXPORTED_FUNC assertion_failure( const char* filename, int line, const char* expression, const char* comment ); | |
| | | | |
|
| #else | | #else /* !TBB_USE_ASSERT */ | |
| | | | |
|
| //! No-op version of __TBB_ASSERT. | | //! No-op version of __TBB_ASSERT. | |
| #define __TBB_ASSERT(predicate,comment) ((void)0) | | #define __TBB_ASSERT(predicate,comment) ((void)0) | |
| //! No-op version of __TBB_ASSERT. | | //! No-op version of __TBB_ASSERT. | |
| #define __TBB_ASSERT(predicate,comment) ((void)0) | | #define __TBB_ASSERT(predicate,comment) ((void)0) | |
| //! "Extended" version is useful to suppress warnings if a variable is only used with an assert | | //! "Extended" version is useful to suppress warnings if a variable is only used with an assert | |
| #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) | | #define __TBB_ASSERT_EX(predicate,comment) ((void)(1 && (predicate))) | |
| | | | |
|
| #endif /* TBB_USE_ASSERT */ | | #endif /* !TBB_USE_ASSERT */ | |
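
A short usage sketch of the two assertion macros (hypothetical variables; whether they expand to real checks depends on TBB_USE_ASSERT):

    // size_t n = compute_size();                       // hypothetical
    // __TBB_ASSERT( n > 0, "size must be positive" );  // becomes ((void)0) when TBB_USE_ASSERT is off
    // __TBB_ASSERT_EX( n > 0, NULL );                  // also keeps 'n' "used" in release builds,
    //                                                  // suppressing unused-variable warnings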
| | | | |
| //! The function returns the interface version of the TBB shared library being used. | | //! The function returns the interface version of the TBB shared library being used. | |
| /** | | /** | |
| * The version it returns is determined at runtime, not at compile/link time. | | * The version it returns is determined at runtime, not at compile/link time. | |
| * So it can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. | | * So it can be different than the value of TBB_INTERFACE_VERSION obtained at compile time. | |
| */ | | */ | |
| extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); | | extern "C" int __TBB_EXPORTED_FUNC TBB_runtime_interface_version(); | |
| | | | |
| //! Dummy type that distinguishes splitting constructor from copy constructor. | | //! Dummy type that distinguishes splitting constructor from copy constructor. | |
| /** | | /** | |
| | | | |
| skipping to change at line 215 | | skipping to change at line 228 | |
| */ | | */ | |
| class split { | | class split { | |
| }; | | }; | |
| | | | |
| /** | | /** | |
| * @cond INTERNAL | | * @cond INTERNAL | |
| * @brief Identifiers declared inside namespace internal should never be used directly by client code. | | * @brief Identifiers declared inside namespace internal should never be used directly by client code. | |
| */ | | */ | |
| namespace internal { | | namespace internal { | |
| | | | |
|
| using std::size_t; | | | |
| | | | |
| //! Compile-time constant that is upper bound on cache line/sector size. | | //! Compile-time constant that is upper bound on cache line/sector size. | |
| /** It should be used only in situations where having a compile-time upper | | /** It should be used only in situations where having a compile-time upper | |
| bound is more useful than a run-time exact answer. | | bound is more useful than a run-time exact answer. | |
| @ingroup memory_allocation */ | | @ingroup memory_allocation */ | |
| const size_t NFS_MaxLineSize = 128; | | const size_t NFS_MaxLineSize = 128; | |
| | | | |
|
| | | /** Label for data that may be accessed from different threads, and that may eventually become wrapped | |
| | |     in a formal atomic type. | |
| | | | |
| | |     Note that no problems have yet been observed relating to the definition currently being empty, | |
| | |     even if at least "volatile" would seem to be in order to avoid data sometimes temporarily hiding | |
| | |     in a register (although "volatile" as a "poor man's atomic" lacks several other features of a proper | |
| | |     atomic, some of which are now provided instead through specialized functions). | |
| | | | |
| | |     Note that usage is intentionally compatible with a definition as qualifier "volatile", | |
| | |     both as a way to have the compiler help enforce use of the label and to quickly rule out | |
| | |     one potential issue. | |
| | | | |
| | |     Note however that, with some architecture/compiler combinations, e.g. on Itanium, "volatile" | |
| | |     also has non-portable memory semantics that are needlessly expensive for "relaxed" operations. | |
| | | | |
| | |     Note that this must only be applied to data that will not change bit patterns when cast to/from | |
| | |     an integral type of the same length; tbb::atomic must be used instead for, e.g., floating-point types. | |
| | | | |
| | |     TODO: apply wherever relevant **/ | |
| | | #define __TBB_atomic // intentionally empty, see above | |
| | | | |
| template<class T, int S> | | template<class T, int S> | |
| struct padded_base : T { | | struct padded_base : T { | |
| char pad[NFS_MaxLineSize - sizeof(T) % NFS_MaxLineSize]; | | char pad[NFS_MaxLineSize - sizeof(T) % NFS_MaxLineSize]; | |
| }; | | }; | |
| template<class T> struct padded_base<T, 0> : T {}; | | template<class T> struct padded_base<T, 0> : T {}; | |
| | | | |
| //! Pads type T to fill out to a multiple of cache line size. | | //! Pads type T to fill out to a multiple of cache line size. | |
| template<class T> | | template<class T> | |
| struct padded : padded_base<T, sizeof(T)> {}; | | struct padded : padded_base<T, sizeof(T)> {}; | |
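
A hedged sketch of what padded<> is for: keeping two small per-thread objects from sharing a cache line (false sharing). The counter type and array size are illustrative; padded lives in tbb::internal and is not part of the public API.

    // struct counter { long hits; };                           // illustrative payload
    // tbb::internal::padded<counter> per_thread[MAX_THREADS];  // MAX_THREADS is hypothetical; each element
    //                                                          // is padded out to NFS_MaxLineSize (128 bytes),
    //                                                          // so neighbouring counters never share a line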
| | | | |
| | | | |
| skipping to change at line 263 | | skipping to change at line 295 | |
| #define __TBB_TRY | | #define __TBB_TRY | |
| #define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() ) | | #define __TBB_CATCH(e) if ( tbb::internal::__TBB_false() ) | |
| #define __TBB_THROW(e) ((void)0) | | #define __TBB_THROW(e) ((void)0) | |
| #define __TBB_RETHROW() ((void)0) | | #define __TBB_RETHROW() ((void)0) | |
| #endif /* !TBB_USE_EXCEPTIONS */ | | #endif /* !TBB_USE_EXCEPTIONS */ | |
| | | | |
| //! Report a runtime warning. | | //! Report a runtime warning. | |
| void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... ); | | void __TBB_EXPORTED_FUNC runtime_warning( const char* format, ... ); | |
| | | | |
| #if TBB_USE_ASSERT | | #if TBB_USE_ASSERT | |
|
| | | static void* const poisoned_ptr = reinterpret_cast<void*>(-1); | |
| | | | |
| //! Set p to invalid pointer value. | | //! Set p to invalid pointer value. | |
| template<typename T> | | template<typename T> | |
|
| inline void poison_pointer( T* & p ) { | | inline void poison_pointer( T*& p ) { p = reinterpret_cast<T*>(poisoned_ptr); } | |
|     p = reinterpret_cast<T*>(-1); | | | |
| } | | | |
| | | /** Expected to be used in assertions only, thus no empty form is defined. **/ | |
| | | template<typename T> | |
| | | inline bool is_poisoned( T* p ) { return p == reinterpret_cast<T*>(poisoned_ptr); } | |
| #else | | #else | |
| template<typename T> | | template<typename T> | |
| inline void poison_pointer( T* ) {/*do nothing*/} | | inline void poison_pointer( T* ) {/*do nothing*/} | |
|
| #endif /* TBB_USE_ASSERT */ | | #endif /* !TBB_USE_ASSERT */ | |
| | | | |
| | | //! Cast pointer from U* to T. | |
| | | /** This method should be used sparingly as a last resort for dealing with | |
| | | situations that inherently break strict ISO C++ aliasing rules. */ | |
| | | template<typename T, typename U> | |
| | | inline T punned_cast( U* ptr ) { | |
| | | uintptr_t x = reinterpret_cast<uintptr_t>(ptr); | |
| | | return reinterpret_cast<T>(x); | |
| | | } | |
| | | | |
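
A usage sketch for punned_cast (hypothetical variables): round-tripping a pointer through uintptr_t instead of casting directly between unrelated pointer types, which is what tends to trigger strict-aliasing diagnostics. The underlying aliasing concern remains, which is why the header says to use this sparingly.

    // double d = 0.0;
    // tbb::internal::uint64_t* bits =
    //     tbb::internal::punned_cast<tbb::internal::uint64_t*>( &d );
    // ...inspect *bits (e.g. the sign bit) without writing a direct double*-to-uint64_t* cast...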
| //! Base class for types that should not be assigned. | | //! Base class for types that should not be assigned. | |
| class no_assign { | | class no_assign { | |
| // Deny assignment | | // Deny assignment | |
| void operator=( const no_assign& ); | | void operator=( const no_assign& ); | |
| public: | | public: | |
| #if __GNUC__ | | #if __GNUC__ | |
|     //! Explicitly define default construction, because otherwise gcc issues gratuitous warning. | |     //! Explicitly define default construction, because otherwise gcc issues gratuitous warning. | |
| no_assign() {} | | no_assign() {} | |
| #endif /* __GNUC__ */ | | #endif /* __GNUC__ */ | |
| | | | |
End of changes. 20 change blocks. |
| 53 lines changed or deleted | | 112 lines changed or added | |
|
| windows_ia32.h | | windows_ia32.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free software | | As a special exception, you may use this file as part of a free software | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you compile | | templates or use macros or inline functions from this file, or you compile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered by | | invalidate any other reasons why the executable file might be covered by | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers instead. | |
| #endif | | #endif | |
| | | | |
|
| #if defined(__INTEL_COMPILER) | | #define __TBB_machine_windows_ia32_H | |
| #define __TBB_release_consistency_helper() __asm { __asm nop } | | | |
| | | #define __TBB_WORDSIZE 4 | |
| | | #define __TBB_BIG_ENDIAN 0 | |
| | | | |
| | | #if __INTEL_COMPILER | |
| | | #define __TBB_compiler_fence() __asm { __asm nop } | |
| #elif _MSC_VER >= 1300 | | #elif _MSC_VER >= 1300 | |
|
| extern "C" void _ReadWriteBarrier(); | | extern "C" void _ReadWriteBarrier(); | |
| #pragma intrinsic(_ReadWriteBarrier) | | #pragma intrinsic(_ReadWriteBarrier) | |
| #define __TBB_release_consistency_helper() _ReadWriteBarrier() | | #define __TBB_compiler_fence() _ReadWriteBarrier() | |
| #else | | #else | |
|
| #error Unsupported compiler - need to define __TBB_release_consistency_helper to support it | | #error Unsupported compiler - need to define __TBB_{control,acquire,release}_consistency_helper to support it | |
| #endif | | #endif | |
| | | | |
|
| inline void __TBB_rel_acq_fence() { __asm { __asm mfence } } | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_WORDSIZE 4 | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_BIG_ENDIAN 0 | | #define __TBB_full_memory_fence() __asm { __asm mfence } | |
| | | | |
| #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| // Workaround for overzealous compiler warnings in /Wp64 mode | | // Workaround for overzealous compiler warnings in /Wp64 mode | |
| #pragma warning (push) | | #pragma warning (push) | |
| #pragma warning (disable: 4244 4267) | | #pragma warning (disable: 4244 4267) | |
| #endif | | #endif | |
| | | | |
| extern "C" { | | extern "C" { | |
|     __int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ); | |     __int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ); | |
|     __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ); | |     __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ); | |
|     __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ); | |     __int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ); | |
|     void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value ); | |     void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value ); | |
|     __int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr); | |     __int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr); | |
| } | | } | |
| | | | |
|
| template <typename T, size_t S> | | #define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \ | |
| struct __TBB_machine_load_store { | | | |
| static inline T load_with_acquire(const volatile T& location) { | | | |
| T to_return = location; | | | |
| __TBB_release_consistency_helper(); | | | |
| return to_return; | | | |
| } | | | |
| | | | |
| static inline void store_with_release(volatile T &location, T value) { | | | |
| __TBB_release_consistency_helper(); | | | |
| location = value; | | | |
| } | | | |
| }; | | | |
| | | | |
| template <typename T> | | | |
| struct __TBB_machine_load_store<T,8> { | | | |
| static inline T load_with_acquire(const volatile T& location) { | | | |
| return __TBB_machine_load8((volatile void *)&location); | | | |
| } | | | |
| | | | |
| static inline void store_with_release(T &location, T value) { | | | |
| __TBB_machine_store8((volatile void *)&location,(__int64)value); | | | |
| } | | | |
| }; | | | |
| | | | |
| template<typename T> | | | |
| inline T __TBB_machine_load_with_acquire(const volatile T &location) { | | | |
|     return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location); | | | |
| } | | | |
| | | | |
| template<typename T, typename V> | | | |
| inline void __TBB_machine_store_with_release(T& location, V value) { | | | |
|     __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,value); | | | |
| } | | | |
| | | | |
| //! Overload that exists solely to avoid /Wp64 warnings. | | | |
| inline void __TBB_machine_store_with_release(size_t& location, size_t value) { | | | |
|     __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value); | | | |
| } | | | |
| | | | |
| #define __TBB_load_with_acquire(L) __TBB_machine_load_with_acquire((L)) | | | |
| #define __TBB_store_with_release(L,V) __TBB_machine_store_with_release((L),(V)) | | | |
| | | | |
| #define __TBB_DEFINE_ATOMICS(S,T,U,A,C) \ | | | |
| static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \ | | static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \ | |
| T result; \ | | T result; \ | |
| volatile T *p = (T *)ptr; \ | | volatile T *p = (T *)ptr; \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| __asm \ | | __asm \ | |
| { \ | | { \ | |
| __asm mov edx, p \ | | __asm mov edx, p \ | |
| __asm mov C , value \ | | __asm mov C , value \ | |
| __asm mov A , comparand \ | | __asm mov A , comparand \ | |
| __asm lock cmpxchg [edx], C \ | | __asm lock cmpxchg [edx], C \ | |
| __asm mov result, A \ | | __asm mov result, A \ | |
| } \ | | } \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| return result; \ | | return result; \ | |
| } \ | | } \ | |
| \ | | \ | |
| static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \ | | static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \ | |
| T result; \ | | T result; \ | |
| volatile T *p = (T *)ptr; \ | | volatile T *p = (T *)ptr; \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| __asm \ | | __asm \ | |
| { \ | | { \ | |
| __asm mov edx, p \ | | __asm mov edx, p \ | |
| __asm mov A, addend \ | | __asm mov A, addend \ | |
| __asm lock xadd [edx], A \ | | __asm lock xadd [edx], A \ | |
| __asm mov result, A \ | | __asm mov result, A \ | |
| } \ | | } \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| return result; \ | | return result; \ | |
| }\ | | }\ | |
| \ | | \ | |
| static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \ | | static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \ | |
| T result; \ | | T result; \ | |
| volatile T *p = (T *)ptr; \ | | volatile T *p = (T *)ptr; \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| __asm \ | | __asm \ | |
| { \ | | { \ | |
| __asm mov edx, p \ | | __asm mov edx, p \ | |
| __asm mov A, value \ | | __asm mov A, value \ | |
| __asm lock xchg [edx], A \ | | __asm lock xchg [edx], A \ | |
| __asm mov result, A \ | | __asm mov result, A \ | |
| } \ | | } \ | |
|
| __TBB_release_consistency_helper(); \ | | | |
| return result; \ | | return result; \ | |
| } | | } | |
| | | | |
|
| __TBB_DEFINE_ATOMICS(1, __int8, __int8, al, cl) | | __TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl) | |
| __TBB_DEFINE_ATOMICS(2, __int16, __int16, ax, cx) | | __TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx) | |
| __TBB_DEFINE_ATOMICS(4, __int32, __int32, eax, ecx) | | __TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx) | |
| __TBB_DEFINE_ATOMICS(W, ptrdiff_t, ptrdiff_t, eax, ecx) | | | |
| | | #undef __TBB_MACHINE_DEFINE_ATOMICS | |
| | | | |
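
The three instantiations above stamp out __TBB_machine_cmpswp{1,2,4}, __TBB_machine_fetchadd{1,2,4} and __TBB_machine_fetchstore{1,2,4}, each a lock-prefixed cmpxchg/xadd/xchg on the operand size named by its suffix. A hedged usage sketch (hypothetical variable):

    // volatile ptrdiff_t counter = 0;
    // ptrdiff_t previous = __TBB_machine_fetchadd4( &counter, 1 );  // atomic increment; returns the old value
    // ptrdiff_t seen     = __TBB_machine_cmpswp4( &counter, 5, 1 ); // set to 5 only if it still equals 1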
| static inline __int32 __TBB_machine_lg( unsigned __int64 i ) { | | static inline __int32 __TBB_machine_lg( unsigned __int64 i ) { | |
| unsigned __int32 j; | | unsigned __int32 j; | |
| __asm | | __asm | |
| { | | { | |
| bsr eax, i | | bsr eax, i | |
| mov j, eax | | mov j, eax | |
| } | | } | |
| return j; | | return j; | |
| } | | } | |
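
Semantics note (sketch): BSR yields the index of the highest set bit, so __TBB_machine_lg(i) is floor(log2(i)); the result is undefined for i == 0.

    // __TBB_machine_lg(1)  == 0
    // __TBB_machine_lg(64) == 6
    // __TBB_machine_lg(65) == 6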
| | | | |
| skipping to change at line 197 | | skipping to change at line 154 | |
| { | | { | |
| mov eax, delay | | mov eax, delay | |
| L1: | | L1: | |
| pause | | pause | |
| add eax, -1 | | add eax, -1 | |
| jne L1 | | jne L1 | |
| } | | } | |
| return; | | return; | |
| } | | } | |
| | | | |
|
| #define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C) | | | |
| #define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C) | | | |
| #define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C) | | | |
| #define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C) | | | |
| #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswpW(P,V,C) | | | |
| | | | |
| #define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V) | | | |
| #define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V) | | | |
| #define __TBB_FetchAndAdd4(P,V) __TBB_machine_fetchadd4(P,V) | | | |
| #define __TBB_FetchAndAdd8(P,V) __TBB_machine_fetchadd8(P,V) | | | |
| #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchaddW(P,V) | | | |
| | | | |
| #define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V) | | | |
| #define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V) | | | |
| #define __TBB_FetchAndStore4(P,V) __TBB_machine_fetchstore4(P,V) | | | |
| #define __TBB_FetchAndStore8(P,V) __TBB_machine_fetchstore8(P,V) | | | |
| #define __TBB_FetchAndStoreW(P,V) __TBB_machine_fetchstoreW(P,V) | | | |
| | | | |
| // Should define this: | | | |
| #define __TBB_Store8(P,V) __TBB_machine_store8(P,V) | | | |
| #define __TBB_Load8(P) __TBB_machine_load8(P) | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | | #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | | #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | |
| | | | |
|
| | | #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | |
| | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| | | | |
| // Definition of other functions | | // Definition of other functions | |
| extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | | extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | |
| #define __TBB_Yield() SwitchToThread() | | #define __TBB_Yield() SwitchToThread() | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
|
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
| // Use generic definitions from tbb_machine.h | | | |
| #undef __TBB_TryLockByte | | | |
| #undef __TBB_LockByte | | | |
| | | | |
| #if defined(_MSC_VER)&&_MSC_VER<1400 | | #if defined(_MSC_VER)&&_MSC_VER<1400 | |
| static inline void* __TBB_machine_get_current_teb () { | | static inline void* __TBB_machine_get_current_teb () { | |
| void* pteb; | | void* pteb; | |
| __asm mov eax, fs:[0x18] | | __asm mov eax, fs:[0x18] | |
| __asm mov pteb, eax | | __asm mov pteb, eax | |
| return pteb; | | return pteb; | |
| } | | } | |
| #endif | | #endif | |
| | | | |
| #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | | #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
| #pragma warning (pop) | | #pragma warning (pop) | |
| #endif // warnings 4244, 4267 are back | | #endif // warnings 4244, 4267 are back | |
|
| | | | |
| | | // API to retrieve/update FPU control setting | |
| | | #define __TBB_CPU_CTL_ENV_PRESENT 1 | |
| | | | |
| | | struct __TBB_cpu_ctl_env_t { | |
| | | int mxcsr; | |
| | | short x87cw; | |
| | | }; | |
| | | inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) { | |
| | | __asm { | |
| | | __asm mov eax, ctl | |
| | | __asm stmxcsr [eax] | |
| | | __asm fstcw [eax+4] | |
| | | } | |
| | | } | |
| | | inline void __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ctl ) { | |
| | | __asm { | |
| | | __asm mov eax, ctl | |
| | | __asm ldmxcsr [eax] | |
| | | __asm fldcw [eax+4] | |
| | | } | |
| | | } | |
| | | | |
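
A hedged sketch of the intended save/restore pattern for the two helpers above, which capture MXCSR and the x87 control word:

    // __TBB_cpu_ctl_env_t saved;
    // __TBB_get_cpu_ctl_env( &saved );   // snapshot rounding/denormal/exception-mask settings
    // /* ...code that may change the floating-point environment... */
    // __TBB_set_cpu_ctl_env( &saved );   // restore the snapshot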
End of changes. 18 change blocks. |
| 98 lines changed or deleted | | 30 lines changed or added | |
|
| windows_intel64.h | | windows_intel64.h | |
| /* | | /* | |
|
| Copyright 2005-2010 Intel Corporation. All Rights Reserved. | | Copyright 2005-2011 Intel Corporation. All Rights Reserved. | |
| | | | |
| This file is part of Threading Building Blocks. | | This file is part of Threading Building Blocks. | |
| | | | |
| Threading Building Blocks is free software; you can redistribute it | | Threading Building Blocks is free software; you can redistribute it | |
| and/or modify it under the terms of the GNU General Public License | | and/or modify it under the terms of the GNU General Public License | |
| version 2 as published by the Free Software Foundation. | | version 2 as published by the Free Software Foundation. | |
| | | | |
| Threading Building Blocks is distributed in the hope that it will be | | Threading Building Blocks is distributed in the hope that it will be | |
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty | | useful, but WITHOUT ANY WARRANTY; without even the implied warranty | |
| of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | | of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| | | | |
| skipping to change at line 29 | | skipping to change at line 29 | |
| As a special exception, you may use this file as part of a free software | | As a special exception, you may use this file as part of a free software | |
| library without restriction. Specifically, if other files instantiate | | library without restriction. Specifically, if other files instantiate | |
| templates or use macros or inline functions from this file, or you compile | | templates or use macros or inline functions from this file, or you compile | |
| this file and link it with other files to produce an executable, this | | this file and link it with other files to produce an executable, this | |
| file does not by itself cause the resulting executable to be covered by | | file does not by itself cause the resulting executable to be covered by | |
| the GNU General Public License. This exception does not however | | the GNU General Public License. This exception does not however | |
| invalidate any other reasons why the executable file might be covered by | | invalidate any other reasons why the executable file might be covered by | |
| the GNU General Public License. | | the GNU General Public License. | |
| */ | | */ | |
| | | | |
|
| #ifndef __TBB_machine_H | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H) | |
| #error Do not include this file directly; include tbb_machine.h instead | | #error Do not #include this internal file directly; use public TBB headers instead. | |
| #endif | | #endif | |
| | | | |
|
| | | #define __TBB_machine_windows_intel64_H | |
| | | | |
| | | #define __TBB_WORDSIZE 8 | |
| | | #define __TBB_BIG_ENDIAN 0 | |
| | | | |
| #include <intrin.h> | | #include <intrin.h> | |
|
| #if !defined(__INTEL_COMPILER) | | | |
| #pragma intrinsic(_InterlockedOr64) | | #if !__INTEL_COMPILER | |
| #pragma intrinsic(_InterlockedAnd64) | | #pragma intrinsic(_InterlockedOr64) | |
| #pragma intrinsic(_InterlockedCompareExchange) | | #pragma intrinsic(_InterlockedAnd64) | |
| #pragma intrinsic(_InterlockedCompareExchange64) | | #pragma intrinsic(_InterlockedCompareExchange) | |
| #pragma intrinsic(_InterlockedExchangeAdd) | | #pragma intrinsic(_InterlockedCompareExchange64) | |
| #pragma intrinsic(_InterlockedExchangeAdd64) | | #pragma intrinsic(_InterlockedExchangeAdd) | |
| #pragma intrinsic(_InterlockedExchange) | | #pragma intrinsic(_InterlockedExchangeAdd64) | |
| #pragma intrinsic(_InterlockedExchange64) | | #pragma intrinsic(_InterlockedExchange) | |
| | | #pragma intrinsic(_InterlockedExchange64) | |
| #endif /* !defined(__INTEL_COMPILER) */ | | #endif /* !defined(__INTEL_COMPILER) */ | |
| | | | |
|
| #if defined(__INTEL_COMPILER) | | #if __INTEL_COMPILER | |
| #define __TBB_release_consistency_helper() __asm { __asm nop } | | #define __TBB_compiler_fence() __asm { __asm nop } | |
| inline void __TBB_rel_acq_fence() { __asm { __asm mfence } } | | #define __TBB_full_memory_fence() __asm { __asm mfence } | |
| #elif _MSC_VER >= 1300 | | #elif _MSC_VER >= 1300 | |
|
| extern "C" void _ReadWriteBarrier(); | | extern "C" void _ReadWriteBarrier(); | |
| #pragma intrinsic(_ReadWriteBarrier) | | #pragma intrinsic(_ReadWriteBarrier) | |
| #define __TBB_release_consistency_helper() _ReadWriteBarrier() | | #pragma intrinsic(_mm_mfence) | |
| #pragma intrinsic(_mm_mfence) | | #define __TBB_compiler_fence() _ReadWriteBarrier() | |
| inline void __TBB_rel_acq_fence() { _mm_mfence(); } | | #define __TBB_full_memory_fence() _mm_mfence() | |
| #endif | | #endif | |
| | | | |
|
| #define __TBB_WORDSIZE 8 | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_BIG_ENDIAN 0 | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| | | | |
| // ATTENTION: if you ever change argument types in machine-specific primiti
ves, | | // ATTENTION: if you ever change argument types in machine-specific primiti
ves, | |
| // please take care of atomic_word<> specializations in tbb/atomic.h | | // please take care of atomic_word<> specializations in tbb/atomic.h | |
| extern "C" { | | extern "C" { | |
|     __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand ); | |     __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand ); | |
|     __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend ); | |     __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend ); | |
|     __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value ); | |     __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value ); | |
|     __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand ); | |     __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand ); | |
|     __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend ); | |     __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend ); | |
|     __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value ); | |     __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value ); | |
| void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay ); | | void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay ); | |
| } | | } | |
| | | | |
|
| | | inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) { | |
| | |     return _InterlockedCompareExchange( (long*)ptr, value, comparand ); | |
| | | } | |
| | | inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) { | |
| | |     return _InterlockedExchangeAdd( (long*)ptr, addend ); | |
| | | } | |
| | | inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) { | |
| | |     return _InterlockedExchange( (long*)ptr, value ); | |
| | | } | |
| | | | |
| | | inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) { | |
| | |     return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand ); | |
| | | } | |
| | | inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) { | |
| | |     return _InterlockedExchangeAdd64( (__int64*)ptr, addend ); | |
| | | } | |
| | | inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) { | |
| | |     return _InterlockedExchange64( (__int64*)ptr, value ); | |
| | | } | |
| | | | |
| | | #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | |
| | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| | | | |
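
The wrappers above route TBB's size-suffixed primitives onto the corresponding MSVC Interlocked intrinsics. A hedged usage sketch (hypothetical variable):

    // volatile long word = 0;
    // long previous = __TBB_machine_cmpswp4( &word, /*value*/ 1, /*comparand*/ 0 );
    // if( previous == 0 ) { /* this thread installed the 1 */ }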
| #if !__INTEL_COMPILER | | #if !__INTEL_COMPILER | |
| extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in
t64 w ); | | extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in
t64 w ); | |
| #pragma intrinsic(_BitScanReverse64) | | #pragma intrinsic(_BitScanReverse64) | |
| #endif | | #endif | |
| | | | |
| inline __int64 __TBB_machine_lg( unsigned __int64 i ) { | | inline __int64 __TBB_machine_lg( unsigned __int64 i ) { | |
| #if __INTEL_COMPILER | | #if __INTEL_COMPILER | |
| unsigned __int64 j; | | unsigned __int64 j; | |
| __asm | | __asm | |
| { | | { | |
| | | | |
| skipping to change at line 99 | | skipping to change at line 130 | |
| } | | } | |
| | | | |
| inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { | | inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { | |
| _InterlockedOr64((__int64*)operand, addend); | | _InterlockedOr64((__int64*)operand, addend); | |
| } | | } | |
| | | | |
| inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { | | inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { | |
| _InterlockedAnd64((__int64*)operand, addend); | | _InterlockedAnd64((__int64*)operand, addend); | |
| } | | } | |
| | | | |
|
| #define __TBB_CompareAndSwap1(P,V,C) __TBB_machine_cmpswp1(P,V,C) | | | |
| #define __TBB_CompareAndSwap2(P,V,C) __TBB_machine_cmpswp2(P,V,C) | | | |
| #define __TBB_CompareAndSwap4(P,V,C) _InterlockedCompareExchange( (long*) P , V , C ) | | | |
| #define __TBB_CompareAndSwap8(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C ) | | | |
| #define __TBB_CompareAndSwapW(P,V,C) _InterlockedCompareExchange64( (__int64*) P , V , C ) | | | |
| | | | |
| #define __TBB_FetchAndAdd1(P,V) __TBB_machine_fetchadd1(P,V) | | | |
| #define __TBB_FetchAndAdd2(P,V) __TBB_machine_fetchadd2(P,V) | | | |
| #define __TBB_FetchAndAdd4(P,V) _InterlockedExchangeAdd((long*) P , V ) | | | |
| #define __TBB_FetchAndAdd8(P,V) _InterlockedExchangeAdd64((__int64*) P , V ) | | | |
| #define __TBB_FetchAndAddW(P,V) _InterlockedExchangeAdd64((__int64*) P , V ) | | | |
| | | | |
| #define __TBB_FetchAndStore1(P,V) __TBB_machine_fetchstore1(P,V) | | | |
| #define __TBB_FetchAndStore2(P,V) __TBB_machine_fetchstore2(P,V) | | | |
| #define __TBB_FetchAndStore4(P,V) _InterlockedExchange((long*) P , V ) | | | |
| #define __TBB_FetchAndStore8(P,V) _InterlockedExchange64((__int64*) P , V ) | | | |
| #define __TBB_FetchAndStoreW(P,V) _InterlockedExchange64((__int64*) P , V ) | | | |
| | | | |
| // Not used if wordsize == 8 | | | |
| #undef __TBB_Store8 | | | |
| #undef __TBB_Load8 | | | |
| | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | | #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | | #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | |
| | | | |
| extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | | extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | |
| #define __TBB_Yield() SwitchToThread() | | #define __TBB_Yield() SwitchToThread() | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
|
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
| | | // API to retrieve/update FPU control setting | |
| | | #define __TBB_CPU_CTL_ENV_PRESENT 1 | |
| | | | |
| | | struct __TBB_cpu_ctl_env_t { | |
| | | int mxcsr; | |
| | | short x87cw; | |
| | | }; | |
| | | | |
|
| // Use generic definitions from tbb_machine.h | | extern "C" { | |
| #undef __TBB_TryLockByte | |     void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ); | |
| #undef __TBB_LockByte | |     void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const __TBB_cpu_ctl_env_t* ); | |
| | | } | |
| | | | |
End of changes. 11 change blocks. |
| 50 lines changed or deleted | | 69 lines changed or added | |
|