atomic.h
========

@@ skipping to change at line 51 @@
  #include "tbb_machine.h"

  #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
      // Workaround for overzealous compiler warnings
      #pragma warning (push)
      #pragma warning (disable: 4244 4267)
  #endif

  namespace tbb {

- //! Specifies memory fencing.
+ //! Specifies memory semantics.
  enum memory_semantics {
-     //! Sequentially consistent fence.
+     //! Sequential consistency
      full_fence,
-     //! Acquire fence
+     //! Acquire
      acquire,
-     //! Release fence
+     //! Release
      release,
      //! No ordering
      relaxed
  };

  //! @cond INTERNAL
  namespace internal {

  #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
  #define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a)));

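The renamed comments in the hunk above describe the memory_semantics values that tbb::atomic operations take as a template argument. As a quick illustration of how the enumerators are used (a minimal sketch, not part of the diff; the data/ready variables and producer/consumer functions are invented for the example):

    #include "tbb/atomic.h"

    tbb::atomic<int>  data;   // zero-initialized at namespace scope
    tbb::atomic<bool> ready;

    void producer() {
        data.store<tbb::relaxed>(42);      // no ordering required yet
        ready.store<tbb::release>(true);   // publish: pairs with the acquire load below
    }

    int consumer() {
        while( !ready.load<tbb::acquire>() )  // acquire: sees everything before the release
            ;
        return data.load<tbb::relaxed>();
        // Read-modify-write operations such as fetch_and_add and compare_and_swap
        // default to full_fence unless a weaker semantic is requested explicitly.
    }
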
@@ skipping to change at line 89 @@
      typedef int8_t word;
      int8_t value;
  };
  template<>
  struct atomic_rep<2> {       // Specialization
      typedef int16_t word;
      __TBB_DECL_ATOMIC_FIELD(int16_t,value,2)
  };
  template<>
  struct atomic_rep<4> {       // Specialization
- #if _MSC_VER && __TBB_WORDSIZE==4
+ #if _MSC_VER && !_WIN64
      // Work-around that avoids spurious /Wp64 warnings
      typedef intptr_t word;
  #else
      typedef int32_t word;
  #endif
      __TBB_DECL_ATOMIC_FIELD(int32_t,value,4)
  };
  #if __TBB_64BIT_ATOMICS
  template<>
  struct atomic_rep<8> {       // Specialization

@@ skipping to change at line 305 @@
      template<memory_semantics M>
      value_type fetch_and_decrement() {
          return fetch_and_add<M>(__TBB_MINUS_ONE(D));
      }

      value_type fetch_and_decrement() {
          return fetch_and_add(__TBB_MINUS_ONE(D));
      }

  public:
-     value_type operator+=( D addend ) {
-         return fetch_and_add(addend)+addend;
+     value_type operator+=( D value ) {
+         return fetch_and_add(value)+value;
      }

-     value_type operator-=( D addend ) {
-         // Additive inverse of addend computed using binary minus,
+     value_type operator-=( D value ) {
+         // Additive inverse of value computed using binary minus,
          // instead of unary minus, for sake of avoiding compiler warnings.
-         return operator+=(D(0)-addend);
+         return operator+=(D(0)-value);
      }

      value_type operator++() {
          return fetch_and_add(1)+1;
      }

      value_type operator--() {
          return fetch_and_add(__TBB_MINUS_ONE(D))-1;
      }

@@ skipping to change at line 362 @@

  #if __TBB_64BIT_ATOMICS
  __TBB_DECL_ATOMIC(__TBB_LONG_LONG)
  __TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
  #else
  // test_atomic will verify that sizeof(long long)==8
  #endif
  __TBB_DECL_ATOMIC(long)
  __TBB_DECL_ATOMIC(unsigned long)

- #if defined(_MSC_VER) && __TBB_WORDSIZE==4
+ #if _MSC_VER && !_WIN64
  /* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option.
     It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T)
     with an operator=(U) that explicitly converts the U to a T. Types T and U should be
     type synonyms on the platform. Type U should be the wider variant of T from the
     perspective of /Wp64. */
  #define __TBB_DECL_ATOMIC_ALT(T,U) \
      template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
          T operator=( U rhs ) {return store_with_release(T(rhs));} \
          atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
      };
  __TBB_DECL_ATOMIC_ALT(unsigned,size_t)
  __TBB_DECL_ATOMIC_ALT(int,ptrdiff_t)
  #else
  __TBB_DECL_ATOMIC(unsigned)
  __TBB_DECL_ATOMIC(int)
- #endif /* defined(_MSC_VER) && __TBB_WORDSIZE==4 */
+ #endif /* _MSC_VER && !_WIN64 */

  __TBB_DECL_ATOMIC(unsigned short)
  __TBB_DECL_ATOMIC(short)
  __TBB_DECL_ATOMIC(char)
  __TBB_DECL_ATOMIC(signed char)
  __TBB_DECL_ATOMIC(unsigned char)

- #if !defined(_MSC_VER)||defined(_NATIVE_WCHAR_T_DEFINED)
+ #if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
  __TBB_DECL_ATOMIC(wchar_t)
  #endif /* _MSC_VER||!defined(_NATIVE_WCHAR_T_DEFINED) */

  //! Specialization for atomic<T*> with arithmetic and operator->.
  template<typename T> struct atomic<T*>: internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T> {
      T* operator=( T* rhs ) {
          // "this" required here in strict ISO C++ because store_with_release is a dependent name
          return this->store_with_release(rhs);
      }
      atomic<T*>& operator=( const atomic<T*>& rhs ) {

@@ skipping to change at line 424 @@

  // Helpers to workaround ugly syntax of calling template member function of a
  // template class with template argument dependent on template parameters.

  template <memory_semantics M, typename T>
  T load ( const atomic<T>& a ) { return a.template load<M>(); }

  template <memory_semantics M, typename T>
  void store ( atomic<T>& a, T value ) { return a.template store<M>(value); }

+ namespace interface6{
+ //! Make an atomic for use in an initialization (list), as an alternative to zero-initializaton or normal assignment.
+ template<typename T>
+ atomic<T> make_atomic(T t) {
+     atomic<T> a;
+     store<relaxed>(a,t);
+     return a;
+ }
+ }
+ using interface6::make_atomic;
+
+ namespace internal {
+
+ // only to aid in the gradual conversion of ordinary variables to proper atomics
+ template<typename T>
+ inline atomic<T>& as_atomic( T& t ) {
+     return (atomic<T>&)t;
+ }
+ } // namespace tbb::internal
+
  } // namespace tbb

- #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+ #if _MSC_VER && !__INTEL_COMPILER
      #pragma warning (pop)
  #endif // warnings 4244, 4267 are back

  #endif /* __TBB_atomic_H */

End of changes: 13 change blocks; 14 lines changed or deleted, 36 lines changed or added.

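The make_atomic helper added at the end of this diff (in namespace interface6, exported as tbb::make_atomic) builds an atomic from a value using a relaxed store, which is handy in initialization lists where an ordinary assignment (a release store) would be overkill. A usage sketch (the Stats struct is invented for illustration):

    #include "tbb/atomic.h"

    struct Stats {
        tbb::atomic<long> hits;
        tbb::atomic<long> misses;
    };

    // Aggregate initialization from already-constructed atomics.
    Stats s = { tbb::make_atomic(0L), tbb::make_atomic(0L) };

The companion internal::as_atomic cast is only meant to ease the library's own gradual migration of plain variables to tbb::atomic; user code should simply declare tbb::atomic<T> directly.
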
concurrent_hash_map.h
=====================

@@ skipping to change at line 174 @@
          return (segment_index_t(1)<<k & ~segment_index_t(1));
      }

      //! @return segment size except for @arg k == 0
      static size_type segment_size( segment_index_t k ) {
          return size_type(1)<<k; // fake value for k==0
      }

      //! @return true if @arg ptr is valid pointer
      static bool is_valid( void *ptr ) {
-         return reinterpret_cast<size_t>(ptr) > size_t(63);
+         return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63);
      }

      //! Initialize buckets
      static void init_buckets( segment_ptr_t ptr, size_type sz, bool is_initial ) {
          if( is_initial ) std::memset(ptr, 0, sz*sizeof(bucket) );
          else for(size_type i = 0; i < sz; i++, ptr++) {
              *reinterpret_cast<intptr_t*>(&ptr->mutex) = 0;
              ptr->node_list = rehash_req;
          }
      }

      //! Add node @arg n to bucket @arg b
      static void add_to_bucket( bucket *b, node_base *n ) {
          __TBB_ASSERT(b->node_list != rehash_req, NULL);
          n->next = b->node_list;
          b->node_list = n; // its under lock and flag is set
      }

      //! Exception safety helper
-     struct enable_segment_failsafe {
+     struct enable_segment_failsafe : tbb::internal::no_copy {
          segment_ptr_t *my_segment_ptr;
          enable_segment_failsafe(segments_table_t &table, segment_index_t k) : my_segment_ptr(&table[k]) {}
          ~enable_segment_failsafe() {
              if( my_segment_ptr ) *my_segment_ptr = 0; // indicate no allocation in progress
          }
      };

      //! Enable segment
      void enable_segment( segment_index_t k, bool is_initial = false ) {
          __TBB_ASSERT( k, "Zero segment must be embedded" );

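The helpers in this hunk encode the table layout used throughout concurrent_hash_map: segment k (for k >= 1) holds buckets 2^k .. 2^(k+1)-1, and any "pointer" whose value is 63 or less is treated as a flag (such as rehash_req) rather than a real node, which is what is_valid() tests. A standalone sketch of the same arithmetic, written with plain integers purely for illustration (log2_floor stands in for __TBB_Log2):

    #include <cstdint>
    #include <cassert>

    inline uintptr_t log2_floor( uintptr_t x ) {          // stand-in for __TBB_Log2
        uintptr_t r = 0;
        while( x >>= 1 ) ++r;
        return r;
    }
    inline uintptr_t segment_index_of( uintptr_t bucket ) { return log2_floor( bucket|1 ); }
    inline uintptr_t segment_base( uintptr_t k ) { return (uintptr_t(1)<<k) & ~uintptr_t(1); }
    inline uintptr_t segment_size( uintptr_t k ) { return uintptr_t(1)<<k; } // fake value for k==0

    int main() {
        assert( segment_index_of(6) == 2 );  // bucket 6 lives in segment 2 ...
        assert( segment_base(2) == 4 );      // ... which starts at bucket 4
        assert( segment_size(2) == 4 );      // ... and spans buckets 4..7
        return 0;
    }
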
@@ skipping to change at line 1065 @@
      __TBB_ASSERT( item_accessor.my_node, NULL );
      node_base *const n = item_accessor.my_node;
      hashcode_t const h = item_accessor.my_hash;
      hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
      do {
          // get bucket
          bucket_accessor b( this, h & m, /*writer=*/true );
          node_base **p = &b()->node_list;
          while( *p && *p != n )
              p = &(*p)->next;
-         if( !*p ) { // someone else was the first
+         if( !*p ) { // someone else was first
              if( check_mask_race( h, m ) )
                  continue;
              item_accessor.release();
              return false;
          }
          __TBB_ASSERT( *p == n, NULL );
          *p = n->next; // remove from container
          my_size--;
          break;
      } while(true);

@@ skipping to change at line 1132 @@
      std::swap(this->my_allocator, table.my_allocator);
      std::swap(this->my_hash_compare, table.my_hash_compare);
      internal_swap(table);
  }

  template<typename Key, typename T, typename HashCompare, typename A>
  void concurrent_hash_map<Key,T,HashCompare,A>::rehash(size_type sz) {
      reserve( sz ); // TODO: add reduction of number of buckets as well
      hashcode_t mask = my_mask;
      hashcode_t b = (mask+1)>>1; // size or first index of the last segment
-     __TBB_ASSERT((b&(b-1))==0, NULL);
+     __TBB_ASSERT((b&(b-1))==0, NULL); // zero or power of 2
      bucket *bp = get_bucket( b ); // only the last segment should be scanned for rehashing
      for(; b <= mask; b++, bp++ ) {
          node_base *n = bp->node_list;
          __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" );
          __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" );
          if( n == internal::rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one
              hashcode_t h = b; bucket *b_old = bp;
              do {
                  __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
                  hashcode_t m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit

End of changes: 5 change blocks; 7 lines changed or deleted, 7 lines changed or added.

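The last line of the rehash() hunk is the heart of the lazy rehashing scheme: clearing the topmost set bit of a bucket index yields the "parent" bucket that held its items before the table doubled. A hedged arithmetic sketch of that one expression (standalone, not library code):

    #include <cassert>

    // (1u << log2(h)) - 1 masks off the topmost set bit of h.
    inline unsigned log2_floor( unsigned x ) { unsigned r = 0; while( x >>= 1 ) ++r; return r; }
    inline unsigned parent_bucket( unsigned h ) { return h & ((1u << log2_floor(h)) - 1); }

    int main() {
        assert( parent_bucket(13) == 5 );  // 0b1101 -> 0b0101: bucket 13 split off from bucket 5
        assert( parent_bucket(5)  == 1 );  // 0b0101 -> 0b0001
        assert( parent_bucket(6)  == 2 );  // 0b0110 -> 0b0010
        return 0;
    }
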
concurrent_vector.h
===================

@@ skipping to change at line 408 @@
      Methods working with memory allocation and/or new elements construction can throw an
      exception if allocator fails to allocate memory or element's default constructor throws one.
      Concurrent vector's element of type T must conform to the following requirements:
      - Throwing an exception is forbidden for destructor of T.
      - Default constructor of T must not throw an exception OR its non-virtual destructor must safely work when its object memory is zero-initialized.
      .
      Otherwise, the program's behavior is undefined.
      @par
      If an exception happens inside growth or assignment operation, an instance of the vector becomes invalid unless it is stated otherwise in the method documentation.
      Invalid state means:
-     - There are no guaranties that all items were initialized by a constructor. The rest of items is zero-filled, including item where exception happens.
+     - There are no guarantees that all items were initialized by a constructor. The rest of items is zero-filled, including item where exception happens.
      - An invalid vector instance cannot be repaired; it is unable to grow anymore.
      - Size and capacity reported by the vector are incorrect, and calculated as if the failed operation were successful.
      - Attempt to access not allocated elements using operator[] or iterators results in access violation or segmentation fault exception, and in case of using at() method a C++ exception is thrown.
      .
      If a concurrent grow operation successfully completes, all the elements it has added to the vector will remain valid and accessible even if one of subsequent grow operations fails.

      @par Fragmentation
      Unlike an STL vector, a concurrent_vector does not move existing elements if it needs
      to allocate more memory. The container is divided into a series of contiguous arrays of
      elements. The first reservation, growth, or assignment operation determines the size of

@@ skipping to change at line 431 @@
      merges several smaller arrays into one solid.

      @par Changes since TBB 2.1
      - Fixed guarantees of concurrent_vector::size() and grow_to_at_least() methods to assure elements are allocated.
      - Methods end()/rbegin()/back() are partly thread-safe since they use size() to get the end of vector
      - Added resize() methods (not thread-safe)
      - Added cbegin/cend/crbegin/crend methods
      - Changed return type of methods grow* and push_back to iterator

      @par Changes since TBB 2.0
-     - Implemented exception-safety guaranties
+     - Implemented exception-safety guarantees
      - Added template argument for allocator
      - Added allocator argument in constructors
      - Faster index calculation
      - First growth call specifies a number of segments to be merged in the first allocation.
      - Fixed memory blow up for swarm of vector's instances of small size
      - Added grow_by(size_type n, const_reference t) growth using copying constructor to init new items.
      - Added STL-like constructors.
      - Added operators ==, < and derivatives
      - Added at() method, approved for using after an exception was thrown inside the vector
      - Added get_allocator() method.

@@ skipping to change at line 615 @@
      size_type grow_by( size_type delta ) {
          return delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size;
      }
  #else
      /** Returns iterator pointing to the first new element. */
      iterator grow_by( size_type delta ) {
          return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size);
      }
  #endif

-     //! Grow by "delta" elements using copying constuctor.
+     //! Grow by "delta" elements using copying constructor.
  #if TBB_DEPRECATED
      /** Returns old size. */
      size_type grow_by( size_type delta, const_reference t ) {
          return delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size;
      }
  #else
      /** Returns iterator pointing to the first new element. */
      iterator grow_by( size_type delta, const_reference t ) {
          return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size);
      }

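As the doc comments in this hunk state, grow_by() now returns an iterator to the first element it added (unless TBB_DEPRECATED is defined, in which case it returns the old size). A short usage sketch under the non-deprecated interface (the vector and values are invented for the example):

    #include "tbb/concurrent_vector.h"

    tbb::concurrent_vector<int> v;

    void append_block() {
        // Add 4 elements copy-constructed from -1. Other threads may be growing v
        // concurrently, so the returned iterator is the only reliable way to find
        // where this thread's block starts.
        tbb::concurrent_vector<int>::iterator it = v.grow_by(4, -1);
        for( int k = 0; k < 4; ++k, ++it )
            *it = k;   // fill the freshly added elements
    }
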
@@ skipping to change at line 692 @@
      reference at( size_type index ) {
          return internal_subscript_with_exceptions(index);
      }

      //! Get const reference to element at given index. Throws exceptions on errors.
      const_reference at( size_type index ) const {
          return internal_subscript_with_exceptions(index);
      }

      //! Get range for iterating with parallel algorithms
-     range_type range( size_t grainsize = 1) {
+     range_type range( size_t grainsize = 1 ) {
          return range_type( begin(), end(), grainsize );
      }

      //! Get const range for iterating with parallel algorithms
      const_range_type range( size_t grainsize = 1 ) const {
          return const_range_type( begin(), end(), grainsize );
      }
+
      //------------------------------------------------------------------------
      // Capacity
      //------------------------------------------------------------------------
      //! Return size of vector. It may include elements under construction
      size_type size() const {
          size_type sz = my_early_size, cp = internal_capacity();
          return cp < sz ? cp : sz;
      }

      //! Return false if vector is not empty or has elements under construction at least.

@@ skipping to change at line 894 (old) / 895 (new) @@
          const size_type n;
          size_type i;
          internal_loop_guide(size_type ntrials, void *ptr)
              : array(static_cast<pointer>(ptr)), n(ntrials), i(0) {}
          void init() { for(; i < n; ++i) new( &array[i] ) T(); }
          void init(const void *src) { for(; i < n; ++i) new( &array[i] ) T(*static_cast<const T*>(src)); }
          void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(static_cast<const T*>(src)[i]); }
          void assign(const void *src) { for(; i < n; ++i) array[i] = static_cast<const T*>(src)[i]; }
          template<class I> void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); }
          ~internal_loop_guide() {
-             if(i < n) // if exception raised, do zerroing on the rest of items
+             if(i < n) // if exception raised, do zeroing on the rest of items
                  std::memset(array+i, 0, (n-i)*sizeof(value_type));
          }
      };
  };

  #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
      #pragma warning (push)
      #pragma warning (disable: 4701) // potentially uninitialized local variable "old"
  #endif
  template<typename T, class A>

End of changes: 6 change blocks; 5 lines changed or deleted, 6 lines changed or added.

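The range() accessors shown in the first hunk are what plug a concurrent_vector into TBB's parallel algorithms. A minimal sketch (the element type, functor, and grainsize are assumptions made for the example, not taken from the diff):

    #include "tbb/concurrent_vector.h"
    #include "tbb/parallel_for.h"

    typedef tbb::concurrent_vector<float> Vec;

    struct Double {
        void operator()( const Vec::range_type& r ) const {
            for( Vec::iterator i = r.begin(); i != r.end(); ++i )
                *i *= 2.0f;   // each chunk of the range is processed by one task
        }
    };

    void double_all( Vec& v ) {
        // range(16) asks the scheduler not to split chunks below 16 elements.
        tbb::parallel_for( v.range(16), Double() );
    }
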
flow_graph.h
============

@@ skipping to change at line 310 @@
  private:
      Receiver &my_receiver;
      Body my_body;
  };

  public:
      //! Constructs a graph with isolated task_group_context
      explicit graph() : my_nodes(NULL), my_nodes_last(NULL)
      {
          own_context = true;
+         cancelled = false;
+         caught_exception = false;
          my_context = new task_group_context();
          my_root_task = ( new ( task::allocate_root(*my_context) ) empty_task );
          my_root_task->set_ref_count(1);
      }

      //! Constructs a graph with use_this_context as context
      explicit graph(task_group_context& use_this_context) :
          my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL)
      {
          own_context = false;

@@ skipping to change at line 370 (old) / 372 (new) @@
          that need to block a wait_for_all() on the graph. For example a one-off source. */
      template< typename Body >
      void run( Body body ) {
          task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
              run_task< Body >( body ) );
      }

      //! Wait until graph is idle and decrement_wait_count calls equals increment_wait_count calls.
      /** The waiting thread will go off and steal work while it is block in the wait_for_all. */
      void wait_for_all() {
-         if (my_root_task)
-             my_root_task->wait_for_all();
-         my_root_task->set_ref_count(1);
+         cancelled = false;
+         caught_exception = false;
+         if (my_root_task) {
+ #if TBB_USE_EXCEPTIONS
+             try {
+ #endif
+                 my_root_task->wait_for_all();
+                 cancelled = my_context->is_group_execution_cancelled();
+ #if TBB_USE_EXCEPTIONS
+             }
+             catch(...) {
+                 my_root_task->set_ref_count(1);
+                 my_context->reset();
+                 caught_exception = true;
+                 cancelled = true;
+                 throw;
+             }
+ #endif
+             my_root_task->set_ref_count(1);
+         }
      }

      //! Returns the root task of the graph
      task * root_task() {
          return my_root_task;
      }

      // ITERATORS
      template<typename C, typename N>
      friend class graph_iterator;

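The reworked wait_for_all() above now records whether the graph's context was cancelled and, when exceptions are enabled, resets the context and rethrows after capturing the failure. From user code the calling pattern stays the same as with run(), shown in the same hunk; a hedged sketch (the functor is invented for the example):

    #include "tbb/flow_graph.h"
    #include <iostream>

    struct HelloBody {
        void operator()() const { std::cout << "one-off work item\n"; }
    };

    void fire_and_wait( tbb::flow::graph& g ) {
        g.run( HelloBody() );   // enqueues a child of the graph's root task
        g.wait_for_all();       // steals work while blocked; rethrows any exception
                                // raised inside the graph after resetting its context
    }
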
@@ skipping to change at line 402 (old) / 421 (new) @@
      iterator end() { return iterator(this, false); }
      //! start const iterator
      const_iterator begin() const { return const_iterator(this, true); }
      //! end const iterator
      const_iterator end() const { return const_iterator(this, false); }
      //! start const iterator
      const_iterator cbegin() const { return const_iterator(this, true); }
      //! end const iterator
      const_iterator cend() const { return const_iterator(this, false); }

+     //! return status of graph execution
+     bool is_cancelled() { return cancelled; }
+     bool exception_thrown() { return caught_exception; }
+
  private:
      task *my_root_task;
      task_group_context *my_context;
      bool own_context;
+     bool cancelled;
+     bool caught_exception;

      graph_node *my_nodes, *my_nodes_last;

      spin_mutex nodelist_mutex;
      void register_node(graph_node *n);
      void remove_node(graph_node *n);
+
  };

  template <typename C, typename N>
  graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL)
  {
      if (begin) current_node = my_graph->my_nodes;
      //else it is an end iterator by default
  }

  template <typename C, typename N>

@@ skipping to change at line 795 (old) / 821 (new) @@
      continue_node( graph &g, int number_of_predecessors, Body body ) :
          graph_node(g), internal::continue_input<output_type>( g, number_of_predecessors, body )
      {}

      //! Copy constructor
      continue_node( const continue_node& src ) :
          graph_node(src.my_graph), internal::continue_input<output_type>(src),
          internal::function_output<Output>()
      {}

+     bool try_put(const input_type &i) { return internal::continue_input<Output>::try_put(i); }
+
  protected:
      /* override */ internal::broadcast_cache<output_type> &successors () { return fOutput_type::my_successors; }
  };

  template< typename T >
  class overwrite_node : public graph_node, public receiver<T>, public sender<T> {
      using graph_node::my_graph;
  public:
      typedef T input_type;
      typedef T output_type;

End of changes: 6 change blocks; 3 lines changed or deleted, 32 lines changed or added.

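The new is_cancelled() and exception_thrown() accessors let callers find out what happened during the most recent wait_for_all(). A sketch of how they might be consulted (illustrative only; note that the diff sets cancelled whenever an exception was caught, so the exception check comes first):

    #include "tbb/flow_graph.h"
    #include <iostream>

    void report_outcome( tbb::flow::graph& g ) {
        try {
            g.wait_for_all();
        } catch( ... ) {
            // wait_for_all() rethrows after resetting the graph's context,
            // so both status flags are already set by the time we get here.
        }
        if( g.exception_thrown() )
            std::cout << "a node body threw; the graph context was reset\n";
        else if( g.is_cancelled() )
            std::cout << "graph execution was cancelled\n";
        else
            std::cout << "graph ran to completion\n";
    }
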
gcc_generic.h
=============

@@ skipping to change at line 40 @@
  #error Do not #include this internal file directly; use public TBB headers instead.
  #endif

  #define __TBB_machine_gcc_generic_H

  #include <stdint.h>
  #include <unistd.h>

  #define __TBB_WORDSIZE      __SIZEOF_POINTER__

- // For some reason straight mapping does not work on mingw
- #if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
-     #define __TBB_BIG_ENDIAN    0
- #elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
-     #define __TBB_BIG_ENDIAN    1
- #else
-     #error Unsupported endianness
+ #ifdef __BYTE_ORDER__
+     #if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
+         #define __TBB_BIG_ENDIAN    1
+     #elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
+         #define __TBB_BIG_ENDIAN    0
+     #elif __BYTE_ORDER__==__ORDER_PDP_ENDIAN__
+         #define __TBB_BIG_ENDIAN    -1 // not currently supported
+     #endif
  #endif

  /** As this generic implementation has absolutely no information about underlying
      hardware, its performance most likely will be sub-optimal because of full memory
      fence usages where a more lightweight synchronization means (or none at all)
      could suffice. Thus if you use this header to enable TBB on a new platform,
      consider forking it and relaxing below helpers as appropriate. **/
  #define __TBB_acquire_consistency_helper() __sync_synchronize()
  #define __TBB_release_consistency_helper() __sync_synchronize()
  #define __TBB_full_memory_fence()          __sync_synchronize()

@@ skipping to change at line 76 (old) / 77 (new) @@
  }                                                                             \

  __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
  __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
  __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
  __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)

  #undef __TBB_MACHINE_DEFINE_ATOMICS

  namespace tbb{ namespace internal { namespace gcc_builtins {
-     int clz(unsigned int x){ return __builtin_clz(x);};
-     int clz(unsigned long int x){ return __builtin_clzl(x);};
+     inline int clz(unsigned int x){ return __builtin_clz(x);};
+     inline int clz(unsigned long int x){ return __builtin_clzl(x);};
+     inline int clz(unsigned long long int x){ return __builtin_clzll(x);};
  }}}
  //gcc __builtin_clz builtin count _number_ of leading zeroes
  static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
      return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ;
  }

  static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
      __sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
  }

@@ skipping to change at line 114 (old) / 116 (new) @@
  // Machine specific atomic operations
  #define __TBB_AtomicOR(P,V)     __TBB_machine_or(P,V)
  #define __TBB_AtomicAND(P,V)    __TBB_machine_and(P,V)

  #define __TBB_TryLockByte   __TBB_machine_try_lock_byte
  #define __TBB_UnlockByte    __TBB_machine_unlock_byte

  // Definition of other functions
  #define __TBB_Log2(V)       __TBB_machine_lg(V)

  #define __TBB_USE_GENERIC_FETCH_STORE                     1
  #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE          1
  #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE              1
+ #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

  #if __TBB_WORDSIZE==4
      #define __TBB_USE_GENERIC_DWORD_LOAD_STORE            1
  #endif

End of changes: 4 change blocks; 13 lines changed or deleted, 16 lines changed or added.

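__TBB_machine_lg() above derives floor(log2(x)) from the leading-zero count: for a W-bit word, lg(x) = W - clz(x) - 1. A self-contained check of that identity using the same GCC builtin (illustrative, not library code):

    #include <cassert>
    #include <stdint.h>

    // floor(log2(x)) for x > 0, written exactly like the header's formula.
    static inline intptr_t lg( uintptr_t x ) {
        return sizeof(x)*8 - __builtin_clzl(x) - 1;
    }

    int main() {
        assert( lg(1)  == 0 );
        assert( lg(2)  == 1 );
        assert( lg(40) == 5 );   // 40 = 0b101000, topmost set bit is bit 5
        assert( lg(64) == 6 );
        // lg(0) is undefined: __builtin_clz* is undefined for 0, which is why
        // the PowerPC port later in this report adds an assertion for that case.
        return 0;
    }
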
linux_ia32.h
============

@@ skipping to change at line 95 @@
  __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
  __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
  __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")

  #if __INTEL_COMPILER
  #pragma warning( push )
  // reference to EBX in a function requiring stack alignment
  #pragma warning( disable: 998 )
  #endif

- static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
- {
+ static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
+ #if __TBB_GCC_BUILTIN_ATOMICS_PRESENT
+     return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
+ #else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
      int64_t result;
      union {
          int64_t i64;
          int32_t i32[2];
      };
      i64 = value;
  #if __PIC__
      /* compiling position-independent code */
      // EBX register preserved for compliance with position-independent code rules on IA32
      int32_t tmp;

@@ skipping to change at line 142 (old) / 144 (new) @@
      __asm__ __volatile__ (
              "lock\n\t cmpxchg8b %1\n\t"
              : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
              : "m"(*(__TBB_VOLATILE int64_t *)ptr)
              , "0"(comparand)
              , "b"(i32[0]), "c"(i32[1])
              : "memory"
      );
  #endif /* __PIC__ */
      return result;
+ #endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
  }

  #if __INTEL_COMPILER
  #pragma warning( pop )
  #endif // warning 998 is back

  static inline int32_t __TBB_machine_lg( uint32_t x ) {
      int32_t j;
      __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
      return j;

@@ skipping to change at line 169 (old) / 172 (new) @@
      __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
  }

  static inline void __TBB_machine_pause( int32_t delay ) {
      for (int32_t i = 0; i < delay; i++) {
         __asm__ __volatile__("pause;");
      }
      return;
  }

+ //TODO: Check if it possible and profitable for IA-32 on (Linux and Windows)
+ //to use of 64-bit load/store via floating point registers together with full fence
+ //for sequentially consistent load/store, instead of CAS.
+
+ #if __clang__
+ #define __TBB_fildq  "fildll"
+ #define __TBB_fistpq "fistpll"
+ #else
+ #define __TBB_fildq  "fildq"
+ #define __TBB_fistpq "fistpq"
+ #endif
+
  static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
      int64_t result;
      if( ((uint32_t)ptr&7u)==0 ) {
          // Aligned load
-         __asm__ __volatile__ ( "fildq %1\n\t"
-                                "fistpq %0" :  "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
+         __asm__ __volatile__ ( __TBB_fildq  " %1\n\t"
+                                __TBB_fistpq " %0" :  "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
      } else {
          // Unaligned load
          result = __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
      }
      return result;
  }

  //! Handles misaligned 8-byte store
  /** Defined in tbb_misc.cpp */
  extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
  extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );

  static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
      if( ((uint32_t)ptr&7u)==0 ) {
          // Aligned store
-         __asm__ __volatile__ ( "fildq %1\n\t"
-                                "fistpq %0" :  "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
+         __asm__ __volatile__ ( __TBB_fildq  " %1\n\t"
+                                __TBB_fistpq " %0" :  "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
      } else {
          // Unaligned store
  #if TBB_USE_PERFORMANCE_WARNINGS
          __TBB_machine_store8_slow_perf_warning(ptr);
  #endif /* TBB_USE_PERFORMANCE_WARNINGS */
          __TBB_machine_store8_slow(ptr,value);
      }
  }

  // Machine specific atomic operations
  #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
  #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)

  // Definition of other functions
  #define __TBB_Pause(V) __TBB_machine_pause(V)
  #define __TBB_Log2(V)  __TBB_machine_lg(V)

  #define __TBB_USE_GENERIC_DWORD_FETCH_ADD                 1
  #define __TBB_USE_GENERIC_DWORD_FETCH_STORE               1
  #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE         1
  #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE          1
  #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE              1
+ #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

  // API to retrieve/update FPU control setting
  #define __TBB_CPU_CTL_ENV_PRESENT 1

  struct __TBB_cpu_ctl_env_t {
      int     mxcsr;
      short   x87cw;
  };

  inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {

End of changes: 6 change blocks; 14 lines changed or deleted, 33 lines changed or added.

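__TBB_machine_load8() reads 8 bytes atomically on IA-32 either through the x87 fild/fistp pair (aligned case) or, for misaligned addresses, by falling back to a compare-and-swap of the location against itself. That CAS trick works on any target with an 8-byte CAS; a hedged standalone sketch of the idea using the same GCC builtin this diff now uses in __TBB_machine_cmpswp8 (assumes the builtin supports an 8-byte operand on the target):

    #include <stdint.h>

    // Atomically read a 64-bit value by trying to replace 0 with 0: the CAS
    // always returns what was in memory and only writes when the old value was
    // already 0, in which case storing 0 changes nothing. This mirrors the
    // unaligned branch of __TBB_machine_load8 in the hunk above.
    static inline int64_t atomic_load8_via_cas( volatile int64_t* ptr ) {
        return __sync_val_compare_and_swap( ptr, int64_t(0), int64_t(0) );
    }
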
|
| mac_ppc.h | | mac_ppc.h | |
| | | | |
| skipping to change at line 50 | | skipping to change at line 50 | |
| // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/
or clobber lists, so they should be avoided. | | // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/
or clobber lists, so they should be avoided. | |
| | | | |
| #if __powerpc64__ || __ppc64__ | | #if __powerpc64__ || __ppc64__ | |
| // IBM XL documents __powerpc64__ (and __PPC64__). | | // IBM XL documents __powerpc64__ (and __PPC64__). | |
| // Apple documents __ppc64__ (with __ppc__ only on 32-bit). | | // Apple documents __ppc64__ (with __ppc__ only on 32-bit). | |
| #define __TBB_WORDSIZE 8 | | #define __TBB_WORDSIZE 8 | |
| #else | | #else | |
| #define __TBB_WORDSIZE 4 | | #define __TBB_WORDSIZE 4 | |
| #endif | | #endif | |
| | | | |
|
| | | #ifndef __BYTE_ORDER__ | |
| | | // Hopefully endianness can be validly determined at runtime. | |
| | | // This may silently fail in some embedded systems with page-specific e | |
| | | ndianness. | |
| | | #elif __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ | |
| | | #define __TBB_BIG_ENDIAN 1 | |
| | | #elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ | |
| | | #define __TBB_BIG_ENDIAN 0 | |
| | | #else | |
| | | #define __TBB_BIG_ENDIAN -1 // not currently supported | |
| | | #endif | |
| | | | |
| // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardwar
e: | | // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardwar
e: | |
| #if __TBB_WORDSIZE==8 | | #if __TBB_WORDSIZE==8 | |
| // Do not change the following definition, because TBB itself will use
64-bit atomics in 64-bit builds. | | // Do not change the following definition, because TBB itself will use
64-bit atomics in 64-bit builds. | |
| #define __TBB_64BIT_ATOMICS 1 | | #define __TBB_64BIT_ATOMICS 1 | |
| #elif __bgp__ | | #elif __bgp__ | |
|
| // Do not change the following definition on known 32-bit hardware. | | // Do not change the following definition, because this is known 32-bit
hardware. | |
| #define __TBB_64BIT_ATOMICS 0 | | #define __TBB_64BIT_ATOMICS 0 | |
| #else | | #else | |
| // To enable 64-bit atomics in 32-bit builds, set the value below to 1
instead of 0. | | // To enable 64-bit atomics in 32-bit builds, set the value below to 1
instead of 0. | |
| // You must make certain that the program will only use them on actual
64-bit hardware | | // You must make certain that the program will only use them on actual
64-bit hardware | |
| // (which typically means that the entire program is only executed on s
uch hardware), | | // (which typically means that the entire program is only executed on s
uch hardware), | |
| // because their implementation involves machine instructions that are
illegal elsewhere. | | // because their implementation involves machine instructions that are
illegal elsewhere. | |
| // The setting can be chosen independently per compilation unit, | | // The setting can be chosen independently per compilation unit, | |
| // which also means that TBB itself does not need to be rebuilt. | | // which also means that TBB itself does not need to be rebuilt. | |
| // Alternatively (but only for the current architecture and TBB version
), | | // Alternatively (but only for the current architecture and TBB version
), | |
| // override the default as a predefined macro when invoking the compile
r. | | // override the default as a predefined macro when invoking the compile
r. | |
| | | | |
| skipping to change at line 151 | | skipping to change at line 162 | |
| , [cmp] "=&r"(comparand_register) | | , [cmp] "=&r"(comparand_register) | |
| , "+m"(* (int64_t*) ptr) /* redundant with
"memory" */ | | , "+m"(* (int64_t*) ptr) /* redundant with
"memory" */ | |
| : [ptr] "r"(ptr) | | : [ptr] "r"(ptr) | |
| , [valm]"m"(value) | | , [valm]"m"(value) | |
| , [cmpm]"m"(comparand) | | , [cmpm]"m"(comparand) | |
| : "memory" /* compiler full f
ence */ | | : "memory" /* compiler full f
ence */ | |
| , "cr0" /* clobbered by cm
pd and/or stdcx. */ | | , "cr0" /* clobbered by cm
pd and/or stdcx. */ | |
| ); | | ); | |
| return result; | | return result; | |
| } | | } | |
|
| | | | |
| #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | | |
| #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx)
\ | | #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx)
\ | |
| template <typename T>
\ | | template <typename T>
\ | |
| struct machine_load_store<T,S> {
\ | | struct machine_load_store<T,S> {
\ | |
| static inline T load_with_acquire(const volatile T& location) {
\ | | static inline T load_with_acquire(const volatile T& location) {
\ | |
| T result;
\ | | T result;
\ | |
| __asm__ __volatile__(ldx " %[res],0(%[ptr])\n"
\ | | __asm__ __volatile__(ldx " %[res],0(%[ptr])\n"
\ | |
| "0:\n\t"
\ | | "0:\n\t"
\ | |
| cmpx " %[res],%[res]\n\t"
\ | | cmpx " %[res],%[res]\n\t"
\ | |
| | | | |
| skipping to change at line 278 | | skipping to change at line 290 | |
| } | | } | |
| }; | | }; | |
| #define __TBB_machine_load_store_relaxed_8 | | #define __TBB_machine_load_store_relaxed_8 | |
| | | | |
| #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| | | | |
| }} // namespaces internal, tbb | | }} // namespaces internal, tbb | |
| | | | |
| #undef __TBB_MACHINE_DEFINE_LOAD_STORE | | #undef __TBB_MACHINE_DEFINE_LOAD_STORE | |
| | | | |
|
| #define __TBB_USE_GENERIC_PART_WORD_CAS 1 | | #define __TBB_USE_GENERIC_PART_WORD_CAS 1 | |
| #define __TBB_USE_GENERIC_FETCH_ADD 1 | | #define __TBB_USE_GENERIC_FETCH_ADD 1 | |
| #define __TBB_USE_GENERIC_FETCH_STORE 1 | | #define __TBB_USE_GENERIC_FETCH_STORE 1 | |
| | | #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 | |
| | | | |
| #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": :
:"memory") | | #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": :
:"memory") | |
| #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": :
:"memory") | | #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": :
:"memory") | |
| | | | |
| static inline intptr_t __TBB_machine_lg( uintptr_t x ) { | | static inline intptr_t __TBB_machine_lg( uintptr_t x ) { | |
|
| | | __TBB_ASSERT(x, "__TBB_Log2(0) undefined"); | |
| // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-orde
r bits), and does not affect cr0 | | // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-orde
r bits), and does not affect cr0 | |
| #if __TBB_WORDSIZE==8 | | #if __TBB_WORDSIZE==8 | |
| __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); | | __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); | |
| return 63-static_cast<intptr_t>(x); | | return 63-static_cast<intptr_t>(x); | |
| #else | | #else | |
| __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); | | __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); | |
| return 31-static_cast<intptr_t>(x); | | return 31-static_cast<intptr_t>(x); | |
| #endif | | #endif | |
| } | | } | |
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
End of changes. 5 change blocks. |
| 4 lines changed or deleted | | 19 lines changed or added | |
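 | For reference, __TBB_machine_lg() above implements __TBB_Log2 as 63-cntlzd(x) (or 31-cntlzw(x) on 32-bit builds), i.e. the position of the highest set bit, and the new assertion documents that zero has no defined result. A tiny portable illustration of the same contract (hypothetical test code, not part of the header):

    #include <assert.h>
    #include <stdint.h>

    // Reference with the same meaning: floor(log2(x)) for x > 0.
    static inline intptr_t log2_floor_reference( uintptr_t x ) {
        intptr_t result = -1;
        while( x ) { x >>= 1; ++result; }  // count how many times x can be halved
        return result;
    }

    int main() {
        assert( log2_floor_reference(1)  == 0 );
        assert( log2_floor_reference(3)  == 1 );
        assert( log2_floor_reference(64) == 6 );  // 63 - cntlzd(64) on a 64-bit Power build
        return 0;
    }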
|
| parallel_reduce.h | | parallel_reduce.h | |
| | | | |
| skipping to change at line 46 | | skipping to change at line 46 | |
| #include "tbb_profiling.h" | | #include "tbb_profiling.h" | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
| namespace interface6 { | | namespace interface6 { | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| namespace internal { | | namespace internal { | |
| | | | |
| using namespace tbb::internal; | | using namespace tbb::internal; | |
| | | | |
|
| //! 0 if root, 1 if a left child, 2 if a right child. | | /** Values for reduction_context. */ | |
| | | enum { | |
| | | root_task, left_child, right_child | |
| | | }; | |
| | | | |
| /** Represented as a char, not enum, for compactness. */ | | /** Represented as a char, not enum, for compactness. */ | |
| typedef char reduction_context; | | typedef char reduction_context; | |
| | | | |
|
| //! Task type use to combine the partial results of parallel_reduce. | | //! Task type used to combine the partial results of parallel_reduce. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Body> | | template<typename Body> | |
| class finish_reduce: public flag_task { | | class finish_reduce: public flag_task { | |
| //! Pointer to body, or NULL if the left child has not yet finished
. | | //! Pointer to body, or NULL if the left child has not yet finished
. | |
| bool has_right_zombie; | | bool has_right_zombie; | |
| const reduction_context my_context; | | const reduction_context my_context; | |
| Body* my_body; | | Body* my_body; | |
| aligned_space<Body,1> zombie_space; | | aligned_space<Body,1> zombie_space; | |
| finish_reduce( reduction_context context_ ) : | | finish_reduce( reduction_context context_ ) : | |
| has_right_zombie(false), // TODO: substitute by flag_task::chil
d_stolen? | | has_right_zombie(false), // TODO: substitute by flag_task::chil
d_stolen? | |
| | | | |
| skipping to change at line 72 | | skipping to change at line 76 | |
| my_body(NULL) | | my_body(NULL) | |
| { | | { | |
| } | | } | |
| task* execute() { | | task* execute() { | |
| if( has_right_zombie ) { | | if( has_right_zombie ) { | |
| // Right child was stolen. | | // Right child was stolen. | |
| Body* s = zombie_space.begin(); | | Body* s = zombie_space.begin(); | |
| my_body->join( *s ); | | my_body->join( *s ); | |
| s->~Body(); | | s->~Body(); | |
| } | | } | |
|
| if( my_context==1 ) // left child | | if( my_context==left_child ) | |
| itt_store_word_with_release( static_cast<finish_reduce*>(pa
rent())->my_body, my_body ); | | itt_store_word_with_release( static_cast<finish_reduce*>(pa
rent())->my_body, my_body ); | |
| return NULL; | | return NULL; | |
| } | | } | |
| template<typename Range,typename Body_, typename Partitioner> | | template<typename Range,typename Body_, typename Partitioner> | |
| friend class start_reduce; | | friend class start_reduce; | |
| }; | | }; | |
| | | | |
| //! Task type used to split the work of parallel_reduce. | | //! Task type used to split the work of parallel_reduce. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| | | | |
| skipping to change at line 99 | | skipping to change at line 103 | |
| /*override*/ task* execute(); | | /*override*/ task* execute(); | |
| template<typename Body_> | | template<typename Body_> | |
| friend class finish_reduce; | | friend class finish_reduce; | |
| | | | |
| public: | | public: | |
| //! Constructor used for root task | | //! Constructor used for root task | |
| start_reduce( const Range& range, Body* body, Partitioner& partitio
ner ) : | | start_reduce( const Range& range, Body* body, Partitioner& partitio
ner ) : | |
| my_body(body), | | my_body(body), | |
| my_range(range), | | my_range(range), | |
| my_partition(partitioner), | | my_partition(partitioner), | |
|
| my_context(0) | | my_context(root_task) | |
| { | | { | |
| } | | } | |
| //! Splitting constructor used to generate children. | | //! Splitting constructor used to generate children. | |
| /** parent_ becomes left child. Newly constructed object is right
child. */ | | /** parent_ becomes left child. Newly constructed object is right
child. */ | |
| start_reduce( start_reduce& parent_, split ) : | | start_reduce( start_reduce& parent_, split ) : | |
| my_body(parent_.my_body), | | my_body(parent_.my_body), | |
| my_range(parent_.my_range,split()), | | my_range(parent_.my_range,split()), | |
| my_partition(parent_.my_partition,split()), | | my_partition(parent_.my_partition,split()), | |
|
| my_context(2) | | my_context(right_child) | |
| { | | { | |
| my_partition.set_affinity(*this); | | my_partition.set_affinity(*this); | |
|
| parent_.my_context = 1; | | parent_.my_context = left_child; | |
| } | | } | |
| //! Construct right child from the given range as response to the d
emand. | | //! Construct right child from the given range as response to the d
emand. | |
| /** parent_ remains left child. Newly constructed object is right
child. */ | | /** parent_ remains left child. Newly constructed object is right
child. */ | |
| start_reduce( start_reduce& parent_, const Range& r, depth_t d ) : | | start_reduce( start_reduce& parent_, const Range& r, depth_t d ) : | |
| my_body(parent_.my_body), | | my_body(parent_.my_body), | |
| my_range(r), | | my_range(r), | |
| my_partition(parent_.my_partition,split()), | | my_partition(parent_.my_partition,split()), | |
|
| my_context(2) // right leaf mark | | my_context(right_child) | |
| { | | { | |
| my_partition.set_affinity(*this); | | my_partition.set_affinity(*this); | |
| my_partition.align_depth( d ); | | my_partition.align_depth( d ); | |
|
| parent_.my_context = 1; // left leaf mark | | parent_.my_context = left_child; | |
| } | | } | |
| //! Update affinity info, if any | | //! Update affinity info, if any | |
| /*override*/ void note_affinity( affinity_id id ) { | | /*override*/ void note_affinity( affinity_id id ) { | |
| my_partition.note_affinity( id ); | | my_partition.note_affinity( id ); | |
| } | | } | |
| static void run( const Range& range, Body& body, Partitioner& parti
tioner ) { | | static void run( const Range& range, Body& body, Partitioner& parti
tioner ) { | |
| if( !range.empty() ) { | | if( !range.empty() ) { | |
| #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | | #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP | |
| task::spawn_root_and_wait( *new(task::allocate_root()) star
t_reduce(range,&body,partitioner) ); | | task::spawn_root_and_wait( *new(task::allocate_root()) star
t_reduce(range,&body,partitioner) ); | |
| #else | | #else | |
| | | | |
| skipping to change at line 157 | | skipping to change at line 161 | |
| //! create a continuation task, serve as callback for partitioner | | //! create a continuation task, serve as callback for partitioner | |
| finish_type *create_continuation() { | | finish_type *create_continuation() { | |
| return new( allocate_continuation() ) finish_type(my_context); | | return new( allocate_continuation() ) finish_type(my_context); | |
| } | | } | |
| //! Run body for range | | //! Run body for range | |
| void run_body( Range &r ) { (*my_body)( r ); } | | void run_body( Range &r ) { (*my_body)( r ); } | |
| }; | | }; | |
| template<typename Range, typename Body, typename Partitioner> | | template<typename Range, typename Body, typename Partitioner> | |
| task* start_reduce<Range,Body,Partitioner>::execute() { | | task* start_reduce<Range,Body,Partitioner>::execute() { | |
| my_partition.check_being_stolen( *this ); | | my_partition.check_being_stolen( *this ); | |
|
| if( my_context==2 ) { // right child | | if( my_context==right_child ) { | |
| finish_type* parent_ptr = static_cast<finish_type*>(parent()); | | finish_type* parent_ptr = static_cast<finish_type*>(parent()); | |
| if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TOD
O: replace by is_stolen_task() or by parent_ptr->ref_count() == 2??? | | if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TOD
O: replace by is_stolen_task() or by parent_ptr->ref_count() == 2??? | |
| my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_
body,split()); | | my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_
body,split()); | |
| parent_ptr->has_right_zombie = true; | | parent_ptr->has_right_zombie = true; | |
| } | | } | |
|
| } else __TBB_ASSERT(my_context==0,0);// because left leaf spawns ri
ght leafs without recycling | | } else __TBB_ASSERT(my_context==root_task,NULL);// because left lea
f spawns right leafs without recycling | |
| my_partition.execute(*this, my_range); | | my_partition.execute(*this, my_range); | |
|
| if( my_context==1 ) { | | if( my_context==left_child ) { | |
| finish_type* parent_ptr = static_cast<finish_type*>(parent()); | | finish_type* parent_ptr = static_cast<finish_type*>(parent()); | |
|
| __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),0); | | __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL); | |
| itt_store_word_with_release(parent_ptr->my_body, my_body ); | | itt_store_word_with_release(parent_ptr->my_body, my_body ); | |
| } | | } | |
| return NULL; | | return NULL; | |
| } | | } | |
| | | | |
| #if TBB_PREVIEW_DETERMINISTIC_REDUCE | | #if TBB_PREVIEW_DETERMINISTIC_REDUCE | |
|
| //! Task type use to combine the partial results of parallel_determinis
tic_reduce. | | //! Task type used to combine the partial results of parallel_determini
stic_reduce. | |
| /** @ingroup algorithms */ | | /** @ingroup algorithms */ | |
| template<typename Body> | | template<typename Body> | |
| class finish_deterministic_reduce: public task { | | class finish_deterministic_reduce: public task { | |
| Body &my_left_body; | | Body &my_left_body; | |
| Body my_right_body; | | Body my_right_body; | |
| | | | |
| finish_deterministic_reduce( Body &body ) : | | finish_deterministic_reduce( Body &body ) : | |
| my_left_body( body ), | | my_left_body( body ), | |
| my_right_body( body, split() ) | | my_right_body( body, split() ) | |
| { | | { | |
| | | | |
End of changes. 13 change blocks. |
| 13 lines changed or deleted | | 17 lines changed or added | |
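 | The named reduction_context values above (root_task, left_child, right_child) only change the internals of start_reduce/finish_reduce; the public interface is unaffected. For orientation, a typical Body used with tbb::parallel_reduce still provides a splitting constructor, operator() and join(), along these lines (an illustrative sketch, not code from this header):

    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"
    #include <stddef.h>

    struct SumBody {
        const float* my_data;
        float my_sum;
        SumBody( const float* data ) : my_data(data), my_sum(0) {}
        SumBody( SumBody& other, tbb::split ) : my_data(other.my_data), my_sum(0) {} // right child starts empty
        void operator()( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i )
                my_sum += my_data[i];                         // accumulate this subrange
        }
        void join( SumBody& rhs ) { my_sum += rhs.my_sum; }   // combine left and right partial results
    };

    float parallel_sum( const float* data, size_t n ) {
        SumBody body(data);
        tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body );
        return body.my_sum;
    }

 | When the right child is stolen, the scheduler constructs its Body with the splitting constructor and later merges it back through join(), which is the path the has_right_zombie / my_body handshake above implements.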
|
| partitioner.h | | partitioner.h | |
| | | | |
| skipping to change at line 224 | | skipping to change at line 224 | |
| flag_task* split_work(StartType &start) { | | flag_task* split_work(StartType &start) { | |
| flag_task* parent_ptr = start.create_continuation(); // the type he
re is to express expectation | | flag_task* parent_ptr = start.create_continuation(); // the type he
re is to express expectation | |
| start.set_parent(parent_ptr); | | start.set_parent(parent_ptr); | |
| parent_ptr->set_ref_count(2); | | parent_ptr->set_ref_count(2); | |
| StartType& right_work = *new( parent_ptr->allocate_child() ) StartT
ype(start, split()); | | StartType& right_work = *new( parent_ptr->allocate_child() ) StartT
ype(start, split()); | |
| start.spawn(right_work); | | start.spawn(right_work); | |
| return parent_ptr; | | return parent_ptr; | |
| } | | } | |
| template<typename StartType, typename Range> | | template<typename StartType, typename Range> | |
| void execute(StartType &start, Range &range) { | | void execute(StartType &start, Range &range) { | |
|
| // The algorithm in a few words ([]-denotes calls to decision matho
ds of partitioner): | | // The algorithm in a few words ([]-denotes calls to decision metho
ds of partitioner): | |
| // [If this task is stolen, adjust depth and divisions if necessary
, set flag]. | | // [If this task is stolen, adjust depth and divisions if necessary
, set flag]. | |
| // If range is divisible { | | // If range is divisible { | |
| // Spread the work while [initial divisions left]; | | // Spread the work while [initial divisions left]; | |
| // Create trap task [if necessary]; | | // Create trap task [if necessary]; | |
| // } | | // } | |
| // If not divisible or [max depth is reached], execute, else do the
range pool part | | // If not divisible or [max depth is reached], execute, else do the
range pool part | |
| task* parent_ptr = start.parent(); | | task* parent_ptr = start.parent(); | |
| if( range.is_divisible() ) { | | if( range.is_divisible() ) { | |
| if( derived().divisions_left() ) | | if( derived().divisions_left() ) | |
| do parent_ptr = split_work(start); // split until divisions
_left() | | do parent_ptr = split_work(start); // split until divisions
_left() | |
| | | | |
| skipping to change at line 299 | | skipping to change at line 299 | |
| if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(
src.my_divisor/my_divisor)); | | if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2(
src.my_divisor/my_divisor)); | |
| #endif | | #endif | |
| } | | } | |
| bool check_being_stolen( task &t) { // part of old should_execute_range
() | | bool check_being_stolen( task &t) { // part of old should_execute_range
() | |
| if( !my_divisor ) { | | if( !my_divisor ) { | |
| my_divisor = 1; // todo: replace by on-stack flag (partition_st
ate's member)? | | my_divisor = 1; // todo: replace by on-stack flag (partition_st
ate's member)? | |
| if( t.is_stolen_task() ) { | | if( t.is_stolen_task() ) { | |
| #if TBB_USE_EXCEPTIONS | | #if TBB_USE_EXCEPTIONS | |
| // RTTI is available, check whether the cast is valid | | // RTTI is available, check whether the cast is valid | |
| __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0); | | __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0); | |
|
| // correctess of the cast rely on avoiding the root task fo
r which: | | // correctness of the cast relies on avoiding the root task
for which: | |
| // - initial value of my_divisor != 0 (protected by separat
e assertion) | | // - initial value of my_divisor != 0 (protected by separat
e assertion) | |
| // - is_stolen_task() always return false for the root task
. | | // - is_stolen_task() always return false for the root task
. | |
| #endif | | #endif | |
| static_cast<flag_task*>(t.parent())->child_stolen = true; | | static_cast<flag_task*>(t.parent())->child_stolen = true; | |
| my_max_depth++; | | my_max_depth++; | |
| return true; | | return true; | |
| } | | } | |
| } | | } | |
| return false; | | return false; | |
| } | | } | |
| | | | |
| skipping to change at line 357 | | skipping to change at line 357 | |
| } | | } | |
| public: | | public: | |
| affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& a
p ) { | | affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& a
p ) { | |
| __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two"
); | | __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two"
); | |
| ap.resize(factor); | | ap.resize(factor); | |
| my_array = ap.my_array; | | my_array = ap.my_array; | |
| map_begin = 0; | | map_begin = 0; | |
| map_end = unsigned(ap.my_size); | | map_end = unsigned(ap.my_size); | |
| set_mid(); | | set_mid(); | |
| my_delay = true; | | my_delay = true; | |
|
| my_divisor /= __TBB_INITIAL_CHUNKS; // let excatly P tasks to be di | | my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks to be di | |
| stributed across workers | | stributed across workers | |
| my_max_depth = factor_power+1; // the first factor_power ranges wil | | my_max_depth = factor_power+1; // the first factor_power ranges wil | |
| l be spawned, and >=1 ranges should left | | l be spawned, and >=1 ranges should be left | |
| __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); | | __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); | |
| } | | } | |
| affinity_partition_type(affinity_partition_type& p, split) | | affinity_partition_type(affinity_partition_type& p, split) | |
| : auto_partition_type_base<affinity_partition_type>(p, split()), my
_array(p.my_array) { | | : auto_partition_type_base<affinity_partition_type>(p, split()), my
_array(p.my_array) { | |
| __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi
n)%factor==0, NULL ); | | __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begi
n)%factor==0, NULL ); | |
| map_end = p.map_end; | | map_end = p.map_end; | |
| map_begin = p.map_end = p.map_mid; | | map_begin = p.map_end = p.map_mid; | |
| set_mid(); p.set_mid(); | | set_mid(); p.set_mid(); | |
| my_delay = p.my_delay; | | my_delay = p.my_delay; | |
| } | | } | |
| | | | |
End of changes. 3 change blocks. |
| 6 lines changed or deleted | | 6 lines changed or added | |
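 | The partitioner.h changes are comment fixes only. As context for the affinity_partition_type above: the user-visible tbb::affinity_partitioner must be reused across invocations so the recorded task-to-thread mapping can be replayed, roughly like this (illustrative sketch; the function and names are made up):

    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include "tbb/partitioner.h"
    #include <stddef.h>

    struct Scale {
        float* my_x;
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i=r.begin(); i!=r.end(); ++i )
                my_x[i] *= 0.5f;             // cache-friendly per-element work
        }
    };

    void relax( float* x, size_t n, int sweeps ) {
        Scale body = { x };
        tbb::affinity_partitioner ap;        // must outlive the loop so affinity hints are replayed
        for( int s=0; s<sweeps; ++s )
            tbb::parallel_for( tbb::blocked_range<size_t>(0,n), body, ap );
    }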
|
| queuing_rw_mutex.h | | queuing_rw_mutex.h | |
| | | | |
| skipping to change at line 51 | | skipping to change at line 51 | |
| | | | |
| #if !TBB_USE_EXCEPTIONS && _MSC_VER | | #if !TBB_USE_EXCEPTIONS && _MSC_VER | |
| #pragma warning (pop) | | #pragma warning (pop) | |
| #endif | | #endif | |
| | | | |
| #include "atomic.h" | | #include "atomic.h" | |
| #include "tbb_profiling.h" | | #include "tbb_profiling.h" | |
| | | | |
| namespace tbb { | | namespace tbb { | |
| | | | |
|
| //! Reader-writer lock with local-only spinning. | | //! Queuing reader-writer mutex with local-only spinning. | |
| /** Adapted from Krieger, Stumm, et al. pseudocode at | | /** Adapted from Krieger, Stumm, et al. pseudocode at | |
| http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93 | | http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93 | |
| @ingroup synchronization */ | | @ingroup synchronization */ | |
| class queuing_rw_mutex { | | class queuing_rw_mutex { | |
| public: | | public: | |
| //! Construct unacquired mutex. | | //! Construct unacquired mutex. | |
| queuing_rw_mutex() { | | queuing_rw_mutex() { | |
| q_tail = NULL; | | q_tail = NULL; | |
| #if TBB_USE_THREADING_TOOLS | | #if TBB_USE_THREADING_TOOLS | |
| internal_construct(); | | internal_construct(); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NUL
L | | //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NUL
L | |
| ~queuing_rw_mutex() { | | ~queuing_rw_mutex() { | |
| #if TBB_USE_ASSERT | | #if TBB_USE_ASSERT | |
| __TBB_ASSERT( !q_tail, "destruction of an acquired mutex"); | | __TBB_ASSERT( !q_tail, "destruction of an acquired mutex"); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
|
| class scoped_lock; | | | |
| friend class scoped_lock; | | | |
| | | | |
| //! The scoped locking pattern | | //! The scoped locking pattern | |
| /** It helps to avoid the common problem of forgetting to release lock. | | /** It helps to avoid the common problem of forgetting to release lock. | |
| It also nicely provides the "node" for queuing locks. */ | | It also nicely provides the "node" for queuing locks. */ | |
| class scoped_lock: internal::no_copy { | | class scoped_lock: internal::no_copy { | |
|
| //! Initialize fields | | //! Initialize fields to mean "no lock held". | |
| void initialize() { | | void initialize() { | |
| my_mutex = NULL; | | my_mutex = NULL; | |
| #if TBB_USE_ASSERT | | #if TBB_USE_ASSERT | |
| my_state = 0xFF; // Set to invalid state | | my_state = 0xFF; // Set to invalid state | |
| internal::poison_pointer(my_next); | | internal::poison_pointer(my_next); | |
| internal::poison_pointer(my_prev); | | internal::poison_pointer(my_prev); | |
| #endif /* TBB_USE_ASSERT */ | | #endif /* TBB_USE_ASSERT */ | |
| } | | } | |
|
| | | | |
| public: | | public: | |
| //! Construct lock that has not acquired a mutex. | | //! Construct lock that has not acquired a mutex. | |
| /** Equivalent to zero-initialization of *this. */ | | /** Equivalent to zero-initialization of *this. */ | |
| scoped_lock() {initialize();} | | scoped_lock() {initialize();} | |
| | | | |
| //! Acquire lock on given mutex. | | //! Acquire lock on given mutex. | |
| scoped_lock( queuing_rw_mutex& m, bool write=true ) { | | scoped_lock( queuing_rw_mutex& m, bool write=true ) { | |
| initialize(); | | initialize(); | |
| acquire(m,write); | | acquire(m,write); | |
| } | | } | |
| | | | |
| //! Release lock (if lock is held). | | //! Release lock (if lock is held). | |
| ~scoped_lock() { | | ~scoped_lock() { | |
| if( my_mutex ) release(); | | if( my_mutex ) release(); | |
| } | | } | |
| | | | |
| //! Acquire lock on given mutex. | | //! Acquire lock on given mutex. | |
| void acquire( queuing_rw_mutex& m, bool write=true ); | | void acquire( queuing_rw_mutex& m, bool write=true ); | |
| | | | |
|
| //! Try acquire lock on given mutex. | | //! Acquire lock on given mutex if free (i.e. non-blocking) | |
| bool try_acquire( queuing_rw_mutex& m, bool write=true ); | | bool try_acquire( queuing_rw_mutex& m, bool write=true ); | |
| | | | |
| //! Release lock. | | //! Release lock. | |
| void release(); | | void release(); | |
| | | | |
| //! Upgrade reader to become a writer. | | //! Upgrade reader to become a writer. | |
|
| /** Returns true if the upgrade happened without re-acquiring the l
ock and false if opposite */ | | /** Returns whether the upgrade happened without releasing and re-a
cquiring the lock */ | |
| bool upgrade_to_writer(); | | bool upgrade_to_writer(); | |
| | | | |
| //! Downgrade writer to become a reader. | | //! Downgrade writer to become a reader. | |
| bool downgrade_to_reader(); | | bool downgrade_to_reader(); | |
| | | | |
| private: | | private: | |
|
| //! The pointer to the current mutex to work | | //! The pointer to the mutex owned, or NULL if not holding a mutex. | |
| queuing_rw_mutex* my_mutex; | | queuing_rw_mutex* my_mutex; | |
| | | | |
| //! The pointer to the previous and next competitors for a mutex | | //! The pointer to the previous and next competitors for a mutex | |
| scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next; | | scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next; | |
| | | | |
| typedef unsigned char state_t; | | typedef unsigned char state_t; | |
| | | | |
| //! State of the request: reader, writer, active reader, other serv
ice states | | //! State of the request: reader, writer, active reader, other serv
ice states | |
| atomic<state_t> my_state; | | atomic<state_t> my_state; | |
| | | | |
| | | | |
End of changes. 7 change blocks. |
| 8 lines changed or deleted | | 6 lines changed or added | |
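 | The reworded comment on upgrade_to_writer() is worth spelling out: when it returns false, the lock was released and re-acquired during the upgrade, so anything observed under the read lock must be revalidated. An illustrative usage sketch (the shared data and callbacks are hypothetical):

    #include "tbb/queuing_rw_mutex.h"

    tbb::queuing_rw_mutex table_mutex;       // hypothetical mutex guarding some shared table

    void update_if_needed( bool (*needs_update)(), void (*do_update)() ) {
        tbb::queuing_rw_mutex::scoped_lock lock( table_mutex, /*write=*/false );
        if( needs_update() ) {
            if( !lock.upgrade_to_writer() ) {
                // The mutex was released and re-acquired while upgrading:
                // another writer may have run, so re-check the condition.
                if( !needs_update() )
                    return;                  // someone else already did the work
            }
            do_update();                     // now held exclusively
        }
    }                                        // scoped_lock releases in its destructor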
|
| spin_rw_mutex.h | | spin_rw_mutex.h | |
| | | | |
| skipping to change at line 50 | | skipping to change at line 50 | |
| | | | |
| //! Fast, unfair, spinning reader-writer lock with backoff and writer-prefe
rence | | //! Fast, unfair, spinning reader-writer lock with backoff and writer-prefe
rence | |
| /** @ingroup synchronization */ | | /** @ingroup synchronization */ | |
| class spin_rw_mutex_v3 { | | class spin_rw_mutex_v3 { | |
| //! @cond INTERNAL | | //! @cond INTERNAL | |
| | | | |
| //! Internal acquire write lock. | | //! Internal acquire write lock. | |
| bool __TBB_EXPORTED_METHOD internal_acquire_writer(); | | bool __TBB_EXPORTED_METHOD internal_acquire_writer(); | |
| | | | |
| //! Out of line code for releasing a write lock. | | //! Out of line code for releasing a write lock. | |
|
| /** This code is has debug checking and instrumentation for Intel(R) Th
read Checker and Intel(R) Thread Profiler. */ | | /** This code has debug checking and instrumentation for Intel(R) Threa
d Checker and Intel(R) Thread Profiler. */ | |
| void __TBB_EXPORTED_METHOD internal_release_writer(); | | void __TBB_EXPORTED_METHOD internal_release_writer(); | |
| | | | |
| //! Internal acquire read lock. | | //! Internal acquire read lock. | |
| void __TBB_EXPORTED_METHOD internal_acquire_reader(); | | void __TBB_EXPORTED_METHOD internal_acquire_reader(); | |
| | | | |
| //! Internal upgrade reader to become a writer. | | //! Internal upgrade reader to become a writer. | |
| bool __TBB_EXPORTED_METHOD internal_upgrade(); | | bool __TBB_EXPORTED_METHOD internal_upgrade(); | |
| | | | |
| //! Out of line code for downgrading a writer to a reader. | | //! Out of line code for downgrading a writer to a reader. | |
|
| /** This code is has debug checking and instrumentation for Intel(R) Th
read Checker and Intel(R) Thread Profiler. */ | | /** This code has debug checking and instrumentation for Intel(R) Threa
d Checker and Intel(R) Thread Profiler. */ | |
| void __TBB_EXPORTED_METHOD internal_downgrade(); | | void __TBB_EXPORTED_METHOD internal_downgrade(); | |
| | | | |
| //! Internal release read lock. | | //! Internal release read lock. | |
| void __TBB_EXPORTED_METHOD internal_release_reader(); | | void __TBB_EXPORTED_METHOD internal_release_reader(); | |
| | | | |
| //! Internal try_acquire write lock. | | //! Internal try_acquire write lock. | |
| bool __TBB_EXPORTED_METHOD internal_try_acquire_writer(); | | bool __TBB_EXPORTED_METHOD internal_try_acquire_writer(); | |
| | | | |
| //! Internal try_acquire read lock. | | //! Internal try_acquire read lock. | |
| bool __TBB_EXPORTED_METHOD internal_try_acquire_reader(); | | bool __TBB_EXPORTED_METHOD internal_try_acquire_reader(); | |
| | | | |
| skipping to change at line 117 | | skipping to change at line 117 | |
| //! Acquire lock on given mutex. | | //! Acquire lock on given mutex. | |
| void acquire( spin_rw_mutex& m, bool write = true ) { | | void acquire( spin_rw_mutex& m, bool write = true ) { | |
| __TBB_ASSERT( !mutex, "holding mutex already" ); | | __TBB_ASSERT( !mutex, "holding mutex already" ); | |
| is_writer = write; | | is_writer = write; | |
| mutex = &m; | | mutex = &m; | |
| if( write ) mutex->internal_acquire_writer(); | | if( write ) mutex->internal_acquire_writer(); | |
| else mutex->internal_acquire_reader(); | | else mutex->internal_acquire_reader(); | |
| } | | } | |
| | | | |
| //! Upgrade reader to become a writer. | | //! Upgrade reader to become a writer. | |
|
| /** Returns true if the upgrade happened without re-acquiring the l
ock and false if opposite */ | | /** Returns whether the upgrade happened without releasing and re-a
cquiring the lock */ | |
| bool upgrade_to_writer() { | | bool upgrade_to_writer() { | |
| __TBB_ASSERT( mutex, "lock is not acquired" ); | | __TBB_ASSERT( mutex, "lock is not acquired" ); | |
| __TBB_ASSERT( !is_writer, "not a reader" ); | | __TBB_ASSERT( !is_writer, "not a reader" ); | |
| is_writer = true; | | is_writer = true; | |
| return mutex->internal_upgrade(); | | return mutex->internal_upgrade(); | |
| } | | } | |
| | | | |
| //! Release lock. | | //! Release lock. | |
| void release() { | | void release() { | |
| __TBB_ASSERT( mutex, "lock is not acquired" ); | | __TBB_ASSERT( mutex, "lock is not acquired" ); | |
| | | | |
| skipping to change at line 141 | | skipping to change at line 141 | |
| if( is_writer ) m->internal_release_writer(); | | if( is_writer ) m->internal_release_writer(); | |
| else m->internal_release_reader(); | | else m->internal_release_reader(); | |
| #else | | #else | |
| if( is_writer ) __TBB_AtomicAND( &m->state, READERS ); | | if( is_writer ) __TBB_AtomicAND( &m->state, READERS ); | |
| else __TBB_FetchAndAddWrelease( &m->state, -(intptr_
t)ONE_READER); | | else __TBB_FetchAndAddWrelease( &m->state, -(intptr_
t)ONE_READER); | |
| #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | | #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | |
| } | | } | |
| | | | |
| //! Downgrade writer to become a reader. | | //! Downgrade writer to become a reader. | |
| bool downgrade_to_reader() { | | bool downgrade_to_reader() { | |
|
| #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | | | |
| __TBB_ASSERT( mutex, "lock is not acquired" ); | | __TBB_ASSERT( mutex, "lock is not acquired" ); | |
| __TBB_ASSERT( is_writer, "not a writer" ); | | __TBB_ASSERT( is_writer, "not a writer" ); | |
|
| | | #if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT | |
| mutex->internal_downgrade(); | | mutex->internal_downgrade(); | |
| #else | | #else | |
| __TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER
)); | | __TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER
)); | |
| #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | | #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ | |
| is_writer = false; | | is_writer = false; | |
|
| | | | |
| return true; | | return true; | |
| } | | } | |
| | | | |
| //! Try acquire lock on given mutex. | | //! Try acquire lock on given mutex. | |
| bool try_acquire( spin_rw_mutex& m, bool write = true ) { | | bool try_acquire( spin_rw_mutex& m, bool write = true ) { | |
| __TBB_ASSERT( !mutex, "holding mutex already" ); | | __TBB_ASSERT( !mutex, "holding mutex already" ); | |
| bool result; | | bool result; | |
| is_writer = write; | | is_writer = write; | |
| result = write? m.internal_try_acquire_writer() | | result = write? m.internal_try_acquire_writer() | |
| : m.internal_try_acquire_reader(); | | : m.internal_try_acquire_reader(); | |
| | | | |
End of changes. 6 change blocks. |
| 5 lines changed or deleted | | 4 lines changed or added | |
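 | The same upgrade_to_writer() wording now also applies here; downgrade_to_reader(), whose assertions were simply hoisted out of the instrumentation-specific branch, lets a writer keep the lock while readmitting readers. A short illustrative sketch (the protected data is hypothetical):

    #include "tbb/spin_rw_mutex.h"

    tbb::spin_rw_mutex cache_mutex;          // hypothetical lock protecting a small cache
    int cache_value;

    int publish_and_read( int new_value ) {
        tbb::spin_rw_mutex::scoped_lock lock( cache_mutex, /*write=*/true );
        cache_value = new_value;             // exclusive phase: publish
        lock.downgrade_to_reader();          // keep the lock, but allow other readers in
        return cache_value;                  // shared phase: read alongside other readers
    }                                        // released by the destructor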
|
| sunos_sparc.h | | sunos_sparc.h | |
| | | | |
| skipping to change at line 39 | | skipping to change at line 39 | |
| #if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H) | | #if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H) | |
| #error Do not #include this internal file directly; use public TBB headers
instead. | | #error Do not #include this internal file directly; use public TBB headers
instead. | |
| #endif | | #endif | |
| | | | |
| #define __TBB_machine_sunos_sparc_H | | #define __TBB_machine_sunos_sparc_H | |
| | | | |
| #include <stdint.h> | | #include <stdint.h> | |
| #include <unistd.h> | | #include <unistd.h> | |
| | | | |
| #define __TBB_WORDSIZE 8 | | #define __TBB_WORDSIZE 8 | |
|
 | #define __TBB_BIG_ENDIAN 1 | | #define __TBB_BIG_ENDIAN 1 // assumption (hardware may support page-specific bi-endianness) | |
| | | | |
| /** To those working on SPARC hardware. Consider relaxing acquire and relea
se | | /** To those working on SPARC hardware. Consider relaxing acquire and relea
se | |
| consistency helpers to no-op (as this port covers TSO mode only). **/ | | consistency helpers to no-op (as this port covers TSO mode only). **/ | |
| #define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"me
mory") | | #define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"me
mory") | |
| #define __TBB_control_consistency_helper() __TBB_compiler_fence() | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_release_consistency_helper() __TBB_compiler_fence() | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_full_memory_fence() __asm__ __volatile__("membar #Lo
adLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory") | | #define __TBB_full_memory_fence() __asm__ __volatile__("membar #Lo
adLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory") | |
| | | | |
| //-------------------------------------------------- | | //-------------------------------------------------- | |
| | | | |
| skipping to change at line 100 | | skipping to change at line 100 | |
| | | | |
| /** | | /** | |
| * Atomic fetch and add for 32 bit values, in this case implemented by cont
inuously checking success of atomicity | | * Atomic fetch and add for 32 bit values, in this case implemented by cont
inuously checking success of atomicity | |
| * @param ptr pointer to value to add addend to | | * @param ptr pointer to value to add addend to | |
 | * @param addend value to add to *ptr | | * @param addend value to add to *ptr | |
 | * @return value at ptr before addend was added | | * @return value at ptr before addend was added | |
| */ | | */ | |
| static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t a
ddend){ | | static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t a
ddend){ | |
| int32_t result; | | int32_t result; | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
|
| "0:\t add\t %3, %4, %0\n" // do addition | | "0:\t add\t %3, %4, %0\n" // do addition | |
| "\t cas\t [%2], %3, %0\n" // cas to store re | | "\t cas\t [%2], %3, %0\n" // cas to store | |
| sult in memory | | result in memory | |
| "\t cmp\t %3, %0\n" // check if value fr | | "\t cmp\t %3, %0\n" // check if val | |
| om memory is original | | ue from memory is original | |
| "\t bne,a,pn\t %%icc, 0b\n" // if not try ag | | "\t bne,a,pn\t %%icc, 0b\n" // if not try a | |
| ain | | gain | |
| "\t mov %0, %3\n" // use branch delay sl | | "\t mov %0, %3\n" // use branch d | |
| ot to move new value in memory to be added | | elay slot to move new value in memory to be added | |
| : "=&r"(result), "=m"(*(int32_t *)ptr) | | : "=&r"(result), "=m"(*(int32_t *)ptr) | |
| : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t
*)ptr) | | : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t
*)ptr) | |
| : "ccr", "memory"); | | : "ccr", "memory"); | |
| return result; | | return result; | |
| } | | } | |
| | | | |
| /** | | /** | |
| * Atomic fetch and add for 64 bit values, in this case implemented by cont
inuously checking success of atomicity | | * Atomic fetch and add for 64 bit values, in this case implemented by cont
inuously checking success of atomicity | |
| * @param ptr pointer to value to add addend to | | * @param ptr pointer to value to add addend to | |
 | * @param addend value to add to *ptr | | * @param addend value to add to *ptr | |
 | * @return value at ptr before addend was added | | * @return value at ptr before addend was added | |
| */ | | */ | |
| static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t a
ddend){ | | static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t a
ddend){ | |
| int64_t result; | | int64_t result; | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
|
| "0:\t add\t %3, %4, %0\n" // do addition | | "0:\t add\t %3, %4, %0\n" // do addition | |
| "\t casx\t [%2], %3, %0\n" // cas to store r | | "\t casx\t [%2], %3, %0\n" // cas to store | |
| esult in memory | | result in memory | |
| "\t cmp\t %3, %0\n" // check if value fr | | "\t cmp\t %3, %0\n" // check if val | |
| om memory is original | | ue from memory is original | |
| "\t bne,a,pn\t %%xcc, 0b\n" // if not try ag | | "\t bne,a,pn\t %%xcc, 0b\n" // if not try a | |
| ain | | gain | |
| "\t mov %0, %3\n" // use branch delay sl | | "\t mov %0, %3\n" // use branch d | |
| ot to move new value in memory to be added | | elay slot to move new value in memory to be added | |
| : "=&r"(result), "=m"(*(int64_t *)ptr) | | : "=&r"(result), "=m"(*(int64_t *)ptr) | |
| : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_
t *)ptr) | | : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_
t *)ptr) | |
| : "ccr", "memory"); | | : "ccr", "memory"); | |
| return result; | | return result; | |
| } | | } | |
| | | | |
| //-------------------------------------------------------- | | //-------------------------------------------------------- | |
| // Logarithm (base two, integer) | | // Logarithm (base two, integer) | |
| //-------------------------------------------------------- | | //-------------------------------------------------------- | |
| | | | |
| static inline int64_t __TBB_machine_lg( uint64_t x ) { | | static inline int64_t __TBB_machine_lg( uint64_t x ) { | |
|
| | | __TBB_ASSERT(x, "__TBB_Log2(0) undefined"); | |
| uint64_t count; | | uint64_t count; | |
| // one hot encode | | // one hot encode | |
| x |= (x >> 1); | | x |= (x >> 1); | |
| x |= (x >> 2); | | x |= (x >> 2); | |
| x |= (x >> 4); | | x |= (x >> 4); | |
| x |= (x >> 8); | | x |= (x >> 8); | |
| x |= (x >> 16); | | x |= (x >> 16); | |
| x |= (x >> 32); | | x |= (x >> 32); | |
| // count 1's | | // count 1's | |
| __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); | | __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); | |
| return count-1; | | return count-1; | |
| } | | } | |
| | | | |
| //-------------------------------------------------------- | | //-------------------------------------------------------- | |
| | | | |
|
| static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend )
{ | | static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) { | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
|
| "0:\t or\t %2, %3, %%g1\n" // do addition | | "0:\t or\t %2, %3, %%g1\n" // do operation | |
| "\t casx\t [%1], %2, %%g1\n" // cas to s | | "\t casx\t [%1], %2, %%g1\n" // cas to store | |
| tore result in memory | | result in memory | |
| "\t cmp\t %2, %%g1\n" // check if | | "\t cmp\t %2, %%g1\n" // check if val | |
| value from memory is original | | ue from memory is original | |
| "\t bne,a,pn\t %%xcc, 0b\n" // if not try again | | "\t bne,a,pn\t %%xcc, 0b\n" // if not try a | |
| "\t mov %%g1, %2\n" // use bran | | gain | |
| ch delay slot to move new value in memory to be added | | "\t mov %%g1, %2\n" // use branch d | |
| | | elay slot to move new value in memory to be added | |
| : "=m"(*(int64_t *)ptr) | | : "=m"(*(int64_t *)ptr) | |
|
| : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_
t *)ptr) | | : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t
*)ptr) | |
| : "ccr", "g1", "memory"); | | : "ccr", "g1", "memory"); | |
| } | | } | |
| | | | |
|
| static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend )
{ | | static inline void __TBB_machine_and( volatile void *ptr, uint64_t value )
{ | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
|
| "0:\t and\t %2, %3, %%g1\n" // do addition | | "0:\t and\t %2, %3, %%g1\n" // do operation | |
| "\t casx\t [%1], %2, %%g1\n" // cas to s | | "\t casx\t [%1], %2, %%g1\n" // cas to store | |
| tore result in memory | | result in memory | |
| "\t cmp\t %2, %%g1\n" // check if | | "\t cmp\t %2, %%g1\n" // check if val | |
| value from memory is original | | ue from memory is original | |
| "\t bne,a,pn\t %%xcc, 0b\n" // if not try a
gain | | "\t bne,a,pn\t %%xcc, 0b\n" // if not try a
gain | |
|
| "\t mov %%g1, %2\n" // use bran
ch delay slot to move new value in memory to be added | | "\t mov %%g1, %2\n" // use branch d
elay slot to move new value in memory to be added | |
| : "=m"(*(int64_t *)ptr) | | : "=m"(*(int64_t *)ptr) | |
|
| : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_
t *)ptr) | | : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t
*)ptr) | |
| : "ccr", "g1", "memory"); | | : "ccr", "g1", "memory"); | |
| } | | } | |
| | | | |
| static inline void __TBB_machine_pause( int32_t delay ) { | | static inline void __TBB_machine_pause( int32_t delay ) { | |
|
| // do nothing, inlined, doesnt matter | | // do nothing, inlined, doesn't matter | |
| } | | } | |
| | | | |
| // put 0xff in memory location, return memory value, | | // put 0xff in memory location, return memory value, | |
| // generic trylockbyte puts 0x01, however this is fine | | // generic trylockbyte puts 0x01, however this is fine | |
| // because all that matters is that 0 is unlocked | | // because all that matters is that 0 is unlocked | |
| static inline bool __TBB_machine_trylockbyte(unsigned char &flag){ | | static inline bool __TBB_machine_trylockbyte(unsigned char &flag){ | |
| unsigned char result; | | unsigned char result; | |
| __asm__ __volatile__ ( | | __asm__ __volatile__ ( | |
| "ldstub\t [%2], %0\n" | | "ldstub\t [%2], %0\n" | |
| : "=r"(result), "=m"(flag) | | : "=r"(result), "=m"(flag) | |
| : "r"(&flag), "m"(flag) | | : "r"(&flag), "m"(flag) | |
| : "memory"); | | : "memory"); | |
| return result == 0; | | return result == 0; | |
| } | | } | |
| | | | |
|
| #define __TBB_USE_GENERIC_PART_WORD_CAS 1 | | #define __TBB_USE_GENERIC_PART_WORD_CAS 1 | |
| #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 | | #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 | |
| #define __TBB_USE_GENERIC_FETCH_STORE 1 | | #define __TBB_USE_GENERIC_FETCH_STORE 1 | |
| #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 | |
| | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | | #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | | #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) | |
| | | | |
| // Definition of other functions | | // Definition of other functions | |
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause(V) | |
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
| #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) | | #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) | |
| | | | |
End of changes. 13 change blocks. |
| 43 lines changed or deleted | | 46 lines changed or added | |
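 | All of the SPARC routines above (fetchadd4/8 and the atomic or/and) are the same cas/casx retry loop; the diff only reflows the inline-asm comments and renames a parameter. The pattern, written out in portable C++ on top of a compare-and-swap (a sketch of the idea using the GCC __sync built-in, not the actual port code):

    #include <stdint.h>

    // Full-fence 64-bit CAS via the GCC built-in: stores 'value' if *ptr equals
    // 'comparand' and returns the previous contents, mirroring __TBB_machine_cmpswp8.
    static inline int64_t compare_and_swap8( volatile int64_t* ptr, int64_t value, int64_t comparand ) {
        return __sync_val_compare_and_swap( ptr, comparand, value );
    }

    // Retry loop equivalent to the casx-based fetch-and-add above.
    static inline int64_t fetch_and_add8( volatile int64_t* ptr, int64_t addend ) {
        for(;;) {
            int64_t snapshot = *ptr;                       // read the current value
            int64_t previous = compare_and_swap8( ptr, snapshot + addend, snapshot );
            if( previous == snapshot )
                return snapshot;                           // success: return the old value
            // otherwise another thread intervened; try again
        }
    }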
|
| tbb_config.h | | tbb_config.h | |
| | | | |
| skipping to change at line 41 | | skipping to change at line 41 | |
| | | | |
| /** This header is supposed to contain macro definitions and C style commen
ts only. | | /** This header is supposed to contain macro definitions and C style commen
ts only. | |
| The macros defined here are intended to control such aspects of TBB bui
ld as | | The macros defined here are intended to control such aspects of TBB bui
ld as | |
| - presence of compiler features | | - presence of compiler features | |
| - compilation modes | | - compilation modes | |
| - feature sets | | - feature sets | |
| - known compiler/platform issues | | - known compiler/platform issues | |
| **/ | | **/ | |
| | | | |
| #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC
_PATCHLEVEL__) | | #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC
_PATCHLEVEL__) | |
|
| | | #if __clang__ | |
| | | #define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 10 | |
| | | 0 + __clang_patchlevel__) | |
| | | #endif | |
| | | | |
| /** Presence of compiler features **/ | | /** Presence of compiler features **/ | |
| | | | |
| #if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER) | | #if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER) | |
| /** warning suppression pragmas available in GCC since 4.4 **/ | | /** warning suppression pragmas available in GCC since 4.4 **/ | |
| #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1 | | #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1 | |
| #endif | | #endif | |
| | | | |
|
| /* TODO: The following condition should be extended when new compilers/runt | | /* Select particular features of C++11 based on compiler version. | |
| imes | | ICC 12.1 (Linux), GCC 4.3 and higher, clang 2.9 and higher | |
| with std::exception_ptr support appear. */ | | set __GXX_EXPERIMENTAL_CXX0X__ in c++11 mode. | |
| #define __TBB_EXCEPTION_PTR_PRESENT ((_MSC_VER >= 1600 || (__GXX_EXPERIMEN | | | |
| TAL_CXX0X__ && __GNUC__==4 && __GNUC_MINOR__>=4)) && !__INTEL_COMPILER) | | Compilers that mimics other compilers (ICC, clang) must be processed bef | |
| | | ore | |
| | | compilers they mimic. | |
| | | | |
| | | TODO: The following conditions should be extended when new compilers/run | |
| | | times | |
| | | support added. | |
| | | */ | |
| | | | |
| | | #if __INTEL_COMPILER | |
| | | #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X | |
| | | __ && __VARIADIC_TEMPLATES | |
| | | #define __TBB_CPP11_RVALUE_REF_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ || _ | |
| | | MSC_VER >= 1600) && (__INTEL_COMPILER >= 1200) | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT 0 | |
| | | #elif __clang__ | |
| | | #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (__GXX_EXPERIMENTAL_CXX0 | |
| | | X__ && __TBB_CLANG_VERSION >= 20900) | |
| | | #define __TBB_CPP11_RVALUE_REF_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && _ | |
| | | _TBB_CLANG_VERSION >= 20900) | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__ | |
| | | #elif __GNUC__ | |
| | | #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X | |
| | | __ | |
| | | #define __TBB_CPP11_RVALUE_REF_PRESENT __GXX_EXPERIMENTAL_CXX0X__ | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__ | |
| | | #elif _MSC_VER | |
| | | #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0 | |
| | | #define __TBB_CPP11_RVALUE_REF_PRESENT 0 | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600) | |
| | | #else | |
| | | #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0 | |
| | | #define __TBB_CPP11_RVALUE_REF_PRESENT 0 | |
| | | #define __TBB_EXCEPTION_PTR_PRESENT 0 | |
| | | #endif | |
| | | | |
| | | // Work around a bug in MinGW32 | |
| | | #if __MINGW32__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(_GLIBCXX_ATOMIC_ | |
| | | BUILTINS_4) | |
| | | #define _GLIBCXX_ATOMIC_BUILTINS_4 | |
| | | #endif | |
| | | | |
| #if __GNUC__ || __SUNPRO_CC || __IBMCPP__ | | #if __GNUC__ || __SUNPRO_CC || __IBMCPP__ | |
| /* ICC defines __GNUC__ and so is covered */ | | /* ICC defines __GNUC__ and so is covered */ | |
| #define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1 | | #define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1 | |
| #elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER) | | #elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER) | |
| #define __TBB_DECLSPEC_ALIGN_PRESENT 1 | | #define __TBB_DECLSPEC_ALIGN_PRESENT 1 | |
| #endif | | #endif | |
| | | | |
| /* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become
optional */ | | /* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become
optional */ | |
| #if (__TBB_GCC_VERSION >= 40306) && !defined(__INTEL_COMPILER) | | #if (__TBB_GCC_VERSION >= 40306) && !defined(__INTEL_COMPILER) | |
| | | | |
| skipping to change at line 142 | | skipping to change at line 179 | |
| #endif | | #endif | |
| #endif /* defined TBB_USE_CAPTURED_EXCEPTION */ | | #endif /* defined TBB_USE_CAPTURED_EXCEPTION */ | |
| | | | |
| /** Check whether the request to use GCC atomics can be satisfied **/ | | /** Check whether the request to use GCC atomics can be satisfied **/ | |
| #if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT) | | #if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT) | |
| #error "GCC atomic built-ins are not supported." | | #error "GCC atomic built-ins are not supported." | |
| #endif | | #endif | |
| | | | |
| /** Internal TBB features & modes **/ | | /** Internal TBB features & modes **/ | |
| | | | |
|
| | | /** __TBB_DYNAMIC_LOAD_ENABLED describes the system possibility to dynamic | |
| | | load libraries | |
| | | __TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when | |
| | | it's necessary to test internal functions not exported from TBB DLLs | |
| | | **/ | |
| | | | |
| #ifndef __TBB_DYNAMIC_LOAD_ENABLED | | #ifndef __TBB_DYNAMIC_LOAD_ENABLED | |
|
| #define __TBB_DYNAMIC_LOAD_ENABLED !__TBB_TASK_CPP_DIRECTLY_INCLUDED | | #define __TBB_DYNAMIC_LOAD_ENABLED 1 | |
| #elif !__TBB_DYNAMIC_LOAD_ENABLED | | #elif !(_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED | |
| #if _WIN32||_WIN64 | | #define __TBB_WEAK_SYMBOLS 1 | |
| #define __TBB_NO_IMPLICIT_LINKAGE 1 | | #endif | |
| #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 | | | |
| #else | | #if (_WIN32||_WIN64) && __TBB_SOURCE_DIRECTLY_INCLUDED | |
| #define __TBB_WEAK_SYMBOLS 1 | | #define __TBB_NO_IMPLICIT_LINKAGE 1 | |
| #endif | | #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1 | |
| #endif | | #endif | |
| | | | |
| #ifndef __TBB_COUNT_TASK_NODES | | #ifndef __TBB_COUNT_TASK_NODES | |
| #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT | | #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT | |
| #endif | | #endif | |
| | | | |
| #ifndef __TBB_TASK_GROUP_CONTEXT | | #ifndef __TBB_TASK_GROUP_CONTEXT | |
| #define __TBB_TASK_GROUP_CONTEXT 1 | | #define __TBB_TASK_GROUP_CONTEXT 1 | |
| #endif /* __TBB_TASK_GROUP_CONTEXT */ | | #endif /* __TBB_TASK_GROUP_CONTEXT */ | |
| | | | |
| | | | |
| skipping to change at line 225 | | skipping to change at line 267 | |
| //! Macro controlling EH usages in TBB tests | | //! Macro controlling EH usages in TBB tests | |
| /** Some older versions of glibc crash when exception handling happens
concurrently. **/ | | /** Some older versions of glibc crash when exception handling happens
concurrently. **/ | |
| #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 | | #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 | | #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 | |
| /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads t
o a worker thread crash on the thread's startup. **/ | | /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads t
o a worker thread crash on the thread's startup. **/ | |
| #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 | | #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
|
| #if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER) | | #if __clang__ || (__GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMP | |
| /** A bug in GCC 3.3 with access to nested classes declared in protecte | | ILER)) | |
| d area */ | | /** Bugs with access to nested classes declared in protected area */ | |
| #define __TBB_GCC_3_3_PROTECTED_BROKEN 1 | | #define __TBB_PROTECTED_NESTED_CLASS_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2) | | #if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2) | |
| /** MinGW has a bug with stack alignment for routines invoked from MS R
TLs. | | /** MinGW has a bug with stack alignment for routines invoked from MS R
TLs. | |
| Since GCC 4.2, the bug can be worked around via a special attribute
. **/ | | Since GCC 4.2, the bug can be worked around via a special attribute
. **/ | |
| #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1 | | #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0 | | #if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0 | |
| // GCC of this version may rashly ignore control dependencies | | // GCC of this version may rashly ignore control dependencies | |
| | | | |
| skipping to change at line 263 | | skipping to change at line 305 | |
| incorrect code when __asm__ arguments have a cast to volatile. **/ | | incorrect code when __asm__ arguments have a cast to volatile. **/ | |
| #define __TBB_ICC_ASM_VOLATILE_BROKEN 1 | | #define __TBB_ICC_ASM_VOLATILE_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
| #if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2) | | #if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2) | |
| /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __ali
gnof(T) | | /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __ali
gnof(T) | |
| when T has not yet been instantiated. **/ | | when T has not yet been instantiated. **/ | |
| #define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1 | | #define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1 | |
| #endif | | #endif | |
| | | | |
|
| | | #if __INTEL_COMPILER | |
| | | #define __TBB_CPP11_STD_FORWARD_BROKEN 1 | |
| | | #else | |
| | | #define __TBB_CPP11_STD_FORWARD_BROKEN 0 | |
| | | #endif | |
| | | | |
| #endif /* __TBB_tbb_config_H */ | | #endif /* __TBB_tbb_config_H */ | |
| | | | |
End of changes. 6 change blocks. |
| 17 lines changed or deleted | | 73 lines changed or added | |
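 | The new detection logic collapses each C++11 question into a single macro (e.g. __TBB_CPP11_RVALUE_REF_PRESENT, __TBB_EXCEPTION_PTR_PRESENT), so dependent code needs one test rather than per-compiler version checks. An illustrative consumer (the class is made up):

    #include "tbb/tbb_config.h"
    #include <stddef.h>

    class buffer {
        char*  my_data;
        size_t my_size;
    public:
        buffer() : my_data(0), my_size(0) {}
    #if __TBB_CPP11_RVALUE_REF_PRESENT
        // Compiled only when the detected compiler and mode support rvalue references.
        buffer( buffer&& other ) : my_data(other.my_data), my_size(other.my_size) {
            other.my_data = 0;               // leave the moved-from object empty but valid
            other.my_size = 0;
        }
    #endif
    };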
|
| tbb_machine.h | | tbb_machine.h | |
| | | | |
| skipping to change at line 51 | | skipping to change at line 51 | |
| __TBB_USE_GENERIC_FETCH_STORE | | __TBB_USE_GENERIC_FETCH_STORE | |
| __TBB_USE_GENERIC_DWORD_FETCH_ADD | | __TBB_USE_GENERIC_DWORD_FETCH_ADD | |
| __TBB_USE_GENERIC_DWORD_FETCH_STORE | | __TBB_USE_GENERIC_DWORD_FETCH_STORE | |
| __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | | __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | |
| __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE | | __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE | |
| __TBB_USE_GENERIC_RELAXED_LOAD_STORE | | __TBB_USE_GENERIC_RELAXED_LOAD_STORE | |
| __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | | __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | |
| | | | |
| In this case tbb_machine.h will add missing functionality based on a mi
nimal set | | In this case tbb_machine.h will add missing functionality based on a mi
nimal set | |
| of APIs that are required to be implemented by all plug-n headers as de
scribed | | of APIs that are required to be implemented by all plug-n headers as de
scribed | |
|
| futher. | | further. | |
| Note that these generic implementations may be sub-optimal for a partic
ular | | Note that these generic implementations may be sub-optimal for a partic
ular | |
| architecture, and thus should be relied upon only after careful evaluat
ion | | architecture, and thus should be relied upon only after careful evaluat
ion | |
| or as the last resort. | | or as the last resort. | |
| | | | |
| Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architectu
re to | | Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architectu
re to | |
| indicate that the port is not going to support double word atomics. It
may also | | indicate that the port is not going to support double word atomics. It
may also | |
| be set to 1 explicitly, though normally this is not necessary as tbb_ma
chine.h | | be set to 1 explicitly, though normally this is not necessary as tbb_ma
chine.h | |
| will set it automatically. | | will set it automatically. | |
| | | | |
|
| | | __TBB_BIG_ENDIAN macro can be defined by the implementation as well. | |
| | | It is used only if the __TBB_USE_GENERIC_PART_WORD_CAS is set. | |
| | | Possible values are: | |
| | | - 1 if the system is big endian, | |
| | | - 0 if it is little endian, | |
| | | - or -1 to explicitly state that __TBB_USE_GENERIC_PART_WORD_CAS ca | |
| | | n not be used. | |
| | | -1 should be used when it is known in advance that endianness can change at run time, | |
| | | or when the layout is neither simply big- nor little-endian. | |
| | | The system will try to detect endianness at run time if the macro is not set | |
| | | (on the assumption that it is either big- or little-endian). | |
| | | | |
| Prerequisites for each architecture port | | Prerequisites for each architecture port | |
| ---------------------------------------- | | ---------------------------------------- | |
|
| The following functions have no generic implementation. Therefore they
must be | | The following functions and macros have no generic implementation. Ther
efore they must be | |
| implemented in each machine architecture specific header either as a co
nventional | | implemented in each machine architecture specific header either as a co
nventional | |
| function or as a functional macro. | | function or as a functional macro. | |
| | | | |
|
| | | __TBB_WORDSIZE | |
| | | This is the size of machine word in bytes, i.e. for 32 bit systems | |
| | | it | |
| | | should be defined to 4. | |
| | | | |
| __TBB_Yield() | | __TBB_Yield() | |
| Signals OS that the current thread is willing to relinquish the rem
ainder | | Signals OS that the current thread is willing to relinquish the rem
ainder | |
| of its time quantum. | | of its time quantum. | |
| | | | |
| __TBB_full_memory_fence() | | __TBB_full_memory_fence() | |
| Must prevent all memory operations from being reordered across it (
both | | Must prevent all memory operations from being reordered across it (
both | |
| by hardware and compiler). All such fences must be totally ordered
(or | | by hardware and compiler). All such fences must be totally ordered
(or | |
| sequentially consistent). | | sequentially consistent). | |
| | | | |
| __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t compa
rand ) | | __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t compa
rand ) | |
| | | | |
| skipping to change at line 92 | | skipping to change at line 107 | |
| | | | |
| __TBB_machine_<op><S><fence>(...), where | | __TBB_machine_<op><S><fence>(...), where | |
| <op> = {cmpswp, fetchadd, fetchstore} | | <op> = {cmpswp, fetchadd, fetchstore} | |
| <S> = {1, 2, 4, 8} | | <S> = {1, 2, 4, 8} | |
| <fence> = {full_fence, acquire, release, relaxed} | | <fence> = {full_fence, acquire, release, relaxed} | |
| Must be provided if __TBB_USE_FENCED_ATOMICS is set. | | Must be provided if __TBB_USE_FENCED_ATOMICS is set. | |
| | | | |
| __TBB_control_consistency_helper() | | __TBB_control_consistency_helper() | |
| Bridges the memory-semantics gap between architectures providing on
ly | | Bridges the memory-semantics gap between architectures providing on
ly | |
| implicit C++0x "consume" semantics (like Power Architecture) and th
ose | | implicit C++0x "consume" semantics (like Power Architecture) and th
ose | |
|
| also implicitly obeying control dependencies (like Itanium). | | also implicitly obeying control dependencies (like IA-64). | |
| It must be used only in conditional code where the condition is its
elf | | It must be used only in conditional code where the condition is its
elf | |
| data-dependent, and will then make subsequent code behave as if the | | data-dependent, and will then make subsequent code behave as if the | |
| original data dependency were acquired. | | original data dependency were acquired. | |
|
| It needs only an empty definition where implied by the architecture | | It needs only a compiler fence where implied by the architecture | |
| either specifically (Itanium) or because generally stronger C++0x " | | either specifically (like IA-64) or because generally stronger "acq | |
| acquire" | | uire" | |
| semantics are enforced (like x86). | | semantics are enforced (like x86). | |
|
| | | It is always valid, though potentially suboptimal, to replace | |
| | | control with acquire on the load and then remove the helper. | |
| | | | |
| __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() | | __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() | |
| Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set
. | | Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set
. | |
| Enforce acquire and release semantics in generic implementations of
fenced | | Enforce acquire and release semantics in generic implementations of
fenced | |
| store and load operations. Depending on the particular architecture
/compiler | | store and load operations. Depending on the particular architecture
/compiler | |
| combination they may be a hardware fence, a compiler fence, both or
nothing. | | combination they may be a hardware fence, a compiler fence, both or
nothing. | |
| **/ | | **/ | |
| | | | |
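For illustration only, a minimal plug-in header satisfying the prerequisites above might look roughly like the sketch below. It is not part of TBB; the GCC builtins, sched_yield(), and the particular (abbreviated) set of __TBB_USE_GENERIC_* requests are assumptions made solely for the example.

    // hypothetical_machine.h -- an illustrative sketch of a minimal 32-bit port
    #include <stdint.h>
    #include <sched.h>                                  // sched_yield()

    #define __TBB_WORDSIZE 4                            // machine word size in bytes

    #define __TBB_Yield()              sched_yield()
    #define __TBB_compiler_fence()     __asm__ __volatile__("" ::: "memory")
    #define __TBB_full_memory_fence()  __sync_synchronize()

    // Assume a strongly ordered target, so a compiler fence suffices for the helpers.
    #define __TBB_control_consistency_helper() __TBB_compiler_fence()
    #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
    #define __TBB_release_consistency_helper() __TBB_compiler_fence()

    // 4-byte compare-and-swap with full fence semantics, mapped to a GCC builtin.
    inline int32_t __TBB_machine_cmpswp4( volatile void* ptr, int32_t value, int32_t comparand ) {
        return __sync_val_compare_and_swap( (volatile int32_t*)ptr, comparand, value );
    }

    // Request the rest from the generic layer in tbb_machine.h (list abbreviated).
    #define __TBB_USE_GENERIC_PART_WORD_CAS          1
    #define __TBB_USE_GENERIC_FETCH_STORE            1
    #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
    #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE     1
    #define __TBB_64BIT_ATOMICS                      0  // no double-word atomics in this sketch

With __TBB_USE_GENERIC_PART_WORD_CAS set and __TBB_BIG_ENDIAN left undefined, the part-word CAS would rely on the run-time endianness probe described above.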
| #include "tbb_stddef.h" | | #include "tbb_stddef.h" | |
| | | | |
| | | | |
| skipping to change at line 348 | | skipping to change at line 365 | |
| } | | } | |
| | | | |
| //! Spin UNTIL the value of the variable is equal to a given value | | //! Spin UNTIL the value of the variable is equal to a given value | |
| /** T and U should be comparable types. */ | | /** T and U should be comparable types. */ | |
| template<typename T, typename U> | | template<typename T, typename U> | |
| void spin_wait_until_eq( const volatile T& location, const U value ) { | | void spin_wait_until_eq( const volatile T& location, const U value ) { | |
| atomic_backoff backoff; | | atomic_backoff backoff; | |
| while( location!=value ) backoff.pause(); | | while( location!=value ) backoff.pause(); | |
| } | | } | |
| | | | |
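As a usage sketch (the flag and its producer are made up, and the helpers are used unqualified as they would be inside the library), a thread can wait for a flag set elsewhere either through spin_wait_until_eq or with an explicit atomic_backoff loop:

    // Illustrative only: 'ready_flag' and its producer are assumptions for this example.
    static __TBB_atomic intptr_t ready_flag;             // 0 until some producer thread stores 1

    void wait_until_ready() {
        // Spins with growing pause counts and presumably yields to the OS
        // once the spin count gets large.
        spin_wait_until_eq( ready_flag, intptr_t(1) );
    }

    void wait_while_busy( const volatile intptr_t& busy ) {
        atomic_backoff backoff;                           // equivalent hand-rolled loop
        while( busy ) backoff.pause();
    }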
|
| // T should be unsigned, otherwise sign propagation will break correctness | | //TODO: add static_assert for the requirements stated below | |
| of bit manipulations. | | //TODO: check if it works with signed types | |
| // S should be either 1 or 2, for the mask calculation to work correctly. | | | |
| // Together, these rules limit applicability of Masked CAS to unsigned char | | // there are following restrictions/limitations for this operation: | |
| and unsigned short. | | // - T should be unsigned, otherwise sign propagation will break correctne | |
| template<size_t S, typename T> | | ss of bit manipulations. | |
| inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand | | // - T should be integer type of at most 4 bytes, for the casts and calcul | |
| ) { | | ations to work. | |
| volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x | | // (Together, these rules limit applicability of Masked CAS to uint8_t | |
| 3 ); | | and uint16_t only, | |
| #if __TBB_BIG_ENDIAN | | // as it does nothing useful for 4 bytes). | |
| const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) ) | | // - The operation assumes that the architecture consistently uses either | |
| ; | | little-endian or big-endian: | |
| #else | | // it does not support mixed-endian or page-specific bi-endian archite | |
| const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) ); | | ctures. | |
| #endif | | // This function is the only use of __TBB_BIG_ENDIAN. | |
| const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset; | | #if (__TBB_BIG_ENDIAN!=-1) | |
| atomic_backoff b; | | #if ( __TBB_USE_GENERIC_PART_WORD_CAS) | |
| uint32_t result; | | #error generic implementation of part-word CAS was explicitly disab | |
| for(;;) { | | led for this configuration | |
| result = *base; // reload the base value which might change during | | #endif | |
| the pause | | template<typename T> | |
| uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset | | inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, | |
| ); | | const T comparand ) { | |
| uint32_t new_value = ( result & ~mask ) | ( value << bitoffset ); | | struct endianness{ static bool is_big_endian(){ | |
| // __TBB_CompareAndSwap4 presumed to have full fence. | | #ifndef __TBB_BIG_ENDIAN | |
| | | const uint32_t probe = 0x03020100; | |
| | | return (((const char*)(&probe))[0]==0x03); | |
| | | #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1) | |
| | | return __TBB_BIG_ENDIAN; | |
| | | #else | |
| | | #error unexpected value of __TBB_BIG_ENDIAN | |
| | | #endif | |
| | | }}; | |
| | | | |
| | | const uint32_t byte_offset = (uint32_t) ((uintptr_t)ptr & 0x | |
| | | 3); | |
| | | volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - by | |
| | | te_offset ); | |
| | | | |
| | | // location of T within uint32_t for a C++ shift operation | |
| | | const uint32_t bits_to_shift = 8*(endianness::is_big_endian() ? (4 | |
| | | - sizeof(T) - (byte_offset)) : byte_offset); | |
| | | const uint32_t mask = (((uint32_t)1<<(sizeof(T)*8)) - 1 )< | |
| | | <bits_to_shift; | |
| | | const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shif | |
| | | t)&mask; | |
| | | const uint32_t shifted_value = ((uint32_t)value << bits_to_shif | |
| | | t)&mask; | |
| | | | |
| | | for(atomic_backoff b;;b.pause()) { | |
| | | const uint32_t surroundings = *aligned_ptr & ~mask ; // reload the | |
| | | aligned_ptr value which might change during the pause | |
| | | const uint32_t big_comparand = surroundings | shifted_comparand ; | |
| | | const uint32_t big_value = surroundings | shifted_value ; | |
| | | // __TBB_machine_cmpswp4 presumed to have full fence. | |
| // Cast shuts up /Wp64 warning | | // Cast shuts up /Wp64 warning | |
|
| result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_valu | | const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligne | |
| e ); | | d_ptr, big_value, big_comparand ); | |
| if( result==old_value // CAS succeeded | | if( big_result == big_comparand // CAS succeeded | |
| || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of | | || ((big_result ^ big_comparand) & mask) != 0) // CAS failed an | |
| interest have changed | | d the bits of interest have changed | |
| break; | | { | |
| else // CAS failed but the bits of | | return T((big_result & mask) >> bits_to_shift); | |
| interest left unchanged | | } | |
| b.pause(); | | else continue; // CAS failed bu | |
| | | t the bits of interest left unchanged | |
| } | | } | |
|
| return T((result & mask) >> bitoffset); | | | |
| } | | } | |
|
| | | #endif | |
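A worked example of the shift/mask arithmetic may make the code above easier to follow; the address and values below are made up, and the little-endian branch is assumed.

    #include <stdint.h>
    #include <assert.h>

    int main() {
        // A 2-byte object that sits at offset 2 within its naturally aligned 4-byte word.
        const uintptr_t addr          = 0x1006;
        const uint32_t  byte_offset   = (uint32_t)(addr & 0x3);      // == 2
        const uint32_t  bits_to_shift = 8 * byte_offset;             // == 16 (little-endian case)
        const uint32_t  mask          = (((uint32_t)1 << 16) - 1) << bits_to_shift;
        assert( mask == 0xFFFF0000u );                               // the half-word being CASed

        // A comparand of 0x1234 participates in the 4-byte CAS as 0x12340000;
        // the untouched bytes of the word are OR-ed back in from 'surroundings'.
        const uint32_t shifted_comparand = ((uint32_t)0x1234 << bits_to_shift) & mask;
        assert( shifted_comparand == 0x12340000u );

        // On a big-endian target the shift would instead be
        // 8*(4 - sizeof(uint16_t) - byte_offset) == 0 for the same address.
        return 0;
    }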
| template<size_t S, typename T> | | template<size_t S, typename T> | |
| inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T compar
and ); | | inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T compar
and ); | |
| | | | |
| template<> | | template<> | |
| inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr,
uint8_t value, uint8_t comparand ) { | | inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr,
uint8_t value, uint8_t comparand ) { | |
| #if __TBB_USE_GENERIC_PART_WORD_CAS | | #if __TBB_USE_GENERIC_PART_WORD_CAS | |
|
| return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,va
lue,comparand); | | return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,valu
e,comparand); | |
| #else | | #else | |
| return __TBB_machine_cmpswp1(ptr,value,comparand); | | return __TBB_machine_cmpswp1(ptr,value,comparand); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<> | | template<> | |
| inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt
r, uint16_t value, uint16_t comparand ) { | | inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *pt
r, uint16_t value, uint16_t comparand ) { | |
| #if __TBB_USE_GENERIC_PART_WORD_CAS | | #if __TBB_USE_GENERIC_PART_WORD_CAS | |
|
| return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,
value,comparand); | | return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,va
lue,comparand); | |
| #else | | #else | |
| return __TBB_machine_cmpswp2(ptr,value,comparand); | | return __TBB_machine_cmpswp2(ptr,value,comparand); | |
| #endif | | #endif | |
| } | | } | |
| | | | |
| template<> | | template<> | |
| inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt
r, uint32_t value, uint32_t comparand ) { | | inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *pt
r, uint32_t value, uint32_t comparand ) { | |
| // Cast shuts up /Wp64 warning | | // Cast shuts up /Wp64 warning | |
| return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand); | | return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand); | |
| } | | } | |
| | | | |
| skipping to change at line 496 | | skipping to change at line 536 | |
| inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) { | | inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) { | |
| for(;;) { | | for(;;) { | |
| int64_t result = *(int64_t *)ptr; | | int64_t result = *(int64_t *)ptr; | |
| if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break; | | if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break; | |
| } | | } | |
| } | | } | |
| | | | |
| inline int64_t __TBB_machine_load8 (const volatile void *ptr) { | | inline int64_t __TBB_machine_load8 (const volatile void *ptr) { | |
| // Comparand and new value may be anything, they only must be equal, an
d | | // Comparand and new value may be anything, they only must be equal, an
d | |
| // the value should have a low probability to be actually found in 'loc
ation'. | | // the value should have a low probability to be actually found in 'loc
ation'. | |
|
| const int64_t anyvalue = 2305843009213693951; | | const int64_t anyvalue = 2305843009213693951LL; | |
| return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,
anyvalue); | | return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,
anyvalue); | |
| } | | } | |
| #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ | | #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ | |
| | | | |
| #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | | #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE | |
| /** Fenced operations use volatile qualifier to prevent compiler from optim
izing | | /** Fenced operations use volatile qualifier to prevent compiler from optim
izing | |
| them out, and on architectures with weak memory ordering to induce compiler | | them out, and on architectures with weak memory ordering to induce compiler | |
| to generate code with appropriate acquire/release semantics. | | to generate code with appropriate acquire/release semantics. | |
| On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has | | On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has | |
| no effect on code gen, and consistency helpers serve as a compiler fenc
e (the | | no effect on code gen, and consistency helpers serve as a compiler fenc
e (the | |
| | | | |
| skipping to change at line 521 | | skipping to change at line 561 | |
| T to_return = location; | | T to_return = location; | |
| __TBB_acquire_consistency_helper(); | | __TBB_acquire_consistency_helper(); | |
| return to_return; | | return to_return; | |
| } | | } | |
| static void store_with_release ( volatile T &location, T value ) { | | static void store_with_release ( volatile T &location, T value ) { | |
| __TBB_release_consistency_helper(); | | __TBB_release_consistency_helper(); | |
| location = value; | | location = value; | |
| } | | } | |
| }; | | }; | |
| | | | |
|
| | | // In general, a plain load or store generated by a 32-bit compiler is not atomic for 64-bit types | |
| #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | | #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | |
| template <typename T> | | template <typename T> | |
| struct machine_load_store<T,8> { | | struct machine_load_store<T,8> { | |
| static T load_with_acquire ( const volatile T& location ) { | | static T load_with_acquire ( const volatile T& location ) { | |
| return (T)__TBB_machine_load8( (const volatile void*)&location ); | | return (T)__TBB_machine_load8( (const volatile void*)&location ); | |
| } | | } | |
| static void store_with_release ( volatile T& location, T value ) { | | static void store_with_release ( volatile T& location, T value ) { | |
| __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | | __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | |
| } | | } | |
| }; | | }; | |
| #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ | | #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ | |
| | | | |
|
| | | #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE | |
| template <typename T, size_t S> | | template <typename T, size_t S> | |
| struct machine_load_store_seq_cst { | | struct machine_load_store_seq_cst { | |
| static T load ( const volatile T& location ) { | | static T load ( const volatile T& location ) { | |
| __TBB_full_memory_fence(); | | __TBB_full_memory_fence(); | |
| return machine_load_store<T,S>::load_with_acquire( location ); | | return machine_load_store<T,S>::load_with_acquire( location ); | |
| } | | } | |
| #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | | #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE | |
| static void store ( volatile T &location, T value ) { | | static void store ( volatile T &location, T value ) { | |
| atomic_selector<S>::fetch_store( (volatile void*)&location, (typena
me atomic_selector<S>::word)value ); | | atomic_selector<S>::fetch_store( (volatile void*)&location, (typena
me atomic_selector<S>::word)value ); | |
| } | | } | |
| | | | |
| skipping to change at line 560 | | skipping to change at line 602 | |
| }; | | }; | |
| | | | |
| #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | | #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS | |
| /** The implementation does not use functions __TBB_machine_load8/store8 as
they | | /** The implementation does not use functions __TBB_machine_load8/store8 as
they | |
| are not required to be sequentially consistent. **/ | | are not required to be sequentially consistent. **/ | |
| template <typename T> | | template <typename T> | |
| struct machine_load_store_seq_cst<T,8> { | | struct machine_load_store_seq_cst<T,8> { | |
| static T load ( const volatile T& location ) { | | static T load ( const volatile T& location ) { | |
| // Comparand and new value may be anything, they only must be equal
, and | | // Comparand and new value may be anything, they only must be equal
, and | |
| // the value should have a low probability to be actually found in
'location'. | | // the value should have a low probability to be actually found in
'location'. | |
|
| const int64_t anyvalue = 2305843009213693951ll; | | const int64_t anyvalue = 2305843009213693951LL; | |
| return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T
*>(&location), anyvalue, anyvalue ); | | return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T
*>(&location), anyvalue, anyvalue ); | |
| } | | } | |
| static void store ( volatile T &location, T value ) { | | static void store ( volatile T &location, T value ) { | |
| int64_t result = (volatile int64_t&)location; | | int64_t result = (volatile int64_t&)location; | |
| while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)v
alue, result) != result ) | | while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)v
alue, result) != result ) | |
| result = (volatile int64_t&)location; | | result = (volatile int64_t&)location; | |
| } | | } | |
| }; | | }; | |
| #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
|
| | | #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */ | |
| | | | |
| #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE | | #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE | |
| // Relaxed operations add volatile qualifier to prevent compiler from optim
izing them out. | | // Relaxed operations add volatile qualifier to prevent compiler from optim
izing them out. | |
| /** Volatile should not incur any additional cost on IA32, Intel64, and Spa
rc TSO | | /** Volatile should not incur any additional cost on IA32, Intel64, and Spa
rc TSO | |
| architectures. However on architectures with weak memory ordering compi
ler may | | architectures. However on architectures with weak memory ordering compi
ler may | |
| generate code with acquire/release semantics for operations on volatile
data. **/ | | generate code with acquire/release semantics for operations on volatile
data. **/ | |
| template <typename T, size_t S> | | template <typename T, size_t S> | |
| struct machine_load_store_relaxed { | | struct machine_load_store_relaxed { | |
| static inline T load ( const volatile T& location ) { | | static inline T load ( const volatile T& location ) { | |
| return location; | | return location; | |
| | | | |
| skipping to change at line 599 | | skipping to change at line 642 | |
| static inline T load ( const volatile T& location ) { | | static inline T load ( const volatile T& location ) { | |
| return (T)__TBB_machine_load8( (const volatile void*)&location ); | | return (T)__TBB_machine_load8( (const volatile void*)&location ); | |
| } | | } | |
| static inline void store ( volatile T& location, T value ) { | | static inline void store ( volatile T& location, T value ) { | |
| __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | | __TBB_machine_store8( (volatile void*)&location, (int64_t)value ); | |
| } | | } | |
| }; | | }; | |
| #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | | #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ | |
| #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ | | #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ | |
| | | | |
|
| | | #undef __TBB_WORDSIZE // this macro must not be used outside of the atomic machinery | |
| | | | |
| template<typename T> | | template<typename T> | |
| inline T __TBB_load_with_acquire(const volatile T &location) { | | inline T __TBB_load_with_acquire(const volatile T &location) { | |
| return machine_load_store<T,sizeof(T)>::load_with_acquire( location ); | | return machine_load_store<T,sizeof(T)>::load_with_acquire( location ); | |
| } | | } | |
| template<typename T, typename V> | | template<typename T, typename V> | |
| inline void __TBB_store_with_release(volatile T& location, V value) { | | inline void __TBB_store_with_release(volatile T& location, V value) { | |
| machine_load_store<T,sizeof(T)>::store_with_release( location, T(value)
); | | machine_load_store<T,sizeof(T)>::store_with_release( location, T(value)
); | |
| } | | } | |
| //! Overload that exists solely to avoid /Wp64 warnings. | | //! Overload that exists solely to avoid /Wp64 warnings. | |
| inline void __TBB_store_with_release(volatile size_t& location, size_t valu
e) { | | inline void __TBB_store_with_release(volatile size_t& location, size_t valu
e) { | |
| | | | |
| skipping to change at line 746 | | skipping to change at line 791 | |
| | | | |
| // Mapping historically used names to the ones expected by atomic_load_stor
e_traits | | // Mapping historically used names to the ones expected by atomic_load_stor
e_traits | |
| #define __TBB_load_acquire __TBB_load_with_acquire | | #define __TBB_load_acquire __TBB_load_with_acquire | |
| #define __TBB_store_release __TBB_store_with_release | | #define __TBB_store_release __TBB_store_with_release | |
| | | | |
| #ifndef __TBB_Log2 | | #ifndef __TBB_Log2 | |
| inline intptr_t __TBB_Log2( uintptr_t x ) { | | inline intptr_t __TBB_Log2( uintptr_t x ) { | |
| if( x==0 ) return -1; | | if( x==0 ) return -1; | |
| intptr_t result = 0; | | intptr_t result = 0; | |
| uintptr_t tmp; | | uintptr_t tmp; | |
|
| #if __TBB_WORDSIZE>=8 | | | |
| if( (tmp = x>>32) ) { x=tmp; result += 32; } | | if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32)) { x=tmp; result += 32; } | |
| #endif | | | |
| if( (tmp = x>>16) ) { x=tmp; result += 16; } | | if( (tmp = x>>16) ) { x=tmp; result += 16; } | |
| if( (tmp = x>>8) ) { x=tmp; result += 8; } | | if( (tmp = x>>8) ) { x=tmp; result += 8; } | |
| if( (tmp = x>>4) ) { x=tmp; result += 4; } | | if( (tmp = x>>4) ) { x=tmp; result += 4; } | |
| if( (tmp = x>>2) ) { x=tmp; result += 2; } | | if( (tmp = x>>2) ) { x=tmp; result += 2; } | |
| return (x&2)? result+1: result; | | return (x&2)? result+1: result; | |
| } | | } | |
| #endif | | #endif | |
| | | | |
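A few sample values, inferred directly from the fallback above, may help when reading it; the checks below are only an illustration.

    #include <assert.h>

    void log2_fallback_examples() {
        assert( __TBB_Log2( 0u  ) == -1 );   // special case: no bit set
        assert( __TBB_Log2( 1u  ) ==  0 );
        assert( __TBB_Log2( 3u  ) ==  1 );   // index of the highest set bit, i.e. floor(log2(x))
        assert( __TBB_Log2( 64u ) ==  6 );
        assert( __TBB_Log2( 65u ) ==  6 );
    }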
| #ifndef __TBB_AtomicOR | | #ifndef __TBB_AtomicOR | |
| inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) { | | inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) { | |
| | | | |
End of changes. 20 change blocks. |
| 50 lines changed or deleted | | 106 lines changed or added | |
|
| tbb_stddef.h | | tbb_stddef.h | |
| | | | |
| skipping to change at line 37 | | skipping to change at line 37 | |
| */ | | */ | |
| | | | |
| #ifndef __TBB_tbb_stddef_H | | #ifndef __TBB_tbb_stddef_H | |
| #define __TBB_tbb_stddef_H | | #define __TBB_tbb_stddef_H | |
| | | | |
| // Marketing-driven product version | | // Marketing-driven product version | |
| #define TBB_VERSION_MAJOR 4 | | #define TBB_VERSION_MAJOR 4 | |
| #define TBB_VERSION_MINOR 0 | | #define TBB_VERSION_MINOR 0 | |
| | | | |
| // Engineering-focused interface version | | // Engineering-focused interface version | |
|
| #define TBB_INTERFACE_VERSION 6003 | | #define TBB_INTERFACE_VERSION 6004 | |
| #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 | | #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 | |
| | | | |
| // The oldest major interface version still supported | | // The oldest major interface version still supported | |
| // To be used in SONAME, manifests, etc. | | // To be used in SONAME, manifests, etc. | |
| #define TBB_COMPATIBLE_INTERFACE_VERSION 2 | | #define TBB_COMPATIBLE_INTERFACE_VERSION 2 | |
| | | | |
| #define __TBB_STRING_AUX(x) #x | | #define __TBB_STRING_AUX(x) #x | |
| #define __TBB_STRING(x) __TBB_STRING_AUX(x) | | #define __TBB_STRING(x) __TBB_STRING_AUX(x) | |
| | | | |
| // We do not need defines below for resource processing on windows | | // We do not need defines below for resource processing on windows | |
| | | | |
| skipping to change at line 137 | | skipping to change at line 137 | |
| #include "tbb_config.h" | | #include "tbb_config.h" | |
| | | | |
| #if _MSC_VER >=1400 | | #if _MSC_VER >=1400 | |
| #define __TBB_EXPORTED_FUNC __cdecl | | #define __TBB_EXPORTED_FUNC __cdecl | |
| #define __TBB_EXPORTED_METHOD __thiscall | | #define __TBB_EXPORTED_METHOD __thiscall | |
| #else | | #else | |
| #define __TBB_EXPORTED_FUNC | | #define __TBB_EXPORTED_FUNC | |
| #define __TBB_EXPORTED_METHOD | | #define __TBB_EXPORTED_METHOD | |
| #endif | | #endif | |
| | | | |
|
| | | #if __INTEL_COMPILER || _MSC_VER | |
| | | #define __TBB_NOINLINE(decl) __declspec(noinline) decl | |
| | | #elif __GNUC__ | |
| | | #define __TBB_NOINLINE(decl) decl __attribute__ ((noinline)) | |
| | | #else | |
| | | #define __TBB_NOINLINE(decl) decl | |
| | | #endif | |
| | | | |
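For example (the helper function is hypothetical), the macro wraps a whole declaration so the attribute ends up on the side each compiler expects:

    // Keep a cold error-reporting helper out of its callers.
    __TBB_NOINLINE( void report_allocation_failure( const char* where ) );

    // Expands to:
    //   MSVC / Intel:  __declspec(noinline) void report_allocation_failure( const char* where );
    //   GCC:           void report_allocation_failure( const char* where ) __attribute__ ((noinline));
    //   otherwise:     void report_allocation_failure( const char* where );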
| #include <cstddef> /* Need size_t and ptrdiff_t */ | | #include <cstddef> /* Need size_t and ptrdiff_t */ | |
| | | | |
| #if _MSC_VER | | #if _MSC_VER | |
| #define __TBB_tbb_windef_H | | #define __TBB_tbb_windef_H | |
| #include "internal/_tbb_windef.h" | | #include "internal/_tbb_windef.h" | |
| #undef __TBB_tbb_windef_H | | #undef __TBB_tbb_windef_H | |
| #endif | | #endif | |
| #if !defined(_MSC_VER) || _MSC_VER>=1600 | | #if !defined(_MSC_VER) || _MSC_VER>=1600 | |
| #include <stdint.h> | | #include <stdint.h> | |
| #endif | | #endif | |
| | | | |
| skipping to change at line 247 | | skipping to change at line 255 | |
| | | | |
| Note that no problems have yet been observed relating to the definition
currently being empty, | | Note that no problems have yet been observed relating to the definition
currently being empty, | |
| even if at least "volatile" would seem to be in order to avoid data som
etimes temporarily hiding | | even if at least "volatile" would seem to be in order to avoid data som
etimes temporarily hiding | |
| in a register (although "volatile" as a "poor man's atomic" lacks sever
al other features of a proper | | in a register (although "volatile" as a "poor man's atomic" lacks sever
al other features of a proper | |
| atomic, some of which are now provided instead through specialized func
tions). | | atomic, some of which are now provided instead through specialized func
tions). | |
| | | | |
| Note that usage is intentionally compatible with a definition as qualif
ier "volatile", | | Note that usage is intentionally compatible with a definition as qualif
ier "volatile", | |
| both as a way to have the compiler help enforce use of the label and to
quickly rule out | | both as a way to have the compiler help enforce use of the label and to
quickly rule out | |
| one potential issue. | | one potential issue. | |
| | | | |
|
| Note however that, with some architecture/compiler combinations, e.g. o
n Itanium, "volatile" | | Note however that, with some architecture/compiler combinations, e.g. o
n IA-64, "volatile" | |
| also has non-portable memory semantics that are needlessly expensive fo
r "relaxed" operations. | | also has non-portable memory semantics that are needlessly expensive fo
r "relaxed" operations. | |
| | | | |
| Note that this must only be applied to data that will not change bit pa
tterns when cast to/from | | Note that this must only be applied to data that will not change bit pa
tterns when cast to/from | |
| an integral type of the same length; tbb::atomic must be used instead f
or, e.g., floating-point types. | | an integral type of the same length; tbb::atomic must be used instead f
or, e.g., floating-point types. | |
| | | | |
| TODO: apply wherever relevant **/ | | TODO: apply wherever relevant **/ | |
| #define __TBB_atomic // intentionally empty, see above | | #define __TBB_atomic // intentionally empty, see above | |
| | | | |
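A usage sketch (the flag and both functions are made up; the fenced helpers come from tbb_machine.h shown earlier):

    // The label documents that the flag is shared and is only ever accessed through
    // the fenced load/store helpers, even though it currently expands to nothing.
    static __TBB_atomic intptr_t initialization_done;    // 0 until the initializing thread stores 1

    void publish_initialization() {
        __TBB_store_with_release( initialization_done, intptr_t(1) );
    }

    bool is_initialized() {
        return __TBB_load_with_acquire( initialization_done ) != 0;
    }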
| template<class T, int S> | | template<class T, int S> | |
| struct padded_base : T { | | struct padded_base : T { | |
| | | | |
| skipping to change at line 353 | | skipping to change at line 361 | |
| }; | | }; | |
| | | | |
| #if _MSC_VER | | #if _MSC_VER | |
| //! Microsoft std::allocator has non-standard extension that strips const f
rom a type. | | //! Microsoft std::allocator has non-standard extension that strips const f
rom a type. | |
| template<typename T> | | template<typename T> | |
| struct allocator_type<const T> { | | struct allocator_type<const T> { | |
| typedef T value_type; | | typedef T value_type; | |
| }; | | }; | |
| #endif | | #endif | |
| | | | |
|
| | | //! A function to select either 32-bit or 64-bit value, depending on machin | |
| | | e word size. | |
| | | inline size_t size_t_select( unsigned u, unsigned long long ull ) { | |
| | | /* Explicit cast of the arguments to size_t is done to avoid compiler w | |
| | | arnings | |
| | | (e.g. by Clang and MSVC) about possible truncation. The value of the | |
| | | right size, | |
| | | which is selected by ?:, is anyway not truncated or promoted. | |
| | | MSVC still warns if this trick is applied directly to constants, hen | |
| | | ce this function. */ | |
| | | return (sizeof(size_t)==sizeof(u)) ? size_t(u) : size_t(ull); | |
| | | } | |
| | | | |
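For example (the constant is purely illustrative), a value that differs between 32- and 64-bit builds can be chosen without triggering truncation warnings:

    // The unused branch of ?: is never truncated into size_t, so no warning fires.
    const size_t hash_multiplier =
        tbb::internal::size_t_select( 0x9E3779B9u, 0x9E3779B97F4A7C15ULL );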
| // Struct to be used as a version tag for inline functions. | | // Struct to be used as a version tag for inline functions. | |
| /** Version tag can be necessary to prevent loader on Linux from using the
wrong | | /** Version tag can be necessary to prevent loader on Linux from using the
wrong | |
| symbol in debug builds (when inline functions are compiled as out-of-li
ne). **/ | | symbol in debug builds (when inline functions are compiled as out-of-li
ne). **/ | |
| struct version_tag_v3 {}; | | struct version_tag_v3 {}; | |
| | | | |
| typedef version_tag_v3 version_tag; | | typedef version_tag_v3 version_tag; | |
| | | | |
| } // internal | | } // internal | |
| //! @endcond | | //! @endcond | |
| | | | |
| | | | |
End of changes. 4 change blocks. |
| 2 lines changed or deleted | | 23 lines changed or added | |
|
| windows_intel64.h | | windows_intel64.h | |
| | | | |
| skipping to change at line 58 | | skipping to change at line 58 | |
| #pragma intrinsic(_InterlockedExchange64) | | #pragma intrinsic(_InterlockedExchange64) | |
| #endif /* !defined(__INTEL_COMPILER) */ | | #endif /* !defined(__INTEL_COMPILER) */ | |
| | | | |
| #if __INTEL_COMPILER | | #if __INTEL_COMPILER | |
| #define __TBB_compiler_fence() __asm { __asm nop } | | #define __TBB_compiler_fence() __asm { __asm nop } | |
| #define __TBB_full_memory_fence() __asm { __asm mfence } | | #define __TBB_full_memory_fence() __asm { __asm mfence } | |
| #elif _MSC_VER >= 1300 | | #elif _MSC_VER >= 1300 | |
| extern "C" void _ReadWriteBarrier(); | | extern "C" void _ReadWriteBarrier(); | |
| #pragma intrinsic(_ReadWriteBarrier) | | #pragma intrinsic(_ReadWriteBarrier) | |
| #pragma intrinsic(_mm_mfence) | | #pragma intrinsic(_mm_mfence) | |
|
| | | #pragma intrinsic(_mm_pause) | |
| #define __TBB_compiler_fence() _ReadWriteBarrier() | | #define __TBB_compiler_fence() _ReadWriteBarrier() | |
| #define __TBB_full_memory_fence() _mm_mfence() | | #define __TBB_full_memory_fence() _mm_mfence() | |
| #endif | | #endif | |
| | | | |
| #define __TBB_control_consistency_helper() __TBB_compiler_fence() | | #define __TBB_control_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | | #define __TBB_acquire_consistency_helper() __TBB_compiler_fence() | |
| #define __TBB_release_consistency_helper() __TBB_compiler_fence() | | #define __TBB_release_consistency_helper() __TBB_compiler_fence() | |
| | | | |
| // ATTENTION: if you ever change argument types in machine-specific primiti
ves, | | // ATTENTION: if you ever change argument types in machine-specific primiti
ves, | |
| // please take care of atomic_word<> specializations in tbb/atomic.h | | // please take care of atomic_word<> specializations in tbb/atomic.h | |
| extern "C" { | | extern "C" { | |
| __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, _
_int8 value, __int8 comparand ); | | __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, _
_int8 value, __int8 comparand ); | |
| __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr,
__int8 addend ); | | __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr,
__int8 addend ); | |
| __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *pt
r, __int8 value ); | | __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *pt
r, __int8 value ); | |
| __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr,
__int16 value, __int16 comparand ); | | __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr,
__int16 value, __int16 comparand ); | |
| __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr
, __int16 addend ); | | __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr
, __int16 addend ); | |
| __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *p
tr, __int16 value ); | | __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *p
tr, __int16 value ); | |
|
| void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay ); | | | |
| } | | } | |
| | | | |
| inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int
32 comparand ) { | | inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int
32 comparand ) { | |
| return _InterlockedCompareExchange( (long*)ptr, value, comparand ); | | return _InterlockedCompareExchange( (long*)ptr, value, comparand ); | |
| } | | } | |
| inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) { | | inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) { | |
| return _InterlockedExchangeAdd( (long*)ptr, addend ); | | return _InterlockedExchangeAdd( (long*)ptr, addend ); | |
| } | | } | |
| inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value )
{ | | inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value )
{ | |
| return _InterlockedExchange( (long*)ptr, value ); | | return _InterlockedExchange( (long*)ptr, value ); | |
| | | | |
| skipping to change at line 98 | | skipping to change at line 98 | |
| inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __
int64 comparand ) { | | inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __
int64 comparand ) { | |
| return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand )
; | | return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand )
; | |
| } | | } | |
| inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend
) { | | inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend
) { | |
| return _InterlockedExchangeAdd64( (__int64*)ptr, addend ); | | return _InterlockedExchangeAdd64( (__int64*)ptr, addend ); | |
| } | | } | |
| inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value
) { | | inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value
) { | |
| return _InterlockedExchange64( (__int64*)ptr, value ); | | return _InterlockedExchange64( (__int64*)ptr, value ); | |
| } | | } | |
| | | | |
|
| #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | | inline void __TBB_machine_pause_v6 (__int32 delay ) { | |
| #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | | for (;delay>0; --delay ) | |
| #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | | _mm_pause(); | |
| | | } | |
| | | | |
| | | #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 | |
| | | #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 | |
| | | #define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1 | |
| | | | |
|
| #if !__INTEL_COMPILER | | | |
| extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in
t64 w ); | | extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in
t64 w ); | |
| #pragma intrinsic(_BitScanReverse64) | | #pragma intrinsic(_BitScanReverse64) | |
|
| #endif | | | |
| | | | |
| inline __int64 __TBB_machine_lg( unsigned __int64 i ) { | | inline __int64 __TBB_machine_lg( unsigned __int64 i ) { | |
|
| #if __INTEL_COMPILER | | | |
| unsigned __int64 j; | | | |
| __asm | | | |
| { | | | |
| bsr rax, i | | | |
| mov j, rax | | | |
| } | | | |
| #else | | | |
| unsigned long j; | | unsigned long j; | |
| _BitScanReverse64( &j, i ); | | _BitScanReverse64( &j, i ); | |
|
| #endif | | | |
| return j; | | return j; | |
| } | | } | |
| | | | |
| inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { | | inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { | |
| _InterlockedOr64((__int64*)operand, addend); | | _InterlockedOr64((__int64*)operand, addend); | |
| } | | } | |
| | | | |
| inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { | | inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { | |
| _InterlockedAnd64((__int64*)operand, addend); | | _InterlockedAnd64((__int64*)operand, addend); | |
| } | | } | |
| | | | |
| #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | | #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) | |
| #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | | #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) | |
| | | | |
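A brief usage sketch (the flag word and the bit mask are hypothetical):

    // Atomically set and clear bit 2 in a word-sized set of flags.
    static volatile intptr_t pool_flags;

    void mark_busy() { __TBB_AtomicOR ( &pool_flags, intptr_t(1) << 2 ); }
    void mark_idle() { __TBB_AtomicAND( &pool_flags, ~(intptr_t(1) << 2) ); }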
| extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | | extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); | |
| #define __TBB_Yield() SwitchToThread() | | #define __TBB_Yield() SwitchToThread() | |
|
| #define __TBB_Pause(V) __TBB_machine_pause(V) | | #define __TBB_Pause(V) __TBB_machine_pause_v6(V) | |
| #define __TBB_Log2(V) __TBB_machine_lg(V) | | #define __TBB_Log2(V) __TBB_machine_lg(V) | |
| | | | |
| // API to retrieve/update FPU control setting | | // API to retrieve/update FPU control setting | |
| #define __TBB_CPU_CTL_ENV_PRESENT 1 | | #define __TBB_CPU_CTL_ENV_PRESENT 1 | |
| | | | |
| struct __TBB_cpu_ctl_env_t { | | struct __TBB_cpu_ctl_env_t { | |
| int mxcsr; | | int mxcsr; | |
| short x87cw; | | short x87cw; | |
| }; | | }; | |
| | | | |
| | | | |
End of changes. 8 change blocks. |
| 16 lines changed or deleted | | 11 lines changed or added | |
|