_aggregator_impl.h

skipping to change at line 56

    Derived *next;
    aggregated_operation() : status(0), next(NULL) {}
};

//! Aggregator base class
/** An aggregator for collecting operations coming from multiple sources and executing
    them serially on a single thread. operation_type must be derived from
    aggregated_operation. The parameter handler_type is a functor that will be passed the
    list of operations and is expected to handle each operation appropriately, setting the
    status of each operation to non-zero.*/
template < typename handler_type, typename operation_type >
class aggregator {
public:
    aggregator() : handler_busy(false) { pending_operations = NULL; }
    explicit aggregator(handler_type h) : handler_busy(false), handle_operations(h) {
        pending_operations = NULL;
    }
    void initialize_handler(handler_type h) { handle_operations = h; }
    //! Place operation in list

End of changes. 1 change block; 1 line changed or deleted, 1 line changed or added.

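The aggregator contract described in the comment above can be pictured with a small standalone sketch. The names below (my_op, my_handler) are hypothetical stand-ins, not TBB's internal classes; they only mirror the documented protocol: operations are chained through next, and the handler marks each one finished by setting status to non-zero.

#include <cstddef>

struct my_op {                        // plays the role of an aggregated_operation subclass
    int    status;                    // handler sets this to non-zero when the op is done
    my_op* next;                      // operations are chained into a singly linked list
    int    payload;
    explicit my_op(int p) : status(0), next(NULL), payload(p) {}
};

struct my_handler {                   // plays the role of handler_type
    void operator()(my_op* op_list) const {
        while (op_list) {
            my_op* next = op_list->next;  // read the link before signalling completion
            /* ...process op_list->payload serially here... */
            op_list->status = 1;          // non-zero status releases the waiting thread
            op_list = next;
        }
    }
};
// With these types the template would be instantiated as aggregator<my_handler, my_op>.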

_concurrent_queue_impl.h

skipping to change at line 307

template<typename T>
micro_queue<T>& micro_queue<T>::assign( const micro_queue<T>& src, concurrent_queue_base_v3<T>& base ) {
    head_counter = src.head_counter;
    tail_counter = src.tail_counter;
    page_mutex = src.page_mutex;
    const page* srcp = src.head_page;
    if( is_valid_page(srcp) ) {
        ticket g_index = head_counter;
        __TBB_TRY {
            size_t n_items = (tail_counter-head_counter)/concurrent_queue_rep_base::n_queue;
            size_t index = head_counter/concurrent_queue_rep_base::n_queue & (base.my_rep->items_per_page-1);
            size_t end_in_first_page = (index+n_items<base.my_rep->items_per_page)?(index+n_items):base.my_rep->items_per_page;
            head_page = make_copy( base, srcp, index, end_in_first_page, g_index );
            page* cur_page = head_page;
            if( srcp != src.tail_page ) {
                for( srcp = srcp->next; srcp!=src.tail_page; srcp=srcp->next ) {
                    cur_page->next = make_copy( base, srcp, 0, base.my_rep->items_per_page, g_index );
                    cur_page = cur_page->next;

skipping to change at line 514

template<typename T>
concurrent_queue_base_v3<T>::concurrent_queue_base_v3() {
    const size_t item_size = sizeof(T);
    my_rep = cache_aligned_allocator<concurrent_queue_rep<T> >().allocate(1);
    __TBB_ASSERT( (size_t)my_rep % NFS_GetLineSize()==0, "alignment error" );
    __TBB_ASSERT( (size_t)&my_rep->head_counter % NFS_GetLineSize()==0, "alignment error" );
    __TBB_ASSERT( (size_t)&my_rep->tail_counter % NFS_GetLineSize()==0, "alignment error" );
    __TBB_ASSERT( (size_t)&my_rep->array % NFS_GetLineSize()==0, "alignment error" );
    memset(my_rep,0,sizeof(concurrent_queue_rep<T>));
    my_rep->item_size = item_size;
    my_rep->items_per_page = item_size<=  8 ? 32 :
                             item_size<= 16 ? 16 :
                             item_size<= 32 ?  8 :
                             item_size<= 64 ?  4 :
                             item_size<=128 ?  2 :
                             1;
}

template<typename T>
bool concurrent_queue_base_v3<T>::internal_try_pop( void* dst ) {
    concurrent_queue_rep<T>& r = *my_rep;
    ticket k;
    do {
        k = r.head_counter;
        for(;;) {

skipping to change at line 840

    //! Capacity of the queue
    ptrdiff_t my_capacity;
    //! Always a power of 2
    size_t items_per_page;
    //! Size of an item
    size_t item_size;
-#if __TBB_GCC_3_3_PROTECTED_BROKEN
+#if __TBB_PROTECTED_NESTED_CLASS_BROKEN
public:
#endif
    template<typename T>
    struct padded_page: page {
        //! Not defined anywhere - exists to quiet warnings.
        padded_page();
        //! Not defined anywhere - exists to quiet warnings.
        void operator=( const padded_page& );
        //! Must be last field.
        T last;

End of changes. 3 change blocks; 7 lines changed or deleted, 7 lines changed or added.

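The items_per_page selection in the constructor above amounts to packing roughly 256 bytes of items per page, with the count rounded to a power of two and clamped to the range [1, 32]. A small self-contained check of that rule (the helper name is illustrative):

#include <cstddef>
#include <cassert>

static size_t items_per_page_for(size_t item_size) {
    return item_size <=   8 ? 32 :
           item_size <=  16 ? 16 :
           item_size <=  32 ?  8 :
           item_size <=  64 ?  4 :
           item_size <= 128 ?  2 : 1;
}

int main() {
    assert(items_per_page_for(sizeof(int)) == 32); // 4-byte items: 32 per page
    assert(items_per_page_for(24)          ==  8); // a small struct: 8 per page
    assert(items_per_page_for(256)         ==  1); // large items: one per page
    return 0;
}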

_concurrent_unordered_impl.h

skipping to change at line 1019

        raw_const_iterator it = get_bucket(bucket);
        // Find the end of the bucket, denoted by the dummy element
        do ++it;
        while(it != my_solist.raw_end() && !it.get_node_ptr()->is_dummy());
        // Return the first real element past the end of the bucket
        return my_solist.first_real_iterator(it);
    }
-   const_local_iterator unsafe_cbegin(size_type bucket) const {
+   const_local_iterator unsafe_cbegin(size_type /*bucket*/) const {
        return ((const self_type *) this)->begin();
    }
-   const_local_iterator unsafe_cend(size_type bucket) const {
+   const_local_iterator unsafe_cend(size_type /*bucket*/) const {
        return ((const self_type *) this)->end();
    }
    // Hash policy
    float load_factor() const {
        return (float) size() / (float) unsafe_bucket_count();
    }
    float max_load_factor() const {
        return my_maximum_bucket_size;

skipping to change at line 1384

    solist_t my_solist;                                                          // List where all the elements are kept
    typename allocator_type::template rebind<raw_iterator>::other my_allocator; // Allocator object for segments
    float my_maximum_bucket_size;                                                // Maximum size of the bucket
    atomic<raw_iterator*> my_buckets[pointers_per_table];                        // The segment table
};

#if _MSC_VER
#pragma warning(pop) // warning 4127 -- while (true) has a constant expression in it
#endif

//! Hash multiplier
-static const size_t hash_multiplier = sizeof(size_t)==4? 2654435769U : 11400714819323198485ULL;
+static const size_t hash_multiplier = tbb::internal::size_t_select(2654435769U, 11400714819323198485ULL);
} // namespace internal
//! @endcond

//! Hasher functions
template<typename T>
inline size_t tbb_hasher( const T& t ) {
    return static_cast<size_t>( t ) * internal::hash_multiplier;
}
template<typename P>
inline size_t tbb_hasher( P* ptr ) {
    size_t const h = reinterpret_cast<size_t>( ptr );

End of changes. 3 change blocks; 3 lines changed or deleted, 3 lines changed or added.

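The new hash_multiplier definition picks the 32-bit or 64-bit Knuth multiplicative-hashing constant (roughly 2^32/phi or 2^64/phi) to match sizeof(size_t); tbb::internal::size_t_select is assumed to do exactly that selection. A standalone sketch of the same idea, with a hypothetical pick_for_word_size helper standing in for size_t_select:

#include <cstddef>
#include <stdint.h>
#include <cassert>

static size_t pick_for_word_size(uint32_t v32, uint64_t v64) {
    return sizeof(size_t) == 4 ? (size_t)v32 : (size_t)v64;  // choose the constant for this word size
}

static size_t toy_hasher(size_t key) {
    static const size_t multiplier =
        pick_for_word_size(2654435769U, 11400714819323198485ULL);
    return key * multiplier;   // unsigned overflow wraps, which is intended
}

int main() {
    assert(toy_hasher(1) != toy_hasher(2));  // nearby keys are scattered across the hash range
    return 0;
}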

_flow_graph_tagged_buffer_impl.h

skipping to change at line 77

    size_t my_size;
    size_t nelements;
    element_type** array;
    std::vector<element_type, Allocator> *lists;
    element_type* free_list;
    size_t mask() { return my_size - 1; }
    static size_t hash(TagType t) {
-#if __TBB_WORDSIZE == 4
-        return uintptr_t(t)*0x9E3779B9;
-#else
-        return uintptr_t(t)*0x9E3779B97F4A7C15;
-#endif
+        return uintptr_t(t)*tbb::internal::size_t_select(0x9E3779B9,0x9E3779B97F4A7C15ULL);
    }
    void set_up_free_list( element_type **p_free_list, list_array_type *la, size_t sz) {
        for(size_t i=0; i < sz - 1; ++i ) {  // construct free list
            (*la)[i].next = &((*la)[i+1]);
            (*la)[i].t = NO_TAG;
        }
        (*la)[sz-1].next = NULL;
        *p_free_list = &((*la)[0]);
    }

End of changes. 1 change block; 5 lines changed or deleted, 2 lines changed or added.

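set_up_free_list above threads every slot of a freshly allocated array through its next pointer and publishes the first slot as the head of the free list. A standalone sketch of the same linking; the element type and the tag sentinel are illustrative stand-ins:

#include <cstddef>
#include <vector>
#include <cassert>

struct element {                // stand-in for element_type
    element* next;
    int      tag;               // stand-in for the TagType field
};

static element* build_free_list(std::vector<element>& storage) {
    const size_t sz = storage.size();
    for (size_t i = 0; i + 1 < sz; ++i) {
        storage[i].next = &storage[i + 1];  // link each slot to the following one
        storage[i].tag  = -1;               // stand-in for NO_TAG
    }
    storage[sz - 1].next = NULL;            // last slot terminates the list
    storage[sz - 1].tag  = -1;
    return &storage[0];                     // head of the free list
}

int main() {
    std::vector<element> slots(4);
    element* free_list = build_free_list(slots);
    size_t count = 0;
    for (element* p = free_list; p; p = p->next) ++count;
    assert(count == 4);                     // all four slots are reachable from the head
    return 0;
}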

atomic.h

skipping to change at line 51

#include "tbb_machine.h"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244 4267)
#endif

namespace tbb {

-//! Specifies memory fencing.
+//! Specifies memory semantics.
enum memory_semantics {
-    //! Sequentially consistent fence.
+    //! Sequential consistency
    full_fence,
-    //! Acquire fence
+    //! Acquire
    acquire,
-    //! Release fence
+    //! Release
    release,
    //! No ordering
    relaxed
};

//! @cond INTERNAL
namespace internal {

#if __TBB_ATTRIBUTE_ALIGNED_PRESENT
    #define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a)));

skipping to change at line 89

    typedef int8_t word;
    int8_t value;
};
template<>
struct atomic_rep<2> {       // Specialization
    typedef int16_t word;
    __TBB_DECL_ATOMIC_FIELD(int16_t,value,2)
};
template<>
struct atomic_rep<4> {       // Specialization
-#if _MSC_VER && __TBB_WORDSIZE==4
+#if _MSC_VER && !_WIN64
    // Work-around that avoids spurious /Wp64 warnings
    typedef intptr_t word;
#else
    typedef int32_t word;
#endif
    __TBB_DECL_ATOMIC_FIELD(int32_t,value,4)
};
#if __TBB_64BIT_ATOMICS
template<>
struct atomic_rep<8> {       // Specialization

skipping to change at line 305

    template<memory_semantics M>
    value_type fetch_and_decrement() {
        return fetch_and_add<M>(__TBB_MINUS_ONE(D));
    }
    value_type fetch_and_decrement() {
        return fetch_and_add(__TBB_MINUS_ONE(D));
    }
public:
-    value_type operator+=( D addend ) {
-        return fetch_and_add(addend)+addend;
+    value_type operator+=( D value ) {
+        return fetch_and_add(value)+value;
    }
-    value_type operator-=( D addend ) {
-        // Additive inverse of addend computed using binary minus,
+    value_type operator-=( D value ) {
+        // Additive inverse of value computed using binary minus,
        // instead of unary minus, for sake of avoiding compiler warnings.
-        return operator+=(D(0)-addend);
+        return operator+=(D(0)-value);
    }
    value_type operator++() {
        return fetch_and_add(1)+1;
    }
    value_type operator--() {
        return fetch_and_add(__TBB_MINUS_ONE(D))-1;
    }

skipping to change at line 362

#if __TBB_64BIT_ATOMICS
__TBB_DECL_ATOMIC(__TBB_LONG_LONG)
__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
#else
// test_atomic will verify that sizeof(long long)==8
#endif
__TBB_DECL_ATOMIC(long)
__TBB_DECL_ATOMIC(unsigned long)
-#if defined(_MSC_VER) && __TBB_WORDSIZE==4
+#if _MSC_VER && !_WIN64
/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option.
   It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T)
   with an operator=(U) that explicitly converts the U to a T. Types T and U should be
   type synonyms on the platform. Type U should be the wider variant of T from the
   perspective of /Wp64. */
#define __TBB_DECL_ATOMIC_ALT(T,U) \
    template<> struct atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
        T operator=( U rhs ) {return store_with_release(T(rhs));} \
        atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
    };
__TBB_DECL_ATOMIC_ALT(unsigned,size_t)
__TBB_DECL_ATOMIC_ALT(int,ptrdiff_t)
#else
__TBB_DECL_ATOMIC(unsigned)
__TBB_DECL_ATOMIC(int)
-#endif /* defined(_MSC_VER) && __TBB_WORDSIZE==4 */
+#endif /* _MSC_VER && !_WIN64 */
__TBB_DECL_ATOMIC(unsigned short)
__TBB_DECL_ATOMIC(short)
__TBB_DECL_ATOMIC(char)
__TBB_DECL_ATOMIC(signed char)
__TBB_DECL_ATOMIC(unsigned char)
-#if !defined(_MSC_VER)||defined(_NATIVE_WCHAR_T_DEFINED)
+#if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
__TBB_DECL_ATOMIC(wchar_t)
#endif /* _MSC_VER||!defined(_NATIVE_WCHAR_T_DEFINED) */

//! Specialization for atomic<T*> with arithmetic and operator->.
template<typename T> struct atomic<T*>: internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T> {
    T* operator=( T* rhs ) {
        // "this" required here in strict ISO C++ because store_with_release is a dependent name
        return this->store_with_release(rhs);
    }
    atomic<T*>& operator=( const atomic<T*>& rhs ) {

skipping to change at line 424

// Helpers to workaround ugly syntax of calling template member function of a
// template class with template argument dependent on template parameters.
template <memory_semantics M, typename T>
T load ( const atomic<T>& a ) { return a.template load<M>(); }
template <memory_semantics M, typename T>
void store ( atomic<T>& a, T value ) { return a.template store<M>(value); }
+namespace interface6{
+//! Make an atomic for use in an initialization (list), as an alternative to zero-initializaton or normal assignment.
+template<typename T>
+atomic<T> make_atomic(T t) {
+    atomic<T> a;
+    store<relaxed>(a,t);
+    return a;
+}
+}
+using interface6::make_atomic;
+namespace internal {
+// only to aid in the gradual conversion of ordinary variables to proper atomics
+template<typename T>
+inline atomic<T>& as_atomic( T& t ) {
+    return (atomic<T>&)t;
+}
+} // namespace tbb::internal
} // namespace tbb

-#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
+#if _MSC_VER && !__INTEL_COMPILER
    #pragma warning (pop)
#endif // warnings 4244, 4267 are back

#endif /* __TBB_atomic_H */

End of changes. 13 change blocks; 14 lines changed or deleted, 36 lines changed or added.

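A usage sketch for the additions above: make_atomic() initializes an atomic in a declaration, and the free load/store helpers take an explicit memory_semantics template argument. It assumes the tbb/atomic.h header from this same release; the variable and function names are illustrative.

#include "tbb/atomic.h"

tbb::atomic<int>  data = tbb::make_atomic(0);      // relaxed-initialized atomics
tbb::atomic<bool> flag = tbb::make_atomic(false);

void producer() {
    tbb::store<tbb::relaxed>(data, 42);            // plain payload write
    tbb::store<tbb::release>(flag, true);          // publish with release semantics
}

int consumer() {
    if (tbb::load<tbb::acquire>(flag))             // acquire pairs with the release above
        return tbb::load<tbb::relaxed>(data);      // guaranteed to observe 42
    return -1;                                     // flag not yet published
}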

cache_aligned_allocator.h

skipping to change at line 34

    the GNU General Public License. This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

#ifndef __TBB_cache_aligned_allocator_H
#define __TBB_cache_aligned_allocator_H

#include <new>
#include "tbb_stddef.h"
+#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
+#include <utility> // std::forward
+#endif

namespace tbb {

//! @cond INTERNAL
namespace internal {

//! Cache/sector line size.
/** @ingroup memory_allocation */
size_t __TBB_EXPORTED_FUNC NFS_GetLineSize();

//! Allocate memory on cache/sector line boundary.

skipping to change at line 102 / 105

    void deallocate( pointer p, size_type ) {
        internal::NFS_Free(p);
    }
    //! Largest value for which method allocate might succeed.
    size_type max_size() const throw() {
        return (~size_t(0)-internal::NFS_MaxLineSize)/sizeof(value_type);
    }
    //! Copy-construct value at location pointed to by p.
+#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
+    template<typename... Args>
+    void construct(pointer p, Args&&... args)
+#if __TBB_CPP11_STD_FORWARD_BROKEN
+        { ::new((void *)p) T((args)...); }
+#else
+        { ::new((void *)p) T(std::forward<Args>(args)...); }
+#endif
+#else // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
+#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    //! Destroy value at location pointed to by p.
    void destroy( pointer p ) {p->~value_type();}
};

#if _MSC_VER && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4100 is back

//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1

End of changes. 3 change blocks; 0 lines changed or deleted, 15 lines changed or added.

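A usage sketch for cache_aligned_allocator: every block returned by allocate() starts on a cache-line boundary and is padded out to whole lines, so objects obtained from separate allocations cannot share a line. With the variadic construct() added above, C++11 code could forward constructor arguments directly; the pre-C++11 copy-construct form is shown here. The counter type and values are illustrative.

#include "tbb/cache_aligned_allocator.h"

struct padded_counter { long value; };

int main() {
    tbb::cache_aligned_allocator<padded_counter> alloc;
    padded_counter* a = alloc.allocate(1);   // each allocation gets its own cache line(s)
    padded_counter* b = alloc.allocate(1);
    alloc.construct(a, padded_counter());    // copy-construct in place
    alloc.construct(b, padded_counter());
    a->value = 1;
    b->value = 2;                            // no false sharing between a and b
    alloc.destroy(a);  alloc.deallocate(a, 1);
    alloc.destroy(b);  alloc.deallocate(b, 1);
    return 0;
}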

concurrent_hash_map.h

skipping to change at line 174

        return (segment_index_t(1)<<k & ~segment_index_t(1));
    }
    //! @return segment size except for @arg k == 0
    static size_type segment_size( segment_index_t k ) {
        return size_type(1)<<k; // fake value for k==0
    }
    //! @return true if @arg ptr is valid pointer
    static bool is_valid( void *ptr ) {
-        return reinterpret_cast<size_t>(ptr) > size_t(63);
+        return reinterpret_cast<uintptr_t>(ptr) > uintptr_t(63);
    }
    //! Initialize buckets
    static void init_buckets( segment_ptr_t ptr, size_type sz, bool is_initial ) {
        if( is_initial ) std::memset(ptr, 0, sz*sizeof(bucket) );
        else for(size_type i = 0; i < sz; i++, ptr++) {
            *reinterpret_cast<intptr_t*>(&ptr->mutex) = 0;
            ptr->node_list = rehash_req;
        }
    }
    //! Add node @arg n to bucket @arg b
    static void add_to_bucket( bucket *b, node_base *n ) {
        __TBB_ASSERT(b->node_list != rehash_req, NULL);
        n->next = b->node_list;
        b->node_list = n; // its under lock and flag is set
    }
    //! Exception safety helper
-    struct enable_segment_failsafe {
+    struct enable_segment_failsafe : tbb::internal::no_copy {
        segment_ptr_t *my_segment_ptr;
        enable_segment_failsafe(segments_table_t &table, segment_index_t k) : my_segment_ptr(&table[k]) {}
        ~enable_segment_failsafe() {
            if( my_segment_ptr ) *my_segment_ptr = 0; // indicate no allocation in progress
        }
    };
    //! Enable segment
    void enable_segment( segment_index_t k, bool is_initial = false ) {
        __TBB_ASSERT( k, "Zero segment must be embedded" );

skipping to change at line 1065

    __TBB_ASSERT( item_accessor.my_node, NULL );
    node_base *const n = item_accessor.my_node;
    hashcode_t const h = item_accessor.my_hash;
    hashcode_t m = (hashcode_t) itt_load_word_with_acquire( my_mask );
    do {
        // get bucket
        bucket_accessor b( this, h & m, /*writer=*/true );
        node_base **p = &b()->node_list;
        while( *p && *p != n )
            p = &(*p)->next;
-        if( !*p ) { // someone else was the first
+        if( !*p ) { // someone else was first
            if( check_mask_race( h, m ) )
                continue;
            item_accessor.release();
            return false;
        }
        __TBB_ASSERT( *p == n, NULL );
        *p = n->next; // remove from container
        my_size--;
        break;
    } while(true);

skipping to change at line 1132

    std::swap(this->my_allocator, table.my_allocator);
    std::swap(this->my_hash_compare, table.my_hash_compare);
    internal_swap(table);
}

template<typename Key, typename T, typename HashCompare, typename A>
void concurrent_hash_map<Key,T,HashCompare,A>::rehash(size_type sz) {
    reserve( sz ); // TODO: add reduction of number of buckets as well
    hashcode_t mask = my_mask;
    hashcode_t b = (mask+1)>>1; // size or first index of the last segment
-    __TBB_ASSERT((b&(b-1))==0, NULL);
+    __TBB_ASSERT((b&(b-1))==0, NULL); // zero or power of 2
    bucket *bp = get_bucket( b ); // only the last segment should be scanned for rehashing
    for(; b <= mask; b++, bp++ ) {
        node_base *n = bp->node_list;
        __TBB_ASSERT( is_valid(n) || n == internal::empty_rehashed || n == internal::rehash_req, "Broken internal structure" );
        __TBB_ASSERT( *reinterpret_cast<intptr_t*>(&bp->mutex) == 0, "concurrent or unexpectedly terminated operation during rehash() execution" );
        if( n == internal::rehash_req ) { // rehash bucket, conditional because rehashing of a previous bucket may affect this one
            hashcode_t h = b; bucket *b_old = bp;
            do {
                __TBB_ASSERT( h > 1, "The lowermost buckets can't be rehashed" );
                hashcode_t m = ( 1u<<__TBB_Log2( h ) ) - 1; // get parent mask from the topmost bit

End of changes. 5 change blocks; 7 lines changed or deleted, 7 lines changed or added.

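The rehash() loop above derives a bucket's parent by masking off the topmost set bit of its index (m = (1u<<__TBB_Log2(h)) - 1, then h & m). A small worked example, with a portable loop standing in for __TBB_Log2:

#include <cstddef>
#include <cassert>

static size_t log2_floor(size_t x) {              // stand-in for __TBB_Log2
    size_t r = 0;
    while (x >>= 1) ++r;
    return r;
}

static size_t parent_bucket(size_t h) {
    size_t m = (size_t(1) << log2_floor(h)) - 1;  // mask covering the bits below the topmost one
    return h & m;                                 // dropping that bit yields the parent index
}

int main() {
    assert(parent_bucket(13) == 5);  // 0b1101 -> 0b0101
    assert(parent_bucket(5)  == 1);  // 0b0101 -> 0b0001
    assert(parent_bucket(6)  == 2);  // 0b0110 -> 0b0010
    return 0;
}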

concurrent_priority_queue.h

skipping to change at line 234

       mark-1 (it may be empty). Then there are 0 or more elements
       that have not yet been inserted into the heap, in positions
       mark through my_size-1. */
    std::vector<value_type, allocator_type> data;
    void handle_operations(cpq_operation *op_list) {
        cpq_operation *tmp, *pop_list=NULL;
        __TBB_ASSERT(mark == data.size(), NULL);
-        // first pass processes all constant time operations: pushes,
-        // tops, some pops. Also reserve.
+        // First pass processes all constant (amortized; reallocation may happen) time pushes and pops.
        while (op_list) {
            // ITT note: &(op_list->status) tag is used to cover accesses to op_list
            // node. This thread is going to handle the operation, and so will acquire it
            // and perform the associated operation w/o triggering a race condition; the
            // thread that created the operation is waiting on the status field, so when
            // this thread is done with the operation, it will perform a
            // store_with_release to give control back to the waiting thread in
            // aggregator::insert_operation.
            call_itt_notify(acquired, &(op_list->status));
            __TBB_ASSERT(op_list->type != INVALID_OP, NULL);

End of changes. 1 change block; 2 lines changed or deleted, 2 lines changed or added.

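The invariant documented above (elements [0, mark) form a heap, elements [mark, my_size) are pending insertions) can be pictured with the standard heap algorithms. This is only an illustration of the layout, not the class's actual second-pass code:

#include <vector>
#include <algorithm>
#include <cassert>
#include <cstddef>

int main() {
    std::vector<int> data;
    data.push_back(30); data.push_back(20); data.push_back(10);
    std::make_heap(data.begin(), data.end());
    size_t mark = data.size();          // [0, mark) is a valid heap

    data.push_back(40);                 // pending elements, not yet in the heap
    data.push_back(5);

    while (mark < data.size()) {        // fold pending elements into the heap
        ++mark;
        std::push_heap(data.begin(), data.begin() + mark);
    }
    assert(mark == data.size());        // invariant restored
    assert(data.front() == 40);         // largest element is now at the top
    return 0;
}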

concurrent_vector.h

skipping to change at line 408

    Methods working with memory allocation and/or new elements construction can throw an
    exception if allocator fails to allocate memory or element's default constructor throws one.
    Concurrent vector's element of type T must conform to the following requirements:
    - Throwing an exception is forbidden for destructor of T.
    - Default constructor of T must not throw an exception OR its non-virtual destructor must safely work when its object memory is zero-initialized.
    .
    Otherwise, the program's behavior is undefined.
    @par
    If an exception happens inside growth or assignment operation, an instance of the vector becomes invalid unless it is stated otherwise in the method documentation.
    Invalid state means:
-    - There are no guaranties that all items were initialized by a constructor. The rest of items is zero-filled, including item where exception happens.
+    - There are no guarantees that all items were initialized by a constructor. The rest of items is zero-filled, including item where exception happens.
    - An invalid vector instance cannot be repaired; it is unable to grow anymore.
    - Size and capacity reported by the vector are incorrect, and calculated as if the failed operation were successful.
    - Attempt to access not allocated elements using operator[] or iterators results in access violation or segmentation fault exception, and in case of using at() method a C++ exception is thrown.
    .
    If a concurrent grow operation successfully completes, all the elements it has added to the vector will remain valid and accessible even if one of subsequent grow operations fails.
    @par Fragmentation
    Unlike an STL vector, a concurrent_vector does not move existing elements if it needs
    to allocate more memory. The container is divided into a series of contiguous arrays of
    elements. The first reservation, growth, or assignment operation determines the size of

skipping to change at line 431

    merges several smaller arrays into one solid.
    @par Changes since TBB 2.1
    - Fixed guarantees of concurrent_vector::size() and grow_to_at_least() methods to assure elements are allocated.
    - Methods end()/rbegin()/back() are partly thread-safe since they use size() to get the end of vector
    - Added resize() methods (not thread-safe)
    - Added cbegin/cend/crbegin/crend methods
    - Changed return type of methods grow* and push_back to iterator
    @par Changes since TBB 2.0
-    - Implemented exception-safety guaranties
+    - Implemented exception-safety guarantees
    - Added template argument for allocator
    - Added allocator argument in constructors
    - Faster index calculation
    - First growth call specifies a number of segments to be merged in the first allocation.
    - Fixed memory blow up for swarm of vector's instances of small size
    - Added grow_by(size_type n, const_reference t) growth using copying constructor to init new items.
    - Added STL-like constructors.
    - Added operators ==, < and derivatives
    - Added at() method, approved for using after an exception was thrown inside the vector
    - Added get_allocator() method.

skipping to change at line 615

    size_type grow_by( size_type delta ) {
        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size;
    }
#else
    /** Returns iterator pointing to the first new element. */
    iterator grow_by( size_type delta ) {
        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array, NULL ) : my_early_size);
    }
#endif
-    //! Grow by "delta" elements using copying constuctor.
+    //! Grow by "delta" elements using copying constructor.
#if TBB_DEPRECATED
    /** Returns old size. */
    size_type grow_by( size_type delta, const_reference t ) {
        return delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size;
    }
#else
    /** Returns iterator pointing to the first new element. */
    iterator grow_by( size_type delta, const_reference t ) {
        return iterator(*this, delta ? internal_grow_by( delta, sizeof(T), &initialize_array_by, static_cast<const void*>(&t) ) : my_early_size);
    }

skipping to change at line 692

    reference at( size_type index ) {
        return internal_subscript_with_exceptions(index);
    }
    //! Get const reference to element at given index. Throws exceptions on errors.
    const_reference at( size_type index ) const {
        return internal_subscript_with_exceptions(index);
    }
    //! Get range for iterating with parallel algorithms
-    range_type range( size_t grainsize = 1) {
+    range_type range( size_t grainsize = 1 ) {
        return range_type( begin(), end(), grainsize );
    }
    //! Get const range for iterating with parallel algorithms
    const_range_type range( size_t grainsize = 1 ) const {
        return const_range_type( begin(), end(), grainsize );
    }
    //------------------------------------------------------------------------
    // Capacity
    //------------------------------------------------------------------------
    //! Return size of vector. It may include elements under construction
    size_type size() const {
        size_type sz = my_early_size, cp = internal_capacity();
        return cp < sz ? cp : sz;
    }
    //! Return false if vector is not empty or has elements under construction at least.

skipping to change at line 894 / 895

        const size_type n;
        size_type i;
        internal_loop_guide(size_type ntrials, void *ptr)
            : array(static_cast<pointer>(ptr)), n(ntrials), i(0) {}
        void init() { for(; i < n; ++i) new( &array[i] ) T(); }
        void init(const void *src) { for(; i < n; ++i) new( &array[i] ) T(*static_cast<const T*>(src)); }
        void copy(const void *src) { for(; i < n; ++i) new( &array[i] ) T(static_cast<const T*>(src)[i]); }
        void assign(const void *src) { for(; i < n; ++i) array[i] = static_cast<const T*>(src)[i]; }
        template<class I> void iterate(I &src) { for(; i < n; ++i, ++src) new( &array[i] ) T( *src ); }
        ~internal_loop_guide() {
-            if(i < n) // if exception raised, do zerroing on the rest of items
+            if(i < n) // if exception raised, do zeroing on the rest of items
                std::memset(array+i, 0, (n-i)*sizeof(value_type));
        }
    };
};

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (push)
    #pragma warning (disable: 4701) // potentially uninitialized local variable "old"
#endif

template<typename T, class A>

End of changes. 6 change blocks; 5 lines changed or deleted, 6 lines changed or added.

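A usage sketch of the iterator-returning grow_by/push_back mentioned in the change log above (the non-deprecated branch, i.e. TBB_DEPRECATED not set); the values are illustrative:

#include "tbb/concurrent_vector.h"
#include <cassert>

int main() {
    tbb::concurrent_vector<int> v;

    tbb::concurrent_vector<int>::iterator it = v.grow_by(3, 7); // append three copies of 7
    assert(*it == 7);                  // iterator to the first element just added

    it = v.push_back(42);              // push_back also returns an iterator
    assert(*it == 42);
    assert(v.size() == 4);
    return 0;
}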

enumerable_thread_specific.h

skipping to change at line 63

namespace internal {

    template<ets_key_usage_type ETS_key_type>
    class ets_base: tbb::internal::no_copy {
    protected:
#if _WIN32||_WIN64
        typedef DWORD key_type;
#else
        typedef pthread_t key_type;
#endif
-#if __TBB_GCC_3_3_PROTECTED_BROKEN
+#if __TBB_PROTECTED_NESTED_CLASS_BROKEN
    public:
#endif
        struct slot;
        struct array {
            array* next;
            size_t lg_size;
            slot& at( size_t k ) {
                return ((slot*)(void*)(this+1))[k];
            }

skipping to change at line 90

        struct slot {
            key_type key;
            void* ptr;
            bool empty() const {return !key;}
            bool match( key_type k ) const {return key==k;}
            bool claim( key_type k ) {
                __TBB_ASSERT(sizeof(tbb::atomic<key_type>)==sizeof(key_type), NULL);
                return tbb::internal::punned_cast<tbb::atomic<key_type>*>(&key)->compare_and_swap(k,0)==0;
            }
        };
-#if __TBB_GCC_3_3_PROTECTED_BROKEN
+#if __TBB_PROTECTED_NESTED_CLASS_BROKEN
    protected:
#endif
        static key_type key_of_current_thread() {
            tbb::tbb_thread::id id = tbb::this_tbb_thread::get_id();
            key_type k;
            memcpy( &k, &id, sizeof(k) );
            return k;
        }

skipping to change at line 123

            std::memset( a+1, 0, n*sizeof(slot) );
            return a;
        }
        void free(array* a) {
            size_t n = 1<<(a->lg_size);
            free_array( (void *)a, size_t(sizeof(array)+n*sizeof(slot)) );
        }
        static size_t hash( key_type k ) {
            // Multiplicative hashing. Client should use *upper* bits.
            // casts required for Mac gcc4.* compiler
-#if __TBB_WORDSIZE == 4
-            return uintptr_t(k)*0x9E3779B9;
-#else
-            return uintptr_t(k)*0x9E3779B97F4A7C15;
-#endif
+            return uintptr_t(k)*tbb::internal::size_t_select(0x9E3779B9,0x9E3779B97F4A7C15ULL);
        }
        ets_base() {my_root=NULL; my_count=0;}
        virtual ~ets_base(); // g++ complains if this is not virtual...
        void* table_lookup( bool& exists );
        void table_clear();
        slot& table_find( key_type k ) {
            size_t h = hash(k);
            array* r = my_root;
            size_t mask = r->mask();

End of changes. 3 change blocks; 7 lines changed or deleted, 4 lines changed or added.

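The comment above notes that clients of this multiplicative hash should use the *upper* bits of the result. A standalone sketch of what that means for a table of 2^lg_size slots (the names and the index computation are illustrative, not the exact ets_base code):

#include <cstddef>
#include <stdint.h>
#include <cassert>

static size_t mult_hash(size_t k) {
    const size_t multiplier = sizeof(size_t) == 4
        ? (size_t)2654435769U : (size_t)11400714819323198485ULL;
    return k * multiplier;                              // low bits mix poorly, high bits mix well
}

static size_t slot_index(size_t k, size_t lg_size) {
    return mult_hash(k) >> (8 * sizeof(size_t) - lg_size); // take the top lg_size bits
}

int main() {
    const size_t lg_size = 5;                           // 32-slot table
    assert(slot_index(123, lg_size) < (size_t(1) << lg_size));
    return 0;
}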

flow_graph.h

skipping to change at line 310

    private:
        Receiver &my_receiver;
        Body my_body;
    };

public:
    //! Constructs a graph with isolated task_group_context
    explicit graph() : my_nodes(NULL), my_nodes_last(NULL)
    {
        own_context = true;
+        cancelled = false;
+        caught_exception = false;
        my_context = new task_group_context();
        my_root_task = ( new ( task::allocate_root(*my_context) ) empty_task );
        my_root_task->set_ref_count(1);
    }
    //! Constructs a graph with use_this_context as context
    explicit graph(task_group_context& use_this_context) :
        my_context(&use_this_context), my_nodes(NULL), my_nodes_last(NULL)
    {
        own_context = false;

skipping to change at line 370 / 372

        that need to block a wait_for_all() on the graph. For example a one-off source. */
    template< typename Body >
    void run( Body body ) {
        task::enqueue( * new ( task::allocate_additional_child_of( *my_root_task ) )
            run_task< Body >( body ) );
    }
    //! Wait until graph is idle and decrement_wait_count calls equals increment_wait_count calls.
    /** The waiting thread will go off and steal work while it is block in the wait_for_all. */
    void wait_for_all() {
-        if (my_root_task)
-            my_root_task->wait_for_all();
-        my_root_task->set_ref_count(1);
+        cancelled = false;
+        caught_exception = false;
+        if (my_root_task) {
+#if TBB_USE_EXCEPTIONS
+            try {
+#endif
+                my_root_task->wait_for_all();
+                cancelled = my_context->is_group_execution_cancelled();
+#if TBB_USE_EXCEPTIONS
+            }
+            catch(...) {
+                my_root_task->set_ref_count(1);
+                my_context->reset();
+                caught_exception = true;
+                cancelled = true;
+                throw;
+            }
+#endif
+            my_root_task->set_ref_count(1);
+        }
    }
    //! Returns the root task of the graph
    task * root_task() {
        return my_root_task;
    }
    // ITERATORS
    template<typename C, typename N>
    friend class graph_iterator;

skipping to change at line 402 / 421

    iterator end() { return iterator(this, false); }
    //! start const iterator
    const_iterator begin() const { return const_iterator(this, true); }
    //! end const iterator
    const_iterator end() const { return const_iterator(this, false); }
    //! start const iterator
    const_iterator cbegin() const { return const_iterator(this, true); }
    //! end const iterator
    const_iterator cend() const { return const_iterator(this, false); }
+    //! return status of graph execution
+    bool is_cancelled() { return cancelled; }
+    bool exception_thrown() { return caught_exception; }
private:
    task *my_root_task;
    task_group_context *my_context;
    bool own_context;
+    bool cancelled;
+    bool caught_exception;
    graph_node *my_nodes, *my_nodes_last;
    spin_mutex nodelist_mutex;
    void register_node(graph_node *n);
    void remove_node(graph_node *n);
};

template <typename C, typename N>
graph_iterator<C,N>::graph_iterator(C *g, bool begin) : my_graph(g), current_node(NULL)
{
    if (begin) current_node = my_graph->my_nodes;
    //else it is an end iterator by default
}

template <typename C, typename N>

skipping to change at line 795 / 821

    continue_node( graph &g, int number_of_predecessors, Body body ) :
        graph_node(g), internal::continue_input<output_type>( g, number_of_predecessors, body )
    {}
    //! Copy constructor
    continue_node( const continue_node& src ) :
        graph_node(src.my_graph), internal::continue_input<output_type>(src),
        internal::function_output<Output>()
    {}
-    bool try_put(const input_type &i) { return internal::continue_input<Output>::try_put(i); }
protected:
    /* override */ internal::broadcast_cache<output_type> &successors () { return fOutput_type::my_successors; }
};

template< typename T >
class overwrite_node : public graph_node, public receiver<T>, public sender<T> {
    using graph_node::my_graph;
public:
    typedef T input_type;
    typedef T output_type;

End of changes. 6 change blocks; 3 lines changed or deleted, 32 lines changed or added.

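A usage sketch of the new error reporting above: when a node body throws, wait_for_all() rethrows (with TBB_USE_EXCEPTIONS), and the graph can afterwards be queried with is_cancelled() and exception_thrown(). The function_node body below is illustrative and assumes the flow::function_node API from the same TBB release.

#include "tbb/flow_graph.h"
#include <iostream>
#include <stdexcept>

struct thrower {
    int operator()(int v) const {
        if (v == 3) throw std::runtime_error("bad input");  // fails on one input
        return v * 2;
    }
};

int main() {
    tbb::flow::graph g;
    tbb::flow::function_node<int, int> f(g, tbb::flow::unlimited, thrower());

    for (int i = 0; i < 5; ++i) f.try_put(i);
    try {
        g.wait_for_all();                        // rethrows the exception from the body
    } catch (const std::exception& e) {
        std::cout << "caught: " << e.what() << "\n";
    }
    std::cout << "cancelled: " << g.is_cancelled()
              << " exception_thrown: " << g.exception_thrown() << "\n";
    return 0;
}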

 gcc_generic.h   gcc_generic.h 
skipping to change at line 40 skipping to change at line 40
#error Do not #include this internal file directly; use public TBB headers instead. #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_gcc_generic_H #define __TBB_machine_gcc_generic_H
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#define __TBB_WORDSIZE __SIZEOF_POINTER__ #define __TBB_WORDSIZE __SIZEOF_POINTER__
// For some reason straight mapping does not work on mingw #ifdef __BYTE_ORDER__
#if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ #if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#define __TBB_BIG_ENDIAN 0 #define __TBB_BIG_ENDIAN 1
#elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__ #elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 0
#else #elif __BYTE_ORDER__==__ORDER_PDP_ENDIAN__
#error Unsupported endianness #define __TBB_BIG_ENDIAN -1 // not currently supported
#endif
#endif #endif
/** As this generic implementation has absolutely no information about unde rlying /** As this generic implementation has absolutely no information about unde rlying
hardware, its performance most likely will be sub-optimal because of fu ll memory hardware, its performance most likely will be sub-optimal because of fu ll memory
fence usages where a more lightweight synchronization means (or none at all) fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platf orm, could suffice. Thus if you use this header to enable TBB on a new platf orm,
consider forking it and relaxing below helpers as appropriate. **/ consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_acquire_consistency_helper() __sync_synchronize() #define __TBB_acquire_consistency_helper() __sync_synchronize()
#define __TBB_release_consistency_helper() __sync_synchronize() #define __TBB_release_consistency_helper() __sync_synchronize()
#define __TBB_full_memory_fence() __sync_synchronize() #define __TBB_full_memory_fence() __sync_synchronize()
skipping to change at line 76 skipping to change at line 77
} \ } \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t) __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t) __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t) __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t) __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)
#undef __TBB_MACHINE_DEFINE_ATOMICS #undef __TBB_MACHINE_DEFINE_ATOMICS
namespace tbb{ namespace internal { namespace gcc_builtins { namespace tbb{ namespace internal { namespace gcc_builtins {
int clz(unsigned int x){ return __builtin_clz(x);}; inline int clz(unsigned int x){ return __builtin_clz(x);};
int clz(unsigned long int x){ return __builtin_clzl(x);}; inline int clz(unsigned long int x){ return __builtin_clzl(x);};
inline int clz(unsigned long long int x){ return __builtin_clzll(x);};
}}} }}}
//gcc __builtin_clz builtin count _number_ of leading zeroes //gcc __builtin_clz builtin count _number_ of leading zeroes
static inline intptr_t __TBB_machine_lg( uintptr_t x ) { static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ; return sizeof(x)*8 - tbb::internal::gcc_builtins::clz(x) -1 ;
} }
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend) ; __sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend) ;
} }
skipping to change at line 114 skipping to change at line 116
// Machine specific atomic operations // Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_TryLockByte __TBB_machine_try_lock_byte #define __TBB_TryLockByte __TBB_machine_try_lock_byte
#define __TBB_UnlockByte __TBB_machine_unlock_byte #define __TBB_UnlockByte __TBB_machine_unlock_byte
// Definition of other functions // Definition of other functions
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __TBB_WORDSIZE==4 #if __TBB_WORDSIZE==4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 #define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif #endif
 End of changes. 4 change blocks. 
13 lines changed or deleted 16 lines changed or added
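
For orientation, __TBB_machine_lg above maps GCC's count-leading-zeroes builtin onto floor(log2 x): for nonzero x the result is the bit width minus the leading-zero count minus one. A minimal standalone sketch of that mapping (the function name and test values are illustrative, not part of the header):

    #include <cassert>

    // floor(log2(x)) for x > 0, mirroring the clz-based idiom used above.
    static inline long illustrative_lg( unsigned long x ) {
        return (long)(sizeof(x)*8) - __builtin_clzl(x) - 1;
    }

    int main() {
        assert( illustrative_lg(1) == 0 );
        assert( illustrative_lg(3) == 1 );      // rounds down
        assert( illustrative_lg(1024) == 10 );
        return 0;
    }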


 ibm_aix51.h   ibm_aix51.h 
skipping to change at line 38 skipping to change at line 38
// TODO: revise by comparing with mac_ppc.h // TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H) #if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
#error Do not #include this internal file directly; use public TBB headers instead. #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_ibm_aix51_H #define __TBB_machine_ibm_aix51_H
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 1 // assumption based on operating system
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#include <sched.h> #include <sched.h>
extern "C" { extern "C" {
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand); int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand); int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
void __TBB_machine_flush (); void __TBB_machine_flush ();
void __TBB_machine_lwsync (); void __TBB_machine_lwsync ();
void __TBB_machine_isync (); void __TBB_machine_isync ();
} }
// Mapping of old entry point names retained for the sake of backward binary compatibility // Mapping of old entry point names retained for the sake of backward binary compatibility
#define __TBB_machine_cmpswp4 __TBB_machine_cas_32 #define __TBB_machine_cmpswp4 __TBB_machine_cas_32
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64 #define __TBB_machine_cmpswp8 __TBB_machine_cas_64
#define __TBB_Yield() sched_yield() #define __TBB_Yield() sched_yield()
#define __TBB_USE_GENERIC_PART_WORD_CAS 1 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1 #define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __GNUC__ #if __GNUC__
    #define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")     #define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
    #define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")     #define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
    #define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")     #define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
    #define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")     #define __TBB_full_memory_fence()          __asm__ __volatile__( "sync": : :"memory")
#else #else
// IBM C++ Compiler does not support inline assembly // IBM C++ Compiler does not support inline assembly
// TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more // TODO: Since XL 9.0 or earlier GCC syntax is supported. Replace with more
// lightweight implementation (like in mac_ppc.h) // lightweight implementation (like in mac_ppc.h)
 End of changes. 2 change blocks. 
6 lines changed or deleted 7 lines changed or added
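
ibm_aix51.h exposes only compare-and-swap primitives plus fences and turns on the generic layers (__TBB_USE_GENERIC_FETCH_ADD and friends). As a rough, hedged illustration of how a fetch-and-add can be layered on a bare CAS, consider the sketch below; cas32 is a stand-in emulated with a GCC builtin so the example is self-contained, and this is not the actual TBB implementation:

    #include <stdint.h>

    // Stand-in for a hardware CAS that returns the previous value.
    static inline int32_t cas32( volatile int32_t* ptr, int32_t value, int32_t comparand ) {
        return __sync_val_compare_and_swap( ptr, comparand, value );
    }

    // Sketch: fetch-and-add expressed as a CAS retry loop, the general shape
    // of the layer that __TBB_USE_GENERIC_FETCH_ADD selects.
    static inline int32_t sketch_fetchadd4( volatile int32_t* ptr, int32_t addend ) {
        int32_t old = *ptr;
        for(;;) {
            int32_t prev = cas32( ptr, old + addend, old );
            if( prev == old ) return old;   // exchange happened; return the prior value
            old = prev;                     // lost the race; retry with the fresh value
        }
    }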


 index.html   index.html 
<HTML> <HTML>
<BODY> <BODY>
<H2>Overview</H2> <H2>Overview</H2>
Include files for Threading Building Blocks. Include files for Intel&reg; Threading Building Blocks (Intel&reg; TBB).
<H2>Directories</H2> <H2>Directories</H2>
<DL> <DL>
<DT><A HREF="tbb/index.html">tbb</A> <DT><A HREF="tbb/index.html">tbb</A>
<DD>Include files for Threading Building Blocks classes and functions. <DD>Include files for Intel TBB classes and functions.
</DL> </DL>
<HR> <HR>
<A HREF="../index.html">Up to parent directory</A> <A HREF="../index.html">Up to parent directory</A>
<p></p> <p></p>
Copyright &copy; 2005-2012 Intel Corporation. All Rights Reserved. Copyright &copy; 2005-2012 Intel Corporation. All Rights Reserved.
<P></P> <P></P>
Intel is a registered trademark or trademark of Intel Corporation Intel is a registered trademark or trademark of Intel Corporation
or its subsidiaries in the United States and other countries. or its subsidiaries in the United States and other countries.
<p></p> <p></p>
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added


 linux_ia32.h   linux_ia32.h 
skipping to change at line 95 skipping to change at line 95
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q") __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r") __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r") __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
#if __INTEL_COMPILER #if __INTEL_COMPILER
#pragma warning( push ) #pragma warning( push )
// reference to EBX in a function requiring stack alignment // reference to EBX in a function requiring stack alignment
#pragma warning( disable: 998 ) #pragma warning( disable: 998 )
#endif #endif
static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )     static inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
{                                             #if __TBB_GCC_BUILTIN_ATOMICS_PRESENT
    return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
    int64_t result;                           int64_t result;
union { union {
int64_t i64; int64_t i64;
int32_t i32[2]; int32_t i32[2];
}; };
i64 = value; i64 = value;
#if __PIC__ #if __PIC__
/* compiling position-independent code */ /* compiling position-independent code */
// EBX register preserved for compliance with position-independent code rules on IA32 // EBX register preserved for compliance with position-independent code rules on IA32
int32_t tmp; int32_t tmp;
skipping to change at line 142 skipping to change at line 144
__asm__ __volatile__ ( __asm__ __volatile__ (
"lock\n\t cmpxchg8b %1\n\t" "lock\n\t cmpxchg8b %1\n\t"
: "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr) : "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
: "m"(*(__TBB_VOLATILE int64_t *)ptr) : "m"(*(__TBB_VOLATILE int64_t *)ptr)
, "0"(comparand) , "0"(comparand)
, "b"(i32[0]), "c"(i32[1]) , "b"(i32[0]), "c"(i32[1])
: "memory" : "memory"
); );
#endif /* __PIC__ */ #endif /* __PIC__ */
return result; return result;
#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
} }
#if __INTEL_COMPILER #if __INTEL_COMPILER
#pragma warning( pop ) #pragma warning( pop )
#endif // warning 998 is back #endif // warning 998 is back
static inline int32_t __TBB_machine_lg( uint32_t x ) { static inline int32_t __TBB_machine_lg( uint32_t x ) {
int32_t j; int32_t j;
__asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
return j; return j;
skipping to change at line 169 skipping to change at line 172
__asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_ t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory"); __asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_ t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
} }
static inline void __TBB_machine_pause( int32_t delay ) { static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) { for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;"); __asm__ __volatile__("pause;");
} }
return; return;
} }
//TODO: Check if it possible and profitable for IA-32 on (Linux and Windows)
//to use of 64-bit load/store via floating point registers together with full fence
//for sequentially consistent load/store, instead of CAS.
#if __clang__
#define __TBB_fildq "fildll"
#define __TBB_fistpq "fistpll"
#else
#define __TBB_fildq "fildq"
#define __TBB_fistpq "fistpq"
#endif
static inline int64_t __TBB_machine_load8 (const volatile void *ptr) { static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
int64_t result; int64_t result;
if( ((uint32_t)ptr&7u)==0 ) { if( ((uint32_t)ptr&7u)==0 ) {
// Aligned load // Aligned load
__asm__ __volatile__ ( "fildq %1\n\t" __asm__ __volatile__ ( __TBB_fildq " %1\n\t"
"fistpq %0" : "=m"(result) : "m"(*(const __ __TBB_fistpq " %0" : "=m"(result) : "m"(*(c
TBB_VOLATILE uint64_t*)ptr) : "memory" ); onst __TBB_VOLATILE uint64_t*)ptr) : "memory" );
} else { } else {
// Unaligned load // Unaligned load
result = __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0); result = __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
} }
return result; return result;
} }
//! Handles misaligned 8-byte store //! Handles misaligned 8-byte store
/** Defined in tbb_misc.cpp */ /** Defined in tbb_misc.cpp */
extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t valu e ); extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t valu e );
extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr ); extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) { static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
if( ((uint32_t)ptr&7u)==0 ) { if( ((uint32_t)ptr&7u)==0 ) {
// Aligned store // Aligned store
__asm__ __volatile__ ( "fildq %1\n\t" __asm__ __volatile__ ( __TBB_fildq " %1\n\t"
"fistpq %0" : "=m"(*(__TBB_VOLATILE int64_t __TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE
*)ptr) : "m"(value) : "memory" ); int64_t*)ptr) : "m"(value) : "memory" );
} else { } else {
// Unaligned store // Unaligned store
#if TBB_USE_PERFORMANCE_WARNINGS #if TBB_USE_PERFORMANCE_WARNINGS
__TBB_machine_store8_slow_perf_warning(ptr); __TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */ #endif /* TBB_USE_PERFORMANCE_WARNINGS */
__TBB_machine_store8_slow(ptr,value); __TBB_machine_store8_slow(ptr,value);
} }
} }
// Machine specific atomic operations // Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions // Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1 #define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1 #define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// API to retrieve/update FPU control setting // API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1 #define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t { struct __TBB_cpu_ctl_env_t {
int mxcsr; int mxcsr;
short x87cw; short x87cw;
}; };
inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) { inline void __TBB_get_cpu_ctl_env ( __TBB_cpu_ctl_env_t* ctl ) {
 End of changes. 6 change blocks. 
14 lines changed or deleted 33 lines changed or added
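
The unaligned branch of __TBB_machine_load8 above reads 64 bits atomically by issuing a compare-and-swap whose comparand and exchange value are both zero: the operation either stores the zero that is already there or fails, and in both cases it returns the current contents without modifying memory. A hedged, self-contained sketch of the same idiom using a GCC builtin (illustrative only, not the header's code):

    #include <stdint.h>

    // Atomic 64-bit read expressed as a CAS that cannot change memory contents.
    static inline int64_t sketch_atomic_load8( volatile int64_t* ptr ) {
        return __sync_val_compare_and_swap( ptr, (int64_t)0, (int64_t)0 );
    }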


 linux_ia64.h   linux_ia64.h 
skipping to change at line 179 skipping to change at line 179
namespace tbb { namespace tbb {
namespace internal { namespace internal {
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t); __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t); __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t); __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t); __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
}} // namespaces internal, tbb }} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_ATOMICS #undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1 #define __TBB_USE_FENCED_ATOMICS 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// Definition of Lock functions // Definition of Lock functions
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P) #define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
// Definition of other utility functions // Definition of other utility functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
 End of changes. 1 change blocks. 
2 lines changed or deleted 3 lines changed or added
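
Several ports in this change set newly request the generic sequentially consistent load/store layer (__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE). The usual shape of such a layer is an ordinary aligned load or store bracketed by full fences; the sketch below only illustrates that idea with __sync_synchronize, is deliberately over-synchronized, and is not the actual TBB code:

    #include <stdint.h>

    // Illustrative only: sequential consistency from full fences around
    // plain accesses to a naturally aligned word.
    static inline intptr_t sketch_load_seq_cst( const volatile intptr_t* ptr ) {
        __sync_synchronize();          // order against preceding accesses
        intptr_t value = *ptr;
        __sync_synchronize();          // order against subsequent accesses
        return value;
    }

    static inline void sketch_store_seq_cst( volatile intptr_t* ptr, intptr_t value ) {
        __sync_synchronize();
        *ptr = value;
        __sync_synchronize();
    }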


 linux_intel64.h   linux_intel64.h 
skipping to change at line 90 skipping to change at line 90
} \ } \
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"") __TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"") __TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"") __TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q") __TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
#undef __TBB_MACHINE_DEFINE_ATOMICS #undef __TBB_MACHINE_DEFINE_ATOMICS
static inline int64_t __TBB_machine_lg( uint64_t x ) { static inline int64_t __TBB_machine_lg( uint64_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
int64_t j; int64_t j;
__asm__ ("bsr %1,%0" : "=r"(j) : "r"(x)); __asm__ ("bsr %1,%0" : "=r"(j) : "r"(x));
return j; return j;
} }
static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) {     static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
    __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");     __asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}                                             }
static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) {     static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
    __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(addend), "m"(*(volatile uint64_t*)ptr) : "memory");     __asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
} }
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions // Definition of other functions
#ifndef __TBB_Pause #ifndef __TBB_Pause
static inline void __TBB_machine_pause( int32_t delay ) { static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) { for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;"); __asm__ __volatile__("pause;");
} }
return; return;
} }
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#endif /* !__TBB_Pause */ #endif /* !__TBB_Pause */
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 #define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// API to retrieve/update FPU control setting // API to retrieve/update FPU control setting
#ifndef __TBB_CPU_CTL_ENV_PRESENT #ifndef __TBB_CPU_CTL_ENV_PRESENT
#define __TBB_CPU_CTL_ENV_PRESENT 1 #define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t { struct __TBB_cpu_ctl_env_t {
int mxcsr; int mxcsr;
short x87cw; short x87cw;
}; };
 End of changes. 4 change blocks. 
11 lines changed or deleted 12 lines changed or added
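
The lock orq / lock andq helpers above have direct counterparts among the GCC atomic builtins; where inline assembly is not wanted, the same effect can be obtained as sketched below (illustrative names, not the header's implementation):

    #include <stdint.h>

    // Portable equivalents of the bit-set / bit-clear helpers above.
    static inline void sketch_atomic_or ( volatile uint64_t* ptr, uint64_t mask ) {
        __sync_fetch_and_or( ptr, mask );    // full barrier, like the lock-prefixed orq
    }
    static inline void sketch_atomic_and( volatile uint64_t* ptr, uint64_t mask ) {
        __sync_fetch_and_and( ptr, mask );   // full barrier, like the lock-prefixed andq
    }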


 mac_ppc.h   mac_ppc.h 
skipping to change at line 50 skipping to change at line 50
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided. // Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
#if __powerpc64__ || __ppc64__ #if __powerpc64__ || __ppc64__
// IBM XL documents __powerpc64__ (and __PPC64__). // IBM XL documents __powerpc64__ (and __PPC64__).
// Apple documents __ppc64__ (with __ppc__ only on 32-bit). // Apple documents __ppc64__ (with __ppc__ only on 32-bit).
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#else #else
#define __TBB_WORDSIZE 4 #define __TBB_WORDSIZE 4
#endif #endif
#ifndef __BYTE_ORDER__
// Hopefully endianness can be validly determined at runtime.
    // This may silently fail in some embedded systems with page-specific endianness.
#elif __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#define __TBB_BIG_ENDIAN 1
#elif __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__
#define __TBB_BIG_ENDIAN 0
#else
#define __TBB_BIG_ENDIAN -1 // not currently supported
#endif
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware: // On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
#if __TBB_WORDSIZE==8 #if __TBB_WORDSIZE==8
// Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds. // Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
#define __TBB_64BIT_ATOMICS 1 #define __TBB_64BIT_ATOMICS 1
#elif __bgp__ #elif __bgp__
// Do not change the following definition on known 32-bit hardware. // Do not change the following definition, because this is known 32-bit hardware.
#define __TBB_64BIT_ATOMICS 0 #define __TBB_64BIT_ATOMICS 0
#else #else
// To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0. // To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
// You must make certain that the program will only use them on actual 64-bit hardware // You must make certain that the program will only use them on actual 64-bit hardware
    // (which typically means that the entire program is only executed on such hardware),     // (which typically means that the entire program is only executed on such hardware),
// because their implementation involves machine instructions that are illegal elsewhere. // because their implementation involves machine instructions that are illegal elsewhere.
// The setting can be chosen independently per compilation unit, // The setting can be chosen independently per compilation unit,
// which also means that TBB itself does not need to be rebuilt. // which also means that TBB itself does not need to be rebuilt.
    // Alternatively (but only for the current architecture and TBB version),     // Alternatively (but only for the current architecture and TBB version),
    // override the default as a predefined macro when invoking the compiler.     // override the default as a predefined macro when invoking the compiler.
skipping to change at line 151 skipping to change at line 162
, [cmp] "=&r"(comparand_register) , [cmp] "=&r"(comparand_register)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */ , "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr] "r"(ptr) : [ptr] "r"(ptr)
, [valm]"m"(value) , [valm]"m"(value)
, [cmpm]"m"(comparand) , [cmpm]"m"(comparand)
: "memory" /* compiler full f ence */ : "memory" /* compiler full f ence */
, "cr0" /* clobbered by cm pd and/or stdcx. */ , "cr0" /* clobbered by cm pd and/or stdcx. */
); );
return result; return result;
} }
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \ #define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
template <typename T> \ template <typename T> \
struct machine_load_store<T,S> { \ struct machine_load_store<T,S> { \
static inline T load_with_acquire(const volatile T& location) { \ static inline T load_with_acquire(const volatile T& location) { \
T result; \ T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \ __asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
"0:\n\t" \ "0:\n\t" \
cmpx " %[res],%[res]\n\t" \ cmpx " %[res],%[res]\n\t" \
skipping to change at line 278 skipping to change at line 290
} }
}; };
#define __TBB_machine_load_store_relaxed_8 #define __TBB_machine_load_store_relaxed_8
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb }} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_LOAD_STORE #undef __TBB_MACHINE_DEFINE_LOAD_STORE
#define __TBB_USE_GENERIC_PART_WORD_CAS 1 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1 #define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory") #define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory") #define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
static inline intptr_t __TBB_machine_lg( uintptr_t x ) { static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
    // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0     // cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
#if __TBB_WORDSIZE==8 #if __TBB_WORDSIZE==8
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x)); __asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
return 63-static_cast<intptr_t>(x); return 63-static_cast<intptr_t>(x);
#else #else
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x)); __asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
return 31-static_cast<intptr_t>(x); return 31-static_cast<intptr_t>(x);
#endif #endif
} }
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
 End of changes. 5 change blocks. 
4 lines changed or deleted 19 lines changed or added
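
mac_ppc.h now leaves __TBB_BIG_ENDIAN undefined when the compiler provides no __BYTE_ORDER__, with the remark that endianness can hopefully be determined at run time. One conventional run-time probe is sketched below; it only illustrates that remark and is not code from the header:

    #include <string.h>

    // Run-time endianness probe: look at the first byte of a known 32-bit pattern.
    static inline bool sketch_is_big_endian() {
        const unsigned int probe = 0x01020304u;
        unsigned char first;
        memcpy( &first, &probe, 1 );   // first byte in memory order
        return first == 0x01;          // big-endian stores the most significant byte first
    }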


 macos_common.h   macos_common.h 
skipping to change at line 92 skipping to change at line 92
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX #define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX
#endif /* __TBB_UnknownArchitecture || __TBB_WORDSIZE==4 */ #endif /* __TBB_UnknownArchitecture || __TBB_WORDSIZE==4 */
#if __TBB_UnknownArchitecture #if __TBB_UnknownArchitecture
#ifndef __TBB_WORDSIZE #ifndef __TBB_WORDSIZE
#define __TBB_WORDSIZE 4 #define __TBB_WORDSIZE 4
#endif #endif
#define __TBB_BIG_ENDIAN __BIG_ENDIAN__ #ifdef __TBB_BIG_ENDIAN
// Already determined based on hardware architecture.
#elif __BIG_ENDIAN__
#define __TBB_BIG_ENDIAN 1
#elif __LITTLE_ENDIAN__
#define __TBB_BIG_ENDIAN 0
#else
#define __TBB_BIG_ENDIAN -1 // not currently supported
#endif
/** As this generic implementation has absolutely no information about underlying /** As this generic implementation has absolutely no information about underlying
    hardware, its performance most likely will be sub-optimal because of full memory     hardware, its performance most likely will be sub-optimal because of full memory
    fence usages where a more lightweight synchronization means (or none at all)     fence usages where a more lightweight synchronization means (or none at all)
    could suffice. Thus if you use this header to enable TBB on a new platform,     could suffice. Thus if you use this header to enable TBB on a new platform,
    consider forking it and relaxing below helpers as appropriate. **/     consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_control_consistency_helper() OSMemoryBarrier() #define __TBB_control_consistency_helper() OSMemoryBarrier()
#define __TBB_acquire_consistency_helper() OSMemoryBarrier() #define __TBB_acquire_consistency_helper() OSMemoryBarrier()
#define __TBB_release_consistency_helper() OSMemoryBarrier() #define __TBB_release_consistency_helper() OSMemoryBarrier()
#define __TBB_full_memory_fence() OSMemoryBarrier() #define __TBB_full_memory_fence() OSMemoryBarrier()
skipping to change at line 127 skipping to change at line 135
__TBB_ASSERT( !((uintptr_t)ptr&0x3), "address not properly aligned for Mac OS atomics"); __TBB_ASSERT( !((uintptr_t)ptr&0x3), "address not properly aligned for Mac OS atomics");
return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend; return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
} }
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t a ddend) static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t a ddend)
{ {
__TBB_ASSERT( !((uintptr_t)ptr&0x7), "address not properly aligned for Mac OS atomics"); __TBB_ASSERT( !((uintptr_t)ptr&0x7), "address not properly aligned for Mac OS atomics");
return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend; return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
} }
#define __TBB_USE_GENERIC_PART_WORD_CAS 1 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if __TBB_WORDSIZE == 4 #if __TBB_WORDSIZE == 4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 #define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif #endif
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#endif /* __TBB_UnknownArchitecture */ #endif /* __TBB_UnknownArchitecture */
 End of changes. 4 change blocks. 
7 lines changed or deleted 16 lines changed or added
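
The Mac OS fallback above derives fetch-and-add semantics from OSAtomicAdd32Barrier/OSAtomicAdd64Barrier, which return the updated value, by subtracting the addend again. A small hedged sketch of that idiom (Mac OS only, illustrative function name):

    #include <libkern/OSAtomic.h>

    // OSAtomicAdd32Barrier returns the *new* value; subtracting the addend
    // recovers the conventional fetch-and-add result, i.e. the *old* value.
    static inline int32_t sketch_fetchadd4( volatile int32_t* counter, int32_t addend ) {
        return OSAtomicAdd32Barrier( addend, counter ) - addend;
    }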


 memory_pool.h   memory_pool.h 
skipping to change at line 39 skipping to change at line 39
#ifndef __TBB_memory_pool_H #ifndef __TBB_memory_pool_H
#define __TBB_memory_pool_H #define __TBB_memory_pool_H
#if !TBB_PREVIEW_MEMORY_POOL #if !TBB_PREVIEW_MEMORY_POOL
#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h #error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h
#endif #endif
/** @file */ /** @file */
#include "scalable_allocator.h" #include "scalable_allocator.h"
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include "tbb_machine.h" // TODO: Itanium requires linkage with TBB library #include "tbb_machine.h" // TODO: avoid linkage with libtbb on IA-64
#include <new> // std::bad_alloc #include <new> // std::bad_alloc
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
#include <utility> // std::forward
#endif
#if __TBB_EXTRA_DEBUG #if __TBB_EXTRA_DEBUG
#define __TBBMALLOC_ASSERT ASSERT #define __TBBMALLOC_ASSERT ASSERT
#else #else
#define __TBBMALLOC_ASSERT(a,b) ((void)0) #define __TBBMALLOC_ASSERT(a,b) ((void)0)
#endif #endif
namespace tbb { namespace tbb {
namespace interface6 { namespace interface6 {
//! @cond INTERNAL //! @cond INTERNAL
skipping to change at line 136 skipping to change at line 139
//! Free previously allocated block of memory. //! Free previously allocated block of memory.
void deallocate( pointer p, size_type ) { void deallocate( pointer p, size_type ) {
my_pool->free(p); my_pool->free(p);
} }
//! Largest value for which method allocate might succeed. //! Largest value for which method allocate might succeed.
size_type max_size() const throw() { size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type); size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1); return (max > 0 ? max : 1);
} }
//! Copy-construct value at location pointed to by p. //! Copy-construct value at location pointed to by p.
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    template<typename... Args>
    void construct(pointer p, Args&&... args)
 #if __TBB_CPP11_STD_FORWARD_BROKEN
        { ::new((void *)p) T((args)...); }
 #else
        { ::new((void *)p) T(std::forward<Args>(args)...); }
 #endif
#else // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    void construct( pointer p, const value_type& value ) { ::new((void*)(p)) value_type(value); }     void construct( pointer p, const value_type& value ) { ::new((void*)(p)) value_type(value); }
#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
//! Destroy value at location pointed to by p. //! Destroy value at location pointed to by p.
void destroy( pointer p ) { p->~value_type(); } void destroy( pointer p ) { p->~value_type(); }
}; };
#if _MSC_VER && !defined(__INTEL_COMPILER) #if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop) #pragma warning (pop)
#endif // warning 4100 is back #endif // warning 4100 is back
skipping to change at line 243 skipping to change at line 256
    self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size );     self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size );
return 0; return 0;
} }
inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_ size(size) { inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_ size(size) {
rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true); rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true);
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_ pool); rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_ pool);
if( res!=rml::POOL_OK ) __TBB_THROW(std::bad_alloc()); if( res!=rml::POOL_OK ) __TBB_THROW(std::bad_alloc());
} }
inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) { inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) {
fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id); fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id);
    if( bytes > self.my_size || !__TBB_CompareAndSwapW(&self.my_size, 0, (bytes=self.my_size)) )     if( !__TBB_CompareAndSwapW(&self.my_size, 0, (bytes=self.my_size)) )
return 0; // all the memory was given already return 0; // all the memory was given already
return self.my_buffer; return self.my_buffer;
} }
} //namespace interface6 } //namespace interface6
using interface6::memory_pool_allocator; using interface6::memory_pool_allocator;
using interface6::memory_pool; using interface6::memory_pool;
using interface6::fixed_pool; using interface6::fixed_pool;
} //namespace tbb } //namespace tbb
 End of changes. 5 change blocks. 
2 lines changed or deleted 17 lines changed or added
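
The allocator above gains a variadic construct() that perfect-forwards its arguments into placement new when rvalue references are available, with a fallback for compilers whose std::forward is broken. A reduced sketch of the same C++11 pattern outside of TBB (illustrative class and function names):

    #include <new>
    #include <utility>

    // Perfect-forwarding construct/destroy pair: arguments go unchanged into
    // placement new at the supplied address.
    template<typename T>
    struct sketch_constructor {
        template<typename... Args>
        static void construct( T* p, Args&&... args ) {
            ::new( static_cast<void*>(p) ) T( std::forward<Args>(args)... );
        }
        static void destroy( T* p ) { p->~T(); }
    };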


 parallel_invoke.h   parallel_invoke.h 
skipping to change at line 142 skipping to change at line 142
// Waits for all child tasks // Waits for all child tasks
template <typename F0> template <typename F0>
void run_and_finish(const F0& f0) void run_and_finish(const F0& f0)
{ {
internal::function_invoker<F0>* invoker = new (allocate_child() ) internal::function_invoker<F0>(f0); internal::function_invoker<F0>* invoker = new (allocate_child() ) internal::function_invoker<F0>(f0);
__TBB_ASSERT(invoker, "Child task allocation failed"); __TBB_ASSERT(invoker, "Child task allocation failed");
spawn_and_wait_for_all(*invoker); spawn_and_wait_for_all(*invoker);
} }
}; };
    // The class destroys root if exception occured as well as in normal case     // The class destroys root if exception occurred as well as in normal case
class parallel_invoke_cleaner: internal::no_copy { class parallel_invoke_cleaner: internal::no_copy {
public: public:
#if __TBB_TASK_GROUP_CONTEXT #if __TBB_TASK_GROUP_CONTEXT
            parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)             parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
: root(*new(task::allocate_root(context)) internal::parallel_in voke_helper(number_of_children)) : root(*new(task::allocate_root(context)) internal::parallel_in voke_helper(number_of_children))
#else #else
parallel_invoke_cleaner(int number_of_children, tbb::task_group_con text&) parallel_invoke_cleaner(int number_of_children, tbb::task_group_con text&)
: root(*new(task::allocate_root()) internal::parallel_invoke_he lper(number_of_children)) : root(*new(task::allocate_root()) internal::parallel_invoke_he lper(number_of_children))
#endif /* !__TBB_TASK_GROUP_CONTEXT */ #endif /* !__TBB_TASK_GROUP_CONTEXT */
{} {}
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added
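
For context, the helper and cleaner classes above implement tbb::parallel_invoke, which runs several functors as sibling tasks and waits for all of them. A minimal usage sketch (the work functions are placeholders):

    #include "tbb/parallel_invoke.h"

    static void task_a() { /* some independent work */ }
    static void task_b() { /* some independent work */ }
    static void task_c() { /* some independent work */ }

    int main() {
        // Runs the three functions as parallel tasks and blocks until all finish.
        tbb::parallel_invoke( task_a, task_b, task_c );
        return 0;
    }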


 parallel_reduce.h   parallel_reduce.h 
skipping to change at line 46 skipping to change at line 46
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
namespace interface6 { namespace interface6 {
//! @cond INTERNAL //! @cond INTERNAL
namespace internal { namespace internal {
using namespace tbb::internal; using namespace tbb::internal;
//! 0 if root, 1 if a left child, 2 if a right child. /** Values for reduction_context. */
enum {
root_task, left_child, right_child
};
/** Represented as a char, not enum, for compactness. */ /** Represented as a char, not enum, for compactness. */
typedef char reduction_context; typedef char reduction_context;
//! Task type use to combine the partial results of parallel_reduce. //! Task type used to combine the partial results of parallel_reduce.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Body> template<typename Body>
class finish_reduce: public flag_task { class finish_reduce: public flag_task {
//! Pointer to body, or NULL if the left child has not yet finished . //! Pointer to body, or NULL if the left child has not yet finished .
bool has_right_zombie; bool has_right_zombie;
const reduction_context my_context; const reduction_context my_context;
Body* my_body; Body* my_body;
aligned_space<Body,1> zombie_space; aligned_space<Body,1> zombie_space;
finish_reduce( reduction_context context_ ) : finish_reduce( reduction_context context_ ) :
            has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?             has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
skipping to change at line 72 skipping to change at line 76
my_body(NULL) my_body(NULL)
{ {
} }
task* execute() { task* execute() {
if( has_right_zombie ) { if( has_right_zombie ) {
// Right child was stolen. // Right child was stolen.
Body* s = zombie_space.begin(); Body* s = zombie_space.begin();
my_body->join( *s ); my_body->join( *s );
s->~Body(); s->~Body();
} }
if( my_context==1 ) // left child if( my_context==left_child )
                itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );                 itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
return NULL; return NULL;
} }
template<typename Range,typename Body_, typename Partitioner> template<typename Range,typename Body_, typename Partitioner>
friend class start_reduce; friend class start_reduce;
}; };
//! Task type used to split the work of parallel_reduce. //! Task type used to split the work of parallel_reduce.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
skipping to change at line 99 skipping to change at line 103
/*override*/ task* execute(); /*override*/ task* execute();
template<typename Body_> template<typename Body_>
friend class finish_reduce; friend class finish_reduce;
public: public:
//! Constructor used for root task //! Constructor used for root task
start_reduce( const Range& range, Body* body, Partitioner& partitio ner ) : start_reduce( const Range& range, Body* body, Partitioner& partitio ner ) :
my_body(body), my_body(body),
my_range(range), my_range(range),
my_partition(partitioner), my_partition(partitioner),
my_context(0) my_context(root_task)
{ {
} }
//! Splitting constructor used to generate children. //! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */ /** parent_ becomes left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, split ) : start_reduce( start_reduce& parent_, split ) :
my_body(parent_.my_body), my_body(parent_.my_body),
my_range(parent_.my_range,split()), my_range(parent_.my_range,split()),
my_partition(parent_.my_partition,split()), my_partition(parent_.my_partition,split()),
my_context(2) my_context(right_child)
{ {
my_partition.set_affinity(*this); my_partition.set_affinity(*this);
parent_.my_context = 1; parent_.my_context = left_child;
} }
//! Construct right child from the given range as response to the d emand. //! Construct right child from the given range as response to the d emand.
/** parent_ remains left child. Newly constructed object is right child. */ /** parent_ remains left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, const Range& r, depth_t d ) : start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
my_body(parent_.my_body), my_body(parent_.my_body),
my_range(r), my_range(r),
my_partition(parent_.my_partition,split()), my_partition(parent_.my_partition,split()),
my_context(2) // right leaf mark my_context(right_child)
{ {
my_partition.set_affinity(*this); my_partition.set_affinity(*this);
my_partition.align_depth( d ); my_partition.align_depth( d );
parent_.my_context = 1; // left leaf mark parent_.my_context = left_child;
} }
//! Update affinity info, if any //! Update affinity info, if any
/*override*/ void note_affinity( affinity_id id ) { /*override*/ void note_affinity( affinity_id id ) {
my_partition.note_affinity( id ); my_partition.note_affinity( id );
} }
        static void run( const Range& range, Body& body, Partitioner& partitioner ) {         static void run( const Range& range, Body& body, Partitioner& partitioner ) {
if( !range.empty() ) { if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP #if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
                task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );                 task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else #else
skipping to change at line 157 skipping to change at line 161
//! create a continuation task, serve as callback for partitioner //! create a continuation task, serve as callback for partitioner
finish_type *create_continuation() { finish_type *create_continuation() {
return new( allocate_continuation() ) finish_type(my_context); return new( allocate_continuation() ) finish_type(my_context);
} }
//! Run body for range //! Run body for range
void run_body( Range &r ) { (*my_body)( r ); } void run_body( Range &r ) { (*my_body)( r ); }
}; };
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute() { task* start_reduce<Range,Body,Partitioner>::execute() {
my_partition.check_being_stolen( *this ); my_partition.check_being_stolen( *this );
if( my_context==2 ) { // right child if( my_context==right_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent()); finish_type* parent_ptr = static_cast<finish_type*>(parent());
            if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???             if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
                my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());                 my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
parent_ptr->has_right_zombie = true; parent_ptr->has_right_zombie = true;
} }
        } else __TBB_ASSERT(my_context==0,0);// because left leaf spawns right leafs without recycling         } else __TBB_ASSERT(my_context==root_task,NULL);// because left leaf spawns right leafs without recycling
my_partition.execute(*this, my_range); my_partition.execute(*this, my_range);
if( my_context==1 ) { if( my_context==left_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent()); finish_type* parent_ptr = static_cast<finish_type*>(parent());
__TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),0); __TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
itt_store_word_with_release(parent_ptr->my_body, my_body ); itt_store_word_with_release(parent_ptr->my_body, my_body );
} }
return NULL; return NULL;
} }
#if TBB_PREVIEW_DETERMINISTIC_REDUCE #if TBB_PREVIEW_DETERMINISTIC_REDUCE
    //! Task type use to combine the partial results of parallel_deterministic_reduce.     //! Task type used to combine the partial results of parallel_deterministic_reduce.
/** @ingroup algorithms */ /** @ingroup algorithms */
template<typename Body> template<typename Body>
class finish_deterministic_reduce: public task { class finish_deterministic_reduce: public task {
Body &my_left_body; Body &my_left_body;
Body my_right_body; Body my_right_body;
finish_deterministic_reduce( Body &body ) : finish_deterministic_reduce( Body &body ) :
my_left_body( body ), my_left_body( body ),
my_right_body( body, split() ) my_right_body( body, split() )
{ {
 End of changes. 13 change blocks. 
13 lines changed or deleted 17 lines changed or added
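
The root_task/left_child/right_child constants above track where each start_reduce task sits in the reduction tree that tbb::parallel_reduce builds over a user-supplied Body. For orientation, a conventional Body (summation over a blocked_range) looks roughly like the sketch below; the names are illustrative and this is not library code:

    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"
    #include <cstddef>

    // Classic reduction Body: operator() accumulates a subrange, the splitting
    // constructor starts a fresh accumulator, join() merges a right child back in.
    struct SumBody {
        const float* my_data;
        float my_sum;
        SumBody( const float* data ) : my_data(data), my_sum(0) {}
        SumBody( SumBody& other, tbb::split ) : my_data(other.my_data), my_sum(0) {}
        void operator()( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i ) my_sum += my_data[i];
        }
        void join( SumBody& rhs ) { my_sum += rhs.my_sum; }
    };

    float sum_array( const float* data, size_t n ) {
        SumBody body(data);
        tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body );
        return body.my_sum;
    }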


 parallel_scan.h   parallel_scan.h 
skipping to change at line 230 skipping to change at line 230
return_slot(&return_slot_), return_slot(&return_slot_),
parent_sum(NULL), parent_sum(NULL),
is_final(true), is_final(true),
is_right_child(false), is_right_child(false),
range(range_), range(range_),
partition(partitioner_) partition(partitioner_)
{ {
__TBB_ASSERT( !*return_slot, NULL ); __TBB_ASSERT( !*return_slot, NULL );
} }
        static void run( const Range& range, Body& body, const Partitioner& partitioner ) {         static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
if( !range.empty() ) { if( !range_.empty() ) {
typedef internal::start_scan<Range,Body,Partitioner> start_ pass1_type; typedef internal::start_scan<Range,Body,Partitioner> start_ pass1_type;
internal::sum_node<Range,Body>* root = NULL; internal::sum_node<Range,Body>* root = NULL;
typedef internal::final_sum<Range,Body> final_sum_type; typedef internal::final_sum<Range,Body> final_sum_type;
                final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body );                 final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
start_pass1_type& pass1 = *new(task::allocate_root()) start _pass1_type( start_pass1_type& pass1 = *new(task::allocate_root()) start _pass1_type(
/*return_slot=*/root, /*return_slot=*/root,
range, range_,
*temp_body, *temp_body,
partitioner ); partitioner_ );
task::spawn_root_and_wait( pass1 ); task::spawn_root_and_wait( pass1 );
if( root ) { if( root ) {
root->body = temp_body; root->body = temp_body;
root->incoming = NULL; root->incoming = NULL;
root->stuff_last = &body; root->stuff_last = &body_;
task::spawn_root_and_wait( *root ); task::spawn_root_and_wait( *root );
} else { } else {
body.assign(temp_body->body); body_.assign(temp_body->body);
temp_body->finish_construction( range, NULL ); temp_body->finish_construction( range_, NULL );
temp_body->destroy(*temp_body); temp_body->destroy(*temp_body);
} }
} }
} }
}; };
template<typename Range, typename Body, typename Partitioner> template<typename Range, typename Body, typename Partitioner>
task* start_scan<Range,Body,Partitioner>::execute() { task* start_scan<Range,Body,Partitioner>::execute() {
typedef internal::finish_scan<Range,Body> finish_pass1_type; typedef internal::finish_scan<Range,Body> finish_pass1_type;
finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*> ( parent() ) : NULL; finish_pass1_type* p = parent_sum ? static_cast<finish_pass1_type*> ( parent() ) : NULL;
 End of changes. 6 change blocks. 
9 lines changed or deleted 9 lines changed or added
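
start_scan and final_sum above drive the two-pass algorithm behind tbb::parallel_scan. A conventional running-sum Body is sketched below for orientation (illustrative names, not library code): the templated operator() is invoked with pre_scan_tag on the first pass (accumulate only) and with final_scan_tag on the second (accumulate and emit), while reverse_join and assign combine partial sums.

    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"
    #include <cstddef>

    struct PrefixSumBody {
        const int* my_in;
        int* my_out;
        int my_sum;
        PrefixSumBody( const int* in, int* out ) : my_in(in), my_out(out), my_sum(0) {}
        PrefixSumBody( PrefixSumBody& other, tbb::split ) : my_in(other.my_in), my_out(other.my_out), my_sum(0) {}
        template<typename Tag>
        void operator()( const tbb::blocked_range<size_t>& r, Tag ) {
            int sum = my_sum;
            for( size_t i=r.begin(); i!=r.end(); ++i ) {
                sum += my_in[i];
                if( Tag::is_final_scan() ) my_out[i] = sum;   // emit only on the final pass
            }
            my_sum = sum;
        }
        void reverse_join( PrefixSumBody& left ) { my_sum += left.my_sum; }
        void assign( PrefixSumBody& other ) { my_sum = other.my_sum; }
    };

    void prefix_sum( const int* in, int* out, size_t n ) {
        PrefixSumBody body(in, out);
        tbb::parallel_scan( tbb::blocked_range<size_t>(0,n), body );
    }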


 partitioner.h   partitioner.h 
skipping to change at line 224 skipping to change at line 224
flag_task* split_work(StartType &start) { flag_task* split_work(StartType &start) {
        flag_task* parent_ptr = start.create_continuation(); // the type here is to express expectation         flag_task* parent_ptr = start.create_continuation(); // the type here is to express expectation
start.set_parent(parent_ptr); start.set_parent(parent_ptr);
parent_ptr->set_ref_count(2); parent_ptr->set_ref_count(2);
StartType& right_work = *new( parent_ptr->allocate_child() ) StartT ype(start, split()); StartType& right_work = *new( parent_ptr->allocate_child() ) StartT ype(start, split());
start.spawn(right_work); start.spawn(right_work);
return parent_ptr; return parent_ptr;
} }
template<typename StartType, typename Range> template<typename StartType, typename Range>
void execute(StartType &start, Range &range) { void execute(StartType &start, Range &range) {
        // The algorithm in a few words ([]-denotes calls to decision mathods of partitioner):         // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
// [If this task is stolen, adjust depth and divisions if necessary , set flag]. // [If this task is stolen, adjust depth and divisions if necessary , set flag].
// If range is divisible { // If range is divisible {
// Spread the work while [initial divisions left]; // Spread the work while [initial divisions left];
// Create trap task [if necessary]; // Create trap task [if necessary];
// } // }
// If not divisible or [max depth is reached], execute, else do the range pool part // If not divisible or [max depth is reached], execute, else do the range pool part
task* parent_ptr = start.parent(); task* parent_ptr = start.parent();
if( range.is_divisible() ) { if( range.is_divisible() ) {
if( derived().divisions_left() ) if( derived().divisions_left() )
do parent_ptr = split_work(start); // split until divisions _left() do parent_ptr = split_work(start); // split until divisions _left()
skipping to change at line 299 skipping to change at line 299
if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2( src.my_divisor/my_divisor)); if(my_divisor) src.my_max_depth += static_cast<depth_t>(__TBB_Log2( src.my_divisor/my_divisor));
#endif #endif
} }
    bool check_being_stolen( task &t) { // part of old should_execute_range()     bool check_being_stolen( task &t) { // part of old should_execute_range()
if( !my_divisor ) { if( !my_divisor ) {
            my_divisor = 1; // todo: replace by on-stack flag (partition_state's member)?             my_divisor = 1; // todo: replace by on-stack flag (partition_state's member)?
if( t.is_stolen_task() ) { if( t.is_stolen_task() ) {
#if TBB_USE_EXCEPTIONS #if TBB_USE_EXCEPTIONS
// RTTI is available, check whether the cast is valid // RTTI is available, check whether the cast is valid
__TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0); __TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
                // correctess of the cast rely on avoiding the root task for which:                 // correctness of the cast relies on avoiding the root task for which:
                // - initial value of my_divisor != 0 (protected by separate assertion)                 // - initial value of my_divisor != 0 (protected by separate assertion)
                // - is_stolen_task() always return false for the root task.                 // - is_stolen_task() always return false for the root task.
#endif #endif
static_cast<flag_task*>(t.parent())->child_stolen = true; static_cast<flag_task*>(t.parent())->child_stolen = true;
my_max_depth++; my_max_depth++;
return true; return true;
} }
} }
return false; return false;
} }
skipping to change at line 357 skipping to change at line 357
} }
public: public:
affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& a p ) { affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& a p ) {
__TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" ); __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
ap.resize(factor); ap.resize(factor);
my_array = ap.my_array; my_array = ap.my_array;
map_begin = 0; map_begin = 0;
map_end = unsigned(ap.my_size); map_end = unsigned(ap.my_size);
set_mid(); set_mid();
my_delay = true; my_delay = true;
        my_divisor /= __TBB_INITIAL_CHUNKS; // let excatly P tasks to be distributed across workers         my_divisor /= __TBB_INITIAL_CHUNKS; // let exactly P tasks to be distributed across workers
        my_max_depth = factor_power+1; // the first factor_power ranges will be spawned, and >=1 ranges should left         my_max_depth = factor_power+1; // the first factor_power ranges will be spawned, and >=1 ranges should be left
__TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 ); __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
} }
affinity_partition_type(affinity_partition_type& p, split) affinity_partition_type(affinity_partition_type& p, split)
: auto_partition_type_base<affinity_partition_type>(p, split()), my _array(p.my_array) { : auto_partition_type_base<affinity_partition_type>(p, split()), my _array(p.my_array) {
        __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );         __TBB_ASSERT( p.map_end-p.map_begin<factor || (p.map_end-p.map_begin)%factor==0, NULL );
map_end = p.map_end; map_end = p.map_end;
map_begin = p.map_end = p.map_mid; map_begin = p.map_end = p.map_mid;
set_mid(); p.set_mid(); set_mid(); p.set_mid();
my_delay = p.my_delay; my_delay = p.my_delay;
} }
 End of changes. 3 change blocks. 
6 lines changed or deleted 6 lines changed or added
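
affinity_partition_type above backs tbb::affinity_partitioner, which records where chunks of the range executed so that later loops over the same data can replay that mapping. A hedged usage sketch (body and data names are illustrative):

    #include "tbb/parallel_for.h"
    #include "tbb/blocked_range.h"
    #include <cstddef>

    struct ScaleBody {
        float* my_data;
        ScaleBody( float* data ) : my_data(data) {}
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i=r.begin(); i!=r.end(); ++i ) my_data[i] *= 2.0f;
        }
    };

    void scale_repeatedly( float* data, size_t n, int repetitions ) {
        // Reuse the same partitioner object across iterations so the cached
        // affinity information can pay off.
        tbb::affinity_partitioner ap;
        for( int k=0; k<repetitions; ++k )
            tbb::parallel_for( tbb::blocked_range<size_t>(0,n), ScaleBody(data), ap );
    }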


 queuing_mutex.h   queuing_mutex.h 
skipping to change at line 51 skipping to change at line 51
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop) #pragma warning (pop)
#endif #endif
#include "atomic.h" #include "atomic.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
//! Queuing lock with local-only spinning. //! Queuing mutex with local-only spinning.
/** @ingroup synchronization */ /** @ingroup synchronization */
class queuing_mutex { class queuing_mutex {
public: public:
//! Construct unacquired mutex. //! Construct unacquired mutex.
queuing_mutex() { queuing_mutex() {
q_tail = NULL; q_tail = NULL;
#if TBB_USE_THREADING_TOOLS #if TBB_USE_THREADING_TOOLS
internal_construct(); internal_construct();
#endif #endif
} }
skipping to change at line 74 skipping to change at line 74
/** It helps to avoid the common problem of forgetting to release lock. /** It helps to avoid the common problem of forgetting to release lock.
It also nicely provides the "node" for queuing locks. */ It also nicely provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy { class scoped_lock: internal::no_copy {
//! Initialize fields to mean "no lock held". //! Initialize fields to mean "no lock held".
void initialize() { void initialize() {
mutex = NULL; mutex = NULL;
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
internal::poison_pointer(next); internal::poison_pointer(next);
#endif /* TBB_USE_ASSERT */ #endif /* TBB_USE_ASSERT */
} }
public: public:
//! Construct lock that has not acquired a mutex. //! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */ /** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();} scoped_lock() {initialize();}
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
scoped_lock( queuing_mutex& m ) { scoped_lock( queuing_mutex& m ) {
initialize(); initialize();
acquire(m); acquire(m);
} }
skipping to change at line 119 skipping to change at line 120
a byte seems to help performance slightly. */ a byte seems to help performance slightly. */
uintptr_t going; uintptr_t going;
}; };
void __TBB_EXPORTED_METHOD internal_construct(); void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits // Mutex traits
static const bool is_rw_mutex = false; static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false; static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true; static const bool is_fair_mutex = true;
private: private:
//! The last competitor requesting the lock //! The last competitor requesting the lock
atomic<scoped_lock*> q_tail; atomic<scoped_lock*> q_tail;
}; };
__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex) __TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
} // namespace tbb } // namespace tbb
 End of changes. 3 change blocks. 
1 lines changed or deleted 3 lines changed or added
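
queuing_mutex is normally used through its scoped_lock, which both acquires the mutex and serves as that thread's node in the waiters' queue. A minimal usage sketch (the protected data is illustrative):

    #include "tbb/queuing_mutex.h"

    tbb::queuing_mutex counter_mutex;   // illustrative shared state
    long protected_counter = 0;

    void bump_counter() {
        // The scoped_lock acquires in its constructor and releases when it
        // goes out of scope, even if an exception is thrown.
        tbb::queuing_mutex::scoped_lock lock( counter_mutex );
        ++protected_counter;
    }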


 queuing_rw_mutex.h   queuing_rw_mutex.h 
skipping to change at line 51 skipping to change at line 51
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
#pragma warning (pop) #pragma warning (pop)
#endif #endif
#include "atomic.h" #include "atomic.h"
#include "tbb_profiling.h" #include "tbb_profiling.h"
namespace tbb { namespace tbb {
//! Reader-writer lock with local-only spinning. //! Queuing reader-writer mutex with local-only spinning.
/** Adapted from Krieger, Stumm, et al. pseudocode at /** Adapted from Krieger, Stumm, et al. pseudocode at
http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93 http://www.eecg.toronto.edu/parallel/pubs_abs.html#Krieger_etal_ICPP93
@ingroup synchronization */ @ingroup synchronization */
class queuing_rw_mutex { class queuing_rw_mutex {
public: public:
//! Construct unacquired mutex. //! Construct unacquired mutex.
queuing_rw_mutex() { queuing_rw_mutex() {
q_tail = NULL; q_tail = NULL;
#if TBB_USE_THREADING_TOOLS #if TBB_USE_THREADING_TOOLS
internal_construct(); internal_construct();
#endif #endif
} }
    //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL     //! Destructor asserts if the mutex is acquired, i.e. q_tail is non-NULL
~queuing_rw_mutex() { ~queuing_rw_mutex() {
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
__TBB_ASSERT( !q_tail, "destruction of an acquired mutex"); __TBB_ASSERT( !q_tail, "destruction of an acquired mutex");
#endif #endif
} }
class scoped_lock;
friend class scoped_lock;
//! The scoped locking pattern //! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release lock. /** It helps to avoid the common problem of forgetting to release lock.
It also nicely provides the "node" for queuing locks. */ It also nicely provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy { class scoped_lock: internal::no_copy {
//! Initialize fields //! Initialize fields to mean "no lock held".
void initialize() { void initialize() {
my_mutex = NULL; my_mutex = NULL;
#if TBB_USE_ASSERT #if TBB_USE_ASSERT
my_state = 0xFF; // Set to invalid state my_state = 0xFF; // Set to invalid state
internal::poison_pointer(my_next); internal::poison_pointer(my_next);
internal::poison_pointer(my_prev); internal::poison_pointer(my_prev);
#endif /* TBB_USE_ASSERT */ #endif /* TBB_USE_ASSERT */
} }
public: public:
//! Construct lock that has not acquired a mutex. //! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */ /** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();} scoped_lock() {initialize();}
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
scoped_lock( queuing_rw_mutex& m, bool write=true ) { scoped_lock( queuing_rw_mutex& m, bool write=true ) {
initialize(); initialize();
acquire(m,write); acquire(m,write);
} }
//! Release lock (if lock is held). //! Release lock (if lock is held).
~scoped_lock() { ~scoped_lock() {
if( my_mutex ) release(); if( my_mutex ) release();
} }
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
void acquire( queuing_rw_mutex& m, bool write=true ); void acquire( queuing_rw_mutex& m, bool write=true );
//! Try acquire lock on given mutex. //! Acquire lock on given mutex if free (i.e. non-blocking)
bool try_acquire( queuing_rw_mutex& m, bool write=true ); bool try_acquire( queuing_rw_mutex& m, bool write=true );
//! Release lock. //! Release lock.
void release(); void release();
//! Upgrade reader to become a writer. //! Upgrade reader to become a writer.
/** Returns true if the upgrade happened without re-acquiring the lock and false if opposite */ /** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer(); bool upgrade_to_writer();
//! Downgrade writer to become a reader. //! Downgrade writer to become a reader.
bool downgrade_to_reader(); bool downgrade_to_reader();
private: private:
//! The pointer to the current mutex to work //! The pointer to the mutex owned, or NULL if not holding a mutex.
queuing_rw_mutex* my_mutex; queuing_rw_mutex* my_mutex;
//! The pointer to the previous and next competitors for a mutex //! The pointer to the previous and next competitors for a mutex
scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next; scoped_lock *__TBB_atomic my_prev, *__TBB_atomic my_next;
typedef unsigned char state_t; typedef unsigned char state_t;
//! State of the request: reader, writer, active reader, other serv ice states //! State of the request: reader, writer, active reader, other serv ice states
atomic<state_t> my_state; atomic<state_t> my_state;
 End of changes. 7 change blocks. 
8 lines changed or deleted 6 lines changed or added
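A minimal sketch of how the reader side and upgrade_to_writer() are meant to be combined, reflecting the return-value semantics documented above; the shared variable and function are illustrative:

    #include "tbb/queuing_rw_mutex.h"

    tbb::queuing_rw_mutex rw_mutex;
    int shared_value = 0;               // illustrative shared state

    void set_once() {
        // Start as a reader; upgrade only if a write turns out to be needed.
        tbb::queuing_rw_mutex::scoped_lock lock(rw_mutex, /*write=*/false);
        if( shared_value == 0 ) {
            if( !lock.upgrade_to_writer() ) {
                // The upgrade released and re-acquired the lock, so another
                // writer may have intervened: re-check the condition.
                if( shared_value != 0 ) return;
            }
            shared_value = 42;
        }
    }                                   // lock released by the destructor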


 reader_writer_lock.h   reader_writer_lock.h 
skipping to change at line 231 skipping to change at line 231
//! The list of pending readers //! The list of pending readers
atomic<scoped_lock_read*> reader_head; atomic<scoped_lock_read*> reader_head;
//! The list of pending writers //! The list of pending writers
atomic<scoped_lock*> writer_head; atomic<scoped_lock*> writer_head;
//! The last node in the list of pending writers //! The last node in the list of pending writers
atomic<scoped_lock*> writer_tail; atomic<scoped_lock*> writer_tail;
//! Writer that owns the mutex; tbb_thread::id() otherwise. //! Writer that owns the mutex; tbb_thread::id() otherwise.
tbb_thread::id my_current_writer; tbb_thread::id my_current_writer;
//! Status of mutex //! Status of mutex
atomic<unsigned> rdr_count_and_flags; atomic<uintptr_t> rdr_count_and_flags; // used with __TBB_AtomicOR, which assumes uintptr_t
}; };
} // namespace interface5 } // namespace interface5
using interface5::reader_writer_lock; using interface5::reader_writer_lock;
} // namespace tbb } // namespace tbb
#endif /* __TBB_reader_writer_lock_H */ #endif /* __TBB_reader_writer_lock_H */
 End of changes. 1 change blocks. 
1 lines changed or deleted 1 lines changed or added
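For context on the reader and writer lists above, a small usage sketch of the class through its scoped helpers (container and functions are illustrative):

    #include <vector>
    #include "tbb/reader_writer_lock.h"

    tbb::reader_writer_lock rw_lock;
    std::vector<int> data;              // illustrative shared container

    int first_or_minus_one() {
        tbb::reader_writer_lock::scoped_lock_read reader(rw_lock);  // shared access
        return data.empty() ? -1 : data.front();
    }

    void append(int x) {
        tbb::reader_writer_lock::scoped_lock writer(rw_lock);       // exclusive access
        data.push_back(x);
    }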


 scalable_allocator.h   scalable_allocator.h 
skipping to change at line 168 skipping to change at line 168
/* Ensure that including this header does not cause implicit linkage with TBB */ /* Ensure that including this header does not cause implicit linkage with TBB */
#ifndef __TBB_NO_IMPLICIT_LINKAGE #ifndef __TBB_NO_IMPLICIT_LINKAGE
#define __TBB_NO_IMPLICIT_LINKAGE 1 #define __TBB_NO_IMPLICIT_LINKAGE 1
#include "tbb_stddef.h" #include "tbb_stddef.h"
#undef __TBB_NO_IMPLICIT_LINKAGE #undef __TBB_NO_IMPLICIT_LINKAGE
#else #else
#include "tbb_stddef.h" #include "tbb_stddef.h"
#endif #endif
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
#include <utility> // std::forward
#endif
namespace tbb { namespace tbb {
#if _MSC_VER && !defined(__INTEL_COMPILER) #if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy. // Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4100) #pragma warning (disable: 4100)
#endif #endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5 //! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The members are ordered the same way they are in section 20.4.1 /** The members are ordered the same way they are in section 20.4.1
skipping to change at line 216 skipping to change at line 220
//! Free previously allocated block of memory //! Free previously allocated block of memory
void deallocate( pointer p, size_type ) { void deallocate( pointer p, size_type ) {
scalable_free( p ); scalable_free( p );
} }
//! Largest value for which method allocate might succeed. //! Largest value for which method allocate might succeed.
size_type max_size() const throw() { size_type max_size() const throw() {
size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type); size_type absolutemax = static_cast<size_type>(-1) / sizeof (value_type);
return (absolutemax > 0 ? absolutemax : 1); return (absolutemax > 0 ? absolutemax : 1);
} }
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    template<typename... Args>
    void construct(pointer p, Args&&... args)
    #if __TBB_CPP11_STD_FORWARD_BROKEN
        { ::new((void *)p) T((args)...); }
    #else
        { ::new((void *)p) T(std::forward<Args>(args)...); }
    #endif
#else // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);} void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
void destroy( pointer p ) {p->~value_type();} void destroy( pointer p ) {p->~value_type();}
}; };
#if _MSC_VER && !defined(__INTEL_COMPILER) #if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop) #pragma warning (pop)
#endif // warning 4100 is back #endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1 //! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */ /** @ingroup memory_allocation */
template<> template<>
 End of changes. 3 change blocks. 
0 lines changed or deleted 16 lines changed or added
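A short sketch of how the allocator is typically used, including the construct() overloads added above; the container and values are illustrative:

    #include <string>
    #include <vector>
    #include "tbb/scalable_allocator.h"

    // STL containers can simply be parameterized with the scalable allocator.
    std::vector<int, tbb::scalable_allocator<int> > numbers;

    void demo() {
        numbers.push_back(1);

        // Manual allocate/construct/destroy/deallocate cycle.
        tbb::scalable_allocator<std::string> alloc;
        std::string* s = alloc.allocate(1);
        alloc.construct(s, std::string(5, 'x'));  // copy-construct; with the variadic overload
                                                  // above, alloc.construct(s, 5, 'x') forwards
                                                  // the arguments directly to the constructor
        alloc.destroy(s);
        alloc.deallocate(s, 1);
    }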


 spin_rw_mutex.h   spin_rw_mutex.h 
skipping to change at line 50 skipping to change at line 50
//! Fast, unfair, spinning reader-writer lock with backoff and writer-preference //! Fast, unfair, spinning reader-writer lock with backoff and writer-preference
/** @ingroup synchronization */ /** @ingroup synchronization */
class spin_rw_mutex_v3 { class spin_rw_mutex_v3 {
//! @cond INTERNAL //! @cond INTERNAL
//! Internal acquire write lock. //! Internal acquire write lock.
bool __TBB_EXPORTED_METHOD internal_acquire_writer(); bool __TBB_EXPORTED_METHOD internal_acquire_writer();
//! Out of line code for releasing a write lock. //! Out of line code for releasing a write lock.
/** This code is has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */ /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
void __TBB_EXPORTED_METHOD internal_release_writer(); void __TBB_EXPORTED_METHOD internal_release_writer();
//! Internal acquire read lock. //! Internal acquire read lock.
void __TBB_EXPORTED_METHOD internal_acquire_reader(); void __TBB_EXPORTED_METHOD internal_acquire_reader();
//! Internal upgrade reader to become a writer. //! Internal upgrade reader to become a writer.
bool __TBB_EXPORTED_METHOD internal_upgrade(); bool __TBB_EXPORTED_METHOD internal_upgrade();
//! Out of line code for downgrading a writer to a reader. //! Out of line code for downgrading a writer to a reader.
/** This code is has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */ /** This code has debug checking and instrumentation for Intel(R) Thread Checker and Intel(R) Thread Profiler. */
void __TBB_EXPORTED_METHOD internal_downgrade(); void __TBB_EXPORTED_METHOD internal_downgrade();
//! Internal release read lock. //! Internal release read lock.
void __TBB_EXPORTED_METHOD internal_release_reader(); void __TBB_EXPORTED_METHOD internal_release_reader();
//! Internal try_acquire write lock. //! Internal try_acquire write lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_writer(); bool __TBB_EXPORTED_METHOD internal_try_acquire_writer();
//! Internal try_acquire read lock. //! Internal try_acquire read lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_reader(); bool __TBB_EXPORTED_METHOD internal_try_acquire_reader();
skipping to change at line 117 skipping to change at line 117
//! Acquire lock on given mutex. //! Acquire lock on given mutex.
void acquire( spin_rw_mutex& m, bool write = true ) { void acquire( spin_rw_mutex& m, bool write = true ) {
__TBB_ASSERT( !mutex, "holding mutex already" ); __TBB_ASSERT( !mutex, "holding mutex already" );
is_writer = write; is_writer = write;
mutex = &m; mutex = &m;
if( write ) mutex->internal_acquire_writer(); if( write ) mutex->internal_acquire_writer();
else mutex->internal_acquire_reader(); else mutex->internal_acquire_reader();
} }
//! Upgrade reader to become a writer. //! Upgrade reader to become a writer.
/** Returns true if the upgrade happened without re-acquiring the lock and false if opposite */ /** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() { bool upgrade_to_writer() {
__TBB_ASSERT( mutex, "lock is not acquired" ); __TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( !is_writer, "not a reader" ); __TBB_ASSERT( !is_writer, "not a reader" );
is_writer = true; is_writer = true;
return mutex->internal_upgrade(); return mutex->internal_upgrade();
} }
//! Release lock. //! Release lock.
void release() { void release() {
__TBB_ASSERT( mutex, "lock is not acquired" ); __TBB_ASSERT( mutex, "lock is not acquired" );
skipping to change at line 141 skipping to change at line 141
if( is_writer ) m->internal_release_writer(); if( is_writer ) m->internal_release_writer();
else m->internal_release_reader(); else m->internal_release_reader();
#else #else
if( is_writer ) __TBB_AtomicAND( &m->state, READERS ); if( is_writer ) __TBB_AtomicAND( &m->state, READERS );
else __TBB_FetchAndAddWrelease( &m->state, -(intptr_t)ONE_READER); else __TBB_FetchAndAddWrelease( &m->state, -(intptr_t)ONE_READER);
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
} }
//! Downgrade writer to become a reader. //! Downgrade writer to become a reader.
bool downgrade_to_reader() { bool downgrade_to_reader() {
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
__TBB_ASSERT( mutex, "lock is not acquired" ); __TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( is_writer, "not a writer" ); __TBB_ASSERT( is_writer, "not a writer" );
#if TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT
mutex->internal_downgrade(); mutex->internal_downgrade();
#else #else
__TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER )); __TBB_FetchAndAddW( &mutex->state, ((intptr_t)ONE_READER-WRITER ));
#endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */ #endif /* TBB_USE_THREADING_TOOLS||TBB_USE_ASSERT */
is_writer = false; is_writer = false;
return true; return true;
} }
//! Try acquire lock on given mutex. //! Try acquire lock on given mutex.
bool try_acquire( spin_rw_mutex& m, bool write = true ) { bool try_acquire( spin_rw_mutex& m, bool write = true ) {
__TBB_ASSERT( !mutex, "holding mutex already" ); __TBB_ASSERT( !mutex, "holding mutex already" );
bool result; bool result;
is_writer = write; is_writer = write;
result = write? m.internal_try_acquire_writer() result = write? m.internal_try_acquire_writer()
: m.internal_try_acquire_reader(); : m.internal_try_acquire_reader();
 End of changes. 6 change blocks. 
5 lines changed or deleted 4 lines changed or added
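A minimal sketch of the write-then-downgrade idiom supported by the scoped_lock above (the cached value and function are illustrative):

    #include "tbb/spin_rw_mutex.h"

    tbb::spin_rw_mutex cache_mutex;
    int cached_value = -1;              // illustrative lazily computed value

    int publish_and_read() {
        tbb::spin_rw_mutex::scoped_lock lock(cache_mutex, /*write=*/true);
        if( cached_value < 0 )
            cached_value = 42;          // exclusive update
        lock.downgrade_to_reader();     // keep shared access; other readers can proceed
        return cached_value;
    }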


 sunos_sparc.h   sunos_sparc.h 
skipping to change at line 39 skipping to change at line 39
#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H) #if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
#error Do not #include this internal file directly; use public TBB headers instead. #error Do not #include this internal file directly; use public TBB headers instead.
#endif #endif
#define __TBB_machine_sunos_sparc_H #define __TBB_machine_sunos_sparc_H
#include <stdint.h> #include <stdint.h>
#include <unistd.h> #include <unistd.h>
#define __TBB_WORDSIZE 8 #define __TBB_WORDSIZE 8
#define __TBB_BIG_ENDIAN 1 #define __TBB_BIG_ENDIAN 1 // assumption (hardware may support page-specific bi-endianness)
/** To those working on SPARC hardware. Consider relaxing acquire and release /** To those working on SPARC hardware. Consider relaxing acquire and release
consistency helpers to no-op (as this port covers TSO mode only). **/ consistency helpers to no-op (as this port covers TSO mode only). **/
#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory") #define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence() #define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence() #define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory") #define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
//-------------------------------------------------- //--------------------------------------------------
skipping to change at line 100 skipping to change at line 100
/** /**
* Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity * Atomic fetch and add for 32 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to * @param ptr pointer to value to add addend to
* @param addend value to add to *ptr * @param addend value to add to *ptr
* @return value at ptr before addend was added * @return value at ptr before addend was added
*/ */
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){ static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
int32_t result; int32_t result;
__asm__ __volatile__ ( __asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition "0:\t add\t %3, %4, %0\n" // do addition
"\t cas\t [%2], %3, %0\n" // cas to store re "\t cas\t [%2], %3, %0\n" // cas to store
sult in memory result in memory
"\t cmp\t %3, %0\n" // check if value fr "\t cmp\t %3, %0\n" // check if val
om memory is original ue from memory is original
"\t bne,a,pn\t %%icc, 0b\n" // if not try ag "\t bne,a,pn\t %%icc, 0b\n" // if not try a
ain gain
"\t mov %0, %3\n" // use branch delay sl "\t mov %0, %3\n" // use branch d
ot to move new value in memory to be added elay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int32_t *)ptr) : "=&r"(result), "=m"(*(int32_t *)ptr)
: "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr) : "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
: "ccr", "memory"); : "ccr", "memory");
return result; return result;
} }
/** /**
* Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity * Atomic fetch and add for 64 bit values, in this case implemented by continuously checking success of atomicity
* @param ptr pointer to value to add addend to * @param ptr pointer to value to add addend to
* @param addend value to add to *ptr * @param addend value to add to *ptr
* @return value at ptr before addend was added * @return value at ptr before addend was added
*/ */
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){ static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
int64_t result; int64_t result;
__asm__ __volatile__ ( __asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition "0:\t add\t %3, %4, %0\n" // do addition
"\t casx\t [%2], %3, %0\n" // cas to store r "\t casx\t [%2], %3, %0\n" // cas to store
esult in memory result in memory
"\t cmp\t %3, %0\n" // check if value fr "\t cmp\t %3, %0\n" // check if val
om memory is original ue from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try ag "\t bne,a,pn\t %%xcc, 0b\n" // if not try a
ain gain
"\t mov %0, %3\n" // use branch delay sl "\t mov %0, %3\n" // use branch d
ot to move new value in memory to be added elay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int64_t *)ptr) : "=&r"(result), "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_ t *)ptr) : "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_ t *)ptr)
: "ccr", "memory"); : "ccr", "memory");
return result; return result;
} }
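The cas/casx sequences above follow the usual compare-and-swap retry loop. A portable C++ sketch of that loop, assuming only the port's 8-byte CAS primitive (__TBB_machine_cmpswp8, declared in the part of this header not shown in the diff); the wrapper itself is illustrative:

    #include <stdint.h>

    static inline int64_t fetch_add_via_cas( volatile int64_t* ptr, int64_t addend ) {
        int64_t observed = *ptr;
        for(;;) {
            // __TBB_machine_cmpswp8 returns the value found in memory before the attempt.
            int64_t prior = __TBB_machine_cmpswp8( (volatile void*)ptr, observed + addend, observed );
            if( prior == observed )
                return prior;           // CAS succeeded: 'prior' is the pre-add value
            observed = prior;           // CAS failed: retry with the freshly observed value
        }
    }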
//-------------------------------------------------------- //--------------------------------------------------------
// Logarithm (base two, integer) // Logarithm (base two, integer)
//-------------------------------------------------------- //--------------------------------------------------------
static inline int64_t __TBB_machine_lg( uint64_t x ) { static inline int64_t __TBB_machine_lg( uint64_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
uint64_t count; uint64_t count;
// one hot encode // one hot encode
x |= (x >> 1); x |= (x >> 1);
x |= (x >> 2); x |= (x >> 2);
x |= (x >> 4); x |= (x >> 4);
x |= (x >> 8); x |= (x >> 8);
x |= (x >> 16); x |= (x >> 16);
x |= (x >> 32); x |= (x >> 32);
// count 1's // count 1's
__asm__ ("popc %1, %0" : "=r"(count) : "r"(x) ); __asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
return count-1; return count-1;
} }
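A quick illustrative check of the smear-and-popcount logarithm above (assertion values worked out by hand; not part of the header):

    #include <assert.h>

    void check_lg() {
        assert( __TBB_machine_lg(1)  == 0 );
        assert( __TBB_machine_lg(32) == 5 );   // 0x20 smears to 0x3F; popc(0x3F) - 1 == 5
        assert( __TBB_machine_lg(33) == 5 );   // non-powers of two round down
    }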
//-------------------------------------------------------- //--------------------------------------------------------
static inline void __TBB_machine_or( volatile void *ptr, uint64_t addend ) { static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ ( __asm__ __volatile__ (
"0:\t or\t %2, %3, %%g1\n" // do addition "0:\t or\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to s "\t casx\t [%1], %2, %%g1\n" // cas to store
tore result in memory result in memory
"\t cmp\t %2, %%g1\n" // check if "\t cmp\t %2, %%g1\n" // check if val
value from memory is original ue from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again "\t bne,a,pn\t %%xcc, 0b\n" // if not try a
"\t mov %%g1, %2\n" // use bran gain
ch delay slot to move new value in memory to be added "\t mov %%g1, %2\n" // use branch d
elay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr) : "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_ t *)ptr) : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory"); : "ccr", "g1", "memory");
} }
static inline void __TBB_machine_and( volatile void *ptr, uint64_t addend ) { static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ ( __asm__ __volatile__ (
"0:\t and\t %2, %3, %%g1\n" // do addition "0:\t and\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to s "\t casx\t [%1], %2, %%g1\n" // cas to store
tore result in memory result in memory
"\t cmp\t %2, %%g1\n" // check if "\t cmp\t %2, %%g1\n" // check if val
value from memory is original ue from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try a gain "\t bne,a,pn\t %%xcc, 0b\n" // if not try a gain
"\t mov %%g1, %2\n" // use bran ch delay slot to move new value in memory to be added "\t mov %%g1, %2\n" // use branch d elay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr) : "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_ t *)ptr) : "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory"); : "ccr", "g1", "memory");
} }
static inline void __TBB_machine_pause( int32_t delay ) { static inline void __TBB_machine_pause( int32_t delay ) {
// do nothing, inlined, doesnt matter // do nothing, inlined, doesn't matter
} }
// put 0xff in memory location, return memory value, // put 0xff in memory location, return memory value,
// generic trylockbyte puts 0x01, however this is fine // generic trylockbyte puts 0x01, however this is fine
// because all that matters is that 0 is unlocked // because all that matters is that 0 is unlocked
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){ static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
unsigned char result; unsigned char result;
__asm__ __volatile__ ( __asm__ __volatile__ (
"ldstub\t [%2], %0\n" "ldstub\t [%2], %0\n"
: "=r"(result), "=m"(flag) : "=r"(result), "=m"(flag)
: "r"(&flag), "m"(flag) : "r"(&flag), "m"(flag)
: "memory"); : "memory");
return result == 0; return result == 0;
} }
#define __TBB_USE_GENERIC_PART_WORD_CAS 1 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1 #define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions // Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P) #define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
 End of changes. 13 change blocks. 
43 lines changed or deleted 46 lines changed or added


 tbb.h   tbb.h 
skipping to change at line 40 skipping to change at line 40
#define __TBB_tbb_H #define __TBB_tbb_H
/** /**
This header bulk-includes declarations or definitions of all the functionality This header bulk-includes declarations or definitions of all the functionality
provided by TBB (save for malloc dependent headers). provided by TBB (save for malloc dependent headers).
If you use only a few TBB constructs, consider including specific headers only. If you use only a few TBB constructs, consider including specific headers only.
Any header listed below can be included independently of others. Any header listed below can be included independently of others.
**/ **/
#if TBB_PREVIEW_AGGREGATOR
#include "aggregator.h"
#endif
#include "aligned_space.h" #include "aligned_space.h"
#include "atomic.h" #include "atomic.h"
#include "blocked_range.h" #include "blocked_range.h"
#include "blocked_range2d.h" #include "blocked_range2d.h"
#include "blocked_range3d.h" #include "blocked_range3d.h"
#include "cache_aligned_allocator.h" #include "cache_aligned_allocator.h"
#include "combinable.h" #include "combinable.h"
#include "concurrent_unordered_map.h" #include "concurrent_unordered_map.h"
#include "concurrent_hash_map.h" #include "concurrent_hash_map.h"
#include "concurrent_queue.h" #include "concurrent_queue.h"
skipping to change at line 68 skipping to change at line 71
#include "parallel_for_each.h" #include "parallel_for_each.h"
#include "parallel_invoke.h" #include "parallel_invoke.h"
#include "parallel_reduce.h" #include "parallel_reduce.h"
#include "parallel_scan.h" #include "parallel_scan.h"
#include "parallel_sort.h" #include "parallel_sort.h"
#include "partitioner.h" #include "partitioner.h"
#include "pipeline.h" #include "pipeline.h"
#include "queuing_mutex.h" #include "queuing_mutex.h"
#include "queuing_rw_mutex.h" #include "queuing_rw_mutex.h"
#include "reader_writer_lock.h" #include "reader_writer_lock.h"
#if TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE
#include "concurrent_priority_queue.h" #include "concurrent_priority_queue.h"
#endif
#include "recursive_mutex.h" #include "recursive_mutex.h"
#include "spin_mutex.h" #include "spin_mutex.h"
#include "spin_rw_mutex.h" #include "spin_rw_mutex.h"
#include "task.h" #include "task.h"
#include "task_group.h" #include "task_group.h"
#include "task_scheduler_init.h" #include "task_scheduler_init.h"
#include "task_scheduler_observer.h" #include "task_scheduler_observer.h"
#include "tbb_allocator.h" #include "tbb_allocator.h"
#include "tbb_exception.h" #include "tbb_exception.h"
#include "tbb_thread.h" #include "tbb_thread.h"
 End of changes. 3 change blocks. 
2 lines changed or deleted 3 lines changed or added
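As the new guards above imply, community-preview components are compiled out of the umbrella header unless the corresponding macro is defined first; a minimal sketch:

    #define TBB_PREVIEW_AGGREGATOR 1
    #define TBB_PREVIEW_CONCURRENT_PRIORITY_QUEUE 1
    #include "tbb/tbb.h"

    tbb::concurrent_priority_queue<int> work_queue;   // available only because of the macro above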


 tbb_allocator.h   tbb_allocator.h 
skipping to change at line 34 skipping to change at line 34
the GNU General Public License. This exception does not however the GNU General Public License. This exception does not however
invalidate any other reasons why the executable file might be covered b y invalidate any other reasons why the executable file might be covered b y
the GNU General Public License. the GNU General Public License.
*/ */
#ifndef __TBB_tbb_allocator_H #ifndef __TBB_tbb_allocator_H
#define __TBB_tbb_allocator_H #define __TBB_tbb_allocator_H
#include "tbb_stddef.h" #include "tbb_stddef.h"
#include <new> #include <new>
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
#include <utility> // std::forward
#endif
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
// Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers // Suppress "C++ exception handler used, but unwind semantics are not e nabled" warning in STL headers
#pragma warning (push) #pragma warning (push)
#pragma warning (disable: 4530) #pragma warning (disable: 4530)
#endif #endif
#include <cstring> #include <cstring>
#if !TBB_USE_EXCEPTIONS && _MSC_VER #if !TBB_USE_EXCEPTIONS && _MSC_VER
skipping to change at line 121 skipping to change at line 124
internal::deallocate_via_handler_v3(p); internal::deallocate_via_handler_v3(p);
} }
//! Largest value for which method allocate might succeed. //! Largest value for which method allocate might succeed.
size_type max_size() const throw() { size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type); size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1); return (max > 0 ? max : 1);
} }
//! Copy-construct value at location pointed to by p. //! Copy-construct value at location pointed to by p.
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    template<typename... Args>
    void construct(pointer p, Args&&... args)
    #if __TBB_CPP11_STD_FORWARD_BROKEN
        { ::new((void *)p) T((args)...); }
    #else
        { ::new((void *)p) T(std::forward<Args>(args)...); }
    #endif
#else // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
    void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);} void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT
//! Destroy value at location pointed to by p. //! Destroy value at location pointed to by p.
void destroy( pointer p ) {p->~value_type();} void destroy( pointer p ) {p->~value_type();}
//! Returns current allocator //! Returns current allocator
static malloc_type allocator_type() { static malloc_type allocator_type() {
return internal::is_malloc_used_v3() ? standard : scalable; return internal::is_malloc_used_v3() ? standard : scalable;
} }
}; };
 End of changes. 3 change blocks. 
0 lines changed or deleted 15 lines changed or added
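A small sketch of the run-time fallback behaviour documented above (the reporting function is illustrative; it assumes the public malloc_type enumerators scalable and standard of tbb_allocator):

    #include <iostream>
    #include "tbb/tbb_allocator.h"

    void report_allocator() {
        typedef tbb::tbb_allocator<int> alloc_t;
        if( alloc_t::allocator_type() == alloc_t::scalable )
            std::cout << "using the TBB scalable allocator\n";
        else
            std::cout << "falling back to standard malloc/free\n";
    }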


 tbb_config.h   tbb_config.h 
skipping to change at line 41 skipping to change at line 41
/** This header is supposed to contain macro definitions and C style comments only. /** This header is supposed to contain macro definitions and C style comments only.
The macros defined here are intended to control such aspects of TBB build as The macros defined here are intended to control such aspects of TBB build as
- presence of compiler features - presence of compiler features
- compilation modes - compilation modes
- feature sets - feature sets
- known compiler/platform issues - known compiler/platform issues
**/ **/
#define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) #define __TBB_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#if __clang__
#define __TBB_CLANG_VERSION (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__)
#endif
/** Presence of compiler features **/ /** Presence of compiler features **/
#if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER) #if (__TBB_GCC_VERSION >= 40400) && !defined(__INTEL_COMPILER)
/** warning suppression pragmas available in GCC since 4.4 **/ /** warning suppression pragmas available in GCC since 4.4 **/
#define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1 #define __TBB_GCC_WARNING_SUPPRESSION_PRESENT 1
#endif #endif
/* TODO: The following condition should be extended when new compilers/runtimes
   with std::exception_ptr support appear. */
#define __TBB_EXCEPTION_PTR_PRESENT ((_MSC_VER >= 1600 || (__GXX_EXPERIMENTAL_CXX0X__ && __GNUC__==4 && __GNUC_MINOR__>=4)) && !__INTEL_COMPILER)
/* Select particular features of C++11 based on compiler version.
   ICC 12.1 (Linux), GCC 4.3 and higher, clang 2.9 and higher set __GXX_EXPERIMENTAL_CXX0X__ in c++11 mode.

   Compilers that mimic other compilers (ICC, clang) must be processed before
   compilers they mimic.

   TODO: The following conditions should be extended when support in new compilers/runtimes is added.
*/
#if __INTEL_COMPILER
    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X__ && __VARIADIC_TEMPLATES
    #define __TBB_CPP11_RVALUE_REF_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ || _MSC_VER >= 1600) && (__INTEL_COMPILER >= 1200)
    #define __TBB_EXCEPTION_PTR_PRESENT 0
#elif __clang__
    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_CLANG_VERSION >= 20900)
    #define __TBB_CPP11_RVALUE_REF_PRESENT (__GXX_EXPERIMENTAL_CXX0X__ && __TBB_CLANG_VERSION >= 20900)
    #define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__
#elif __GNUC__
    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT __GXX_EXPERIMENTAL_CXX0X__
    #define __TBB_CPP11_RVALUE_REF_PRESENT __GXX_EXPERIMENTAL_CXX0X__
    #define __TBB_EXCEPTION_PTR_PRESENT __GXX_EXPERIMENTAL_CXX0X__
#elif _MSC_VER
    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0
    #define __TBB_CPP11_RVALUE_REF_PRESENT 0
    #define __TBB_EXCEPTION_PTR_PRESENT (_MSC_VER >= 1600)
#else
    #define __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT 0
    #define __TBB_CPP11_RVALUE_REF_PRESENT 0
    #define __TBB_EXCEPTION_PTR_PRESENT 0
#endif
// Work around a bug in MinGW32
#if __MINGW32__ && __TBB_EXCEPTION_PTR_PRESENT && !defined(_GLIBCXX_ATOMIC_BUILTINS_4)
    #define _GLIBCXX_ATOMIC_BUILTINS_4
#endif
#if __GNUC__ || __SUNPRO_CC || __IBMCPP__ #if __GNUC__ || __SUNPRO_CC || __IBMCPP__
/* ICC defines __GNUC__ and so is covered */ /* ICC defines __GNUC__ and so is covered */
#define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1 #define __TBB_ATTRIBUTE_ALIGNED_PRESENT 1
#elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER) #elif _MSC_VER && (_MSC_VER >= 1300 || __INTEL_COMPILER)
#define __TBB_DECLSPEC_ALIGN_PRESENT 1 #define __TBB_DECLSPEC_ALIGN_PRESENT 1
#endif #endif
/* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become optional */ /* TODO: change the version back to 4.1.2 once macro __TBB_WORD_SIZE become optional */
#if (__TBB_GCC_VERSION >= 40306) && !defined(__INTEL_COMPILER) #if (__TBB_GCC_VERSION >= 40306) && !defined(__INTEL_COMPILER)
skipping to change at line 142 skipping to change at line 179
#endif #endif
#endif /* defined TBB_USE_CAPTURED_EXCEPTION */ #endif /* defined TBB_USE_CAPTURED_EXCEPTION */
/** Check whether the request to use GCC atomics can be satisfied **/ /** Check whether the request to use GCC atomics can be satisfied **/
#if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT) #if (TBB_USE_GCC_BUILTINS && !__TBB_GCC_BUILTIN_ATOMICS_PRESENT)
#error "GCC atomic built-ins are not supported." #error "GCC atomic built-ins are not supported."
#endif #endif
/** Internal TBB features & modes **/ /** Internal TBB features & modes **/
/** __TBB_DYNAMIC_LOAD_ENABLED describes the system's ability to load libraries dynamically;
__TBB_SOURCE_DIRECTLY_INCLUDED is a mode used in whitebox testing when
it's necessary to test internal functions not exported from TBB DLLs
**/
#ifndef __TBB_DYNAMIC_LOAD_ENABLED #ifndef __TBB_DYNAMIC_LOAD_ENABLED
#define __TBB_DYNAMIC_LOAD_ENABLED !__TBB_TASK_CPP_DIRECTLY_INCLUDED #define __TBB_DYNAMIC_LOAD_ENABLED 1
#elif !__TBB_DYNAMIC_LOAD_ENABLED #elif !(_WIN32||_WIN64) && !__TBB_DYNAMIC_LOAD_ENABLED
#if _WIN32||_WIN64 #define __TBB_WEAK_SYMBOLS 1
#define __TBB_NO_IMPLICIT_LINKAGE 1 #endif
#define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#else #if (_WIN32||_WIN64) && __TBB_SOURCE_DIRECTLY_INCLUDED
#define __TBB_WEAK_SYMBOLS 1 #define __TBB_NO_IMPLICIT_LINKAGE 1
#endif #define __TBBMALLOC_NO_IMPLICIT_LINKAGE 1
#endif #endif
#ifndef __TBB_COUNT_TASK_NODES #ifndef __TBB_COUNT_TASK_NODES
#define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT #define __TBB_COUNT_TASK_NODES TBB_USE_ASSERT
#endif #endif
#ifndef __TBB_TASK_GROUP_CONTEXT #ifndef __TBB_TASK_GROUP_CONTEXT
#define __TBB_TASK_GROUP_CONTEXT 1 #define __TBB_TASK_GROUP_CONTEXT 1
#endif /* __TBB_TASK_GROUP_CONTEXT */ #endif /* __TBB_TASK_GROUP_CONTEXT */
skipping to change at line 225 skipping to change at line 267
//! Macro controlling EH usages in TBB tests //! Macro controlling EH usages in TBB tests
/** Some older versions of glibc crash when exception handling happens concurrently. **/ /** Some older versions of glibc crash when exception handling happens concurrently. **/
#define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1 #define __TBB_THROW_ACROSS_MODULE_BOUNDARY_BROKEN 1
#endif #endif
#if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110 #if (_WIN32||_WIN64) && __INTEL_COMPILER == 1110
/** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads to a worker thread crash on the thread's startup. **/ /** That's a bug in Intel compiler 11.1.044/IA-32/Windows, that leads to a worker thread crash on the thread's startup. **/
#define __TBB_ICL_11_1_CODE_GEN_BROKEN 1 #define __TBB_ICL_11_1_CODE_GEN_BROKEN 1
#endif #endif
#if __GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER) #if __clang__ || (__GNUC__==3 && __GNUC_MINOR__==3 && !defined(__INTEL_COMPILER))
/** A bug in GCC 3.3 with access to nested classes declared in protected area */ /** Bugs with access to nested classes declared in protected area */
#define __TBB_GCC_3_3_PROTECTED_BROKEN 1 #define __TBB_PROTECTED_NESTED_CLASS_BROKEN 1
#endif #endif
#if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2) #if __MINGW32__ && (__GNUC__<4 || __GNUC__==4 && __GNUC_MINOR__<2)
/** MinGW has a bug with stack alignment for routines invoked from MS R TLs. /** MinGW has a bug with stack alignment for routines invoked from MS R TLs.
Since GCC 4.2, the bug can be worked around via a special attribute . **/ Since GCC 4.2, the bug can be worked around via a special attribute . **/
#define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1 #define __TBB_SSE_STACK_ALIGNMENT_BROKEN 1
#endif #endif
#if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0 #if __GNUC__==4 && __GNUC_MINOR__==3 && __GNUC_PATCHLEVEL__==0
// GCC of this version may rashly ignore control dependencies // GCC of this version may rashly ignore control dependencies
skipping to change at line 263 skipping to change at line 305
incorrect code when __asm__ arguments have a cast to volatile. **/ incorrect code when __asm__ arguments have a cast to volatile. **/
#define __TBB_ICC_ASM_VOLATILE_BROKEN 1 #define __TBB_ICC_ASM_VOLATILE_BROKEN 1
#endif #endif
#if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2) #if !__INTEL_COMPILER && (_MSC_VER || __GNUC__==3 && __GNUC_MINOR__<=2)
/** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __alignof(T) /** Bug in GCC 3.2 and MSVC compilers that sometimes return 0 for __alignof(T)
when T has not yet been instantiated. **/ when T has not yet been instantiated. **/
#define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1 #define __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN 1
#endif #endif
#if __INTEL_COMPILER
#define __TBB_CPP11_STD_FORWARD_BROKEN 1
#else
#define __TBB_CPP11_STD_FORWARD_BROKEN 0
#endif
#endif /* __TBB_tbb_config_H */ #endif /* __TBB_tbb_config_H */
 End of changes. 6 change blocks. 
17 lines changed or deleted 73 lines changed or added
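A sketch of how client code can branch on the feature macros selected above (the emplace_into helper is illustrative, not part of TBB):

    #include <new>
    #include "tbb/tbb_config.h"
    #if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_CPP11_STD_FORWARD_BROKEN
    #include <utility>  // std::forward
    template<typename T, typename... Args>
    void emplace_into( T* storage, Args&&... args ) {
        ::new((void*)storage) T(std::forward<Args>(args)...);   // perfect-forwarding path
    }
    #else
    template<typename T>
    void emplace_into( T* storage, const T& value ) {
        ::new((void*)storage) T(value);                         // copy-construct fallback
    }
    #endif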


 tbb_machine.h   tbb_machine.h 
skipping to change at line 51 skipping to change at line 51
__TBB_USE_GENERIC_FETCH_STORE __TBB_USE_GENERIC_FETCH_STORE
__TBB_USE_GENERIC_DWORD_FETCH_ADD __TBB_USE_GENERIC_DWORD_FETCH_ADD
__TBB_USE_GENERIC_DWORD_FETCH_STORE __TBB_USE_GENERIC_DWORD_FETCH_STORE
__TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
__TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE __TBB_USE_GENERIC_FULL_FENCED_LOAD_STORE
__TBB_USE_GENERIC_RELAXED_LOAD_STORE __TBB_USE_GENERIC_RELAXED_LOAD_STORE
__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
In this case tbb_machine.h will add missing functionality based on a minimal set In this case tbb_machine.h will add missing functionality based on a minimal set
of APIs that are required to be implemented by all plug-in headers as described of APIs that are required to be implemented by all plug-in headers as described
futher. further.
Note that these generic implementations may be sub-optimal for a particular Note that these generic implementations may be sub-optimal for a particular
architecture, and thus should be relied upon only after careful evaluation architecture, and thus should be relied upon only after careful evaluation
or as the last resort. or as the last resort.
Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to Additionally __TBB_64BIT_ATOMICS can be set to 0 on a 32-bit architecture to
indicate that the port is not going to support double word atomics. It may also indicate that the port is not going to support double word atomics. It may also
be set to 1 explicitly, though normally this is not necessary as tbb_machine.h be set to 1 explicitly, though normally this is not necessary as tbb_machine.h
will set it automatically. will set it automatically.
__TBB_BIG_ENDIAN macro can be defined by the implementation as well.
It is used only if the __TBB_USE_GENERIC_PART_WORD_CAS is set.
Possible values are:
- 1 if the system is big endian,
- 0 if it is little endian,
- or -1 to explicitly state that __TBB_USE_GENERIC_PART_WORD_CAS can not be used.
-1 should be used when it is known in advance that endianness can change at run time
or it is not simply big or little endian but something more complex.
The system will try to detect it at run time if it is not set (assuming that it
is either big or little endian).
Prerequisites for each architecture port Prerequisites for each architecture port
---------------------------------------- ----------------------------------------
The following functions have no generic implementation. Therefore they must be The following functions and macros have no generic implementation. Therefore they must be
implemented in each machine architecture specific header either as a conventional implemented in each machine architecture specific header either as a conventional
function or as a functional macro. function or as a functional macro.
__TBB_WORDSIZE
This is the size of machine word in bytes, i.e. for 32 bit systems it
should be defined to 4.
__TBB_Yield() __TBB_Yield()
Signals OS that the current thread is willing to relinquish the remainder Signals OS that the current thread is willing to relinquish the remainder
of its time quantum. of its time quantum.
__TBB_full_memory_fence() __TBB_full_memory_fence()
Must prevent all memory operations from being reordered across it ( both Must prevent all memory operations from being reordered across it ( both
by hardware and compiler). All such fences must be totally ordered (or by hardware and compiler). All such fences must be totally ordered (or
sequentially consistent). sequentially consistent).
__TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand ) __TBB_machine_cmpswp4( volatile void *ptr, int32_t value, int32_t comparand )
skipping to change at line 92 skipping to change at line 107
__TBB_machine_<op><S><fence>(...), where __TBB_machine_<op><S><fence>(...), where
<op> = {cmpswp, fetchadd, fetchstore} <op> = {cmpswp, fetchadd, fetchstore}
<S> = {1, 2, 4, 8} <S> = {1, 2, 4, 8}
<fence> = {full_fence, acquire, release, relaxed} <fence> = {full_fence, acquire, release, relaxed}
Must be provided if __TBB_USE_FENCED_ATOMICS is set. Must be provided if __TBB_USE_FENCED_ATOMICS is set.
__TBB_control_consistency_helper() __TBB_control_consistency_helper()
Bridges the memory-semantics gap between architectures providing on ly Bridges the memory-semantics gap between architectures providing on ly
implicit C++0x "consume" semantics (like Power Architecture) and th ose implicit C++0x "consume" semantics (like Power Architecture) and th ose
also implicitly obeying control dependencies (like Itanium). also implicitly obeying control dependencies (like IA-64).
It must be used only in conditional code where the condition is itself It must be used only in conditional code where the condition is itself
data-dependent, and will then make subsequent code behave as if the data-dependent, and will then make subsequent code behave as if the
original data dependency were acquired. original data dependency were acquired.
It needs only an empty definition where implied by the architecture It needs only a compiler fence where implied by the architecture
either specifically (Itanium) or because generally stronger C++0x "acquire" either specifically (like IA-64) or because generally stronger "acquire"
semantics are enforced (like x86). semantics are enforced (like x86).
It is always valid, though potentially suboptimal, to replace
control with acquire on the load and then remove the helper.
__TBB_acquire_consistency_helper(), __TBB_release_consistency_helper() __TBB_acquire_consistency_helper(), __TBB_release_consistency_helper()
Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set . Must be provided if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE is set .
Enforce acquire and release semantics in generic implementations of fenced Enforce acquire and release semantics in generic implementations of fenced
store and load operations. Depending on the particular architecture/compiler store and load operations. Depending on the particular architecture/compiler
combination they may be a hardware fence, a compiler fence, both or nothing. combination they may be a hardware fence, a compiler fence, both or nothing.
**/ **/
#include "tbb_stddef.h" #include "tbb_stddef.h"
skipping to change at line 348 skipping to change at line 365
} }
//! Spin UNTIL the value of the variable is equal to a given value //! Spin UNTIL the value of the variable is equal to a given value
/** T and U should be comparable types. */ /** T and U should be comparable types. */
template<typename T, typename U> template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) { void spin_wait_until_eq( const volatile T& location, const U value ) {
atomic_backoff backoff; atomic_backoff backoff;
while( location!=value ) backoff.pause(); while( location!=value ) backoff.pause();
} }
// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 presumed to have full fence.
        // Cast shuts up /Wp64 warning
        result = (uint32_t)__TBB_machine_cmpswp4( base, new_value, old_value );
        if( result==old_value // CAS succeeded
            || ((result^old_value)&mask)!=0 ) // CAS failed and the bits of interest have changed
            break;
        else // CAS failed but the bits of interest left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
//TODO: add static_assert for the requirements stated below
//TODO: check if it works with signed types
// there are following restrictions/limitations for this operation:
//  - T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
//  - T should be integer type of at most 4 bytes, for the casts and calculations to work.
//      (Together, these rules limit applicability of Masked CAS to uint8_t and uint16_t only,
//       as it does nothing useful for 4 bytes).
//  - The operation assumes that the architecture consistently uses either little-endian or big-endian:
//      it does not support mixed-endian or page-specific bi-endian architectures.
//      This function is the only use of __TBB_BIG_ENDIAN.
#if (__TBB_BIG_ENDIAN!=-1)
    #if ( __TBB_USE_GENERIC_PART_WORD_CAS)
        #error generic implementation of part-word CAS was explicitly disabled for this configuration
    #endif
template<typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
    struct endianness{ static bool is_big_endian(){
        #ifndef __TBB_BIG_ENDIAN
            const uint32_t probe = 0x03020100;
            return (((const char*)(&probe))[0]==0x03);
        #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1)
            return __TBB_BIG_ENDIAN;
        #else
            #error unexpected value of __TBB_BIG_ENDIAN
        #endif
    }};

    const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
    volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );

    // location of T within uint32_t for a C++ shift operation
    const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
    const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
    const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
    const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;

    for(atomic_backoff b;;b.pause()) {
        const uint32_t surroundings  = *aligned_ptr & ~mask ; // reload the aligned_ptr value which might change during the pause
        const uint32_t big_comparand = surroundings | shifted_comparand ;
        const uint32_t big_value     = surroundings | shifted_value ;
        // __TBB_machine_cmpswp4 presumed to have full fence.
        // Cast shuts up /Wp64 warning
        const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
        if( big_result == big_comparand    // CAS succeeded
          || ((big_result ^ big_comparand) & mask) != 0) // CAS failed and the bits of interest have changed
        {
            return T((big_result & mask) >> bits_to_shift);
        }
        else continue;                     // CAS failed but the bits of interest left unchanged
    }
}
#endif
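An illustrative use of the part-word CAS above: atomically setting a one-byte flag on a target whose narrowest native CAS is four bytes. The flag and wrapper are hypothetical, and the call assumes the function is visible in the current scope:

    #include <stdint.h>

    bool try_set_flag( volatile uint8_t& flag ) {
        // Like the full-word CAS, the function returns the value observed before the attempt.
        return __TBB_MaskedCompareAndSwap<uint8_t>( &flag, /*value=*/1, /*comparand=*/0 ) == 0;
    }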
template<size_t S, typename T> template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ); inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
template<> template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) { inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS #if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand); return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,value,comparand);
#else #else
return __TBB_machine_cmpswp1(ptr,value,comparand); return __TBB_machine_cmpswp1(ptr,value,comparand);
#endif #endif
} }
template<> template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) { inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#if __TBB_USE_GENERIC_PART_WORD_CAS #if __TBB_USE_GENERIC_PART_WORD_CAS
return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand); return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,value,comparand);
#else #else
return __TBB_machine_cmpswp2(ptr,value,comparand); return __TBB_machine_cmpswp2(ptr,value,comparand);
#endif #endif
} }
template<> template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) { inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
// Cast shuts up /Wp64 warning // Cast shuts up /Wp64 warning
return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand); return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
} }
skipping to change at line 496 skipping to change at line 536
inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) { inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
for(;;) { for(;;) {
int64_t result = *(int64_t *)ptr; int64_t result = *(int64_t *)ptr;
if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break; if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
} }
} }
inline int64_t __TBB_machine_load8 (const volatile void *ptr) { inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
// Comparand and new value may be anything, they only must be equal, and // Comparand and new value may be anything, they only must be equal, and
// the value should have a low probability to be actually found in 'location'. // the value should have a low probability to be actually found in 'location'.
const int64_t anyvalue = 2305843009213693951; const int64_t anyvalue = 2305843009213693951LL;
return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue, anyvalue); return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue, anyvalue);
} }
#endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */ #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
#if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
/** Fenced operations use volatile qualifier to prevent compiler from optimizing /** Fenced operations use volatile qualifier to prevent compiler from optimizing
them out, and on architectures with weak memory ordering to induce compiler them out, and on architectures with weak memory ordering to induce compiler
to generate code with appropriate acquire/release semantics. to generate code with appropriate acquire/release semantics.
On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has On architectures like IA32, Intel64 (and likely Sparc TSO) volatile has
no effect on code gen, and consistency helpers serve as a compiler fenc e (the no effect on code gen, and consistency helpers serve as a compiler fenc e (the
skipping to change at line 521 skipping to change at line 561
T to_return = location; T to_return = location;
__TBB_acquire_consistency_helper(); __TBB_acquire_consistency_helper();
return to_return; return to_return;
} }
static void store_with_release ( volatile T &location, T value ) { static void store_with_release ( volatile T &location, T value ) {
__TBB_release_consistency_helper(); __TBB_release_consistency_helper();
location = value; location = value;
} }
}; };
//in general, plain load and store generated by a 32-bit compiler are not atomic for 64-bit types
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
template <typename T> template <typename T>
struct machine_load_store<T,8> { struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) { static T load_with_acquire ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location ); return (T)__TBB_machine_load8( (const volatile void*)&location );
} }
static void store_with_release ( volatile T& location, T value ) { static void store_with_release ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value ); __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
} }
}; };
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */ #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
#if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
template <typename T, size_t S> template <typename T, size_t S>
struct machine_load_store_seq_cst { struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) { static T load ( const volatile T& location ) {
__TBB_full_memory_fence(); __TBB_full_memory_fence();
return machine_load_store<T,S>::load_with_acquire( location ); return machine_load_store<T,S>::load_with_acquire( location );
} }
#if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
static void store ( volatile T &location, T value ) { static void store ( volatile T &location, T value ) {
atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value ); atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
} }
skipping to change at line 560 skipping to change at line 602
}; };
#if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
/** The implementation does not use functions __TBB_machine_load8/store8 as they /** The implementation does not use functions __TBB_machine_load8/store8 as they
are not required to be sequentially consistent. **/ are not required to be sequentially consistent. **/
template <typename T> template <typename T>
struct machine_load_store_seq_cst<T,8> { struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) { static T load ( const volatile T& location ) {
// Comparand and new value may be anything, they only must be equal, and // Comparand and new value may be anything, they only must be equal, and
// the value should have a low probability to be actually found in 'location'. // the value should have a low probability to be actually found in 'location'.
const int64_t anyvalue = 2305843009213693951ll; const int64_t anyvalue = 2305843009213693951LL;
return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T *>(&location), anyvalue, anyvalue ); return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T *>(&location), anyvalue, anyvalue );
} }
static void store ( volatile T &location, T value ) { static void store ( volatile T &location, T value ) {
int64_t result = (volatile int64_t&)location; int64_t result = (volatile int64_t&)location;
while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result ) while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
result = (volatile int64_t&)location; result = (volatile int64_t&)location;
} }
}; };
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
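// Illustrative sketch (not part of TBB): a self-contained model of the technique used by
// machine_load_store_seq_cst<T,8> above, i.e. building a sequentially consistent 64-bit load
// and store out of nothing but an 8-byte compare-and-swap. cas8() is a hypothetical stand-in
// for __TBB_machine_cmpswp8, implemented here with a mutex only so the sketch compiles and
// runs anywhere; a real implementation would be a lock-free hardware CAS.
#include <cstdint>
#include <mutex>

namespace sketch {
    static std::mutex cas_guard;     // stand-in serialization for the CAS primitive

    inline int64_t cas8( volatile int64_t* p, int64_t new_value, int64_t comparand ) {
        std::lock_guard<std::mutex> lock(cas_guard);
        int64_t old = *p;
        if( old == comparand ) *p = new_value;
        return old;                  // always reports the previous value
    }

    inline int64_t seq_cst_load8( const volatile int64_t& location ) {
        // Comparand and new value only need to be equal: the CAS, whether it "succeeds"
        // (writing back the same bits) or fails, acts as an atomic load of all 64 bits.
        const int64_t anyvalue = 2305843009213693951LL;
        return cas8( const_cast<volatile int64_t*>(&location), anyvalue, anyvalue );
    }

    inline void seq_cst_store8( volatile int64_t& location, int64_t value ) {
        // Retry until the CAS observes the value we last read, i.e. until the swap happened.
        int64_t expected = location;
        while( cas8( &location, value, expected ) != expected )
            expected = location;
    }
}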
#if __TBB_USE_GENERIC_RELAXED_LOAD_STORE #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
// Relaxed operations add volatile qualifier to prevent compiler from optim izing them out. // Relaxed operations add volatile qualifier to prevent compiler from optim izing them out.
/** Volatile should not incur any additional cost on IA32, Intel64, and Spa rc TSO /** Volatile should not incur any additional cost on IA32, Intel64, and Spa rc TSO
architectures. However on architectures with weak memory ordering compi ler may architectures. However on architectures with weak memory ordering compi ler may
generate code with acquire/release semantics for operations on volatile data. **/ generate code with acquire/release semantics for operations on volatile data. **/
template <typename T, size_t S> template <typename T, size_t S>
struct machine_load_store_relaxed { struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) { static inline T load ( const volatile T& location ) {
return location; return location;
skipping to change at line 599 skipping to change at line 642
static inline T load ( const volatile T& location ) { static inline T load ( const volatile T& location ) {
return (T)__TBB_machine_load8( (const volatile void*)&location ); return (T)__TBB_machine_load8( (const volatile void*)&location );
} }
static inline void store ( volatile T& location, T value ) { static inline void store ( volatile T& location, T value ) {
__TBB_machine_store8( (volatile void*)&location, (int64_t)value ); __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
} }
}; };
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */ #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */ #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
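// Illustrative only (not how TBB itself is implemented): in C++11 terms, the relaxed
// load/store above corresponds to std::memory_order_relaxed on a std::atomic, while the
// 64-bit specialization exists to avoid torn accesses on 32-bit targets.
#include <atomic>
#include <stdint.h>

namespace sketch {
    std::atomic<int64_t> sample_counter(0);

    inline int64_t relaxed_load()             { return sample_counter.load( std::memory_order_relaxed ); }
    inline void    relaxed_store( int64_t v ) { sample_counter.store( v, std::memory_order_relaxed ); }
}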
#undef __TBB_WORDSIZE //this macro must not be used outside of the atomic machinery
template<typename T> template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) { inline T __TBB_load_with_acquire(const volatile T &location) {
return machine_load_store<T,sizeof(T)>::load_with_acquire( location ); return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
} }
template<typename T, typename V> template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) { inline void __TBB_store_with_release(volatile T& location, V value) {
machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) ); machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
} }
//! Overload that exists solely to avoid /Wp64 warnings. //! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t valu e) { inline void __TBB_store_with_release(volatile size_t& location, size_t valu e) {
skipping to change at line 746 skipping to change at line 791
// Mapping historically used names to the ones expected by atomic_load_stor e_traits // Mapping historically used names to the ones expected by atomic_load_stor e_traits
#define __TBB_load_acquire __TBB_load_with_acquire #define __TBB_load_acquire __TBB_load_with_acquire
#define __TBB_store_release __TBB_store_with_release #define __TBB_store_release __TBB_store_with_release
#ifndef __TBB_Log2 #ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) { inline intptr_t __TBB_Log2( uintptr_t x ) {
if( x==0 ) return -1; if( x==0 ) return -1;
intptr_t result = 0; intptr_t result = 0;
uintptr_t tmp; uintptr_t tmp;
#if __TBB_WORDSIZE>=8
if( (tmp = x>>32) ) { x=tmp; result += 32; } if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32)) { x=tmp; result += 32; }
#endif
if( (tmp = x>>16) ) { x=tmp; result += 16; } if( (tmp = x>>16) ) { x=tmp; result += 16; }
if( (tmp = x>>8) ) { x=tmp; result += 8; } if( (tmp = x>>8) ) { x=tmp; result += 8; }
if( (tmp = x>>4) ) { x=tmp; result += 4; } if( (tmp = x>>4) ) { x=tmp; result += 4; }
if( (tmp = x>>2) ) { x=tmp; result += 2; } if( (tmp = x>>2) ) { x=tmp; result += 2; }
return (x&2)? result+1: result; return (x&2)? result+1: result;
} }
#endif #endif
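// Quick, standalone sanity check of the generic __TBB_Log2 above (the result is
// floor(log2(x)); -1 for x==0). The function body is copied here only so the sketch
// compiles without the TBB headers; it is not an alternative implementation.
#include <cassert>
#include <stdint.h>

inline intptr_t log2_floor( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
    if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32) ) { x=tmp; result += 32; }
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2) ? result+1 : result;
}

int main() {
    assert( log2_floor(0) == -1 );
    assert( log2_floor(1) == 0 );
    assert( log2_floor(2) == 1 );
    assert( log2_floor(3) == 1 );
    assert( log2_floor(1024) == 10 );
    assert( log2_floor(1025) == 10 );
    return 0;
}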
#ifndef __TBB_AtomicOR #ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) { inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
 End of changes. 20 change blocks. 
50 lines changed or deleted 106 lines changed or added


 tbb_stddef.h   tbb_stddef.h 
skipping to change at line 37 skipping to change at line 37
*/ */
#ifndef __TBB_tbb_stddef_H #ifndef __TBB_tbb_stddef_H
#define __TBB_tbb_stddef_H #define __TBB_tbb_stddef_H
// Marketing-driven product version // Marketing-driven product version
#define TBB_VERSION_MAJOR 4 #define TBB_VERSION_MAJOR 4
#define TBB_VERSION_MINOR 0 #define TBB_VERSION_MINOR 0
// Engineering-focused interface version // Engineering-focused interface version
#define TBB_INTERFACE_VERSION 6003 #define TBB_INTERFACE_VERSION 6004
#define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000 #define TBB_INTERFACE_VERSION_MAJOR TBB_INTERFACE_VERSION/1000
// The oldest major interface version still supported // The oldest major interface version still supported
// To be used in SONAME, manifests, etc. // To be used in SONAME, manifests, etc.
#define TBB_COMPATIBLE_INTERFACE_VERSION 2 #define TBB_COMPATIBLE_INTERFACE_VERSION 2
#define __TBB_STRING_AUX(x) #x #define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x) #define __TBB_STRING(x) __TBB_STRING_AUX(x)
// We do not need defines below for resource processing on windows // We do not need defines below for resource processing on windows
skipping to change at line 137 skipping to change at line 137
#include "tbb_config.h" #include "tbb_config.h"
#if _MSC_VER >=1400 #if _MSC_VER >=1400
#define __TBB_EXPORTED_FUNC __cdecl #define __TBB_EXPORTED_FUNC __cdecl
#define __TBB_EXPORTED_METHOD __thiscall #define __TBB_EXPORTED_METHOD __thiscall
#else #else
#define __TBB_EXPORTED_FUNC #define __TBB_EXPORTED_FUNC
#define __TBB_EXPORTED_METHOD #define __TBB_EXPORTED_METHOD
#endif #endif
#if __INTEL_COMPILER || _MSC_VER
#define __TBB_NOINLINE(decl) __declspec(noinline) decl
#elif __GNUC__
#define __TBB_NOINLINE(decl) decl __attribute__ ((noinline))
#else
#define __TBB_NOINLINE(decl) decl
#endif
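// Hypothetical usage of the new __TBB_NOINLINE macro (the function name is made up for
// illustration): wrap the declaration to keep a cold path out of a hot, otherwise-inlined
// caller on all three compiler families handled above.
__TBB_NOINLINE( void report_allocation_failure( const char* where ) );

void report_allocation_failure( const char* where ) {
    (void)where;   // cold-path diagnostics would go here
}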
#include <cstddef> /* Need size_t and ptrdiff_t */ #include <cstddef> /* Need size_t and ptrdiff_t */
#if _MSC_VER #if _MSC_VER
#define __TBB_tbb_windef_H #define __TBB_tbb_windef_H
#include "internal/_tbb_windef.h" #include "internal/_tbb_windef.h"
#undef __TBB_tbb_windef_H #undef __TBB_tbb_windef_H
#endif #endif
#if !defined(_MSC_VER) || _MSC_VER>=1600 #if !defined(_MSC_VER) || _MSC_VER>=1600
#include <stdint.h> #include <stdint.h>
#endif #endif
skipping to change at line 247 skipping to change at line 255
Note that no problems have yet been observed relating to the definition currently being empty, Note that no problems have yet been observed relating to the definition currently being empty,
even if at least "volatile" would seem to be in order to avoid data som etimes temporarily hiding even if at least "volatile" would seem to be in order to avoid data som etimes temporarily hiding
in a register (although "volatile" as a "poor man's atomic" lacks sever al other features of a proper in a register (although "volatile" as a "poor man's atomic" lacks sever al other features of a proper
atomic, some of which are now provided instead through specialized func tions). atomic, some of which are now provided instead through specialized func tions).
Note that usage is intentionally compatible with a definition as qualif ier "volatile", Note that usage is intentionally compatible with a definition as qualif ier "volatile",
both as a way to have the compiler help enforce use of the label and to quickly rule out both as a way to have the compiler help enforce use of the label and to quickly rule out
one potential issue. one potential issue.
Note however that, with some architecture/compiler combinations, e.g. o n Itanium, "volatile" Note however that, with some architecture/compiler combinations, e.g. o n IA-64, "volatile"
also has non-portable memory semantics that are needlessly expensive fo r "relaxed" operations. also has non-portable memory semantics that are needlessly expensive fo r "relaxed" operations.
Note that this must only be applied to data that will not change bit pa tterns when cast to/from Note that this must only be applied to data that will not change bit pa tterns when cast to/from
an integral type of the same length; tbb::atomic must be used instead f or, e.g., floating-point types. an integral type of the same length; tbb::atomic must be used instead f or, e.g., floating-point types.
TODO: apply wherever relevant **/ TODO: apply wherever relevant **/
#define __TBB_atomic // intentionally empty, see above #define __TBB_atomic // intentionally empty, see above
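// Hypothetical illustration of the __TBB_atomic label described above (assuming
// tbb_machine.h is included so the fenced helpers are available): the label marks
// word-sized data accessed through the specialized load/store functions, while
// currently expanding to nothing itself.
__TBB_atomic intptr_t ready_flag;    // still a plain intptr_t after preprocessing

inline void publish_ready() { __TBB_store_with_release( ready_flag, intptr_t(1) ); }
inline bool is_ready()      { return __TBB_load_with_acquire( ready_flag ) != 0; }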
template<class T, int S> template<class T, int S>
struct padded_base : T { struct padded_base : T {
skipping to change at line 353 skipping to change at line 361
}; };
#if _MSC_VER #if _MSC_VER
//! Microsoft std::allocator has non-standard extension that strips const f rom a type. //! Microsoft std::allocator has non-standard extension that strips const f rom a type.
template<typename T> template<typename T>
struct allocator_type<const T> { struct allocator_type<const T> {
typedef T value_type; typedef T value_type;
}; };
#endif #endif
//! A function to select either a 32-bit or a 64-bit value, depending on machine word size.
inline size_t size_t_select( unsigned u, unsigned long long ull ) {
    /* Explicit cast of the arguments to size_t is done to avoid compiler warnings
       (e.g. by Clang and MSVC) about possible truncation. The value of the right size,
       which is selected by ?:, is anyway not truncated or promoted.
       MSVC still warns if this trick is applied directly to constants, hence this function. */
    return (sizeof(size_t)==sizeof(u)) ? size_t(u) : size_t(ull);
}
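// Hedged usage sketch for size_t_select() above (assuming tbb/tbb_stddef.h is included):
// pick a word-size-dependent constant without truncation warnings on 32-bit builds.
// The constant shown is just an example value.
#include "tbb/tbb_stddef.h"

const size_t hash_multiplier = tbb::internal::size_t_select( 0x9E3779B9u, 0x9E3779B97F4A7C15ull );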
// Struct to be used as a version tag for inline functions. // Struct to be used as a version tag for inline functions.
/** Version tag can be necessary to prevent loader on Linux from using the wrong /** Version tag can be necessary to prevent loader on Linux from using the wrong
symbol in debug builds (when inline functions are compiled as out-of-li ne). **/ symbol in debug builds (when inline functions are compiled as out-of-li ne). **/
struct version_tag_v3 {}; struct version_tag_v3 {};
typedef version_tag_v3 version_tag; typedef version_tag_v3 version_tag;
} // internal } // internal
//! @endcond //! @endcond
 End of changes. 4 change blocks. 
2 lines changed or deleted 23 lines changed or added


 windows_ia32.h   windows_ia32.h 
skipping to change at line 114 skipping to change at line 114
} \ } \
return result; \ return result; \
} }
__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl) __TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx) __TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx) __TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
#undef __TBB_MACHINE_DEFINE_ATOMICS #undef __TBB_MACHINE_DEFINE_ATOMICS
static inline __int32 __TBB_machine_lg( unsigned __int64 i ) { #if ( _MSC_VER>=1400 && !defined(__INTEL_COMPILER) ) || (__INTEL_COMPILER>=1200)
unsigned __int32 j;
// MSVC did not have this intrinsic prior to VC8.
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
#define __TBB_LOG2_USE_BSR_INTRINSIC 1
extern "C" unsigned char _BitScanReverse( unsigned long* i, unsigned long w );
#pragma intrinsic(_BitScanReverse)
#endif
static inline intptr_t __TBB_machine_lg( uintptr_t i ) {
unsigned long j;
#if __TBB_LOG2_USE_BSR_INTRINSIC
_BitScanReverse( &j, i );
#else
__asm __asm
{ {
bsr eax, i bsr eax, i
mov j, eax mov j, eax
} }
#endif
return j; return j;
} }
static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) { static inline void __TBB_machine_OR( volatile void *operand, __int32 addend ) {
__asm __asm
{ {
mov eax, addend mov eax, addend
mov edx, [operand] mov edx, [operand]
lock or [edx], eax lock or [edx], eax
} }
skipping to change at line 146 skipping to change at line 158
mov eax, addend mov eax, addend
mov edx, [operand] mov edx, [operand]
lock and [edx], eax lock and [edx], eax
} }
} }
static inline void __TBB_machine_pause (__int32 delay ) { static inline void __TBB_machine_pause (__int32 delay ) {
_asm _asm
{ {
mov eax, delay mov eax, delay
L1: __TBB_L1:
pause pause
add eax, -1 add eax, -1
jne L1 jne __TBB_L1
} }
return; return;
} }
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 //TODO: Check whether it is possible and profitable for IA-32 on (Linux and Windows)
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 //to use 64-bit load/store via floating point registers together with a full fence
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 //for sequentially consistent load/store, instead of CAS.
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// Definition of other functions // Definition of other functions
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread() #define __TBB_Yield() SwitchToThread()
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
#if defined(_MSC_VER)&&_MSC_VER<1400 #if defined(_MSC_VER)&&_MSC_VER<1400
static inline void* __TBB_machine_get_current_teb () { static inline void* __TBB_machine_get_current_teb () {
void* pteb; void* pteb;
 End of changes. 5 change blocks. 
7 lines changed or deleted 28 lines changed or added


 windows_intel64.h   windows_intel64.h 
skipping to change at line 58 skipping to change at line 58
#pragma intrinsic(_InterlockedExchange64) #pragma intrinsic(_InterlockedExchange64)
#endif /* !defined(__INTEL_COMPILER) */ #endif /* !defined(__INTEL_COMPILER) */
#if __INTEL_COMPILER #if __INTEL_COMPILER
#define __TBB_compiler_fence() __asm { __asm nop } #define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence } #define __TBB_full_memory_fence() __asm { __asm mfence }
#elif _MSC_VER >= 1300 #elif _MSC_VER >= 1300
extern "C" void _ReadWriteBarrier(); extern "C" void _ReadWriteBarrier();
#pragma intrinsic(_ReadWriteBarrier) #pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence) #pragma intrinsic(_mm_mfence)
#pragma intrinsic(_mm_pause)
#define __TBB_compiler_fence() _ReadWriteBarrier() #define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence() #define __TBB_full_memory_fence() _mm_mfence()
#endif #endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence() #define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence() #define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence() #define __TBB_release_consistency_helper() __TBB_compiler_fence()
// ATTENTION: if you ever change argument types in machine-specific primiti ves, // ATTENTION: if you ever change argument types in machine-specific primiti ves,
// please take care of atomic_word<> specializations in tbb/atomic.h // please take care of atomic_word<> specializations in tbb/atomic.h
extern "C" { extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, _ _int8 value, __int8 comparand ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, _ _int8 value, __int8 comparand );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *pt r, __int8 value ); __int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *pt r, __int8 value );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr , __int16 addend ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr , __int16 addend );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *p tr, __int16 value ); __int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *p tr, __int16 value );
void __TBB_EXPORTED_FUNC __TBB_machine_pause (__int32 delay );
} }
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int 32 comparand ) { inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int 32 comparand ) {
return _InterlockedCompareExchange( (long*)ptr, value, comparand ); return _InterlockedCompareExchange( (long*)ptr, value, comparand );
} }
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) { inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
return _InterlockedExchangeAdd( (long*)ptr, addend ); return _InterlockedExchangeAdd( (long*)ptr, addend );
} }
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) { inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
return _InterlockedExchange( (long*)ptr, value ); return _InterlockedExchange( (long*)ptr, value );
skipping to change at line 98 skipping to change at line 98
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __ int64 comparand ) { inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __ int64 comparand ) {
return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand ) ; return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand ) ;
} }
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) { inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
return _InterlockedExchangeAdd64( (__int64*)ptr, addend ); return _InterlockedExchangeAdd64( (__int64*)ptr, addend );
} }
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) { inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
return _InterlockedExchange64( (__int64*)ptr, value ); return _InterlockedExchange64( (__int64*)ptr, value );
} }
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1 inline void __TBB_machine_pause_v6 (__int32 delay ) {
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 for (;delay>0; --delay )
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 _mm_pause();
}
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
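// A minimal sketch (not TBB's atomic_backoff) of how a pause loop such as
// __TBB_machine_pause_v6 above is typically used: exponential backoff while spinning
// on a flag. Assumes a compiler that provides _mm_pause via <immintrin.h>.
#include <immintrin.h>
#include <atomic>

inline void spin_wait_until_set( const std::atomic<int>& flag ) {
    int pause_count = 1;
    while( flag.load( std::memory_order_acquire ) == 0 ) {
        for( int i = 0; i < pause_count; ++i )
            _mm_pause();                          // tell the core this is a spin-wait
        if( pause_count < 16 ) pause_count *= 2;  // back off, with a capped burst length
    }
}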
#if !__INTEL_COMPILER
extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in t64 w ); extern "C" unsigned char _BitScanReverse64( unsigned long* i, unsigned __in t64 w );
#pragma intrinsic(_BitScanReverse64) #pragma intrinsic(_BitScanReverse64)
#endif
inline __int64 __TBB_machine_lg( unsigned __int64 i ) { inline __int64 __TBB_machine_lg( unsigned __int64 i ) {
#if __INTEL_COMPILER
unsigned __int64 j;
__asm
{
bsr rax, i
mov j, rax
}
#else
unsigned long j; unsigned long j;
_BitScanReverse64( &j, i ); _BitScanReverse64( &j, i );
#endif
return j; return j;
} }
inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) { inline void __TBB_machine_OR( volatile void *operand, intptr_t addend ) {
_InterlockedOr64((__int64*)operand, addend); _InterlockedOr64((__int64*)operand, addend);
} }
inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) { inline void __TBB_machine_AND( volatile void *operand, intptr_t addend ) {
_InterlockedAnd64((__int64*)operand, addend); _InterlockedAnd64((__int64*)operand, addend);
} }
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V) #define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V) #define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
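// Illustration of the fetch-OR / fetch-AND pattern behind __TBB_AtomicOR and
// __TBB_AtomicAND above, written with std::atomic rather than the TBB macros so it
// stands alone: atomically setting, clearing, and testing shared flag bits.
#include <atomic>
#include <stdint.h>

std::atomic<uintptr_t> shared_flags(0);

inline void set_flag( uintptr_t bit )   { shared_flags.fetch_or( bit, std::memory_order_acq_rel ); }
inline void clear_flag( uintptr_t bit ) { shared_flags.fetch_and( ~bit, std::memory_order_acq_rel ); }
inline bool test_flag( uintptr_t bit )  { return ( shared_flags.load( std::memory_order_acquire ) & bit ) != 0; }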
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void ); extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread() #define __TBB_Yield() SwitchToThread()
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause_v6(V)
#define __TBB_Log2(V) __TBB_machine_lg(V) #define __TBB_Log2(V) __TBB_machine_lg(V)
// API to retrieve/update FPU control setting // API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1 #define __TBB_CPU_CTL_ENV_PRESENT 1
struct __TBB_cpu_ctl_env_t { struct __TBB_cpu_ctl_env_t {
int mxcsr; int mxcsr;
short x87cw; short x87cw;
}; };
 End of changes. 8 change blocks. 
16 lines changed or deleted 11 lines changed or added


 xbox360_ppc.h   xbox360_ppc.h 
skipping to change at line 72 skipping to change at line 72
} }
inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __i nt64 comparand ) inline __int64 __TBB_machine_cmpswp8(volatile void *ptr, __int64 value, __i nt64 comparand )
{ {
__sync(); __sync();
__int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value , comparand); __int64 result = InterlockedCompareExchange64((volatile LONG64*)ptr, value , comparand);
__isync(); __isync();
return result; return result;
} }
#define __TBB_USE_GENERIC_PART_WORD_CAS 1 #define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1 #define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1 #define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1 #define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1 #define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1 #define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#pragma optimize( "", off ) #pragma optimize( "", off )
inline void __TBB_machine_pause (__int32 delay ) inline void __TBB_machine_pause (__int32 delay )
{ {
for (__int32 i=0; i<delay; i++) {;}; for (__int32 i=0; i<delay; i++) {;};
} }
#pragma optimize( "", on ) #pragma optimize( "", on )
#define __TBB_Yield() Sleep(0) #define __TBB_Yield() Sleep(0)
#define __TBB_Pause(V) __TBB_machine_pause(V) #define __TBB_Pause(V) __TBB_machine_pause(V)
 End of changes. 1 change blocks. 
6 lines changed or deleted 7 lines changed or added
