diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -90,7 +90,6 @@ now provided when implementations in the global namespace are provided by the C library. - Implemented ```` header from C++17 -- `D122780 `_ Improved the performance of std::sort - The ``ranges`` versions of ``copy``, ``move``, ``copy_backward`` and ``move_backward`` are now also optimized for ``std::deque<>::iterator``, which can lead to up to 20x performance improvements on certain algorithms. - The ``std`` and ``ranges`` versions of ``copy``, ``move``, ``copy_backward`` and ``move_backward`` are now also diff --git a/libcxx/include/__algorithm/sort.h b/libcxx/include/__algorithm/sort.h --- a/libcxx/include/__algorithm/sort.h +++ b/libcxx/include/__algorithm/sort.h @@ -11,15 +11,10 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> -#include <__algorithm/iter_swap.h> #include <__algorithm/iterator_operations.h> #include <__algorithm/min_element.h> #include <__algorithm/partial_sort.h> #include <__algorithm/unwrap_iter.h> -#include <__assert> -#include <__bit/blsr.h> -#include <__bit/countl.h> -#include <__bit/countr.h> #include <__config> #include <__debug> #include <__debug_utils/randomize_range.h> @@ -28,10 +23,11 @@ #include <__iterator/iterator_traits.h> #include <__memory/destruct_n.h> #include <__memory/unique_ptr.h> -#include <__type_traits/conditional.h> #include <__type_traits/is_arithmetic.h> +#include <__type_traits/is_trivially_copy_assignable.h> +#include <__type_traits/is_trivially_copy_constructible.h> #include <__utility/move.h> -#include <__utility/pair.h> +#include #include #include @@ -132,7 +128,8 @@ _LIBCPP_HIDE_FROM_ABI unsigned __sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; + using _Ops = _IterOps<_AlgPolicy>; + unsigned __r = std::__sort3<_AlgPolicy, 
_Compare>(__x1, __x2, __x3, __c); if (__c(*__x4, *__x3)) { _Ops::iter_swap(__x3, __x4); @@ -187,7 +184,7 @@ _Compare __c) { using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type; _WrappedComp __wrapped_comp(__c); - return std::__sort5<_WrappedComp, _ForwardIterator>( + return std::__sort5<_WrappedComp>( std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __wrapped_comp); } @@ -212,13 +209,6 @@ integral_constant::value && sizeof(_Tp) <= sizeof(void*) && is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; -namespace __detail { - -// Size in bits for the bitset in use. -enum { __block_size = sizeof(uint64_t) * 8 }; - -} // namespace __detail - // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { @@ -278,15 +268,10 @@ std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); } -template +template inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>::value, void> -__sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { +__sort5_maybe_branchless(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, + _RandomAccessIterator __x4, _RandomAccessIterator __x5, _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -315,48 +300,34 @@ } } -// Sort the iterator range [__first, __last) using the comparator __comp using -// the insertion sort algorithm. 
template _LIBCPP_HIDE_FROM_ABI void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) { using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; - if (__first == __last) - return; - _BidirectionalIterator __i = __first; - for (++__i; __i != __last; ++__i) { - _BidirectionalIterator __j = __i; - --__j; - if (__comp(*__i, *__j)) { - value_type __t(_Ops::__iter_move(__i)); - _BidirectionalIterator __k = __j; - __j = __i; - do { + if (__first != __last) { + _BidirectionalIterator __i = __first; + for (++__i; __i != __last; ++__i) { + _BidirectionalIterator __j = __i; + value_type __t(_Ops::__iter_move(__j)); + for (_BidirectionalIterator __k = __i; __k != __first && __comp(__t, *--__k); --__j) *__j = _Ops::__iter_move(__k); - __j = __k; - } while (__j != __first && __comp(__t, *--__k)); *__j = std::move(__t); } } } -// Sort the iterator range [__first, __last) using the comparator __comp using -// the insertion sort algorithm. Insertion sort has two loops, outer and inner. -// The implementation below has not bounds check (unguarded) for the inner loop. -// Assumes that there is an element in the position (__first - 1) and that each -// element in the input range is greater or equal to the element at __first - 1. 
template -_LIBCPP_HIDE_FROM_ABI void -__insertion_sort_unguarded(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { +_LIBCPP_HIDE_FROM_ABI +void __insertion_sort_3(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { using _Ops = _IterOps<_AlgPolicy>; + typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; - if (__first == __last) - return; - for (_RandomAccessIterator __i = __first + difference_type(1); __i != __last; ++__i) { - _RandomAccessIterator __j = __i - difference_type(1); + _RandomAccessIterator __j = __first + difference_type(2); + std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), __j, __comp); + for (_RandomAccessIterator __i = __j + difference_type(1); __i != __last; ++__i) { if (__comp(*__i, *__j)) { value_type __t(_Ops::__iter_move(__i)); _RandomAccessIterator __k = __j; @@ -364,9 +335,10 @@ do { *__j = _Ops::__iter_move(__k); __j = __k; - } while (__comp(__t, *--__k)); // No need for bounds check due to the assumption stated above. 
+ } while (__j != __first && __comp(__t, *--__k)); *__j = std::move(__t); } + __j = __i; } } @@ -387,7 +359,7 @@ return true; case 2: if (__comp(*--__last, *__first)) - _Ops::iter_swap(__first, __last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); return true; case 3: std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); @@ -456,336 +428,17 @@ } } -template -inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos( - _RandomAccessIterator __first, _RandomAccessIterator __last, uint64_t& __left_bitset, uint64_t& __right_bitset) { - using _Ops = _IterOps<_AlgPolicy>; - typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - // Swap one pair on each iteration as long as both bitsets have at least one - // element for swapping. - while (__left_bitset != 0 && __right_bitset != 0) { - difference_type tz_left = __libcpp_ctz(__left_bitset); - __left_bitset = __libcpp_blsr(__left_bitset); - difference_type tz_right = __libcpp_ctz(__right_bitset); - __right_bitset = __libcpp_blsr(__right_bitset); - _Ops::iter_swap(__first + tz_left, __last - tz_right); - } -} - -template ::value_type> -inline _LIBCPP_HIDE_FROM_ABI void -__populate_left_bitset(_RandomAccessIterator __first, _Compare __comp, _ValueType& __pivot, uint64_t& __left_bitset) { - // Possible vectorization. With a proper "-march" flag, the following loop - // will be compiled into a set of SIMD instructions. - _RandomAccessIterator __iter = __first; - for (int __j = 0; __j < __detail::__block_size;) { - bool __comp_result = !__comp(*__iter, __pivot); - __left_bitset |= (static_cast(__comp_result) << __j); - __j++; - ++__iter; - } -} - -template ::value_type> -inline _LIBCPP_HIDE_FROM_ABI void -__populate_right_bitset(_RandomAccessIterator __lm1, _Compare __comp, _ValueType& __pivot, uint64_t& __right_bitset) { - // Possible vectorization. 
With a proper "-march" flag, the following loop - // will be compiled into a set of SIMD instructions. - _RandomAccessIterator __iter = __lm1; - for (int __j = 0; __j < __detail::__block_size;) { - bool __comp_result = __comp(*__iter, __pivot); - __right_bitset |= (static_cast(__comp_result) << __j); - __j++; - --__iter; - } -} - -template ::value_type> -inline _LIBCPP_HIDE_FROM_ABI void __bitset_partition_partial_blocks( - _RandomAccessIterator& __first, - _RandomAccessIterator& __lm1, - _Compare __comp, - _ValueType& __pivot, - uint64_t& __left_bitset, - uint64_t& __right_bitset) { - typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - difference_type __remaining_len = __lm1 - __first + 1; - difference_type __l_size; - difference_type __r_size; - if (__left_bitset == 0 && __right_bitset == 0) { - __l_size = __remaining_len / 2; - __r_size = __remaining_len - __l_size; - } else if (__left_bitset == 0) { - // We know at least one side is a full block. - __l_size = __remaining_len - __detail::__block_size; - __r_size = __detail::__block_size; - } else { // if (__right_bitset == 0) - __l_size = __detail::__block_size; - __r_size = __remaining_len - __detail::__block_size; - } - // Record the comparison outcomes for the elements currently on the left side. - if (__left_bitset == 0) { - _RandomAccessIterator __iter = __first; - for (int j = 0; j < __l_size; j++) { - bool __comp_result = !__comp(*__iter, __pivot); - __left_bitset |= (static_cast(__comp_result) << j); - ++__iter; - } - } - // Record the comparison outcomes for the elements currently on the right - // side. 
- if (__right_bitset == 0) { - _RandomAccessIterator __iter = __lm1; - for (int j = 0; j < __r_size; j++) { - bool __comp_result = __comp(*__iter, __pivot); - __right_bitset |= (static_cast(__comp_result) << j); - --__iter; - } - } - std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset); - __first += (__left_bitset == 0) ? __l_size : 0; - __lm1 -= (__right_bitset == 0) ? __r_size : 0; -} - -template -inline _LIBCPP_HIDE_FROM_ABI void __swap_bitmap_pos_within( - _RandomAccessIterator& __first, _RandomAccessIterator& __lm1, uint64_t& __left_bitset, uint64_t& __right_bitset) { - using _Ops = _IterOps<_AlgPolicy>; - typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - if (__left_bitset) { - // Swap within the left side. Need to find set positions in the reverse - // order. - while (__left_bitset != 0) { - difference_type __tz_left = __detail::__block_size - 1 - __libcpp_clz(__left_bitset); - __left_bitset &= (static_cast(1) << __tz_left) - 1; - _RandomAccessIterator it = __first + __tz_left; - if (it != __lm1) { - _Ops::iter_swap(it, __lm1); - } - --__lm1; - } - __first = __lm1 + difference_type(1); - } else if (__right_bitset) { - // Swap within the right side. Need to find set positions in the reverse - // order. - while (__right_bitset != 0) { - difference_type __tz_right = __detail::__block_size - 1 - __libcpp_clz(__right_bitset); - __right_bitset &= (static_cast(1) << __tz_right) - 1; - _RandomAccessIterator it = __lm1 - __tz_right; - if (it != __first) { - _Ops::iter_swap(it, __first); - } - ++__first; - } - } -} - -// Partition [__first, __last) using the comparator __comp. *__first has the -// chosen pivot. Elements that are equivalent are kept to the left of the -// pivot. Returns the iterator for the pivot and a bool value which is true if -// the provided range is already sorted, false otherwise. 
We assume that the -// length of the range is at least three elements. -// -// __bitset_partition uses bitsets for storing outcomes of the comparisons -// between the pivot and other elements. -template -_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool> -__bitset_partition(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - using _Ops = _IterOps<_AlgPolicy>; - typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type; - typedef typename std::iterator_traits<_RandomAccessIterator>::difference_type difference_type; - _LIBCPP_ASSERT(__last - __first >= difference_type(3), ""); - - _RandomAccessIterator __begin = __first; - value_type __pivot(_Ops::__iter_move(__first)); - // Find the first element greater than the pivot. - if (__comp(__pivot, *(__last - difference_type(1)))) { - // Not guarded since we know the last element is greater than the pivot. - while (!__comp(__pivot, *++__first)) { - } - } else { - while (++__first < __last && !__comp(__pivot, *__first)) { - } - } - // Find the last element less than or equal to the pivot. - if (__first < __last) { - // It will be always guarded because __introsort will do the median-of-three - // before calling this. - while (__comp(__pivot, *--__last)) { - } - } - // If the first element greater than the pivot is at or after the - // last element less than or equal to the pivot, then we have covered the - // entire range without swapping elements. This implies the range is already - // partitioned. - bool __already_partitioned = __first >= __last; - if (!__already_partitioned) { - _Ops::iter_swap(__first, __last); - ++__first; - } - - // In [__first, __last) __last is not inclusive. From now on, it uses last - // minus one to be inclusive on both sides. - _RandomAccessIterator __lm1 = __last - difference_type(1); - uint64_t __left_bitset = 0; - uint64_t __right_bitset = 0; - - // Reminder: length = __lm1 - __first + 1. 
- while (__lm1 - __first >= 2 * __detail::__block_size - 1) { - // Record the comparison outcomes for the elements currently on the left - // side. - if (__left_bitset == 0) - std::__populate_left_bitset<_Compare>(__first, __comp, __pivot, __left_bitset); - // Record the comparison outcomes for the elements currently on the right - // side. - if (__right_bitset == 0) - std::__populate_right_bitset<_Compare>(__lm1, __comp, __pivot, __right_bitset); - // Swap the elements recorded to be the candidates for swapping in the - // bitsets. - std::__swap_bitmap_pos<_AlgPolicy, _RandomAccessIterator>(__first, __lm1, __left_bitset, __right_bitset); - // Only advance the iterator if all the elements that need to be moved to - // other side were moved. - __first += (__left_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0); - __lm1 -= (__right_bitset == 0) ? difference_type(__detail::__block_size) : difference_type(0); - } - // Now, we have a less-than a block worth of elements on at least one of the - // sides. - std::__bitset_partition_partial_blocks<_AlgPolicy, _Compare>( - __first, __lm1, __comp, __pivot, __left_bitset, __right_bitset); - // At least one the bitsets would be empty. For the non-empty one, we need to - // properly partition the elements that appear within that bitset. - std::__swap_bitmap_pos_within<_AlgPolicy>(__first, __lm1, __left_bitset, __right_bitset); - - // Move the pivot to its correct position. - _RandomAccessIterator __pivot_pos = __first - difference_type(1); - if (__begin != __pivot_pos) { - *__begin = _Ops::__iter_move(__pivot_pos); - } - *__pivot_pos = std::move(__pivot); - return std::make_pair(__pivot_pos, __already_partitioned); -} - -// Partition [__first, __last) using the comparator __comp. *__first has the -// chosen pivot. Elements that are equivalent are kept to the right of the -// pivot. 
Returns the iterator for the pivot and a bool value which is true if -// the provided range is already sorted, false otherwise. We assume that the -// length of the range is at least three elements. -template -_LIBCPP_HIDE_FROM_ABI std::pair<_RandomAccessIterator, bool> -__partition_with_equals_on_right(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { - using _Ops = _IterOps<_AlgPolicy>; - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type; - _LIBCPP_ASSERT(__last - __first >= difference_type(3), ""); - _RandomAccessIterator __begin = __first; - value_type __pivot(_Ops::__iter_move(__first)); - // Find the first element greater or equal to the pivot. It will be always - // guarded because __introsort will do the median-of-three before calling - // this. - while (__comp(*++__first, __pivot)) - ; - - // Find the last element less than the pivot. - if (__begin == __first - difference_type(1)) { - while (__first < __last && !__comp(*--__last, __pivot)) - ; - } else { - // Guarded. - while (!__comp(*--__last, __pivot)) - ; - } - - // If the first element greater than or equal to the pivot is at or after the - // last element less than the pivot, then we have covered the entire range - // without swapping elements. This implies the range is already partitioned. - bool __already_partitioned = __first >= __last; - // Go through the remaining elements. Swap pairs of elements (one to the - // right of the pivot and the other to left of the pivot) that are not on the - // correct side of the pivot. - while (__first < __last) { - _Ops::iter_swap(__first, __last); - while (__comp(*++__first, __pivot)) - ; - while (!__comp(*--__last, __pivot)) - ; - } - // Move the pivot to its correct position. 
- _RandomAccessIterator __pivot_pos = __first - difference_type(1); - if (__begin != __pivot_pos) { - *__begin = _Ops::__iter_move(__pivot_pos); - } - *__pivot_pos = std::move(__pivot); - return std::make_pair(__pivot_pos, __already_partitioned); -} - -// Similar to the above function. Elements equivalent to the pivot are put to -// the left of the pivot. Returns the iterator to the pivot element. -template -_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator -__partition_with_equals_on_left(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { +template +void __introsort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __depth) { using _Ops = _IterOps<_AlgPolicy>; - typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - typedef typename std::iterator_traits<_RandomAccessIterator>::value_type value_type; - _RandomAccessIterator __begin = __first; - value_type __pivot(_Ops::__iter_move(__first)); - if (__comp(__pivot, *(__last - difference_type(1)))) { - // Guarded. - while (!__comp(__pivot, *++__first)) { - } - } else { - while (++__first < __last && !__comp(__pivot, *__first)) { - } - } - - if (__first < __last) { - // It will be always guarded because __introsort will do the - // median-of-three before calling this. - while (__comp(__pivot, *--__last)) { - } - } - while (__first < __last) { - _Ops::iter_swap(__first, __last); - while (!__comp(__pivot, *++__first)) - ; - while (__comp(__pivot, *--__last)) - ; - } - _RandomAccessIterator __pivot_pos = __first - difference_type(1); - if (__begin != __pivot_pos) { - *__begin = _Ops::__iter_move(__pivot_pos); - } - *__pivot_pos = std::move(__pivot); - return __first; -} -// The main sorting function. 
Implements introsort combined with other ideas: -// - option of using block quick sort for partitioning, -// - guarded and unguarded insertion sort for small lengths, -// - Tuckey's ninther technique for computing the pivot, -// - check on whether partition was not required. -// The implementation is partly based on Orson Peters' pattern-defeating -// quicksort, published at: . -template -void __introsort(_RandomAccessIterator __first, - _RandomAccessIterator __last, - _Compare __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __depth, - bool __leftmost = true) { - using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; - using _Comp_ref = __comp_ref_type<_Compare>; - // Upper bound for using insertion sort for sorting. - _LIBCPP_CONSTEXPR difference_type __limit = 24; - // Lower bound for using Tuckey's ninther technique for median computation. - _LIBCPP_CONSTEXPR difference_type __ninther_threshold = 128; + typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; + const difference_type __limit = + is_trivially_copy_constructible::value && is_trivially_copy_assignable::value ? 30 : 6; while (true) { + __restart: difference_type __len = __last - __first; switch (__len) { case 0: @@ -793,7 +446,7 @@ return; case 2: if (__comp(*--__last, *__first)) - _Ops::iter_swap(__first, __last); + _IterOps<_AlgPolicy>::iter_swap(__first, __last); return; case 3: std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); @@ -808,60 +461,127 @@ --__last, __comp); return; } - // Use insertion sort if the length of the range is below the specified limit. 
- if (__len < __limit) { - if (__leftmost) { - std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp); - } else { - std::__insertion_sort_unguarded<_AlgPolicy, _Compare>(__first, __last, __comp); - } + if (__len <= __limit) { + std::__insertion_sort_3<_AlgPolicy, _Compare>(__first, __last, __comp); return; } + // __len > 5 if (__depth == 0) { // Fallback to heap sort as Introsort suggests. std::__partial_sort<_AlgPolicy, _Compare>(__first, __last, __last, __comp); return; } --__depth; + _RandomAccessIterator __m = __first; + _RandomAccessIterator __lm1 = __last; + --__lm1; + unsigned __n_swaps; { - difference_type __half_len = __len / 2; - // Use Tuckey's ninther technique or median of 3 for pivot selection - // depending on the length of the range being sorted. - if (__len > __ninther_threshold) { - std::__sort3<_AlgPolicy, _Compare>(__first, __first + __half_len, __last - difference_type(1), __comp); - std::__sort3<_AlgPolicy, _Compare>( - __first + difference_type(1), __first + (__half_len - 1), __last - difference_type(2), __comp); - std::__sort3<_AlgPolicy, _Compare>( - __first + difference_type(2), __first + (__half_len + 1), __last - difference_type(3), __comp); - std::__sort3<_AlgPolicy, _Compare>( - __first + (__half_len - 1), __first + __half_len, __first + (__half_len + 1), __comp); - _Ops::iter_swap(__first, __first + __half_len); + difference_type __delta; + if (__len >= 1000) { + __delta = __len / 2; + __m += __delta; + __delta /= 2; + __n_swaps = std::__sort5_wrap_policy<_AlgPolicy, _Compare>( + __first, __first + __delta, __m, __m + __delta, __lm1, __comp); } else { - std::__sort3<_AlgPolicy, _Compare>(__first + __half_len, __first, __last - difference_type(1), __comp); + __delta = __len / 2; + __m += __delta; + __n_swaps = std::__sort3<_AlgPolicy, _Compare>(__first, __m, __lm1, __comp); } } - // The elements to the left of the current iterator range are already - // sorted. 
If the current iterator range to be sorted is not the - // leftmost part of the entire iterator range and the pivot is same as - // the highest element in the range to the left, then we know that all - // the elements in the range [first, pivot] would be equal to the pivot, - // assuming the equal elements are put on the left side when - // partitioned. This also means that we do not need to sort the left - // side of the partition. - if (!__leftmost && !__comp(*(__first - difference_type(1)), *__first)) { - __first = std::__partition_with_equals_on_left<_AlgPolicy, _RandomAccessIterator, _Comp_ref>( - __first, __last, _Comp_ref(__comp)); - continue; + // *__m is median + // partition [__first, __m) < *__m and *__m <= [__m, __last) + // (this inhibits tossing elements equivalent to __m around unnecessarily) + _RandomAccessIterator __i = __first; + _RandomAccessIterator __j = __lm1; + // j points beyond range to be tested, *__m is known to be <= *__lm1 + // The search going up is known to be guarded but the search coming down isn't. + // Prime the downward search with a guard. 
+ if (!__comp(*__i, *__m)) // if *__first == *__m + { + // *__first == *__m, *__first doesn't go in first part + // manually guard downward moving __j against __i + while (true) { + if (__i == --__j) { + // *__first == *__m, *__m <= all other elements + // Partition instead into [__first, __i) == *__first and *__first < [__i, __last) + ++__i; // __first + 1 + __j = __last; + if (!__comp(*__first, *--__j)) // we need a guard if *__first == *(__last-1) + { + while (true) { + if (__i == __j) + return; // [__first, __last) all equivalent elements + if (__comp(*__first, *__i)) { + _Ops::iter_swap(__i, __j); + ++__n_swaps; + ++__i; + break; + } + ++__i; + } + } + // [__first, __i) == *__first and *__first < [__j, __last) and __j == __last - 1 + if (__i == __j) + return; + while (true) { + while (!__comp(*__first, *__i)) + ++__i; + while (__comp(*__first, *--__j)) + ; + if (__i >= __j) + break; + _Ops::iter_swap(__i, __j); + ++__n_swaps; + ++__i; + } + // [__first, __i) == *__first and *__first < [__i, __last) + // The first part is sorted, sort the second part + // std::__sort<_Compare>(__i, __last, __comp); + __first = __i; + goto __restart; + } + if (__comp(*__j, *__m)) { + _Ops::iter_swap(__i, __j); + ++__n_swaps; + break; // found guard for downward moving __j, now use unguarded partition + } + } + } + // It is known that *__i < *__m + ++__i; + // j points beyond range to be tested, *__m is known to be <= *__lm1 + // if not yet partitioned... 
+ if (__i < __j) { + // known that *(__i - 1) < *__m + // known that __i <= __m + while (true) { + // __m still guards upward moving __i + while (__comp(*__i, *__m)) + ++__i; + // It is now known that a guard exists for downward moving __j + while (!__comp(*--__j, *__m)) + ; + if (__i > __j) + break; + _Ops::iter_swap(__i, __j); + ++__n_swaps; + // It is known that __m != __j + // If __m just moved, follow it + if (__m == __i) + __m = __j; + ++__i; + } + } + // [__first, __i) < *__m and *__m <= [__i, __last) + if (__i != __m && __comp(*__m, *__i)) { + _Ops::iter_swap(__i, __m); + ++__n_swaps; } - // Use bitset partition only if asked for. - auto __ret = - _UseBitSetPartition - ? std::__bitset_partition<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp) - : std::__partition_with_equals_on_right<_AlgPolicy, _RandomAccessIterator, _Compare>(__first, __last, __comp); - _RandomAccessIterator __i = __ret.first; // [__first, __i) < *__i and *__i <= [__i+1, __last) // If we were given a perfect partition, see if insertion sort is quick... - if (__ret.second) { + if (__n_swaps == 0) { using _WrappedComp = typename _WrapAlgPolicy<_AlgPolicy, _Compare>::type; _WrappedComp __wrapped_comp(__comp); bool __fs = std::__insertion_sort_incomplete<_WrappedComp>(__first, __i, __wrapped_comp); @@ -877,11 +597,14 @@ } } } - // Sort the left partiton recursively and the right partition with tail recursion elimination. 
- std::__introsort<_AlgPolicy, _Compare, _RandomAccessIterator, _UseBitSetPartition>( - __first, __i, __comp, __depth, __leftmost); - __leftmost = false; - __first = ++__i; + // sort smaller range with recursive call and larger with tail recursion elimination + if (__i - __first < __last - __i) { + std::__introsort<_AlgPolicy, _Compare>(__first, __i, __comp, __depth); + __first = ++__i; + } else { + std::__introsort<_AlgPolicy, _Compare>(__i + difference_type(1), __last, __comp, __depth); + __last = __i; + } } } @@ -913,14 +636,7 @@ using _AlgPolicy = typename _Unwrap::_AlgPolicy; using _Compare = typename _Unwrap::_Comp; _Compare __comp = _Unwrap::__get_comp(__wrapped_comp); - // Only use bitset partitioning for arithmetic types. We should also check - // that the default comparator is in use so that we are sure that there are no - // branches in the comparator. - std::__introsort<_AlgPolicy, - _Compare, - _RandomAccessIterator, - __use_branchless_sort<_Compare, _RandomAccessIterator>::value>( - __first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Compare>(__first, __last, __comp, __depth_limit); } template