libstdc++
balanced_quicksort.h
Go to the documentation of this file.
1// -*- C++ -*-
2
3// Copyright (C) 2007-2020 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 3, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14// General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file parallel/balanced_quicksort.h
26 * @brief Implementation of a dynamically load-balanced parallel quicksort.
27 *
28 * It works in-place and needs only logarithmic extra memory.
29 * The algorithm is similar to the one proposed in
30 *
31 * P. Tsigas and Y. Zhang.
32 * A simple, fast parallel implementation of quicksort and
33 * its performance evaluation on SUN enterprise 10000.
34 * In 11th Euromicro Conference on Parallel, Distributed and
35 * Network-Based Processing, page 372, 2003.
36 *
37 * This file is a GNU parallel extension to the Standard C++ Library.
38 */
39
40// Written by Johannes Singler.
41
42#ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43#define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
44
46#include <bits/stl_algo.h>
47#include <bits/stl_function.h>
48
49#include <parallel/settings.h>
50#include <parallel/partition.h>
52#include <parallel/queue.h>
53
54#if _GLIBCXX_PARALLEL_ASSERTIONS
55#include <parallel/checkers.h>
56#ifdef _GLIBCXX_HAVE_UNISTD_H
57#include <unistd.h>
58#endif
59#endif
60
62{
63 /** @brief Information local to one thread in the parallel quicksort run. */
64 template<typename _RAIter>
66 {
68 typedef typename _TraitsType::difference_type _DifferenceType;
69
70 /** @brief Continuous part of the sequence, described by an
71 iterator pair. */
73
74 /** @brief Initial piece to work on. */
76
77 /** @brief Work-stealing queue. */
79
80 /** @brief Number of threads involved in this algorithm. */
82
83 /** @brief Pointer to a counter of elements left over to sort. */
84 volatile _DifferenceType* _M_elements_leftover;
85
86 /** @brief The complete sequence to sort. */
88
89 /** @brief Constructor.
90 * @param __queue_size size of the work-stealing queue. */
91 _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
92 };
93
94 /** @brief Balanced quicksort divide step.
95 * @param __begin Begin iterator of subsequence.
96 * @param __end End iterator of subsequence.
97 * @param __comp Comparator.
98 * @param __num_threads Number of threads that are allowed to work on
99 * this part.
100 * @pre @c (__end-__begin)>=1 */
101 template<typename _RAIter, typename _Compare>
103 __qsb_divide(_RAIter __begin, _RAIter __end,
104 _Compare __comp, _ThreadIndex __num_threads)
105 {
106 _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
107
108 typedef std::iterator_traits<_RAIter> _TraitsType;
109 typedef typename _TraitsType::value_type _ValueType;
110 typedef typename _TraitsType::difference_type _DifferenceType;
111
112 _RAIter __pivot_pos =
113 __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
114 __end - 1, __comp);
115
116#if defined(_GLIBCXX_PARALLEL_ASSERTIONS)
117 // Must be in between somewhere.
118 _DifferenceType __n = __end - __begin;
119
120 _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
121 && !__comp(*(__begin + __n / 2),
122 *__pivot_pos))
123 || (!__comp(*__pivot_pos, *__begin)
124 && !__comp(*(__end - 1), *__pivot_pos))
125 || (!__comp(*__pivot_pos, *(__begin + __n / 2))
126 && !__comp(*__begin, *__pivot_pos))
127 || (!__comp(*__pivot_pos, *(__begin + __n / 2))
128 && !__comp(*(__end - 1), *__pivot_pos))
129 || (!__comp(*__pivot_pos, *(__end - 1))
130 && !__comp(*__begin, *__pivot_pos))
131 || (!__comp(*__pivot_pos, *(__end - 1))
132 && !__comp(*(__begin + __n / 2),
133 *__pivot_pos)));
134#endif
135
136 // Swap pivot value to end.
137 if (__pivot_pos != (__end - 1))
138 std::iter_swap(__pivot_pos, __end - 1);
139 __pivot_pos = __end - 1;
140
142 __pred(__comp, *__pivot_pos);
143
144 // Divide, returning __end - __begin - 1 in the worst case.
145 _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
146 __pred,
147 __num_threads);
148
149 // Swap back pivot to middle.
150 std::iter_swap(__begin + __split_pos, __pivot_pos);
151 __pivot_pos = __begin + __split_pos;
152
153#if _GLIBCXX_PARALLEL_ASSERTIONS
154 _RAIter __r;
155 for (__r = __begin; __r != __pivot_pos; ++__r)
156 _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
157 for (; __r != __end; ++__r)
158 _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
159#endif
160
161 return __split_pos;
162 }
163
164 /** @brief Quicksort conquer step.
165 * @param __tls Array of thread-local storages.
166 * @param __begin Begin iterator of subsequence.
167 * @param __end End iterator of subsequence.
168 * @param __comp Comparator.
169 * @param __iam Number of the thread processing this function.
170 * @param __num_threads
171 * Number of threads that are allowed to work on this part. */
172 template<typename _RAIter, typename _Compare>
173 void
175 _RAIter __begin, _RAIter __end,
176 _Compare __comp,
177 _ThreadIndex __iam, _ThreadIndex __num_threads,
178 bool __parent_wait)
179 {
180 typedef std::iterator_traits<_RAIter> _TraitsType;
181 typedef typename _TraitsType::value_type _ValueType;
182 typedef typename _TraitsType::difference_type _DifferenceType;
183
184 _DifferenceType __n = __end - __begin;
185
186 if (__num_threads <= 1 || __n <= 1)
187 {
188 __tls[__iam]->_M_initial.first = __begin;
189 __tls[__iam]->_M_initial.second = __end;
190
191 __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
192
193 return;
194 }
195
196 // Divide step.
197 _DifferenceType __split_pos =
198 __qsb_divide(__begin, __end, __comp, __num_threads);
199
200#if _GLIBCXX_PARALLEL_ASSERTIONS
201 _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
202 __split_pos < (__end - __begin));
203#endif
204
206 __num_threads_leftside = std::max<_ThreadIndex>
207 (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
208 * __num_threads / __n));
209
210# pragma omp atomic
211 *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
212
213 // Conquer step.
214# pragma omp parallel num_threads(2)
215 {
216 bool __wait;
217 if(omp_get_num_threads() < 2)
218 __wait = false;
219 else
220 __wait = __parent_wait;
221
222# pragma omp sections
223 {
224# pragma omp section
225 {
226 __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
227 __iam, __num_threads_leftside, __wait);
228 __wait = __parent_wait;
229 }
230 // The pivot_pos is left in place, to ensure termination.
231# pragma omp section
232 {
233 __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
234 __iam + __num_threads_leftside,
235 __num_threads - __num_threads_leftside, __wait);
236 __wait = __parent_wait;
237 }
238 }
239 }
240 }
241
242 /**
243 * @brief Quicksort step doing load-balanced local sort.
244 * @param __tls Array of thread-local storages.
245 * @param __comp Comparator.
246 * @param __iam Number of the thread processing this function.
247 */
248 template<typename _RAIter, typename _Compare>
249 void
251 _Compare& __comp, _ThreadIndex __iam,
252 bool __wait)
253 {
254 typedef std::iterator_traits<_RAIter> _TraitsType;
255 typedef typename _TraitsType::value_type _ValueType;
256 typedef typename _TraitsType::difference_type _DifferenceType;
258
259 _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
260
261 _DifferenceType
263 if (__base_case_n < 2)
264 __base_case_n = 2;
265 _ThreadIndex __num_threads = __tl._M_num_threads;
266
267 // Every thread has its own random number generator.
268 _RandomNumber __rng(__iam + 1);
269
270 _Piece __current = __tl._M_initial;
271
272 _DifferenceType __elements_done = 0;
273#if _GLIBCXX_PARALLEL_ASSERTIONS
274 _DifferenceType __total_elements_done = 0;
275#endif
276
277 for (;;)
278 {
279 // Invariant: __current must be a valid (maybe empty) range.
280 _RAIter __begin = __current.first, __end = __current.second;
281 _DifferenceType __n = __end - __begin;
282
283 if (__n > __base_case_n)
284 {
285 // Divide.
286 _RAIter __pivot_pos = __begin + __rng(__n);
287
288 // Swap __pivot_pos value to end.
289 if (__pivot_pos != (__end - 1))
290 std::iter_swap(__pivot_pos, __end - 1);
291 __pivot_pos = __end - 1;
292
294 <_Compare, _ValueType, _ValueType, bool>
295 __pred(__comp, *__pivot_pos);
296
297 // Divide, leave pivot unchanged in last place.
298 _RAIter __split_pos1, __split_pos2;
299 __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
300 __pred);
301
302 // Left side: < __pivot_pos; __right side: >= __pivot_pos.
303#if _GLIBCXX_PARALLEL_ASSERTIONS
304 _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
305 && __split_pos1 < __end);
306#endif
307 // Swap pivot back to middle.
308 if (__split_pos1 != __pivot_pos)
309 std::iter_swap(__split_pos1, __pivot_pos);
310 __pivot_pos = __split_pos1;
311
312 // In case all elements are equal, __split_pos1 == 0.
313 if ((__split_pos1 + 1 - __begin) < (__n >> 7)
314 || (__end - __split_pos1) < (__n >> 7))
315 {
316 // Very unequal split, one part smaller than one 128th
317 // elements not strictly larger than the pivot.
319 <_Compare, _ValueType, _ValueType, bool>, _ValueType>
321 <_Compare, _ValueType, _ValueType, bool>
322 (__comp, *__pivot_pos));
323
324 // Find other end of pivot-equal range.
325 __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
326 __end, __pred);
327 }
328 else
329 // Only skip the pivot.
330 __split_pos2 = __split_pos1 + 1;
331
332 // Elements equal to pivot are done.
333 __elements_done += (__split_pos2 - __split_pos1);
334#if _GLIBCXX_PARALLEL_ASSERTIONS
335 __total_elements_done += (__split_pos2 - __split_pos1);
336#endif
337 // Always push larger part onto stack.
338 if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
339 {
340 // Right side larger.
341 if ((__split_pos2) != __end)
342 __tl._M_leftover_parts.push_front
343 (std::make_pair(__split_pos2, __end));
344
345 //__current.first = __begin; //already set anyway
346 __current.second = __split_pos1;
347 continue;
348 }
349 else
350 {
351 // Left side larger.
352 if (__begin != __split_pos1)
353 __tl._M_leftover_parts.push_front(std::make_pair
354 (__begin, __split_pos1));
355
356 __current.first = __split_pos2;
357 //__current.second = __end; //already set anyway
358 continue;
359 }
360 }
361 else
362 {
363 __gnu_sequential::sort(__begin, __end, __comp);
364 __elements_done += __n;
365#if _GLIBCXX_PARALLEL_ASSERTIONS
366 __total_elements_done += __n;
367#endif
368
369 // Prefer own stack, small pieces.
370 if (__tl._M_leftover_parts.pop_front(__current))
371 continue;
372
373# pragma omp atomic
374 *__tl._M_elements_leftover -= __elements_done;
375
376 __elements_done = 0;
377
378#if _GLIBCXX_PARALLEL_ASSERTIONS
379 double __search_start = omp_get_wtime();
380#endif
381
382 // Look for new work.
383 bool __successfully_stolen = false;
384 while (__wait && *__tl._M_elements_leftover > 0
385 && !__successfully_stolen
387 // Possible dead-lock.
388 && (omp_get_wtime() < (__search_start + 1.0))
389#endif
390 )
391 {
392 _ThreadIndex __victim;
393 __victim = __rng(__num_threads);
394
395 // Large pieces.
396 __successfully_stolen = (__victim != __iam)
397 && __tls[__victim]->_M_leftover_parts.pop_back(__current);
398 if (!__successfully_stolen)
399 __yield();
400#if !defined(__ICC) && !defined(__ECC)
401# pragma omp flush
402#endif
403 }
404
405#if _GLIBCXX_PARALLEL_ASSERTIONS
406 if (omp_get_wtime() >= (__search_start + 1.0))
407 {
408 sleep(1);
409 _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
410 < (__search_start + 1.0));
411 }
412#endif
413 if (!__successfully_stolen)
414 {
415#if _GLIBCXX_PARALLEL_ASSERTIONS
416 _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
417#endif
418 return;
419 }
420 }
421 }
422 }
423
424 /** @brief Top-level quicksort routine.
425 * @param __begin Begin iterator of sequence.
426 * @param __end End iterator of sequence.
427 * @param __comp Comparator.
428 * @param __num_threads Number of threads that are allowed to work on
429 * this part.
430 */
431 template<typename _RAIter, typename _Compare>
432 void
433 __parallel_sort_qsb(_RAIter __begin, _RAIter __end,
434 _Compare __comp, _ThreadIndex __num_threads)
435 {
436 _GLIBCXX_CALL(__end - __begin)
437
438 typedef std::iterator_traits<_RAIter> _TraitsType;
439 typedef typename _TraitsType::value_type _ValueType;
440 typedef typename _TraitsType::difference_type _DifferenceType;
442
443 typedef _QSBThreadLocal<_RAIter> _TLSType;
444
445 _DifferenceType __n = __end - __begin;
446
447 if (__n <= 1)
448 return;
449
450 // At least one element per processor.
451 if (__num_threads > __n)
452 __num_threads = static_cast<_ThreadIndex>(__n);
453
454 // Initialize thread local storage
455 _TLSType** __tls = new _TLSType*[__num_threads];
456 _DifferenceType __queue_size = (__num_threads
457 * (_ThreadIndex)(__rd_log2(__n) + 1));
458 for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
459 __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
460
461 // There can never be more than ceil(__rd_log2(__n)) ranges on the
462 // stack, because
463 // 1. Only one processor pushes onto the stack
464 // 2. The largest range has at most length __n
465 // 3. Each range is larger than half of the range remaining
466 volatile _DifferenceType __elements_leftover = __n;
467 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
468 {
469 __tls[__i]->_M_elements_leftover = &__elements_leftover;
470 __tls[__i]->_M_num_threads = __num_threads;
471 __tls[__i]->_M_global = std::make_pair(__begin, __end);
472
473 // Just in case nothing is left to assign.
474 __tls[__i]->_M_initial = std::make_pair(__end, __end);
475 }
476
477 // Main recursion call.
478 __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
479 __num_threads, true);
480
481#if _GLIBCXX_PARALLEL_ASSERTIONS
482 // All stack must be empty.
483 _Piece __dummy;
484 for (_ThreadIndex __i = 1; __i < __num_threads; ++__i)
485 _GLIBCXX_PARALLEL_ASSERT(
486 !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
487#endif
488
489 for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
490 delete __tls[__i];
491 delete[] __tls;
492 }
493} // namespace __gnu_parallel
494
495#endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
#define _GLIBCXX_PARALLEL_ASSERTIONS
Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. Should be switched on only locally.
#define _GLIBCXX_CALL(__n)
Macro to produce log message when entering a function.
Lock-free double-ended queue. This file is a GNU parallel extension to the Standard C++ Library.
Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...
Routines for checking the correctness of algorithm results. This file is a GNU parallel extension to ...
Parallel implementation of std::partition(), std::nth_element(), and std::partial_sort()....
Includes the original header files concerned with iterators except for stream iterators....
Runtime settings and tuning parameters, heuristics to decide whether to use parallelized algorithms.
GNU parallel code for public use.
uint16_t _ThreadIndex
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Definition: types.h:123
void __parallel_sort_qsb(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Top-level quicksort routine.
_RAIter __median_of_three_iterators(_RAIter __a, _RAIter __b, _RAIter __c, _Compare __comp)
Compute the median of three referenced elements, according to __comp.
Definition: base.h:398
void __qsb_local_sort_with_helping(_QSBThreadLocal< _RAIter > **__tls, _Compare &__comp, _ThreadIndex __iam, bool __wait)
Quicksort step doing load-balanced local sort.
void __qsb_conquer(_QSBThreadLocal< _RAIter > **__tls, _RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __iam, _ThreadIndex __num_threads, bool __parent_wait)
Quicksort conquer step.
void __yield()
Yield control to another thread, without waiting for the end of the time slice.
std::iterator_traits< _RAIter >::difference_type __parallel_partition(_RAIter __begin, _RAIter __end, _Predicate __pred, _ThreadIndex __num_threads)
Parallel implementation of std::partition.
Definition: partition.h:56
_Size __rd_log2(_Size __n)
Calculates the rounded-down logarithm of __n for base 2.
Definition: base.h:102
std::iterator_traits< _RAIter >::difference_type __qsb_divide(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Balanced quicksort divide step.
Traits class for iterators.
Information local to one thread in the parallel quicksort run.
_Piece _M_initial
Initial piece to work on.
volatile _DifferenceType * _M_elements_leftover
Pointer to a counter of elements left over to sort.
_ThreadIndex _M_num_threads
Number of threads involved in this algorithm.
_QSBThreadLocal(int __queue_size)
Constructor.
std::pair< _RAIter, _RAIter > _Piece
Continuous part of the sequence, described by an iterator pair.
_RestrictedBoundedConcurrentQueue< _Piece > _M_leftover_parts
Work-stealing queue.
_Piece _M_global
The complete sequence to sort.
Similar to std::unary_negate, but giving the argument types explicitly.
Definition: base.h:175
Similar to std::binder1st, but giving the argument types explicitly.
Definition: base.h:194
Similar to std::binder2nd, but giving the argument types explicitly.
Definition: base.h:222
Subsequence description.
Double-ended queue of bounded size, allowing lock-free atomic access. push_front() and pop_front() mu...
Definition: queue.h:53
Random number generator, based on the Mersenne twister.
Definition: random_number.h:43
_SequenceIndex sort_qsb_base_case_maximal_n
Maximal subsequence __length to switch to unbalanced __base case. Applies to std::sort with dynamical...
Definition: settings.h:240
static const _Settings & get()
Get the global settings.