// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */
32 |
|
---|
33 | #ifndef _GLIBCXX_BARRIER
|
---|
34 | #define _GLIBCXX_BARRIER 1
|
---|
35 |
|
---|
36 | #pragma GCC system_header
|
---|
37 |
|
---|
38 | #if __cplusplus > 201703L
|
---|
39 | #include <bits/atomic_base.h>
|
---|
40 | #if __cpp_lib_atomic_wait && __cpp_aligned_new
|
---|
41 | #include <bits/std_thread.h>
|
---|
42 | #include <bits/unique_ptr.h>
|
---|
43 |
|
---|
44 | #include <array>
|
---|
45 |
|
---|
46 | #define __cpp_lib_barrier 201907L
|
---|
47 |
|
---|
48 | namespace std _GLIBCXX_VISIBILITY(default)
|
---|
49 | {
|
---|
50 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
---|
51 |
|
---|
52 | struct __empty_completion
|
---|
53 | {
|
---|
54 | _GLIBCXX_ALWAYS_INLINE void
|
---|
55 | operator()() noexcept
|
---|
56 | { }
|
---|
57 | };
|
---|
58 |
|
---|
/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache line thrashing
    by flattening the tree structure into cache-line sized arrays, that
    are indexed in an efficient way.

*/
72 |
|
---|
73 | enum class __barrier_phase_t : unsigned char { };
|
---|
74 |
|
---|
75 | template<typename _CompletionF>
|
---|
76 | class __tree_barrier
|
---|
77 | {
|
---|
78 | using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
|
---|
79 | using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
|
---|
80 | static constexpr auto __phase_alignment =
|
---|
81 | __atomic_phase_ref_t::required_alignment;
|
---|
82 |
|
---|
83 | using __tickets_t = std::array<__barrier_phase_t, 64>;
|
---|
84 | struct alignas(64) /* naturally-align the heap state */ __state_t
|
---|
85 | {
|
---|
86 | alignas(__phase_alignment) __tickets_t __tickets;
|
---|
87 | };
|
---|
88 |
|
---|
89 | ptrdiff_t _M_expected;
|
---|
90 | unique_ptr<__state_t[]> _M_state;
|
---|
91 | __atomic_base<ptrdiff_t> _M_expected_adjustment;
|
---|
92 | _CompletionF _M_completion;
|
---|
93 |
|
---|
94 | alignas(__phase_alignment) __barrier_phase_t _M_phase;
|
---|
95 |
|
---|
96 | bool
|
---|
97 | _M_arrive(__barrier_phase_t __old_phase, size_t __current)
|
---|
98 | {
|
---|
99 | const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
|
---|
100 | const auto __half_step =
|
---|
101 | static_cast<__barrier_phase_t>(__old_phase_val + 1);
|
---|
102 | const auto __full_step =
|
---|
103 | static_cast<__barrier_phase_t>(__old_phase_val + 2);
|
---|
104 |
|
---|
105 | size_t __current_expected = _M_expected;
|
---|
106 | __current %= ((_M_expected + 1) >> 1);
|
---|
107 |
|
---|
108 | for (int __round = 0; ; ++__round)
|
---|
109 | {
|
---|
110 | if (__current_expected <= 1)
|
---|
111 | return true;
|
---|
112 | size_t const __end_node = ((__current_expected + 1) >> 1),
|
---|
113 | __last_node = __end_node - 1;
|
---|
114 | for ( ; ; ++__current)
|
---|
115 | {
|
---|
116 | if (__current == __end_node)
|
---|
117 | __current = 0;
|
---|
118 | auto __expect = __old_phase;
|
---|
119 | __atomic_phase_ref_t __phase(_M_state[__current]
|
---|
120 | .__tickets[__round]);
|
---|
121 | if (__current == __last_node && (__current_expected & 1))
|
---|
122 | {
|
---|
123 | if (__phase.compare_exchange_strong(__expect, __full_step,
|
---|
124 | memory_order_acq_rel))
|
---|
125 | break; // I'm 1 in 1, go to next __round
|
---|
126 | }
|
---|
127 | else if (__phase.compare_exchange_strong(__expect, __half_step,
|
---|
128 | memory_order_acq_rel))
|
---|
129 | {
|
---|
130 | return false; // I'm 1 in 2, done with arrival
|
---|
131 | }
|
---|
132 | else if (__expect == __half_step)
|
---|
133 | {
|
---|
134 | if (__phase.compare_exchange_strong(__expect, __full_step,
|
---|
135 | memory_order_acq_rel))
|
---|
136 | break; // I'm 2 in 2, go to next __round
|
---|
137 | }
|
---|
138 | }
|
---|
139 | __current_expected = __last_node + 1;
|
---|
140 | __current >>= 1;
|
---|
141 | }
|
---|
142 | }
|
---|
143 |
|
---|
144 | public:
|
---|
145 | using arrival_token = __barrier_phase_t;
|
---|
146 |
|
---|
147 | static constexpr ptrdiff_t
|
---|
148 | max() noexcept
|
---|
149 | { return __PTRDIFF_MAX__; }
|
---|
150 |
|
---|
151 | __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
|
---|
152 | : _M_expected(__expected), _M_expected_adjustment(0),
|
---|
153 | _M_completion(move(__completion)),
|
---|
154 | _M_phase(static_cast<__barrier_phase_t>(0))
|
---|
155 | {
|
---|
156 | size_t const __count = (_M_expected + 1) >> 1;
|
---|
157 |
|
---|
158 | _M_state = std::make_unique<__state_t[]>(__count);
|
---|
159 | }
|
---|
160 |
|
---|
161 | [[nodiscard]] arrival_token
|
---|
162 | arrive(ptrdiff_t __update)
|
---|
163 | {
|
---|
164 | std::hash<std::thread::id> __hasher;
|
---|
165 | size_t __current = __hasher(std::this_thread::get_id());
|
---|
166 | __atomic_phase_ref_t __phase(_M_phase);
|
---|
167 | const auto __old_phase = __phase.load(memory_order_relaxed);
|
---|
168 | const auto __cur = static_cast<unsigned char>(__old_phase);
|
---|
169 | for(; __update; --__update)
|
---|
170 | {
|
---|
171 | if(_M_arrive(__old_phase, __current))
|
---|
172 | {
|
---|
173 | _M_completion();
|
---|
174 | _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
|
---|
175 | _M_expected_adjustment.store(0, memory_order_relaxed);
|
---|
176 | auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
|
---|
177 | __phase.store(__new_phase, memory_order_release);
|
---|
178 | __phase.notify_all();
|
---|
179 | }
|
---|
180 | }
|
---|
181 | return __old_phase;
|
---|
182 | }
|
---|
183 |
|
---|
184 | void
|
---|
185 | wait(arrival_token&& __old_phase) const
|
---|
186 | {
|
---|
187 | __atomic_phase_const_ref_t __phase(_M_phase);
|
---|
188 | auto const __test_fn = [=]
|
---|
189 | {
|
---|
190 | return __phase.load(memory_order_acquire) != __old_phase;
|
---|
191 | };
|
---|
192 | std::__atomic_wait_address(&_M_phase, __test_fn);
|
---|
193 | }
|
---|
194 |
|
---|
195 | void
|
---|
196 | arrive_and_drop()
|
---|
197 | {
|
---|
198 | _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
|
---|
199 | (void)arrive(1);
|
---|
200 | }
|
---|
201 | };
|
---|
202 |
|
---|
203 | template<typename _CompletionF = __empty_completion>
|
---|
204 | class barrier
|
---|
205 | {
|
---|
206 | // Note, we may introduce a "central" barrier algorithm at some point
|
---|
207 | // for more space constrained targets
|
---|
208 | using __algorithm_t = __tree_barrier<_CompletionF>;
|
---|
209 | __algorithm_t _M_b;
|
---|
210 |
|
---|
211 | public:
|
---|
212 | class arrival_token final
|
---|
213 | {
|
---|
214 | public:
|
---|
215 | arrival_token(arrival_token&&) = default;
|
---|
216 | arrival_token& operator=(arrival_token&&) = default;
|
---|
217 | ~arrival_token() = default;
|
---|
218 |
|
---|
219 | private:
|
---|
220 | friend class barrier;
|
---|
221 | using __token = typename __algorithm_t::arrival_token;
|
---|
222 | explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
|
---|
223 | __token _M_tok;
|
---|
224 | };
|
---|
225 |
|
---|
226 | static constexpr ptrdiff_t
|
---|
227 | max() noexcept
|
---|
228 | { return __algorithm_t::max(); }
|
---|
229 |
|
---|
230 | explicit
|
---|
231 | barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
|
---|
232 | : _M_b(__count, std::move(__completion))
|
---|
233 | { }
|
---|
234 |
|
---|
235 | barrier(barrier const&) = delete;
|
---|
236 | barrier& operator=(barrier const&) = delete;
|
---|
237 |
|
---|
238 | [[nodiscard]] arrival_token
|
---|
239 | arrive(ptrdiff_t __update = 1)
|
---|
240 | { return arrival_token{_M_b.arrive(__update)}; }
|
---|
241 |
|
---|
242 | void
|
---|
243 | wait(arrival_token&& __phase) const
|
---|
244 | { _M_b.wait(std::move(__phase._M_tok)); }
|
---|
245 |
|
---|
246 | void
|
---|
247 | arrive_and_wait()
|
---|
248 | { wait(arrive()); }
|
---|
249 |
|
---|
250 | void
|
---|
251 | arrive_and_drop()
|
---|
252 | { _M_b.arrive_and_drop(); }
|
---|
253 | };
|
---|
254 |
|
---|
255 | _GLIBCXX_END_NAMESPACE_VERSION
|
---|
256 | } // namespace
|
---|
257 | #endif // __cpp_lib_atomic_wait && __cpp_aligned_new
|
---|
258 | #endif // __cplusplus > 201703L
|
---|
259 | #endif // _GLIBCXX_BARRIER
|
---|