// <barrier> -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// This implementation is based on libcxx/include/barrier
//===-- barrier.h --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------===//

/** @file include/barrier
 *  This is a Standard C++ Library header.
 */

#ifndef _GLIBCXX_BARRIER
#define _GLIBCXX_BARRIER 1

#pragma GCC system_header

#if __cplusplus > 201703L
#include <bits/atomic_base.h>
#if __cpp_lib_atomic_wait && __cpp_aligned_new
#include <bits/std_thread.h>
#include <bits/unique_ptr.h>

#include <array>

#define __cpp_lib_barrier 201907L

namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  struct __empty_completion
  {
    _GLIBCXX_ALWAYS_INLINE void
    operator()() noexcept
    { }
  };

/*

The default implementation of __tree_barrier is a classic tree barrier.

It looks different from literature pseudocode for two main reasons:
 1. Threads that call into std::barrier functions do not provide indices,
    so a numbering step is added before the actual barrier algorithm,
    appearing as an N+1 round to the N rounds of the tree barrier.
 2. A great deal of attention has been paid to avoid cache line thrashing
    by flattening the tree structure into cache-line-sized arrays that
    are indexed in an efficient way.

*/
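
// A possible interleaving, purely for illustration (assuming eight
// participating threads and that every thread happens to land on the
// node its hashed id points at):
//
//   round 0: 4 nodes, each filled by two arrivals; the second arrival at
//            a node carries on, the first one is finished arriving
//   round 1: the 4 survivors pair up again over nodes 0 and 1
//   round 2: the 2 survivors meet at node 0
//   winner:  the last thread left runs the completion and publishes the
//            next phase, waking everyone blocked in wait()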

  enum class __barrier_phase_t : unsigned char { };

  template<typename _CompletionF>
    class __tree_barrier
    {
      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
      static constexpr auto __phase_alignment =
        __atomic_phase_ref_t::required_alignment;

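      // Each __state_t below is a single 64-byte cache line holding one
      // one-byte phase ticket per tree round; 64 tickets give enough rounds
      // for any supported participant count.  The barrier allocates
      // (_M_expected + 1) / 2 such nodes, one per pair of participating
      // threads, so threads working on different nodes never contend for
      // the same cache line.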
      using __tickets_t = std::array<__barrier_phase_t, 64>;
      struct alignas(64) /* naturally-align the heap state */ __state_t
      {
        alignas(__phase_alignment) __tickets_t __tickets;
      };

      ptrdiff_t _M_expected;
      unique_ptr<__state_t[]> _M_state;
      __atomic_base<ptrdiff_t> _M_expected_adjustment;
      _CompletionF _M_completion;

      alignas(__phase_alignment) __barrier_phase_t _M_phase;

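      // One thread's walk up the flattened tree.  Each per-round ticket
      // moves old phase -> half step (first arrival at a node) or
      // half step -> full step (second arrival).  A thread that half-fills
      // a node is done arriving and returns false; a thread that completes
      // a node moves up to the next round; a thread that finds a node
      // already full scans forward (wrapping at __end_node) for one it can
      // still take.  The sole survivor of the final round returns true and
      // becomes the thread that runs the completion in arrive().  The last
      // node of an odd-sized round has a single occupant, which takes its
      // ticket straight to the full step.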
      bool
      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
      {
        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
        const auto __half_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 1);
        const auto __full_step =
          static_cast<__barrier_phase_t>(__old_phase_val + 2);

        size_t __current_expected = _M_expected;
        __current %= ((_M_expected + 1) >> 1);

        for (int __round = 0; ; ++__round)
          {
            if (__current_expected <= 1)
              return true;
            size_t const __end_node = ((__current_expected + 1) >> 1),
                         __last_node = __end_node - 1;
            for ( ; ; ++__current)
              {
                if (__current == __end_node)
                  __current = 0;
                auto __expect = __old_phase;
                __atomic_phase_ref_t __phase(_M_state[__current]
                                               .__tickets[__round]);
                if (__current == __last_node && (__current_expected & 1))
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break;     // I'm 1 in 1, go to next __round
                  }
                else if (__phase.compare_exchange_strong(__expect, __half_step,
                                                         memory_order_acq_rel))
                  {
                    return false; // I'm 1 in 2, done with arrival
                  }
                else if (__expect == __half_step)
                  {
                    if (__phase.compare_exchange_strong(__expect, __full_step,
                                                        memory_order_acq_rel))
                      break;    // I'm 2 in 2, go to next __round
                  }
              }
            __current_expected = __last_node + 1;
            __current >>= 1;
          }
      }

    public:
      using arrival_token = __barrier_phase_t;

      static constexpr ptrdiff_t
      max() noexcept
      { return __PTRDIFF_MAX__; }

      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
      : _M_expected(__expected), _M_expected_adjustment(0),
        _M_completion(move(__completion)),
        _M_phase(static_cast<__barrier_phase_t>(0))
      {
        size_t const __count = (_M_expected + 1) >> 1;

        _M_state = std::make_unique<__state_t[]>(__count);
      }

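      // Threads never pass explicit indices, so arrive() numbers the caller
      // by hashing its thread::id and performs one _M_arrive() pass per
      // signalled arrival.  A pass that completes the tree runs the
      // completion, folds pending arrive_and_drop() adjustments into
      // _M_expected, advances the phase by two and wakes all waiters.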
      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update)
      {
        std::hash<std::thread::id> __hasher;
        size_t __current = __hasher(std::this_thread::get_id());
        __atomic_phase_ref_t __phase(_M_phase);
        const auto __old_phase = __phase.load(memory_order_relaxed);
        const auto __cur = static_cast<unsigned char>(__old_phase);
        for (; __update; --__update)
          {
            if (_M_arrive(__old_phase, __current))
              {
                _M_completion();
                _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
                _M_expected_adjustment.store(0, memory_order_relaxed);
                auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
                __phase.store(__new_phase, memory_order_release);
                __phase.notify_all();
              }
          }
        return __old_phase;
      }

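      // Blocks until the stored phase differs from the caller's token, i.e.
      // until some thread has finished the cycle the token was issued in.
      // This reuses the __atomic_wait_address machinery behind
      // std::atomic::wait (futex-based where the platform provides one), so
      // a spurious wake-up simply re-evaluates __test_fn and goes back to
      // sleep.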
      void
      wait(arrival_token&& __old_phase) const
      {
        __atomic_phase_const_ref_t __phase(_M_phase);
        auto const __test_fn = [=]
          {
            return __phase.load(memory_order_acquire) != __old_phase;
          };
        std::__atomic_wait_address(&_M_phase, __test_fn);
      }

      void
      arrive_and_drop()
      {
        _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
        (void)arrive(1);
      }
    };

  template<typename _CompletionF = __empty_completion>
    class barrier
    {
      // Note, we may introduce a "central" barrier algorithm at some point
      // for more space constrained targets
      using __algorithm_t = __tree_barrier<_CompletionF>;
      __algorithm_t _M_b;

    public:
      class arrival_token final
      {
      public:
        arrival_token(arrival_token&&) = default;
        arrival_token& operator=(arrival_token&&) = default;
        ~arrival_token() = default;

      private:
        friend class barrier;
        using __token = typename __algorithm_t::arrival_token;
        explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
        __token _M_tok;
      };

      static constexpr ptrdiff_t
      max() noexcept
      { return __algorithm_t::max(); }

      explicit
      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
      : _M_b(__count, std::move(__completion))
      { }

      barrier(barrier const&) = delete;
      barrier& operator=(barrier const&) = delete;

      [[nodiscard]] arrival_token
      arrive(ptrdiff_t __update = 1)
      { return arrival_token{_M_b.arrive(__update)}; }

      void
      wait(arrival_token&& __phase) const
      { _M_b.wait(std::move(__phase._M_tok)); }

      void
      arrive_and_wait()
      { wait(arrive()); }

      void
      arrive_and_drop()
      { _M_b.arrive_and_drop(); }
    };
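
  // Illustrative sketch of typical std::barrier usage (example only): a
  // fixed pool of workers that synchronise after every step, with the
  // completion function running exactly once per phase on the last thread
  // to arrive.
  //
  //   #include <barrier>
  //   #include <thread>
  //   #include <vector>
  //
  //   void run(int threads, int steps)
  //   {
  //     std::barrier sync(threads, []() noexcept { /* once per phase */ });
  //     std::vector<std::jthread> pool;
  //     for (int t = 0; t < threads; ++t)
  //       pool.emplace_back([&, t] {
  //         for (int s = 0; s < steps; ++s)
  //           {
  //             // ... per-step work for worker t ...
  //             sync.arrive_and_wait(); // block until every worker is here
  //           }
  //       });
  //   } // ~jthread joins each worker before sync is destroyed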

_GLIBCXX_END_NAMESPACE_VERSION
} // namespace
#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
#endif // __cplusplus > 201703L
#endif // _GLIBCXX_BARRIER