1 | // class template regex -*- C++ -*-
|
---|
2 |
|
---|
3 | // Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
---|
4 | //
|
---|
5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
6 | // software; you can redistribute it and/or modify it under the
|
---|
7 | // terms of the GNU General Public License as published by the
|
---|
8 | // Free Software Foundation; either version 3, or (at your option)
|
---|
9 | // any later version.
|
---|
10 |
|
---|
11 | // This library is distributed in the hope that it will be useful,
|
---|
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | // GNU General Public License for more details.
|
---|
15 |
|
---|
16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
18 | // 3.1, as published by the Free Software Foundation.
|
---|
19 |
|
---|
20 | // You should have received a copy of the GNU General Public License and
|
---|
21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
23 | // <http://www.gnu.org/licenses/>.
|
---|
24 |
|
---|
25 | /**
|
---|
26 | * @file bits/regex_executor.h
|
---|
27 | * This is an internal header file, included by other library headers.
|
---|
28 | * Do not attempt to use it directly. @headername{regex}
|
---|
29 | */
|
---|
30 |
|
---|
31 | // FIXME convert comments to doxygen format.
|
---|
32 |
|
---|
33 | namespace std _GLIBCXX_VISIBILITY(default)
|
---|
34 | {
|
---|
35 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
---|
36 |
|
---|
37 | namespace __detail
|
---|
38 | {
|
---|
39 | /**
|
---|
40 | * @addtogroup regex-detail
|
---|
41 | * @{
|
---|
42 | */
|
---|
43 |
|
---|
44 | /**
|
---|
45 | * @brief Takes a regex and an input string and does the matching.
|
---|
46 | *
|
---|
47 | * The %_Executor class has two modes: DFS mode and BFS mode, controlled
|
---|
48 | * by the template parameter %__dfs_mode.
|
---|
49 | */
|
---|
50 | template<typename _BiIter, typename _Alloc, typename _TraitsT,
|
---|
51 | bool __dfs_mode>
|
---|
52 | class _Executor
|
---|
53 | {
|
---|
54 | using __search_mode = integral_constant<bool, __dfs_mode>;
|
---|
55 | using __dfs = true_type;
|
---|
56 | using __bfs = false_type;
|
---|
57 |
|
---|
58 | enum class _Match_mode : unsigned char { _Exact, _Prefix };
|
---|
59 |
|
---|
60 | public:
|
---|
61 | typedef typename iterator_traits<_BiIter>::value_type _CharT;
|
---|
62 | typedef basic_regex<_CharT, _TraitsT> _RegexT;
|
---|
63 | typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
|
---|
64 | typedef regex_constants::match_flag_type _FlagT;
|
---|
65 | typedef typename _TraitsT::char_class_type _ClassT;
|
---|
66 | typedef _NFA<_TraitsT> _NFAT;
|
---|
67 |
|
---|
68 | public:
|
---|
69 | _Executor(_BiIter __begin,
|
---|
70 | _BiIter __end,
|
---|
71 | _ResultsVec& __results,
|
---|
72 | const _RegexT& __re,
|
---|
73 | _FlagT __flags)
|
---|
74 | : _M_begin(__begin),
|
---|
75 | _M_end(__end),
|
---|
76 | _M_re(__re),
|
---|
77 | _M_nfa(*__re._M_automaton),
|
---|
78 | _M_results(__results),
|
---|
79 | _M_rep_count(_M_nfa.size()),
|
---|
80 | _M_states(_M_nfa._M_start(), _M_nfa.size()),
|
---|
81 | _M_flags((__flags & regex_constants::match_prev_avail)
|
---|
82 | ? (__flags
|
---|
83 | & ~regex_constants::match_not_bol
|
---|
84 | & ~regex_constants::match_not_bow)
|
---|
85 | : __flags)
|
---|
86 | { }
|
---|
87 |
|
---|
88 | // Set matched when string exactly matches the pattern.
|
---|
89 | bool
|
---|
90 | _M_match()
|
---|
91 | {
|
---|
92 | _M_current = _M_begin;
|
---|
93 | return _M_main(_Match_mode::_Exact);
|
---|
94 | }
|
---|
95 |
|
---|
96 | // Set matched when some prefix of the string matches the pattern.
|
---|
97 | bool
|
---|
98 | _M_search_from_first()
|
---|
99 | {
|
---|
100 | _M_current = _M_begin;
|
---|
101 | return _M_main(_Match_mode::_Prefix);
|
---|
102 | }
|
---|
103 |
|
---|
104 | bool
|
---|
105 | _M_search();
|
---|
106 |
|
---|
107 | private:
|
---|
108 | void
|
---|
109 | _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
|
---|
110 |
|
---|
111 | void
|
---|
112 | _M_handle_repeat(_Match_mode, _StateIdT);
|
---|
113 |
|
---|
114 | void
|
---|
115 | _M_handle_subexpr_begin(_Match_mode, _StateIdT);
|
---|
116 |
|
---|
117 | void
|
---|
118 | _M_handle_subexpr_end(_Match_mode, _StateIdT);
|
---|
119 |
|
---|
120 | void
|
---|
121 | _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
|
---|
122 |
|
---|
123 | void
|
---|
124 | _M_handle_line_end_assertion(_Match_mode, _StateIdT);
|
---|
125 |
|
---|
126 | void
|
---|
127 | _M_handle_word_boundary(_Match_mode, _StateIdT);
|
---|
128 |
|
---|
129 | void
|
---|
130 | _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
|
---|
131 |
|
---|
132 | void
|
---|
133 | _M_handle_match(_Match_mode, _StateIdT);
|
---|
134 |
|
---|
135 | void
|
---|
136 | _M_handle_backref(_Match_mode, _StateIdT);
|
---|
137 |
|
---|
138 | void
|
---|
139 | _M_handle_accept(_Match_mode, _StateIdT);
|
---|
140 |
|
---|
141 | void
|
---|
142 | _M_handle_alternative(_Match_mode, _StateIdT);
|
---|
143 |
|
---|
144 | void
|
---|
145 | _M_dfs(_Match_mode __match_mode, _StateIdT __start);
|
---|
146 |
|
---|
147 | bool
|
---|
148 | _M_main(_Match_mode __match_mode)
|
---|
149 | { return _M_main_dispatch(__match_mode, __search_mode{}); }
|
---|
150 |
|
---|
151 | bool
|
---|
152 | _M_main_dispatch(_Match_mode __match_mode, __dfs);
|
---|
153 |
|
---|
154 | bool
|
---|
155 | _M_main_dispatch(_Match_mode __match_mode, __bfs);
|
---|
156 |
|
---|
157 | bool
|
---|
158 | _M_is_word(_CharT __ch) const
|
---|
159 | {
|
---|
160 | static const _CharT __s[2] = { 'w' };
|
---|
161 | return _M_re._M_automaton->_M_traits.isctype
|
---|
162 | (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
|
---|
163 | }
|
---|
164 |
|
---|
165 | bool
|
---|
166 | _M_at_begin() const
|
---|
167 | {
|
---|
168 | return _M_current == _M_begin
|
---|
169 | && !(_M_flags & (regex_constants::match_not_bol
|
---|
170 | | regex_constants::match_prev_avail));
|
---|
171 | }
|
---|
172 |
|
---|
173 | bool
|
---|
174 | _M_at_end() const
|
---|
175 | {
|
---|
176 | return _M_current == _M_end
|
---|
177 | && !(_M_flags & regex_constants::match_not_eol);
|
---|
178 | }
|
---|
179 |
|
---|
180 | bool
|
---|
181 | _M_word_boundary() const;
|
---|
182 |
|
---|
183 | bool
|
---|
184 | _M_lookahead(_StateIdT __next);
|
---|
185 |
|
---|
186 | // Holds additional information used in BFS-mode.
|
---|
187 | template<typename _SearchMode, typename _ResultsVec>
|
---|
188 | struct _State_info;
|
---|
189 |
|
---|
190 | template<typename _ResultsVec>
|
---|
191 | struct _State_info<__bfs, _ResultsVec>
|
---|
192 | {
|
---|
193 | explicit
|
---|
194 | _State_info(_StateIdT __start, size_t __n)
|
---|
195 | : _M_visited_states(new bool[__n]()), _M_start(__start)
|
---|
196 | { }
|
---|
197 |
|
---|
198 | bool _M_visited(_StateIdT __i)
|
---|
199 | {
|
---|
200 | if (_M_visited_states[__i])
|
---|
201 | return true;
|
---|
202 | _M_visited_states[__i] = true;
|
---|
203 | return false;
|
---|
204 | }
|
---|
205 |
|
---|
206 | void _M_queue(_StateIdT __i, const _ResultsVec& __res)
|
---|
207 | { _M_match_queue.emplace_back(__i, __res); }
|
---|
208 |
|
---|
209 | // Dummy implementations for BFS mode.
|
---|
210 | _BiIter* _M_get_sol_pos() { return nullptr; }
|
---|
211 |
|
---|
212 | // Saves states that need to be considered for the next character.
|
---|
213 | vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
|
---|
214 | // Indicates which states are already visited.
|
---|
215 | unique_ptr<bool[]> _M_visited_states;
|
---|
216 | // To record current solution.
|
---|
217 | _StateIdT _M_start;
|
---|
218 | };
|
---|
219 |
|
---|
220 | template<typename _ResultsVec>
|
---|
221 | struct _State_info<__dfs, _ResultsVec>
|
---|
222 | {
|
---|
223 | explicit
|
---|
224 | _State_info(_StateIdT __start, size_t) : _M_start(__start)
|
---|
225 | { }
|
---|
226 |
|
---|
227 | // Dummy implementations for DFS mode.
|
---|
228 | bool _M_visited(_StateIdT) const { return false; }
|
---|
229 | void _M_queue(_StateIdT, const _ResultsVec&) { }
|
---|
230 |
|
---|
231 | _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
|
---|
232 |
|
---|
233 | // To record current solution.
|
---|
234 | _StateIdT _M_start;
|
---|
235 | _BiIter _M_sol_pos;
|
---|
236 | };
|
---|
237 |
|
---|
238 | public:
|
---|
239 | _ResultsVec _M_cur_results;
|
---|
240 | _BiIter _M_current;
|
---|
241 | _BiIter _M_begin;
|
---|
242 | const _BiIter _M_end;
|
---|
243 | const _RegexT& _M_re;
|
---|
244 | const _NFAT& _M_nfa;
|
---|
245 | _ResultsVec& _M_results;
|
---|
246 | vector<pair<_BiIter, int>> _M_rep_count;
|
---|
247 | _State_info<__search_mode, _ResultsVec> _M_states;
|
---|
248 | _FlagT _M_flags;
|
---|
249 | // Do we have a solution so far?
|
---|
250 | bool _M_has_sol;
|
---|
251 | };
|
---|
252 |
|
---|
253 | ///@} regex-detail
|
---|
254 | } // namespace __detail
|
---|
255 | _GLIBCXX_END_NAMESPACE_VERSION
|
---|
256 | } // namespace std
|
---|
257 |
|
---|
258 | #include <bits/regex_executor.tcc>
|
---|