1 | // class template regex -*- C++ -*-
|
---|
2 |
|
---|
3 | // Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
---|
4 | //
|
---|
5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
6 | // software; you can redistribute it and/or modify it under the
|
---|
7 | // terms of the GNU General Public License as published by the
|
---|
8 | // Free Software Foundation; either version 3, or (at your option)
|
---|
9 | // any later version.
|
---|
10 |
|
---|
11 | // This library is distributed in the hope that it will be useful,
|
---|
12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | // GNU General Public License for more details.
|
---|
15 |
|
---|
16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
18 | // 3.1, as published by the Free Software Foundation.
|
---|
19 |
|
---|
20 | // You should have received a copy of the GNU General Public License and
|
---|
21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
23 | // <http://www.gnu.org/licenses/>.
|
---|
24 |
|
---|
25 | /**
|
---|
26 | * @file bits/regex_scanner.h
|
---|
27 | * This is an internal header file, included by other library headers.
|
---|
28 | * Do not attempt to use it directly. @headername{regex}
|
---|
29 | */
|
---|
30 |
|
---|
31 | namespace std _GLIBCXX_VISIBILITY(default)
|
---|
32 | {
|
---|
33 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
---|
34 |
|
---|
35 | namespace __detail
|
---|
36 | {
|
---|
37 | /**
|
---|
38 | * @addtogroup regex-detail
|
---|
39 | * @{
|
---|
40 | */
|
---|
41 |
|
---|
42 | struct _ScannerBase
|
---|
43 | {
|
---|
44 | public:
|
---|
45 | /// Token types returned from the scanner.
|
---|
46 | enum _TokenT : unsigned
|
---|
47 | {
|
---|
48 | _S_token_anychar,
|
---|
49 | _S_token_ord_char,
|
---|
50 | _S_token_oct_num,
|
---|
51 | _S_token_hex_num,
|
---|
52 | _S_token_backref,
|
---|
53 | _S_token_subexpr_begin,
|
---|
54 | _S_token_subexpr_no_group_begin,
|
---|
55 | _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
|
---|
56 | _S_token_subexpr_end,
|
---|
57 | _S_token_bracket_begin,
|
---|
58 | _S_token_bracket_neg_begin,
|
---|
59 | _S_token_bracket_end,
|
---|
60 | _S_token_interval_begin,
|
---|
61 | _S_token_interval_end,
|
---|
62 | _S_token_quoted_class,
|
---|
63 | _S_token_char_class_name,
|
---|
64 | _S_token_collsymbol,
|
---|
65 | _S_token_equiv_class_name,
|
---|
66 | _S_token_opt,
|
---|
67 | _S_token_or,
|
---|
68 | _S_token_closure0,
|
---|
69 | _S_token_closure1,
|
---|
70 | _S_token_line_begin,
|
---|
71 | _S_token_line_end,
|
---|
72 | _S_token_word_bound, // neg if _M_value[0] == 'n'
|
---|
73 | _S_token_comma,
|
---|
74 | _S_token_dup_count,
|
---|
75 | _S_token_eof,
|
---|
76 | _S_token_bracket_dash,
|
---|
77 | _S_token_unknown = -1u
|
---|
78 | };
|
---|
79 |
|
---|
80 | protected:
|
---|
81 | typedef regex_constants::syntax_option_type _FlagT;
|
---|
82 |
|
---|
83 | enum _StateT
|
---|
84 | {
|
---|
85 | _S_state_normal,
|
---|
86 | _S_state_in_brace,
|
---|
87 | _S_state_in_bracket,
|
---|
88 | };
|
---|
89 |
|
---|
90 | protected:
|
---|
91 | _ScannerBase(_FlagT __flags)
|
---|
92 | : _M_state(_S_state_normal),
|
---|
93 | _M_flags(__flags),
|
---|
94 | _M_escape_tbl(_M_is_ecma()
|
---|
95 | ? _M_ecma_escape_tbl
|
---|
96 | : _M_awk_escape_tbl),
|
---|
97 | _M_spec_char(_M_is_ecma()
|
---|
98 | ? _M_ecma_spec_char
|
---|
99 | : _M_flags & regex_constants::basic
|
---|
100 | ? _M_basic_spec_char
|
---|
101 | : _M_flags & regex_constants::extended
|
---|
102 | ? _M_extended_spec_char
|
---|
103 | : _M_flags & regex_constants::grep
|
---|
104 | ? ".[\\*^$\n"
|
---|
105 | : _M_flags & regex_constants::egrep
|
---|
106 | ? ".[\\()*+?{|^$\n"
|
---|
107 | : _M_flags & regex_constants::awk
|
---|
108 | ? _M_extended_spec_char
|
---|
109 | : nullptr),
|
---|
110 | _M_at_bracket_start(false)
|
---|
111 | { __glibcxx_assert(_M_spec_char); }
|
---|
112 |
|
---|
113 | protected:
|
---|
114 | const char*
|
---|
115 | _M_find_escape(char __c)
|
---|
116 | {
|
---|
117 | auto __it = _M_escape_tbl;
|
---|
118 | for (; __it->first != '\0'; ++__it)
|
---|
119 | if (__it->first == __c)
|
---|
120 | return &__it->second;
|
---|
121 | return nullptr;
|
---|
122 | }
|
---|
123 |
|
---|
124 | bool
|
---|
125 | _M_is_ecma() const
|
---|
126 | { return _M_flags & regex_constants::ECMAScript; }
|
---|
127 |
|
---|
128 | bool
|
---|
129 | _M_is_basic() const
|
---|
130 | { return _M_flags & (regex_constants::basic | regex_constants::grep); }
|
---|
131 |
|
---|
132 | bool
|
---|
133 | _M_is_extended() const
|
---|
134 | {
|
---|
135 | return _M_flags & (regex_constants::extended
|
---|
136 | | regex_constants::egrep
|
---|
137 | | regex_constants::awk);
|
---|
138 | }
|
---|
139 |
|
---|
140 | bool
|
---|
141 | _M_is_grep() const
|
---|
142 | { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
|
---|
143 |
|
---|
144 | bool
|
---|
145 | _M_is_awk() const
|
---|
146 | { return _M_flags & regex_constants::awk; }
|
---|
147 |
|
---|
148 | protected:
|
---|
149 | // TODO: Make them static in the next abi change.
|
---|
150 | const std::pair<char, _TokenT> _M_token_tbl[9] =
|
---|
151 | {
|
---|
152 | {'^', _S_token_line_begin},
|
---|
153 | {'$', _S_token_line_end},
|
---|
154 | {'.', _S_token_anychar},
|
---|
155 | {'*', _S_token_closure0},
|
---|
156 | {'+', _S_token_closure1},
|
---|
157 | {'?', _S_token_opt},
|
---|
158 | {'|', _S_token_or},
|
---|
159 | {'\n', _S_token_or}, // grep and egrep
|
---|
160 | {'\0', _S_token_or},
|
---|
161 | };
|
---|
162 | const std::pair<char, char> _M_ecma_escape_tbl[8] =
|
---|
163 | {
|
---|
164 | {'0', '\0'},
|
---|
165 | {'b', '\b'},
|
---|
166 | {'f', '\f'},
|
---|
167 | {'n', '\n'},
|
---|
168 | {'r', '\r'},
|
---|
169 | {'t', '\t'},
|
---|
170 | {'v', '\v'},
|
---|
171 | {'\0', '\0'},
|
---|
172 | };
|
---|
173 | const std::pair<char, char> _M_awk_escape_tbl[11] =
|
---|
174 | {
|
---|
175 | {'"', '"'},
|
---|
176 | {'/', '/'},
|
---|
177 | {'\\', '\\'},
|
---|
178 | {'a', '\a'},
|
---|
179 | {'b', '\b'},
|
---|
180 | {'f', '\f'},
|
---|
181 | {'n', '\n'},
|
---|
182 | {'r', '\r'},
|
---|
183 | {'t', '\t'},
|
---|
184 | {'v', '\v'},
|
---|
185 | {'\0', '\0'},
|
---|
186 | };
|
---|
187 | const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
|
---|
188 | const char* _M_basic_spec_char = ".[\\*^$";
|
---|
189 | const char* _M_extended_spec_char = ".[\\()*+?{|^$";
|
---|
190 |
|
---|
191 | _StateT _M_state;
|
---|
192 | _FlagT _M_flags;
|
---|
193 | _TokenT _M_token;
|
---|
194 | const std::pair<char, char>* _M_escape_tbl;
|
---|
195 | const char* _M_spec_char;
|
---|
196 | bool _M_at_bracket_start;
|
---|
197 | };
|
---|
198 |
|
---|
199 | /**
|
---|
200 | * @brief Scans an input range for regex tokens.
|
---|
201 | *
|
---|
202 | * The %_Scanner class interprets the regular expression pattern in
|
---|
203 | * the input range passed to its constructor as a sequence of parse
|
---|
204 | * tokens passed to the regular expression compiler. The sequence
|
---|
205 | * of tokens provided depends on the flag settings passed to the
|
---|
206 | * constructor: different regular expression grammars will interpret
|
---|
207 | * the same input pattern in syntactically different ways.
|
---|
208 | */
|
---|
209 | template<typename _CharT>
|
---|
210 | class _Scanner
|
---|
211 | : public _ScannerBase
|
---|
212 | {
|
---|
213 | public:
|
---|
214 | typedef const _CharT* _IterT;
|
---|
215 | typedef std::basic_string<_CharT> _StringT;
|
---|
216 | typedef regex_constants::syntax_option_type _FlagT;
|
---|
217 | typedef const std::ctype<_CharT> _CtypeT;
|
---|
218 |
|
---|
219 | _Scanner(_IterT __begin, _IterT __end,
|
---|
220 | _FlagT __flags, std::locale __loc);
|
---|
221 |
|
---|
222 | void
|
---|
223 | _M_advance();
|
---|
224 |
|
---|
225 | _TokenT
|
---|
226 | _M_get_token() const
|
---|
227 | { return _M_token; }
|
---|
228 |
|
---|
229 | const _StringT&
|
---|
230 | _M_get_value() const
|
---|
231 | { return _M_value; }
|
---|
232 |
|
---|
233 | #ifdef _GLIBCXX_DEBUG
|
---|
234 | std::ostream&
|
---|
235 | _M_print(std::ostream&);
|
---|
236 | #endif
|
---|
237 |
|
---|
238 | private:
|
---|
239 | void
|
---|
240 | _M_scan_normal();
|
---|
241 |
|
---|
242 | void
|
---|
243 | _M_scan_in_bracket();
|
---|
244 |
|
---|
245 | void
|
---|
246 | _M_scan_in_brace();
|
---|
247 |
|
---|
248 | void
|
---|
249 | _M_eat_escape_ecma();
|
---|
250 |
|
---|
251 | void
|
---|
252 | _M_eat_escape_posix();
|
---|
253 |
|
---|
254 | void
|
---|
255 | _M_eat_escape_awk();
|
---|
256 |
|
---|
257 | void
|
---|
258 | _M_eat_class(char);
|
---|
259 |
|
---|
260 | _IterT _M_current;
|
---|
261 | _IterT _M_end;
|
---|
262 | _CtypeT& _M_ctype;
|
---|
263 | _StringT _M_value;
|
---|
264 | void (_Scanner::* _M_eat_escape)();
|
---|
265 | };
|
---|
266 |
|
---|
267 | ///@} regex-detail
|
---|
268 | } // namespace __detail
|
---|
269 | _GLIBCXX_END_NAMESPACE_VERSION
|
---|
270 | } // namespace std
|
---|
271 |
|
---|
272 | #include <bits/regex_scanner.tcc>
|
---|