[1166] | 1 | // class template regex -*- C++ -*-
|
---|
| 2 |
|
---|
| 3 | // Copyright (C) 2013-2021 Free Software Foundation, Inc.
|
---|
| 4 | //
|
---|
| 5 | // This file is part of the GNU ISO C++ Library. This library is free
|
---|
| 6 | // software; you can redistribute it and/or modify it under the
|
---|
| 7 | // terms of the GNU General Public License as published by the
|
---|
| 8 | // Free Software Foundation; either version 3, or (at your option)
|
---|
| 9 | // any later version.
|
---|
| 10 |
|
---|
| 11 | // This library is distributed in the hope that it will be useful,
|
---|
| 12 | // but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 13 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 14 | // GNU General Public License for more details.
|
---|
| 15 |
|
---|
| 16 | // Under Section 7 of GPL version 3, you are granted additional
|
---|
| 17 | // permissions described in the GCC Runtime Library Exception, version
|
---|
| 18 | // 3.1, as published by the Free Software Foundation.
|
---|
| 19 |
|
---|
| 20 | // You should have received a copy of the GNU General Public License and
|
---|
| 21 | // a copy of the GCC Runtime Library Exception along with this program;
|
---|
| 22 | // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
---|
| 23 | // <http://www.gnu.org/licenses/>.
|
---|
| 24 |
|
---|
| 25 | /**
|
---|
| 26 | * @file bits/regex_scanner.h
|
---|
| 27 | * This is an internal header file, included by other library headers.
|
---|
| 28 | * Do not attempt to use it directly. @headername{regex}
|
---|
| 29 | */
|
---|
| 30 |
|
---|
| 31 | namespace std _GLIBCXX_VISIBILITY(default)
|
---|
| 32 | {
|
---|
| 33 | _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
---|
| 34 |
|
---|
| 35 | namespace __detail
|
---|
| 36 | {
|
---|
| 37 | /**
|
---|
| 38 | * @addtogroup regex-detail
|
---|
| 39 | * @{
|
---|
| 40 | */
|
---|
| 41 |
|
---|
/**
 * @brief Character-type-independent part of the regex scanner.
 *
 * Holds the token and scanner-state enumerations, the grammar flags and
 * the lookup tables (token characters, escape sequences, special
 * characters).  The constructor picks the escape table and the
 * special-character set that match the grammar named by the flags.
 *
 * The tables are non-static data members, so the declaration order of
 * the data members below is part of the object layout; do not reorder
 * them.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u
  };

protected:
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner modes.  A freshly constructed scanner is in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Selects the escape table and special-character set for a grammar.
   * @param __flags  Syntax options; the grammar bits decide which tables
   *                 are used.
   *
   * The table members are declared before the pointer members that refer
   * to them, so their default member initializers have already run when
   * the selections below are evaluated.  _M_flags is likewise initialized
   * before _M_is_ecma() reads it.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    // Give the token a defined value until a derived class scans one.
    _M_token(_S_token_unknown),
    // Every non-ECMAScript grammar gets the awk escape table.
    _M_escape_tbl(_M_is_ecma()
		  ? _M_ecma_escape_tbl
		  : _M_awk_escape_tbl),
    // The grep and egrep sets are the basic and extended sets with '\n'
    // appended.  No grammar bit at all yields nullptr, which the
    // assertion in the body rejects.
    _M_spec_char(_M_is_ecma()
		 ? _M_ecma_spec_char
		 : (_M_flags & std::regex_constants::basic)
		 ? _M_basic_spec_char
		 : (_M_flags & std::regex_constants::extended)
		 ? _M_extended_spec_char
		 : (_M_flags & std::regex_constants::grep)
		 ? ".[\\*^$\n"
		 : (_M_flags & std::regex_constants::egrep)
		 ? ".[\\()*+?{|^$\n"
		 : (_M_flags & std::regex_constants::awk)
		 ? _M_extended_spec_char
		 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up a simple escape character in the active escape table.
   * @param __c  The character that followed the backslash.
   * @return Pointer to the character the escape stands for, or nullptr
   *         when @p __c has no entry.
   *
   * The table ends at the entry whose key is '\0', so '\0' itself can
   * never be found.
   */
  const char*
  _M_find_escape(char __c) const
  {
    for (auto __entry = _M_escape_tbl; __entry->first != '\0'; ++__entry)
      if (__entry->first == __c)
	return &__entry->second;
    return nullptr;
  }

  /// True when the ECMAScript grammar bit is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True for the basic and grep grammars.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
		       | std::regex_constants::grep);
  }

  /// True for the extended, egrep and awk grammars.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
		       | std::regex_constants::egrep
		       | std::regex_constants::awk);
  }

  /// True for the grep and egrep grammars.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
		       | std::regex_constants::egrep);
  }

  /// True when the awk grammar bit is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  /// Single characters that map directly to a token.  The final '\0'
  /// entry is presumably the terminator, as in the escape tables below;
  /// the code that walks this table is not in this header.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// ECMAScript escapes: {escape letter, resulting character},
  /// terminated by a '\0' key.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escapes used for every non-ECMAScript grammar, same layout as above.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Special-character sets the constructor chooses between.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;            ///< Current scanner mode.
  _FlagT                        _M_flags;            ///< Flags given to the constructor.
  _TokenT                       _M_token;            ///< Current token type.
  const std::pair<char, char>*  _M_escape_tbl;       ///< Active escape table.
  const char*                   _M_spec_char;        ///< Active special-character set.
  bool                          _M_at_bracket_start; ///< Starts out false.
};
| 198 |
|
---|
| 199 | /**
|
---|
| 200 | * @brief Scans an input range for regex tokens.
|
---|
| 201 | *
|
---|
| 202 | * The %_Scanner class interprets the regular expression pattern in
|
---|
| 203 | * the input range passed to its constructor as a sequence of parse
|
---|
| 204 | * tokens passed to the regular expression compiler. The sequence
|
---|
| 205 | * of tokens provided depends on the flag settings passed to the
|
---|
| 206 | * constructor: different regular expression grammars will interpret
|
---|
| 207 | * the same input pattern in syntactically different ways.
|
---|
| 208 | */
|
---|
| 209 | template<typename _CharT>
|
---|
| 210 | class _Scanner
|
---|
| 211 | : public _ScannerBase
|
---|
| 212 | {
|
---|
| 213 | public:
|
---|
| 214 | typedef const _CharT* _IterT;
|
---|
| 215 | typedef std::basic_string<_CharT> _StringT;
|
---|
| 216 | typedef regex_constants::syntax_option_type _FlagT;
|
---|
| 217 | typedef const std::ctype<_CharT> _CtypeT;
|
---|
| 218 |
|
---|
| 219 | _Scanner(_IterT __begin, _IterT __end,
|
---|
| 220 | _FlagT __flags, std::locale __loc);
|
---|
| 221 |
|
---|
| 222 | void
|
---|
| 223 | _M_advance();
|
---|
| 224 |
|
---|
| 225 | _TokenT
|
---|
| 226 | _M_get_token() const
|
---|
| 227 | { return _M_token; }
|
---|
| 228 |
|
---|
| 229 | const _StringT&
|
---|
| 230 | _M_get_value() const
|
---|
| 231 | { return _M_value; }
|
---|
| 232 |
|
---|
| 233 | #ifdef _GLIBCXX_DEBUG
|
---|
| 234 | std::ostream&
|
---|
| 235 | _M_print(std::ostream&);
|
---|
| 236 | #endif
|
---|
| 237 |
|
---|
| 238 | private:
|
---|
| 239 | void
|
---|
| 240 | _M_scan_normal();
|
---|
| 241 |
|
---|
| 242 | void
|
---|
| 243 | _M_scan_in_bracket();
|
---|
| 244 |
|
---|
| 245 | void
|
---|
| 246 | _M_scan_in_brace();
|
---|
| 247 |
|
---|
| 248 | void
|
---|
| 249 | _M_eat_escape_ecma();
|
---|
| 250 |
|
---|
| 251 | void
|
---|
| 252 | _M_eat_escape_posix();
|
---|
| 253 |
|
---|
| 254 | void
|
---|
| 255 | _M_eat_escape_awk();
|
---|
| 256 |
|
---|
| 257 | void
|
---|
| 258 | _M_eat_class(char);
|
---|
| 259 |
|
---|
| 260 | _IterT _M_current;
|
---|
| 261 | _IterT _M_end;
|
---|
| 262 | _CtypeT& _M_ctype;
|
---|
| 263 | _StringT _M_value;
|
---|
| 264 | void (_Scanner::* _M_eat_escape)();
|
---|
| 265 | };
|
---|
| 266 |
|
---|
| 267 | ///@} regex-detail
|
---|
| 268 | } // namespace __detail
|
---|
| 269 | _GLIBCXX_END_NAMESPACE_VERSION
|
---|
| 270 | } // namespace std
|
---|
| 271 |
|
---|
| 272 | #include <bits/regex_scanner.tcc>
|
---|