1// class template regex -*- C++ -*-
2
3// Copyright (C) 2013-2019 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library. This library is free
6// software; you can redistribute it and/or modify it under the
7// terms of the GNU General Public License as published by the
8// Free Software Foundation; either version 3, or (at your option)
9// any later version.
10
11// This library is distributed in the hope that it will be useful,
12// but WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14// GNU General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23// <http://www.gnu.org/licenses/>.
24
25/**
26 * @file bits/regex.tcc
27 * This is an internal header file, included by other library headers.
28 * Do not attempt to use it directly. @headername{regex}
29 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 // Result of merging regex_match and regex_search.
38 //
39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40 // the other one if possible, for test purpose).
41 //
42 // That __match_mode is true means regex_match, else regex_search.
43 template<typename _BiIter, typename _Alloc,
44 typename _CharT, typename _TraitsT,
45 _RegexExecutorPolicy __policy,
46 bool __match_mode>
47 bool
48 __regex_algo_impl(_BiIter __s,
49 _BiIter __e,
50 match_results<_BiIter, _Alloc>& __m,
51 const basic_regex<_CharT, _TraitsT>& __re,
52 regex_constants::match_flag_type __flags)
53 {
54 if (__re._M_automaton == nullptr)
55 return false;
56
57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58 __m._M_begin = __s;
59 __m._M_resize(__re._M_automaton->_M_sub_count());
60 for (auto& __it : __res)
61 __it.matched = false;
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_resize(0);
113 for (auto& __it : __res)
114 {
115 __it.matched = false;
116 __it.first = __it.second = __e;
117 }
118 }
119 return __ret;
120 }
121}
122
123 template<typename _Ch_type>
124 template<typename _Fwd_iter>
125 typename regex_traits<_Ch_type>::string_type
126 regex_traits<_Ch_type>::
127 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128 {
129 typedef std::ctype<char_type> __ctype_type;
130 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
131
132 static const char* __collatenames[] =
133 {
134 "NUL",
135 "SOH",
136 "STX",
137 "ETX",
138 "EOT",
139 "ENQ",
140 "ACK",
141 "alert",
142 "backspace",
143 "tab",
144 "newline",
145 "vertical-tab",
146 "form-feed",
147 "carriage-return",
148 "SO",
149 "SI",
150 "DLE",
151 "DC1",
152 "DC2",
153 "DC3",
154 "DC4",
155 "NAK",
156 "SYN",
157 "ETB",
158 "CAN",
159 "EM",
160 "SUB",
161 "ESC",
162 "IS4",
163 "IS3",
164 "IS2",
165 "IS1",
166 "space",
167 "exclamation-mark",
168 "quotation-mark",
169 "number-sign",
170 "dollar-sign",
171 "percent-sign",
172 "ampersand",
173 "apostrophe",
174 "left-parenthesis",
175 "right-parenthesis",
176 "asterisk",
177 "plus-sign",
178 "comma",
179 "hyphen",
180 "period",
181 "slash",
182 "zero",
183 "one",
184 "two",
185 "three",
186 "four",
187 "five",
188 "six",
189 "seven",
190 "eight",
191 "nine",
192 "colon",
193 "semicolon",
194 "less-than-sign",
195 "equals-sign",
196 "greater-than-sign",
197 "question-mark",
198 "commercial-at",
199 "A",
200 "B",
201 "C",
202 "D",
203 "E",
204 "F",
205 "G",
206 "H",
207 "I",
208 "J",
209 "K",
210 "L",
211 "M",
212 "N",
213 "O",
214 "P",
215 "Q",
216 "R",
217 "S",
218 "T",
219 "U",
220 "V",
221 "W",
222 "X",
223 "Y",
224 "Z",
225 "left-square-bracket",
226 "backslash",
227 "right-square-bracket",
228 "circumflex",
229 "underscore",
230 "grave-accent",
231 "a",
232 "b",
233 "c",
234 "d",
235 "e",
236 "f",
237 "g",
238 "h",
239 "i",
240 "j",
241 "k",
242 "l",
243 "m",
244 "n",
245 "o",
246 "p",
247 "q",
248 "r",
249 "s",
250 "t",
251 "u",
252 "v",
253 "w",
254 "x",
255 "y",
256 "z",
257 "left-curly-bracket",
258 "vertical-line",
259 "right-curly-bracket",
260 "tilde",
261 "DEL",
262 };
263
264 string __s;
265 for (; __first != __last; ++__first)
266 __s += __fctyp.narrow(*__first, 0);
267
268 for (const auto& __it : __collatenames)
269 if (__s == __it)
270 return string_type(1, __fctyp.widen(
271 static_cast<char>(&__it - __collatenames)));
272
273 // TODO Add digraph support:
274 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
275
276 return string_type();
277 }
278
279 template<typename _Ch_type>
280 template<typename _Fwd_iter>
281 typename regex_traits<_Ch_type>::char_class_type
282 regex_traits<_Ch_type>::
283 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284 {
285 typedef std::ctype<char_type> __ctype_type;
286 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
287
288 // Mappings from class name to class mask.
289 static const pair<const char*, char_class_type> __classnames[] =
290 {
291 {"d", ctype_base::digit},
292 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
293 {"s", ctype_base::space},
294 {"alnum", ctype_base::alnum},
295 {"alpha", ctype_base::alpha},
296 {"blank", ctype_base::blank},
297 {"cntrl", ctype_base::cntrl},
298 {"digit", ctype_base::digit},
299 {"graph", ctype_base::graph},
300 {"lower", ctype_base::lower},
301 {"print", ctype_base::print},
302 {"punct", ctype_base::punct},
303 {"space", ctype_base::space},
304 {"upper", ctype_base::upper},
305 {"xdigit", ctype_base::xdigit},
306 };
307
308 string __s;
309 for (; __first != __last; ++__first)
310 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
311
312 for (const auto& __it : __classnames)
313 if (__s == __it.first)
314 {
315 if (__icase
316 && ((__it.second
317 & (ctype_base::lower | ctype_base::upper)) != 0))
318 return ctype_base::alpha;
319 return __it.second;
320 }
321 return 0;
322 }
323
324 template<typename _Ch_type>
325 bool
326 regex_traits<_Ch_type>::
327 isctype(_Ch_type __c, char_class_type __f) const
328 {
329 typedef std::ctype<char_type> __ctype_type;
330 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
331
332 return __fctyp.is(__f._M_base, __c)
333 // [[:w:]]
334 || ((__f._M_extended & _RegexMask::_S_under)
335 && __c == __fctyp.widen('_'));
336 }
337
338 template<typename _Ch_type>
339 int
340 regex_traits<_Ch_type>::
341 value(_Ch_type __ch, int __radix) const
342 {
343 std::basic_istringstream<char_type> __is(string_type(1, __ch));
344 long __v;
345 if (__radix == 8)
346 __is >> std::oct;
347 else if (__radix == 16)
348 __is >> std::hex;
349 __is >> __v;
350 return __is.fail() ? -1 : __v;
351 }
352
353 template<typename _Bi_iter, typename _Alloc>
354 template<typename _Out_iter>
355 _Out_iter match_results<_Bi_iter, _Alloc>::
356 format(_Out_iter __out,
357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359 match_flag_type __flags) const
360 {
361 __glibcxx_assert( ready() );
362 regex_traits<char_type> __traits;
363 typedef std::ctype<char_type> __ctype_type;
364 const __ctype_type&
365 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
366
367 auto __output = [&](size_t __idx)
368 {
369 auto& __sub = (*this)[__idx];
370 if (__sub.matched)
371 __out = std::copy(__sub.first, __sub.second, __out);
372 };
373
374 if (__flags & regex_constants::format_sed)
375 {
376 bool __escaping = false;
377 for (; __fmt_first != __fmt_last; __fmt_first++)
378 {
379 if (__escaping)
380 {
381 __escaping = false;
382 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
383 __output(__traits.value(*__fmt_first, 10));
384 else
385 *__out++ = *__fmt_first;
386 continue;
387 }
388 if (*__fmt_first == '\\')
389 {
390 __escaping = true;
391 continue;
392 }
393 if (*__fmt_first == '&')
394 {
395 __output(0);
396 continue;
397 }
398 *__out++ = *__fmt_first;
399 }
400 if (__escaping)
401 *__out++ = '\\';
402 }
403 else
404 {
405 while (1)
406 {
407 auto __next = std::find(__fmt_first, __fmt_last, '$');
408 if (__next == __fmt_last)
409 break;
410
411 __out = std::copy(__fmt_first, __next, __out);
412
413 auto __eat = [&](char __ch) -> bool
414 {
415 if (*__next == __ch)
416 {
417 ++__next;
418 return true;
419 }
420 return false;
421 };
422
423 if (++__next == __fmt_last)
424 *__out++ = '$';
425 else if (__eat('$'))
426 *__out++ = '$';
427 else if (__eat('&'))
428 __output(0);
429 else if (__eat('`'))
430 {
431 auto& __sub = _M_prefix();
432 if (__sub.matched)
433 __out = std::copy(__sub.first, __sub.second, __out);
434 }
435 else if (__eat('\''))
436 {
437 auto& __sub = _M_suffix();
438 if (__sub.matched)
439 __out = std::copy(__sub.first, __sub.second, __out);
440 }
441 else if (__fctyp.is(__ctype_type::digit, *__next))
442 {
443 long __num = __traits.value(*__next, 10);
444 if (++__next != __fmt_last
445 && __fctyp.is(__ctype_type::digit, *__next))
446 {
447 __num *= 10;
448 __num += __traits.value(*__next++, 10);
449 }
450 if (0 <= __num && __num < this->size())
451 __output(__num);
452 }
453 else
454 *__out++ = '$';
455 __fmt_first = __next;
456 }
457 __out = std::copy(__fmt_first, __fmt_last, __out);
458 }
459 return __out;
460 }
461
462 template<typename _Out_iter, typename _Bi_iter,
463 typename _Rx_traits, typename _Ch_type>
464 _Out_iter
465 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
466 const basic_regex<_Ch_type, _Rx_traits>& __e,
467 const _Ch_type* __fmt,
468 regex_constants::match_flag_type __flags)
469 {
470 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
471 _IterT __i(__first, __last, __e, __flags);
472 _IterT __end;
473 if (__i == __end)
474 {
475 if (!(__flags & regex_constants::format_no_copy))
476 __out = std::copy(__first, __last, __out);
477 }
478 else
479 {
480 sub_match<_Bi_iter> __last;
481 auto __len = char_traits<_Ch_type>::length(__fmt);
482 for (; __i != __end; ++__i)
483 {
484 if (!(__flags & regex_constants::format_no_copy))
485 __out = std::copy(__i->prefix().first, __i->prefix().second,
486 __out);
487 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
488 __last = __i->suffix();
489 if (__flags & regex_constants::format_first_only)
490 break;
491 }
492 if (!(__flags & regex_constants::format_no_copy))
493 __out = std::copy(__last.first, __last.second, __out);
494 }
495 return __out;
496 }
497
498 template<typename _Bi_iter,
499 typename _Ch_type,
500 typename _Rx_traits>
501 bool
502 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
503 operator==(const regex_iterator& __rhs) const noexcept
504 {
505 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
506 return true;
507 return _M_pregex == __rhs._M_pregex
508 && _M_begin == __rhs._M_begin
509 && _M_end == __rhs._M_end
510 && _M_flags == __rhs._M_flags
511 && _M_match[0] == __rhs._M_match[0];
512 }
513
514 template<typename _Bi_iter,
515 typename _Ch_type,
516 typename _Rx_traits>
517 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
518 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
519 operator++()
520 {
521 // In all cases in which the call to regex_search returns true,
522 // match.prefix().first shall be equal to the previous value of
523 // match[0].second, and for each index i in the half-open range
524 // [0, match.size()) for which match[i].matched is true,
525 // match[i].position() shall return distance(begin, match[i].first).
526 // [28.12.1.4.5]
527 if (_M_match[0].matched)
528 {
529 auto __start = _M_match[0].second;
530 auto __prefix_first = _M_match[0].second;
531 if (_M_match[0].first == _M_match[0].second)
532 {
533 if (__start == _M_end)
534 {
535 _M_pregex = nullptr;
536 return *this;
537 }
538 else
539 {
540 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
541 _M_flags
542 | regex_constants::match_not_null
543 | regex_constants::match_continuous))
544 {
545 __glibcxx_assert(_M_match[0].matched);
546 auto& __prefix = _M_match._M_prefix();
547 __prefix.first = __prefix_first;
548 __prefix.matched = __prefix.first != __prefix.second;
549 // [28.12.1.4.5]
550 _M_match._M_begin = _M_begin;
551 return *this;
552 }
553 else
554 ++__start;
555 }
556 }
557 _M_flags |= regex_constants::match_prev_avail;
558 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
559 {
560 __glibcxx_assert(_M_match[0].matched);
561 auto& __prefix = _M_match._M_prefix();
562 __prefix.first = __prefix_first;
563 __prefix.matched = __prefix.first != __prefix.second;
564 // [28.12.1.4.5]
565 _M_match._M_begin = _M_begin;
566 }
567 else
568 _M_pregex = nullptr;
569 }
570 return *this;
571 }
572
573 template<typename _Bi_iter,
574 typename _Ch_type,
575 typename _Rx_traits>
576 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
578 operator=(const regex_token_iterator& __rhs)
579 {
580 _M_position = __rhs._M_position;
581 _M_subs = __rhs._M_subs;
582 _M_n = __rhs._M_n;
583 _M_suffix = __rhs._M_suffix;
584 _M_has_m1 = __rhs._M_has_m1;
585 _M_normalize_result();
586 return *this;
587 }
588
589 template<typename _Bi_iter,
590 typename _Ch_type,
591 typename _Rx_traits>
592 bool
593 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
594 operator==(const regex_token_iterator& __rhs) const
595 {
596 if (_M_end_of_seq() && __rhs._M_end_of_seq())
597 return true;
598 if (_M_suffix.matched && __rhs._M_suffix.matched
599 && _M_suffix == __rhs._M_suffix)
600 return true;
601 if (_M_end_of_seq() || _M_suffix.matched
602 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
603 return false;
604 return _M_position == __rhs._M_position
605 && _M_n == __rhs._M_n
606 && _M_subs == __rhs._M_subs;
607 }
608
609 template<typename _Bi_iter,
610 typename _Ch_type,
611 typename _Rx_traits>
612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
613 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
614 operator++()
615 {
616 _Position __prev = _M_position;
617 if (_M_suffix.matched)
618 *this = regex_token_iterator();
619 else if (_M_n + 1 < _M_subs.size())
620 {
621 _M_n++;
622 _M_result = &_M_current_match();
623 }
624 else
625 {
626 _M_n = 0;
627 ++_M_position;
628 if (_M_position != _Position())
629 _M_result = &_M_current_match();
630 else if (_M_has_m1 && __prev->suffix().length() != 0)
631 {
632 _M_suffix.matched = true;
633 _M_suffix.first = __prev->suffix().first;
634 _M_suffix.second = __prev->suffix().second;
635 _M_result = &_M_suffix;
636 }
637 else
638 *this = regex_token_iterator();
639 }
640 return *this;
641 }
642
643 template<typename _Bi_iter,
644 typename _Ch_type,
645 typename _Rx_traits>
646 void
647 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
648 _M_init(_Bi_iter __a, _Bi_iter __b)
649 {
650 _M_has_m1 = false;
651 for (auto __it : _M_subs)
652 if (__it == -1)
653 {
654 _M_has_m1 = true;
655 break;
656 }
657 if (_M_position != _Position())
658 _M_result = &_M_current_match();
659 else if (_M_has_m1)
660 {
661 _M_suffix.matched = true;
662 _M_suffix.first = __a;
663 _M_suffix.second = __b;
664 _M_result = &_M_suffix;
665 }
666 else
667 _M_result = nullptr;
668 }
669
670_GLIBCXX_END_NAMESPACE_VERSION
671} // namespace
672