Skip to content

Commit 6afe8b0

Browse files
author
Howard Hinnant
committed
continued regex development...
llvm-svn: 109512
1 parent 7639967 commit 6afe8b0

File tree

6 files changed

+203
-145
lines changed

6 files changed

+203
-145
lines changed

libcxx/include/regex

Lines changed: 42 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -747,12 +747,12 @@ enum syntax_option_type
747747
nosubs = 1 << 1,
748748
optimize = 1 << 2,
749749
collate = 1 << 3,
750-
ECMAScript = 1 << 4,
751-
basic = 1 << 5,
752-
extended = 1 << 6,
753-
awk = 1 << 7,
754-
grep = 1 << 8,
755-
egrep = 1 << 9
750+
ECMAScript = 0,
751+
basic = 1 << 4,
752+
extended = 1 << 5,
753+
awk = 1 << 6,
754+
grep = 1 << 7,
755+
egrep = 1 << 8
756756
};
757757

758758
inline
@@ -907,7 +907,9 @@ enum error_type
907907
error_badrepeat,
908908
error_complexity,
909909
error_stack,
910-
error_temp
910+
__re_err_grammar,
911+
__re_err_empty,
912+
__re_err_unknown
911913
};
912914

913915
} // regex_constants
@@ -1538,8 +1540,17 @@ __loop<_CharT>::__exec(__state& __s) const
15381540
}
15391541
else
15401542
{
1541-
if (__max_ > 0)
1543+
__s.__loop_data_[__loop_id_].first = 0;
1544+
bool __do_repeat = 0 < __max_;
1545+
bool __do_alt = 0 >= __min_;
1546+
if (__do_repeat && __do_alt)
15421547
__s.__do_ = __state::__split;
1548+
else if (__do_repeat)
1549+
{
1550+
__s.__do_ = __state::__accept_but_not_consume;
1551+
__s.__node_ = this->first();
1552+
__init_repeat(__s);
1553+
}
15431554
else
15441555
{
15451556
__s.__do_ = __state::__accept_but_not_consume;
@@ -2727,7 +2738,6 @@ private:
27272738
bool
27282739
__match_at_start(const _CharT* __first, const _CharT* __last,
27292740
match_results<const _CharT*, _Allocator>& __m,
2730-
vector<size_t>& __lc,
27312741
regex_constants::match_flag_type __flags) const;
27322742
template <class _Allocator>
27332743
bool
@@ -2738,13 +2748,11 @@ private:
27382748
bool
27392749
__match_at_start_posix_nosubs(const _CharT* __first, const _CharT* __last,
27402750
match_results<const _CharT*, _Allocator>& __m,
2741-
vector<size_t>& __lc,
27422751
regex_constants::match_flag_type __flags) const;
27432752
template <class _Allocator>
27442753
bool
27452754
__match_at_start_posix_subs(const _CharT* __first, const _CharT* __last,
27462755
match_results<const _CharT*, _Allocator>& __m,
2747-
vector<size_t>& __lc,
27482756
regex_constants::match_flag_type __flags) const;
27492757

27502758
template <class _B, class _A, class _C, class _T>
@@ -2810,7 +2818,7 @@ basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first,
28102818
__h.release();
28112819
__end_ = __start_.get();
28122820
}
2813-
switch (__flags_ & 0x3F0)
2821+
switch (__flags_ & 0x1F0)
28142822
{
28152823
case ECMAScript:
28162824
__parse_ecma_exp(__first, __last);
@@ -2828,7 +2836,7 @@ basic_regex<_CharT, _Traits>::__parse(_ForwardIterator __first,
28282836
case egrep:
28292837
break;
28302838
default:
2831-
throw regex_error(regex_constants::error_temp);
2839+
throw regex_error(regex_constants::__re_err_grammar);
28322840
}
28332841
}
28342842

@@ -2859,7 +2867,7 @@ basic_regex<_CharT, _Traits>::__parse_basic_reg_exp(_ForwardIterator __first,
28592867
}
28602868
}
28612869
if (__first != __last)
2862-
throw regex_error(regex_constants::error_temp);
2870+
throw regex_error(regex_constants::__re_err_empty);
28632871
}
28642872
return __first;
28652873
}
@@ -2873,14 +2881,14 @@ basic_regex<_CharT, _Traits>::__parse_extended_reg_exp(_ForwardIterator __first,
28732881
__owns_one_state<_CharT>* __sa = __end_;
28742882
_ForwardIterator __temp = __parse_ERE_branch(__first, __last);
28752883
if (__temp == __first)
2876-
throw regex_error(regex_constants::error_temp);
2884+
throw regex_error(regex_constants::__re_err_empty);
28772885
__first = __temp;
28782886
while (__first != __last && *__first == '|')
28792887
{
28802888
__owns_one_state<_CharT>* __sb = __end_;
28812889
__temp = __parse_ERE_branch(++__first, __last);
28822890
if (__temp == __first)
2883-
throw regex_error(regex_constants::error_temp);
2891+
throw regex_error(regex_constants::__re_err_empty);
28842892
__push_alternation(__sa, __sb);
28852893
__first = __temp;
28862894
}
@@ -2895,7 +2903,7 @@ basic_regex<_CharT, _Traits>::__parse_ERE_branch(_ForwardIterator __first,
28952903
{
28962904
_ForwardIterator __temp = __parse_ERE_expression(__first, __last);
28972905
if (__temp == __first)
2898-
throw regex_error(regex_constants::error_temp);
2906+
throw regex_error(regex_constants::__re_err_empty);
28992907
do
29002908
{
29012909
__first = __temp;
@@ -4879,7 +4887,7 @@ basic_regex<_CharT, _Traits>::__match_at_start_ecma(
48794887
__states.pop_back();
48804888
break;
48814889
default:
4882-
throw regex_error(regex_constants::error_temp);
4890+
throw regex_error(regex_constants::__re_err_unknown);
48834891
break;
48844892
}
48854893
} while (!__states.empty());
@@ -4893,7 +4901,6 @@ bool
48934901
basic_regex<_CharT, _Traits>::__match_at_start_posix_nosubs(
48944902
const _CharT* __first, const _CharT* __last,
48954903
match_results<const _CharT*, _Allocator>& __m,
4896-
vector<size_t>& __lc,
48974904
regex_constants::match_flag_type __flags) const
48984905
{
48994906
deque<__state> __states;
@@ -4919,11 +4926,9 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_nosubs(
49194926
switch (__s.__do_)
49204927
{
49214928
case __state::__end_state:
4922-
if (__highest_j < __s.__current_ - __s.__first_)
4923-
{
4929+
if (!__matched || __highest_j < __s.__current_ - __s.__first_)
49244930
__highest_j = __s.__current_ - __s.__first_;
4925-
__matched = true;
4926-
}
4931+
__matched = true;
49274932
if (__highest_j == _N)
49284933
__states.clear();
49294934
else
@@ -4950,7 +4955,7 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_nosubs(
49504955
__states.pop_back();
49514956
break;
49524957
default:
4953-
throw regex_error(regex_constants::error_temp);
4958+
throw regex_error(regex_constants::__re_err_unknown);
49544959
break;
49554960
}
49564961
} while (!__states.empty());
@@ -4971,7 +4976,6 @@ bool
49714976
basic_regex<_CharT, _Traits>::__match_at_start_posix_subs(
49724977
const _CharT* __first, const _CharT* __last,
49734978
match_results<const _CharT*, _Allocator>& __m,
4974-
vector<size_t>& __lc,
49754979
regex_constants::match_flag_type __flags) const
49764980
{
49774981
vector<__state> __states;
@@ -5001,16 +5005,16 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_subs(
50015005
switch (__s.__do_)
50025006
{
50035007
case __state::__end_state:
5004-
if (__j == 0 || __highest_j < __j)
5008+
if (!__matched || __highest_j < __s.__current_ - __s.__first_)
50055009
{
5006-
__matched = true;
5007-
__highest_j = __j;
5010+
__highest_j = __s.__current_ - __s.__first_;
50085011
__best_state = __s;
5009-
if (__highest_j == _N || __highest_j == 0)
5010-
__states.clear();
5011-
else
5012-
__states.pop_back();
50135012
}
5013+
__matched = true;
5014+
if (__highest_j == _N)
5015+
__states.clear();
5016+
else
5017+
__states.pop_back();
50145018
break;
50155019
case __state::__accept_and_consume:
50165020
__j += __s.__current_ - __current;
@@ -5031,7 +5035,7 @@ basic_regex<_CharT, _Traits>::__match_at_start_posix_subs(
50315035
__states.pop_back();
50325036
break;
50335037
default:
5034-
throw regex_error(regex_constants::error_temp);
5038+
throw regex_error(regex_constants::__re_err_unknown);
50355039
break;
50365040
}
50375041
} while (!__states.empty());
@@ -5054,14 +5058,13 @@ bool
50545058
basic_regex<_CharT, _Traits>::__match_at_start(
50555059
const _CharT* __first, const _CharT* __last,
50565060
match_results<const _CharT*, _Allocator>& __m,
5057-
vector<size_t>& __lc,
50585061
regex_constants::match_flag_type __flags) const
50595062
{
5060-
if (__flags_ & ECMAScript)
5063+
if ((__flags_ & 0x1F0) == ECMAScript)
50615064
return __match_at_start_ecma(__first, __last, __m, __flags);
50625065
if (mark_count() == 0)
5063-
return __match_at_start_posix_nosubs(__first, __last, __m, __lc, __flags);
5064-
return __match_at_start_posix_subs(__first, __last, __m, __lc, __flags);
5066+
return __match_at_start_posix_nosubs(__first, __last, __m, __flags);
5067+
return __match_at_start_posix_subs(__first, __last, __m, __flags);
50655068
}
50665069

50675070
template <class _CharT, class _Traits>
@@ -5075,8 +5078,7 @@ basic_regex<_CharT, _Traits>::__search(
50755078
if (__left_anchor_)
50765079
__flags |= regex_constants::match_continuous;
50775080
__m.__init(1 + mark_count(), __first, __last);
5078-
vector<size_t> __lc(__loop_count());
5079-
if (__match_at_start(__first, __last, __m, __lc, __flags))
5081+
if (__match_at_start(__first, __last, __m, __flags))
50805082
{
50815083
__m.__prefix_.second = __m[0].first;
50825084
__m.__prefix_.matched = __m.__prefix_.first != __m.__prefix_.second;
@@ -5089,7 +5091,7 @@ basic_regex<_CharT, _Traits>::__search(
50895091
__m.__matches_.assign(__m.size(), __m.__unmatched_);
50905092
for (++__first; __first != __last; ++__first)
50915093
{
5092-
if (__match_at_start(__first, __last, __m, __lc, __flags))
5094+
if (__match_at_start(__first, __last, __m, __flags))
50935095
{
50945096
__m.__prefix_.second = __m[0].first;
50955097
__m.__prefix_.matched = __m.__prefix_.first != __m.__prefix_.second;

libcxx/src/regex.cpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,10 @@ make_error_type_string(regex_constants::error_type ecode)
5050
case regex_constants::error_stack:
5151
return "There was insufficient memory to determine whether the regular "
5252
"expression could match the specified character sequence.";
53+
case regex_constants::__re_err_grammar:
54+
return "An invalid regex grammar has been requested.";
55+
case regex_constants::__re_err_empty:
56+
return "An empty regex is not allowed in the POSIX grammar.";
5357
default:
5458
break;
5559
}

libcxx/test/re/re.alg/re.alg.search/basic.pass.cpp

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -733,6 +733,22 @@ int main()
733733
assert(std::regex_search(s, m, std::regex("[ace1-9]*",
734734
std::regex_constants::basic)));
735735
assert(m.size() == 1);
736+
assert(!m.prefix().matched);
737+
assert(m.prefix().first == s);
738+
assert(m.prefix().second == m[0].first);
739+
assert(m.suffix().matched);
740+
assert(m.suffix().first == m[0].second);
741+
assert(m.suffix().second == s + std::char_traits<char>::length(s));
742+
assert(m.length(0) == 0);
743+
assert(m.position(0) == 0);
744+
assert(m.str(0) == "");
745+
}
746+
{
747+
std::cmatch m;
748+
const char s[] = "01a45cef9";
749+
assert(std::regex_search(s, m, std::regex("[ace1-9]\\{1,\\}",
750+
std::regex_constants::basic)));
751+
assert(m.size() == 1);
736752
assert(m.prefix().matched);
737753
assert(m.prefix().first == s);
738754
assert(m.prefix().second == m[0].first);
@@ -1476,6 +1492,22 @@ int main()
14761492
assert(std::regex_search(s, m, std::wregex(L"[ace1-9]*",
14771493
std::regex_constants::basic)));
14781494
assert(m.size() == 1);
1495+
assert(!m.prefix().matched);
1496+
assert(m.prefix().first == s);
1497+
assert(m.prefix().second == m[0].first);
1498+
assert(m.suffix().matched);
1499+
assert(m.suffix().first == m[0].second);
1500+
assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
1501+
assert(m.length(0) == 0);
1502+
assert(m.position(0) == 0);
1503+
assert(m.str(0) == L"");
1504+
}
1505+
{
1506+
std::wcmatch m;
1507+
const wchar_t s[] = L"01a45cef9";
1508+
assert(std::regex_search(s, m, std::wregex(L"[ace1-9]\\{1,\\}",
1509+
std::regex_constants::basic)));
1510+
assert(m.size() == 1);
14791511
assert(m.prefix().matched);
14801512
assert(m.prefix().first == s);
14811513
assert(m.prefix().second == m[0].first);

0 commit comments

Comments
 (0)