11
11
// ===----------------------------------------------------------------------===//
12
12
13
13
#include " llvm/Support/GlobPattern.h"
14
- #include " llvm/ADT/ArrayRef.h"
15
14
#include " llvm/ADT/StringRef.h"
16
15
#include " llvm/Support/Errc.h"
17
16
@@ -54,18 +53,115 @@ static Expected<BitVector> expand(StringRef S, StringRef Original) {
54
53
return BV;
55
54
}
56
55
57
- Expected<GlobPattern> GlobPattern::create (StringRef S) {
56
+ // Identify brace expansions in S and return the list of patterns they expand
57
+ // into.
58
+ static Expected<SmallVector<std::string, 1 >>
59
+ parseBraceExpansions (StringRef S, std::optional<size_t > MaxSubPatterns) {
60
+ SmallVector<std::string> SubPatterns = {S.str ()};
61
+ if (!MaxSubPatterns || !S.contains (' {' ))
62
+ return SubPatterns;
63
+
64
+ struct BraceExpansion {
65
+ size_t Start;
66
+ size_t Length;
67
+ SmallVector<StringRef, 2 > Terms;
68
+ };
69
+ SmallVector<BraceExpansion, 0 > BraceExpansions;
70
+
71
+ BraceExpansion *CurrentBE = nullptr ;
72
+ size_t TermBegin;
73
+ for (size_t I = 0 , E = S.size (); I != E; ++I) {
74
+ if (S[I] == ' [' ) {
75
+ I = S.find (' ]' , I + 2 );
76
+ if (I == std::string::npos)
77
+ return make_error<StringError>(" invalid glob pattern, unmatched '['" ,
78
+ errc::invalid_argument);
79
+ } else if (S[I] == ' {' ) {
80
+ if (CurrentBE)
81
+ return make_error<StringError>(
82
+ " nested brace expansions are not supported" ,
83
+ errc::invalid_argument);
84
+ CurrentBE = &BraceExpansions.emplace_back ();
85
+ CurrentBE->Start = I;
86
+ TermBegin = I + 1 ;
87
+ } else if (S[I] == ' ,' ) {
88
+ if (!CurrentBE)
89
+ continue ;
90
+ CurrentBE->Terms .push_back (S.substr (TermBegin, I - TermBegin));
91
+ TermBegin = I + 1 ;
92
+ } else if (S[I] == ' }' ) {
93
+ if (!CurrentBE)
94
+ continue ;
95
+ if (CurrentBE->Terms .empty ())
96
+ return make_error<StringError>(
97
+ " empty or singleton brace expansions are not supported" ,
98
+ errc::invalid_argument);
99
+ CurrentBE->Terms .push_back (S.substr (TermBegin, I - TermBegin));
100
+ CurrentBE->Length = I - CurrentBE->Start + 1 ;
101
+ CurrentBE = nullptr ;
102
+ } else if (S[I] == ' \\ ' ) {
103
+ if (++I == E)
104
+ return make_error<StringError>(" invalid glob pattern, stray '\\ '" ,
105
+ errc::invalid_argument);
106
+ }
107
+ }
108
+ if (CurrentBE)
109
+ return make_error<StringError>(" incomplete brace expansion" ,
110
+ errc::invalid_argument);
111
+
112
+ size_t NumSubPatterns = 1 ;
113
+ for (auto &BE : BraceExpansions) {
114
+ if (NumSubPatterns > std::numeric_limits<size_t >::max () / BE.Terms .size ()) {
115
+ NumSubPatterns = std::numeric_limits<size_t >::max ();
116
+ break ;
117
+ }
118
+ NumSubPatterns *= BE.Terms .size ();
119
+ }
120
+ if (NumSubPatterns > *MaxSubPatterns)
121
+ return make_error<StringError>(" too many brace expansions" ,
122
+ errc::invalid_argument);
123
+ // Replace brace expansions in reverse order so that we don't invalidate
124
+ // earlier start indices
125
+ for (auto &BE : reverse (BraceExpansions)) {
126
+ SmallVector<std::string> OrigSubPatterns;
127
+ std::swap (SubPatterns, OrigSubPatterns);
128
+ for (StringRef Term : BE.Terms )
129
+ for (StringRef Orig : OrigSubPatterns)
130
+ SubPatterns.emplace_back (Orig).replace (BE.Start , BE.Length , Term);
131
+ }
132
+ return SubPatterns;
133
+ }
134
+
135
+ Expected<GlobPattern>
136
+ GlobPattern::create (StringRef S, std::optional<size_t > MaxSubPatterns) {
58
137
GlobPattern Pat;
59
138
60
139
// Store the prefix that does not contain any metacharacter.
61
- size_t PrefixSize = S.find_first_of (" ?*[\\ " );
140
+ size_t PrefixSize = S.find_first_of (" ?*[{ \\ " );
62
141
Pat.Prefix = S.substr (0 , PrefixSize);
63
142
if (PrefixSize == std::string::npos)
64
143
return Pat;
65
144
S = S.substr (PrefixSize);
66
145
146
+ SmallVector<std::string, 1 > SubPats;
147
+ if (auto Err = parseBraceExpansions (S, MaxSubPatterns).moveInto (SubPats))
148
+ return Err;
149
+ for (StringRef SubPat : SubPats) {
150
+ auto SubGlobOrErr = SubGlobPattern::create (SubPat);
151
+ if (!SubGlobOrErr)
152
+ return SubGlobOrErr.takeError ();
153
+ Pat.SubGlobs .push_back (*SubGlobOrErr);
154
+ }
155
+
156
+ return Pat;
157
+ }
158
+
159
+ Expected<GlobPattern::SubGlobPattern>
160
+ GlobPattern::SubGlobPattern::create (StringRef S) {
161
+ SubGlobPattern Pat;
162
+
67
163
// Parse brackets.
68
- Pat.Pat = S ;
164
+ Pat.Pat . assign (S. begin (), S. end ()) ;
69
165
for (size_t I = 0 , E = S.size (); I != E; ++I) {
70
166
if (S[I] == ' [' ) {
71
167
// ']' is allowed as the first character of a character class. '[]' is
@@ -83,7 +179,7 @@ Expected<GlobPattern> GlobPattern::create(StringRef S) {
83
179
return BV.takeError ();
84
180
if (Invert)
85
181
BV->flip ();
86
- Pat.Brackets .push_back (Bracket{S. data () + J + 1 , std::move (*BV)});
182
+ Pat.Brackets .push_back (Bracket{J + 1 , std::move (*BV)});
87
183
I = J;
88
184
} else if (S[I] == ' \\ ' ) {
89
185
if (++I == E)
@@ -95,13 +191,20 @@ Expected<GlobPattern> GlobPattern::create(StringRef S) {
95
191
}
96
192
97
193
// Return true if S matches this pattern: S must start with the literal
// prefix, and the rest of S must be accepted by at least one sub-glob. A
// pattern without sub-globs matches only when nothing is left after the
// prefix.
bool GlobPattern::match(StringRef S) const {
  const bool HasPrefix = S.consume_front(Prefix);
  if (!HasPrefix)
    return false;

  // Pure-literal pattern: the prefix has to account for the whole input.
  if (SubGlobs.empty())
    return S.empty();

  bool Matched = false;
  for (auto It = SubGlobs.begin(), End = SubGlobs.end();
       !Matched && It != End; ++It)
    Matched = It->match(S);
  return Matched;
}
100
203
101
204
// Factor the pattern into segments split by '*'. The segment is matched
102
205
// sequentially by finding the first occurrence past the end of the previous
103
206
// match.
104
- bool GlobPattern::matchOne (StringRef Str) const {
207
+ bool GlobPattern::SubGlobPattern::match (StringRef Str) const {
105
208
const char *P = Pat.data (), *SegmentBegin = nullptr , *S = Str.data (),
106
209
*SavedS = S;
107
210
const char *const PEnd = P + Pat.size (), *const End = S + Str.size ();
@@ -118,7 +221,7 @@ bool GlobPattern::matchOne(StringRef Str) const {
118
221
continue ;
119
222
} else if (*P == ' [' ) {
120
223
if (Brackets[B].Bytes [uint8_t (*S)]) {
121
- P = Brackets[B++].Next ;
224
+ P = Pat. data () + Brackets[B++].NextOffset ;
122
225
++S;
123
226
continue ;
124
227
}
@@ -143,5 +246,5 @@ bool GlobPattern::matchOne(StringRef Str) const {
143
246
}
144
247
// All bytes in Str have been matched. Return true if the rest part of Pat is
145
248
// empty or contains only '*'.
146
- return Pat .find_first_not_of (' *' , P - Pat.data ()) == std::string::npos;
249
+ return getPat () .find_first_not_of (' *' , P - Pat.data ()) == std::string::npos;
147
250
}
0 commit comments