Skip to content

Commit 9e1b625

Browse files
committed
(GH-824) Refactor Token Folding Operations
Previously, the detection of each token type was separated into discrete blocks to make the code easier to read. However, this meant the Tokens array was enumerated many times and intermediate arrays/lists were passed around. This commit merges the individual methods so that the number of enumerations and regular expression matches is kept to a minimum. Note that there are considerable code comments here because the code is now more complex on initial review.
1 parent fc65ffe commit 9e1b625

File tree

1 file changed

+121
-145
lines changed

1 file changed

+121
-145
lines changed

src/PowerShellEditorServices/Language/TokenOperations.cs

+121-145
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,15 @@ internal static class TokenOperations
2121
private const string RegionKindRegion = "region";
2222
private const string RegionKindNone = null;
2323

24-
// Opening tokens for { } and @{ }
25-
private static readonly TokenKind[] s_openingBraces = new []
26-
{
27-
TokenKind.LCurly,
28-
TokenKind.AtCurly
29-
};
30-
31-
// Opening tokens for ( ), @( ), $( )
32-
private static readonly TokenKind[] s_openingParens = new []
33-
{
34-
TokenKind.LParen,
35-
TokenKind.AtParen,
36-
TokenKind.DollarParen
37-
};
24+
// These regular expressions are used to match lines which mark the start and end of a region comment in a PowerShell
25+
// script. They are based on the defaults in the VS Code Language Configuration at:
26+
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
27+
static private readonly Regex s_startRegionTextRegex = new Regex(
28+
@"^\s*#region\b",
29+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
30+
static private readonly Regex s_endRegionTextRegex = new Regex(
31+
@"^\s*#endregion\b",
32+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
3833

3934
/// <summary>
4035
/// Extracts all of the unique foldable regions in a script given the list of tokens
@@ -44,32 +39,124 @@ internal static FoldingReferenceList FoldableRegions(
4439
{
4540
var refList = new FoldingReferenceList();
4641

47-
// Find matching braces { -> }
48-
// Find matching hashes @{ -> }
49-
MatchTokenElements(tokens, s_openingBraces, TokenKind.RCurly, RegionKindNone, ref refList);
50-
51-
// Find matching parentheses ( -> )
52-
// Find matching array literals @( -> )
53-
// Find matching subexpressions $( -> )
54-
MatchTokenElements(tokens, s_openingParens, TokenKind.RParen, RegionKindNone, ref refList);
42+
Stack<Token> tokenCurlyStack = new Stack<Token>();
43+
Stack<Token> tokenParenStack = new Stack<Token>();
44+
foreach (Token token in tokens)
45+
{
46+
switch (token.Kind)
47+
{
48+
// Find matching braces { -> }
49+
// Find matching hashes @{ -> }
50+
case TokenKind.LCurly:
51+
case TokenKind.AtCurly:
52+
tokenCurlyStack.Push(token);
53+
break;
54+
55+
case TokenKind.RCurly:
56+
if (tokenCurlyStack.Count > 0)
57+
{
58+
refList.SafeAdd(CreateFoldingReference(tokenCurlyStack.Pop(), token, RegionKindNone));
59+
}
60+
break;
61+
62+
// Find matching parentheses ( -> )
63+
// Find matching array literals @( -> )
64+
// Find matching subexpressions $( -> )
65+
case TokenKind.LParen:
66+
case TokenKind.AtParen:
67+
case TokenKind.DollarParen:
68+
tokenParenStack.Push(token);
69+
break;
70+
71+
case TokenKind.RParen:
72+
if (tokenParenStack.Count > 0)
73+
{
74+
refList.SafeAdd(CreateFoldingReference(tokenParenStack.Pop(), token, RegionKindNone));
75+
}
76+
break;
77+
78+
// Find contiguous here strings @' -> '@
79+
// Find unopinionated variable names ${ \n \n }
80+
// Find contiguous expandable here strings @" -> "@
81+
case TokenKind.HereStringLiteral:
82+
case TokenKind.Variable:
83+
case TokenKind.HereStringExpandable:
84+
if (token.Extent.StartLineNumber != token.Extent.EndLineNumber)
85+
{
86+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindNone));
87+
}
88+
break;
89+
90+
default:
91+
break;
92+
}
93+
}
5594

56-
// Find contiguous here strings @' -> '@
57-
MatchTokenElement(tokens, TokenKind.HereStringLiteral, RegionKindNone, ref refList);
95+
// Find matching comment regions #region -> #endregion
96+
// Given a list of tokens, find the tokens that are comments and
97+
// the comment text is either `#region` or `#endregion`, and then use a stack to determine
98+
// the ranges they span
99+
//
100+
// Find blocks of line comments # comment1\n# comment2\n...
101+
// Finding blocks of comment tokens is more complicated as the newline characters are not
102+
// classed as comments. To work around this we search for valid block comments (see IsBlockComment)
103+
// and then determine contiguous line numbers from there
104+
//
105+
// Find comment regions <# -> #>
106+
// Match the token start and end of kind TokenKind.Comment
107+
var tokenCommentRegionStack = new Stack<Token>();
108+
Token blockStartToken = null;
109+
int blockNextLine = -1;
58110

59-
// Find unopinionated variable names ${ \n \n }
60-
MatchTokenElement(tokens, TokenKind.Variable, RegionKindNone, ref refList);
111+
for (int index = 0; index < tokens.Length; index++)
112+
{
113+
Token token = tokens[index];
114+
if (token.Kind != TokenKind.Comment) { continue; }
115+
116+
// Processing for comment regions <# -> #>
117+
if (token.Extent.StartLineNumber != token.Extent.EndLineNumber)
118+
{
119+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindComment));
120+
continue;
121+
}
61122

62-
// Find contiguous here strings @" -> "@
63-
MatchTokenElement(tokens, TokenKind.HereStringExpandable, RegionKindNone, ref refList);
123+
if (!IsBlockComment(index, tokens)) { continue; }
64124

65-
// Find matching comment regions #region -> #endregion
66-
MatchCustomCommentRegionTokenElements(tokens, RegionKindRegion, ref refList);
125+
// Regexes are very expensive. Use them sparingly!
126+
// Processing for #region -> #endregion
127+
if (s_startRegionTextRegex.IsMatch(token.Text))
128+
{
129+
tokenCommentRegionStack.Push(token);
130+
continue;
131+
}
132+
else if (s_endRegionTextRegex.IsMatch(token.Text))
133+
{
134+
// Mismatched regions in the script can cause bad stacks.
135+
if (tokenCommentRegionStack.Count > 0)
136+
{
137+
refList.SafeAdd(CreateFoldingReference(tokenCommentRegionStack.Pop(), token, RegionKindRegion));
138+
}
139+
continue;
140+
}
67141

68-
// Find blocks of line comments # comment1\n# comment2\n...
69-
MatchBlockCommentTokenElement(tokens, RegionKindComment, ref refList);
142+
// If it's neither a start nor an end region then it could be a block line comment
143+
// Processing for blocks of line comments # comment1\n# comment2\n...
144+
int thisLine = token.Extent.StartLineNumber - 1;
145+
if ((blockStartToken != null) && (thisLine != blockNextLine))
146+
{
147+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
148+
blockStartToken = token;
149+
}
150+
if (blockStartToken == null) { blockStartToken = token; }
151+
blockNextLine = thisLine + 1;
152+
}
70153

71-
// Find comments regions <# -> #>
72-
MatchTokenElement(tokens, TokenKind.Comment, RegionKindComment, ref refList);
154+
// If we exit the token array and we're still processing comment lines, then the
155+
// comment block simply ends at the end of document
156+
if (blockStartToken != null)
157+
{
158+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
159+
}
73160

74161
return refList;
75162
}
@@ -114,45 +201,6 @@ static private FoldingReference CreateFoldingReference(
114201
};
115202
}
116203

117-
/// <summary>
118-
/// Given an array of tokens, find matching regions which start (array of tokens) and end with a different TokenKind
119-
/// </summary>
120-
static private void MatchTokenElements(
121-
Token[] tokens,
122-
TokenKind[] startTokenKind,
123-
TokenKind endTokenKind,
124-
string matchKind,
125-
ref FoldingReferenceList refList)
126-
{
127-
Stack<Token> tokenStack = new Stack<Token>();
128-
foreach (Token token in tokens)
129-
{
130-
if (Array.IndexOf(startTokenKind, token.Kind) != -1) {
131-
tokenStack.Push(token);
132-
}
133-
if ((tokenStack.Count > 0) && (token.Kind == endTokenKind)) {
134-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
135-
}
136-
}
137-
}
138-
139-
/// <summary>
140-
/// Given an array of token, finds a specific token
141-
/// </summary>
142-
static private void MatchTokenElement(
143-
Token[] tokens,
144-
TokenKind tokenKind,
145-
string matchKind,
146-
ref FoldingReferenceList refList)
147-
{
148-
foreach (Token token in tokens)
149-
{
150-
if ((token.Kind == tokenKind) && (token.Extent.StartLineNumber != token.Extent.EndLineNumber)) {
151-
refList.SafeAdd(CreateFoldingReference(token, token, matchKind));
152-
}
153-
}
154-
}
155-
156204
/// <summary>
157205
/// Returns true if a Token is a block comment;
158206
/// - Must be a TokenKind.comment
@@ -167,77 +215,5 @@ static private bool IsBlockComment(int index, Token[] tokens) {
167215
if (tokens[index - 1].Kind != TokenKind.NewLine) { return false; }
168216
return thisToken.Text.StartsWith("#");
169217
}
170-
171-
// This regular expressions is used to detect a line comment (as opposed to an inline comment), that is not a region
172-
// block directive i.e.
173-
// - No text between the beginning of the line and `#`
174-
// - Comment does start with region
175-
// - Comment does start with endregion
176-
static private readonly Regex s_nonRegionLineCommentRegex = new Regex(
177-
@"\s*#(?!region\b|endregion\b)",
178-
RegexOptions.IgnoreCase | RegexOptions.Compiled);
179-
180-
/// <summary>
181-
/// Finding blocks of comment tokens is more complicated as the newline characters are not
182-
/// classed as comments. To workaround this we search for valid block comments (See IsBlockCmment)
183-
/// and then determine contiguous line numbers from there
184-
/// </summary>
185-
static private void MatchBlockCommentTokenElement(
186-
Token[] tokens,
187-
string matchKind,
188-
ref FoldingReferenceList refList)
189-
{
190-
Token startToken = null;
191-
int nextLine = -1;
192-
for (int index = 0; index < tokens.Length; index++)
193-
{
194-
Token thisToken = tokens[index];
195-
if (IsBlockComment(index, tokens) && s_nonRegionLineCommentRegex.IsMatch(thisToken.Text)) {
196-
int thisLine = thisToken.Extent.StartLineNumber - 1;
197-
if ((startToken != null) && (thisLine != nextLine)) {
198-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
199-
startToken = thisToken;
200-
}
201-
if (startToken == null) { startToken = thisToken; }
202-
nextLine = thisLine + 1;
203-
}
204-
}
205-
// If we exit the token array and we're still processing comment lines, then the
206-
// comment block simply ends at the end of document
207-
if (startToken != null) {
208-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
209-
}
210-
}
211-
212-
/// <summary>
213-
/// Given a list of tokens, find the tokens that are comments and
214-
/// the comment text is either `# region` or `# endregion`, and then use a stack to determine
215-
/// the ranges they span
216-
/// </summary>
217-
static private void MatchCustomCommentRegionTokenElements(
218-
Token[] tokens,
219-
string matchKind,
220-
ref FoldingReferenceList refList)
221-
{
222-
// These regular expressions are used to match lines which mark the start and end of region comment in a PowerShell
223-
// script. They are based on the defaults in the VS Code Language Configuration at;
224-
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
225-
string startRegionText = @"^\s*#region\b";
226-
string endRegionText = @"^\s*#endregion\b";
227-
228-
Stack<Token> tokenStack = new Stack<Token>();
229-
for (int index = 0; index < tokens.Length; index++)
230-
{
231-
if (IsBlockComment(index, tokens)) {
232-
Token token = tokens[index];
233-
if (Regex.IsMatch(token.Text, startRegionText, RegexOptions.IgnoreCase)) {
234-
tokenStack.Push(token);
235-
}
236-
if ((tokenStack.Count > 0) && (Regex.IsMatch(token.Text, endRegionText, RegexOptions.IgnoreCase))) {
237-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
238-
}
239-
}
240-
}
241-
}
242218
}
243219
}

0 commit comments

Comments
 (0)