Skip to content

Commit 174f0cb

Browse files
committed
(GH-824) Refactor Token Folding Operations
Previously, the detection of each token type was separated into discrete methods to make the code easier to read. However, this meant the Tokens array was enumerated many times and intermediate arrays/lists were passed around. This commit takes the individual methods and combines them into shared loops to reduce the number of enumerations and regular expression matches to a minimum. Note that there are considerable code comments here because the code is now more complex on initial review.
1 parent 66db01e commit 174f0cb

File tree

1 file changed

+99
-147
lines changed

1 file changed

+99
-147
lines changed

src/PowerShellEditorServices/Language/TokenOperations.cs

+99-147
Original file line numberDiff line numberDiff line change
@@ -21,20 +21,15 @@ internal static class TokenOperations
2121
private const string RegionKindRegion = "region";
2222
private const string RegionKindNone = null;
2323

24-
// Opening tokens for { } and @{ }
25-
private static readonly TokenKind[] s_openingBraces = new []
26-
{
27-
TokenKind.LCurly,
28-
TokenKind.AtCurly
29-
};
30-
31-
// Opening tokens for ( ), @( ), $( )
32-
private static readonly TokenKind[] s_openingParens = new []
33-
{
34-
TokenKind.LParen,
35-
TokenKind.AtParen,
36-
TokenKind.DollarParen
37-
};
24+
// These regular expressions are used to match lines which mark the start and end of region comment in a PowerShell
25+
// script. They are based on the defaults in the VS Code Language Configuration at:
26+
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
27+
static private readonly Regex s_startRegionTextRegex = new Regex(
28+
@"^\s*#region\b",
29+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
30+
static private readonly Regex s_endRegionTextRegex = new Regex(
31+
@"^\s*#endregion\b",
32+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
3833

3934
/// <summary>
4035
/// Extracts all of the unique foldable regions in a script given the list of tokens
@@ -44,32 +39,100 @@ internal static FoldingReferenceList FoldableRegions(
4439
{
4540
var refList = new FoldingReferenceList();
4641

47-
// Find matching braces { -> }
48-
// Find matching hashes @{ -> }
49-
MatchTokenElements(tokens, s_openingBraces, TokenKind.RCurly, RegionKindNone, ref refList);
50-
51-
// Find matching parentheses ( -> )
52-
// Find matching array literals @( -> )
53-
// Find matching subexpressions $( -> )
54-
MatchTokenElements(tokens, s_openingParens, TokenKind.RParen, RegionKindNone, ref refList);
55-
56-
// Find contiguous here strings @' -> '@
57-
MatchTokenElement(tokens, TokenKind.HereStringLiteral, RegionKindNone, ref refList);
58-
59-
// Find unopinionated variable names ${ \n \n }
60-
MatchTokenElement(tokens, TokenKind.Variable, RegionKindNone, ref refList);
61-
62-
// Find contiguous here strings @" -> "@
63-
MatchTokenElement(tokens, TokenKind.HereStringExpandable, RegionKindNone, ref refList);
42+
Stack<Token> tokenCurlyStack = new Stack<Token>();
43+
Stack<Token> tokenParenStack = new Stack<Token>();
44+
foreach (Token token in tokens)
45+
{
46+
// Find matching braces { -> }
47+
// Find matching hashes @{ -> }
48+
if ((token.Kind == TokenKind.LCurly) || (token.Kind == TokenKind.AtCurly)) {
49+
tokenCurlyStack.Push(token);
50+
}
51+
if ((tokenCurlyStack.Count > 0) && (token.Kind == TokenKind.RCurly)) {
52+
refList.SafeAdd(CreateFoldingReference(tokenCurlyStack.Pop(), token, RegionKindNone));
53+
}
54+
// Find matching parentheses ( -> )
55+
// Find matching array literals @( -> )
56+
// Find matching subexpressions $( -> )
57+
if ((token.Kind == TokenKind.LParen) || (token.Kind == TokenKind.AtParen) || (token.Kind == TokenKind.DollarParen)) {
58+
tokenParenStack.Push(token);
59+
}
60+
if ((tokenParenStack.Count > 0) && (token.Kind == TokenKind.RParen)) {
61+
refList.SafeAdd(CreateFoldingReference(tokenParenStack.Pop(), token, RegionKindNone));
62+
}
63+
// Find contiguous here strings @' -> '@
64+
// Find unopinionated variable names ${ \n \n }
65+
// Find contiguous expandable here strings @" -> "@
66+
if ((
67+
(token.Kind == TokenKind.HereStringLiteral) ||
68+
(token.Kind == TokenKind.Variable) ||
69+
(token.Kind == TokenKind.HereStringExpandable)
70+
) && (token.Extent.StartLineNumber != token.Extent.EndLineNumber)) {
71+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindNone));
72+
}
73+
}
6474

6575
// Find matching comment regions #region -> #endregion
66-
MatchCustomCommentRegionTokenElements(tokens, RegionKindRegion, ref refList);
67-
76+
// Given a list of tokens, find the tokens that are comments and
77+
// the comment text starts with either `#region` or `#endregion`, and then use a stack to determine
78+
// the ranges they span
79+
//
6880
// Find blocks of line comments # comment1\n# comment2\n...
69-
MatchBlockCommentTokenElement(tokens, RegionKindComment, ref refList);
70-
81+
// Finding blocks of comment tokens is more complicated as the newline characters are not
82+
// classed as comments. To work around this we search for valid block comments (see IsBlockComment)
83+
// and then determine contiguous line numbers from there
84+
//
7185
// Find comments regions <# -> #>
72-
MatchTokenElement(tokens, TokenKind.Comment, RegionKindComment, ref refList);
86+
// Match the token start and end of kind TokenKind.Comment
87+
var tokenCommentRegionStack = new Stack<Token>();
88+
Token blockStartToken = null;
89+
int blockNextLine = -1;
90+
91+
for (int index = 0; index < tokens.Length; index++)
92+
{
93+
Token token = tokens[index];
94+
if (token.Kind != TokenKind.Comment) { continue; }
95+
// Processing for comment regions <# -> #>
96+
if (token.Extent.StartLineNumber != token.Extent.EndLineNumber)
97+
{
98+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindComment));
99+
continue;
100+
}
101+
102+
if (!IsBlockComment(index, tokens)) { continue; }
103+
// Regexes are very expensive. Use them sparingly!
104+
// Processing for #region -> #endregion
105+
if (s_startRegionTextRegex.IsMatch(token.Text))
106+
{
107+
tokenCommentRegionStack.Push(token);
108+
continue;
109+
}
110+
if (s_endRegionTextRegex.IsMatch(token.Text))
111+
{
112+
// Mismatched regions in the script can cause bad stacks.
113+
if (tokenCommentRegionStack.Count > 0)
114+
{
115+
refList.SafeAdd(CreateFoldingReference(tokenCommentRegionStack.Pop(), token, RegionKindRegion));
116+
}
117+
continue;
118+
}
119+
// If it's neither a start nor an end region then it could be part of a block of line comments
120+
// Processing for blocks of line comments # comment1\n# comment2\n...
121+
int thisLine = token.Extent.StartLineNumber - 1;
122+
if ((blockStartToken != null) && (thisLine != blockNextLine))
123+
{
124+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
125+
blockStartToken = token;
126+
}
127+
if (blockStartToken == null) { blockStartToken = token; }
128+
blockNextLine = thisLine + 1;
129+
}
130+
// If we exit the token array and we're still processing comment lines, then the
131+
// comment block simply ends at the end of document
132+
if (blockStartToken != null)
133+
{
134+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
135+
}
73136

74137
return refList;
75138
}
@@ -114,45 +177,6 @@ static private FoldingReference CreateFoldingReference(
114177
};
115178
}
116179

117-
/// <summary>
118-
/// Given an array of tokens, find matching regions which start (array of tokens) and end with a different TokenKind
119-
/// </summary>
120-
static private void MatchTokenElements(
121-
Token[] tokens,
122-
TokenKind[] startTokenKind,
123-
TokenKind endTokenKind,
124-
string matchKind,
125-
ref FoldingReferenceList refList)
126-
{
127-
Stack<Token> tokenStack = new Stack<Token>();
128-
foreach (Token token in tokens)
129-
{
130-
if (Array.IndexOf(startTokenKind, token.Kind) != -1) {
131-
tokenStack.Push(token);
132-
}
133-
if ((tokenStack.Count > 0) && (token.Kind == endTokenKind)) {
134-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
135-
}
136-
}
137-
}
138-
139-
/// <summary>
140-
/// Given an array of token, finds a specific token
141-
/// </summary>
142-
static private void MatchTokenElement(
143-
Token[] tokens,
144-
TokenKind tokenKind,
145-
string matchKind,
146-
ref FoldingReferenceList refList)
147-
{
148-
foreach (Token token in tokens)
149-
{
150-
if ((token.Kind == tokenKind) && (token.Extent.StartLineNumber != token.Extent.EndLineNumber)) {
151-
refList.SafeAdd(CreateFoldingReference(token, token, matchKind));
152-
}
153-
}
154-
}
155-
156180
/// <summary>
157181
/// Returns true if a Token is a block comment;
158182
/// - Must be a TokenKind.Comment
@@ -167,77 +191,5 @@ static private bool IsBlockComment(int index, Token[] tokens) {
167191
if (tokens[index - 1].Kind != TokenKind.NewLine) { return false; }
168192
return thisToken.Text.StartsWith("#");
169193
}
170-
171-
// This regular expression is used to detect a line comment (as opposed to an inline comment), that is not a region
172-
// block directive i.e.
173-
// - No text between the beginning of the line and `#`
174-
// - Comment does not start with region
175-
// - Comment does not start with endregion
176-
static private readonly Regex s_nonRegionLineCommentRegex = new Regex(
177-
@"\s*#(?!region\b|endregion\b)",
178-
RegexOptions.IgnoreCase | RegexOptions.Compiled);
179-
180-
/// <summary>
181-
/// Finding blocks of comment tokens is more complicated as the newline characters are not
182-
/// classed as comments. To work around this we search for valid block comments (see IsBlockComment)
183-
/// and then determine contiguous line numbers from there
184-
/// </summary>
185-
static private void MatchBlockCommentTokenElement(
186-
Token[] tokens,
187-
string matchKind,
188-
ref FoldingReferenceList refList)
189-
{
190-
Token startToken = null;
191-
int nextLine = -1;
192-
for (int index = 0; index < tokens.Length; index++)
193-
{
194-
Token thisToken = tokens[index];
195-
if (IsBlockComment(index, tokens) && s_nonRegionLineCommentRegex.IsMatch(thisToken.Text)) {
196-
int thisLine = thisToken.Extent.StartLineNumber - 1;
197-
if ((startToken != null) && (thisLine != nextLine)) {
198-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
199-
startToken = thisToken;
200-
}
201-
if (startToken == null) { startToken = thisToken; }
202-
nextLine = thisLine + 1;
203-
}
204-
}
205-
// If we exit the token array and we're still processing comment lines, then the
206-
// comment block simply ends at the end of document
207-
if (startToken != null) {
208-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
209-
}
210-
}
211-
212-
/// <summary>
213-
/// Given a list of tokens, find the tokens that are comments and
214-
/// the comment text is either `# region` or `# endregion`, and then use a stack to determine
215-
/// the ranges they span
216-
/// </summary>
217-
static private void MatchCustomCommentRegionTokenElements(
218-
Token[] tokens,
219-
string matchKind,
220-
ref FoldingReferenceList refList)
221-
{
222-
// These regular expressions are used to match lines which mark the start and end of region comment in a PowerShell
223-
// script. They are based on the defaults in the VS Code Language Configuration at;
224-
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
225-
string startRegionText = @"^\s*#region\b";
226-
string endRegionText = @"^\s*#endregion\b";
227-
228-
Stack<Token> tokenStack = new Stack<Token>();
229-
for (int index = 0; index < tokens.Length; index++)
230-
{
231-
if (IsBlockComment(index, tokens)) {
232-
Token token = tokens[index];
233-
if (Regex.IsMatch(token.Text, startRegionText, RegexOptions.IgnoreCase)) {
234-
tokenStack.Push(token);
235-
}
236-
if ((tokenStack.Count > 0) && (Regex.IsMatch(token.Text, endRegionText, RegexOptions.IgnoreCase))) {
237-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
238-
}
239-
}
240-
}
241-
}
242194
}
243195
}

0 commit comments

Comments
 (0)