Skip to content

Commit 7f9972d

Browse files
committed
(GH-824) Refactor Token Folding Operations
Previously, each token type detection was separated into discrete blocks to make the code easier to read. However, this meant there were many enumerations of the Tokens array, as well as intermediate arrays/lists being passed around. This commit takes the individual methods and overlaps them to reduce the number of enumerations and regular expression matches to a minimum. Note that there are considerable code comments here because the code is now more complex on initial review.
1 parent e487d6b commit 7f9972d

File tree

4 files changed

+148
-159
lines changed

4 files changed

+148
-159
lines changed

src/PowerShellEditorServices.Protocol/Server/LanguageServer.cs

+6-6
Original file line numberDiff line numberDiff line change
@@ -1388,14 +1388,14 @@ private FoldingRange[] Fold(
13881388
// If we're showing the last line, decrement the Endline of all regions by one.
13891389
int endLineOffset = this.currentSettings.CodeFolding.ShowLastLine ? -1 : 0;
13901390

1391-
foreach (KeyValuePair<int, FoldingReference> entry in TokenOperations.FoldableRegions(script.ScriptTokens))
1391+
foreach (FoldingReference fold in TokenOperations.FoldableReferences(script.ScriptTokens).References)
13921392
{
13931393
result.Add(new FoldingRange {
1394-
EndCharacter = entry.Value.EndCharacter,
1395-
EndLine = entry.Value.EndLine + endLineOffset,
1396-
Kind = entry.Value.Kind,
1397-
StartCharacter = entry.Value.StartCharacter,
1398-
StartLine = entry.Value.StartLine
1394+
EndCharacter = fold.EndCharacter,
1395+
EndLine = fold.EndLine + endLineOffset,
1396+
Kind = fold.Kind,
1397+
StartCharacter = fold.StartCharacter,
1398+
StartLine = fold.StartLine
13991399
});
14001400
}
14011401
return result.ToArray();

src/PowerShellEditorServices/Language/FoldingReference.cs

+19-6
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,21 @@ public int CompareTo(FoldingReference that) {
6666
/// A class that holds a list of FoldingReferences and ensures that, when adding a reference, the
6767
/// folding rules are obeyed, e.g. Only one fold per start line
6868
/// </summary>
69-
public class FoldingReferenceList : Dictionary<int, FoldingReference>
69+
public class FoldingReferenceList
7070
{
71+
private Dictionary<int, FoldingReference> references = new Dictionary<int, FoldingReference>();
72+
73+
/// <summary>
74+
/// Return all references in the list
75+
/// </summary>
76+
public IEnumerable<FoldingReference> References
77+
{
78+
get
79+
{
80+
return references.Values;
81+
}
82+
}
83+
7184
/// <summary>
7285
/// Adds a FoldingReference to the list and enforces ordering rules e.g. Only one fold per start line
7386
/// </summary>
@@ -76,13 +89,13 @@ public void SafeAdd(FoldingReference item)
7689
if (item == null) { return; }
7790

7891
// Only add the item if it hasn't been seen before or it's the largest range
79-
if (TryGetValue(item.StartLine, out FoldingReference currentItem))
92+
if (references.TryGetValue(item.StartLine, out FoldingReference currentItem))
8093
{
81-
if (currentItem.CompareTo(item) == 1) { this[item.StartLine] = item; }
94+
if (currentItem.CompareTo(item) == 1) { references[item.StartLine] = item; }
8295
}
8396
else
8497
{
85-
this[item.StartLine] = item;
98+
references[item.StartLine] = item;
8699
}
87100
}
88101

@@ -91,8 +104,8 @@ public void SafeAdd(FoldingReference item)
91104
/// </summary>
92105
public FoldingReference[] ToArray()
93106
{
94-
var result = new FoldingReference[Count];
95-
Values.CopyTo(result, 0);
107+
var result = new FoldingReference[references.Count];
108+
references.Values.CopyTo(result, 0);
96109
return result;
97110
}
98111
}

src/PowerShellEditorServices/Language/TokenOperations.cs

+122-146
Original file line numberDiff line numberDiff line change
@@ -21,55 +21,142 @@ internal static class TokenOperations
2121
private const string RegionKindRegion = "region";
2222
private const string RegionKindNone = null;
2323

24-
// Opening tokens for { } and @{ }
25-
private static readonly TokenKind[] s_openingBraces = new []
26-
{
27-
TokenKind.LCurly,
28-
TokenKind.AtCurly
29-
};
30-
31-
// Opening tokens for ( ), @( ), $( )
32-
private static readonly TokenKind[] s_openingParens = new []
33-
{
34-
TokenKind.LParen,
35-
TokenKind.AtParen,
36-
TokenKind.DollarParen
37-
};
24+
// These regular expressions are used to match lines which mark the start and end of a region comment in a PowerShell
25+
// script. They are based on the defaults in the VS Code Language Configuration at;
26+
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
27+
static private readonly Regex s_startRegionTextRegex = new Regex(
28+
@"^\s*#region\b",
29+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
30+
static private readonly Regex s_endRegionTextRegex = new Regex(
31+
@"^\s*#endregion\b",
32+
RegexOptions.IgnoreCase | RegexOptions.Compiled);
3833

3934
/// <summary>
4035
/// Extracts all of the unique foldable regions in a script given the list of tokens
4136
/// </summary>
42-
internal static FoldingReferenceList FoldableRegions(
37+
internal static FoldingReferenceList FoldableReferences(
4338
Token[] tokens)
4439
{
4540
var refList = new FoldingReferenceList();
4641

47-
// Find matching braces { -> }
48-
// Find matching hashes @{ -> }
49-
MatchTokenElements(tokens, s_openingBraces, TokenKind.RCurly, RegionKindNone, ref refList);
50-
51-
// Find matching parentheses ( -> )
52-
// Find matching array literals @( -> )
53-
// Find matching subexpressions $( -> )
54-
MatchTokenElements(tokens, s_openingParens, TokenKind.RParen, RegionKindNone, ref refList);
42+
Stack<Token> tokenCurlyStack = new Stack<Token>();
43+
Stack<Token> tokenParenStack = new Stack<Token>();
44+
foreach (Token token in tokens)
45+
{
46+
switch (token.Kind)
47+
{
48+
// Find matching braces { -> }
49+
// Find matching hashes @{ -> }
50+
case TokenKind.LCurly:
51+
case TokenKind.AtCurly:
52+
tokenCurlyStack.Push(token);
53+
break;
54+
55+
case TokenKind.RCurly:
56+
if (tokenCurlyStack.Count > 0)
57+
{
58+
refList.SafeAdd(CreateFoldingReference(tokenCurlyStack.Pop(), token, RegionKindNone));
59+
}
60+
break;
61+
62+
// Find matching parentheses ( -> )
63+
// Find matching array literals @( -> )
64+
// Find matching subexpressions $( -> )
65+
case TokenKind.LParen:
66+
case TokenKind.AtParen:
67+
case TokenKind.DollarParen:
68+
tokenParenStack.Push(token);
69+
break;
70+
71+
case TokenKind.RParen:
72+
if (tokenParenStack.Count > 0)
73+
{
74+
refList.SafeAdd(CreateFoldingReference(tokenParenStack.Pop(), token, RegionKindNone));
75+
}
76+
break;
77+
78+
// Find contiguous here strings @' -> '@
79+
// Find unopinionated variable names ${ \n \n }
80+
// Find contiguous expandable here strings @" -> "@
81+
case TokenKind.HereStringLiteral:
82+
case TokenKind.Variable:
83+
case TokenKind.HereStringExpandable:
84+
if (token.Extent.StartLineNumber != token.Extent.EndLineNumber)
85+
{
86+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindNone));
87+
}
88+
break;
89+
90+
default:
91+
break;
92+
}
93+
}
5594

56-
// Find contiguous here strings @' -> '@
57-
MatchTokenElement(tokens, TokenKind.HereStringLiteral, RegionKindNone, ref refList);
95+
// Find matching comment regions #region -> #endregion
96+
// Given a list of tokens, find the tokens that are comments and
97+
// the comment text is either `#region` or `#endregion`, and then use a stack to determine
98+
// the ranges they span
99+
//
100+
// Find blocks of line comments # comment1\n# comment2\n...
101+
// Finding blocks of comment tokens is more complicated as the newline characters are not
102+
// classed as comments. To work around this we search for valid block comments (see IsBlockComment)
103+
// and then determine contiguous line numbers from there
104+
//
105+
// Find comment regions <# -> #>
106+
// Match the token start and end of kind TokenKind.Comment
107+
var tokenCommentRegionStack = new Stack<Token>();
108+
Token blockStartToken = null;
109+
int blockNextLine = -1;
58110

59-
// Find unopinionated variable names ${ \n \n }
60-
MatchTokenElement(tokens, TokenKind.Variable, RegionKindNone, ref refList);
111+
for (int index = 0; index < tokens.Length; index++)
112+
{
113+
Token token = tokens[index];
114+
if (token.Kind != TokenKind.Comment) { continue; }
115+
116+
// Processing for comment regions <# -> #>
117+
if (token.Extent.StartLineNumber != token.Extent.EndLineNumber)
118+
{
119+
refList.SafeAdd(CreateFoldingReference(token, token, RegionKindComment));
120+
continue;
121+
}
61122

62-
// Find contiguous here strings @" -> "@
63-
MatchTokenElement(tokens, TokenKind.HereStringExpandable, RegionKindNone, ref refList);
123+
if (!IsBlockComment(index, tokens)) { continue; }
64124

65-
// Find matching comment regions #region -> #endregion
66-
MatchCustomCommentRegionTokenElements(tokens, RegionKindRegion, ref refList);
125+
// Regexes are very expensive. Use them sparingly!
126+
// Processing for #region -> #endregion
127+
if (s_startRegionTextRegex.IsMatch(token.Text))
128+
{
129+
tokenCommentRegionStack.Push(token);
130+
continue;
131+
}
132+
if (s_endRegionTextRegex.IsMatch(token.Text))
133+
{
134+
// Mismatched regions in the script can cause bad stacks.
135+
if (tokenCommentRegionStack.Count > 0)
136+
{
137+
refList.SafeAdd(CreateFoldingReference(tokenCommentRegionStack.Pop(), token, RegionKindRegion));
138+
}
139+
continue;
140+
}
67141

68-
// Find blocks of line comments # comment1\n# comment2\n...
69-
MatchBlockCommentTokenElement(tokens, RegionKindComment, ref refList);
142+
// If it's neither a start nor an end region then it could be a block line comment
143+
// Processing for blocks of line comments # comment1\n# comment2\n...
144+
int thisLine = token.Extent.StartLineNumber - 1;
145+
if ((blockStartToken != null) && (thisLine != blockNextLine))
146+
{
147+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
148+
blockStartToken = token;
149+
}
150+
if (blockStartToken == null) { blockStartToken = token; }
151+
blockNextLine = thisLine + 1;
152+
}
70153

71-
// Find comments regions <# -> #>
72-
MatchTokenElement(tokens, TokenKind.Comment, RegionKindComment, ref refList);
154+
// If we exit the token array and we're still processing comment lines, then the
155+
// comment block simply ends at the end of document
156+
if (blockStartToken != null)
157+
{
158+
refList.SafeAdd(CreateFoldingReference(blockStartToken, blockNextLine - 1, RegionKindComment));
159+
}
73160

74161
return refList;
75162
}
@@ -114,45 +201,6 @@ static private FoldingReference CreateFoldingReference(
114201
};
115202
}
116203

117-
/// <summary>
118-
/// Given an array of tokens, find matching regions which start (array of tokens) and end with a different TokenKind
119-
/// </summary>
120-
static private void MatchTokenElements(
121-
Token[] tokens,
122-
TokenKind[] startTokenKind,
123-
TokenKind endTokenKind,
124-
string matchKind,
125-
ref FoldingReferenceList refList)
126-
{
127-
Stack<Token> tokenStack = new Stack<Token>();
128-
foreach (Token token in tokens)
129-
{
130-
if (Array.IndexOf(startTokenKind, token.Kind) != -1) {
131-
tokenStack.Push(token);
132-
}
133-
if ((tokenStack.Count > 0) && (token.Kind == endTokenKind)) {
134-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
135-
}
136-
}
137-
}
138-
139-
/// <summary>
140-
/// Given an array of token, finds a specific token
141-
/// </summary>
142-
static private void MatchTokenElement(
143-
Token[] tokens,
144-
TokenKind tokenKind,
145-
string matchKind,
146-
ref FoldingReferenceList refList)
147-
{
148-
foreach (Token token in tokens)
149-
{
150-
if ((token.Kind == tokenKind) && (token.Extent.StartLineNumber != token.Extent.EndLineNumber)) {
151-
refList.SafeAdd(CreateFoldingReference(token, token, matchKind));
152-
}
153-
}
154-
}
155-
156204
/// <summary>
157205
/// Returns true if a Token is a block comment;
158206
/// - Must be a TokenKind.comment
@@ -167,77 +215,5 @@ static private bool IsBlockComment(int index, Token[] tokens) {
167215
if (tokens[index - 1].Kind != TokenKind.NewLine) { return false; }
168216
return thisToken.Text.StartsWith("#");
169217
}
170-
171-
// This regular expressions is used to detect a line comment (as opposed to an inline comment), that is not a region
172-
// block directive i.e.
173-
// - No text between the beginning of the line and `#`
174-
// - Comment does start with region
175-
// - Comment does start with endregion
176-
static private readonly Regex s_nonRegionLineCommentRegex = new Regex(
177-
@"\s*#(?!region\b|endregion\b)",
178-
RegexOptions.IgnoreCase | RegexOptions.Compiled);
179-
180-
/// <summary>
181-
/// Finding blocks of comment tokens is more complicated as the newline characters are not
182-
/// classed as comments. To workaround this we search for valid block comments (See IsBlockCmment)
183-
/// and then determine contiguous line numbers from there
184-
/// </summary>
185-
static private void MatchBlockCommentTokenElement(
186-
Token[] tokens,
187-
string matchKind,
188-
ref FoldingReferenceList refList)
189-
{
190-
Token startToken = null;
191-
int nextLine = -1;
192-
for (int index = 0; index < tokens.Length; index++)
193-
{
194-
Token thisToken = tokens[index];
195-
if (IsBlockComment(index, tokens) && s_nonRegionLineCommentRegex.IsMatch(thisToken.Text)) {
196-
int thisLine = thisToken.Extent.StartLineNumber - 1;
197-
if ((startToken != null) && (thisLine != nextLine)) {
198-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
199-
startToken = thisToken;
200-
}
201-
if (startToken == null) { startToken = thisToken; }
202-
nextLine = thisLine + 1;
203-
}
204-
}
205-
// If we exit the token array and we're still processing comment lines, then the
206-
// comment block simply ends at the end of document
207-
if (startToken != null) {
208-
refList.SafeAdd(CreateFoldingReference(startToken, nextLine - 1, matchKind));
209-
}
210-
}
211-
212-
/// <summary>
213-
/// Given a list of tokens, find the tokens that are comments and
214-
/// the comment text is either `# region` or `# endregion`, and then use a stack to determine
215-
/// the ranges they span
216-
/// </summary>
217-
static private void MatchCustomCommentRegionTokenElements(
218-
Token[] tokens,
219-
string matchKind,
220-
ref FoldingReferenceList refList)
221-
{
222-
// These regular expressions are used to match lines which mark the start and end of region comment in a PowerShell
223-
// script. They are based on the defaults in the VS Code Language Configuration at;
224-
// https://github.com/Microsoft/vscode/blob/64186b0a26/extensions/powershell/language-configuration.json#L26-L31
225-
string startRegionText = @"^\s*#region\b";
226-
string endRegionText = @"^\s*#endregion\b";
227-
228-
Stack<Token> tokenStack = new Stack<Token>();
229-
for (int index = 0; index < tokens.Length; index++)
230-
{
231-
if (IsBlockComment(index, tokens)) {
232-
Token token = tokens[index];
233-
if (Regex.IsMatch(token.Text, startRegionText, RegexOptions.IgnoreCase)) {
234-
tokenStack.Push(token);
235-
}
236-
if ((tokenStack.Count > 0) && (Regex.IsMatch(token.Text, endRegionText, RegexOptions.IgnoreCase))) {
237-
refList.SafeAdd(CreateFoldingReference(tokenStack.Pop(), token, matchKind));
238-
}
239-
}
240-
}
241-
}
242218
}
243219
}

test/PowerShellEditorServices.Test/Language/TokenOperationsTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ private FoldingReference[] GetRegions(string text) {
2020
text,
2121
Version.Parse("5.0"));
2222

23-
var result = Microsoft.PowerShell.EditorServices.TokenOperations.FoldableRegions(scriptFile.ScriptTokens).ToArray();
23+
var result = Microsoft.PowerShell.EditorServices.TokenOperations.FoldableReferences(scriptFile.ScriptTokens).ToArray();
2424
// The foldable regions need to be deterministic for testing so sort the array.
2525
Array.Sort(result);
2626
return result;

0 commit comments

Comments
 (0)