Skip to content

Commit 4638c67

Browse files
authored
Merge pull request #2552 from ahoppen/ahoppen/6.0/improve-visitor-performance
[6.0] Significantly improve performance of SyntaxVisitor
2 parents 3cfa2b3 + 205cf95 commit 4638c67

File tree

4 files changed

+811
-610
lines changed

4 files changed

+811
-610
lines changed

CodeGeneration/Sources/generate-swift-syntax/templates/swiftsyntax/SyntaxVisitorFile.swift

Lines changed: 110 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,32 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
3232
try! ClassDeclSyntax("open class SyntaxVisitor") {
3333
DeclSyntax("public let viewMode: SyntaxTreeViewMode")
3434

35+
DeclSyntax(
36+
"""
37+
/// `Syntax.Info` objects created in `visitChildren` but whose `Syntax` nodes were not retained by the `visit`
38+
/// functions implemented by a subclass of `SyntaxVisitor`.
39+
///
40+
/// Instead of deallocating them and allocating memory for new syntax nodes, store the allocated memory in an array.
41+
/// We can then re-use them to create new syntax nodes.
42+
///
43+
/// The array's size should be a typical nesting depth of a Swift file. That way we can store all allocated syntax
44+
/// nodes when unwinding the visitation stack.
45+
///
46+
/// The actual `info` stored in the `Syntax.Info` objects is garbage. It needs to be set when any of the `Syntax.Info`
47+
/// objects get re-used.
48+
private var recyclableNodeInfos: ContiguousArray<Syntax.Info?> = ContiguousArray(repeating: nil, count: 64)
49+
"""
50+
)
51+
52+
DeclSyntax(
53+
"""
54+
/// A bit is set to 1 if the corresponding index in `recyclableNodeInfos` is occupied and ready to be reused.
55+
///
56+
/// The least significant (rightmost) bit in this UInt64 corresponds to index 0 in `recyclableNodeInfos`.
57+
private var recyclableNodeInfosUsageBitmap: UInt64 = 0
58+
"""
59+
)
60+
3561
DeclSyntax(
3662
"""
3763
public init(viewMode: SyntaxTreeViewMode) {
@@ -45,7 +71,8 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
4571
/// Walk all nodes of the given syntax tree, calling the corresponding `visit`
4672
/// function for every node that is being visited.
4773
public func walk(_ node: some SyntaxProtocol) {
48-
visit(Syntax(node))
74+
var syntaxNode = Syntax(node)
75+
visit(&syntaxNode)
4976
}
5077
"""
5178
)
@@ -94,21 +121,30 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
94121

95122
DeclSyntax(
96123
"""
97-
/// Interpret `data` as a node of type `nodeType`, visit it, calling
124+
/// Cast `node` to a node of type `nodeType`, visit it, calling
98125
/// the `visit` and `visitPost` functions during visitation.
126+
///
127+
/// - Note: `node` is an `inout` parameter so that callers don't have to retain it before passing it to `visitImpl`.
128+
/// With it being an `inout` parameter, the caller and `visitImpl` can work on the same reference of `node` without
129+
/// any reference counting.
130+
/// - Note: Inline so that the optimizer can look through the calls to `visit` and `visitPost`, which means it
131+
/// doesn't need to retain `self` when forming closures to the unapplied function references on `self`.
132+
@inline(__always)
99133
private func visitImpl<NodeType: SyntaxProtocol>(
100-
_ node: Syntax,
134+
_ node: inout Syntax,
101135
_ nodeType: NodeType.Type,
102136
_ visit: (NodeType) -> SyntaxVisitorContinueKind,
103137
_ visitPost: (NodeType) -> Void
104138
) {
105-
let node = node.cast(NodeType.self)
106-
let needsChildren = (visit(node) == .visitChildren)
139+
let castedNode = node.cast(NodeType.self)
140+
// We retain castedNode.info here before passing it to visit.
141+
// I don't think that's necessary because castedNode is already retained but don't know how to prevent it.
142+
let needsChildren = (visit(castedNode) == .visitChildren)
107143
// Avoid calling into visitChildren if possible.
108144
if needsChildren && !node.raw.layoutView!.children.isEmpty {
109-
visitChildren(node)
145+
visitChildren(&node)
110146
}
111-
visitPost(node)
147+
visitPost(castedNode)
112148
}
113149
"""
114150
)
@@ -149,7 +185,7 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
149185
/// that determines the correct visitation function will be popped off the
150186
/// stack before the function is being called, making the switch's stack
151187
/// space transient instead of having it linger in the call stack.
152-
private func visitationFunc(for node: Syntax) -> ((Syntax) -> Void)
188+
private func visitationFunc(for node: Syntax) -> ((inout Syntax) -> Void)
153189
"""
154190
) {
155191
try SwitchExprSyntax("switch node.raw.kind") {
@@ -168,16 +204,16 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
168204

169205
for node in NON_BASE_SYNTAX_NODES {
170206
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
171-
StmtSyntax("return { self.visitImpl($0, \(node.kind.syntaxType).self, self.visit, self.visitPost) }")
207+
StmtSyntax("return { self.visitImpl(&$0, \(node.kind.syntaxType).self, self.visit, self.visitPost) }")
172208
}
173209
}
174210
}
175211
}
176212

177213
DeclSyntax(
178214
"""
179-
private func visit(_ node: Syntax) {
180-
return visitationFunc(for: node)(node)
215+
private func visit(_ node: inout Syntax) {
216+
return visitationFunc(for: node)(&node)
181217
}
182218
"""
183219
)
@@ -188,7 +224,12 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
188224
poundKeyword: .poundElseToken(),
189225
elements: .statements(
190226
CodeBlockItemListSyntax {
191-
try! FunctionDeclSyntax("private func visit(_ node: Syntax)") {
227+
try! FunctionDeclSyntax(
228+
"""
229+
/// - Note: `node` is `inout` to avoid ref-counting. See comment in `visitImpl`
230+
private func visit(_ node: inout Syntax)
231+
"""
232+
) {
192233
try SwitchExprSyntax("switch node.raw.kind") {
193234
SwitchCaseSyntax("case .token:") {
194235
DeclSyntax("let node = node.cast(TokenSyntax.self)")
@@ -203,7 +244,7 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
203244

204245
for node in NON_BASE_SYNTAX_NODES {
205246
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
206-
ExprSyntax("visitImpl(node, \(node.kind.syntaxType).self, visit, visitPost)")
247+
ExprSyntax("visitImpl(&node, \(node.kind.syntaxType).self, visit, visitPost)")
207248
}
208249
}
209250
}
@@ -217,13 +258,66 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
217258

218259
DeclSyntax(
219260
"""
220-
private func visitChildren(_ node: some SyntaxProtocol) {
221-
let syntaxNode = Syntax(node)
261+
/// - Note: `syntaxNode` is `inout` to avoid reference counting. See comment in `visitImpl`.
262+
private func visitChildren(_ syntaxNode: inout Syntax) {
222263
for childRaw in NonNilRawSyntaxChildren(syntaxNode, viewMode: viewMode) {
223-
visit(Syntax(childRaw, parent: syntaxNode))
264+
// syntaxNode gets retained here. That seems unnecessary but I don't know how to remove it.
265+
var childNode: Syntax
266+
if let recycledInfoIndex = recyclableNodeInfosUsageBitmap.indexOfRightmostOne {
267+
var recycledInfo: Syntax.Info? = nil
268+
// Use `swap` to extract the recyclable syntax node without incurring ref-counting.
269+
swap(&recycledInfo, &recyclableNodeInfos[recycledInfoIndex])
270+
assert(recycledInfo != nil, "Slot indicated by the bitmap did not contain a value")
271+
recyclableNodeInfosUsageBitmap.setBitToZero(at: recycledInfoIndex)
272+
// syntaxNode.info gets retained here. This is necessary because we build up the parent tree.
273+
recycledInfo!.info = .nonRoot(.init(parent: syntaxNode, absoluteInfo: childRaw.info))
274+
childNode = Syntax(childRaw.raw, info: recycledInfo!)
275+
} else {
276+
childNode = Syntax(childRaw, parent: syntaxNode)
277+
}
278+
visit(&childNode)
279+
if isKnownUniquelyReferenced(&childNode.info) {
280+
// The node didn't get stored by the subclass's visit method. We can re-use the memory of its `Syntax.Info`
281+
// for future syntax nodes.
282+
childNode.info.info = nil
283+
if let emptySlot = recyclableNodeInfosUsageBitmap.indexOfRightmostZero {
284+
// Use `swap` to store the recyclable syntax node without incurring ref-counting.
285+
swap(&recyclableNodeInfos[emptySlot], &childNode.info)
286+
assert(childNode.info == nil, "Slot should not have contained a value")
287+
recyclableNodeInfosUsageBitmap.setBitToOne(at: emptySlot)
288+
}
289+
}
224290
}
225291
}
226292
"""
227293
)
228294
}
295+
296+
DeclSyntax(
297+
"""
298+
fileprivate extension UInt64 {
299+
var indexOfRightmostZero: Int? {
300+
return (~self).indexOfRightmostOne
301+
}
302+
303+
var indexOfRightmostOne: Int? {
304+
let trailingZeroCount = self.trailingZeroBitCount
305+
if trailingZeroCount == Self.bitWidth {
306+
// All bits are 0
307+
return nil
308+
}
309+
return trailingZeroCount
310+
}
311+
312+
mutating func setBitToZero(at index: Int) {
313+
self &= ~(1 << index)
314+
}
315+
316+
mutating func setBitToOne(at index: Int) {
317+
self |= 1 << index
318+
}
319+
}
320+
321+
"""
322+
)
229323
}

Sources/SwiftSyntax/Syntax.swift

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414
/// Each node has accessors for its known children, and allows efficient
1515
/// iteration over the children through its `children` property.
1616
public struct Syntax: SyntaxProtocol, SyntaxHashable {
17-
fileprivate enum Info: Sendable {
18-
case root(Root)
19-
indirect case nonRoot(NonRoot)
20-
17+
/// We need a heap indirection to store a syntax node's parent. We could use an indirect enum here but explicitly
18+
/// modelling it using a class allows us to re-use these heap-allocated objects in `SyntaxVisitor`.
19+
final class Info: Sendable {
2120
// For root node.
2221
struct Root: Sendable {
2322
private var arena: RetainedSyntaxArena
@@ -32,27 +31,50 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
3231
var parent: Syntax
3332
var absoluteInfo: AbsoluteSyntaxInfo
3433
}
34+
35+
enum InfoImpl: Sendable {
36+
case root(Root)
37+
case nonRoot(NonRoot)
38+
}
39+
40+
init(_ info: InfoImpl) {
41+
self.info = info
42+
}
43+
44+
/// The actual stored information that references the parent or the tree's root.
45+
///
46+
/// - Important: Must only be set to `nil` when `Syntax.Info` is used in a memory recycling pool
47+
/// (e.g. in `SyntaxVisitor`). In that case the `Syntax.Info` is considered garbage memory that can be re-used
48+
/// later. `info` needs to be set to a real value when `Syntax.Info` is recycled from the memory recycling pool.
49+
var info: InfoImpl!
3550
}
3651

37-
private let info: Info
52+
/// Reference to the node's parent or, if this node is the root of a tree, a reference to the `SyntaxArena` to keep
53+
/// the syntax tree alive.
54+
///
55+
/// - Important: In almost all use cases you should not access this directly. Prefer accessors like `parent`.
56+
/// - Important: Must only be set to `nil` when this `Syntax` node is known to get destroyed and the `Info` should be
57+
/// stored in a memory recycling pool (e.g. in `SyntaxVisitor`). After setting `info` to `nil`, this `Syntax` node
58+
/// is considered garbage and should not be accessed anymore in any way.
59+
var info: Info!
3860
let raw: RawSyntax
3961

4062
private var rootInfo: Info.Root {
41-
switch info {
63+
switch info.info! {
4264
case .root(let info): return info
4365
case .nonRoot(let info): return info.parent.rootInfo
4466
}
4567
}
4668

4769
private var nonRootInfo: Info.NonRoot? {
48-
switch info {
70+
switch info.info! {
4971
case .root(_): return nil
5072
case .nonRoot(let info): return info
5173
}
5274
}
5375

5476
private var root: Syntax {
55-
switch info {
77+
switch info.info! {
5678
case .root(_): return self
5779
case .nonRoot(let info): return info.parent.root
5880
}
@@ -99,13 +121,13 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
99121
}
100122

101123
/// "designated" memberwise initializer of `Syntax`.
102-
private init(_ raw: RawSyntax, info: Info) {
124+
init(_ raw: RawSyntax, info: Info) {
103125
self.raw = raw
104126
self.info = info
105127
}
106128

107129
init(_ raw: RawSyntax, parent: Syntax, absoluteInfo: AbsoluteSyntaxInfo) {
108-
self.init(raw, info: .nonRoot(.init(parent: parent, absoluteInfo: absoluteInfo)))
130+
self.init(raw, info: Info(.nonRoot(.init(parent: parent, absoluteInfo: absoluteInfo))))
109131
}
110132

111133
/// Creates a `Syntax` with the provided raw syntax and parent.
@@ -125,12 +147,12 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
125147
/// has a chance to retain it.
126148
static func forRoot(_ raw: RawSyntax, rawNodeArena: RetainedSyntaxArena) -> Syntax {
127149
precondition(rawNodeArena == raw.arenaReference)
128-
return Syntax(raw, info: .root(.init(arena: rawNodeArena)))
150+
return Syntax(raw, info: Info(.root(.init(arena: rawNodeArena))))
129151
}
130152

131153
static func forRoot(_ raw: RawSyntax, rawNodeArena: SyntaxArena) -> Syntax {
132154
precondition(rawNodeArena == raw.arenaReference)
133-
return Syntax(raw, info: .root(.init(arena: RetainedSyntaxArena(rawNodeArena))))
155+
return Syntax(raw, info: Info(.root(.init(arena: RetainedSyntaxArena(rawNodeArena)))))
134156
}
135157

136158
/// Returns the child data at the provided index in this data's layout.
@@ -252,6 +274,9 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
252274
}
253275

254276
/// Create a ``Syntax`` node from a specialized syntax node.
277+
// Inline always so the optimizer can optimize this to a member access on `syntax` without having to go through
278+
// generics.
279+
@inline(__always)
255280
public init(_ syntax: some SyntaxProtocol) {
256281
self = syntax._syntaxNode
257282
}

Sources/SwiftSyntax/SyntaxChildren.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,8 @@ struct NonNilRawSyntaxChildren: BidirectionalCollection, Sendable {
411411
self.viewMode = viewMode
412412
}
413413

414+
/// - Note: Inline so we don't retain `Syntax.Info` when creating `NonNilRawSyntaxChildren` from a `Syntax`.
415+
@inline(__always)
414416
init(_ node: Syntax, viewMode: SyntaxTreeViewMode) {
415417
self.init(node.absoluteRaw, viewMode: viewMode)
416418
}

0 commit comments

Comments
 (0)