Skip to content

Commit b892dda

Browse files
committed
Significantly improve performance of SyntaxVisitor
There are two core ideas here: - Before this change, we were allocating `Syntax.Info` for every visited node. We need a heap-allocated object here because `Syntax` can access its parent. In most cases, however, the syntax node was not stored by the visitor and we would just destroy the `Syntax.Info` after finishing the `visit` call. Instead, check if `Syntax.Info` is uniquely referenced, which means that the the node was not stored. In that case, don’t deallocate the memory but place it into a reuse pool. I have seen reductions in `malloc` calls from ~50,000 to ~100 (500x) in `testEmptyVisitorPerformance`. - Now retain and release calls made up a significant portion of the visitor’s performance. Add some annotations to eliminate reference counting. I added comments for the remaining retain calls. I measured that this improves the performance of the `EmptyParametersRule` in SwiftLint by ~35% compared to `509.0.0` (and by ~50% compared to `510.0.0`). Fixes #2091 rdar://114330163
1 parent d9bf34e commit b892dda

File tree

4 files changed

+739
-610
lines changed

4 files changed

+739
-610
lines changed

CodeGeneration/Sources/generate-swift-syntax/templates/swiftsyntax/SyntaxVisitorFile.swift

Lines changed: 70 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,24 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
3232
try! ClassDeclSyntax("open class SyntaxVisitor") {
3333
DeclSyntax("public let viewMode: SyntaxTreeViewMode")
3434

35+
DeclSyntax(
36+
"""
37+
/// `Syntax.Info` objects created in `visitChildren` but whose `Syntax` nodes were not retained by the `visit`
38+
/// functions implemented by a subclass of `SyntaxVisitor`.
39+
///
40+
/// Instead of deallocating them and allocating memory for new syntax nodes, store the allocated memory in an array.
41+
/// We can then re-use them to create new syntax nodes.
42+
///
43+
/// The array's size should be a typical nesting depth of a Swift file. That way we can store all allocated syntax
44+
/// nodes when unwinding the visitation stack. It shouldn't be much larger because that would mean that we need to
45+
/// look through more memory to find a cache miss. 40 has been chosen empirically to strike a good balance here.
46+
///
47+
/// The actual `info` stored in the `Syntax.Info` objects is garbage. It needs to be set when any of the `Syntax.Info`
48+
/// objects get re-used.
49+
private var recyclableNodeInfos: ContiguousArray<Syntax.Info?> = ContiguousArray(repeating: nil, count: 40)
50+
"""
51+
)
52+
3553
DeclSyntax(
3654
"""
3755
public init(viewMode: SyntaxTreeViewMode) {
@@ -45,7 +63,8 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
4563
/// Walk all nodes of the given syntax tree, calling the corresponding `visit`
4664
/// function for every node that is being visited.
4765
public func walk(_ node: some SyntaxProtocol) {
48-
visit(Syntax(node))
66+
var syntaxNode = Syntax(node)
67+
visit(&syntaxNode)
4968
}
5069
"""
5170
)
@@ -94,21 +113,30 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
94113

95114
DeclSyntax(
96115
"""
97-
/// Interpret `data` as a node of type `nodeType`, visit it, calling
116+
/// Cast `node` to a node of type `nodeType`, visit it, calling
98117
/// the `visit` and `visitPost` functions during visitation.
118+
///
119+
/// - Note: node is an `inout` parameter so that callers don't have to retain it before passing it to `visitImpl`.
120+
/// With it being an `inout` parameter, the caller and `visitImpl` can work on the same reference of `node` without
121+
/// any reference counting.
122+
/// - Note: Inline so that the optimizer can look through the calles to `visit` and `visitPost`, which means it
123+
/// doesn't need to retain `self` when forming closures to the unapplied function references on `self`.
124+
@inline(__always)
99125
private func visitImpl<NodeType: SyntaxProtocol>(
100-
_ node: Syntax,
126+
_ node: inout Syntax,
101127
_ nodeType: NodeType.Type,
102128
_ visit: (NodeType) -> SyntaxVisitorContinueKind,
103129
_ visitPost: (NodeType) -> Void
104130
) {
105-
let node = node.cast(NodeType.self)
106-
let needsChildren = (visit(node) == .visitChildren)
131+
let castedNode = node.cast(NodeType.self)
132+
// We retain castedNode.info here before passing it to visit.
133+
// I don't think that's necessary because castedNode is already retained but don't know how to prevent it.
134+
let needsChildren = (visit(castedNode) == .visitChildren)
107135
// Avoid calling into visitChildren if possible.
108136
if needsChildren && !node.raw.layoutView!.children.isEmpty {
109-
visitChildren(node)
137+
visitChildren(&node)
110138
}
111-
visitPost(node)
139+
visitPost(castedNode)
112140
}
113141
"""
114142
)
@@ -149,7 +177,7 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
149177
/// that determines the correct visitation function will be popped of the
150178
/// stack before the function is being called, making the switch's stack
151179
/// space transient instead of having it linger in the call stack.
152-
private func visitationFunc(for node: Syntax) -> ((Syntax) -> Void)
180+
private func visitationFunc(for node: Syntax) -> ((inout Syntax) -> Void)
153181
"""
154182
) {
155183
try SwitchExprSyntax("switch node.raw.kind") {
@@ -168,16 +196,16 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
168196

169197
for node in NON_BASE_SYNTAX_NODES {
170198
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
171-
StmtSyntax("return { self.visitImpl($0, \(node.kind.syntaxType).self, self.visit, self.visitPost) }")
199+
StmtSyntax("return { self.visitImpl(&$0, \(node.kind.syntaxType).self, self.visit, self.visitPost) }")
172200
}
173201
}
174202
}
175203
}
176204

177205
DeclSyntax(
178206
"""
179-
private func visit(_ node: Syntax) {
180-
return visitationFunc(for: node)(node)
207+
private func visit(_ node: inout Syntax) {
208+
return visitationFunc(for: node)(&node)
181209
}
182210
"""
183211
)
@@ -188,7 +216,12 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
188216
poundKeyword: .poundElseToken(),
189217
elements: .statements(
190218
CodeBlockItemListSyntax {
191-
try! FunctionDeclSyntax("private func visit(_ node: Syntax)") {
219+
try! FunctionDeclSyntax(
220+
"""
221+
/// - Note: `node` is `inout` to avoid ref-counting. See comment in `visitImpl`
222+
private func visit(_ node: inout Syntax)
223+
"""
224+
) {
192225
try SwitchExprSyntax("switch node.raw.kind") {
193226
SwitchCaseSyntax("case .token:") {
194227
DeclSyntax("let node = node.cast(TokenSyntax.self)")
@@ -203,7 +236,7 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
203236

204237
for node in NON_BASE_SYNTAX_NODES {
205238
SwitchCaseSyntax("case .\(node.varOrCaseName):") {
206-
ExprSyntax("visitImpl(node, \(node.kind.syntaxType).self, visit, visitPost)")
239+
ExprSyntax("visitImpl(&node, \(node.kind.syntaxType).self, visit, visitPost)")
207240
}
208241
}
209242
}
@@ -217,10 +250,31 @@ let syntaxVisitorFile = SourceFileSyntax(leadingTrivia: copyrightHeader) {
217250

218251
DeclSyntax(
219252
"""
220-
private func visitChildren(_ node: some SyntaxProtocol) {
221-
let syntaxNode = Syntax(node)
253+
/// - Note: `node` is `inout` to avoid reference counting. See comment in `visitImpl`.
254+
private func visitChildren(_ syntaxNode: inout Syntax) {
222255
for childRaw in NonNilRawSyntaxChildren(syntaxNode, viewMode: viewMode) {
223-
visit(Syntax(childRaw, parent: syntaxNode))
256+
// syntaxNode gets retained here. That seems unnecessary but I don't know how to remove it.
257+
var childNode: Syntax
258+
if let recycledInfoIndex = recyclableNodeInfos.firstIndex(where: { $0 != nil }) {
259+
var recycledInfo: Syntax.Info? = nil
260+
// Use `swap` to extract the recyclable syntax node without incurring ref-counting.
261+
swap(&recycledInfo, &recyclableNodeInfos[recycledInfoIndex])
262+
// syntaxNode.info gets retained here. This is necessary because we build up the parent tree.
263+
recycledInfo!.info = .nonRoot(.init(parent: syntaxNode, absoluteInfo: childRaw.info))
264+
childNode = Syntax(childRaw.raw, info: recycledInfo!)
265+
} else {
266+
childNode = Syntax(childRaw, parent: syntaxNode)
267+
}
268+
visit(&childNode)
269+
if isKnownUniquelyReferenced(&childNode.info) {
270+
// The node didn't get stored by the subclass's visit method. We can re-use the memory of its `Syntax.Info`
271+
// for future syntax nodes.
272+
childNode.info.info = nil
273+
if let emptySlot = recyclableNodeInfos.firstIndex(where: { $0 == nil }) {
274+
// Use `swap` to store the recyclable syntax node without incurring ref-counting.
275+
swap(&recyclableNodeInfos[emptySlot], &childNode.info)
276+
}
277+
}
224278
}
225279
}
226280
"""

Sources/SwiftSyntax/Syntax.swift

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,9 @@
1414
/// Each node has accessors for its known children, and allows efficient
1515
/// iteration over the children through its `children` property.
1616
public struct Syntax: SyntaxProtocol, SyntaxHashable {
17-
fileprivate enum Info: Sendable {
18-
case root(Root)
19-
indirect case nonRoot(NonRoot)
20-
17+
/// We need a heap indirection to store a syntax node's parent. We could use an indirect enum here but explicitly
18+
/// modelling it using a class allows us to re-use these heap-allocated objects in `SyntaxVisitor`.
19+
final class Info: Sendable {
2120
// For root node.
2221
struct Root: Sendable {
2322
private var arena: RetainedSyntaxArena
@@ -32,27 +31,50 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
3231
var parent: Syntax
3332
var absoluteInfo: AbsoluteSyntaxInfo
3433
}
34+
35+
enum InfoImpl: Sendable {
36+
case root(Root)
37+
case nonRoot(NonRoot)
38+
}
39+
40+
init(_ info: InfoImpl) {
41+
self.info = info
42+
}
43+
44+
/// The actual stored information that references the parent or the tree's root.
45+
///
46+
/// - Important: Must only be set to `nil` when `Syntax.Info` is used in a memory recycling pool
47+
/// (eg. in `SyntaxVisitor`). In that case the `Syntax.Info` is considered garbage memory that can be re-used
48+
/// later. `info` needs to be set to a real value when `Syntax.Info` is recycled from the memory recycling pool.
49+
var info: InfoImpl!
3550
}
3651

37-
private let info: Info
52+
/// Reference to the node's parent or, if this node is the root of a tree, a reference to the `SyntaxArena` to keep
53+
/// the syntax tree alive.
54+
///
55+
/// - Important: In almost all use cases you should not access this directly. Prefer accessors like `parent`.
56+
/// - Important: Must only be set to `nil` when this `Syntax` node is known to get destroyed and the `Info` should be
57+
/// stored in a memory recycling pool (eg. in `SyntaxVisitor`). After setting `info` to `nil`, this `Syntax` node
58+
/// is considered garbage and should not be accessed anymore in any way.
59+
var info: Info!
3860
let raw: RawSyntax
3961

4062
private var rootInfo: Info.Root {
41-
switch info {
63+
switch info.info! {
4264
case .root(let info): return info
4365
case .nonRoot(let info): return info.parent.rootInfo
4466
}
4567
}
4668

4769
private var nonRootInfo: Info.NonRoot? {
48-
switch info {
70+
switch info.info! {
4971
case .root(_): return nil
5072
case .nonRoot(let info): return info
5173
}
5274
}
5375

5476
private var root: Syntax {
55-
switch info {
77+
switch info.info! {
5678
case .root(_): return self
5779
case .nonRoot(let info): return info.parent.root
5880
}
@@ -99,13 +121,13 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
99121
}
100122

101123
/// "designated" memberwise initializer of `Syntax`.
102-
private init(_ raw: RawSyntax, info: Info) {
124+
init(_ raw: RawSyntax, info: Info) {
103125
self.raw = raw
104126
self.info = info
105127
}
106128

107129
init(_ raw: RawSyntax, parent: Syntax, absoluteInfo: AbsoluteSyntaxInfo) {
108-
self.init(raw, info: .nonRoot(.init(parent: parent, absoluteInfo: absoluteInfo)))
130+
self.init(raw, info: Info(.nonRoot(.init(parent: parent, absoluteInfo: absoluteInfo))))
109131
}
110132

111133
/// Creates a `Syntax` with the provided raw syntax and parent.
@@ -125,12 +147,12 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
125147
/// has a chance to retain it.
126148
static func forRoot(_ raw: RawSyntax, rawNodeArena: RetainedSyntaxArena) -> Syntax {
127149
precondition(rawNodeArena == raw.arenaReference)
128-
return Syntax(raw, info: .root(.init(arena: rawNodeArena)))
150+
return Syntax(raw, info: Info(.root(.init(arena: rawNodeArena))))
129151
}
130152

131153
static func forRoot(_ raw: RawSyntax, rawNodeArena: SyntaxArena) -> Syntax {
132154
precondition(rawNodeArena == raw.arenaReference)
133-
return Syntax(raw, info: .root(.init(arena: RetainedSyntaxArena(rawNodeArena))))
155+
return Syntax(raw, info: Info(.root(.init(arena: RetainedSyntaxArena(rawNodeArena)))))
134156
}
135157

136158
/// Returns the child data at the provided index in this data's layout.
@@ -252,6 +274,9 @@ public struct Syntax: SyntaxProtocol, SyntaxHashable {
252274
}
253275

254276
/// Create a ``Syntax`` node from a specialized syntax node.
277+
// Inline always so the optimizer can optimize this to a member access on `syntax` without having to go through
278+
// generics.
279+
@inline(__always)
255280
public init(_ syntax: some SyntaxProtocol) {
256281
self = syntax._syntaxNode
257282
}

Sources/SwiftSyntax/SyntaxChildren.swift

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,8 @@ struct NonNilRawSyntaxChildren: BidirectionalCollection, Sendable {
411411
self.viewMode = viewMode
412412
}
413413

414+
/// - Note: Inline so we don't retain `Syntax.Info` when creating `NonNilRawSyntaxChildren` from a `Syntax`.
415+
@inline(__always)
414416
init(_ node: Syntax, viewMode: SyntaxTreeViewMode) {
415417
self.init(node.absoluteRaw, viewMode: viewMode)
416418
}

0 commit comments

Comments
 (0)