From 1f7527b24abc57e2c6f310de5a949a55126ab0d9 Mon Sep 17 00:00:00 2001 From: Yuta Saito Date: Thu, 8 Aug 2024 23:33:15 +0000 Subject: [PATCH 1/4] [XMLParser] Use `TaskLocal` for storing the current parser Instead of thread-local storage, use `TaskLocal` to store the current parser. This solves three issues: 1. If someone calls `XMLParser.parse()` with a new parser instance in a delegate method call, it overwrote the current parser and wrote it back after the call as `nil`, not the previous current parser. This reentrancy issue can be a problem especially when someone uses external entity resolving since the feature depends on the current parser tracking. Using `TaskLocal` solves this issue since it tracks values as a stack and restores the previous value at the end of the `withValue` call. 2. Since jobs of different tasks can be scheduled on the same thread, different tasks can refer to the same thread-local storage. This wouldn't be a problem for now since the `parse()` method doesn't have any suspention points and different tasks can't run on the same thread during the parsing. However, it's better to use `TaskLocal` to leverage the concurrency model of Swift. 3. The global variable `_currentParser` existed in the WASI platform path but it's unsafe in the Swift concurrency model. It wouldn't be a problem on WASI since it's always single-threaded, we should avoid platform-specific assumption as much as possible. --- Sources/FoundationXML/XMLParser.swift | 85 ++++++++++----------------- Tests/Foundation/TestXMLParser.swift | 43 +++++++++++++- 2 files changed, 74 insertions(+), 54 deletions(-) diff --git a/Sources/FoundationXML/XMLParser.swift b/Sources/FoundationXML/XMLParser.swift index d89d0ee1f4..e3d718a86f 100644 --- a/Sources/FoundationXML/XMLParser.swift +++ b/Sources/FoundationXML/XMLParser.swift @@ -398,9 +398,7 @@ extension XMLParser : @unchecked Sendable { } open class XMLParser : NSObject { private var _handler: _CFXMLInterfaceSAXHandler -#if !os(WASI) internal var _stream: InputStream? -#endif internal var _data: Data? internal var _chunkSize = Int(4096 * 32) // a suitably large number for a decent chunk size @@ -469,33 +467,35 @@ open class XMLParser : NSObject { open var externalEntityResolvingPolicy: ExternalEntityResolvingPolicy = .never open var allowedExternalEntityURLs: Set? - -#if os(WASI) - private static var _currentParser: XMLParser? -#endif - internal static func currentParser() -> XMLParser? { -#if os(WASI) - return _currentParser -#else - if let current = Thread.current.threadDictionary["__CurrentNSXMLParser"] { - return current as? XMLParser - } else { - return nil + /// The current parser is stored in a task local variable to allow for + /// concurrent parsing in different tasks with different parsers. + /// + /// Rationale for `@unchecked Sendable`: + /// While the ``XMLParser`` class itself is not `Sendable`, `TaskLocal` + /// requires the value type to be `Sendable`. The sendability requirement + /// of `TaskLocal` is only for the "default" value and values set with + /// `withValue` will not be shared between tasks. + /// So as long as 1. the default value is safe to be shared between tasks + /// and 2. the `Sendable` conformance of `_CurrentParser` is not used + /// outside of `TaskLocal`, it is safe to mark it as `@unchecked Sendable`. + private struct _CurrentParser: @unchecked Sendable { + let parser: XMLParser? + + static var `default`: _CurrentParser { + return _CurrentParser(parser: nil) } -#endif + } + + @TaskLocal + private static var _currentParser: _CurrentParser = .default + + internal static func currentParser() -> XMLParser? { + return _currentParser.parser } - internal static func setCurrentParser(_ parser: XMLParser?) { -#if os(WASI) - _currentParser = parser -#else - if let p = parser { - Thread.current.threadDictionary["__CurrentNSXMLParser"] = p - } else { - Thread.current.threadDictionary.removeObject(forKey: "__CurrentNSXMLParser") - } -#endif + internal static func withCurrentParser(_ parser: XMLParser, _ body: () -> R) -> R { + return self.$_currentParser.withValue(_CurrentParser(parser: parser), operation: body) } internal func _handleParseResult(_ parseResult: Int32) -> Bool { @@ -569,7 +569,6 @@ open class XMLParser : NSObject { return result } -#if !os(WASI) internal func parseFrom(_ stream : InputStream) -> Bool { var result = true @@ -598,37 +597,17 @@ open class XMLParser : NSObject { return result } -#else - internal func parse(from data: Data) -> Bool { - var result = true - var chunkStart = 0 - var chunkEnd = min(_chunkSize, data.count) - while result && chunkStart < chunkEnd { - let chunk = data[chunkStart.. Bool { -#if os(WASI) - return _data.map { parse(from: $0) } ?? false -#else - XMLParser.setCurrentParser(self) - defer { XMLParser.setCurrentParser(nil) } - - if _stream != nil { - return parseFrom(_stream!) - } else if _data != nil { - return parseData(_data!, lastChunkOfData: true) + return Self.withCurrentParser(self) { + if _stream != nil { + return parseFrom(_stream!) + } else if _data != nil { + return parseData(_data!, lastChunkOfData: true) + } + return false } - - return false -#endif } // called by the delegate to stop the parse. The delegate will get an error message sent to it. diff --git a/Tests/Foundation/TestXMLParser.swift b/Tests/Foundation/TestXMLParser.swift index c98741eb38..df3685a82e 100644 --- a/Tests/Foundation/TestXMLParser.swift +++ b/Tests/Foundation/TestXMLParser.swift @@ -198,5 +198,46 @@ class TestXMLParser : XCTestCase { ElementNameChecker("noPrefix").check() ElementNameChecker("myPrefix:myLocalName").check() } - + + func testExternalEntity() throws { + class Delegate: XMLParserDelegateEventStream { + override func parserDidStartDocument(_ parser: XMLParser) { + // Start a child parser, updating `currentParser` to the child parser + // to ensure that `currentParser` won't be reset to `nil`, which would + // ignore any external entity related configuration. + let childParser = XMLParser(data: "".data(using: .utf8)!) + XCTAssertTrue(childParser.parse()) + super.parserDidStartDocument(parser) + } + } + try withTemporaryDirectory { dir, _ in + let greetingPath = dir.appendingPathComponent("greeting.xml") + try Data("".utf8).write(to: greetingPath) + let xml = """ + + + ]> + &greeting; + """ + + let parser = XMLParser(data: xml.data(using: .utf8)!) + // Explicitly disable external entity resolving + parser.externalEntityResolvingPolicy = .never + let delegate = Delegate() + parser.delegate = delegate + // The parse result changes depending on the libxml2 version + // because of the following libxml2 commit (shipped in libxml2 2.9.10): + // https://gitlab.gnome.org/GNOME/libxml2/-/commit/eddfbc38fa7e84ccd480eab3738e40d1b2c83979 + // So we don't check the parse result here. + _ = parser.parse() + XCTAssertEqual(delegate.events, [ + .startDocument, + .didStartElement("doc", nil, nil, [:]), + // Should not have parsed the external entity + .didEndElement("doc", nil, nil), + .endDocument, + ]) + } + } } From 4e9d4270e7dd5dc18631b33c57427b973ee5a1d0 Mon Sep 17 00:00:00 2001 From: Yuta Saito Date: Fri, 9 Aug 2024 01:51:50 +0000 Subject: [PATCH 2/4] Remove unnecessary `#if os(WASI)` condition in XMLParser.swift --- Sources/FoundationXML/XMLParser.swift | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Sources/FoundationXML/XMLParser.swift b/Sources/FoundationXML/XMLParser.swift index e3d718a86f..39eea6c3d8 100644 --- a/Sources/FoundationXML/XMLParser.swift +++ b/Sources/FoundationXML/XMLParser.swift @@ -412,9 +412,6 @@ open class XMLParser : NSObject { // initializes the parser with the specified URL. public convenience init?(contentsOf url: URL) { -#if os(WASI) - return nil -#else setupXMLParsing() if url.isFileURL { if let stream = InputStream(url: url) { @@ -432,7 +429,6 @@ open class XMLParser : NSObject { return nil } } -#endif } // create the parser from data @@ -448,7 +444,6 @@ open class XMLParser : NSObject { _CFXMLInterfaceDestroyContext(_parserContext) } -#if !os(WASI) //create a parser that incrementally pulls data from the specified stream and parses it. public init(stream: InputStream) { setupXMLParsing() @@ -456,7 +451,6 @@ open class XMLParser : NSObject { _handler = _CFXMLInterfaceCreateSAXHandler() _parserContext = nil } -#endif open weak var delegate: XMLParserDelegate? From fe0e1d0f9ad5537e024c897e95ebcca1cb8c0fb2 Mon Sep 17 00:00:00 2001 From: Yuta Saito Date: Fri, 23 Aug 2024 06:20:59 +0000 Subject: [PATCH 3/4] Keep the current parser in TLS instead of TaskLocal TaskLocal storage is inherited by non-detached child tasks, which can lead to the parser being shared between tasks. This is not our intention and can lead to inconsistent state. Instead, we should keep the current parser in thread-local storage. This should be safe as long as we don't have any structured suspension points in `withCurrentParser` block. --- Sources/FoundationXML/XMLParser.swift | 65 ++++++++++++++++++--------- 1 file changed, 44 insertions(+), 21 deletions(-) diff --git a/Sources/FoundationXML/XMLParser.swift b/Sources/FoundationXML/XMLParser.swift index 39eea6c3d8..952c25cd58 100644 --- a/Sources/FoundationXML/XMLParser.swift +++ b/Sources/FoundationXML/XMLParser.swift @@ -462,34 +462,57 @@ open class XMLParser : NSObject { open var allowedExternalEntityURLs: Set? - /// The current parser is stored in a task local variable to allow for - /// concurrent parsing in different tasks with different parsers. - /// - /// Rationale for `@unchecked Sendable`: - /// While the ``XMLParser`` class itself is not `Sendable`, `TaskLocal` - /// requires the value type to be `Sendable`. The sendability requirement - /// of `TaskLocal` is only for the "default" value and values set with - /// `withValue` will not be shared between tasks. - /// So as long as 1. the default value is safe to be shared between tasks - /// and 2. the `Sendable` conformance of `_CurrentParser` is not used - /// outside of `TaskLocal`, it is safe to mark it as `@unchecked Sendable`. - private struct _CurrentParser: @unchecked Sendable { - let parser: XMLParser? - - static var `default`: _CurrentParser { - return _CurrentParser(parser: nil) + /// The current parser context for the current thread. + private class _CurrentParserContext { + var _stack: [XMLParser] = [] + var _current: XMLParser? { + return _stack.last } } - @TaskLocal - private static var _currentParser: _CurrentParser = .default + #if os(WASI) + /// The current parser associated with the current thread. (assuming no multi-threading) + /// FIXME: Unify the implementation with the other platforms once we unlock `threadDictionary` + /// or migrate to `FoundationEssentials._ThreadLocal`. + private static nonisolated(unsafe) var _currentParserContext: _CurrentParserContext? + #else + /// The current parser associated with the current thread. + private static var _currentParserContext: _CurrentParserContext? { + get { + return Thread.current.threadDictionary["__CurrentNSXMLParser"] as? _CurrentParserContext + } + set { + Thread.current.threadDictionary["__CurrentNSXMLParser"] = newValue + } + } + #endif + /// The current parser associated with the current thread. internal static func currentParser() -> XMLParser? { - return _currentParser.parser + if let ctx = _currentParserContext { + return ctx._current + } + return nil } - + + /// Execute the given closure with the current parser set to the given parser. internal static func withCurrentParser(_ parser: XMLParser, _ body: () -> R) -> R { - return self.$_currentParser.withValue(_CurrentParser(parser: parser), operation: body) + var ctx: _CurrentParserContext + if let current = _currentParserContext { + // Use the existing context if it exists + ctx = current + } else { + // Create a new context in TLS + ctx = _CurrentParserContext() + _currentParserContext = ctx + } + // Push the parser onto the stack + ctx._stack.append(parser) + defer { + // Pop the parser off the stack + ctx._stack.removeLast() + } + return body() } internal func _handleParseResult(_ parseResult: Int32) -> Bool { From 54b22fe772662b2ca94a017cb7daf8ccf2cbeca9 Mon Sep 17 00:00:00 2001 From: Yuta Saito Date: Sun, 15 Sep 2024 02:08:03 +0000 Subject: [PATCH 4/4] Simplify the current parser context management --- Sources/FoundationXML/XMLParser.swift | 37 +++++---------------------- 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/Sources/FoundationXML/XMLParser.swift b/Sources/FoundationXML/XMLParser.swift index 952c25cd58..62c94679ee 100644 --- a/Sources/FoundationXML/XMLParser.swift +++ b/Sources/FoundationXML/XMLParser.swift @@ -462,24 +462,16 @@ open class XMLParser : NSObject { open var allowedExternalEntityURLs: Set? - /// The current parser context for the current thread. - private class _CurrentParserContext { - var _stack: [XMLParser] = [] - var _current: XMLParser? { - return _stack.last - } - } - #if os(WASI) /// The current parser associated with the current thread. (assuming no multi-threading) /// FIXME: Unify the implementation with the other platforms once we unlock `threadDictionary` /// or migrate to `FoundationEssentials._ThreadLocal`. - private static nonisolated(unsafe) var _currentParserContext: _CurrentParserContext? + private static nonisolated(unsafe) var _currentParser: XMLParser? = nil #else /// The current parser associated with the current thread. - private static var _currentParserContext: _CurrentParserContext? { + private static var _currentParser: XMLParser? { get { - return Thread.current.threadDictionary["__CurrentNSXMLParser"] as? _CurrentParserContext + return Thread.current.threadDictionary["__CurrentNSXMLParser"] as? XMLParser } set { Thread.current.threadDictionary["__CurrentNSXMLParser"] = newValue @@ -489,29 +481,14 @@ open class XMLParser : NSObject { /// The current parser associated with the current thread. internal static func currentParser() -> XMLParser? { - if let ctx = _currentParserContext { - return ctx._current - } - return nil + return _currentParser } /// Execute the given closure with the current parser set to the given parser. internal static func withCurrentParser(_ parser: XMLParser, _ body: () -> R) -> R { - var ctx: _CurrentParserContext - if let current = _currentParserContext { - // Use the existing context if it exists - ctx = current - } else { - // Create a new context in TLS - ctx = _CurrentParserContext() - _currentParserContext = ctx - } - // Push the parser onto the stack - ctx._stack.append(parser) - defer { - // Pop the parser off the stack - ctx._stack.removeLast() - } + let oldParser = _currentParser + _currentParser = parser + defer { _currentParser = oldParser } return body() }