Skip to content

Commit daffa8d

Browse files
authored
Avoid using PeekBuffer for newline searching (#240)
PeekBuffer causes a lot of trouble when it comes to using ByteBuffer, but even with a byte array, switching to an indexOf-based algorithm significantly improves performance.
1 parent 854ea11 commit daffa8d

File tree

1 file changed

+52
-42
lines changed

1 file changed

+52
-42
lines changed

core/common/src/Utf8.kt

Lines changed: 52 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -244,28 +244,29 @@ internal fun Buffer.readUtf8CodePoint(): Int {
244244
*
245245
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.readLinesSample
246246
*/
247+
@OptIn(InternalIoApi::class)
247248
public fun Source.readLine(): String? {
248249
if (!request(1)) return null
249250

250-
val peekSource = peek()
251-
var offset = 0L
252-
var newlineSize = 0L
253-
while (peekSource.request(1)) {
254-
val b = peekSource.readByte().toInt()
255-
if (b == '\n'.code) {
256-
newlineSize = 1L
257-
break
258-
} else if (b == '\r'.code) {
259-
if (peekSource.startsWith('\n'.code.toByte())) {
260-
newlineSize = 2L
261-
break
251+
var lfIndex = this.indexOf('\n'.code.toByte())
252+
return when (lfIndex) {
253+
-1L -> readString()
254+
0L -> {
255+
skip(1)
256+
""
257+
}
258+
259+
else -> {
260+
var skipBytes = 1
261+
if (buffer[lfIndex - 1] == '\r'.code.toByte()) {
262+
lfIndex -= 1
263+
skipBytes += 1
262264
}
265+
val string = readString(lfIndex)
266+
skip(skipBytes.toLong())
267+
string
263268
}
264-
offset++
265269
}
266-
val line = readString(offset)
267-
skip(newlineSize)
268-
return line
269270
}
270271

271272
/**
@@ -288,39 +289,48 @@ public fun Source.readLine(): String? {
288289
*
289290
* @sample kotlinx.io.samples.KotlinxIoCoreCommonSamples.readLinesSample
290291
*/
292+
@OptIn(InternalIoApi::class)
291293
public fun Source.readLineStrict(limit: Long = Long.MAX_VALUE): String {
292294
require(limit >= 0) { "limit ($limit) < 0" }
293295
require(1)
294296

295-
val peekSource = peek()
296-
var offset = 0L
297-
var newlineSize = 0L
298-
while (offset < limit && peekSource.request(1)) {
299-
val b = peekSource.readByte().toInt()
300-
if (b == '\n'.code) {
301-
newlineSize = 1L
302-
break
303-
} else if (b == '\r'.code) {
304-
if (peekSource.startsWith('\n'.code.toByte())) {
305-
newlineSize = 2L
306-
break
307-
}
308-
}
309-
offset++
297+
var lfIndex = indexOf('\n'.code.toByte(), startIndex = 0, endIndex = limit)
298+
299+
if (lfIndex == 0L) {
300+
skip(1)
301+
return ""
310302
}
311-
if (offset == limit) {
312-
if (!peekSource.request(1)) throw EOFException()
313-
val nlCandidate = peekSource.readByte().toInt()
314-
if (nlCandidate == '\n'.code) {
315-
newlineSize = 1
316-
} else if (nlCandidate == '\r'.code && peekSource.startsWith('\n'.code.toByte())) {
317-
newlineSize = 2
303+
304+
if (lfIndex > 0) {
305+
var skipBytes = 1L
306+
if (buffer[lfIndex - 1] == '\r'.code.toByte()) {
307+
lfIndex -= 1
308+
skipBytes += 1
318309
}
310+
val str = readString(lfIndex)
311+
skip(skipBytes)
312+
return str
313+
}
314+
315+
// we reached the end of the source before hitting the limit
316+
if (buffer.size < limit) throw EOFException()
317+
// we can't read data anymore
318+
if (limit == Long.MAX_VALUE) throw EOFException()
319+
// there is no more data
320+
if (!request(limit + 1)) throw EOFException()
321+
322+
val b = buffer[limit]
323+
if (b == '\n'.code.toByte()) {
324+
val str = readString(limit)
325+
skip(1)
326+
return str
319327
}
320-
if (newlineSize == 0L) throw EOFException()
321-
val line = readString(offset)
322-
skip(newlineSize)
323-
return line
328+
// check if the last byte is CR and the byte past it is LF
329+
if (b != '\r'.code.toByte() || !request(limit + 2)) throw EOFException()
330+
if (buffer[limit + 1] != '\n'.code.toByte()) throw EOFException()
331+
val res = readString(limit)
332+
skip(2)
333+
return res
324334
}
325335

326336
private fun Buffer.commonReadUtf8CodePoint(): Int {

0 commit comments

Comments
 (0)