70
70
package kotlinx.io
71
71
72
72
import kotlinx.io.internal.*
73
+ import kotlinx.io.unsafe.UnsafeBufferOperations
74
+ import kotlinx.io.unsafe.withData
75
+ import kotlin.math.min
73
76
74
77
/* *
75
78
* Returns the number of bytes used to encode the slice of `string` as UTF-8 when using [Sink.writeString].
@@ -457,6 +460,7 @@ private fun Buffer.commonReadUtf8CodePoint(): Int {
457
460
}
458
461
}
459
462
463
+ @OptIn(UnsafeIoApi ::class )
460
464
private inline fun Buffer.commonWriteUtf8 (beginIndex : Int , endIndex : Int , charAt : (Int ) -> Char ) {
461
465
// Transcode UTF-16 chars to UTF-8 bytes.
462
466
var i = beginIndex
@@ -465,45 +469,49 @@ private inline fun Buffer.commonWriteUtf8(beginIndex: Int, endIndex: Int, charAt
465
469
466
470
when {
467
471
c < 0x80 -> {
468
- val tail = writableSegment(1 )
469
- val data = tail.data
470
- val segmentOffset = tail.limit - i
471
- val runLimit = minOf(endIndex, Segment .SIZE - segmentOffset)
472
-
473
- // Emit a 7-bit character with 1 byte.
474
- data[segmentOffset + i++ ] = c.toByte() // 0xxxxxxx
475
-
476
- // Fast-path contiguous runs of ASCII characters. This is ugly, but yields a ~4x performance
477
- // improvement over independent calls to writeByte().
478
- while (i < runLimit) {
479
- c = charAt(i).code
480
- if (c >= 0x80 ) break
481
- data[segmentOffset + i++ ] = c.toByte() // 0xxxxxxx
472
+ UnsafeBufferOperations .writeToTail(this , 1 ) { ctx, segment ->
473
+ val segmentOffset = - i
474
+ val runLimit = minOf(endIndex, i + segment.remainingCapacity)
475
+
476
+ // Emit a 7-bit character with 1 byte.
477
+ ctx.setUnchecked(segment, segmentOffset + i++ , c.toByte()) // 0xxxxxxx
478
+
479
+ // Fast-path contiguous runs of ASCII characters. This is ugly, but yields a ~4x performance
480
+ // improvement over independent calls to writeByte().
481
+ while (i < runLimit) {
482
+ c = charAt(i).code
483
+ if (c >= 0x80 ) break
484
+ ctx.setUnchecked(segment, segmentOffset + i++ , c.toByte()) // 0xxxxxxx
485
+ }
486
+
487
+ i + segmentOffset // Equivalent to i - (previous i).
482
488
}
483
-
484
- val runSize = i + segmentOffset - tail.limit // Equivalent to i - (previous i).
485
- tail.limit + = runSize
486
- sizeMut + = runSize.toLong()
487
489
}
488
490
489
491
c < 0x800 -> {
490
492
// Emit an 11-bit character with 2 bytes.
491
- val tail = writableSegment(2 )
492
- tail.data[tail.limit] = (c shr 6 or 0xc0 ).toByte() // 110xxxxx
493
- tail.data[tail.limit + 1 ] = (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
494
- tail.limit + = 2
495
- sizeMut + = 2L
493
+ UnsafeBufferOperations .writeToTail(this , 2 ) { ctx, segment ->
494
+ ctx.setUnchecked(
495
+ segment, 0 ,
496
+ (c shr 6 or 0xc0 ).toByte(), // 110xxxxx
497
+ (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
498
+ )
499
+ 2
500
+ }
496
501
i++
497
502
}
498
503
499
504
c < 0xd800 || c > 0xdfff -> {
500
505
// Emit a 16-bit character with 3 bytes.
501
- val tail = writableSegment(3 )
502
- tail.data[tail.limit] = (c shr 12 or 0xe0 ).toByte() // 1110xxxx
503
- tail.data[tail.limit + 1 ] = (c shr 6 and 0x3f or 0x80 ).toByte() // 10xxxxxx
504
- tail.data[tail.limit + 2 ] = (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
505
- tail.limit + = 3
506
- sizeMut + = 3L
506
+ UnsafeBufferOperations .writeToTail(this , 3 ) { ctx, segment ->
507
+ ctx.setUnchecked(
508
+ segment, 0 ,
509
+ (c shr 12 or 0xe0 ).toByte(), // 1110xxxx
510
+ (c shr 6 and 0x3f or 0x80 ).toByte(), // 10xxxxxx
511
+ (c and 0x3f or 0x80 ).toByte() // 10xxxxxx
512
+ )
513
+ 3
514
+ }
507
515
i++
508
516
}
509
517
@@ -522,20 +530,23 @@ private inline fun Buffer.commonWriteUtf8(beginIndex: Int, endIndex: Int, charAt
522
530
val codePoint = 0x010000 + (c and 0x03ff shl 10 or (low and 0x03ff ))
523
531
524
532
// Emit a 21-bit character with 4 bytes.
525
- val tail = writableSegment(4 )
526
- tail.data[tail.limit] = (codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
527
- tail.data[tail.limit + 1 ] = (codePoint shr 12 and 0x3f or 0x80 ).toByte() // 10xxxxxx
528
- tail.data[tail.limit + 2 ] = (codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxyyyy
529
- tail.data[tail.limit + 3 ] = (codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
530
- tail.limit + = 4
531
- sizeMut + = 4L
533
+ UnsafeBufferOperations .writeToTail(this , 4 ) { ctx, segment ->
534
+ ctx.setUnchecked(segment, 0 ,
535
+ (codePoint shr 18 or 0xf0 ).toByte(), // 11110xxx
536
+ (codePoint shr 12 and 0x3f or 0x80 ).toByte(), // 10xxxxxx
537
+ (codePoint shr 6 and 0x3f or 0x80 ).toByte(), // 10xxyyyy
538
+ (codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
539
+ )
540
+ 4
541
+ }
532
542
i + = 2
533
543
}
534
544
}
535
545
}
536
546
}
537
547
}
538
548
549
+ @OptIn(UnsafeIoApi ::class )
539
550
private fun Buffer.commonWriteUtf8CodePoint (codePoint : Int ) {
540
551
when {
541
552
codePoint < 0 || codePoint > 0x10ffff -> {
@@ -551,11 +562,11 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
551
562
552
563
codePoint < 0x800 -> {
553
564
// Emit an 11-bit code point with 2 bytes.
554
- val tail = writableSegment( 2 )
555
- tail.data[tail.limit] = (codePoint shr 6 or 0xc0 ).toByte() // 110xxxxx
556
- tail.data[tail.limit + 1 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10xxxxxx
557
- tail.limit + = 2
558
- sizeMut + = 2L
565
+ UnsafeBufferOperations .writeToTail( this , 2 ) { ctx, segment ->
566
+ ctx.setUnchecked(segment, 0 , (codePoint shr 6 or 0xc0 ).toByte() ) // 110xxxxx
567
+ ctx.setUnchecked(segment, 1 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
568
+ 2
569
+ }
559
570
}
560
571
561
572
codePoint in 0xd800 .. 0xdfff -> {
@@ -565,48 +576,47 @@ private fun Buffer.commonWriteUtf8CodePoint(codePoint: Int) {
565
576
566
577
codePoint < 0x10000 -> {
567
578
// Emit a 16-bit code point with 3 bytes.
568
- val tail = writableSegment( 3 )
569
- tail.data[tail.limit] = (codePoint shr 12 or 0xe0 ).toByte() // 1110xxxx
570
- tail.data[tail.limit + 1 ] = ( codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxxxxx
571
- tail.data[tail.limit + 2 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10xxxxxx
572
- tail.limit + = 3
573
- sizeMut + = 3L
579
+ UnsafeBufferOperations .writeToTail( this , 3 ) { ctx, segment ->
580
+ ctx.setUnchecked(segment, 0 , (codePoint shr 12 or 0xe0 ).toByte() ) // 1110xxxx
581
+ ctx.setUnchecked(segment, 1 , ( codePoint shr 6 and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
582
+ ctx.setUnchecked(segment, 2 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
583
+ 3
584
+ }
574
585
}
575
586
576
587
else -> { // [0x10000, 0x10ffff]
577
588
// Emit a 21-bit code point with 4 bytes.
578
- val tail = writableSegment( 4 )
579
- tail.data[tail.limit] = ( codePoint shr 18 or 0xf0 ).toByte() // 11110xxx
580
- tail.data[tail.limit + 1 ] = ( codePoint shr 12 and 0x3f or 0x80 ).toByte() // 10xxxxxx
581
- tail.data[tail.limit + 2 ] = ( codePoint shr 6 and 0x3f or 0x80 ).toByte() // 10xxyyyy
582
- tail.data[tail.limit + 3 ] = ( codePoint and 0x3f or 0x80 ).toByte() // 10yyyyyy
583
- tail.limit + = 4
584
- sizeMut + = 4L
589
+ UnsafeBufferOperations .writeToTail( this , 4 ) { ctx, segment ->
590
+ ctx.setUnchecked(segment, 0 , ( codePoint shr 18 or 0xf0 ).toByte() ) // 11110xxx
591
+ ctx.setUnchecked(segment, 1 , ( codePoint shr 12 and 0x3f or 0x80 ).toByte() ) // 10xxxxxx
592
+ ctx.setUnchecked(segment, 2 , ( codePoint shr 6 and 0x3f or 0x80 ).toByte() ) // 10xxyyyy
593
+ ctx.setUnchecked(segment, 3 , ( codePoint and 0x3f or 0x80 ).toByte() ) // 10yyyyyy
594
+ 4
595
+ }
585
596
}
586
597
}
587
598
}
588
599
600
/**
 * Consumes [byteCount] bytes from this buffer and decodes them as a UTF-8 string.
 *
 * When the head segment alone holds at least [byteCount] bytes, the string is decoded directly from the
 * segment's backing array and the bytes are then skipped. Otherwise the bytes are copied out with
 * [readByteArray] and decoded from that copy.
 *
 * @param byteCount the number of bytes to decode; must be non-negative and not greater than [Int.MAX_VALUE].
 * @return the decoded string, or an empty string when [byteCount] is zero.
 * @throws IllegalArgumentException when [byteCount] is negative or greater than [Int.MAX_VALUE].
 */
@OptIn(UnsafeIoApi::class)
private fun Buffer.commonReadUtf8(byteCount: Long): String {
    require(byteCount >= 0 && byteCount <= Int.MAX_VALUE) {
        "byteCount ($byteCount) is not within the range [0..${Int.MAX_VALUE})"
    }
    // NOTE(review): presumably fails when fewer than byteCount bytes are buffered — confirm against Buffer.require.
    require(byteCount)
    // Nothing to decode: the result is the empty string, not a blank one.
    if (byteCount == 0L) return ""

    UnsafeBufferOperations.iterate(this) { ctx, head ->
        // NOTE(review): a non-null head is assumed here because byteCount > 0 passed require() above.
        head!!
        if (head.size >= byteCount) {
            ctx.withData(head) { data, pos, limit ->
                // Clamp the end index to the segment's limit so decoding never reads past valid data.
                val result = data.commonToUtf8String(pos, min(limit, pos + byteCount.toInt()))
                skip(byteCount)
                // Non-local return: leaves commonReadUtf8 itself with the decoded string.
                return result
            }
        }
    }

    // If the string spans multiple segments, delegate to readByteArray().
    return readByteArray(byteCount.toInt()).commonToUtf8String()
}
0 commit comments