From 19c5a4df2736a76adc2891d549c81d6ba5cc5f43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Karmaz=C3=ADn?=
Date: Sat, 3 Apr 2021 20:58:00 +0200
Subject: [PATCH 1/3] Implement Instant parsing in common module

Instant.parse now supports:
- Time zone offsets in the form of +-hh:mm
- Time part allows the omission of colons (allows hhmmss)
- Time part allows the omission of seconds

Implementation notes:
Input string is split by the time delimiter (T|t) into date and time parts.
Date parsing is delegated to LocalDate.parse
Time parsing employs the well-known algorithm from Iso8601Utils.java
originally implemented in Jackson. This commit is based on Moshi's version
of that file.
---
 core/common/src/Instant.kt       |   5 +-
 core/common/src/InstantParser.kt | 141 +++++++++++++++++++++++++++++++
 core/common/test/InstantTest.kt  |  54 ++++++++++++
 core/js/src/Instant.kt           |   7 +-
 core/jvm/src/Instant.kt          |   7 +-
 core/native/src/Instant.kt       |  51 +----------
 6 files changed, 202 insertions(+), 63 deletions(-)
 create mode 100644 core/common/src/InstantParser.kt

diff --git a/core/common/src/Instant.kt b/core/common/src/Instant.kt
index 01bfaa0d8..9b0a2eb76 100644
--- a/core/common/src/Instant.kt
+++ b/core/common/src/Instant.kt
@@ -123,12 +123,15 @@ public expect class Instant : Comparable<Instant> {
 
         /**
          * Parses a string that represents an instant in ISO-8601 format including date and time components and
-         * the mandatory `Z` designator of the UTC+0 time zone and returns the parsed [Instant] value.
+         * a mandatory time zone offset (`Z` or `+-hh[:]mm`), and returns the parsed [Instant] value.
          *
          * Examples of instants in ISO-8601 format:
+         * - `2020-08-30T18:43Z`
          * - `2020-08-30T18:43:00Z`
          * - `2020-08-30T18:43:00.500Z`
          * - `2020-08-30T18:43:00.123456789Z`
+         * - `2020-08-30T18:43:00+01:00`
+         * - `2020-08-30T18:43:00+0100`
          *
          * @throws IllegalArgumentException if the text cannot be parsed or the boundaries of [Instant] are exceeded.
          */
diff --git a/core/common/src/InstantParser.kt b/core/common/src/InstantParser.kt
new file mode 100644
index 000000000..5d467f778
--- /dev/null
+++ b/core/common/src/InstantParser.kt
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2019-2021 JetBrains s.r.o.
+ * Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
+ */
+
+package kotlinx.datetime
+
+import kotlin.math.min
+import kotlin.math.pow
+
+internal fun parseInstantCommon(string: String): Instant = parseIsoString(string)
+
+/*
+ * Parses "<date>(T|t)hh[:]mm[[:]ss[.fraction]](Z|z|+-offset)". The time and zone offset algorithm was adapted from
+ * https://github.com/square/moshi/blob/aea17e09bc6a3f9015d3de0e951923f1033d299e/adapters/src/main/java/com/squareup/moshi/adapters/Iso8601Utils.java
+ */
+private fun parseIsoString(isoString: String): Instant {
+    try {
+        val dateTimeSplit = isoString.split('T', ignoreCase = true)
+        if (dateTimeSplit.size != 2) {
+            throw DateTimeFormatException("ISO 8601 datetime must contain exactly one (T|t) delimiter.")
+        }
+        val localDate = LocalDate.parse(dateTimeSplit[0])
+
+        // Iso8601Utils.parse: `offset` is the cursor into timePart; every field read advances it
+        val timePart = dateTimeSplit[1]
+        var offset = 0
+        val hour = parseInt(timePart, offset, offset + 2).also { offset += 2 }
+        if (checkOffset(timePart, offset, ':')) {
+            offset += 1
+        }
+        val minutes = parseInt(timePart, offset, offset + 2).also { offset += 2 }
+        if (checkOffset(timePart, offset, ':')) {
+            offset += 1
+        }
+
+        var seconds = 0
+        var nanosecond = 0
+        // seconds and fraction can be optional
+        if (timePart.length > offset) {
+            val c = timePart[offset]
+            if (c != 'Z' && c != 'z' && c != '+' && c != '-') {
+                seconds = parseInt(timePart, offset, offset + 2).also { offset += 2 }
+                if (seconds > 59 && seconds < 63) { // https://github.com/Kotlin/kotlinx-datetime/issues/5
+                    seconds = 59 // truncate up to 3 leap seconds
+                }
+                if (checkOffset(timePart, offset, '.')) {
+                    offset += 1
+                    val endOffset = indexOfNonDigit(timePart, offset)
+                    // the fraction must start with a digit: String.toInt() would otherwise accept ".+1" or ".-1"
+                    if (endOffset == offset) throw DateTimeFormatException("Empty fraction in '$timePart'")
+                    val parseEndOffset = min(endOffset, offset + 9) // parse up to 9 digits
+                    val fraction = parseInt(timePart, offset, parseEndOffset)
+                    nanosecond = (10.0.pow(9 - (parseEndOffset - offset)) * fraction).toInt()
+                    offset = endOffset
+                }
+            }
+        }
+
+        // extract timezone
+        if (timePart.length <= offset) {
+            throw IllegalArgumentException("No time zone indicator in '$timePart'")
+        }
+        val timezone: TimeZone
+        val timezoneIndicator = timePart[offset]
+        if (timezoneIndicator == 'Z' || timezoneIndicator == 'z') {
+            timezone = TimeZone.UTC
+        } else if (timezoneIndicator == '+' || timezoneIndicator == '-') {
+            val timezoneOffset = timePart.substring(offset)
+            // 18-Jun-2015, tatu: Minor simplification, skip offset of "+0000"/"+00:00"
+            if ("+0000" == timezoneOffset || "+00:00" == timezoneOffset) {
+                timezone = TimeZone.UTC
+            } else {
+                val timezoneId = "UTC$timezoneOffset"
+                timezone = TimeZone.of(timezoneId)
+                val act = timezone.id
+                if (act != timezoneId) {
+                    /* 22-Jan-2015, tatu: Looks like canonical version has colons,
+                     * but we may be given one without. If so, don't sweat.
+                     * Yes, very inefficient. Hopefully not hit often.
+                     * If it becomes a perf problem, add 'loose' comparison instead.
+                     */
+                    val cleaned = act.replace(":", "")
+                    if (cleaned != timezoneId) {
+                        throw IllegalTimeZoneException(
+                            "Mismatching time zone indicator: " +
+                                    timezoneId +
+                                    " given, resolves to " +
+                                    timezone.id
+                        )
+                    }
+                }
+            }
+        } else {
+            throw DateTimeFormatException("Invalid time zone indicator '$timezoneIndicator'")
+        }
+        return localDate.atTime(hour, minutes, seconds, nanosecond).toInstant(timezone)
+    } catch (e: NumberFormatException) {
+        throw DateTimeFormatException(e)
+    }
+}
+
+/**
+ * Check if the expected character exists at the given offset in the value.
+ *
+ * @param value the string to check at the specified offset
+ * @param offset the offset to look for the expected character
+ * @param expected the expected character
+ * @return true if the expected character exists at the given offset
+ */
+private fun checkOffset(value: String, offset: Int, expected: Char): Boolean {
+    return (offset < value.length) && (value[offset] == expected)
+}
+
+/**
+ * Parse an unsigned decimal integer located between 2 given offsets in a string
+ *
+ * @param value the string to parse
+ * @param beginIndex the start index (inclusive) for the integer in the string
+ * @param endIndex the end index (exclusive) for the integer in the string
+ * @return the int
+ * @throws NumberFormatException if the range is out of bounds, empty, or holds anything but ASCII digits
+ */
+private fun parseInt(value: String, beginIndex: Int, endIndex: Int): Int {
+    if ((beginIndex < 0) || (endIndex > value.length) || (beginIndex > endIndex)) {
+        throw NumberFormatException(value)
+    }
+    val digits = value.substring(beginIndex, endIndex)
+    return if (digits.all { it in '0'..'9' }) digits.toInt() else throw NumberFormatException(value) // no sign
+}
+
+/**
+ * Returns the index of the first character in the string that is not a digit, starting at offset.
+ */
+private fun indexOfNonDigit(string: String, offset: Int): Int {
+    for (i in offset until string.length) {
+        val c = string[i]
+        if (c < '0' || c > '9') return i
+    }
+    return string.length
+}
diff --git a/core/common/test/InstantTest.kt b/core/common/test/InstantTest.kt
index 8f0e13948..51d76d682 100644
--- a/core/common/test/InstantTest.kt
+++ b/core/common/test/InstantTest.kt
@@ -59,9 +59,13 @@ class InstantTest {
     @Test
     fun parseIsoString() {
         val instants = arrayOf(
+            Triple("1970-01-01T0000Z", 0, 0),
+            Triple("1970-01-01T00:00Z", 0, 0),
+            Triple("1970-01-01T000000Z", 0, 0),
             Triple("1970-01-01T00:00:00Z", 0, 0),
             Triple("1970-01-01t00:00:00Z", 0, 0),
             Triple("1970-01-01T00:00:00z", 0, 0),
+            Triple("1970-01-01t00:00:00z", 0, 0),
             Triple("1970-01-01T00:00:00.0Z", 0, 0),
             Triple("1970-01-01T00:00:00.000000000Z", 0, 0),
             Triple("1970-01-01T00:00:00.000000001Z", 0, 1),
@@ -80,11 +84,61 @@ class InstantTest {
         }
 
         assertInvalidFormat { Instant.parse("x") }
+        assertInvalidFormat { Instant.parse("1970-01-01T00:00.1Z") }
         assertInvalidFormat { Instant.parse("12020-12-31T23:59:59.000000000Z") }
         // this string represents an Instant that is currently larger than Instant.MAX any of the implementations:
         assertInvalidFormat { Instant.parse("+1000000001-12-31T23:59:59.000000000Z") }
     }
 
+    @Test
+    fun isoTimezoneOffsets() {
+        val validOffsets = arrayOf(
+            "1970-01-01T00:00:00Z",
+            "1970-01-01T00:00:00z",
+
+            "1970-01-01T00:00:00+00:00",
+            "1970-01-01T00:00:00+0000",
+
+            "1970-01-01T01:00:00+01:00",
+            "1970-01-01T01:00:00+0100",
+
+            "1970-01-01T18:00:00+18:00",
+            "1970-01-01T18:00:00+1800",
+
+            "1970-01-01T00:01:00+00:01",
+            "1970-01-01T00:01:00+0001",
+
+            "1969-12-31T23:00:00-01:00",
+            "1969-12-31T23:00:00-0100",
+
+            "1969-12-31T06:00:00-18:00",
+            "1969-12-31T06:00:00-1800",
+
+            "1969-12-31T23:59:00-00:01",
+            "1969-12-31T23:59:00-0001",
+        )
+        validOffsets.forEach {
+            assertEquals(0, Instant.parse(it).toEpochMilliseconds())
+        }
+
+        val invalidOffsets = arrayOf(
+            "1970-01-01T18:01:00+18:01",
+            "1970-01-01T18:01:00+1801",
+
+            "1969-12-31T05:59:00-18:01",
+            "1969-12-31T05:59:00-1801",
+
+            "1970-01-01T01:00:00+01",
+            "1970-01-01T01:00:00+01",
+
+            "1970-01-01T01:00:00+1:00",
+            "1970-01-01T01:00:00+100",
+        )
+        invalidOffsets.forEach {
+            assertFailsWith<IllegalArgumentException> { Instant.parse(it) }
+        }
+    }
+
     @OptIn(ExperimentalTime::class)
     @Test
     fun instantCalendarArithmetic() {
diff --git a/core/js/src/Instant.kt b/core/js/src/Instant.kt
index 35f2e0f3c..3b626e6b2 100644
--- a/core/js/src/Instant.kt
+++ b/core/js/src/Instant.kt
@@ -75,12 +75,7 @@ public actual class Instant internal constructor(internal val value: jtInstant)
             if (epochMilliseconds > 0) MAX else MIN
         }
 
-        actual fun parse(isoString: String): Instant = try {
-            Instant(jtInstant.parse(isoString))
-        } catch (e: Throwable) {
-            if (e.isJodaDateTimeParseException()) throw DateTimeFormatException(e)
-            throw e
-        }
+        actual fun parse(isoString: String): Instant = parseInstantCommon(isoString)
 
         actual fun fromEpochSeconds(epochSeconds: Long, nanosecondAdjustment: Long): Instant = try {
             /* Performing normalization here because otherwise this fails:
diff --git a/core/jvm/src/Instant.kt b/core/jvm/src/Instant.kt
index 79cf7554d..64aa601e8 100644
--- a/core/jvm/src/Instant.kt
+++ b/core/jvm/src/Instant.kt
@@ -9,7 +9,6 @@ package kotlinx.datetime
 import kotlinx.datetime.serializers.InstantIso8601Serializer
 import kotlinx.serialization.Serializable
 import java.time.DateTimeException
-import java.time.format.DateTimeParseException
 import java.time.temporal.ChronoUnit
 import kotlin.time.*
 import java.time.Instant as jtInstant
@@ -62,11 +61,7 @@ public actual class Instant internal constructor(internal val value: jtInstant)
         actual fun fromEpochMilliseconds(epochMilliseconds: Long): Instant =
                 Instant(jtInstant.ofEpochMilli(epochMilliseconds))
 
-        actual fun parse(isoString: String): Instant = try {
-            Instant(jtInstant.parse(isoString))
-        } catch (e: DateTimeParseException) {
-            throw DateTimeFormatException(e)
-        }
+        actual fun parse(isoString: String): Instant = parseInstantCommon(isoString)
 
         actual fun fromEpochSeconds(epochSeconds: Long, nanosecondAdjustment: Long): Instant = try {
             Instant(jtInstant.ofEpochSecond(epochSeconds, nanosecondAdjustment))
diff --git a/core/native/src/Instant.kt b/core/native/src/Instant.kt
index 74f3baac8..0916a76e3 100644
--- a/core/native/src/Instant.kt
+++ b/core/native/src/Instant.kt
@@ -23,54 +23,6 @@ public actual enum class DayOfWeek {
     SUNDAY;
 }
 
-// This is a function and not a value due to https://github.com/Kotlin/kotlinx-datetime/issues/5
-// org.threeten.bp.format.DateTimeFormatterBuilder.InstantPrinterParser#parse
-private val instantParser: Parser<Instant>
-    get() = localDateParser
-        .chainIgnoring(concreteCharParser('T').or(concreteCharParser('t')))
-        .chain(intParser(2, 2)) // hour
-        .chainIgnoring(concreteCharParser(':'))
-        .chain(intParser(2, 2)) // minute
-        .chainIgnoring(concreteCharParser(':'))
-        .chain(intParser(2, 2)) // second
-        .chain(optional(
-            concreteCharParser('.')
-                .chainSkipping(fractionParser(0, 9, 9)) // nanos
-        ))
-        .chainIgnoring(concreteCharParser('Z').or(concreteCharParser('z')))
-        .map {
-            val (dateHourMinuteSecond, nanosVal) = it
-            val (dateHourMinute, secondsVal) = dateHourMinuteSecond
-            val (dateHour, minutesVal) = dateHourMinute
-            val (dateVal, hoursVal) = dateHour
-
-            val nano = nanosVal ?: 0
-            val (days, hours, min, seconds) = if (hoursVal == 24 && minutesVal == 0 && secondsVal == 0 && nano == 0) {
-                listOf(1, 0, 0, 0)
-            } else if (hoursVal == 23 && minutesVal == 59 && secondsVal == 60) {
-                // parsed a leap second, but it seems it isn't used
-                listOf(0, 23, 59, 59)
-            } else {
-                listOf(0, hoursVal, minutesVal, secondsVal)
-            }
-
-            // never fails: 9_999 years are always supported
-            val localDate = dateVal.withYear(dateVal.year % 10000).plus(days, DateTimeUnit.DAY)
-            val localTime = LocalTime.of(hours, min, seconds, 0)
-            val secDelta: Long = try {
-                safeMultiply((dateVal.year / 10000).toLong(), SECONDS_PER_10000_YEARS)
-            } catch (e: ArithmeticException) {
-                throw DateTimeFormatException(e)
-            }
-            val epochDay = localDate.toEpochDay().toLong()
-            val instantSecs = epochDay * 86400 + localTime.toSecondOfDay() + secDelta
-            try {
-                Instant(instantSecs, nano)
-            } catch (e: IllegalArgumentException) {
-                throw DateTimeFormatException(e)
-            }
-        }
-
 /**
  * The minimum supported epoch second.
  */
@@ -243,8 +195,7 @@ public actual class Instant internal constructor(actual val epochSeconds: Long,
         actual fun fromEpochSeconds(epochSeconds: Long, nanosecondAdjustment: Int): Instant =
             fromEpochSeconds(epochSeconds, nanosecondAdjustment.toLong())
 
-        actual fun parse(isoString: String): Instant =
-            instantParser.parse(isoString)
+        actual fun parse(isoString: String): Instant = parseInstantCommon(isoString)
 
         actual val DISTANT_PAST: Instant = fromEpochSeconds(DISTANT_PAST_SECONDS, 999_999_999)
 

From b36700cb29ba673a5e01f514a6d1a59d7a9baec4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Karmaz=C3=ADn?=
Date: Sat, 3 Apr 2021 22:04:39 +0200
Subject: [PATCH 2/3] Throw DateTimeFormatException instead of
 IllegalArgumentException

Last forgotten IAE among code that throws DateTimeFormatException
---
 core/common/src/InstantParser.kt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/core/common/src/InstantParser.kt b/core/common/src/InstantParser.kt
index 5d467f778..b0d7f5894 100644
--- a/core/common/src/InstantParser.kt
+++ b/core/common/src/InstantParser.kt
@@ -59,7 +59,7 @@ private fun parseIsoString(isoString: String): Instant {
 
         // extract timezone
         if (timePart.length <= offset) {
-            throw IllegalArgumentException("No time zone indicator in '$timePart'")
+            throw DateTimeFormatException("No time zone indicator in '$timePart'")
         }
         val timezone: TimeZone
         val timezoneIndicator = timePart[offset]

From 61fc0c252557b78be897eb6446449b0e0022562c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ond=C5=99ej=20Karmaz=C3=ADn?=
Date: Sun, 4 Apr 2021 11:44:36 +0200
Subject: [PATCH 3/3] Add test case for empty fraction

---
 core/common/test/InstantTest.kt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/core/common/test/InstantTest.kt b/core/common/test/InstantTest.kt
index 51d76d682..54c848d8c 100644
--- a/core/common/test/InstantTest.kt
+++ b/core/common/test/InstantTest.kt
@@ -85,6 +85,7 @@ class InstantTest {
 
         assertInvalidFormat { Instant.parse("x") }
         assertInvalidFormat { Instant.parse("1970-01-01T00:00.1Z") }
+        assertInvalidFormat { Instant.parse("1970-01-01T00:00:00.Z") }
         assertInvalidFormat { Instant.parse("12020-12-31T23:59:59.000000000Z") }
         // this string represents an Instant that is currently larger than Instant.MAX any of the implementations:
         assertInvalidFormat { Instant.parse("+1000000001-12-31T23:59:59.000000000Z") }