Skip to content

Commit 2094289

Browse files
Implement FSA-based timezone parser for DateTimeComponents.Format
- Introduce a finite state automaton to parse timezone identifiers character by character.
- Support named identifiers: UTC, GMT, UT, Z and lowercase z.
- Parse fixed-offset formats in various styles:
  - Single-digit offsets (+1, -5)
  - Full offsets with optional colons (+010203, -05:30:15)
- Handle combined identifiers (e.g., UTC+01:00, GMT-05:30:45, UT+3).
- Add comprehensive tests covering valid, invalid, edge-case, and boundary scenarios.

Fixes #444
1 parent 4dadf6f commit 2094289

File tree

3 files changed

+261
-2
lines changed

3 files changed

+261
-2
lines changed

core/common/src/format/DateTimeComponents.kt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import kotlinx.datetime.DayOfWeek
1010
import kotlinx.datetime.internal.*
1111
import kotlinx.datetime.internal.format.*
1212
import kotlinx.datetime.internal.format.parser.Copyable
13+
import kotlinx.datetime.internal.format.parser.ParserStructure
14+
import kotlinx.datetime.internal.format.parser.TimeZoneParserOperation
1315
import kotlinx.datetime.internal.safeMultiply
1416
import kotlin.reflect.*
1517

@@ -570,6 +572,18 @@ internal class TimeZoneIdDirective(private val knownZones: Set<String>) :
570572
get() =
571573
"${DateTimeFormatBuilder.WithDateTimeComponents::timeZoneId.name}()"
572574

575+
/**
 * Combines the parser inherited from the superclass with a parser built around a single
 * [TimeZoneParserOperation] that writes into `timeZoneField.accessor`.
 *
 * NOTE(review): the outer [ParserStructure] has no operations of its own and receives both parsers
 * in its second argument; presumably that argument lists alternative continuations, so either
 * parser may match — confirm against the `ParserStructure` definition.
 */
override fun parser(): ParserStructure<DateTimeComponentsContents> {
    // The parser provided by the superclass.
    val inheritedParser: ParserStructure<DateTimeComponentsContents> = super.parser()
    // A parser consisting solely of the automaton-based time zone operation.
    val automatonParser: ParserStructure<DateTimeComponentsContents> = ParserStructure(
        listOf(TimeZoneParserOperation(timeZoneField.accessor)),
        emptyList(),
    )
    return ParserStructure(emptyList(), listOf(inheritedParser, automatonParser))
}
586+
573587
override fun equals(other: Any?): Boolean = other is TimeZoneIdDirective && other.knownZones == knownZones
574588
override fun hashCode(): Int = knownZones.hashCode()
575589
}

core/common/src/internal/format/parser/ParserOperation.kt

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,118 @@ internal class UnconditionalModification<Output>(
139139
}
140140
}
141141

142+
/**
 * A parser operation that recognizes offset-style time zone identifiers with a small
 * finite state automaton and stores the matched text through [setter].
 *
 * The automaton accepts, starting at the requested position:
 * - `Z` or `z`;
 * - one of the prefixes `UTC`, `GMT`, `UT`, optionally followed by a signed offset;
 * - a signed offset on its own.
 *
 * A signed offset is `+` or `-` followed by digits in one of the shapes
 * `H`, `HH`, `HHMM`, `HHMMSS`, `HH:MM`, or `HH:MM:SS`.
 *
 * Only the shape is checked: every component must consist of ASCII digits, but no range
 * validation is performed here (for example, `+99:99` is accepted by this operation).
 */
internal class TimeZoneParserOperation<Output>(
    private val setter: AssignableField<Output, String>
) : ParserOperation<Output> {

    /**
     * Tries to match a time zone identifier in [input] beginning at [startIndex].
     *
     * On success, passes the matched substring to [setter] and returns [ParseResult.Ok] with the
     * index just past the match. If nothing could be matched, returns [ParseResult.Error] at [startIndex].
     */
    override fun consume(storage: Output, input: CharSequence, startIndex: Int): ParseResult {
        val lastMatch = validateTimezone(input, startIndex)
        return if (lastMatch > startIndex) {
            setter.setWithoutReassigning(storage, input.substring(startIndex, lastMatch), startIndex, lastMatch)
            ParseResult.Ok(lastMatch)
        } else {
            ParseResult.Error(startIndex) { "Invalid timezone format" }
        }
    }

    companion object {
        /** States of the automaton; each name describes what has been consumed so far. */
        private enum class State {
            /** Nothing has been consumed yet. */
            START,

            /** One of the named prefixes (`UTC`, `GMT`, `UT`) has been consumed. */
            AFTER_PREFIX,

            /** A `+` or `-` sign has been consumed. */
            AFTER_SIGN,

            /** A two-digit hour component has been consumed. */
            AFTER_HOUR,

            /** A two-digit minute component written without a colon has been consumed. */
            AFTER_MINUTE,

            /** A two-digit minute component preceded by a colon has been consumed. */
            AFTER_COLON_MINUTE,

            /** A complete identifier has been consumed and nothing more can follow. */
            END,

            /** The next character cannot extend the identifier. */
            INVALID
        }

        // Ordered so that "UTC" is tried before its own prefix "UT".
        private val NAMED_PREFIXES = listOf("UTC", "GMT", "UT")

        /**
         * Runs the automaton over [input] starting at [startIndex].
         *
         * @return the index just past the longest accepted identifier, or [startIndex] if no
         * identifier starts at that position.
         */
        private fun validateTimezone(input: CharSequence, startIndex: Int): Int {
            var index = startIndex
            // End of the longest prefix known to form a valid identifier on its own.
            var lastValidIndex = startIndex

            fun isSignAt(position: Int): Boolean = input[position] == '+' || input[position] == '-'

            // Matches one of the given prefixes at the *current* position and consumes it.
            // The prefix must be looked up at `index`, not at the beginning of `input`:
            // the time zone may be preceded by other parsed text.
            fun validatePrefix(validValues: List<String>): Boolean =
                validValues.firstOrNull { input.startsWith(it, index) }?.let {
                    index += it.length
                    lastValidIndex = index
                    true
                } ?: false

            // Consumes exactly `length` ASCII digits at the current position, if they are all present.
            fun validateTimeComponent(length: Int): Boolean {
                if ((index..<(index + length)).all { input.getOrNull(it)?.isAsciiDigit() ?: false }) {
                    index += length
                    lastValidIndex = index
                    return true
                }
                return false
            }

            var state = State.START
            while (index < input.length) {
                state = when (state) {
                    State.START -> when {
                        input[index] == 'Z' || input[index] == 'z' -> {
                            index++
                            State.END
                        }

                        isSignAt(index) -> {
                            index++
                            State.AFTER_SIGN
                        }

                        validatePrefix(NAMED_PREFIXES) -> State.AFTER_PREFIX
                        else -> State.INVALID
                    }

                    State.AFTER_PREFIX -> when {
                        isSignAt(index) -> {
                            index++
                            State.AFTER_SIGN
                        }

                        else -> State.INVALID
                    }

                    State.AFTER_SIGN -> when {
                        // Prefer a two-digit hour; a lone digit is only valid as the whole offset.
                        validateTimeComponent(2) -> State.AFTER_HOUR
                        validateTimeComponent(1) -> State.END
                        else -> State.INVALID
                    }

                    State.AFTER_HOUR -> when {
                        input[index] == ':' -> {
                            // The colon is consumed tentatively: if no minutes follow, the result
                            // falls back to `lastValidIndex`, which still points before the colon.
                            index++
                            if (validateTimeComponent(2)) State.AFTER_COLON_MINUTE else State.INVALID
                        }

                        validateTimeComponent(2) -> State.AFTER_MINUTE
                        else -> State.INVALID
                    }

                    State.AFTER_MINUTE -> when {
                        validateTimeComponent(2) -> State.END
                        else -> State.INVALID
                    }

                    State.AFTER_COLON_MINUTE -> when {
                        input[index] == ':' -> {
                            // Tentative, same as the colon after the hour.
                            index++
                            if (validateTimeComponent(2)) State.END else State.INVALID
                        }

                        else -> State.INVALID
                    }

                    State.END, State.INVALID -> break
                }
            }

            // In END everything up to `index` is part of the identifier; otherwise only the
            // last fully validated prefix counts.
            return if (state == State.END) index else lastValidIndex
        }
    }
}
253+
142254
/**
143255
* Matches the longest suitable string from `strings` and calls [consume] with the matched string.
144256
*/

core/common/test/format/DateTimeComponentsFormatTest.kt

Lines changed: 135 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2019-2023 JetBrains s.r.o. and contributors.
2+
* Copyright 2019-2025 JetBrains s.r.o. and contributors.
33
* Use of this source code is governed by the Apache 2.0 License that can be found in the LICENSE.txt file.
44
*/
55

@@ -8,7 +8,6 @@ package kotlinx.datetime.test.format
88
import kotlinx.datetime.*
99
import kotlinx.datetime.format.*
1010
import kotlin.reflect.KMutableProperty1
11-
import kotlin.reflect.KProperty
1211
import kotlin.test.*
1312

1413
class DateTimeComponentsFormatTest {
@@ -268,4 +267,138 @@ class DateTimeComponentsFormatTest {
268267
}
269268
}
270269
}
270+
271+
/** Shared inputs for the time zone identifier parsing tests below. */
private object TimezoneTestData {
    /** Offset bodies (without a sign) used by the tests that expect a fully valid zone id. */
    val correctParsableOffsets: List<String> = listOf(
        // H
        "1", "9", "0",
        // HH
        "09", "11", "18",
        // HHMM, no separator
        "0110", "0230", "0930",
        // HHMMSS, no separators
        "010000", "000100", "012345",
        // HH:MM
        "01:15", "02:35", "09:35",
        // HH:MM:SS
        "01:10:32", "15:51:00", "17:54:32",
    )

    /** Well-shaped offset bodies whose values are out of range for a time zone offset. */
    val incorrectParsableOffsets: List<String> = listOf(
        // HH with hours above the usual offset range
        "19", "99", "20",
        // HHMM with minutes > 59 or hours > 18
        "2010", "0260", "0999", "9999",
        // HHMMSS with an out-of-range hour, minute, or second
        "180001", "006000", "000099", "999999",
        // HH:MM with an out-of-range hour or minute
        "30:10", "02:70", "99:99",
        // HH:MM:SS with an out-of-range hour, minute, or second
        "19:00:00", "00:60:00", "99:99:99",
    )

    /** Offset bodies whose shape is wrong, so that parsing as a whole is expected to fail. */
    val incorrectUnparsableOffsets: List<String> = listOf(
        // a single character that is not a digit
        "a", "_", "+",
        // two characters: letter+digit, letter+symbol, digit+symbol
        "a9", "y!", "1#",
        // three digits (neither 2 nor 4)
        "110", "020",
        // five digits (neither 4 nor 6)
        "18000", "02300",
        // broken HH:MM: one-digit hour, missing minute, missing hour
        "3:10", "2:70", "99:", ":20",
        // colon-separated with too many digits in one component
        "12:3456", "1234:56",
        // broken HH:MM:SS: one-digit hour, minute, or second
        "1:00:00", "00:6:00", "09:99:9",
        // misplaced colons
        ":00:00", "00::00", "09:99:", "::00", "00::", "::",
        // broken HH:MM:SS: three-digit hour, minute, or second
        "180:00:00", "00:610:00", "99:99:199",
    )

    /** Named prefixes that may stand alone or be followed by a signed offset. */
    val tzPrefixes: List<String> = listOf("UTC", "GMT", "UT")

    /** A sample of region-based identifiers. */
    val timezoneDbIdentifiers: List<String> = listOf(
        "America/New_York", "Europe/London", "Asia/Tokyo", "Australia/Sydney",
        "Pacific/Auckland", "Africa/Cairo", "America/Los_Angeles", "Europe/Paris",
        "Asia/Singapore", "Australia/Melbourne", "Africa/Johannesburg", "Europe/Isle_of_Man",
    )

    /** Strings that are expected not to parse as a time zone identifier at all. */
    val invalidTimezoneIds: List<String> = listOf("INVALID", "XYZ", "ABC/DEF", "NOT_A_TIMEZONE", "SYSTEM")
}
331+
332+
@Test
fun testZuluTimeZone() {
    // TODO: once TimeZone.of("z") works correctly, replace both assertions below with
    //     listOf("z", "Z").forEach(::assertParseableAsTimeZone)
    // Until then, lowercase "z" is expected to parse as text while being rejected by TimeZone.of.
    assertParseableAsTimeZone("Z")
    assertIncorrectlyParseableAsTimeZone("z")
}
340+
341+
@Test
fun testSpecialNamedTimezones() {
    // Every bare prefix must be a valid zone id on its own.
    for (prefix in TimezoneTestData.tzPrefixes) {
        assertParseableAsTimeZone(prefix)
    }
}
345+
346+
@Test
fun testPrefixWithCorrectParsableOffset() {
    // The empty prefix covers bare signed offsets such as "+01:15".
    val prefixes = TimezoneTestData.tzPrefixes + ""
    for (zoneId in generateTimezoneIds(prefixes, TimezoneTestData.correctParsableOffsets)) {
        assertParseableAsTimeZone(zoneId)
    }
}
352+
353+
@Test
fun testPrefixWithIncorrectParsableOffset() {
    // The empty prefix covers bare signed offsets such as "+99:99".
    val prefixes = TimezoneTestData.tzPrefixes + ""
    for (zoneId in generateTimezoneIds(prefixes, TimezoneTestData.incorrectParsableOffsets)) {
        assertIncorrectlyParseableAsTimeZone(zoneId)
    }
}
359+
360+
@Test
fun testPrefixWithIncorrectUnparsableOffset() {
    // The empty prefix covers bare signed bodies such as "+3:10".
    val prefixes = TimezoneTestData.tzPrefixes + ""
    for (zoneId in generateTimezoneIds(prefixes, TimezoneTestData.incorrectUnparsableOffsets)) {
        assertNonParseableAsTimeZone(zoneId)
    }
}
366+
367+
@Test
fun testTimezoneDBIdentifiers() {
    // Region-based identifiers must be both resolvable and parseable.
    for (zoneId in TimezoneTestData.timezoneDbIdentifiers) {
        assertParseableAsTimeZone(zoneId)
    }
}
371+
372+
@Test
fun testInvalidTimezoneIds() {
    // None of these strings may be accepted by the time zone id format.
    for (zoneId in TimezoneTestData.invalidTimezoneIds) {
        assertNonParseableAsTimeZone(zoneId)
    }
}
376+
377+
/**
 * Builds every `prefix + sign + offset` combination, where the sign is `+` or `-`.
 *
 * The result is ordered by prefix first, then by sign (`+` before `-`), then by offset.
 */
private fun generateTimezoneIds(prefixes: List<String>, offsets: List<String>): List<String> =
    prefixes.flatMap { prefix ->
        listOf('+', '-').flatMap { sign ->
            offsets.map { offset -> "$prefix$sign$offset" }
        }
    }
386+
387+
/**
 * Asserts that [zoneId] is accepted by `TimeZone.of` (the call must not throw) and that the
 * `timeZoneId()` format parses it back to exactly the same string.
 */
private fun assertParseableAsTimeZone(zoneId: String) {
    // The result is deliberately ignored; only the absence of an exception matters.
    TimeZone.of(zoneId)
    val format = DateTimeComponents.Format { timeZoneId() }
    val parsed = format.parse(zoneId)
    assertEquals(zoneId, parsed.timeZoneId)
}
392+
393+
/**
 * Asserts that [zoneId] is rejected by `TimeZone.of` with [IllegalTimeZoneException], yet the
 * `timeZoneId()` format still parses it and reports exactly the same string.
 */
private fun assertIncorrectlyParseableAsTimeZone(zoneId: String) {
    assertFailsWith<IllegalTimeZoneException> { TimeZone.of(zoneId) }
    val format = DateTimeComponents.Format { timeZoneId() }
    val parsed = format.parse(zoneId)
    assertEquals(zoneId, parsed.timeZoneId)
}
398+
399+
/**
 * Asserts that building the `timeZoneId()` format and parsing [zoneId] with it
 * fails with [DateTimeFormatException].
 */
private fun assertNonParseableAsTimeZone(zoneId: String) {
    assertFailsWith<DateTimeFormatException> {
        val format = DateTimeComponents.Format { timeZoneId() }
        format.parse(zoneId)
    }
}
271404
}

0 commit comments

Comments
 (0)