Skip to content

Commit a2cbdda

Browse files
authored
Fix line-ending backslash whitespace escaping for multiline strings (#391)
Fixes #372.
1 parent d56d9f6 commit a2cbdda

File tree

3 files changed

+54
-35
lines changed

3 files changed

+54
-35
lines changed

internal/toml-test/tests/valid/string/multiline.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,5 +50,9 @@
5050
"whitespace-after-bs": {
5151
"type": "string",
5252
"value": "The quick brown fox jumps over the lazy dog."
53+
},
54+
"only-ignore-first": {
55+
"type": "string",
56+
"value": "Here are two\nlines of text.\nAnd another\n two.\n"
5357
}
5458
}

internal/toml-test/tests/valid/string/multiline.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ whitespace-after-bs = """\
3535
the lazy dog.\
3636
"""
3737

38+
only-ignore-first = """
39+
Here are \
40+
two
41+
lines of text.
42+
And \
43+
44+
another
45+
two.
46+
"""
47+
3848
no-space = """a\
3949
b"""
4050

parse.go

Lines changed: 40 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ func (p *parser) value(it item, parentIsArray bool) (interface{}, tomlType) {
245245
case itemString:
246246
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
247247
case itemMultilineString:
248-
return p.replaceEscapes(it, stripFirstNewline(p.stripEscapedNewlines(it.val))), p.typeOfPrimitive(it)
248+
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
249249
case itemRawString:
250250
return it.val, p.typeOfPrimitive(it)
251251
case itemRawMultilineString:
@@ -681,49 +681,54 @@ func stripFirstNewline(s string) string {
681681
return s
682682
}
683683

684-
// Remove newlines inside triple-quoted strings if a line ends with "\".
684+
// stripEscapedNewlines removes whitespace after line-ending backslashes in
685+
// multiline strings.
686+
//
687+
// A line-ending backslash is an unescaped \ followed only by whitespace until
688+
// the next newline. After a line-ending backslash, all whitespace is removed
689+
// until the next non-whitespace character.
685690
func (p *parser) stripEscapedNewlines(s string) string {
686-
split := strings.Split(s, "\n")
687-
if len(split) < 1 {
688-
return s
689-
}
690-
691-
escNL := false // Keep track of whether the last non-blank line was escaped.
692-
for i, line := range split {
693-
line = strings.TrimRight(line, " \t\r")
694-
695-
if len(line) == 0 || line[len(line)-1] != '\\' {
696-
split[i] = strings.TrimRight(split[i], "\r")
697-
if !escNL && i != len(split)-1 {
698-
split[i] += "\n"
699-
}
700-
continue
691+
var b strings.Builder
692+
var i int
693+
for {
694+
ix := strings.Index(s[i:], `\`)
695+
if ix < 0 {
696+
b.WriteString(s)
697+
return b.String()
701698
}
699+
i += ix
702700

703-
escBS := true
704-
for j := len(line) - 1; j >= 0 && line[j] == '\\'; j-- {
705-
escBS = !escBS
701+
if len(s) > i+1 && s[i+1] == '\\' {
702+
// Escaped backslash.
703+
i += 2
704+
continue
706705
}
707-
if escNL {
708-
line = strings.TrimLeft(line, " \t\r")
706+
// Scan until the next non-whitespace.
707+
j := i + 1
708+
whitespaceLoop:
709+
for ; j < len(s); j++ {
710+
switch s[j] {
711+
case ' ', '\t', '\r', '\n':
712+
default:
713+
break whitespaceLoop
714+
}
709715
}
710-
escNL = !escBS
711-
712-
if escBS {
713-
split[i] += "\n"
716+
if j == i+1 {
717+
// Not a whitespace escape.
718+
i++
714719
continue
715720
}
716-
717-
if i == len(split)-1 {
718-
p.panicf("invalid escape: '\\ '")
719-
}
720-
721-
split[i] = line[:len(line)-1] // Remove \
722-
if len(split)-1 > i {
723-
split[i+1] = strings.TrimLeft(split[i+1], " \t\r")
721+
if !strings.Contains(s[i:j], "\n") {
722+
// This is not a line-ending backslash.
723+
// (It's a bad escape sequence, but we can let
724+
// replaceEscapes catch it.)
725+
i++
726+
continue
724727
}
728+
b.WriteString(s[:i])
729+
s = s[j:]
730+
i = 0
725731
}
726-
return strings.Join(split, "")
727732
}
728733

729734
func (p *parser) replaceEscapes(it item, str string) string {

0 commit comments

Comments
 (0)