Skip to content

Commit 2657738

Browse files
committed
Treat cpp string as UTF8
Implementation borrowed from the net/mail package (message.go). Solves arduino/Arduino#5277.
1 parent 5c89a02 commit 2657738

File tree

1 file changed

+96
-18
lines changed

1 file changed

+96
-18
lines changed

src/arduino.cc/builder/utils/utils.go

+96-18
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,17 @@ import (
3434
"arduino.cc/builder/gohasissues"
3535
"arduino.cc/builder/i18n"
3636
"arduino.cc/builder/types"
37+
"bytes"
3738
"crypto/md5"
3839
"encoding/hex"
40+
"errors"
3941
"io/ioutil"
4042
"os"
4143
"os/exec"
4244
"path/filepath"
4345
"runtime"
4446
"strings"
47+
"unicode/utf8"
4548
)
4649

4750
func KeysOfMapOfStringInterface(input map[string]interface{}) []string {
@@ -435,30 +438,105 @@ func ParseCppString(line string) (string, string, bool) {
435438
return "", line, false
436439
}
437440

441+
s, rem, err := consumeQuotedString(line)
442+
443+
return s, rem, err == nil
444+
}
445+
446+
// from package net/mail/message.go
447+
// consumeQuotedString parses the quoted string at the start of p.
448+
func consumeQuotedString(s string) (qs string, rem string, err error) {
449+
// Assume first byte is '"'.
438450
i := 1
439-
res := ""
451+
qsb := make([]rune, 0, 10)
452+
453+
escaped := false
454+
455+
Loop:
440456
for {
441-
if i >= len(line) {
442-
return "", line, false
443-
}
457+
r, size := utf8.DecodeRuneInString(s[i:])
458+
459+
switch {
460+
case size == 0:
461+
return "", s, errors.New("Unclosed quoted-string")
444462

445-
switch line[i] {
446-
// Backslash, next character is used unmodified
447-
case '\\':
448-
i++
449-
if i >= len(line) {
450-
return "", line, false
463+
case size == 1 && r == utf8.RuneError:
464+
return "", s, errors.New("Invalid utf-8 in quoted-string")
465+
466+
case escaped:
467+
// quoted-pair = ("\" (VCHAR / WSP))
468+
469+
if !isVchar(r) && !isWSP(r) {
470+
return "", s, errors.New("Bad character in quoted-string")
451471
}
452-
res += string(line[i])
453-
break
454-
// Quote, end of string
455-
case '"':
456-
return res, line[i+1:], true
472+
473+
qsb = append(qsb, r)
474+
escaped = false
475+
476+
case isQtext(r) || isWSP(r):
477+
// qtext (printable US-ASCII excluding " and \), or
478+
// FWS (almost; we're ignoring CRLF)
479+
qsb = append(qsb, r)
480+
481+
case r == '"':
482+
break Loop
483+
484+
case r == '\\':
485+
escaped = true
486+
457487
default:
458-
res += string(line[i])
459-
break
488+
return "", s, errors.New("Bad character in quoted-string")
489+
460490
}
461491

462-
i++
492+
i += size
493+
}
494+
s = s[i+1:]
495+
if len(qsb) == 0 {
496+
return "", s, errors.New("Empty quoted-string")
497+
}
498+
return string(qsb), s, nil
499+
}
500+
501+
// isQtext reports whether r is an RFC 5322 qtext character.
502+
func isQtext(r rune) bool {
503+
// Printable US-ASCII, excluding backslash or quote.
504+
if r == '\\' || r == '"' {
505+
return false
506+
}
507+
return isVchar(r)
508+
}
509+
510+
// quoteString renders a string as an RFC 5322 quoted-string.
511+
func quoteString(s string) string {
512+
var buf bytes.Buffer
513+
buf.WriteByte('"')
514+
for _, r := range s {
515+
if isQtext(r) || isWSP(r) {
516+
buf.WriteRune(r)
517+
} else if isVchar(r) {
518+
buf.WriteByte('\\')
519+
buf.WriteRune(r)
520+
}
463521
}
522+
buf.WriteByte('"')
523+
return buf.String()
524+
}
525+
526+
// isVchar reports whether r is an RFC 5322 VCHAR character.
527+
func isVchar(r rune) bool {
528+
// Visible (printing) characters.
529+
return '!' <= r && r <= '~' || isMultibyte(r)
530+
}
531+
532+
// isMultibyte reports whether r needs more than one byte when encoded as
// UTF-8, the class of characters additionally permitted by RFC 6532.
func isMultibyte(r rune) bool {
	return utf8.RuneSelf <= r
}
537+
538+
// isWSP reports whether r is white space in the RFC 5234 Appendix B sense
// (WSP): a space or a horizontal tab.
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}

0 commit comments

Comments
 (0)