Skip to content

Commit ee9a093

Browse files
committed
Treat cpp string as UTF8
Implementation borrowed from the net/mail package (message.go). Solves arduino/Arduino#5277.
1 parent 5b1ad1a commit ee9a093

File tree

1 file changed

+96
-18
lines changed

1 file changed

+96
-18
lines changed

src/arduino.cc/builder/utils/utils.go

+96-18
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,17 @@
3030
package utils
3131

3232
import (
33+
"bytes"
3334
"crypto/md5"
3435
"encoding/hex"
36+
"errors"
3537
"io/ioutil"
3638
"os"
3739
"os/exec"
3840
"path/filepath"
3941
"runtime"
4042
"strings"
43+
"unicode/utf8"
4144

4245
"arduino.cc/builder/constants"
4346
"arduino.cc/builder/gohasissues"
@@ -436,30 +439,105 @@ func ParseCppString(line string) (string, string, bool) {
436439
return "", line, false
437440
}
438441

442+
s, rem, err := consumeQuotedString(line)
443+
444+
return s, rem, err == nil
445+
}
446+
447+
// from package net/mail/message.go
448+
// consumeQuotedString parses the quoted string at the start of p.
449+
func consumeQuotedString(s string) (qs string, rem string, err error) {
450+
// Assume first byte is '"'.
439451
i := 1
440-
res := ""
452+
qsb := make([]rune, 0, 10)
453+
454+
escaped := false
455+
456+
Loop:
441457
for {
442-
if i >= len(line) {
443-
return "", line, false
444-
}
458+
r, size := utf8.DecodeRuneInString(s[i:])
459+
460+
switch {
461+
case size == 0:
462+
return "", s, errors.New("Unclosed quoted-string")
445463

446-
switch line[i] {
447-
// Backslash, next character is used unmodified
448-
case '\\':
449-
i++
450-
if i >= len(line) {
451-
return "", line, false
464+
case size == 1 && r == utf8.RuneError:
465+
return "", s, errors.New("Invalid utf-8 in quoted-string")
466+
467+
case escaped:
468+
// quoted-pair = ("\" (VCHAR / WSP))
469+
470+
if !isVchar(r) && !isWSP(r) {
471+
return "", s, errors.New("Bad character in quoted-string")
452472
}
453-
res += string(line[i])
454-
break
455-
// Quote, end of string
456-
case '"':
457-
return res, line[i+1:], true
473+
474+
qsb = append(qsb, r)
475+
escaped = false
476+
477+
case isQtext(r) || isWSP(r):
478+
// qtext (printable US-ASCII excluding " and \), or
479+
// FWS (almost; we're ignoring CRLF)
480+
qsb = append(qsb, r)
481+
482+
case r == '"':
483+
break Loop
484+
485+
case r == '\\':
486+
escaped = true
487+
458488
default:
459-
res += string(line[i])
460-
break
489+
return "", s, errors.New("Bad character in quoted-string")
490+
461491
}
462492

463-
i++
493+
i += size
494+
}
495+
s = s[i+1:]
496+
if len(qsb) == 0 {
497+
return "", s, errors.New("Empty quoted-string")
498+
}
499+
return string(qsb), s, nil
500+
}
501+
502+
// isQtext reports whether r is an RFC 5322 qtext character.
503+
func isQtext(r rune) bool {
504+
// Printable US-ASCII, excluding backslash or quote.
505+
if r == '\\' || r == '"' {
506+
return false
507+
}
508+
return isVchar(r)
509+
}
510+
511+
// quoteString renders a string as an RFC 5322 quoted-string.
512+
func quoteString(s string) string {
513+
var buf bytes.Buffer
514+
buf.WriteByte('"')
515+
for _, r := range s {
516+
if isQtext(r) || isWSP(r) {
517+
buf.WriteRune(r)
518+
} else if isVchar(r) {
519+
buf.WriteByte('\\')
520+
buf.WriteRune(r)
521+
}
464522
}
523+
buf.WriteByte('"')
524+
return buf.String()
525+
}
526+
527+
// isVchar reports whether r is an RFC 5322 VCHAR character.
528+
func isVchar(r rune) bool {
529+
// Visible (printing) characters.
530+
return '!' <= r && r <= '~' || isMultibyte(r)
531+
}
532+
533+
// isMultibyte reports whether r is a multi-byte UTF-8 character as
// supported by RFC 6532, i.e. whether r is at or above utf8.RuneSelf.
func isMultibyte(r rune) bool {
	singleByte := r < utf8.RuneSelf
	return !singleByte
}
538+
539+
// isWSP reports whether r is a WSP (white space) character.
// WSP is a space or a horizontal tab (RFC 5234 Appendix B).
func isWSP(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}

0 commit comments

Comments
 (0)