Skip to content

Commit 728769b

Browse files
author
Denis Krivak
committed
Fix working with non-ascii symbols.
1 parent a0deae5 commit 728769b

File tree

3 files changed

+110
-12
lines changed

3 files changed

+110
-12
lines changed

checks.go

+32-8
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ func checkCommentForPeriod(c comment) *Issue {
7777
// Make a replacement. Use `pos.line` to get an original line from
7878
// attached lines. Use `iss.Pos.Column` because it's a position in
7979
// the original line.
80-
original := []rune(c.lines[pos.line-1])
81-
iss.Replacement = string(original[:iss.Pos.Column-1]) + "." +
82-
string(original[iss.Pos.Column-1:])
80+
original := c.lines[pos.line-1]
81+
iss.Replacement = original[:iss.Pos.Column-1] + "." +
82+
original[iss.Pos.Column-1:]
8383

8484
// Save replacement to raw lines to be able to combine it with
8585
// further replacements
@@ -118,9 +118,11 @@ func checkCommentForCapital(c comment) []Issue {
118118
// Make a replacement. Use `pos.line` to get an original line from
119119
// attached lines. Use `iss.Pos.Column` because it's a position in
120120
// the original line.
121-
rep := []rune(c.lines[pos.line-1])
122-
rep[iss.Pos.Column-1] = unicode.ToTitle(rep[iss.Pos.Column-1])
123-
iss.Replacement = string(rep)
121+
line := c.lines[pos.line-1]
122+
col := byteToRuneColumn(line, iss.Pos.Column) - 1
123+
rep := string(unicode.ToTitle([]rune(line)[col])) // capital letter
124+
iss.Replacement = line[:iss.Pos.Column-1] + rep +
125+
line[iss.Pos.Column-1+len(rep):]
124126

125127
// Save replacement to raw lines to be able to combine it with
126128
// further replacements
@@ -158,7 +160,7 @@ func checkPeriod(comment string) (pos position, ok bool) {
158160
return position{}, true
159161
}
160162

161-
pos.column = len([]rune(line)) + 1
163+
pos.column = len(line) + 1
162164
return pos, false
163165
}
164166

@@ -209,7 +211,10 @@ func checkCapital(comment string, skipFirst bool) (pp []position) {
209211
continue
210212
}
211213
if state == endOfSentence && unicode.IsLower(r) {
212-
pp = append(pp, position{line: pos.line, column: pos.column})
214+
pp = append(pp, position{
215+
line: pos.line,
216+
column: runeToByteColumn(comment, pos.column),
217+
})
213218
}
214219
state = empty
215220
}
@@ -267,3 +272,22 @@ func hasSuffix(s string, suffixes []string) bool {
267272
}
268273
return false
269274
}
275+
276+
// The following two functions convert byte and rune indexes.
277+
//
278+
// Example:
279+
// text: a b c Ш e f
280+
// runes: 1 2 3 4 5 6
281+
// bytes: 1 2 3 4 6 7
282+
// The reason for the difference is that the size of "Ш" is 2 bytes.
283+
// NOTE: Works only for 1-based indexes (line columns).
284+
285+
// byteToRuneColumn converts a 1-based byte column inside s to the matching
// 1-based rune column.
//
// It counts the runes that precede byte offset i-1 and adds one. Bytes that
// do not form valid UTF-8 are counted as one rune each, which matches what a
// []rune conversion of the same prefix would produce.
//
// Out-of-range columns are clamped instead of causing a slice-bounds panic:
// i < 1 is treated as column 1, and i past the end of s is treated as the
// column right after the last byte.
func byteToRuneColumn(s string, i int) int {
	if i < 1 {
		return 1
	}
	if i > len(s)+1 {
		i = len(s) + 1
	}

	// Ranging over a string advances rune by rune, so this counts runes
	// without allocating an intermediate []rune.
	col := 1
	for range s[:i-1] {
		col++
	}
	return col
}
289+
290+
// runeToByteColumn converts a 1-based rune column inside s to the matching
// 1-based byte column.
//
// The result is the byte offset at which the i-th rune of s starts, plus one.
// The offset is measured in s itself, so it stays usable for slicing s even
// when s contains bytes that are not valid UTF-8.
//
// Out-of-range columns are clamped instead of causing a slice-bounds panic:
// i < 1 is treated as column 1, and i past the last rune yields the column
// right after the last byte (len(s) + 1).
func runeToByteColumn(s string, i int) int {
	if i < 1 {
		return 1
	}

	// runeCol is the 1-based rune column of the rune starting at offset.
	runeCol := 1
	for offset := range s {
		if runeCol == i {
			return offset + 1
		}
		runeCol++
	}

	// i points at (or beyond) the position right after the last rune.
	return len(s) + 1
}

checks_test.go

+76-2
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ func TestCheckPeriod(t *testing.T) {
8282
{
8383
name: "cyrillic, without period",
8484
text: "Кириллица",
85-
issue: position{line: 1, column: 10},
85+
issue: position{line: 1, column: 19},
8686
},
8787
{
8888
name: "parenthesis, with period",
@@ -169,7 +169,7 @@ func TestCheckCapital(t *testing.T) {
169169
text: "Кириллица? кириллица!",
170170
skipFirst: false,
171171
issues: []position{
172-
{line: 1, column: 12},
172+
{line: 1, column: 21},
173173
},
174174
},
175175
{
@@ -395,3 +395,77 @@ func TestHasSuffix(t *testing.T) {
395395
})
396396
}
397397
}
398+
399+
func TestByteToRuneColumn(t *testing.T) {
400+
testCases := []struct {
401+
name string
402+
str string
403+
index int
404+
out int
405+
}{
406+
{
407+
name: "ascii symbols",
408+
str: "hello, world",
409+
index: 5,
410+
out: 5,
411+
},
412+
{
413+
name: "cyrillic symbols at the end",
414+
str: "hello, мир",
415+
index: 5,
416+
out: 5,
417+
},
418+
{
419+
name: "cyrillic symbols at the beginning",
420+
str: "привет, world",
421+
index: 15,
422+
out: 9,
423+
},
424+
}
425+
426+
for _, tt := range testCases {
427+
tt := tt
428+
t.Run(tt.name, func(t *testing.T) {
429+
if out := byteToRuneColumn(tt.str, tt.index); out != tt.out {
430+
t.Fatalf("Wrong column\n expected: %d\n got: %d", tt.out, out)
431+
}
432+
})
433+
}
434+
}
435+
436+
func TestRuneToByteColumn(t *testing.T) {
437+
testCases := []struct {
438+
name string
439+
str string
440+
index int
441+
out int
442+
}{
443+
{
444+
name: "ascii symbols",
445+
str: "hello, world",
446+
index: 5,
447+
out: 5,
448+
},
449+
{
450+
name: "cyrillic symbols at the end",
451+
str: "hello, мир",
452+
index: 5,
453+
out: 5,
454+
},
455+
{
456+
name: "cyrillic symbols at the beginning",
457+
str: "привет, world",
458+
index: 9,
459+
out: 15,
460+
},
461+
}
462+
463+
for _, tt := range testCases {
464+
tt := tt
465+
t.Run(tt.name, func(t *testing.T) {
466+
if out := runeToByteColumn(tt.str, tt.index); out != tt.out {
467+
t.Fatalf("Wrong column\n expected: %d\n got: %d", tt.out, out)
468+
}
469+
})
470+
}
471+
}

godot.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,8 +27,8 @@ type Issue struct {
2727

2828
// position is a position inside a comment (might be multiline comment).
2929
type position struct {
30-
line int
31-
column int
30+
line int // starts at 1
31+
column int // starts at 1, byte count
3232
}
3333

3434
// comment is an internal representation of AST comment entity with additional

0 commit comments

Comments
 (0)