-
-
Notifications
You must be signed in to change notification settings - Fork 5.8k
Switch Unicode Escaping to a VSCode-like system #19990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 20 commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
2e935d3
Switch Unicode Escaping to a VSCode-like system
zeripath 0f00129
placate lint
zeripath 1b626e2
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath 5d0aaf1
fix template issue
zeripath 0fb4df2
Merge remote-tracking branch 'origin' into vscode-escape
zeripath 7826c68
placate yet another linter
zeripath 872844f
more placation
zeripath a638e64
Merge branch 'main' into vscode-escape
zeripath 8a448aa
Use var colors
zeripath f49a102
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath 3eddae1
add missing fix
zeripath d7b03b0
permit raw nbsps in rendered markdown
zeripath bb36c71
Merge branch 'main' into vscode-escape
lafriks c74f7bf
Update modules/charset/ambiguous.go
zeripath a3702f2
Merge branch 'main' into vscode-escape
wxiaoguang 739c4ba
Merge branch 'main' into vscode-escape
zeripath 9b83725
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath a16e264
as per review
zeripath 39f15b9
as per review
zeripath c73d810
fix test
zeripath bf0d9dc
placate lint
zeripath cd27248
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath bd1336b
Make it clearer where ambiguous.json comes from
zeripath 31954cc
use template for code declarations in diff
zeripath 9c336a6
furhter subtemplating
zeripath afc0064
Merge branch 'main' into vscode-escape
zeripath File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import ( | ||
"sort" | ||
"strings" | ||
"unicode" | ||
|
||
"code.gitea.io/gitea/modules/translation" | ||
) | ||
|
||
// AmbiguousTablesForLocale provides the table of ambiguous characters for this locale. | ||
func AmbiguousTablesForLocale(locale translation.Locale) []*AmbiguousTable { | ||
key := locale.Language() | ||
var table *AmbiguousTable | ||
var ok bool | ||
for len(key) > 0 { | ||
if table, ok = AmbiguousCharacters[key]; ok { | ||
break | ||
} | ||
idx := strings.LastIndexAny(key, "-_") | ||
if idx < 0 { | ||
key = "" | ||
} else { | ||
key = key[:idx] | ||
} | ||
} | ||
if table == nil { | ||
table = AmbiguousCharacters["_default"] | ||
} | ||
|
||
return []*AmbiguousTable{ | ||
table, | ||
AmbiguousCharacters["_common"], | ||
} | ||
} | ||
|
||
func isAmbiguous(r rune, confusableTo *rune, tables ...*AmbiguousTable) bool { | ||
for _, table := range tables { | ||
if !unicode.Is(table.RangeTable, r) { | ||
continue | ||
} | ||
i := sort.Search(len(table.Confusable), func(i int) bool { | ||
return table.Confusable[i] >= r | ||
}) | ||
(*confusableTo) = table.With[i] | ||
return true | ||
} | ||
return false | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,176 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package main | ||
|
||
import ( | ||
"bytes" | ||
"flag" | ||
"fmt" | ||
"go/format" | ||
"os" | ||
"sort" | ||
"text/template" | ||
"unicode" | ||
|
||
"code.gitea.io/gitea/modules/json" | ||
|
||
"golang.org/x/text/unicode/rangetable" | ||
) | ||
|
||
// ambiguous.json provides a one to one mapping of ambiguous characters to other characters | ||
// See https://github.com/hediet/vscode-unicode-data | ||
|
||
// AmbiguousTable holds, for a single locale, the confusable runes, the runes
// they are confusable with (index-aligned with Confusable) and a range table
// built from the confusable runes.
type AmbiguousTable struct {
	Confusable, With []rune
	Locale           string
	RangeTable       *unicode.RangeTable
}
|
||
// RunePair couples one confusable rune with the rune it is confusable with.
type RunePair struct {
	Confusable, With rune
}
|
||
// verbose gates the output of verbosef; it is bound to the -v flag in main.
var verbose bool | ||
|
||
func main() { | ||
flag.Usage = func() { | ||
fmt.Fprintf(os.Stderr, `%s: Generate AmbiguousCharacter | ||
|
||
Usage: %[1]s [-v] [-o output.go] ambiguous.json | ||
`, os.Args[0]) | ||
flag.PrintDefaults() | ||
} | ||
|
||
output := "" | ||
flag.BoolVar(&verbose, "v", false, "verbose output") | ||
flag.StringVar(&output, "o", "ambiguous_gen.go", "file to output to") | ||
flag.Parse() | ||
input := flag.Arg(0) | ||
if input == "" { | ||
input = "ambiguous.json" | ||
} | ||
|
||
bs, err := os.ReadFile(input) | ||
if err != nil { | ||
fatalf("Unable to read: %s Err: %v", input, err) | ||
} | ||
|
||
var unwrapped string | ||
if err := json.Unmarshal(bs, &unwrapped); err != nil { | ||
fatalf("Unable to unwrap content in: %s Err: %v", input, err) | ||
} | ||
|
||
fromJSON := map[string][]uint32{} | ||
if err := json.Unmarshal([]byte(unwrapped), &fromJSON); err != nil { | ||
fatalf("Unable to unmarshal content in: %s Err: %v", input, err) | ||
} | ||
|
||
tables := make([]*AmbiguousTable, 0, len(fromJSON)) | ||
for locale, chars := range fromJSON { | ||
table := &AmbiguousTable{Locale: locale} | ||
table.Confusable = make([]rune, 0, len(chars)/2) | ||
table.With = make([]rune, 0, len(chars)/2) | ||
pairs := make([]RunePair, len(chars)/2) | ||
for i := 0; i < len(chars); i += 2 { | ||
pairs[i/2].Confusable, pairs[i/2].With = rune(chars[i]), rune(chars[i+1]) | ||
} | ||
sort.Slice(pairs, func(i, j int) bool { | ||
return pairs[i].Confusable < pairs[j].Confusable | ||
}) | ||
for _, pair := range pairs { | ||
table.Confusable = append(table.Confusable, pair.Confusable) | ||
table.With = append(table.With, pair.With) | ||
} | ||
table.RangeTable = rangetable.New(table.Confusable...) | ||
tables = append(tables, table) | ||
} | ||
sort.Slice(tables, func(i, j int) bool { | ||
return tables[i].Locale < tables[j].Locale | ||
}) | ||
data := map[string]interface{}{ | ||
"Tables": tables, | ||
} | ||
|
||
if err := runTemplate(generatorTemplate, output, &data); err != nil { | ||
fatalf("Unable to run template: %v", err) | ||
} | ||
} | ||
|
||
func runTemplate(t *template.Template, filename string, data interface{}) error { | ||
buf := bytes.NewBuffer(nil) | ||
if err := t.Execute(buf, data); err != nil { | ||
return fmt.Errorf("unable to execute template: %w", err) | ||
} | ||
bs, err := format.Source(buf.Bytes()) | ||
if err != nil { | ||
verbosef("Bad source:\n%s", buf.String()) | ||
return fmt.Errorf("unable to format source: %w", err) | ||
} | ||
file, err := os.Create(filename) | ||
if err != nil { | ||
return fmt.Errorf("failed to create file %s because %w", filename, err) | ||
} | ||
defer file.Close() | ||
_, err = file.Write(bs) | ||
if err != nil { | ||
return fmt.Errorf("unable to write generated source: %w", err) | ||
} | ||
return nil | ||
} | ||
|
||
// generatorTemplate renders the generated Go file for package charset: the
// AmbiguousTable type declaration followed by the AmbiguousCharacters map with
// one entry per element of .Tables, including a literal unicode.RangeTable
// built from each table's R16, R32 and LatinOffset values.
var generatorTemplate = template.Must(template.New("ambiguousTemplate").Parse(`// This file is generated by modules/charset/ambiguous/generate.go DO NOT EDIT | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import "unicode" | ||
|
||
// AmbiguousTable matches a confusable rune with its partner for the Locale | ||
type AmbiguousTable struct { | ||
Confusable []rune | ||
With []rune | ||
Locale string | ||
RangeTable *unicode.RangeTable | ||
} | ||
|
||
// AmbiguousCharacters provides a map by locale name to the confusable characters in that locale | ||
var AmbiguousCharacters = map[string]*AmbiguousTable{ | ||
{{range .Tables}}{{printf "%q:" .Locale}} { | ||
Confusable: []rune{ {{range .Confusable}}{{.}},{{end}} }, | ||
With: []rune{ {{range .With}}{{.}},{{end}} }, | ||
Locale: {{printf "%q" .Locale}}, | ||
RangeTable: &unicode.RangeTable{ | ||
R16: []unicode.Range16{ | ||
{{range .RangeTable.R16 }} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, | ||
{{end}} }, | ||
R32: []unicode.Range32{ | ||
{{range .RangeTable.R32}} {Lo:{{.Lo}}, Hi:{{.Hi}}, Stride: {{.Stride}}}, | ||
{{end}} }, | ||
LatinOffset: {{.RangeTable.LatinOffset}}, | ||
}, | ||
}, | ||
{{end}} | ||
} | ||
|
||
`)) | ||
|
||
// logf prints the formatted message to standard error, terminated by a newline.
func logf(format string, args ...interface{}) {
	line := format + "\n"
	fmt.Fprintf(os.Stderr, line, args...)
}
|
||
func verbosef(format string, args ...interface{}) { | ||
if verbose { | ||
logf(format, args...) | ||
} | ||
} | ||
|
||
func fatalf(format string, args ...interface{}) { | ||
logf("fatal: "+format+"\n", args...) | ||
os.Exit(1) | ||
} |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import ( | ||
"sort" | ||
"testing" | ||
"unicode" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func TestAmbiguousCharacters(t *testing.T) { | ||
for locale, ambiguous := range AmbiguousCharacters { | ||
assert.Equal(t, locale, ambiguous.Locale) | ||
assert.Equal(t, len(ambiguous.Confusable), len(ambiguous.With)) | ||
assert.True(t, sort.SliceIsSorted(ambiguous.Confusable, func(i, j int) bool { | ||
return ambiguous.Confusable[i] < ambiguous.Confusable[j] | ||
})) | ||
|
||
for _, confusable := range ambiguous.Confusable { | ||
assert.True(t, unicode.Is(ambiguous.RangeTable, confusable)) | ||
i := sort.Search(len(ambiguous.Confusable), func(j int) bool { | ||
return ambiguous.Confusable[j] >= confusable | ||
}) | ||
found := i < len(ambiguous.Confusable) && ambiguous.Confusable[i] == confusable | ||
assert.True(t, found, "%c is not in %d", confusable, i) | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import ( | ||
"bytes" | ||
"io" | ||
) | ||
|
||
// BreakWriter wraps an io.Writer to always write '\n' as '<br>'
type BreakWriter struct {
	io.Writer
}

// Write writes the provided bs transparently replacing '\n' with '<br>'.
//
// The returned n counts bytes of bs that were consumed, not bytes emitted: a
// replaced '\n' counts as one byte even though four are written, so a fully
// successful call returns len(bs). On error n is the number of input bytes
// handled before the failing underlying write, and that write's error is
// returned. An empty bs results in no underlying write at all.
func (b *BreakWriter) Write(bs []byte) (n int, err error) {
	// Converted once per call instead of once per newline.
	br := []byte("<br>")

	rest := bs
	for len(rest) > 0 {
		idx := bytes.IndexByte(rest, '\n')
		if idx < 0 {
			// No newline left: pass the remainder straight through.
			wn, werr := b.Writer.Write(rest)
			return n + wn, werr
		}

		if idx > 0 {
			// Emit the text preceding the newline unchanged.
			wn, werr := b.Writer.Write(rest[:idx])
			n += wn
			if werr != nil {
				return n, werr
			}
		}

		if _, werr := b.Writer.Write(br); werr != nil {
			return n, werr
		}
		n++ // account for the single '\n' byte that was replaced
		rest = rest[idx+1:]
	}

	return n, nil
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
// Copyright 2022 The Gitea Authors. All rights reserved. | ||
// Use of this source code is governed by a MIT-style | ||
// license that can be found in the LICENSE file. | ||
|
||
package charset | ||
|
||
import ( | ||
"strings" | ||
"testing" | ||
) | ||
|
||
func TestBreakWriter_Write(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
kase string | ||
expect string | ||
wantErr bool | ||
}{ | ||
{ | ||
name: "noline", | ||
kase: "abcdefghijklmnopqrstuvwxyz", | ||
expect: "abcdefghijklmnopqrstuvwxyz", | ||
}, | ||
{ | ||
name: "endline", | ||
kase: "abcdefghijklmnopqrstuvwxyz\n", | ||
expect: "abcdefghijklmnopqrstuvwxyz<br>", | ||
}, | ||
{ | ||
name: "startline", | ||
kase: "\nabcdefghijklmnopqrstuvwxyz", | ||
expect: "<br>abcdefghijklmnopqrstuvwxyz", | ||
}, | ||
{ | ||
name: "onlyline", | ||
kase: "\n\n\n", | ||
expect: "<br><br><br>", | ||
}, | ||
{ | ||
name: "empty", | ||
kase: "", | ||
expect: "", | ||
}, | ||
{ | ||
name: "midline", | ||
kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz", | ||
expect: "<br>abc<br>defghijkl<br>mnopqrstuvwxy<br>z", | ||
}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
buf := &strings.Builder{} | ||
b := &BreakWriter{ | ||
Writer: buf, | ||
} | ||
n, err := b.Write([]byte(tt.kase)) | ||
if (err != nil) != tt.wantErr { | ||
t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tt.wantErr) | ||
return | ||
} | ||
if n != len(tt.kase) { | ||
t.Errorf("BreakWriter.Write() = %v, want %v", n, len(tt.kase)) | ||
} | ||
if buf.String() != tt.expect { | ||
t.Errorf("BreakWriter.Write() wrote %q, want %v", buf.String(), tt.expect) | ||
} | ||
}) | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.