@@ -24,6 +24,8 @@ import (
24
24
"strconv"
25
25
"strings"
26
26
"unicode"
27
+
28
+ "golang.org/x/text/unicode/rangetable"
27
29
)
28
30
29
31
func main () {
@@ -450,9 +452,7 @@ const progHeader = `// Copyright 2013 The Go Authors. All rights reserved.
450
452
// Use of this source code is governed by a BSD-style
451
453
// license that can be found in the LICENSE file.
452
454
453
- // Code generated by maketables; DO NOT EDIT.
454
- // To regenerate, run:
455
- // maketables --tables=%s --data=%s --casefolding=%s
455
+ // Code generated by go generate; DO NOT EDIT.
456
456
457
457
package unicode
458
458
@@ -504,7 +504,7 @@ func printCategories() {
504
504
fullCategoryTest (list )
505
505
return
506
506
}
507
- printf (progHeader , * tablelist , * dataURL , * casefoldingURL )
507
+ printf (progHeader )
508
508
509
509
println ("// Version is the Unicode edition from which the tables are derived." )
510
510
printf ("const Version = %q\n \n " , version ())
@@ -596,91 +596,38 @@ func printCategories() {
596
596
597
597
type Op func (code rune ) bool
598
598
599
- const format = "\t \t {0x%04x, 0x%04x, %d},\n "
600
-
601
599
func dumpRange (header string , inCategory Op ) {
602
- print (header )
603
- next := rune (0 )
604
- latinOffset := 0
605
- print ("\t R16: []Range16{\n " )
606
- // one Range for each iteration
607
- count := & range16Count
608
- size := 16
609
- for {
610
- // look for start of range
611
- for next < rune (len (chars )) && ! inCategory (next ) {
612
- next ++
613
- }
614
- if next >= rune (len (chars )) {
615
- // no characters remain
616
- break
617
- }
618
-
619
- // start of range
620
- lo := next
621
- hi := next
622
- stride := rune (1 )
623
- // accept lo
624
- next ++
625
- // look for another character to set the stride
626
- for next < rune (len (chars )) && ! inCategory (next ) {
627
- next ++
628
- }
629
- if next >= rune (len (chars )) {
630
- // no more characters
631
- printf (format , lo , hi , stride )
632
- break
633
- }
634
- // set stride
635
- stride = next - lo
636
- // check for length of run. next points to first jump in stride
637
- for i := next ; i < rune (len (chars )); i ++ {
638
- if inCategory (i ) == (((i - lo ) % stride ) == 0 ) {
639
- // accept
640
- if inCategory (i ) {
641
- hi = i
642
- }
643
- } else {
644
- // no more characters in this run
645
- break
646
- }
647
- }
648
- if uint32 (hi ) <= unicode .MaxLatin1 {
649
- latinOffset ++
600
+ runes := []rune {}
601
+ for i := range chars {
602
+ r := rune (i )
603
+ if inCategory (r ) {
604
+ runes = append (runes , r )
650
605
}
651
- size , count = printRange (uint32 (lo ), uint32 (hi ), uint32 (stride ), size , count )
652
- // next range: start looking where this range ends
653
- next = hi + 1
654
- }
655
- print ("\t },\n " )
656
- if latinOffset > 0 {
657
- printf ("\t LatinOffset: %d,\n " , latinOffset )
658
606
}
659
- print ( "} \n \n " )
607
+ printRangeTable ( header , runes )
660
608
}
661
609
662
- func printRange (lo , hi , stride uint32 , size int , count * int ) (int , * int ) {
663
- if size == 16 && hi >= 1 << 16 {
664
- if lo < 1 << 16 {
665
- if lo + stride != hi {
666
- logger .Fatalf ("unexpected straddle: %U %U %d" , lo , hi , stride )
667
- }
668
- // No range contains U+FFFF as an instance, so split
669
- // the range into two entries. That way we can maintain
670
- // the invariant that R32 contains only >= 1<<16.
671
- printf (format , lo , lo , 1 )
672
- lo = hi
673
- stride = 1
674
- * count ++
610
+ func printRangeTable (header string , runes []rune ) {
611
+ rt := rangetable .New (runes ... )
612
+ print (header )
613
+ println ("\t R16: []Range16{" )
614
+ for _ , r := range rt .R16 {
615
+ printf ("\t \t {%#04x, %#04x, %d},\n " , r .Lo , r .Hi , r .Stride )
616
+ range16Count ++
617
+ }
618
+ println ("\t }," )
619
+ if len (rt .R32 ) > 0 {
620
+ println ("\t R32: []Range32{" )
621
+ for _ , r := range rt .R32 {
622
+ printf ("\t \t {%#x, %#x, %d},\n " , r .Lo , r .Hi , r .Stride )
623
+ range32Count ++
675
624
}
676
- print ("\t },\n " )
677
- print ("\t R32: []Range32{\n " )
678
- size = 32
679
- count = & range32Count
680
- }
681
- printf (format , lo , hi , stride )
682
- * count ++
683
- return size , count
625
+ println ("\t }," )
626
+ }
627
+ if rt .LatinOffset > 0 {
628
+ printf ("\t LatinOffset: %d,\n " , rt .LatinOffset )
629
+ }
630
+ printf ("}\n \n " )
684
631
}
685
632
686
633
func fullCategoryTest (list []string ) {
@@ -751,26 +698,6 @@ func parseScript(line string, scripts map[string][]Script) {
751
698
scripts [name ] = append (scripts [name ], Script {uint32 (lo ), uint32 (hi ), name })
752
699
}
753
700
754
- // The script tables have a lot of adjacent elements. Fold them together.
755
- func foldAdjacent (r []Script ) []unicode.Range32 {
756
- s := make ([]unicode.Range32 , 0 , len (r ))
757
- j := 0
758
- for i := 0 ; i < len (r ); i ++ {
759
- if j > 0 && r [i ].lo == s [j - 1 ].Hi + 1 {
760
- s [j - 1 ].Hi = r [i ].hi
761
- } else {
762
- s = s [0 : j + 1 ]
763
- s [j ] = unicode.Range32 {
764
- Lo : uint32 (r [i ].lo ),
765
- Hi : uint32 (r [i ].hi ),
766
- Stride : 1 ,
767
- }
768
- j ++
769
- }
770
- }
771
- return s
772
- }
773
-
774
701
func fullScriptTest (list []string , installed map [string ]* unicode.RangeTable , scripts map [string ][]Script ) {
775
702
for _ , name := range list {
776
703
if _ , ok := scripts [name ]; ! ok {
@@ -796,13 +723,11 @@ var deprecatedAliases = map[string]string{
796
723
797
724
// PropList.txt has the same format as Scripts.txt so we can share its parser.
798
725
func printScriptOrProperty (doProps bool ) {
799
- flag := "scripts"
800
726
flaglist := * scriptlist
801
727
file := "Scripts.txt"
802
728
table := scripts
803
729
installed := unicode .Scripts
804
730
if doProps {
805
- flag = "props"
806
731
flaglist = * proplist
807
732
file = "PropList.txt"
808
733
table = props
@@ -831,13 +756,6 @@ func printScriptOrProperty(doProps bool) {
831
756
return
832
757
}
833
758
834
- printf (
835
- "// Generated by running\n " +
836
- "// maketables --%s=%s --url=%s\n " +
837
- "// DO NOT EDIT\n \n " ,
838
- flag ,
839
- flaglist ,
840
- * url )
841
759
if flaglist == "all" {
842
760
if doProps {
843
761
println ("// Properties is the set of Unicode property tables." )
@@ -874,19 +792,14 @@ func printScriptOrProperty(doProps bool) {
874
792
alias , name )
875
793
ndecl ++
876
794
}
877
- printf ("var _%s = &RangeTable {\n " , name )
878
- ranges := foldAdjacent (table [name ])
879
- print ("\t R16: []Range16{\n " )
880
- size := 16
881
- count := & range16Count
882
- for _ , s := range ranges {
883
- size , count = printRange (s .Lo , s .Hi , s .Stride , size , count )
884
- }
885
- print ("\t },\n " )
886
- if off := findLatinOffset (ranges ); off > 0 {
887
- printf ("\t LatinOffset: %d,\n " , off )
795
+ decl := fmt .Sprintf ("var _%s = &RangeTable {\n " , name )
796
+ runes := []rune {}
797
+ for _ , scr := range table [name ] {
798
+ for r := scr .lo ; r <= scr .hi ; r ++ {
799
+ runes = append (runes , rune (r ))
800
+ }
888
801
}
889
- print ( "} \n \n " )
802
+ printRangeTable ( decl , runes )
890
803
}
891
804
decl .Sort ()
892
805
println ("// These variables have type *RangeTable." )
@@ -897,14 +810,6 @@ func printScriptOrProperty(doProps bool) {
897
810
print (")\n \n " )
898
811
}
899
812
900
- func findLatinOffset (ranges []unicode.Range32 ) int {
901
- i := 0
902
- for i < len (ranges ) && ranges [i ].Hi <= unicode .MaxLatin1 {
903
- i ++
904
- }
905
- return i
906
- }
907
-
908
813
const (
909
814
CaseUpper = 1 << iota
910
815
CaseLower
@@ -1054,14 +959,10 @@ func printCases() {
1054
959
return
1055
960
}
1056
961
printf (
1057
- "// Generated by running\n " +
1058
- "// maketables --data=%s --casefolding=%s\n " +
1059
- "// DO NOT EDIT\n \n " +
1060
- "// CaseRanges is the table describing case mappings for all letters with\n " +
1061
- "// non-self mappings.\n " +
1062
- "var CaseRanges = _CaseRanges\n " +
1063
- "var _CaseRanges = []CaseRange {\n " ,
1064
- * dataURL , * casefoldingURL )
962
+ "// CaseRanges is the table describing case mappings for all letters with\n " +
963
+ "// non-self mappings.\n " +
964
+ "var CaseRanges = _CaseRanges\n " +
965
+ "var _CaseRanges = []CaseRange {\n " )
1065
966
1066
967
var startState * caseState // the start of a run; nil for not active
1067
968
var prevState = & caseState {} // the state of the previous character
0 commit comments