Skip to content

Commit e01ecd0

Browse files
Scheme: Fixed number pattern (#2648)
1 parent 05afbb1 commit e01ecd0

File tree

4 files changed

+150
-85
lines changed

4 files changed

+150
-85
lines changed

components/prism-scheme.js

Lines changed: 117 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -1,78 +1,121 @@
1-
Prism.languages.scheme = {
2-
// this supports "normal" single-line comments:
3-
// ; comment
4-
// and (potentially nested) multiline comments:
5-
// #| comment #| nested |# still comment |#
6-
// (only 1 level of nesting is supported)
7-
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
8-
'string': {
9-
pattern: /"(?:[^"\\]|\\.)*"/,
10-
greedy: true
11-
},
12-
'symbol': {
13-
pattern: /'[^()#'\s]+/,
14-
greedy: true
15-
},
16-
'character': {
17-
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
18-
greedy: true,
19-
alias: 'string'
20-
},
21-
'lambda-parameter': [
22-
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
23-
{
24-
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
1+
(function (Prism) {
2+
Prism.languages.scheme = {
3+
// this supports "normal" single-line comments:
4+
// ; comment
5+
// and (potentially nested) multiline comments:
6+
// #| comment #| nested |# still comment |#
7+
// (only 1 level of nesting is supported)
8+
'comment': /;.*|#;\s*\((?:[^()]|\([^()]*\))*\)|#\|(?:[^#|]|#(?!\|)|\|(?!#)|#\|(?:[^#|]|#(?!\|)|\|(?!#))*\|#)*\|#/,
9+
'string': {
10+
pattern: /"(?:[^"\\]|\\.)*"/,
11+
greedy: true
12+
},
13+
'symbol': {
14+
pattern: /'[^()#'\s]+/,
15+
greedy: true
16+
},
17+
'character': {
18+
pattern: /#\\(?:[ux][a-fA-F\d]+\b|[-a-zA-Z]+\b|\S)/,
19+
greedy: true,
20+
alias: 'string'
21+
},
22+
'lambda-parameter': [
23+
// https://www.cs.cmu.edu/Groups/AI/html/r4rs/r4rs_6.html#SEC30
24+
{
25+
pattern: /((?:^|[^'`#])\(lambda\s+)(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)/,
26+
lookbehind: true
27+
},
28+
{
29+
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
30+
lookbehind: true
31+
}
32+
],
33+
'keyword': {
34+
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
35+
lookbehind: true
36+
},
37+
'builtin': {
38+
// all functions of the base library of R7RS plus some of built-ins of R5Rs
39+
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
40+
lookbehind: true
41+
},
42+
'operator': {
43+
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
2544
lookbehind: true
2645
},
27-
{
28-
pattern: /((?:^|[^'`#])\(lambda\s+\()[^()']+/,
46+
'number': {
47+
// The number pattern from [the R7RS spec](https://small.r7rs.org/attachment/r7rs.pdf).
48+
//
49+
// <number> := <num 2>|<num 8>|<num 10>|<num 16>
50+
// <num R> := <prefix R><complex R>
51+
// <complex R> := <real R>(?:@<real R>|<imaginary R>)?|<imaginary R>
52+
// <imaginary R> := [+-](?:<ureal R>|(?:inf|nan)\.0)?i
53+
// <real R> := [+-]?<ureal R>|[+-](?:inf|nan)\.0
54+
// <ureal R> := <uint R>(?:\/<uint R>)?
55+
// | <decimal R>
56+
//
57+
// <decimal 10> := (?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?
58+
// <uint R> := <digit R>+
59+
// <prefix R> := <radix R>(?:#[ei])?|(?:#[ei])?<radix R>
60+
// <radix 2> := #b
61+
// <radix 8> := #o
62+
// <radix 10> := (?:#d)?
63+
// <radix 16> := #x
64+
// <digit 2> := [01]
65+
// <digit 8> := [0-7]
66+
// <digit 10> := \d
67+
// <digit 16> := [0-9a-f]
68+
//
69+
// The problem with this grammar is that the resulting regex is way too complex, so we simplify by grouping all
70+
// non-decimal bases together. This results in a decimal (dec) and combined binary, octal, and hexadecimal (box)
71+
// pattern:
72+
pattern: RegExp(SortedBNF({
73+
'<ureal dec>': /\d+(?:\/\d+)?|(?:\d+(?:\.\d*)?|\.\d+)(?:e[+-]?\d+)?/.source,
74+
'<real dec>': /[+-]?<ureal dec>|[+-](?:inf|nan)\.0/.source,
75+
'<imaginary dec>': /[+-](?:<ureal dec>|(?:inf|nan)\.0)?i/.source,
76+
'<complex dec>': /<real dec>(?:@<real dec>|<imaginary dec>)?|<imaginary dec>/.source,
77+
'<num dec>': /(?:#d(?:#[ei])?|#[ei](?:#d)?)?<complex dec>/.source,
78+
79+
'<ureal box>': /[0-9a-f]+(?:\/[0-9a-f]+)?/.source,
80+
'<real box>': /[+-]?<ureal box>|[+-](?:inf|nan)\.0/.source,
81+
'<imaginary box>': /[+-](?:<ureal box>|(?:inf|nan)\.0)?i/.source,
82+
'<complex box>': /<real box>(?:@<real box>|<imaginary box>)?|<imaginary box>/.source,
83+
'<num box>': /#[box](?:#[ei])?|(?:#[ei])?#[box]<complex box>/.source,
84+
85+
'<number>': /(^|[\s()])(?:<num dec>|<num box>)(?=[()\s]|$)/.source,
86+
}), 'i'),
2987
lookbehind: true
88+
},
89+
'boolean': {
90+
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
91+
lookbehind: true
92+
},
93+
'function': {
94+
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
95+
lookbehind: true
96+
},
97+
'identifier': {
98+
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
99+
lookbehind: true,
100+
greedy: true
101+
},
102+
'punctuation': /[()']/
103+
};
104+
105+
/**
106+
* Given a topologically sorted BNF grammar, this will return the RegExp source of the last rule of the grammar.
107+
*
108+
* @param {Record<string, string>} grammar
109+
* @returns {string}
110+
*/
111+
function SortedBNF(grammar) {
112+
for (var key in grammar) {
113+
grammar[key] = grammar[key].replace(/<[\w\s]+>/g, function (key) {
114+
return '(?:' + grammar[key].trim() + ')';
115+
});
30116
}
31-
],
32-
'keyword': {
33-
pattern: /((?:^|[^'`#])\()(?:begin|case(?:-lambda)?|cond(?:-expand)?|define(?:-library|-macro|-record-type|-syntax|-values)?|defmacro|delay(?:-force)?|do|else|export|except|guard|if|import|include(?:-ci|-library-declarations)?|lambda|let(?:rec)?(?:-syntax|-values|\*)?|let\*-values|only|parameterize|prefix|(?:quasi-?)?quote|rename|set!|syntax-(?:case|rules)|unless|unquote(?:-splicing)?|when)(?=[()\s]|$)/,
34-
lookbehind: true
35-
},
36-
'builtin': {
37-
// all functions of the base library of R7RS plus some of built-ins of R5Rs
38-
pattern: /((?:^|[^'`#])\()(?:abs|and|append|apply|assoc|ass[qv]|binary-port\?|boolean=?\?|bytevector(?:-append|-copy|-copy!|-length|-u8-ref|-u8-set!|\?)?|caar|cadr|call-with-(?:current-continuation|port|values)|call\/cc|car|cdar|cddr|cdr|ceiling|char(?:->integer|-ready\?|\?|<\?|<=\?|=\?|>\?|>=\?)|close-(?:input-port|output-port|port)|complex\?|cons|current-(?:error|input|output)-port|denominator|dynamic-wind|eof-object\??|eq\?|equal\?|eqv\?|error|error-object(?:-irritants|-message|\?)|eval|even\?|exact(?:-integer-sqrt|-integer\?|\?)?|expt|features|file-error\?|floor(?:-quotient|-remainder|\/)?|flush-output-port|for-each|gcd|get-output-(?:bytevector|string)|inexact\??|input-port(?:-open\?|\?)|integer(?:->char|\?)|lcm|length|list(?:->string|->vector|-copy|-ref|-set!|-tail|\?)?|make-(?:bytevector|list|parameter|string|vector)|map|max|member|memq|memv|min|modulo|negative\?|newline|not|null\?|number(?:->string|\?)|numerator|odd\?|open-(?:input|output)-(?:bytevector|string)|or|output-port(?:-open\?|\?)|pair\?|peek-char|peek-u8|port\?|positive\?|procedure\?|quotient|raise|raise-continuable|rational\?|rationalize|read-(?:bytevector|bytevector!|char|error\?|line|string|u8)|real\?|remainder|reverse|round|set-c[ad]r!|square|string(?:->list|->number|->symbol|->utf8|->vector|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?|<\?|<=\?|=\?|>\?|>=\?)?|substring|symbol(?:->string|\?|=\?)|syntax-error|textual-port\?|truncate(?:-quotient|-remainder|\/)?|u8-ready\?|utf8->string|values|vector(?:->list|->string|-append|-copy|-copy!|-fill!|-for-each|-length|-map|-ref|-set!|\?)?|with-exception-handler|write-(?:bytevector|char|string|u8)|zero\?)(?=[()\s]|$)/,
39-
lookbehind: true
40-
},
41-
'operator': {
42-
pattern: /((?:^|[^'`#])\()(?:[-+*%/]|[<>]=?|=>?)(?=[()\s]|$)/,
43-
lookbehind: true
44-
},
45-
'number': {
46-
// This pattern (apart from the lookarounds) works like this:
47-
//
48-
// Decimal numbers
49-
// <dec real> := \d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+
50-
// <dec complex> := <dec real>(?:[+-]<dec real>i)?|<dec real>i
51-
// <dec prefix> := (?:#d(?:#[ei])?|#[ei](?:#d)?)?
52-
// <dec number> := <dec prefix>[+-]?<complex>
53-
//
54-
// Binary, octal, and hexadecimal numbers
55-
// <b.o.x. real> := [\da-fA-F]+(?:\/[\da-fA-F]+)?
56-
// <b.o.x. complex> := <b.o.x. real>(?:[+-]<b.o.x. real>i)?|<b.o.x. real>i
57-
// <b.o.x. prefix> := #[box](?:#[ei])?|#[ei](?:#[box])?
58-
// <b.o.x. number> := <b.o.x. prefix>[+-]?<b.o.x. complex>
59-
//
60-
// <number> := <dec number>|<b.o.x. number>
61-
pattern: /(^|[\s()])(?:(?:#d(?:#[ei])?|#[ei](?:#d)?)?[+-]?(?:(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)(?:[+-](?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)?|(?:\d*\.?\d+(?:[eE][+-]?\d+)?|\d+\/\d+)i)|(?:#[box](?:#[ei])?|#[ei](?:#[box])?)[+-]?(?:[\da-fA-F]+(?:\/[\da-fA-F]+)?(?:[+-][\da-fA-F]+(?:\/[\da-fA-F]+)?i)?|[\da-fA-F]+(?:\/[\da-fA-F]+)?i))(?=[()\s]|$)/,
62-
lookbehind: true
63-
},
64-
'boolean': {
65-
pattern: /(^|[\s()])#(?:[ft]|false|true)(?=[()\s]|$)/,
66-
lookbehind: true
67-
},
68-
'function': {
69-
pattern: /((?:^|[^'`#])\()(?:[^|()'\s]+|\|(?:[^\\|]|\\.)*\|)(?=[()\s]|$)/,
70-
lookbehind: true
71-
},
72-
'identifier': {
73-
pattern: /(^|[\s()])\|(?:[^\\|]|\\.)*\|(?=[()\s]|$)/,
74-
lookbehind: true,
75-
greedy: true
76-
},
77-
'punctuation': /[()']/
78-
};
117+
// return the last item
118+
return grammar[key];
119+
}
120+
121+
})(Prism);

components/prism-scheme.min.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/languages/racket/number_feature.test

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
123
2+
13
(foo 42 +42 -42)
24
(foo 1e3 +1e3 -1e3)
35
(foo 1e+3 1e-3 3.14159 3.14159e-1)
46
(foo 8/3)
57
(foo 3+4i 2.5+0.0i 2.5+0.0i -2.5e4+0.0e4i 3+0i -2e-5i)
6-
(list 10i +10i -10i 10.10i 10+10i 10.10+10.10i 10-10i 10e+10i 10+10e+10i)
8+
(list +10i -10i 10+10i 10.10+10.10i 10-10i 10+10e+10i)
79

810
(list #d123 #e#d123e-4 #d#i12 #i-1.234i)
911

1012
(list #xBAD #b1110011 #o777)
1113
(list #i#x10 #i#x10+10i #b10+10i)
1214

15+
10+i
16+
10+.1i
17+
10+1.i
18+
1319
; not a number but a symbol
1420
(define 1+2 10)
1521

@@ -19,6 +25,8 @@
1925
----------------------------------------------------
2026

2127
[
28+
["number", "123"],
29+
2230
["punctuation", "("],
2331
["function", "foo"],
2432
["number", "42"],
@@ -58,14 +66,11 @@
5866

5967
["punctuation", "("],
6068
["builtin", "list"],
61-
["number", "10i"],
6269
["number", "+10i"],
6370
["number", "-10i"],
64-
["number", "10.10i"],
6571
["number", "10+10i"],
6672
["number", "10.10+10.10i"],
6773
["number", "10-10i"],
68-
["number", "10e+10i"],
6974
["number", "10+10e+10i"],
7075
["punctuation", ")"],
7176

@@ -91,8 +96,16 @@
9196
["number", "#b10+10i"],
9297
["punctuation", ")"],
9398

99+
["number", "10+i"],
100+
["number", "10+.1i"],
101+
["number", "10+1.i"],
102+
94103
["comment", "; not a number but a symbol"],
95-
["punctuation", "("], ["keyword", "define"], " 1+2 ", ["number", "10"], ["punctuation", ")"],
104+
["punctuation", "("],
105+
["keyword", "define"],
106+
" 1+2 ",
107+
["number", "10"],
108+
["punctuation", ")"],
96109

97110
["punctuation", "["],
98111
["function", "foo"],

0 commit comments

Comments
 (0)