Skip to content

Commit 2afe668

Browse files
authored
Check string literals against regular expressions (#511)
Add string-format rule
1 parent 8635ef9 commit 2afe668

File tree

7 files changed

+448
-1
lines changed

7 files changed

+448
-1
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ List of all available rules. The rules ported from `golint` are left unchanged a
357357
| [`unhandled-error`](./RULES_DESCRIPTIONS.md#unhandled-error) | []string | Warns on unhandled errors returned by funcion calls | no | yes |
358358
| [`cognitive-complexity`](./RULES_DESCRIPTIONS.md#cognitive-complexity) | int | Sets restriction for maximum Cognitive complexity. | no | no |
359359
| [`string-of-int`](./RULES_DESCRIPTIONS.md#string-of-int) | n/a | Warns on suspicious casts from int to string | no | yes |
360+
| [`string-format`](./RULES_DESCRIPTIONS.md#string-format) | map | Warns on specific string literals that fail one or more user-configured regular expressions | no | no |
360361
| [`early-return`](./RULES_DESCRIPTIONS.md#early-return) | n/a | Spots if-then-else statements that can be refactored to simplify code reading | no | no |
361362
| [`unconditional-recursion`](./RULES_DESCRIPTIONS.md#unconditional-recursion) | n/a | Warns on function calls that will lead to (direct) infinite recursion | no | no |
362363
| [`identical-branches`](./RULES_DESCRIPTIONS.md#identical-branches) | n/a | Spots if-then-else statements with identical `then` and `else` branches | no | no |

RULES_DESCRIPTIONS.md

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ List of all available rules.
5353
- [redefines-builtin-id](#redefines-builtin-id)
5454
- [string-of-int](#string-of-int)
5555
- [struct-tag](#struct-tag)
56+
- [string-format](#string-format)
5657
- [superfluous-else](#superfluous-else)
5758
- [time-naming](#time-naming)
5859
- [var-naming](#var-naming)
@@ -488,6 +489,29 @@ _Description_: explicit type conversion `string(i)` where `i` has an integer ty
488489

489490
_Configuration_: N/A
490491

492+
## string-format
493+
494+
_Description_: This rule allows you to configure a list of regular expressions that string literals in certain function calls are checked against.
495+
This is geared towards user facing applications where string literals are often used for messages that will be presented to users, so it may be desirable to enforce consistent formatting.
496+
497+
_Configuration_: Each argument is a slice containing 2-3 strings: a scope, a regex, and an optional error message.
498+
499+
1. The first string defines a scope. This controls which string literals the regex will apply to, and is defined as a function argument. It must contain at least a function name (`core.WriteError`). Scopes may optionally contain a number specifying which argument in the function to check (`core.WriteError[1]`), as well as a struct field (`core.WriteError[1].Message`, only works for top level fields). Function arguments are counted starting at 0, so `[0]` would refer to the first argument, `[1]` would refer to the second, etc. If no argument number is provided, the first argument will be used (same as `[0]`).
500+
501+
2. The second string is a regular expression (beginning and ending with a `/` character), which will be used to check the string literals in the scope.
502+
503+
3. The third string (optional) is a message containing the purpose for the regex, which will be used in lint errors.
504+
505+
Example:
506+
507+
```toml
508+
[rule.string-format]
509+
arguments = [
510+
["core.WriteError[1].Message", "/^([^A-Z]|$)/", "must not start with a capital letter"],
511+
["fmt.Errorf[0]", "/(^|[^\\.!?])$/", "must not end in punctuation"],
512+
["panic", "/^[^\\n]*$/", "must not contain line breaks"]]
513+
```
514+
491515
## struct-tag
492516

493517
_Description_: Struct tags are not checked at compile time.

config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ var allRules = append([]lint.Rule{
7272
&rule.UnhandledErrorRule{},
7373
&rule.CognitiveComplexityRule{},
7474
&rule.StringOfIntRule{},
75+
&rule.StringFormatRule{},
7576
&rule.EarlyReturnRule{},
7677
&rule.UnconditionalRecursionRule{},
7778
&rule.IdenticalBranchesRule{},

go.sum

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
4444
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
4545
golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
4646
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
47-
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA=
4847
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
4948
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4 h1:myAQVi0cGEoqQVR5POX+8RR2mrocKqNN1hmeMqhX27k=
5049
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

rule/string-format.go

Lines changed: 281 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,281 @@
1+
package rule
2+
3+
import (
4+
"fmt"
5+
"go/ast"
6+
"go/token"
7+
"regexp"
8+
"strconv"
9+
10+
"github.com/mgechev/revive/lint"
11+
)
12+
13+
// #region Revive API
14+
15+
// StringFormatRule lints strings and/or comments according to a set of regular expressions given as Arguments
16+
type StringFormatRule struct{}
17+
18+
// Apply applies the rule to the given file.
19+
func (r *StringFormatRule) Apply(file *lint.File, arguments lint.Arguments) []lint.Failure {
20+
var failures []lint.Failure
21+
22+
onFailure := func(failure lint.Failure) {
23+
failures = append(failures, failure)
24+
}
25+
26+
w := lintStringFormatRule{onFailure: onFailure}
27+
w.parseArguments(arguments)
28+
ast.Walk(w, file.AST)
29+
30+
return failures
31+
}
32+
33+
func (r *StringFormatRule) Name() string {
34+
return "string-format"
35+
}
36+
37+
// Public wrapper around w.parseArguments used for testing, returns the error message provided to panic, or nil if no error was encountered
38+
func (r *StringFormatRule) ParseArgumentsTest(arguments lint.Arguments) *string {
39+
w := lintStringFormatRule{}
40+
c := make(chan interface{})
41+
// Parse the arguments in a goroutine, defer a recover() call, return the error encountered (or nil if there was no error)
42+
go func() {
43+
defer func() {
44+
err := recover()
45+
c <- err
46+
}()
47+
w.parseArguments(arguments)
48+
}()
49+
err := <-c
50+
if err != nil {
51+
e := fmt.Sprintf("%s", err)
52+
return &e
53+
}
54+
return nil
55+
}
56+
57+
// #endregion
58+
59+
// #region Internal structure
60+
61+
type lintStringFormatRule struct {
62+
onFailure func(lint.Failure)
63+
64+
rules []stringFormatSubrule
65+
stringDeclarations map[string]string
66+
}
67+
68+
type stringFormatSubrule struct {
69+
parent *lintStringFormatRule
70+
scope stringFormatSubruleScope
71+
regexp *regexp.Regexp
72+
errorMessage string
73+
}
74+
75+
type stringFormatSubruleScope struct {
76+
funcName string // Function name the rule is scoped to
77+
argument int // (optional) Which argument in calls to the function is checked against the rule (the first argument is checked by default)
78+
field string // (optional) If the argument to be checked is a struct, which member of the struct is checked against the rule (top level members only)
79+
}
80+
81+
// Regex inserted to match valid function/struct field identifiers
82+
const identRegex = "[_A-Za-z][_A-Za-z0-9]*"
83+
84+
var parseStringFormatScope = regexp.MustCompile(
85+
fmt.Sprintf("^(%s(?:\\.%s)?)(?:\\[([0-9]+)\\](?:\\.(%s))?)?$", identRegex, identRegex, identRegex))
86+
87+
// #endregion
88+
89+
// #region Argument parsing
90+
91+
func (w *lintStringFormatRule) parseArguments(arguments lint.Arguments) {
92+
for i, argument := range arguments {
93+
scope, regex, errorMessage := w.parseArgument(argument, i)
94+
w.rules = append(w.rules, stringFormatSubrule{
95+
parent: w,
96+
scope: scope,
97+
regexp: regex,
98+
errorMessage: errorMessage,
99+
})
100+
}
101+
}
102+
103+
func (w lintStringFormatRule) parseArgument(argument interface{}, ruleNum int) (scope stringFormatSubruleScope, regex *regexp.Regexp, errorMessage string) {
104+
g, ok := argument.([]interface{}) // Cast to generic slice first
105+
if !ok {
106+
w.configError("argument is not a slice", ruleNum, 0)
107+
}
108+
if len(g) < 2 {
109+
w.configError("less than two slices found in argument, scope and regex are required", ruleNum, len(g)-1)
110+
}
111+
rule := make([]string, len(g))
112+
for i, obj := range g {
113+
val, ok := obj.(string)
114+
if !ok {
115+
w.configError("unexpected value, string was expected", ruleNum, i)
116+
}
117+
rule[i] = val
118+
}
119+
120+
// Validate scope and regex length
121+
if len(rule[0]) == 0 {
122+
w.configError("empty scope provided", ruleNum, 0)
123+
} else if len(rule[1]) < 2 {
124+
w.configError("regex is too small (regexes should begin and end with '/')", ruleNum, 1)
125+
}
126+
127+
// Parse rule scope
128+
scope = stringFormatSubruleScope{}
129+
matches := parseStringFormatScope.FindStringSubmatch(rule[0])
130+
if matches == nil {
131+
// The rule's scope didn't match the parsing regex at all, probably a configuration error
132+
w.parseError("unable to parse rule scope", ruleNum, 0)
133+
} else if len(matches) != 4 {
134+
// The rule's scope matched the parsing regex, but an unexpected number of submatches was returned, probably a bug
135+
w.parseError(fmt.Sprintf("unexpected number of submatches when parsing scope: %d, expected 4", len(matches)), ruleNum, 0)
136+
}
137+
scope.funcName = matches[1]
138+
if len(matches[2]) > 0 {
139+
var err error
140+
scope.argument, err = strconv.Atoi(matches[2])
141+
if err != nil {
142+
w.parseError("unable to parse argument number in rule scope", ruleNum, 0)
143+
}
144+
}
145+
if len(matches[3]) > 0 {
146+
scope.field = matches[3]
147+
}
148+
149+
// Strip / characters from the beginning and end of rule[1] before compiling
150+
regex, err := regexp.Compile(rule[1][1 : len(rule[1])-1])
151+
if err != nil {
152+
w.parseError(fmt.Sprintf("unable to compile %s as regexp", rule[1]), ruleNum, 1)
153+
}
154+
155+
// Use custom error message if provided
156+
if len(rule) == 3 {
157+
errorMessage = rule[2]
158+
}
159+
return scope, regex, errorMessage
160+
}
161+
162+
// Report an invalid config, this is specifically the user's fault
163+
func (w lintStringFormatRule) configError(msg string, ruleNum, option int) {
164+
panic(fmt.Sprintf("invalid configuration for string-format: %s [argument %d, option %d]", msg, ruleNum, option))
165+
}
166+
167+
// Report a general config parsing failure, this may be the user's fault, but it isn't known for certain
168+
func (w lintStringFormatRule) parseError(msg string, ruleNum, option int) {
169+
panic(fmt.Sprintf("failed to parse configuration for string-format: %s [argument %d, option %d]", msg, ruleNum, option))
170+
}
171+
172+
// #endregion
173+
174+
// #region Node traversal
175+
176+
func (w lintStringFormatRule) Visit(node ast.Node) ast.Visitor {
177+
// First, check if node is a call expression
178+
call, ok := node.(*ast.CallExpr)
179+
if !ok {
180+
return w
181+
}
182+
183+
// Get the name of the call expression to check against rule scope
184+
callName, ok := w.getCallName(call)
185+
if !ok {
186+
return w
187+
}
188+
189+
for _, rule := range w.rules {
190+
if rule.scope.funcName == callName {
191+
rule.Apply(call)
192+
}
193+
}
194+
195+
return w
196+
}
197+
198+
// Return the name of a call expression in the form of package.Func or Func
199+
func (w lintStringFormatRule) getCallName(call *ast.CallExpr) (callName string, ok bool) {
200+
if ident, ok := call.Fun.(*ast.Ident); ok {
201+
// Local function call
202+
return ident.Name, true
203+
}
204+
205+
if selector, ok := call.Fun.(*ast.SelectorExpr); ok {
206+
// Scoped function call
207+
scope, ok := selector.X.(*ast.Ident)
208+
if !ok {
209+
return "", false
210+
}
211+
return scope.Name + "." + selector.Sel.Name, true
212+
}
213+
214+
return "", false
215+
}
216+
217+
// #endregion
218+
219+
// #region Linting logic
220+
221+
// Apply a single format rule to a call expression (should be done after verifying the that the call expression matches the rule's scope)
222+
func (rule stringFormatSubrule) Apply(call *ast.CallExpr) {
223+
if len(call.Args) <= rule.scope.argument {
224+
return
225+
}
226+
227+
arg := call.Args[rule.scope.argument]
228+
var lit *ast.BasicLit
229+
if len(rule.scope.field) > 0 {
230+
// Try finding the scope's Field, treating arg as a composite literal
231+
composite, ok := arg.(*ast.CompositeLit)
232+
if !ok {
233+
return
234+
}
235+
for _, el := range composite.Elts {
236+
kv, ok := el.(*ast.KeyValueExpr)
237+
if !ok {
238+
continue
239+
}
240+
key, ok := kv.Key.(*ast.Ident)
241+
if !ok || key.Name != rule.scope.field {
242+
continue
243+
}
244+
245+
// We're now dealing with the exact field in the rule's scope, so if anything fails, we can safely return instead of continuing the loop
246+
lit, ok = kv.Value.(*ast.BasicLit)
247+
if !ok || lit.Kind != token.STRING {
248+
return
249+
}
250+
}
251+
} else {
252+
var ok bool
253+
// Treat arg as a string literal
254+
lit, ok = arg.(*ast.BasicLit)
255+
if !ok || lit.Kind != token.STRING {
256+
return
257+
}
258+
}
259+
// Unquote the string literal before linting
260+
unquoted := lit.Value[1 : len(lit.Value)-1]
261+
rule.lintMessage(unquoted, lit)
262+
}
263+
264+
func (rule stringFormatSubrule) lintMessage(s string, node ast.Node) {
265+
// Fail if the string doesn't match the user's regex
266+
if rule.regexp.MatchString(s) {
267+
return
268+
}
269+
var failure string
270+
if len(rule.errorMessage) > 0 {
271+
failure = rule.errorMessage
272+
} else {
273+
failure = fmt.Sprintf("string literal doesn't match user defined regex /%s/", rule.regexp.String())
274+
}
275+
rule.parent.onFailure(lint.Failure{
276+
Confidence: 1,
277+
Failure: failure,
278+
Node: node})
279+
}
280+
281+
// #endregion

0 commit comments

Comments
 (0)