Skip to content

Commit c9c7afd

Browse files
Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer. * Updated documentation. Co-authored-by: techknowlogick <[email protected]>
1 parent eb324a9 commit c9c7afd

File tree

10 files changed

+215
-113
lines changed

10 files changed

+215
-113
lines changed

docs/content/doc/advanced/config-cheat-sheet.en-us.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -907,13 +907,17 @@ Gitea supports customizing the sanitization policy for rendered HTML. The exampl
907907
ELEMENT = span
908908
ALLOW_ATTR = class
909909
REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
910+
ALLOW_DATA_URI_IMAGES = true
910911
```
911912

912913
- `ELEMENT`: The element this policy applies to. Must be non-empty.
913914
- `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
914915
- `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
916+
- `ALLOW_DATA_URI_IMAGES`: **false**: Allow data URI images (`<img src="data:image/png;base64,..."/>`).
915917

916918
Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[markup.sanitizer.TeX-2]`.
919+
To apply a sanitisation rule only to a specific external renderer, the section name must include the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
920+
If the rule is defined above the renderer ini section, or the name does not match a renderer, it is applied to every renderer.
917921

918922
## Time (`time`)
919923

docs/content/doc/advanced/external-renderers.en-us.md

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,8 +64,8 @@ IS_INPUT_FILE = false
6464
[markup.jupyter]
6565
ENABLED = true
6666
FILE_EXTENSIONS = .ipynb
67-
RENDER_COMMAND = "jupyter nbconvert --stdout --to html --template basic "
68-
IS_INPUT_FILE = true
67+
RENDER_COMMAND = "jupyter nbconvert --stdin --stdout --to html --template basic"
68+
IS_INPUT_FILE = false
6969

7070
[markup.restructuredtext]
7171
ENABLED = true
@@ -90,15 +90,50 @@ FILE_EXTENSIONS = .md,.markdown
9090
RENDER_COMMAND = pandoc -f markdown -t html --katex
9191
```
9292

93-
You must define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` in each section.
93+
You must define `ELEMENT` and `ALLOW_ATTR` in each section.
9494

9595
To define multiple entries, add a unique alphanumeric suffix (e.g., `[markup.sanitizer.1]` and `[markup.sanitizer.something]`).
9696

97+
To apply a sanitisation rule only to a specific external renderer, the section name must include the renderer name in the form `[markup.sanitizer.<renderer>.rule-1]`, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
98+
99+
**Note**: If the rule is defined above the renderer ini section, or the name does not match a renderer, it is applied to every renderer.
100+
97101
Once your configuration changes have been made, restart Gitea to have changes take effect.
98102

99103
**Note**: Prior to Gitea 1.12 there was a single `markup.sanitiser` section with keys that were redefined for multiple rules, however,
100104
there were significant problems with this method of configuration necessitating configuration through multiple sections.
101105

106+
### Example: Office DOCX
107+
108+
Display Office DOCX files with [`pandoc`](https://pandoc.org/):
109+
```ini
110+
[markup.docx]
111+
ENABLED = true
112+
FILE_EXTENSIONS = .docx
113+
RENDER_COMMAND = "pandoc --from docx --to html --self-contained --template /path/to/basic.html"
114+
115+
[markup.sanitizer.docx.img]
116+
ALLOW_DATA_URI_IMAGES = true
117+
```
118+
119+
The template file has the following content:
120+
```
121+
$body$
122+
```
123+
124+
### Example: Jupyter Notebook
125+
126+
Display Jupyter Notebook files with [`nbconvert`](https://github.com/jupyter/nbconvert):
127+
```ini
128+
[markup.jupyter]
129+
ENABLED = true
130+
FILE_EXTENSIONS = .ipynb
131+
RENDER_COMMAND = "jupyter-nbconvert --stdin --stdout --to html --template basic"
132+
133+
[markup.sanitizer.jupyter.img]
134+
ALLOW_DATA_URI_IMAGES = true
135+
```
136+
102137
## Customizing CSS
103138
The external renderer is specified in the .ini in the format `[markup.XXXXX]` and the HTML supplied by your external renderer will be wrapped in a `<div>` with classes `markup` and `XXXXX`. The `markup` class provides out of the box styling (as does `markdown` if `XXXXX` is `markdown`). Otherwise you can use these classes to specifically target the contents of your rendered HTML.
104139

modules/markup/csv/csv.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"html"
1111
"io"
1212
"io/ioutil"
13+
"regexp"
1314
"strconv"
1415

1516
"code.gitea.io/gitea/modules/csv"
@@ -38,6 +39,15 @@ func (Renderer) Extensions() []string {
3839
return []string{".csv", ".tsv"}
3940
}
4041

42+
// SanitizerRules implements markup.Renderer
43+
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
44+
return []setting.MarkupSanitizerRule{
45+
{Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
46+
{Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
47+
{Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
48+
}
49+
}
50+
4151
func writeField(w io.Writer, element, class, field string) error {
4252
if _, err := io.WriteString(w, "<"); err != nil {
4353
return err

modules/markup/external/external.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ func RegisterRenderers() {
3030

3131
// Renderer implements markup.Renderer for external tools
3232
type Renderer struct {
33-
setting.MarkupRenderer
33+
*setting.MarkupRenderer
3434
}
3535

3636
// Name returns the external tool name
@@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string {
4848
return p.FileExtensions
4949
}
5050

51+
// SanitizerRules implements markup.Renderer
52+
func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
53+
return p.MarkupSanitizerRules
54+
}
55+
5156
func envMark(envName string) string {
5257
if runtime.GOOS == "windows" {
5358
return "%" + envName + "%"

modules/markup/html_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) {
112112

113113
defaultCustom := setting.Markdown.CustomURLSchemes
114114
setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"}
115-
ReplaceSanitizer()
115+
InitializeSanitizer()
116116
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
117117

118118
test(
@@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) {
192192

193193
// Restore previous settings
194194
setting.Markdown.CustomURLSchemes = defaultCustom
195-
ReplaceSanitizer()
195+
InitializeSanitizer()
196196
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
197197
}
198198

modules/markup/markdown/markdown.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
199199
}
200200
_ = lw.Close()
201201
}()
202-
buf := markup.SanitizeReader(rd)
202+
buf := markup.SanitizeReader(rd, "")
203203
_, err := io.Copy(output, buf)
204204
return err
205205
}
@@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
215215
if log.IsDebug() {
216216
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
217217
}
218-
ret := markup.SanitizeReader(input)
218+
ret := markup.SanitizeReader(input, "")
219219
_, err = io.Copy(output, ret)
220220
if err != nil {
221221
log.Error("SanitizeReader failed: %v", err)
@@ -249,6 +249,11 @@ func (Renderer) Extensions() []string {
249249
return setting.Markdown.FileExtensions
250250
}
251251

252+
// SanitizerRules implements markup.Renderer
253+
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
254+
return []setting.MarkupSanitizerRule{}
255+
}
256+
252257
// Render implements markup.Renderer
253258
func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
254259
return render(ctx, input, output)

modules/markup/orgmode/orgmode.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313

1414
"code.gitea.io/gitea/modules/highlight"
1515
"code.gitea.io/gitea/modules/markup"
16+
"code.gitea.io/gitea/modules/setting"
1617
"code.gitea.io/gitea/modules/util"
1718

1819
"github.com/alecthomas/chroma"
@@ -41,6 +42,11 @@ func (Renderer) Extensions() []string {
4142
return []string{".org"}
4243
}
4344

45+
// SanitizerRules implements markup.Renderer
46+
func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
47+
return []setting.MarkupSanitizerRule{}
48+
}
49+
4450
// Render renders orgmode rawbytes to HTML
4551
func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
4652
htmlWriter := org.NewHTMLWriter()

modules/markup/renderer.go

Lines changed: 26 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ type Renderer interface {
8181
Name() string // markup format name
8282
Extensions() []string
8383
NeedPostProcess() bool
84+
SanitizerRules() []setting.MarkupSanitizerRule
8485
Render(ctx *RenderContext, input io.Reader, output io.Writer) error
8586
}
8687

@@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr
136137
_ = pw.Close()
137138
}()
138139

139-
if renderer.NeedPostProcess() {
140-
pr2, pw2 := io.Pipe()
141-
defer func() {
142-
_ = pr2.Close()
143-
_ = pw2.Close()
144-
}()
145-
146-
wg.Add(1)
147-
go func() {
148-
buf := SanitizeReader(pr2)
149-
_, err = io.Copy(output, buf)
150-
_ = pr2.Close()
151-
wg.Done()
152-
}()
153-
154-
wg.Add(1)
155-
go func() {
140+
pr2, pw2 := io.Pipe()
141+
defer func() {
142+
_ = pr2.Close()
143+
_ = pw2.Close()
144+
}()
145+
146+
wg.Add(1)
147+
go func() {
148+
buf := SanitizeReader(pr2, renderer.Name())
149+
_, err = io.Copy(output, buf)
150+
_ = pr2.Close()
151+
wg.Done()
152+
}()
153+
154+
wg.Add(1)
155+
go func() {
156+
if renderer.NeedPostProcess() {
156157
err = PostProcess(ctx, pr, pw2)
157-
_ = pr.Close()
158-
_ = pw2.Close()
159-
wg.Done()
160-
}()
161-
} else {
162-
wg.Add(1)
163-
go func() {
164-
buf := SanitizeReader(pr)
165-
_, err = io.Copy(output, buf)
166-
_ = pr.Close()
167-
wg.Done()
168-
}()
169-
}
158+
} else {
159+
_, err = io.Copy(pw2, pr)
160+
}
161+
_ = pr.Close()
162+
_ = pw2.Close()
163+
wg.Done()
164+
}()
165+
170166
if err1 := renderer.Render(ctx, input, pw); err1 != nil {
171167
return err1
172168
}

0 commit comments

Comments
 (0)