File tree 2 files changed +64
-6
lines changed
material/plugins/blog/readtime
src/plugins/blog/readtime
2 files changed +64
-6
lines changed Original file line number Diff line number Diff line change 20
20
21
21
from html .parser import HTMLParser
22
22
23
+ # TODO: Refactor the `void` set into a common module and import it from there
24
+ # and not from the search plugin.
25
+ from material .plugins .search .plugin import void
26
+
23
27
# -----------------------------------------------------------------------------
24
28
# Classes
25
29
# -----------------------------------------------------------------------------
@@ -31,15 +35,40 @@ class ReadtimeParser(HTMLParser):
31
35
def __init__(self):
    """Initialize the parser state: skip set, tag context, text and images."""
    super().__init__(convert_charrefs = True)

    # Tags whose text contents are skipped when collecting text
    self.skip = {
        "object",                      # Objects
        "script",                      # Scripts
        "style",                       # Styles
        "svg",                         # SVGs
    }

    # Current context, i.e. the stack of open tags (innermost last)
    self.context = []

    # Keep track of text and images
    self.text = []
    self.images = 0
37
52
38
- # Collect images
53
+ # Called at the start of every HTML tag
39
54
def handle_starttag(self, tag, attrs):
    """Count images and push every non-void tag onto the context stack.

    `attrs` is accepted to satisfy the parser callback signature but is
    not used.
    """
    # Collect images
    if tag == "img":
        self.images += 1

    # Self-closing (void) tags never get an end tag, so they must not be
    # added to the context
    if tag in void:
        return

    # Add tag to context
    self.context.append(tag)
63
+
64
+ # Called for the text contents of each tag
44
65
def handle_data(self, data):
    """Collect `data` unless any tag in the current context is skipped."""
    # The context shares no tag with the skip set, so this is regular text
    if self.skip.isdisjoint(self.context):
        self.text.append(data)
69
+
70
+ # Called at the end of every HTML tag
71
def handle_endtag(self, tag):
    """Remove `tag` from the context, together with any unclosed children.

    Only popping when the innermost tag matches leaves stale entries behind
    whenever a non-void child is never closed. If such a stale entry is a
    skipped tag, all following text is dropped. Therefore, the context is
    unwound up to the nearest matching open tag. End tags without a matching
    open tag are ignored.
    """
    context = self.context

    # Search from the innermost open tag outwards
    for index in range(len(context) - 1, -1, -1):
        if context[index] == tag:
            # Remove tag and everything opened inside of it from context
            del context[index:]
            break
Original file line number Diff line number Diff line change 20
20
21
21
from html .parser import HTMLParser
22
22
23
+ # TODO: Refactor the `void` set into a common module and import it from there
24
+ # and not from the search plugin.
25
+ from material .plugins .search .plugin import void
26
+
23
27
# -----------------------------------------------------------------------------
24
28
# Classes
25
29
# -----------------------------------------------------------------------------
@@ -31,15 +35,40 @@ class ReadtimeParser(HTMLParser):
31
35
def __init__(self):
    """Initialize the parser state: skip set, tag context, text and images."""
    super().__init__(convert_charrefs = True)

    # Tags whose text contents are skipped when collecting text
    self.skip = {
        "object",                      # Objects
        "script",                      # Scripts
        "style",                       # Styles
        "svg",                         # SVGs
    }

    # Current context, i.e. the stack of open tags (innermost last)
    self.context = []

    # Keep track of text and images
    self.text = []
    self.images = 0
37
52
38
- # Collect images
53
+ # Called at the start of every HTML tag
39
54
def handle_starttag(self, tag, attrs):
    """Count images and push every non-void tag onto the context stack.

    `attrs` is accepted to satisfy the parser callback signature but is
    not used.
    """
    # Collect images
    if tag == "img":
        self.images += 1

    # Self-closing (void) tags never get an end tag, so they must not be
    # added to the context
    if tag in void:
        return

    # Add tag to context
    self.context.append(tag)
63
+
64
+ # Called for the text contents of each tag
44
65
def handle_data(self, data):
    """Collect `data` unless any tag in the current context is skipped."""
    # The context shares no tag with the skip set, so this is regular text
    if self.skip.isdisjoint(self.context):
        self.text.append(data)
69
+
70
+ # Called at the end of every HTML tag
71
def handle_endtag(self, tag):
    """Remove `tag` from the context, together with any unclosed children.

    Only popping when the innermost tag matches leaves stale entries behind
    whenever a non-void child is never closed. If such a stale entry is a
    skipped tag, all following text is dropped. Therefore, the context is
    unwound up to the nearest matching open tag. End tags without a matching
    open tag are ignored.
    """
    context = self.context

    # Search from the innermost open tag outwards
    for index in range(len(context) - 1, -1, -1):
        if context[index] == tag:
            # Remove tag and everything opened inside of it from context
            del context[index:]
            break
You can’t perform that action at this time.
0 commit comments