By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 markdown.py

View raw Download
text/x-script.python • 6.41 kiB
Python script, ASCII text executable
        
            
1
import re
2
import bs4 as beautifulsoup
3
import sys
4
5
# Verbose-mode pattern for inline markup. It must be compiled with
# re.VERBOSE: whitespace and "# ..." comments inside the pattern are ignored.
# Each alternative exposes named groups that the inline parser inspects to
# decide which kind of element was matched.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""
16
17
18
def leading(string, character):
    """Return how many characters are stripped from the start of ``string``.

    ``character`` is passed straight to ``str.lstrip``, so if it contains
    several characters each of them counts as strippable.
    """
    return len(string) - len(string.lstrip(character))
20
21
22
def trailing(string, character):
    """Return how many characters are stripped from the end of ``string``.

    ``character`` is passed straight to ``str.rstrip``, so if it contains
    several characters each of them counts as strippable.
    """
    return len(string) - len(string.rstrip(character))
24
25
26
class Element:
    """Base node of the document tree.

    A bare ``Element`` carries no content and reports itself as a void
    block; subclasses override ``tag_name`` and fill in ``content``.
    """

    def __init__(self):
        # CSS class names attached to this node.
        self.classes = []
        # Payload of the node; subclasses replace this.
        self.content = None

    def __repr__(self):
        return "Void block"

    @property
    def tag_name(self):
        """Name of the tag for this node; ``"m-void"`` marks a void block."""
        return "m-void"
38
39
40
class Container(Element):
    """Element whose content is a list of inline tokens.

    The raw text handed to the constructor is run through ``parse_line``,
    so ``content`` holds the list that function returns.
    """

    def __init__(self, content):
        super().__init__()
        self.content = parse_line(content)

    def __repr__(self):
        return "Generic container element: " + repr(self.content)
47
48
49
class Heading(Container):
    """Heading block with inline-parsed content and a numeric level."""

    def __init__(self, content, level):
        super().__init__(content)
        # Appended to "h" to form the tag name.
        self.level = level

    def __repr__(self):
        return f"Heading level {self.level}:\n\t" + repr(self.content)

    @property
    def tag_name(self):
        """Tag name built from the level, e.g. ``"h2"`` for level 2."""
        return "h" + str(self.level)
61
62
63
class Paragraph(Container):
    """Paragraph block that is built up one source line at a time."""

    def __init__(self):
        # Start from the result of parsing an empty string; lines are
        # added afterwards through addLine().
        super().__init__("")

    def addLine(self, content):
        """Parse ``content`` and append its tokens, followed by a space.

        The trailing ``" "`` separates this line's tokens from those of
        the next line added to the same paragraph.
        """
        self.content.extend([*parse_line(content), " "])

    def __repr__(self):
        return "Paragraph:\n\t" + repr(self.content)

    @property
    def tag_name(self):
        return "p"
76
77
78
class Emphasis(Container):
    """Inline emphasis whose strength is given by ``value``.

    ``value`` is decoded into CSS classes: ``emphasis-3`` when it is 4 or
    more, ``emphasis-2`` when ``value % 4 >= 2`` and ``emphasis-1`` when
    it is odd. For values 1-7 this is the same as reading ``value`` as a
    three-bit mask.
    """

    def __init__(self, content, value):
        super().__init__(content)
        self.value = value
        if value >= 4:
            self.classes.append("emphasis-3")
        if value % 4 >= 2:
            self.classes.append("emphasis-2")
        if value % 2:
            self.classes.append("emphasis-1")

    def __repr__(self):
        return f"Emphasis ({self.value}): " + repr(self.content)

    @property
    def tag_name(self):
        """``"em"`` for a value of exactly 1, ``"strong"`` otherwise."""
        return "em" if self.value == 1 else "strong"
95
96
97
class Code(Element):
    """Inline code span.

    The text is stored verbatim as the single item of the ``content``
    list; it is not parsed for further inline markup.
    """

    def __init__(self, content):
        super().__init__()
        self.content = [content]

    def __repr__(self):
        return f"Inline code: {self.content}"

    @property
    def tag_name(self):
        return "code"
108
109
110
class Strikethrough(Container):
    """Struck-through inline text; its content is inline-parsed."""

    def __init__(self, content):
        super().__init__(content)

    def __repr__(self):
        return f"Strikethrough: {repr(self.content)}"

    @property
    def tag_name(self):
        return "s"
120
121
122
class Diff(Container):
    """Inline insertion or deletion.

    ``value`` is the delimiter that opened the span; ``"++"`` denotes an
    insertion and any other value is treated as a deletion.
    """

    def __init__(self, content, value):
        super().__init__(content)
        self.value = value

    def __repr__(self):
        return f"Diff ({self.value}): {self.content}"

    @property
    def tag_name(self):
        """``"ins"`` when the delimiter is ``"++"``, ``"del"`` otherwise."""
        return "ins" if self.value == "++" else "del"
133
134
135
class Link(Element):
    """Hyperlink with a text label and a destination.

    ``content`` is kept as the raw string that was passed in; it is not
    parsed for inline markup. ``image`` records whether the node stands
    for an image rather than a plain link.
    """

    def __init__(self, content, destination, image=False):
        super().__init__()
        self.content = content
        self.destination = destination
        self.image = image

    def __repr__(self):
        # The label lives in ``content``; no ``text`` attribute is ever set.
        kind = "Image" if self.image else "Link"
        return f"{kind}: {self.content} -> {self.destination}"

    @property
    def tag_name(self):
        return "a"
148
149
150
class Image(Link):
    """Link variant that is always flagged as an image."""

    def __init__(self, text, destination):
        # ``text`` is stored as the node's content by Link.__init__.
        super().__init__(text, destination, True)

    @property
    def tag_name(self):
        return "img"
157
158
159
def parse_line(source):
    """Split one line of text into plain strings and inline elements.

    A line that ends in exactly one backslash has that backslash replaced
    by a newline. The text is then scanned with ``inlineRegex``; the text
    between matches is kept as plain strings (possibly empty) and every
    match becomes exactly one element.

    Returns:
        A list alternating plain ``str`` segments with ``Emphasis``,
        ``Code``, ``Strikethrough``, ``Diff``, ``Link`` or ``Image``
        instances, always ending with the trailing plain segment.
    """
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\") + "\n"

    pattern = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)

    tokens = []
    lookup = 0  # index just past the previous match
    for match in pattern.finditer(source):
        tokens.append(source[lookup:match.start()])
        lookup = match.end()

        # Exactly one alternative of the pattern matched. Text groups are
        # compared against None (not tested for truthiness) so that an
        # empty span still yields an element instead of vanishing.
        if match.group("em"):
            tokens.append(
                Emphasis(match.group("textEm"), len(match.group("em")))
            )
        elif match.group("textCode") is not None:
            tokens.append(Code(match.group("textCode")))
        elif match.group("strike"):
            tokens.append(Strikethrough(match.group("textStrike")))
        elif match.group("diff"):
            tokens.append(Diff(match.group("textDiff"), match.group("diff")))
        elif match.group("urlText") is not None:
            # A leading "!" selects an image; emit one node, never both.
            if match.group("imageFlag"):
                node_type = Image
            else:
                node_type = Link
            tokens.append(
                node_type(match.group("urlText"), match.group("urlDestination"))
            )

    tokens.append(source[lookup:])

    return tokens
192
193
194
def tokenise(source):
    """Split Markdown text into a list of block-level elements.

    The text is processed line by line:

    * a blank line closes the current block and starts a void ``Element``;
    * a line starting with ``#`` characters followed by a space closes the
      current block and starts a ``Heading`` whose level is the number of
      leading ``#`` characters;
    * any other line is added to the current ``Paragraph``, opening a new
      one if the current block is not a paragraph.

    Returns:
        The list of blocks in document order, void elements included.
    """
    tokens = []

    # Start from a void block *instance* so that every item in the result
    # is a real element.
    current_block = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Blank line: close the current block and open a void one.
            tokens.append(current_block)
            current_block = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " "):
            tokens.append(current_block)

            content = line.lstrip("#").strip()
            current_block = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current_block, Paragraph):
                # Paragraph is the default block type.
                tokens.append(current_block)
                current_block = Paragraph()

            current_block.addLine(line.strip())

    tokens.append(current_block)

    return tokens
222
223
224
def make_html(ast):
    """Render a list of tokens into a BeautifulSoup tree.

    Plain strings are appended as text. Objects that have a ``content``
    attribute and whose ``tag_name`` is not ``"m-void"`` become tags; a
    list-valued ``content`` is rendered recursively. Anything else is
    skipped.

    A node's ``classes`` are written to the ``class`` attribute. A node
    with a ``destination`` gets it as ``href``, or as ``src`` when the
    node is flagged as an image, in which case its content becomes the
    ``alt`` text.

    Returns:
        A ``BeautifulSoup`` object holding the generated nodes.
    """
    soup = beautifulsoup.BeautifulSoup()
    for node in ast:
        if isinstance(node, str):
            soup.append(node)
            continue
        if not hasattr(node, "content") or node.tag_name == "m-void":
            continue

        tag = soup.new_tag(str(node.tag_name))

        classes = getattr(node, "classes", None)
        if classes:
            tag["class"] = list(classes)

        destination = getattr(node, "destination", None)
        is_image = bool(getattr(node, "image", False))
        if destination is not None:
            tag["src" if is_image else "href"] = destination

        if is_image:
            # <img> cannot hold children, so the label goes into alt.
            tag["alt"] = node.content or ""
        else:
            try:
                if isinstance(node.content, list):
                    tag.append(make_html(node.content))
                elif node.content:
                    tag.string = node.content
            except AttributeError as exc:
                # Best effort: report the problem and still emit the tag.
                print(exc, file=sys.stderr)

        soup.append(tag)
    return soup
243
244
245
if __name__ == '__main__':
    # Generate an AST from a sample Markdown document. The sample lines
    # start at column 0 because tokenise() only recognises a heading when
    # the line itself begins with "#".
    ast = tokenise(
        """
# Hello World!
## Title 1
### Part 1
#### Chapter _1_
##### Article 1
###### Section 1
Lorem **i`p`sum**
dolor `sit` amet

...
"""
    )
    # Debugging aid: uncomment to dump the parsed blocks.
    # for i in ast:
    #     print(repr(i))

    # Now convert the AST to HTML
    print(make_html(ast).prettify(formatter=beautifulsoup.formatter.HTMLFormatter(indent=4)))
266
267