By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 markdown.py

View raw Download
text/x-script.python • 4.68 kiB
Python script, ASCII text executable
        
            
1
import re
2
3
# Verbose-mode regular expression describing the inline constructs recognised
# by parse_line(). It must be compiled with re.VERBOSE: whitespace between the
# pieces is insignificant and the trailing "# ..." notes are regex comments.
#
# One alternative per construct; the named groups tell them apart:
#   em / textEm                         -> 1-7 '*' or '_' delimiters and the text between
#   textCode                            -> text between single backticks
#   imageFlag / urlText / urlDestination-> [text](destination), optional leading '!'
#   strike / textStrike                 -> text between '~~' pairs
#   diff / textDiff                     -> text between a matching '--' or '++' pair
#
# The string must contain nothing but the pattern itself: any stray characters
# (e.g. digits) on their own lines would become literal parts of the pattern.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""
14
15
16
def leading(string, character):
    """Return how many characters ``str.lstrip(character)`` removes from ``string``.

    ``character`` is passed to ``str.lstrip`` unchanged, so the result is the
    length of the prefix that ``lstrip`` would strip.
    """
    remainder = string.lstrip(character)
    return len(string) - len(remainder)
18
19
20
def trailing(string, character):
    """Return how many characters ``str.rstrip(character)`` removes from ``string``.

    ``character`` is passed to ``str.rstrip`` unchanged, so the result is the
    length of the suffix that ``rstrip`` would strip.
    """
    remainder = string.rstrip(character)
    return len(string) - len(remainder)
22
23
24
class Element:
    """Base class of every token; a bare instance prints as a void block."""

    def __init__(self):
        """Create an element; the base class keeps no state."""

    def __repr__(self):
        """Return the fixed label used for a bare element."""
        return "Void block"
30
31
32
class Container(Element):
    """Element whose ``content`` is the token list parsed from a source string."""

    def __init__(self, content):
        """Parse ``content`` with ``parse_line`` and keep the resulting list."""
        super().__init__()
        parsed = parse_line(content)
        self.content = parsed

    def __repr__(self):
        """Return a generic label followed by the repr of the parsed content."""
        return f"Generic container element: {self.content!r}"
39
40
41
class Heading(Container):
    """Container that additionally records a heading ``level``."""

    def __init__(self, content, level):
        """Parse ``content`` through the Container initialiser and store ``level``."""
        super().__init__(content)
        self.level = level

    def __repr__(self):
        """Return the level on one line and the parsed content, tab-indented, on the next."""
        return f"Heading level {self.level}:\n\t{self.content!r}"
49
50
51
class Paragraph(Container):
    """Container that is created empty and then filled one line at a time."""

    def __init__(self):
        """Initialise ``content`` from an empty source string."""
        super().__init__("")

    def addLine(self, content):
        """Parse ``content`` and append its tokens, then a single space, to this paragraph."""
        pieces = parse_line(content)
        # The trailing space keeps consecutive source lines separated.
        pieces.append(" ")
        self.content.extend(pieces)

    def __repr__(self):
        """Return a label followed by the tab-indented repr of the content."""
        return f"Paragraph:\n\t{self.content!r}"
60
61
62
class Emphasis(Container):
    """Container for emphasised text; ``value`` is supplied by the caller."""

    def __init__(self, content, value):
        """Parse ``content`` through the Container initialiser and store ``value``."""
        super().__init__(content)
        self.value = value

    def __repr__(self):
        """Return the emphasis value in parentheses followed by the parsed content."""
        return f"Emphasis ({self.value}): {self.content!r}"
69
70
71
class Code(Element):
    """Element holding inline code; ``content`` is stored as given, not parsed."""

    def __init__(self, content):
        """Store ``content`` unchanged."""
        super().__init__()
        self.content = content

    def __repr__(self):
        """Return a label followed by the stored content."""
        return "Inline code: {}".format(self.content)
78
79
80
class Strikethrough(Container):
    """Container for struck-through text."""

    def __init__(self, content):
        """Parse ``content`` through the Container initialiser."""
        super().__init__(content)

    def __repr__(self):
        """Return a label followed by the repr of the parsed content."""
        return "Strikethrough: " + repr(self.content)
86
87
88
class Diff(Container):
    """Container for diff-marked text; ``value`` is supplied by the caller."""

    def __init__(self, content, value):
        """Parse ``content`` through the Container initialiser and store ``value``."""
        super().__init__(content)
        self.value = value

    def __repr__(self):
        """Return the diff value in parentheses followed by the formatted content."""
        return "Diff ({}): {}".format(self.value, self.content)
95
96
97
class Link(Element):
    """Element pairing a ``text`` with a ``destination``; ``image`` flags media."""

    def __init__(self, text, destination, image=False):
        """Store ``text``, ``destination`` and the ``image`` flag unchanged."""
        super().__init__()
        self.text = text
        self.destination = destination
        self.image = image

    def __repr__(self):
        """Return ``"<kind>: <text> -> <destination>"`` where kind depends on ``image``."""
        kind = "Image" if self.image else "Link"
        return f"{kind}: {self.text} -> {self.destination}"
106
107
108
class Image(Link):
    """Link whose ``image`` flag is always set."""

    def __init__(self, text, destination):
        """Initialise the underlying Link with ``image`` set to True."""
        super().__init__(text, destination, image=True)
111
112
113
def parse_line(source):
    """Split ``source`` into a list of plain strings and inline elements.

    The text is scanned with ``inlineRegex``. For every match, the plain text
    between the previous match and this one is appended (possibly as an empty
    string), followed by exactly one element for the construct that matched:
    ``Emphasis``, ``Code``, ``Strikethrough``, ``Diff``, ``Image`` or ``Link``.
    The text after the last match is appended last, so the result always
    contains at least one string.

    Args:
        source: The text to parse.

    Returns:
        A list mixing ``str`` fragments and element instances, in source order.
    """
    # Exactly one trailing backslash is replaced by a newline character
    # (presumably a hard line break -- confirm against the renderer).
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\") + "\n"

    pattern = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)

    tokens = []
    cursor = 0
    for match in pattern.finditer(source):
        # Plain text between the previous match and this one.
        tokens.append(source[cursor:match.start()])
        cursor = match.end()

        # Only one alternative of the pattern can match, so dispatch with an
        # exclusive chain. Groups are compared against None rather than tested
        # for truthiness: a group that matched the empty string (empty inline
        # code, empty link text) still identifies its construct.
        if match.group("em") is not None:
            tokens.append(Emphasis(match.group("textEm"), len(match.group("em"))))
        elif match.group("textCode") is not None:
            tokens.append(Code(match.group("textCode")))
        elif match.group("strike") is not None:
            tokens.append(Strikethrough(match.group("textStrike")))
        elif match.group("diff") is not None:
            tokens.append(Diff(match.group("textDiff"), match.group("diff")))
        elif match.group("urlDestination") is not None:
            # A leading "!" selects an image; emit a single node either way
            # instead of a Link followed by a duplicate Image.
            node_type = Image if match.group("imageFlag") else Link
            tokens.append(
                node_type(match.group("urlText"), match.group("urlDestination"))
            )

    # Whatever follows the last match (the whole string when nothing matched).
    tokens.append(source[cursor:])

    return tokens
146
147
148
def _tokenise(source):
    """Split ``source`` into a list of block-level elements.

    The text is processed line by line (split on ``"\\n"``):

    * a blank line closes the current block and starts a bare ``Element``;
    * a line starting with ``#`` whose run of ``#`` is followed by at least
      one space closes the current block and starts a ``Heading`` whose level
      is the number of leading ``#`` characters;
    * any other line is added to the current ``Paragraph``, opening a new one
      (and closing the current block) if the current block is not a paragraph.

    Args:
        source: The full text to tokenise.

    Returns:
        A list of ``Element`` instances in source order, including the bare
        elements produced for the initial state and for blank lines.
    """
    tokens = []

    # Start from an *instance*; storing the class itself would put the class
    # object into the token list as its first entry.
    current_block = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Blank line: close the current block and start a void block.
            tokens.append(current_block)
            current_block = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " "):
            # Heading: the '#' run must be followed by at least one space.
            tokens.append(current_block)

            content = line.lstrip("#").strip()
            current_block = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current_block, Paragraph):
                # Paragraph is the default block type.
                tokens.append(current_block)
                current_block = Paragraph()

            current_block.addLine(line.strip())

    # Flush the block that was still open when the input ended.
    tokens.append(current_block)

    return tokens
176
177
178
if __name__ == '__main__':
    # Demo: tokenise a small sample document and print each block's repr.
    # The literal holds only the sample text, so every line in it is input
    # to the tokeniser.
    sample = """
# Hello World!
## Title 1
### Part 1
#### Chapter _1_
##### Article 1
###### Section 1
Lorem **i`p`sum**
dolor `sit` amet

...
"""
    for block in _tokenise(sample):
        print(repr(block))
194
195