By using this site, you agree to have cookies stored on your device, strictly for functional purposes, such as storing your session and preferences.

Dismiss

 markdown.py

View raw Download
text/x-script.python • 3.11 kiB
Python script, ASCII text executable
        
            
1
import re
2
3
4
# Verbose-mode pattern (compile with re.VERBOSE) that recognises inline
# Markdown spans.  There is one alternative per construct; the named groups
# tell the caller which alternative matched and carry its content.
#
# Emphasis: the text may not contain an unescaped copy of the opening
# delimiter (enforced by the negative lookahead on the ``em`` back-reference),
# so ``_a_ and _b_`` yields two spans rather than one.  A backslash always
# consumes the character after it, and the plain-character branch excludes
# the backslash, so an escaped delimiter can never be re-read as the closer.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\.|(?!(?P=em))[^\\])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""
15
16
17
def leading(string, character):
    """Count the characters at the start of ``string`` found in ``character``.

    ``character`` is passed straight to ``str.lstrip``, so every character it
    contains is treated as strippable.
    """
    remainder = string.lstrip(character)
    removed = len(string) - len(remainder)
    return removed
19
20
21
def trailing(string, character):
    """Count the characters at the end of ``string`` found in ``character``.

    ``character`` is passed straight to ``str.rstrip``, so every character it
    contains is treated as strippable.
    """
    remainder = string.rstrip(character)
    removed = len(string) - len(remainder)
    return removed
23
24
25
class Element:
    """Base block type; a bare instance is rendered as a "Void block"."""

    def __init__(self):
        """Nothing to set up; subclasses chain here via ``super().__init__()``."""

    def __repr__(self):
        return "Void block"
31
32
33
class Heading(Element):
    """Heading block holding its text and its level."""

    def __init__(self, content, level):
        super().__init__()
        # content: the heading text; level: the heading depth given by the caller.
        self.content = content
        self.level = level

    def __repr__(self):
        prefix = f"Heading level {self.level}:\n\t"
        return prefix + self.content
42
43
44
class Paragraph(Element):
    """Paragraph block whose text is accumulated one source line at a time."""

    def __init__(self, content):
        super().__init__()
        self.content = content

    def addLine(self, content):
        """Append ``content``, stripped of surrounding whitespace, plus one space."""
        stripped = content.strip()
        self.content += stripped + " "

    def __repr__(self):
        label = "Paragraph:\n\t"
        return label + self.content
54
55
56
class Emphasis(Element):
    """Inline emphasis span: the emphasised text and a numeric strength value."""

    def __init__(self, content, value):
        super().__init__()
        # content: the text inside the delimiters; value: number supplied by the caller.
        self.content = content
        self.value = value

    def __repr__(self):
        label = f"Emphasis ({self.value}): "
        return label + self.content
64
65
66
def _parse_line(source):
    """Split a piece of Markdown text into plain-text and inline-element tokens.

    Args:
        source: The raw text to scan.  When it ends with exactly one
            backslash, that backslash is replaced by a newline before
            scanning.

    Returns:
        A list, in document order, of ``str`` slices of the text interleaved
        with ``Emphasis`` objects.  Each ``Emphasis`` is preceded by the
        (possibly empty) text before it, and the list always ends with the
        (possibly empty) text after the last emphasis span.
    """
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\") + "\n"

    pattern = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)

    tokens = []
    cursor = 0  # index just past the last span that was turned into a token
    for match in pattern.finditer(source):
        delimiter = match.group("em")
        if not delimiter:
            # Code spans, links, strikethrough and diffs have no element
            # class yet.  Leave the cursor where it is so their text stays
            # in the surrounding plain-text slice instead of being dropped.
            continue

        tokens.append(source[cursor:match.start()])
        tokens.append(Emphasis(match.group("textEm"), len(delimiter)))
        cursor = match.end()

    tokens.append(source[cursor:])
    return tokens
89
90
91
def _tokenise(source):
    """Group the lines of ``source`` into block-level elements.

    The text is split on newlines and each line is classified:

    * a blank (whitespace-only) line closes the current block and starts a
      void ``Element``;
    * a line starting with ``#`` characters followed by exactly one space
      closes the current block and starts a ``Heading`` whose level is the
      number of leading ``#`` characters;
    * any other line is appended to the current ``Paragraph``, starting a
      new one first if the current block is not a paragraph.

    Args:
        source: The Markdown text to tokenise.

    Returns:
        A list of ``Element`` instances (including ``Heading`` and
        ``Paragraph``) in document order.  The first entry is always the
        initial void ``Element``.
    """
    tokens = []

    # Must be an instance, not the class, so that every entry in the result
    # is an Element object with the same interface.
    current_block = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Blank line: close whatever was open and fall back to a void block.
            tokens.append(current_block)
            current_block = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " ") == 1:
            tokens.append(current_block)
            content = line.lstrip("#").strip()
            current_block = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current_block, Paragraph):
                # Paragraph is the default block type for ordinary text.
                tokens.append(current_block)
                current_block = Paragraph("")
            current_block.addLine(line)

    # Flush the block that was still open when the input ended.
    tokens.append(current_block)
    return tokens
119
120
121
# Import-time demo: tokenise a sample document and print each block's repr.
# NOTE(review): this runs whenever the module is imported.
_sample_blocks = _tokenise(
    """
# Hello World!
## Title 1
### Part 1
#### Chapter 1
##### Article 1
###### Section 1
Lorem ipsum
dolor sit amet

..."""
)
for _sample_block in _sample_blocks:
    print(repr(_sample_block))
135
136
137
def parse_markdown(source):
    """Tokenise a Markdown document into block-level elements.

    Args:
        source: The full Markdown text.

    Returns:
        The list of block elements produced by ``_tokenise`` for ``source``.
    """
    return _tokenise(source)
139
140
141
# Import-time smoke checks: tokenise an empty document, then print the inline
# tokens produced for a line containing a bold span.
parse_markdown("")
_bold_demo_tokens = _parse_line("**bold** text")
print(_bold_demo_tokens)
143