roundabout,
created on Wednesday, 27 March 2024, 16:12:42 (1711555962),
received on Wednesday, 31 July 2024, 06:54:42 (1722408882)
Author identity: vlad <vlad.muntoiu@gmail.com>
d033fb6cae5b42ef6660f881bb1331aaf72df530
markdown.py
@@ -1,4 +1,6 @@
import re
import bs4 as beautifulsoup
import sys
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
@@ -23,11 +25,17 @@ def trailing(string, character):
class Element:
    """Base AST node; on its own it represents a void block.

    Attributes set by ``__init__``:
        classes: list, initially empty.
        content: initially ``None``.
    """

    def __init__(self):
        self.classes = []
        self.content = None

    def __repr__(self):
        """Return the fixed debug label for a void block."""
        return "Void block"

    @property
    def tag_name(self):
        """Tag name for this node; ``make_html`` skips nodes named "m-void"."""
        return "m-void"
class Container(Element):
def __init__(self, content):
@@ -47,6 +55,10 @@ class Heading(Container):
# Debug representation of a Heading: the heading level on the first line,
# then the repr of the heading's content on a new, tab-indented line.
def __repr__(self):
return f"Heading level {self.level}:\n\t" + repr(self.content)
# Tag name for a Heading: "h" followed by the heading level
# (a level of 2 gives "h2"). make_html passes this to soup.new_tag.
@property
def tag_name(self):
return "h" + str(self.level)
class Paragraph(Container):
def __init__(self):
@@ -58,24 +70,42 @@ class Paragraph(Container):
# Debug representation of a Paragraph: a "Paragraph:" label, then the repr
# of the paragraph's content on a new, tab-indented line.
def __repr__(self):
return "Paragraph:\n\t" + repr(self.content)
# Tag name for a Paragraph; make_html passes this to soup.new_tag.
@property
def tag_name(self):
return "p"
class Emphasis(Container):
    """Inline emphasis span whose strength is carried by ``value``.

    ``value`` is decoded into up to three CSS classes that are appended to
    ``self.classes``: "emphasis-3" when ``value >= 4``, "emphasis-2" when
    ``value % 4 >= 2`` and "emphasis-1" when ``value`` is odd.
    NOTE(review): ``value`` is presumably the number of emphasis marker
    characters matched by the inline regex -- confirm against parse_line.
    """

    def __init__(self, content, value):
        super().__init__(content)
        self.value = value
        # The three checks are independent, so one value can contribute
        # several classes; the order below is the order they are appended.
        candidates = (
            ("emphasis-3", value >= 4),
            ("emphasis-2", value % 4 >= 2),
            ("emphasis-1", value % 2),
        )
        self.classes.extend(label for label, wanted in candidates if wanted)

    def __repr__(self):
        """Return a debug label with the emphasis value and the content repr."""
        return f"Emphasis ({self.value}): " + repr(self.content)

    @property
    def tag_name(self):
        """Tag name: "em" when ``value`` is exactly 1, otherwise "strong"."""
        if self.value == 1:
            return "em"
        return "strong"
class Code(Element):
    """Inline code span.

    Args:
        content: The code text; stored as a one-item list in ``self.content``.
    """

    def __init__(self, content):
        super().__init__()
        # Stored as a list: make_html recurses into list-valued content.
        # The earlier plain assignment was a dead store and has been removed.
        self.content = [content]

    def __repr__(self):
        """Return a debug label followed by the stored content list."""
        return f"Inline code: {self.content}"

    @property
    def tag_name(self):
        """Tag name for inline code."""
        return "code"
class Strikethrough(Container):
def __init__(self, content):
@@ -84,6 +114,10 @@ class Strikethrough(Container):
# Debug representation of a Strikethrough: a label followed by the repr of
# the wrapped content.
def __repr__(self):
return f"Strikethrough: {repr(self.content)}"
# Tag name for a Strikethrough; make_html passes this to soup.new_tag.
@property
def tag_name(self):
return "s"
class Diff(Container):
def __init__(self, content, value):
@@ -93,22 +127,34 @@ class Diff(Container):
# Debug representation of a Diff: the marker value in parentheses, then the
# content formatted with str() (not repr(), unlike the sibling node classes).
def __repr__(self):
return f"Diff ({self.value}): {self.content}"
# Tag name for a Diff: "ins" when the marker value is "++", otherwise "del".
@property
def tag_name(self):
return "ins" if self.value == "++" else "del"
class Link(Element):
    """Hyperlink node (also the base class for images).

    Args:
        content: The link text (first positional argument).
        destination: The link target.
        image: True when the node represents an image rather than a link.
    """

    def __init__(self, content, destination, image=False):
        super().__init__()
        # The text lives in ``content`` like every other node, so generic
        # code that inspects ``content`` also works for links.
        self.content = content
        self.destination = destination
        self.image = image

    def __repr__(self):
        """Return "Image" or "Link", then the text and the destination."""
        # Reads ``content``: the former ``text`` attribute is no longer set.
        return f"{'Image' if self.image else 'Link'}: {self.content} -> {self.destination}"

    @property
    def tag_name(self):
        """Tag name for a hyperlink."""
        return "a"
class Image(Link):
    """Image node: a Link constructed with its ``image`` flag set to True."""

    def __init__(self, text, destination):
        super().__init__(text, destination, image=True)

    @property
    def tag_name(self):
        """Tag name for an image."""
        return "img"
def parse_line(source):
if trailing(source, "\\") == 1:
@@ -145,7 +191,7 @@ def parse_line(source):
return tokens
def _tokenise(source):
def tokenise(source):
tokens = []
current_block = Element
@@ -175,8 +221,30 @@ def _tokenise(source):
return tokens
def make_html(ast):
    """Render a sequence of AST nodes into a BeautifulSoup document.

    Plain strings are appended as text. Objects that expose ``content`` and
    whose ``tag_name`` is not "m-void" become tags: list-valued content is
    rendered recursively, any other truthy content becomes the tag's string.
    Objects without ``content`` and "m-void" nodes produce no output.

    Args:
        ast: Iterable of strings and node objects.

    Returns:
        The BeautifulSoup object holding the generated markup.
    """
    # Name the parser explicitly so the tree builder does not depend on
    # which optional parsers happen to be installed.
    soup = beautifulsoup.BeautifulSoup("", "html.parser")
    for node in ast:
        if isinstance(node, str):
            soup.append(node)
            continue
        if not hasattr(node, "content") or node.tag_name == "m-void":
            continue

        name = str(node.tag_name)
        tag = soup.new_tag(name)

        # Carry over the CSS classes collected on the node, if any.
        classes = getattr(node, "classes", None)
        if classes:
            tag["class"] = " ".join(classes)

        # Links and images carry a destination that must reach the markup.
        is_image = name == "img"
        destination = getattr(node, "destination", None)
        if destination is not None:
            tag["src" if is_image else "href"] = destination

        try:
            if is_image:
                # <img> is a void element: its text goes in ``alt``,
                # never in child nodes.
                if isinstance(node.content, str) and node.content:
                    tag["alt"] = node.content
            elif isinstance(node.content, list):
                # Move the rendered children into the tag rather than
                # nesting a whole document object inside it.
                rendered = make_html(node.content)
                for child in list(rendered.contents):
                    tag.append(child)
            elif node.content:
                tag.string = node.content
        except AttributeError as exc:
            # Best effort: report the problem and still emit the tag.
            print(exc, file=sys.stderr)
        soup.append(tag)
    return soup
if __name__ == '__main__':
for i in _tokenise(
# Generate an AST from the sample markdown document below
ast = tokenise(
"""
# Hello World!
## Title 1
@@ -189,6 +257,10 @@ dolor `sit` amet
...
"""
):
print(repr(i))
)
# for i in ast:
# print(repr(i))
# Now convert the AST to HTML
print(make_html(ast).prettify(formatter=beautifulsoup.formatter.HTMLFormatter(indent=4)))