markdown.py
Python script, ASCII text executable
"""Minimal Markdown tokenizer: block-level elements plus inline emphasis."""

import re


# Inline-level grammar.  It is compiled with ``re.VERBOSE``, so the whitespace
# and the trailing ``# ...`` comments inside the pattern are ignored by the
# regex engine.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""

# Compiled once at import time instead of on every ``_parse_line`` call.
_INLINE_PATTERN = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)


def leading(string: str, character: str) -> int:
    """Return how many characters ``lstrip(character)`` removes from *string*."""
    return len(string) - len(string.lstrip(character))


def trailing(string: str, character: str) -> int:
    """Return how many characters ``rstrip(character)`` removes from *string*."""
    return len(string) - len(string.rstrip(character))


class Element:
    """Base class of every token; a bare instance represents a void block."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Void block"


class Heading(Element):
    """A heading line with its text and its level (number of leading ``#``)."""

    def __init__(self, content, level):
        super().__init__()
        self.content = content
        self.level = level

    def __repr__(self):
        return f"Heading level {self.level}:\n\t" + self.content


class Paragraph(Element):
    """A run of consecutive text lines merged into a single string."""

    def __init__(self, content):
        super().__init__()
        self.content = content

    def addLine(self, content):
        """Append *content*, stripped, followed by a single separating space."""
        self.content += content.strip() + " "

    def __repr__(self):
        return "Paragraph:\n\t" + self.content


class Emphasis(Element):
    """Emphasised inline text; ``value`` is the length of the delimiter run."""

    def __init__(self, content, value):
        super().__init__()
        self.content = content
        self.value = value

    def __repr__(self):
        return f"Emphasis ({self.value}): " + self.content


def _parse_line(source: str) -> list:
    """Split one line of text into plain strings and inline elements.

    The result alternates between the literal text found between inline
    constructs (possibly the empty string) and one token per construct.
    Emphasis becomes an ``Emphasis`` instance.  The other constructs matched
    by ``inlineRegex`` have no dedicated element class, so their raw source
    text is kept as a plain string rather than being dropped.

    A line ending in exactly one backslash has that backslash replaced by a
    newline character.
    """
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\")
        source += "\n"

    tokens = []
    cursor = 0  # index just past the previously consumed match
    for match in _INLINE_PATTERN.finditer(source):
        # Literal text between the previous construct and this one.
        tokens.append(source[cursor:match.start()])
        cursor = match.end()

        if match.group("em"):
            tokens.append(
                Emphasis(match.group("textEm"), len(match.group("em")))
            )
        else:
            # Not modelled yet: keep the original text so no content is lost.
            tokens.append(match.group(0))

    # Whatever follows the last construct (the whole line if nothing matched).
    tokens.append(source[cursor:])

    return tokens


def _tokenise(source: str) -> list:
    """Split *source* into a list of block-level elements.

    Lines are classified one at a time:

    * a blank line closes the current block and opens a void ``Element``;
    * a line starting with ``#`` whose run of ``#`` is followed by exactly
      one space closes the current block and opens a ``Heading`` whose level
      is the number of leading ``#``;
    * any other line is appended to the current ``Paragraph``, opening a new
      one first if the current block is not a paragraph.

    Every closed block is emitted, including void ones, so the list always
    begins with the initial void element.
    """
    tokens = []

    # Must be an instance: the block is appended to ``tokens`` as soon as the
    # first line is classified.
    current_block = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Void block
            tokens.append(current_block)
            current_block = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " ") == 1:
            tokens.append(current_block)

            content = line.lstrip("#").strip()
            current_block = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current_block, Paragraph):
                # Paragraph is default
                tokens.append(current_block)
                current_block = Paragraph("")

            current_block.addLine(line)

    tokens.append(current_block)

    return tokens


def parse_markdown(source: str) -> list:
    """Tokenise *source* and return its block-level elements."""
    return _tokenise(source)


def _demo() -> None:
    """Print the tokens of a small sample document and of a sample line."""
    for token in _tokenise(
        """
# Hello World!
## Title 1
### Part 1
#### Chapter 1
##### Article 1
###### Section 1
Lorem ipsum
dolor sit amet

..."""
    ):
        print(repr(token))

    parse_markdown("")
    print(_parse_line("**bold** text"))


if __name__ == "__main__":
    _demo()