markdown.py
Python script, ASCII text executable
"""Minimal Markdown tokeniser.

Splits a document into block-level elements (void blocks, headings,
paragraphs) and each line into inline elements (emphasis, inline code,
links/images, strikethrough and diff markers).
"""
import re

# Verbose-mode pattern: one alternative per inline construct.  Whitespace and
# ``# ...`` comments inside the pattern are ignored because it is compiled
# with ``re.VERBOSE``.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""

# Compiled once: parse_line() is called recursively for every container.
_INLINE_PATTERN = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)


def leading(string: str, character: str) -> int:
    """Return how many leading characters of *string* are in *character*."""
    return len(string) - len(string.lstrip(character))


def trailing(string: str, character: str) -> int:
    """Return how many trailing characters of *string* are in *character*."""
    return len(string) - len(string.rstrip(character))


class Element:
    """Base class of every parsed element; on its own it is a void block."""

    def __init__(self):
        pass

    def __repr__(self):
        return "Void block"


class Container(Element):
    """Element whose ``content`` is the inline-parsed token list of its text."""

    def __init__(self, content):
        super().__init__()
        self.content = parse_line(content)

    def __repr__(self):
        return "Generic container element: " + repr(self.content)


class Heading(Container):
    """Heading with a numeric ``level`` (the number of leading ``#``)."""

    def __init__(self, content, level):
        super().__init__(content)
        self.level = level

    def __repr__(self):
        return f"Heading level {self.level}:\n\t" + repr(self.content)


class Paragraph(Container):
    """Paragraph that is built up line by line through :meth:`addLine`."""

    def __init__(self):
        super().__init__("")

    def addLine(self, content):
        """Append the inline tokens of *content* followed by a single space."""
        self.content.extend([*parse_line(content), " "])

    def __repr__(self):
        return "Paragraph:\n\t" + repr(self.content)


class Emphasis(Container):
    """Emphasised text; ``value`` is the length of the delimiter run."""

    def __init__(self, content, value):
        super().__init__(content)
        self.value = value

    def __repr__(self):
        return f"Emphasis ({self.value}): " + repr(self.content)


class Code(Element):
    """Inline code span; its content is kept verbatim (not parsed further)."""

    def __init__(self, content):
        super().__init__()
        self.content = content

    def __repr__(self):
        return f"Inline code: {self.content}"


class Strikethrough(Container):
    """Text wrapped in ``~~``."""

    def __init__(self, content):
        super().__init__(content)

    def __repr__(self):
        return f"Strikethrough: {repr(self.content)}"


class Diff(Container):
    """Text wrapped in ``--`` or ``++``; ``value`` is the delimiter used."""

    def __init__(self, content, value):
        super().__init__(content)
        self.value = value

    def __repr__(self):
        return f"Diff ({self.value}): {self.content}"


class Link(Element):
    """Hyperlink (or image when ``image`` is true) with text and destination."""

    def __init__(self, text, destination, image=False):
        super().__init__()
        self.text = text
        self.destination = destination
        self.image = image

    def __repr__(self):
        return f"{'Image' if self.image else 'Link'}: {self.text} -> {self.destination}"


class Image(Link):
    """Link whose ``image`` flag is always set."""

    def __init__(self, text, destination):
        super().__init__(text, destination, True)


def parse_line(source):
    """Split *source* into a list of plain strings and inline elements.

    The returned list alternates the literal text found between matches
    (possibly empty strings) with one element object per match, and always
    ends with the text that follows the last match.

    A line ending in exactly one backslash has that backslash replaced by a
    newline character before parsing.
    """
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\")
        source += "\n"

    tokens = []
    cursor = 0
    for match in _INLINE_PATTERN.finditer(source):
        # Literal text between the previous match and this one.
        tokens.append(source[cursor:match.start()])
        cursor = match.end()

        # Exactly one alternative of the pattern matched.  Compare against
        # None rather than truthiness so that constructs with empty text are
        # still emitted instead of silently vanishing, and use an elif chain
        # so that an image does not also produce a plain Link.
        if match.group("em") is not None:
            tokens.append(Emphasis(match.group("textEm"), len(match.group("em"))))
        elif match.group("textCode") is not None:
            tokens.append(Code(match.group("textCode")))
        elif match.group("urlText") is not None:
            text = match.group("urlText")
            destination = match.group("urlDestination")
            if match.group("imageFlag"):
                tokens.append(Image(text, destination))
            else:
                tokens.append(Link(text, destination))
        elif match.group("strike") is not None:
            tokens.append(Strikethrough(match.group("textStrike")))
        elif match.group("diff") is not None:
            tokens.append(Diff(match.group("textDiff"), match.group("diff")))

    tokens.append(source[cursor:])

    return tokens


def _tokenise(source):
    """Split *source* into a list of block-level elements.

    Blank lines close the current block and start a void block; lines made
    of ``#`` characters followed by at least one space become headings; any
    other line is appended to the current paragraph (a new one is opened if
    the current block is not a paragraph).
    """
    tokens = []

    # Must be an instance: the original stored the class object itself,
    # which then ended up in the returned token list.
    current_block = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Void block
            tokens.append(current_block)
            current_block = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " "):
            tokens.append(current_block)

            content = line.lstrip("#").strip()
            current_block = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current_block, Paragraph):
                # Paragraph is default
                tokens.append(current_block)
                current_block = Paragraph()

            current_block.addLine(line.strip())

    tokens.append(current_block)

    return tokens


if __name__ == '__main__':
    for block in _tokenise(
"""
# Hello World!
## Title 1
### Part 1
#### Chapter _1_
##### Article 1
###### Section 1
Lorem **i`p`sum**
dolor `sit` amet

...
"""
    ):
        print(repr(block))