markdown.py
Python script, ASCII text executable
import re


# Pattern describing the inline markup constructs (emphasis, inline code,
# links/images, strikethrough, diffs).  It is laid out with insignificant
# whitespace and trailing "#" comments, so it only has the intended meaning
# when compiled with ``re.VERBOSE``.
inlineRegex = r"""
(?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em) # emphasis
|
[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code
|
(?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media
|
(?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
|
(?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff) # diffs
"""


def leading(string: str, character: str) -> int:
    """Return how many characters ``str.lstrip(character)`` removes from *string*.

    *character* is handed to ``str.lstrip`` unchanged, so a multi-character
    argument is treated as a set of characters to strip.
    """
    return len(string) - len(string.lstrip(character))


def trailing(string: str, character: str) -> int:
    """Return how many characters ``str.rstrip(character)`` removes from *string*.

    *character* is handed to ``str.rstrip`` unchanged, so a multi-character
    argument is treated as a set of characters to strip.
    """
    return len(string) - len(string.rstrip(character))


class Element:
    """Base class of every token; a bare instance represents a void block."""

    def __repr__(self) -> str:
        return "Void block"


class Heading(Element):
    """A heading line: its text and its level (number of leading ``#``)."""

    def __init__(self, content, level):
        super().__init__()
        self.content = content
        self.level = level

    def __repr__(self) -> str:
        return f"Heading level {self.level}:\n\t" + self.content


class Paragraph(Element):
    """A run of consecutive text lines accumulated into a single string."""

    def __init__(self, content):
        super().__init__()
        self.content = content

    def addLine(self, content):
        """Append *content*, stripped of surrounding whitespace, plus one space."""
        self.content += content.strip() + " "

    def __repr__(self) -> str:
        return "Paragraph:\n\t" + self.content


class Emphasis(Element):
    """Emphasised inline text together with its bold / italic flags."""

    def __init__(self, content, bold, italic):
        super().__init__()
        self.content = content
        self.bold = bold
        self.italic = italic

    def __repr__(self) -> str:
        return f"Emphasis (bold {self.bold}, italic {self.italic}):\n\t" + self.content


def _parseLine(source):
    """Split one line of text into inline tokens.

    A line that ends in exactly one backslash has that backslash replaced by
    a newline.  No inline markup is recognised yet: the result is a
    one-element list holding the (possibly adjusted) line.
    NOTE(review): presumably meant to be extended to use ``inlineRegex``.
    """
    if trailing(source, "\\") == 1:
        source = source.rstrip("\\")
        source += "\n"

    # Previously this list was built and then discarded (implicit None).
    return [source]


def _tokenise(source):
    """Break *source* into a list of block-level ``Element`` tokens.

    Lines are classified in order:

    * a blank line closes the current block and opens a void ``Element``;
    * a line starting with ``#`` characters followed by exactly one space
      closes the current block and opens a ``Heading``;
    * any other line is appended to the current ``Paragraph``, opening a new
      one first when the current block is not a paragraph.

    The block that is open when the input ends is emitted last, so the result
    always starts with the initial void block.
    """
    tokens = []

    # Must be an instance: starting from the bare class ``Element`` leaked the
    # class object itself into the token list as its first entry.
    current = Element()

    for line in source.split("\n"):
        if not line.strip():
            # Blank line: void block.
            tokens.append(current)
            current = Element()
        elif line.startswith("#") and leading(line.lstrip("#"), " ") == 1:
            tokens.append(current)

            content = line.lstrip("#").strip()
            current = Heading(content, leading(line, "#"))
        else:
            if not isinstance(current, Paragraph):
                # Paragraph is the default block type.
                tokens.append(current)
                current = Paragraph("")

            current.addLine(line)

    tokens.append(current)

    return tokens


# Demo: tokenise a sample document and print each token.
# NOTE(review): this runs at import time; kept so existing behaviour of
# running/importing the module is unchanged.
for i in _tokenise(
    """
# Hello World!
## Title 1
### Part 1
#### Chapter 1
##### Article 1
###### Section 1
Lorem ipsum
dolor sit amet

..."""
):
    print(repr(i))


def parseMarkdown(source):
    """Tokenise *source* and return the resulting list of ``Element`` tokens.

    Only block-level tokenisation (see ``_tokenise``) is performed.  The
    token list was previously computed and dropped; it is now returned.
    """
    return _tokenise(source)


parseMarkdown("")