markdown.py

text/x-script.python • 6.41 kiB
Python script, ASCII text executable
        
            1
            import re
        
            2
            import bs4 as beautifulsoup
        
            3
            import sys
        
            4
            
        
            5
            # Pattern for the inline constructs recognised by parse_line. It is written
            # for re.VERBOSE: spaces and the trailing "# ..." notes inside the string are
            # layout only and are ignored by the regex engine under that flag.
            # Alternatives, in order:
            #   * emphasis      - 1 to 7 '*' or '_' markers, closed by the same marker run
            #                     (named groups: em, textEm)
            #   * inline code   - text between single backticks (textCode)
            #   * link / image  - "[text](destination)", with an optional leading '!'
            #                     (imageFlag, urlText, urlDestination)
            #   * strikethrough - text between "~~" pairs (strike, textStrike)
            #   * diff          - text between matching "--" or "++" pairs (diff, textDiff)
            inlineRegex = r"""
            (?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em)                              # emphasis
            |
            [`] (?P<textCode>(?:\\[`]|[^`])*) [`]                                              # inline code
            |
            (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\)     # hyperlink or media
            |
            (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2})                            # strikethrough
            |
            (?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff)                      # diffs
            """
        
            16
            
        
            17
            
        
            18
            def leading(string, character):
                """Return how many characters ``str.lstrip(character)`` removes from the start.

                ``character`` is handed to ``lstrip`` unchanged, so a multi-character
                value is treated as a set of characters to strip.
                """
                remainder = string.lstrip(character)
                return len(string) - len(remainder)
        
            20
            
        
            21
            
        
            22
            def trailing(string, character):
                """Return how many characters ``str.rstrip(character)`` removes from the end.

                ``character`` is handed to ``rstrip`` unchanged, so a multi-character
                value is treated as a set of characters to strip.
                """
                remainder = string.rstrip(character)
                return len(string) - len(remainder)
        
            24
            
        
            25
            
        
            26
            class Element:
                """Base node of the document tree; on its own it represents a void block.

                Attributes:
                    classes: list of class-name strings attached to the node (starts empty).
                    content: payload of the node; ``None`` for a bare element.
                """

                def __init__(self):
                    self.classes = []
                    self.content = None

                @property
                def tag_name(self):
                    """Tag name used for this node; the base element reports ``"m-void"``."""
                    return "m-void"

                def __repr__(self):
                    return "Void block"
        
            38
            
        
            39
            
        
            40
            class Container(Element):
                """Element whose content is the inline-parsed form of a source string."""

                def __init__(self, content):
                    """Parse ``content`` with ``parse_line`` and store the resulting list."""
                    super().__init__()
                    self.content = parse_line(content)

                def __repr__(self):
                    return f"Generic container element: {self.content!r}"
        
            47
            
        
            48
            
        
            49
            class Heading(Container):
                """Heading block: inline-parsed text plus a numeric level."""

                def __init__(self, content, level):
                    """Store the parsed ``content`` and the heading ``level``."""
                    super().__init__(content)
                    self.level = level

                @property
                def tag_name(self):
                    """Tag name built from the level, e.g. ``"h2"`` for level 2."""
                    return f"h{self.level}"

                def __repr__(self):
                    return f"Heading level {self.level}:\n\t{self.content!r}"
        
            61
            
        
            62
            
        
            63
            class Paragraph(Container):
                """Paragraph block that is filled one source line at a time."""

                def __init__(self):
                    # Start from the parse of an empty string; lines are added later.
                    super().__init__("")

                def addLine(self, content):
                    """Append the parsed tokens of ``content`` followed by a single space."""
                    pieces = parse_line(content)
                    pieces.append(" ")
                    self.content.extend(pieces)

                @property
                def tag_name(self):
                    """Paragraphs use the ``"p"`` tag."""
                    return "p"

                def __repr__(self):
                    return f"Paragraph:\n\t{self.content!r}"
        
            76
            
        
            77
            
        
            78
            class Emphasis(Container):
                """Emphasised inline text; ``value`` is the length of the marker run."""

                def __init__(self, content, value):
                    """Parse ``content`` and derive class names from ``value``.

                    Up to three class names are added, strongest first: ``emphasis-3``
                    when ``value >= 4``, ``emphasis-2`` when ``value % 4 >= 2`` and
                    ``emphasis-1`` when ``value`` is odd.
                    """
                    super().__init__(content)
                    self.value = value
                    candidates = (
                        (value >= 4, "emphasis-3"),
                        (value % 4 >= 2, "emphasis-2"),
                        (value % 2, "emphasis-1"),
                    )
                    self.classes.extend(name for active, name in candidates if active)

                @property
                def tag_name(self):
                    """``"em"`` for a single marker, ``"strong"`` for anything else."""
                    return "strong" if self.value != 1 else "em"

                def __repr__(self):
                    return f"Emphasis ({self.value}): {self.content!r}"
        
            95
            
        
            96
            
        
            97
            class Code(Element):
                """Inline code span; its text is kept verbatim and not parsed further."""

                def __init__(self, content):
                    """Store ``content`` as the single item of the content list."""
                    super().__init__()
                    self.content = [content]

                @property
                def tag_name(self):
                    """Inline code uses the ``"code"`` tag."""
                    return "code"

                def __repr__(self):
                    return "Inline code: {}".format(self.content)
        
            108
            
        
            109
            
        
            110
            class Strikethrough(Container):
                """Struck-through inline text."""

                def __init__(self, content):
                    """Parse ``content`` as inline markup via the container base class."""
                    super().__init__(content)

                @property
                def tag_name(self):
                    """Strikethrough uses the ``"s"`` tag."""
                    return "s"

                def __repr__(self):
                    return "Strikethrough: " + repr(self.content)
        
            120
            
        
            121
            
        
            122
            class Diff(Container):
                """Inserted or deleted inline text; ``value`` is the marker that was used."""

                def __init__(self, content, value):
                    """Parse ``content`` and remember the diff marker in ``value``."""
                    super().__init__(content)
                    self.value = value

                @property
                def tag_name(self):
                    """``"ins"`` when the marker is ``"++"``, otherwise ``"del"``."""
                    if self.value == "++":
                        return "ins"
                    return "del"

                def __repr__(self):
                    return "Diff ({}): {}".format(self.value, self.content)
        
            133
            
        
            134
            
        
            135
            class Link(Element):
                """Hyperlink node: display text in ``content`` plus a ``destination``.

                Attributes:
                    content: link text, stored as given (it is not inline-parsed here).
                    destination: the link target.
                    image: ``True`` when the node stands for an image rather than a link.
                """

                def __init__(self, content, destination, image=False):
                    super().__init__()
                    self.content = content
                    self.destination = destination
                    self.image = image

                def __repr__(self):
                    # The text lives in ``content``; no ``text`` attribute is ever set on
                    # this class, so reading ``self.text`` here would raise AttributeError.
                    kind = "Image" if self.image else "Link"
                    return f"{kind}: {self.content} -> {self.destination}"

                @property
                def tag_name(self):
                    """Links use the ``"a"`` tag."""
                    return "a"
        
            148
            
        
            149
            
        
            150
            class Image(Link):
                """Image node: a link whose ``image`` flag is always set."""

                def __init__(self, text, destination):
                    """Store ``text`` as the link content and mark the node as an image."""
                    super().__init__(text, destination, True)

                @property
                def tag_name(self):
                    """Images use the ``"img"`` tag."""
                    return "img"
        
            157
            
        
            158
            
        
            159
            def parse_line(source):
                """Split a piece of markdown text into plain strings and inline elements.

                The text is scanned with ``inlineRegex``. The text between matches is
                kept as ``str`` items (possibly empty) and every match becomes exactly
                one element: ``Emphasis``, ``Code``, ``Strikethrough``, ``Diff``,
                ``Link`` or ``Image``.

                Args:
                    source: the text to parse.

                Returns:
                    A list that alternates between strings and element objects and
                    always ends with the (possibly empty) text after the last match.
                """
                # Exactly one trailing backslash is replaced by a newline character.
                if trailing(source, "\\") == 1:
                    source = source.rstrip("\\")
                    source += "\n"

                tokens = []
                pattern = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)

                lookup = 0
                for match in pattern.finditer(source):
                    # Plain text between the previous match and this one.
                    tokens.append(source[lookup:match.start()])
                    lookup = match.end()

                    # Only one alternative of the pattern can match, so the branches are
                    # exclusive. Groups whose text may legitimately be empty are compared
                    # against None (group did not take part in the match) rather than
                    # tested for truthiness, so empty code spans and empty-text links are
                    # not silently swallowed.
                    if match.group("em"):
                        tokens.append(
                            Emphasis(match.group("textEm"), len(match.group("em")))
                        )
                    elif match.group("textCode") is not None:
                        tokens.append(Code(match.group("textCode")))
                    elif match.group("strike"):
                        tokens.append(Strikethrough(match.group("textStrike")))
                    elif match.group("diff"):
                        tokens.append(Diff(match.group("textDiff"), match.group("diff")))
                    elif match.group("urlDestination") is not None:
                        # "![...](...)" is an image, "[...](...)" a link - never both.
                        node_type = Image if match.group("imageFlag") else Link
                        tokens.append(
                            node_type(
                                match.group("urlText"), match.group("urlDestination")
                            )
                        )

                tokens.append(source[lookup:])

                return tokens
        
            192
            
        
            193
            
        
            194
            def tokenise(source):
                """Split markdown ``source`` into a list of block-level elements.

                The text is processed line by line:

                * a blank line closes the current block and starts a void ``Element``;
                * a line starting with ``#`` characters followed by a space closes the
                  current block and starts a ``Heading`` whose level is the number of
                  leading ``#``;
                * any other line is added to the current ``Paragraph``, opening a new
                  one first if the current block is not a paragraph.

                Args:
                    source: the markdown text.

                Returns:
                    The list of blocks in document order. Void ``Element`` instances
                    (including the initial one) are part of the list.
                """
                tokens = []

                # Start with an *instance* of the void element; storing the class itself
                # would put a class object into the returned token list.
                current_block = Element()

                for line in source.split("\n"):
                    if not line.strip():
                        # Void block
                        tokens.append(current_block)
                        current_block = Element()
                    elif line.startswith("#") and leading(line.lstrip("#"), " "):
                        tokens.append(current_block)

                        content = line.lstrip("#").strip()
                        current_block = Heading(content, leading(line, "#"))
                    else:
                        if not isinstance(current_block, Paragraph):
                            # Paragraph is default
                            tokens.append(current_block)
                            current_block = Paragraph()

                        current_block.addLine(line.strip())

                # Flush the block that was still open when the input ended.
                tokens.append(current_block)

                return tokens
        
            222
            
        
            223
            
        
            224
            def make_html(ast):
                """Render a list of strings and elements into a BeautifulSoup document.

                Strings are appended as text. Every item that has a ``content``
                attribute and whose ``tag_name`` is not ``"m-void"`` becomes a tag:

                * a non-empty ``classes`` list is written to the ``class`` attribute;
                * a ``destination`` is written to ``href``, or to ``src`` when the
                  item's ``image`` flag is set;
                * list content is rendered recursively, string content becomes the
                  tag text - or the ``alt`` attribute for images, because ``img``
                  cannot hold text.

                Anything else (for example void elements) is skipped.

                Args:
                    ast: iterable of ``str`` and element objects.

                Returns:
                    The ``BeautifulSoup`` object holding the generated markup.
                """
                # Name the parser explicitly so the result does not depend on which
                # optional parsers happen to be installed.
                soup = beautifulsoup.BeautifulSoup("", "html.parser")
                for node in ast:
                    # Use bs4 to generate HTML
                    if isinstance(node, str):
                        soup.append(node)
                        continue
                    if not hasattr(node, "content") or node.tag_name == "m-void":
                        continue

                    tag = soup.new_tag(str(node.tag_name))

                    classes = getattr(node, "classes", None)
                    if classes:
                        tag["class"] = " ".join(classes)

                    destination = getattr(node, "destination", None)
                    is_image = bool(getattr(node, "image", False))
                    if destination is not None:
                        tag["src" if is_image else "href"] = destination

                    try:
                        if isinstance(node.content, list):
                            tag.append(make_html(node.content))
                        elif node.content:
                            if is_image:
                                tag["alt"] = node.content
                            else:
                                tag.string = node.content
                    except AttributeError as exc:
                        # Best effort: report the problem and still emit the tag.
                        print(exc, file=sys.stderr)
                    soup.append(tag)
                return soup
        
            243
            
        
            244
            
        
            245
            if __name__ == '__main__':
                # Demo: build the block list for a small built-in markdown sample ...
                sample = """
            # Hello World!
            ## Title 1
            ### Part 1
            #### Chapter _1_
            ##### Article 1
            ###### Section 1
            Lorem **i`p`sum**
            dolor `sit` amet

            ...
            """
                ast = tokenise(sample)

                # ... then render it as HTML and print it with four-space indentation.
                formatter = beautifulsoup.formatter.HTMLFormatter(indent=4)
                print(make_html(ast).prettify(formatter=formatter))
        
            266
            
        
            267