markdown.py

text/x-script.python • 3.11 kiB
Python script, ASCII text executable
        
            1
            import re
        
            2
            
        
            3
            
        
            4
            # Verbose-mode pattern for inline markup: compile it with re.VERBOSE so that the
            # layout whitespace and the trailing "#" notes inside the literal are ignored.
            # Each alternative covers one construct and exposes its payload via named groups.
            # NOTE(review): textEm only excludes "*", so an underscore-delimited span is
            # matched greedily up to the last matching underscore run - confirm intent.
            inlineRegex = r"""
            (?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em)                              # emphasis
            |
            [`] (?P<textCode>(?:\\[`]|[^`])*) [`]                                              # inline code
            |
            (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\)     # hyperlink or media
            |
            (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2})                            # strikethrough
            |
            (?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff)                      # diffs
            """
        
            15
            
        
            16
            
        
            17
            def leading(string: str, character: str) -> int:
                """Return how many characters are stripped from the start of *string*.

                *character* is passed to ``str.lstrip``, so it acts as a set of
                characters to strip rather than as a literal prefix.
                """
                return len(string) - len(string.lstrip(character))
        
            19
            
        
            20
            
        
            21
            def trailing(string: str, character: str) -> int:
                """Return how many characters are stripped from the end of *string*.

                *character* is passed to ``str.rstrip``, so it acts as a set of
                characters to strip rather than as a literal suffix.
                """
                return len(string) - len(string.rstrip(character))
        
            23
            
        
            24
            
        
            25
            class Element:
                """Base class for parsed elements; a bare instance represents a void block."""

                # No explicit __init__: the base class holds no state, and subclasses can
                # still call super().__init__() because object provides a no-argument one.

                def __repr__(self):
                    return "Void block"
        
            31
            
        
            32
            
        
            33
            class Heading(Element):
                """Heading block: stores the heading text and its level."""

                def __init__(self, content, level):
                    super().__init__()
                    self.content = content  # heading text
                    self.level = level  # heading level given by the caller

                def __repr__(self):
                    return f"Heading level {self.level}:\n\t" + self.content
        
            42
            
        
            43
            
        
            44
            class Paragraph(Element):
                """Paragraph block whose text is accumulated line by line."""

                def __init__(self, content):
                    super().__init__()
                    self.content = content  # text gathered so far

                def addLine(self, content):
                    """Append *content*, stripped of surrounding whitespace, plus one space."""
                    self.content += content.strip() + " "

                def __repr__(self):
                    return "Paragraph:\n\t" + self.content
        
            54
            
        
            55
            
        
            56
            class Emphasis(Element):
                """Inline emphasis: stores the emphasised text and a strength value."""

                def __init__(self, content, value):
                    super().__init__()
                    self.content = content  # text between the emphasis markers
                    self.value = value  # strength value supplied by the caller

                def __repr__(self):
                    return f"Emphasis ({self.value}): " + self.content
        
            64
            
        
            65
            
        
            66
            def _parse_line(source):
                """Split *source* into plain-text pieces and inline elements.

                Returns a list, in source order, of the text found between matches of
                ``inlineRegex`` (possibly empty strings) and one entry per match.
                Emphasis matches become ``Emphasis`` objects whose value is the length
                of the marker run. Every other match is kept as its raw matched text,
                so no input is lost while those constructs have no element class.
                """
                # Exactly one trailing backslash is replaced by a newline.
                if trailing(source, "\\") == 1:
                    source = source.rstrip("\\") + "\n"

                pattern = re.compile(inlineRegex, re.MULTILINE | re.DOTALL | re.VERBOSE)

                tokens = []
                cursor = 0
                for found in pattern.finditer(source):
                    # Plain text between the previous match and this one.
                    tokens.append(source[cursor:found.start()])
                    cursor = found.end()

                    if found.group("em"):
                        tokens.append(Emphasis(found.group("textEm"), len(found.group("em"))))
                    else:
                        # Previously these matches were skipped and their text vanished.
                        tokens.append(found.group(0))

                # Whatever follows the last match.
                tokens.append(source[cursor:])
                return tokens
        
            89
            
        
            90
            
        
            91
            def _tokenise(source):
                """Group the lines of *source* into a list of block elements.

                A blank line closes the current block and opens a void ``Element``.
                A line of "#" characters followed by exactly one space opens a
                ``Heading`` whose level is the number of leading "#". Any other line
                is added to the current ``Paragraph``, opening one if needed.
                """
                tokens = []

                # Start from an instance, not the class itself, so that the first
                # emitted token is a real void block like every later one.
                current_block = Element()

                for line in source.split("\n"):
                    if not line.strip():
                        # Void block
                        tokens.append(current_block)
                        current_block = Element()
                    elif line.startswith("#") and leading(line.lstrip("#"), " ") == 1:
                        tokens.append(current_block)
                        content = line.lstrip("#").strip()
                        current_block = Heading(content, leading(line, "#"))
                    else:
                        if not isinstance(current_block, Paragraph):
                            # Paragraph is default
                            tokens.append(current_block)
                            current_block = Paragraph("")
                        current_block.addLine(line)

                # Flush the block that was still open at the end of the input.
                tokens.append(current_block)
                return tokens
        
            119
            
        
            120
            
        
            121
            # Demo: tokenise a small sample document and print each resulting block.
            # The sample is joined from separate lines so that its content does not
            # depend on how this source file is indented.
            for i in _tokenise(
                "\n".join(
                    [
                        "",
                        "# Hello World!",
                        "## Title 1",
                        "### Part 1",
                        "#### Chapter 1",
                        "##### Article 1",
                        "###### Section 1",
                        "Lorem ipsum",
                        "dolor sit amet",
                        "",
                        "...",
                    ]
                )
            ):
                print(repr(i))
        
            135
            
        
            136
            
        
            137
            def parse_markdown(source):
                """Tokenise *source* into block elements and return the resulting list.

                The token list used to be computed and then discarded, so callers
                always received ``None``.
                """
                return _tokenise(source)
        
            139
            
        
            140
            
        
            141
            # Smoke run at module level: parse an empty document, then show the inline
            # tokens produced for a short emphasised sample.
            parse_markdown("")
            print(_parse_line("**bold** text"))
        
            143