Python script, ASCII text executable
        
            1
            import re 
        
            2
            import bs4 as beautifulsoup 
        
            3
            import sys 
        
            4
            def only_chars(string, chars):
                """Return True when every character of ``string`` also occurs in ``chars``.

                An empty ``string`` trivially satisfies the condition.
                """
                allowed = frozenset(chars)
                return all(symbol in allowed for symbol in string)
        
            10
            # Pattern for every inline construct.  It is written for re.VERBOSE, so the
            # whitespace and the trailing "#" comments inside the string are ignored by
            # the regex engine.
            #
            # Notes on the alternatives:
            #   * inline code accepts a fence of one or two backticks; the closing fence
            #     must repeat the opening one (back-reference to "codeFence").  Both
            #     forms share the single "textCode" group, because Python's re rejects
            #     a pattern that defines the same group name twice.
            #   * underscore emphasis must not be preceded by a non-space character.
            #     A lookbehind is used so that the preceding whitespace is NOT part of
            #     the match and therefore survives in the surrounding text.
            inline_regex = r"""
            (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\)     # hyperlink or media
            |
            <(?P<urlDestination2>[^<>]*)>                                                      # autolink
            |
            (?P<em>\*{1,7}) (?P<textEm>(?:\\\*|[^*])*) (?P=em)                                 # emphasis with * not requiring space on either side
            |
            (?<!\S)(?P<em2>_{1,7}) (?P<textEm2>(?:\\.|[^_])*) (?P=em2)(?=\s|$)                 # emphasis with _ delimited by whitespace or a line boundary
            |
            (?P<codeFence>`{1,2}) (?P<textCode>(?:\\`|[^`])*) (?P=codeFence)                   # inline code (1 or 2 backticks)
            |
            (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2})                            # strikethrough
            |
            (?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff)                      # diffs
            """
        
            29
            def leading(string, character):
                """Count the characters that ``string.lstrip(character)`` removes from the front."""
                remainder = string.lstrip(character)
                return len(string) - len(remainder)
        
            33
            def trailing(string, character):
                """Count the characters that ``string.rstrip(character)`` removes from the end."""
                remainder = string.rstrip(character)
                return len(string) - len(remainder)
        
            37
            class Element:
                """Base node of the document tree; a bare instance is an empty "void" block."""

                def __init__(self):
                    # CSS class names for the rendered tag, and the payload (none yet).
                    self.classes, self.content = [], None

                @property
                def tag_name(self):
                    """Tag name for this node ("m-void" for the base class)."""
                    return "m-void"

                def __repr__(self):
                    return "Void block"
        
            51
            class Container(Element):
                """Element whose content is the inline-parsed form of a source string."""

                def __init__(self, content):
                    super().__init__()
                    inline_tokens = parse_line(content)
                    self.content = inline_tokens

                def __repr__(self):
                    return f"Generic container element: {self.content!r}"
        
            60
            class Rule(Element):
                """Horizontal rule; it carries no content of its own."""

                # No constructor override: Element.__init__ already sets the defaults.

                @property
                def tag_name(self):
                    return "hr"

                def __repr__(self):
                    return "Rule"
        
            72
            class HardBreak(Element):
                """Forced line break; it carries no content of its own."""

                # No constructor override: Element.__init__ already sets the defaults.

                @property
                def tag_name(self):
                    return "br"

                def __repr__(self):
                    return "Hard break"
        
            84
            class Heading(Container):
                """Heading whose text is inline-parsed and whose level selects the tag."""

                def __init__(self, content, level):
                    super().__init__(content)
                    self.level = level

                @property
                def tag_name(self):
                    return f"h{self.level}"

                def __repr__(self):
                    return f"Heading level {self.level}:\n\t{self.content!r}"
        
            98
            class Paragraph(Container):
                """Paragraph of inline-parsed text."""

                def __init__(self, content):
                    # Container.__init__ stores parse_line(content), which is the same
                    # final state as parsing "" first and then overwriting the result.
                    super().__init__(content)

                @property
                def tag_name(self):
                    return "p"

                def __repr__(self):
                    return f"Paragraph:\n\t{self.content!r}"
        
            111
            class CodeBlock(Element):
                """Fenced code block; the content is kept as a raw string, not inline-parsed."""

                def __init__(self, content, language="text"):
                    super().__init__()
                    self.language = language
                    self.content = content

                @property
                def tag_name(self):
                    return "pre"

                def __repr__(self):
                    return f"Code block ({self.language}):\n\t{self.content!r}"
        
            125
            class UnorderedList(Element):
                """Bulleted list; ``content`` is stored exactly as given by the caller."""

                def __init__(self, content):
                    super().__init__()
                    self.content = content

                @property
                def tag_name(self):
                    return "ul"

                def __repr__(self):
                    return f"Unordered list:\n\t{self.content!r}"
        
            138
            class OrderedList(Element):
                """Numbered list; ``content`` is stored exactly as given by the caller."""

                def __init__(self, content):
                    super().__init__()
                    self.content = content

                @property
                def tag_name(self):
                    return "ol"

                def __repr__(self):
                    return f"Ordered list:\n\t{self.content!r}"
        
            151
            class ListItem(Element):
                """List entry whose source text is block-tokenised, so it can hold nested blocks."""

                def __init__(self, content):
                    super().__init__()
                    nested_blocks = tokenise(content)
                    self.content = nested_blocks

                @property
                def tag_name(self):
                    return "li"

                def __repr__(self):
                    return f"List item:\n\t{self.content!r}"
        
            164
            class Blockquote(Paragraph):
                """Quoted section whose source text is block-tokenised rather than inline-parsed."""

                def __init__(self, content):
                    # Initialise the parent with empty text, then replace the content
                    # with the block-level tokens of the quoted source.
                    super().__init__("")
                    nested_blocks = tokenise(content)
                    self.content = nested_blocks

                @property
                def tag_name(self):
                    return "blockquote"

                def __repr__(self):
                    return f"Blockquote:\n\t{self.content!r}"
        
            177
            class Emphasis(Container):
                """Emphasised inline text.

                ``value`` is the length of the delimiter run; it is decoded below into
                up to three CSS classes.
                """

                def __init__(self, content, value):
                    super().__init__(content)
                    self.value = value
                    # (condition, class name) pairs, evaluated in this order.
                    flags = (
                        (value >= 4, "emphasis-3"),
                        (value % 4 >= 2, "emphasis-2"),
                        (value % 2, "emphasis-1"),
                    )
                    self.classes.extend(name for active, name in flags if active)

                @property
                def tag_name(self):
                    if self.value == 1:
                        return "em"
                    return "strong"

                def __repr__(self):
                    return f"Emphasis ({self.value}): {self.content!r}"
        
            196
            class Code(Element):
                """Inline code span; the raw text is wrapped in a one-item list."""

                def __init__(self, content):
                    super().__init__()
                    raw_text = content
                    self.content = [raw_text]

                @property
                def tag_name(self):
                    return "code"

                def __repr__(self):
                    return f"Inline code: {self.content}"
        
            209
            class Strikethrough(Container):
                """Struck-through inline text."""

                # Container.__init__(content) is inherited unchanged.

                @property
                def tag_name(self):
                    return "s"

                def __repr__(self):
                    return f"Strikethrough: {repr(self.content)}"
        
            221
            class Diff(Container):
                """Inserted or deleted inline text; ``value`` is the delimiter that was used."""

                def __init__(self, content, value):
                    super().__init__(content)
                    self.value = value

                @property
                def tag_name(self):
                    if self.value == "++":
                        return "ins"
                    return "del"

                def __repr__(self):
                    return f"Diff ({self.value}): {self.content}"
        
            234
            class Link(Element):
                """Hyperlink: inline-parsed link text plus a destination string."""

                def __init__(self, content, destination, image=False):
                    super().__init__()
                    self.destination = destination
                    self.image = image
                    self.content = parse_line(content)

                @property
                def tag_name(self):
                    return "a"

                def __repr__(self):
                    kind = "Image" if self.image else "Link"
                    return f"{kind}: {self.content} -> {self.destination}"
        
            249
            class Image(Link):
                """Link variant with the ``image`` flag set; its text is the alternative text."""

                def __init__(self, text, destination):
                    super().__init__(text, destination, image=True)

                @property
                def tag_name(self):
                    return "img"
        
            258
            def parse_line(source):
                """Split a run of text into plain strings and inline elements.

                Args:
                    source: Text to scan with ``inline_regex``.

                Returns:
                    A list, in source order, of ``str`` fragments (possibly empty) and
                    inline elements (Emphasis, Code, Strikethrough, Diff, Link, Image).
                    When ``source`` ends in exactly one backslash, that backslash is
                    removed and a HardBreak is appended as the last token.

                A match whose captured text is empty (for example an empty code span)
                is consumed without emitting a token.
                """
                # Exactly one trailing backslash requests a hard line break.
                hard_break = trailing(source, "\\") == 1
                if hard_break:
                    source = source.rstrip("\\")

                pattern = re.compile(inline_regex, re.MULTILINE | re.DOTALL | re.VERBOSE)
                tokens = []
                cursor = 0  # index just past the previous match

                for match in pattern.finditer(source):
                    # Plain text between the previous match and this one.
                    tokens.append(source[cursor:match.start()])
                    cursor = match.end()

                    # The alternatives of the pattern are mutually exclusive, so at
                    # most one of these branches applies to a given match.
                    if match.group("em"):
                        tokens.append(Emphasis(match.group("textEm"), len(match.group("em"))))
                    elif match.group("em2"):
                        tokens.append(Emphasis(match.group("textEm2"), len(match.group("em2"))))
                    elif match.group("textCode"):
                        tokens.append(Code(match.group("textCode")))
                    elif match.group("strike"):
                        tokens.append(Strikethrough(match.group("textStrike")))
                    elif match.group("diff"):
                        tokens.append(Diff(match.group("textDiff"), match.group("diff")))
                    elif match.group("urlText"):
                        link_type = Image if match.group("imageFlag") else Link
                        tokens.append(link_type(match.group("urlText"), match.group("urlDestination")))
                    elif match.group("urlDestination2"):
                        url_destination = match.group("urlDestination2")
                        url_text = url_destination
                        if "://" not in url_destination:
                            # tel:, mailto:, sms: and similar -- show only the part
                            # after the scheme.  Without any colon there is no scheme
                            # to remove, so the whole target stays visible instead of
                            # producing a link with empty text.
                            _, colon, remainder = url_destination.partition(":")
                            if colon:
                                url_text = remainder
                            if url_destination.startswith("mailto:"):
                                # NOTE(review): both replacements below are no-ops as
                                # written; the replacement text was presumably an
                                # obfuscated form of "@" (e.g. an HTML entity) that has
                                # been lost -- confirm against the upstream source.
                                url_destination = url_destination.replace("@", "@")  # prevent email harvesting
                                url_text = url_text.replace("@", "@")                # prevent protocol injection
                        tokens.append(Link(url_text, url_destination))

                # Whatever follows the last match (the whole text when nothing matched).
                tokens.append(source[cursor:])

                if hard_break:
                    tokens.append(HardBreak())
                return tokens
        
            312
            def _is_bullet(line):
                """True when ``line`` opens an unordered-list item: ``*``, ``+`` or ``-`` then a space."""
                # Slices never raise, so a line consisting of a lone marker is safe.
                return line[:1] in ("*", "+", "-") and line[1:2] == " "


            def _is_rule(line):
                """True when ``line`` is a horizontal rule: 3+ characters drawn only from ``-_* ``."""
                stripped = line.strip()
                return len(stripped) >= 3 and only_chars(stripped, "-_* ")


            def _strip_indent(line, width):
                """Remove leading whitespace from the first ``width`` characters of ``line`` only."""
                return line[:width].lstrip() + line[width:]


            def _continues_paragraph(line):
                """True when ``line`` can extend a paragraph instead of starting another block."""
                if not line.strip():
                    return False
                if line.startswith(("#", ">", ";", "* ", "+ ", "- ", "~~~", "```")):
                    return False
                return not re.match(r"^\d+\.", line)


            def tokenise(source):
                """Split Markdown source into a list of block-level elements.

                Args:
                    source: Markdown text; lines are separated by ``"\\n"``.

                Returns:
                    A list of elements in document order.  The first entry is always
                    a void ``Element``, and every blank line or line starting with
                    ``;`` adds another one.

                One leading space is removed from every line before classification.
                Each branch below consumes at least one line, and the block that was
                open before the branch is always stored before it is replaced.
                """
                tokens = []
                current_block = Element()

                # Remove a single leading space from every line.
                lines = [line[1:] if line.startswith(" ") else line for line in source.split("\n")]

                i = 0
                while i < len(lines):
                    line = lines[i]
                    stripped = line.strip()

                    if not stripped or line.startswith(";"):
                        # Void block
                        tokens.append(current_block)
                        current_block = Element()
                        i += 1

                    elif _is_rule(line):
                        # Horizontal rule
                        tokens.append(current_block)
                        current_block = Rule()
                        i += 1

                    elif _is_bullet(line):
                        # Unordered list: one ListItem per marker line, plus the
                        # non-blank lines that follow it up to the next marker.
                        tokens.append(current_block)
                        items = []
                        while i < len(lines) and _is_bullet(lines[i]):
                            item_lines = [lines[i][2:].strip()]  # discard marker and space
                            i += 1
                            while i < len(lines) and lines[i].strip() and not _is_bullet(lines[i]):
                                item_lines.append(lines[i])
                                i += 1
                            items.append(ListItem("".join(part + "\n" for part in item_lines)))
                        current_block = UnorderedList(items)

                    elif re.match(r"^\d+\.", line):
                        # Ordered list
                        tokens.append(current_block)
                        items = []
                        while i < len(lines) and re.match(r"^ ?\d+\.", lines[i]):
                            # Split at the first period: number before, item text after.
                            marker, _, first_text = lines[i].partition(".")
                            # Width of the marker (number plus period), taken from the
                            # marker line itself; continuation lines are de-indented
                            # by at most this many characters.
                            marker_length = len(marker) + 1
                            item_lines = [first_text]
                            i += 1
                            while (i < len(lines) and lines[i].strip()
                                   and not re.match(r"^ ?\d+\.", lines[i])):
                                if re.match(r"^  \d+\.", lines[i]):
                                    # Nested numbered item indented by two spaces.
                                    marker_length = 2
                                item_lines.append(_strip_indent(lines[i], marker_length))
                                i += 1
                            items.append(ListItem("".join(part + "\n" for part in item_lines)))
                        current_block = OrderedList(items)

                    elif line.startswith("#") and leading(line.lstrip("#"), " "):
                        # ATX heading: one or more "#" followed by at least one space.
                        tokens.append(current_block)
                        current_block = Heading(line.lstrip("#").strip(), leading(line, "#"))
                        i += 1

                    elif line.startswith(">"):
                        # Blockquote: ">" lines, plus unmarked non-blank lines that are
                        # neither "#" lines nor horizontal rules.
                        tokens.append(current_block)
                        quoted = []
                        while i < len(lines) and (
                            lines[i].startswith(">")
                            or (lines[i].strip()
                                and not lines[i].startswith("#")
                                and not _is_rule(lines[i]))
                        ):
                            quoted.append(lines[i].lstrip(">") + "\n")
                            i += 1
                        current_block = Blockquote("".join(quoted))

                    elif leading(line, "~") == 3 or leading(line, "`") == 3:
                        # Fenced code block; the text after the fence names the language.
                        tokens.append(current_block)
                        language = line.lstrip("`~").strip()
                        body = []
                        i += 1        # skip the opening fence
                        while i < len(lines) and lines[i].strip() not in ("```", "~~~"):
                            body.append(lines[i] + "\n")
                            i += 1
                        if i < len(lines):
                            i += 1    # prevent a new block from beginning with the closing fence
                        current_block = CodeBlock("".join(body), language=language)

                    elif (i + 1 < len(lines) and lines[i + 1].strip()
                          and (only_chars(lines[i + 1].strip(), "=")
                               or only_chars(lines[i + 1].strip(), "-"))):
                        # Setext heading: text underlined with "=" (level 1) or "-" (level 2).
                        tokens.append(current_block)
                        level = 1 if lines[i + 1].strip().startswith("=") else 2
                        current_block = Heading(stripped, level)
                        i += 2

                    else:
                        # Paragraph.  The current line matched no other block type, so
                        # it is taken unconditionally; this guarantees progress for
                        # lines such as "#tag" that merely look like another block.
                        tokens.append(current_block)
                        text = [stripped + "\n"]
                        i += 1
                        while i < len(lines) and _continues_paragraph(lines[i]):
                            text.append(lines[i].strip() + "\n")
                            i += 1
                        current_block = Paragraph("".join(text))

                tokens.append(current_block)
                return tokens
        
            440
            def _plain_text(content):
                """Flatten a token, or a list of tokens, to its text without any markup."""
                if isinstance(content, str):
                    return content
                if isinstance(content, list):
                    return "".join(_plain_text(part) for part in content)
                # Element-like object: descend into its content (None counts as empty).
                return _plain_text(getattr(content, "content", None) or "")


            def make_html(ast):
                """Render a list of tokens into a BeautifulSoup document.

                Args:
                    ast: Iterable of ``str`` fragments and element objects, as produced
                        by ``tokenise`` or ``parse_line``.

                Returns:
                    A ``BeautifulSoup`` object holding the generated nodes.

                Strings are appended as text.  Objects that have no ``content``
                attribute, or whose ``tag_name`` is "m-void", are skipped.  List
                content is rendered recursively; other truthy content becomes the
                tag's string.
                """
                soup = beautifulsoup.BeautifulSoup()
                for node in ast:
                    # Use bs4 to generate HTML
                    if isinstance(node, str):
                        soup.append(node)
                        continue
                    if not hasattr(node, "content") or node.tag_name == "m-void":
                        continue

                    tag = soup.new_tag(str(node.tag_name))
                    if node.tag_name == "a":
                        tag["href"] = node.destination
                    elif node.tag_name == "img":
                        tag["src"] = node.destination
                        # The alternative text may contain inline elements, so it is
                        # flattened to plain text rather than joined as strings.
                        tag["alt"] = _plain_text(node.content)
                    elif node.tag_name == "pre":
                        tag["data-language"] = node.language
                    if node.classes:
                        tag["class"] = " ".join(node.classes)

                    # Images get no children: their text lives in the alt attribute,
                    # and a childless <img> can be written out as an empty element.
                    if node.tag_name != "img":
                        try:
                            if isinstance(node.content, list):
                                tag.append(make_html(node.content))
                            elif node.content:
                                tag.string = node.content
                        except AttributeError as exc:
                            # Best effort: report the problem and keep the (possibly
                            # empty) tag instead of aborting the whole document.
                            print(exc, file=sys.stderr)
                    soup.append(tag)
                return soup
        
            472
            def markdown2html(markdown):
                """Convert Markdown source text into the document built by ``make_html``."""
                block_tokens = tokenise(markdown)
                return make_html(block_tokens)
        
            476