Markdown regex

by roundabout, Wednesday, 17 January 2024, 17:36:12 (1705512972), pushed by roundabout, Wednesday, 31 July 2024, 06:54:41 (1722408881)

Author identity: vlad <vlad.muntoiu@gmail.com>

5944550ea57fcbed2226dfc3f9bc9edc0517a2ad

markdown.py

@@ -1,6 +1,19 @@

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            import re
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        inlineRegex = r"""
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        (?P<em>[*_]{1,7}) (?P<textEm>(?:\\[*]|[^*])*) (?P=em)                              # emphasis
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        [`] (?P<textCode>(?:\\[`]|[^`])*) [`]                                              # inline code
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\)     # hyperlink or media
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2})                            # strikethrough
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        (?P<diff>\-\-|\+\+) (?P<textDiff>(?:\\[-+]|[^-+])*) (?P=diff)                      # diffs
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        """
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            def leading(string, character):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                return len(string) - len(string.lstrip(character))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        

@@ -40,6 +53,25 @@ class Paragraph(Element):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                    return "Paragraph:\n\t" + self.content
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        class Emphasis(Element):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            def __init__(self, content, bold, italic):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                super().__init__()
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                self.content = content
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                self.bold = bold
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                self.italic = italic
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            def __repr__(self):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                return f"Emphasis (bold {self.bold}, italic {self.italic}):\n\t" + self.content
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        def _parseLine(source):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            if trailing(source, "\\") == 1:
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                source = source.rstrip("\\")
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                source += "\n"
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            tokens = [source]
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            def _tokenise(source):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                tokens = []
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        

@@ -58,6 +90,8 @@ def _tokenise(source):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        currentBlock = Heading(content, leading(line, "#"))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                    else:
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        if not isinstance(currentBlock, Paragraph):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                        # Paragraph is default
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            tokens.append(currentBlock)
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            currentBlock = Paragraph("")
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        

@@ -77,7 +111,9 @@ for i in _tokenise(

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            ##### Article 1
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            ###### Section 1
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            Lorem ipsum
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        dolor sit amet"""
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        dolor sit amet
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        ..."""
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            ):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                print(repr(i))