Fix ol parsing in roundabout/ampoule

ampoule_ssg/markdown.py

@@ -1,3 +1,26 @@

                                
                                
                                
                            
                                
                                    
                                        
                                        """
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        This is a parser for a Markdown-like language, but it isn't compatible with
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        the CommonMark specification; check doc/enduser/Formatting messages.md for
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        its syntax.
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        Roundabout - git hosting for everyone <https://roundabout-host.com>
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        Copyright (C) 2023-2025 Roundabout developers <root@roundabout-host.com>
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        This program is free software: you can redistribute it and/or modify
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        it under the terms of the GNU Affero General Public License as published by
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        the Free Software Foundation, either version 3 of the License, or
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        (at your option) any later version.
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        This program is distributed in the hope that it will be useful,
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        but WITHOUT ANY WARRANTY; without even the implied warranty of
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        GNU Affero General Public License for more details.
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        You should have received a copy of the GNU Affero General Public License
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        along with this program.  If not, see <http://www.gnu.org/licenses/>.
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        """
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            import re
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            import bs4 as beautifulsoup
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            import sys

@@ -12,10 +35,14 @@ def only_chars(string, chars):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            inline_regex = r"""
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\)     # hyperlink or media
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            |
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        <(?P<urlDestination2>[^<>]*)>                                                      # autolink
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            (?P<em>\*{1,7}) (?P<textEm>(?:\\\*|[^*])*) (?P=em)                                 # emphasis with * not requiring space on either side
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            |
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            (?:^|\s)(?P<em2>_{1,7}) (?P<textEm2>(?:\\.|[^*])*) (?P=em2)(?=\s|$)                # emphasis with _ requiring space on at least one side
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            |
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        [``] (?P<textCode2>(?:\\[``]|[^``])*) [``]                                          # inline code (2 backticks)
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        |
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            [`] (?P<textCode>(?:\\[`]|[^`])*) [`]                                              # inline code
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            |
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2})                            # strikethrough

@@ -278,6 +305,8 @@ def parse_line(source):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        tokens.append(Emphasis(i.group("textEm2"), len(i.group("em2"))))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                    if i.group("textCode"):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        tokens.append(Code(i.group("textCode")))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                if i.group("textCode2"):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                    tokens.append(Code(i.group("textCode2")))
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                    if i.group("strike"):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        tokens.append(Strikethrough(i.group("textStrike")))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                    if i.group("diff"):

@@ -287,6 +316,17 @@ def parse_line(source):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            tokens.append(Image(i.group("urlText"), i.group("urlDestination")))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                        else:
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            tokens.append(Link(i.group("urlText"), i.group("urlDestination")))
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                if i.group("urlDestination2"):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                    if "://" not in i.group("urlDestination2"):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                        url_text = i.group("urlDestination2").partition(":")[2]    # remove tel, mailto, sms prefixes
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                        url_destination = i.group("urlDestination2")
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                        if url_destination.startswith("mailto:"):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                            url_destination = url_destination.replace("@", "&#64;")  # prevent email harvesting
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                            url_text = url_text.replace("@", "&#64;")                # prevent protocol injection
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                    else:
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                        url_text = url_destination = i.group("urlDestination2")
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                        
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                    tokens.append(Link(url_text, url_destination))
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                tokens.append(source[lookup:])

@@ -347,9 +387,7 @@ def tokenise(source):

                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            i += 1
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            marker_length = len(lines[i].split(".", 1)[0]) + 1
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            while i < len(lines) and lines[i].strip() and not re.match(r"^ ?\d+\.", lines[i]):
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                            if re.match(r"^  \d+\.", lines[i]):
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                                marker_length = 2
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                            inner_content += lines[i][marker_length:] + "\n"
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                                            inner_content += lines[i][2:] + "\n"
                                        
                                        
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                                i += 1
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                            
                                        
                                        
                                            
                                            
                                            
                                            
                                        
                                    
                                
                                
                                
                            
                                
                                    
                                        
                                            
                                                            content.append(ListItem(inner_content))

Information

Fix ol parsing

ampoule_ssg/markdown.py

@@ -1,3 +1,26 @@

@@ -12,10 +35,14 @@ def only_chars(string, chars):

@@ -278,6 +305,8 @@ def parse_line(source):

@@ -287,6 +316,17 @@ def parse_line(source):

@@ -347,9 +387,7 @@ def tokenise(source):

Add comment