roundabout,
created on Sunday, 2 February 2025, 19:34:31 (1738524871),
received on Sunday, 2 February 2025, 19:36:58 (1738525018)
Author identity: vlad <vlad.muntoiu@gmail.com>
28efe5148f90064c56a41b53146aad3a315ad013
ampoule_ssg/markdown.py
@@ -1,3 +1,26 @@
"""
This is a parser for a Markdown-like language, but it isn't compatible with the CommonMark
specification; check doc/enduser/Formatting messages.md for its syntax.

Roundabout - git hosting for everyone <https://roundabout-host.com>
Copyright (C) 2023-2025 Roundabout developers <root@roundabout-host.com>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import re
import bs4 as beautifulsoup
import sys
@@ -12,10 +35,14 @@ def only_chars(string, chars):
inline_regex = r""" (?P<imageFlag>!?) \[ (?P<urlText>[^\[\]]*) \] \((?P<urlDestination>[^\(\)]*)\) # hyperlink or media | <(?P<urlDestination2>[^<>]*)> # autolink |(?P<em>\*{1,7}) (?P<textEm>(?:\\\*|[^*])*) (?P=em) # emphasis with * not requiring space on either side | (?:^|\s)(?P<em2>_{1,7}) (?P<textEm2>(?:\\.|[^*])*) (?P=em2)(?=\s|$) # emphasis with _ requiring space on at least one side | [``] (?P<textCode2>(?:\\[``]|[^``])*) [``] # inline code (2 backticks) |[`] (?P<textCode>(?:\\[`]|[^`])*) [`] # inline code | (?P<strike>~{2}) (?P<textStrike>(?:\\[~]|[^~])*) (~{2}) # strikethrough
@@ -278,6 +305,8 @@ def parse_line(source):
tokens.append(Emphasis(i.group("textEm2"), len(i.group("em2")))) if i.group("textCode"): tokens.append(Code(i.group("textCode"))) if i.group("textCode2"): tokens.append(Code(i.group("textCode2")))if i.group("strike"): tokens.append(Strikethrough(i.group("textStrike"))) if i.group("diff"):
@@ -287,6 +316,17 @@ def parse_line(source):
tokens.append(Image(i.group("urlText"), i.group("urlDestination"))) else: tokens.append(Link(i.group("urlText"), i.group("urlDestination"))) if i.group("urlDestination2"): if "://" not in i.group("urlDestination2"): url_text = i.group("urlDestination2").partition(":")[2] # remove tel, mailto, sms prefixes url_destination = i.group("urlDestination2") if url_destination.startswith("mailto:"): url_destination = url_destination.replace("@", "@") # prevent email harvesting url_text = url_text.replace("@", "@") # prevent protocol injection else: url_text = url_destination = i.group("urlDestination2") tokens.append(Link(url_text, url_destination))tokens.append(source[lookup:])
@@ -347,9 +387,7 @@ def tokenise(source):
i += 1 marker_length = len(lines[i].split(".", 1)[0]) + 1 while i < len(lines) and lines[i].strip() and not re.match(r"^ ?\d+\.", lines[i]): if re.match(r"^ \d+\.", lines[i]):marker_length = 2inner_content += lines[i][marker_length:] + "\n"inner_content += lines[i][2:] + "\n"i += 1 content.append(ListItem(inner_content))