rendercomment: safer auto-linkification of URLs

Fixes a few edge cases:

- URLs within code blocks used to get redundant <> added, breaking bash
  code snippets like `curl https://...` into `curl <https://...>`.

- Links written with markdown's <https://...> syntax also used to get an
  extra pair of brackets.

Signed-off-by: Lukas Fleischer <lfleischer@archlinux.org>
This commit is contained in:
Frédéric Mangano-Tarumi 2020-02-01 19:07:41 +01:00 committed by Lukas Fleischer
parent 0fc69e96bd
commit 199f34e42e
2 changed files with 25 additions and 11 deletions

View file

@@ -13,17 +13,20 @@ repo_path = aurweb.config.get('serve', 'repo-path')
commit_uri = aurweb.config.get('options', 'commit_uri')
class LinkifyPreprocessor(markdown.preprocessors.Preprocessor):
_urlre = re.compile(r'(\b(?:https?|ftp):\/\/[\w\/\#~:.?+=&%@!\-;,]+?'
r'(?=[.:?\-;,]*(?:[^\w\/\#~:.?+=&%@!\-;,]|$)))')
def run(self, lines):
return [self._urlre.sub(r'<\1>', line) for line in lines]
class LinkifyExtension(markdown.extensions.Extension):
"""
Turn URLs into links, even without explicit markdown.
Do not linkify URLs in code blocks.
"""
# Captures http(s) and ftp URLs until the first non URL-ish character.
# Excludes trailing punctuation.
_urlre = (r'(\b(?:https?|ftp):\/\/[\w\/\#~:.?+=&%@!\-;,]+?'
r'(?=[.:?\-;,]*(?:[^\w\/\#~:.?+=&%@!\-;,]|$)))')
def extendMarkdown(self, md, md_globals):
md.preprocessors.add('linkify', LinkifyPreprocessor(md), '_end')
processor = markdown.inlinepatterns.AutolinkInlineProcessor(self._urlre, md)
md.inlinePatterns.add('linkify', processor, '_end')
class FlysprayLinksPreprocessor(markdown.preprocessors.Preprocessor):

View file

@@ -51,11 +51,22 @@ test_expect_success 'Test HTML sanitizing.' '
test_expect_success 'Test link conversion.' '
cat <<-EOD | sqlite3 aur.db &&
INSERT INTO PackageComments (ID, PackageBaseID, Comments, RenderedComment) VALUES (4, 1, "Visit https://www.archlinux.org/.", "");
INSERT INTO PackageComments (ID, PackageBaseID, Comments, RenderedComment) VALUES (4, 1, "
Visit https://www.archlinux.org/.
Visit <https://www.archlinux.org/>.
Visit \`https://www.archlinux.org/\`.
Visit [Arch Linux](https://www.archlinux.org/).
Visit [Arch Linux][arch].
[arch]: https://www.archlinux.org/
", "");
EOD
"$RENDERCOMMENT" 4 &&
cat <<-EOD >expected &&
<p>Visit <a href="https://www.archlinux.org/">https://www.archlinux.org/</a>.</p>
<p>Visit <a href="https://www.archlinux.org/">https://www.archlinux.org/</a>.
Visit <a href="https://www.archlinux.org/">https://www.archlinux.org/</a>.
Visit <code>https://www.archlinux.org/</code>.
Visit <a href="https://www.archlinux.org/">Arch Linux</a>.
Visit <a href="https://www.archlinux.org/">Arch Linux</a>.</p>
EOD
cat <<-EOD | sqlite3 aur.db >actual &&
SELECT RenderedComment FROM PackageComments WHERE ID = 4;