diff --git a/find_posts.py b/find_posts.py index 82cd002a..2ddfbce5 100644 --- a/find_posts.py +++ b/find_posts.py @@ -628,6 +628,11 @@ def parse_url(url, parsed_urls): if match is not None: parsed_urls[url] = match + if url not in parsed_urls: + match = parse_pleroma_uri(url) + if match is not None: + parsed_urls[url] = match + if url not in parsed_urls: match = parse_lemmy_url(url) if match is not None: @@ -696,6 +701,13 @@ def parse_pleroma_url(url): return None return None +def parse_pleroma_uri(uri): + """parse a Pleroma URL and return the server and ID""" + match = re.match(r"https://(?P<server>[^/]+)/notice/(?P<toot_id>[^/]+)", uri) + if match is not None: + return (match.group("server"), match.group("toot_id")) + return None + def parse_pleroma_profile_url(url): """parse a Pleroma Profile URL and return the server and username""" match = re.match(r"https://(?P[^/]+)/users/(?P[^/]+)", url) diff --git a/tests/test_find_posts.py b/tests/test_find_posts.py index 6e633d53..eb345bdc 100644 --- a/tests/test_find_posts.py +++ b/tests/test_find_posts.py @@ -39,9 +39,11 @@ parse_pixelfed_profile_url, parse_pixelfed_url, parse_pleroma_url, + parse_pleroma_uri, post, set_server_apis, user_has_opted_out, + parse_url ) @@ -874,6 +876,10 @@ def test_parse_pleroma_url(mock_get_redirect_url): result = parse_pleroma_url("https://different.example.com/objects/111") assert result == ("different.example.com", "789") +def test_parse_pleroma_uri(): + # Test that a valid URI is correctly parsed + uri = "https://friedcheese.us/notice/Arv4zBVnAR84mmkVay" + assert parse_pleroma_uri(uri) == ("friedcheese.us", "Arv4zBVnAR84mmkVay") import re import pytest @@ -951,6 +957,25 @@ def test_parse_peertube_url_valid(): # assert that the result is as expected assert result == expected +def test_parse_url(): + tests = [ + ( + "https://video.infosec.exchange/videos/watch/56f1d0b5-d98f-4bad-b1e7-648ae074ab9d", + ("video.infosec.exchange", "56f1d0b5-d98f-4bad-b1e7-648ae074ab9d") + ), + ( + 
"https://veedeo.org/videos/watch/a51bb77c-e1bd-4d6a-b119-95af176f6d66", + ("veedeo.org", "a51bb77c-e1bd-4d6a-b119-95af176f6d66") + ), + ( + 'https://foo.bar/nothing', + None + ) + ] + for (url,expected) in tests: + result = parse_url(url, {}) + assert result == expected + def test_parse_peertube_url_invalid(): # define an invalid url