Skip to content

Commit

Permalink
Remove forum anchor links (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
jessica-cheng authored Oct 25, 2024
1 parent 6ef68e6 commit 0034558
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
7 changes: 5 additions & 2 deletions portal-backend/depmap/public/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,14 +58,17 @@ def create_sanitizer() -> Sanitizer:
# The Discourse API returns non-image attachments with ONLY relative urls.
# Example: /uploads/short-url/random_letters_and_numbers.pdf
# This function prefixes such urls with the url of the relevant forum (DMC vs public),
# removes "#p..." anchor links that point back into the forum post, and marks
# links to open in a new tab.
def expand_forum_relative_urls(forum_url: str, html: str):
def modify_forum_relative_urls(forum_url: str, html: str):
    """Rewrite the anchor tags of a forum post's html and return the new html.

    Every ``<a>`` element that has an ``href`` is handled as follows:

    * an href starting with ``/`` is joined onto ``forum_url``;
    * an href starting with ``#p`` gets the whole element (including anything
      nested inside it) removed from the document;
    * ``target="_blank"`` is set so the link opens in a new tab.

    Args:
        forum_url: Base url of the forum the post came from.
        html: Post html; it is passed through ``str()`` before parsing.

    Returns:
        The modified document serialized back to a string.
    """
    # html.parser is used on purpose: it does not wrap the fragment in
    # <html> and <body> tags the way other parsers do.
    document = BeautifulSoup(str(html), features="html.parser")

    # NOTE(review): findAll is the legacy spelling of find_all in bs4.
    for link in document.findAll("a", href=True):
        href = str(link["href"])
        if href.startswith("/"):
            # relative forum url -> absolute url on the relevant forum
            link["href"] = urljoin(forum_url, href)
        elif href.startswith("#p"):
            # anchor pointing back into the forum post: drop it from the tree
            link.extract()
        # Set on every anchor; for one that was just extracted this no longer
        # affects the serialized document.
        link["target"] = "_blank"

    return str(document)
Expand Down Expand Up @@ -110,7 +113,7 @@ def modify_html(
forum_url: str,
):
sanitized_html = sanitizer.sanitize(topic_html)
modified_urls_html = expand_forum_relative_urls(forum_url, sanitized_html)
modified_urls_html = modify_forum_relative_urls(forum_url, sanitized_html)
added_forum_link_html = add_forum_link_to_html(
forum_url, topic_id, topic_slug, modified_urls_html
)
Expand Down
15 changes: 12 additions & 3 deletions portal-backend/tests/depmap/public/test_resources.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from depmap.public.resources import expand_forum_relative_urls, add_forum_link_to_html
from depmap.public.resources import modify_forum_relative_urls, add_forum_link_to_html


def test_expand_forum_relative_urls():
Expand All @@ -11,15 +11,15 @@ def test_expand_forum_relative_urls():
expected_output_html = f'<a class="attachment" href="{expected_long_url1}" target="_blank">test_random_non_image_pdf.pdf</a> (13.9 KB)'

# Checks full link html element
html_with_absolute_urls = expand_forum_relative_urls(
html_with_absolute_urls = modify_forum_relative_urls(
"https://forum.depmap.org", input_html
)

assert html_with_absolute_urls == expected_output_html

# Checks only href value
input_html = '<a href="depmap.org/portal">test</a>'
expect_no_change_html = expand_forum_relative_urls(
expect_no_change_html = modify_forum_relative_urls(
"https://forum.depmap.org", input_html
)

Expand All @@ -37,3 +37,12 @@ def test_add_forum_link_to_html():
assert 'href="https://forum.depmap.org/t/topic-slug/1"' in added_forum_link_to_html
assert "</p><p><a" in added_forum_link_to_html
assert added_forum_link_to_html.endswith("</a></p>")


def test_remove_anchor_links():
    """An empty ``#p...`` anchor in a heading is removed; the heading text stays."""
    heading_with_anchor = '<h1><a href="#p-100-welcome" name="p-100-welcome"></a>Welcome to the Resources Page!</h1>'
    heading_without_anchor = "<h1>Welcome to the Resources Page!</h1>"

    result = modify_forum_relative_urls(
        "https://forum.depmap.org", heading_with_anchor
    )

    # The full returned html is compared, not just the anchor element
    assert heading_without_anchor == result

0 comments on commit 0034558

Please sign in to comment.