From e3756891780a5a9bef4fcbc2169893df1b68bcac Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Tue, 16 Jan 2024 17:09:32 -0500 Subject: [PATCH] Improve support for null attribute values (#406) * img: Support null "src" attributes, rather than fail an assert. * img: Support null "width" and "height" attributes, rather than assert. * links: Support null "title" attribute with --reference-links. --- html2text/__init__.py | 12 ++++-------- test/empty-img-src.html | 1 + test/empty-img-src.md | 2 ++ test/images_with_size.html | 4 ++++ test/images_with_size.md | 5 ++++- test/no_inline_links_example.html | 1 + test/no_inline_links_example.md | 4 +++- 7 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 test/empty-img-src.html create mode 100644 test/empty-img-src.md diff --git a/html2text/__init__.py b/html2text/__init__.py index ae4e154..9054388 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -537,8 +537,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.o("][" + str(a_props.count) + "]") if tag == "img" and start and not self.ignore_images: - if "src" in attrs: - assert attrs["src"] is not None + if "src" in attrs and attrs["src"] is not None: if not self.images_to_alt: attrs["href"] = attrs["src"] alt = attrs.get("alt") or self.default_image_alt @@ -549,11 +548,9 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: self.images_with_size and ("width" in attrs or "height" in attrs) ): self.o(" diff --git a/test/empty-img-src.md b/test/empty-img-src.md new file mode 100644 index 0000000..139597f --- /dev/null +++ b/test/empty-img-src.md @@ -0,0 +1,2 @@ + + diff --git a/test/images_with_size.html b/test/images_with_size.html index fcda9b6..3fccec1 100644 --- a/test/images_with_size.html +++ b/test/images_with_size.html @@ -8,3 +8,7 @@ + + + + diff --git a/test/images_with_size.md b/test/images_with_size.md index c0548a9..7678ffa 100644 --- a/test/images_with_size.md +++ 
b/test/images_with_size.md @@ -3,4 +3,7 @@ src='image_with_width.jpg' width='300' alt='An image with a width attr' /> An image with a height attr An
 image with width and height ![](image_with_width_and_height.jpg) +width='300' height='300' /> ![](image_with_width_and_height.jpg) diff --git a/test/no_inline_links_example.html b/test/no_inline_links_example.html index 5e4c45c..e7d428f 100644 --- a/test/no_inline_links_example.html +++ b/test/no_inline_links_example.html @@ -7,3 +7,4 @@ link text +Empty link title diff --git a/test/no_inline_links_example.md b/test/no_inline_links_example.md index c000b59..bd11d62 100644 --- a/test/no_inline_links_example.md +++ b/test/no_inline_links_example.md @@ -1,8 +1,10 @@ [Googler][1] No href No href but title available [ Example][2] [ [ [ link text -][3]][3]][3] +][3]][3]][3] [Empty link title][4] [1]: http://google.com [2]: http://example.com (Example title) [3]: http://example.com (abc) + + [4]: http://example.com