Skip to content

Commit

Permalink
Improve support for null attribute values (#406)
Browse files Browse the repository at this point in the history
* img: Support null "src" attributes, rather than failing an assertion.
* img: Support null "width" and "height" attributes, rather than failing an assertion.
* links: Support null "title" attribute with --reference-links.
  • Loading branch information
alexmv authored Jan 16, 2024
1 parent 7ba8431 commit e375689
Show file tree
Hide file tree
Showing 7 changed files with 19 additions and 10 deletions.
12 changes: 4 additions & 8 deletions html2text/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,8 +537,7 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.o("][" + str(a_props.count) + "]")

if tag == "img" and start and not self.ignore_images:
if "src" in attrs:
assert attrs["src"] is not None
if "src" in attrs and attrs["src"] is not None:
if not self.images_to_alt:
attrs["href"] = attrs["src"]
alt = attrs.get("alt") or self.default_image_alt
Expand All @@ -549,11 +548,9 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
self.images_with_size and ("width" in attrs or "height" in attrs)
):
self.o("<img src='" + attrs["src"] + "' ")
if "width" in attrs:
assert attrs["width"] is not None
if "width" in attrs and attrs["width"] is not None:
self.o("width='" + attrs["width"] + "' ")
if "height" in attrs:
assert attrs["height"] is not None
if "height" in attrs and attrs["height"] is not None:
self.o("height='" + attrs["height"] + "' ")
if alt:
self.o("alt='" + alt + "' ")
Expand Down Expand Up @@ -826,8 +823,7 @@ def o(
+ "]: "
+ urlparse.urljoin(self.baseurl, link.attrs["href"])
)
if "title" in link.attrs:
assert link.attrs["title"] is not None
if "title" in link.attrs and link.attrs["title"] is not None:
self.out(" (" + link.attrs["title"] + ")")
self.out("\n")
else:
Expand Down
1 change: 1 addition & 0 deletions test/empty-img-src.html
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<img alt src width="1234" height="auto">
2 changes: 2 additions & 0 deletions test/empty-img-src.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@


4 changes: 4 additions & 0 deletions test/images_with_size.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,7 @@
<img src='image_with_width_and_height.jpg' width='300' height='300' id='ignored-id' />
<img src='image_with_width_and_height.jpg' id='ignored-id' />
<img id='ignored-id' />

<img src='image_with_no_width_value.jpg' width height='123' id='ignored-id' />
<img src='image_with_no_height_value.jpg' width='123' height id='ignored-id' />
<img src='image_with_no_dimention_values.jpg' width height id='ignored-id' />
5 changes: 4 additions & 1 deletion test/images_with_size.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,7 @@ src='image_with_width.jpg' width='300' alt='An image with a width attr' />
<img src='image_with_width.jpg' height='300' alt='An image with a height attr'
/> <img src='image_with_width_and_height.jpg' width='300' height='300' alt='An
image with width and height' /> <img src='image_with_width_and_height.jpg'
width='300' height='300' /> ![](image_with_width_and_height.jpg)
width='300' height='300' /> ![](image_with_width_and_height.jpg) <img
src='image_with_no_width_value.jpg' height='123' /> <img
src='image_with_no_height_value.jpg' width='123' /> <img
src='image_with_no_dimention_values.jpg' />
1 change: 1 addition & 0 deletions test/no_inline_links_example.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
<a href="http://example.com" title="abc">
link text
</a></a></a>
<a href='http://example.com' title>Empty link title</a>
4 changes: 3 additions & 1 deletion test/no_inline_links_example.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
[Googler][1] No href No href but title available [ Example][2] [ [ [ link text
][3]][3]][3]
][3]][3]][3] [Empty link title][4]

[1]: http://google.com

[2]: http://example.com (Example title)

[3]: http://example.com (abc)

[4]: http://example.com

0 comments on commit e375689

Please sign in to comment.