Skip to content

Commit

Permalink
fix: Skip NavigableString in HTML parsing
Browse files (browse the repository at this point in the history)
  • Loading branch information
higuhigu-lb committed Dec 3, 2024
1 parent 33cff98 commit dba0e47
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion docling/backend/html_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pathlib import Path
from typing import Set, Union

-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, NavigableString
from docling_core.types.doc import (
DocItemLabel,
DoclingDocument,
Expand Down Expand Up @@ -92,6 +92,8 @@ def walk(self, element, doc):
try:
# Iterate over elements in the body of the document
for idx, element in enumerate(element.children):
+if isinstance(element, NavigableString):
+    continue # Skip over navigable strings
try:
self.analyse_element(element, idx, doc)
except Exception as exc_child:
Expand Down

0 comments on commit dba0e47

Please sign in to comment.