Skip to content

Commit aa430d3

Browse files
cau-git and vagenas authored
feat: New labels for CVAT annotation (#314)
* New labels and utility for CVAT annotation
  Signed-off-by: Christoph Auer <[email protected]>
* Add EMPTY_VALUE to TextItem
  Signed-off-by: Christoph Auer <[email protected]>
* Small fixes
  Signed-off-by: Christoph Auer <[email protected]>
* align usage of content layer param (#326)
  * align usage of content layer param
    Signed-off-by: Panos Vagenas <[email protected]>
  * parametrize content layers in visualizers
    Signed-off-by: Panos Vagenas <[email protected]>
  ---------
  Signed-off-by: Panos Vagenas <[email protected]>
---------
Signed-off-by: Christoph Auer <[email protected]>
Signed-off-by: Panos Vagenas <[email protected]>
Signed-off-by: Christoph Auer <[email protected]>
Co-authored-by: Panos Vagenas <[email protected]>
1 parent 0070e2e commit aa430d3

File tree

9 files changed

+650
-211
lines changed

9 files changed

+650
-211
lines changed

docling_core/transforms/visualizer/layout_visualizer.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ class Params(BaseModel):
4040
"""Layout visualization parameters."""
4141

4242
show_label: bool = True
43+
content_layers: set[ContentLayer] = {cl for cl in ContentLayer}
4344

4445
base_visualizer: Optional[BaseVisualizer] = None
4546
params: Params = Params()

docling_core/transforms/visualizer/reading_order_visualizer.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from PIL import ImageDraw
77
from PIL.Image import Image
8+
from pydantic import BaseModel
89
from typing_extensions import override
910

1011
from docling_core.transforms.visualizer.base import BaseVisualizer
@@ -14,7 +15,16 @@
1415
class ReadingOrderVisualizer(BaseVisualizer):
1516
"""Reading order visualizer."""
1617

18+
class Params(BaseModel):
19+
"""Reading order visualization parameters."""
20+
21+
show_label: bool = True
22+
content_layers: set[ContentLayer] = {
23+
cl for cl in ContentLayer if cl != ContentLayer.BACKGROUND
24+
}
25+
1726
base_visualizer: Optional[BaseVisualizer] = None
27+
params: Params = Params()
1828

1929
def _draw_arrow(
2030
self,
@@ -71,7 +81,7 @@ def _draw_doc_reading_order(
7181
my_images: dict[Optional[int], Image] = images or {}
7282
prev_page = None
7383
for elem, _ in doc.iterate_items(
74-
included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE},
84+
included_content_layers=self.params.content_layers,
7585
):
7686
if not isinstance(elem, DocItem):
7787
continue

docling_core/types/doc/document.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,7 @@ class ContentLayer(str, Enum):
623623

624624
BODY = "body"
625625
FURNITURE = "furniture"
626+
BACKGROUND = "background"
626627

627628

628629
DEFAULT_CONTENT_LAYERS = {ContentLayer.BODY}
@@ -860,6 +861,7 @@ class TextItem(DocItem):
860861
DocItemLabel.PARAGRAPH,
861862
DocItemLabel.REFERENCE,
862863
DocItemLabel.TEXT,
864+
DocItemLabel.EMPTY_VALUE,
863865
]
864866

865867
orig: str # untreated representation
@@ -2867,23 +2869,46 @@ def _with_pictures_refs(
28672869

28682870
def print_element_tree(self):
28692871
"""Print_element_tree."""
2870-
for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2872+
for ix, (item, level) in enumerate(
2873+
self.iterate_items(
2874+
with_groups=True,
2875+
traverse_pictures=True,
2876+
included_content_layers={cl for cl in ContentLayer},
2877+
)
2878+
):
28712879
if isinstance(item, GroupItem):
28722880
print(
28732881
" " * level,
28742882
f"{ix}: {item.label.value} with name={item.name}",
28752883
)
2884+
elif isinstance(item, TextItem):
2885+
print(
2886+
" " * level,
2887+
f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}",
2888+
)
2889+
28762890
elif isinstance(item, DocItem):
28772891
print(" " * level, f"{ix}: {item.label.value}")
28782892

28792893
def export_to_element_tree(self) -> str:
28802894
"""Export_to_element_tree."""
28812895
texts = []
2882-
for ix, (item, level) in enumerate(self.iterate_items(with_groups=True)):
2896+
for ix, (item, level) in enumerate(
2897+
self.iterate_items(
2898+
with_groups=True,
2899+
traverse_pictures=True,
2900+
included_content_layers={cl for cl in ContentLayer},
2901+
)
2902+
):
28832903
if isinstance(item, GroupItem):
28842904
texts.append(
28852905
" " * level + f"{ix}: {item.label.value} with name={item.name}"
28862906
)
2907+
elif isinstance(item, TextItem):
2908+
texts.append(
2909+
" " * level
2910+
+ f"{ix}: {item.label.value}: {item.text[:min(len(item.text), 100)]}"
2911+
)
28872912
elif isinstance(item, DocItem):
28882913
texts.append(" " * level + f"{ix}: {item.label.value}")
28892914

docling_core/types/doc/labels.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ class DocItemLabel(str, Enum):
2727
KEY_VALUE_REGION = "key_value_region"
2828
GRADING_SCALE = "grading_scale" # for elements in forms, questionnaires representing a grading scale
2929
# e.g. [strongly disagree | ... | ... | strongly agree]
30+
# e.g. ★★☆☆☆
31+
HANDWRITTEN_TEXT = "handwritten_text"
32+
EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
3033

3134
# Additional labels for markup-based formats (e.g. HTML, Word)
3235
PARAGRAPH = "paragraph"
@@ -60,6 +63,9 @@ def get_color(label: "DocItemLabel") -> Tuple[int, int, int]:
6063
DocItemLabel.KEY_VALUE_REGION: (183, 65, 14),
6164
DocItemLabel.PARAGRAPH: (255, 255, 153),
6265
DocItemLabel.REFERENCE: (176, 224, 230),
66+
DocItemLabel.GRADING_SCALE: (255, 204, 204),
67+
DocItemLabel.HANDWRITTEN_TEXT: (204, 255, 204),
68+
DocItemLabel.EMPTY_VALUE: (220, 220, 220),
6369
}
6470
return color_map.get(label, (0, 0, 0))
6571

@@ -166,7 +172,6 @@ class GraphCellLabel(str, Enum):
166172
KEY = "key" # used to designate a key (label) of a key-value element
167173
VALUE = "value" # Data value with or without explicit Key, but filled in,
168174
# e.g. telephone number, address, quantity, name, date
169-
EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms
170175
CHECKBOX = "checkbox"
171176

172177
def __str__(self):
@@ -179,7 +184,6 @@ def get_color(label: "GraphCellLabel") -> Tuple[int, int, int]:
179184
color_map = {
180185
GraphCellLabel.KEY: (255, 0, 0),
181186
GraphCellLabel.VALUE: (0, 255, 0),
182-
GraphCellLabel.EMPTY_VALUE: (0, 0, 255),
183187
}
184188
return color_map.get(label, (0, 0, 0))
185189

docs/DoclingDocument.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,8 @@
361361
"description": "ContentLayer.",
362362
"enum": [
363363
"body",
364-
"furniture"
364+
"furniture",
365+
"background"
365366
],
366367
"title": "ContentLayer",
367368
"type": "string"
@@ -711,7 +712,6 @@
711712
"unspecified",
712713
"key",
713714
"value",
714-
"empty_value",
715715
"checkbox"
716716
],
717717
"title": "GraphCellLabel",
@@ -2099,7 +2099,8 @@
20992099
"page_header",
21002100
"paragraph",
21012101
"reference",
2102-
"text"
2102+
"text",
2103+
"empty_value"
21032104
],
21042105
"title": "Label",
21052106
"type": "string"

0 commit comments

Comments
 (0)