Skip to content

Commit 0482bac

Browse files
maxmnemonic (Maksym Lysak)
and
Maksym Lysak
authored
feat: Markdown chart serializer (picture+table) (#235)
* Markdown serializer for charts Signed-off-by: Maksym Lysak <[email protected]> * Added serializer parameter disable_chart_tables, which prevents printing of the chart table into Markdown Signed-off-by: Maksym Lysak <[email protected]> * cleaning Signed-off-by: Maksym Lysak <[email protected]> * propagating parameter disable_chart_tables Signed-off-by: Maksym Lysak <[email protected]> * Updated parameter name from negative to positive notation: enable_chart_tables Signed-off-by: Maksym Lysak <[email protected]> --------- Signed-off-by: Maksym Lysak <[email protected]> Co-authored-by: Maksym Lysak <[email protected]>
1 parent e9259a5 commit 0482bac

File tree

2 files changed

+19
-0
lines changed

2 files changed

+19
-0
lines changed

docling_core/experimental/serializer/markdown.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
NodeItem,
4444
OrderedList,
4545
PictureItem,
46+
PictureTabularChartData,
4647
SectionHeaderItem,
4748
TableItem,
4849
TextItem,
@@ -57,6 +58,7 @@ class MarkdownParams(CommonParams):
5758
layers: set[ContentLayer] = {ContentLayer.BODY}
5859
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER
5960
image_placeholder: str = "<!-- image -->"
61+
enable_chart_tables: bool = True
6062
indent: int = 4
6163
wrap_width: Optional[PositiveInt] = None
6264
page_break_placeholder: Optional[str] = None # e.g. "<!-- page break -->"
@@ -206,6 +208,21 @@ def serialize(
206208
if img_res.text:
207209
texts.append(img_res.text)
208210

211+
if params.enable_chart_tables:
212+
# Check if picture has attached PictureTabularChartData
213+
tabular_chart_annotations = [
214+
ann
215+
for ann in item.annotations
216+
if isinstance(ann, PictureTabularChartData)
217+
]
218+
if len(tabular_chart_annotations) > 0:
219+
temp_doc = DoclingDocument(name="temp")
220+
temp_table = temp_doc.add_table(
221+
data=tabular_chart_annotations[0].chart_data
222+
)
223+
md_table_content = temp_table.export_to_markdown(temp_doc)
224+
if len(md_table_content) > 0:
225+
texts.append(md_table_content)
209226
text_res = "\n\n".join(texts)
210227

211228
return SerializationResult(text=text_res)

docling_core/types/doc/document.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3130,6 +3130,7 @@ def export_to_markdown( # noqa: C901
31303130
strict_text: bool = False,
31313131
escape_underscores: bool = True,
31323132
image_placeholder: str = "<!-- image -->",
3133+
enable_chart_tables: bool = True,
31333134
image_mode: ImageRefMode = ImageRefMode.PLACEHOLDER,
31343135
indent: int = 4,
31353136
text_width: int = -1,
@@ -3197,6 +3198,7 @@ def export_to_markdown( # noqa: C901
31973198
stop_idx=to_element,
31983199
escape_underscores=escape_underscores,
31993200
image_placeholder=image_placeholder,
3201+
enable_chart_tables=enable_chart_tables,
32003202
image_mode=image_mode,
32013203
indent=indent,
32023204
wrap_width=text_width if text_width > 0 else None,

0 commit comments

Comments
 (0)