Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions docling_core/transforms/serializer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,16 @@
"""Hook for strikethrough formatting serialization."""
...

@abstractmethod
def serialize_subscript(self, text: str, **kwargs: Any) -> str:
    """Hook for subscript formatting serialization.

    Abstract: concrete serializers must override this method.

    Args:
        text: The text to be rendered as subscript.
        **kwargs: Additional keyword arguments accepted by the hook.

    Returns:
        The text with the serializer-specific subscript formatting applied.
    """
    ...

Check warning on line 208 in docling_core/transforms/serializer/base.py

View check run for this annotation

Codecov / codecov/patch

docling_core/transforms/serializer/base.py#L208

Added line #L208 was not covered by tests

@abstractmethod
def serialize_superscript(self, text: str, **kwargs: Any) -> str:
    """Hook for superscript formatting serialization.

    Abstract: concrete serializers must override this method.

    Args:
        text: The text to be rendered as superscript.
        **kwargs: Additional keyword arguments accepted by the hook.

    Returns:
        The text with the serializer-specific superscript formatting applied.
    """
    ...

Check warning on line 213 in docling_core/transforms/serializer/base.py

View check run for this annotation

Codecov / codecov/patch

docling_core/transforms/serializer/base.py#L213

Added line #L213 was not covered by tests

@abstractmethod
def serialize_hyperlink(
self,
Expand Down
15 changes: 15 additions & 0 deletions docling_core/transforms/serializer/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
PictureDataType,
PictureItem,
PictureMoleculeData,
Script,
TableAnnotationType,
TableItem,
TextItem,
Expand Down Expand Up @@ -455,6 +456,10 @@ def post_process(
res = self.serialize_underline(text=res)
if formatting.strikethrough:
res = self.serialize_strikethrough(text=res)
if formatting.script == Script.SUB:
res = self.serialize_subscript(text=res)
elif formatting.script == Script.SUPER:
res = self.serialize_superscript(text=res)
if params.include_hyperlinks and hyperlink:
res = self.serialize_hyperlink(text=res, hyperlink=hyperlink)
return res
Expand All @@ -479,6 +484,16 @@ def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
"""Hook for strikethrough formatting serialization."""
return text

@override
def serialize_subscript(self, text: str, **kwargs: Any) -> str:
    """Hook for subscript formatting serialization.

    Default implementation: returns ``text`` unchanged, so serializers
    that do not override this hook emit subscript text without markup.

    Args:
        text: The text marked as subscript.
        **kwargs: Additional keyword arguments; ignored here.

    Returns:
        ``text`` as received.
    """
    return text

@override
def serialize_superscript(self, text: str, **kwargs: Any) -> str:
    """Hook for superscript formatting serialization.

    Default implementation: returns ``text`` unchanged, so serializers
    that do not override this hook emit superscript text without markup.

    Args:
        text: The text marked as superscript.
        **kwargs: Additional keyword arguments; ignored here.

    Returns:
        ``text`` as received.
    """
    return text

@override
def serialize_hyperlink(
self,
Expand Down
10 changes: 10 additions & 0 deletions docling_core/transforms/serializer/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,16 @@ def serialize_strikethrough(self, text: str, **kwargs: Any) -> str:
"""Apply HTML-specific strikethrough serialization."""
return f"<del>{text}</del>"

@override
def serialize_subscript(self, text: str, **kwargs: Any) -> str:
    """Apply HTML-specific subscript serialization.

    Args:
        text: The text to wrap. It is inserted verbatim; no escaping is
            performed in this method.
        **kwargs: Additional keyword arguments; ignored here.

    Returns:
        ``text`` enclosed in a ``<sub>`` element.
    """
    return f"<sub>{text}</sub>"

@override
def serialize_superscript(self, text: str, **kwargs: Any) -> str:
    """Apply HTML-specific superscript serialization.

    Args:
        text: The text to wrap. It is inserted verbatim; no escaping is
            performed in this method.
        **kwargs: Additional keyword arguments; ignored here.

    Returns:
        ``text`` enclosed in a ``<sup>`` element.
    """
    return f"<sup>{text}</sup>"

@override
def serialize_hyperlink(
self,
Expand Down
9 changes: 9 additions & 0 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,13 +829,22 @@ def get_annotations(self) -> Sequence[BaseAnnotation]:
return []


class Script(str, Enum):
    """Text script position.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value (e.g. ``Script.SUB == "sub"``).
    """

    # Regular text on the baseline (neither subscript nor superscript).
    BASELINE = "baseline"
    # Subscript text.
    SUB = "sub"
    # Superscript text.
    SUPER = "super"


class Formatting(BaseModel):
    """Inline text formatting.

    Every flag defaults to off and the script position defaults to the
    baseline, so a default-constructed instance describes plain text.
    """

    # Style flags; each one is independent of the others.
    bold: bool = False
    italic: bool = False
    underline: bool = False
    strikethrough: bool = False
    # Script position: baseline, subscript, or superscript.
    script: Script = Script.BASELINE


class TextItem(DocItem):
Expand Down
14 changes: 14 additions & 0 deletions docs/DoclingDocument.json
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,10 @@
"default": false,
"title": "Strikethrough",
"type": "boolean"
},
"script": {
"$ref": "#/$defs/Script",
"default": "baseline"
}
},
"title": "Formatting",
Expand Down Expand Up @@ -1715,6 +1719,16 @@
"title": "RefItem",
"type": "object"
},
"Script": {
"description": "Text script position.",
"enum": [
"baseline",
"sub",
"super"
],
"title": "Script",
"type": "string"
},
"SectionHeaderItem": {
"additionalProperties": false,
"description": "SectionItem.",
Expand Down
Loading
Loading