Skip to content

Commit

Permalink
initial work
Browse files Browse the repository at this point in the history
Signed-off-by: Peter Staar <[email protected]>
  • Loading branch information
PeterStaar-IBM committed Aug 19, 2024
1 parent 3f81690 commit 5fd4471
Showing 1 changed file with 73 additions and 0 deletions.
73 changes: 73 additions & 0 deletions docling_core/types/doc/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,3 +500,76 @@ def export_to_markdown(

result = delim.join(md_texts)
return result


class HierarchicalDocument(
    MinimalDocument,
    Generic[
        DescriptionAdvancedT,
        DescriptionAnalyticsT,
        IdentifierTypeT,
        LanguageT,
        CollectionNameTypeT,
    ],
):
    """HierarchicalDocument model for Docling.

    Extends ``MinimalDocument`` with document-level metadata (type,
    description, file info, optional S3 data and identifiers) and a set of
    optional content collections, each holding ``Ref`` or ``BaseText``
    entries.

    Every content collection defaults to ``None`` and is therefore annotated
    as ``Optional``: a non-optional ``list[...]`` annotation combined with a
    ``None`` default would let unset fields carry ``None`` while rejecting an
    explicit ``None`` on validation (e.g. when re-loading a dumped document).
    """

    # description of the document

    # Serialized under the key "type"; defaults to "pdf-document".
    obj_type: Optional[StrictStr] = Field(
        "pdf-document",
        alias="type",
        json_schema_extra=es_field(type="keyword", ignore_above=8191),
    )

    # Required: no default is provided.
    description: CCSDocumentDescription[
        DescriptionAdvancedT,
        DescriptionAnalyticsT,
        IdentifierTypeT,
        LanguageT,
        CollectionNameTypeT,
    ]

    # Required; serialized under the key "file-info".
    file_info: CCSFileInfoObject = Field(alias="file-info")

    # Optional; serialized under the key "_s3_data".
    s3_data: Optional[S3Data] = Field(default=None, alias="_s3_data")

    identifiers: Optional[list[Identifier[IdentifierTypeT]]] = None

    # content of the document
    #
    # NOTE(review): all collections below (including ``prov``, ``tables`` and
    # ``figures``) are typed as Ref/BaseText entries - confirm whether
    # dedicated item types are intended for some of them.

    prov: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="prov"
    )

    body: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="body"
    )
    meta: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="meta"
    )

    texts: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="texts"
    )
    tables: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="tables"
    )
    figures: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="figures"
    )

    page_headers: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="page_headers"
    )
    page_footers: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="page_footers"
    )
    footnotes: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="footnotes"
    )

    other: Optional[list[Union[Ref, BaseText]]] = Field(
        default=None, alias="other"
    )

0 comments on commit 5fd4471

Please sign in to comment.