Skip to content

Commit faf472c

Browse files
authored
fix: set type to optional (#7)
Signed-off-by: Cesar Berrospi Ramis <[email protected]>
1 parent 3f77b2e commit faf472c

File tree

4 files changed

+214
-9
lines changed

4 files changed

+214
-9
lines changed

docling_core/types/doc/document.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ class MinimalDocument(
263263
"""Minimal model for a document."""
264264

265265
name: StrictStr = Field(alias="_name")
266-
obj_type: StrictStr = Field("document", alias="type")
266+
obj_type: Optional[StrictStr] = Field("document", alias="type")
267267
description: CCSDocumentDescription[
268268
DescriptionAdvancedT,
269269
DescriptionAnalyticsT,
@@ -291,7 +291,7 @@ class CCSDocument(
291291
):
292292
"""Model for a CCS-generated document."""
293293

294-
obj_type: StrictStr = Field("pdf-document", alias="type")
294+
obj_type: Optional[StrictStr] = Field("pdf-document", alias="type")
295295
bitmaps: Optional[list[BitmapObject]] = None
296296
equations: Optional[list[BaseCell]] = None
297297
footnotes: Optional[list[BaseText]] = None
@@ -355,7 +355,7 @@ class ExportedCCSDocument(
355355
):
356356
"""Document model for Docling."""
357357

358-
obj_type: StrictStr = Field(
358+
obj_type: Optional[StrictStr] = Field(
359359
"pdf-document",
360360
alias="type",
361361
json_schema_extra=es_field(type="keyword", ignore_above=8191),

docs/Document.json

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1732,9 +1732,16 @@
17321732
"type": "string"
17331733
},
17341734
"type": {
1735+
"anyOf": [
1736+
{
1737+
"type": "string"
1738+
},
1739+
{
1740+
"type": "null"
1741+
}
1742+
],
17351743
"default": "pdf-document",
17361744
"title": "Type",
1737-
"type": "string",
17381745
"x-es-ignore_above": 8191,
17391746
"x-es-type": "keyword"
17401747
},

docs/Document.md

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,42 @@
3636

3737
**Title:** Type
3838

39-
| | |
40-
| ------------ | ---------------- |
41-
| **Type** | `string` |
42-
| **Required** | No |
43-
| **Default** | `"pdf-document"` |
39+
| | |
40+
| ------------------------- | ------------------------------------------------------------------------- |
41+
| **Type** | `combining` |
42+
| **Required** | No |
43+
| **Additional properties** | [[Any type: allowed]](# "Additional Properties of any type are allowed.") |
44+
| **Default** | `"pdf-document"` |
45+
46+
<blockquote>
47+
48+
| Any of(Option) |
49+
| ------------------------ |
50+
| [item 0](#type_anyOf_i0) |
51+
| [item 1](#type_anyOf_i1) |
52+
53+
<blockquote>
54+
55+
### <a name="type_anyOf_i0"></a>2.1. Property `ExportedCCSDocument > type > anyOf > item 0`
56+
57+
| | |
58+
| ------------ | -------- |
59+
| **Type** | `string` |
60+
| **Required** | No |
61+
62+
</blockquote>
63+
<blockquote>
64+
65+
### <a name="type_anyOf_i1"></a>2.2. Property `ExportedCCSDocument > type > anyOf > item 1`
66+
67+
| | |
68+
| ------------ | ------ |
69+
| **Type** | `null` |
70+
| **Required** | No |
71+
72+
</blockquote>
73+
74+
</blockquote>
4475

4576
</blockquote>
4677
</details>

test/data/doc/doc-9.json

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
{
2+
"_name": "2023 IBM International Business Machines Corp.",
3+
"bitmaps": [],
4+
"description": {
5+
"logs": [
6+
{
7+
"date": "2024-04-15T09:18:31.855017+00:00",
8+
"agent": "CCS",
9+
"comment": "parsing of documents",
10+
"type": "parsing"
11+
},
12+
{
13+
"date": "2024-04-15T09:43:18.651+00:00",
14+
"agent": "CXS",
15+
"task": "task 12345",
16+
"comment": "enrichment of documents",
17+
"type": "text enrichment"
18+
}
19+
],
20+
"collection": {
21+
"type": "Document",
22+
"name": "ESG Reports",
23+
"alias": [
24+
"esg-report"
25+
],
26+
"version": "2.0.2"
27+
},
28+
"languages": [
29+
"en"
30+
],
31+
"advanced": {
32+
"website": [
33+
"http://www.ibm.com/"
34+
],
35+
"year": 2023
36+
},
37+
"subjects": [
38+
"Technology"
39+
],
40+
"publication_date": "2023-01-01T12:00:00.000+00:00",
41+
"affiliations": [
42+
{
43+
"name": "International Business Machines Corp.",
44+
"id": "ibm",
45+
"source": "nyse"
46+
}
47+
],
48+
"title": "2023 ESG Report",
49+
"type": "ESG report"
50+
},
51+
"equations": [],
52+
"figures": [],
53+
"file-info": {
54+
"#-pages": 1,
55+
"document-hash": "776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb",
56+
"filename": "IBM_2023.pdf",
57+
"page-hashes": [
58+
{
59+
"hash": "49319ad44d1997ea392d043a3b5c5fb044c9da0f16ab917326b14da4482bb39e",
60+
"model": "model",
61+
"page": 1
62+
}
63+
]
64+
},
65+
"footnotes": [],
66+
"main-text": [
67+
{
68+
"$ref": "#/figures/0",
69+
"name": "picture",
70+
"type": "figure"
71+
},
72+
{
73+
"text": "-------------------------___, _ IXI",
74+
"name": "text",
75+
"type": "paragraph",
76+
"prov": [
77+
{
78+
"bbox": [
79+
896.1026000976562,
80+
30.518247604370117,
81+
972.6661987304688,
82+
77.09062957763672
83+
],
84+
"page": 1,
85+
"span": [
86+
0,
87+
35
88+
]
89+
}
90+
]
91+
}
92+
],
93+
"page-dimensions": [
94+
{
95+
"height": 612,
96+
"page": 1,
97+
"width": 1008
98+
}
99+
],
100+
"page-footers": [],
101+
"page-headers": [],
102+
"references": [],
103+
"tables": [],
104+
"conversion_settings": {
105+
"model_pipeline": {
106+
"clusters": [
107+
{
108+
"type": "LayoutSegmentationModel",
109+
"name": "LayoutSegmentationModel",
110+
"version": "NA"
111+
}
112+
],
113+
"page": [],
114+
"normalization": [],
115+
"tables": [
116+
{
117+
"type": "TableStructureModel",
118+
"name": "TableStructureModel",
119+
"version": "NA"
120+
}
121+
]
122+
}
123+
},
124+
"version": 2,
125+
"_s3_data": {
126+
"pdf-document": [
127+
{
128+
"mime": "application/pdf",
129+
"path": "index-code/PDFDocuments/776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb.pdf"
130+
}
131+
],
132+
"pdf-pages": [
133+
{
134+
"mime": "application/pdf",
135+
"path": "index-code/PDFPages/49319ad44d1997ea392d043a3b5c5fb044c9da0f16ab917326b14da4482bb39e.pdf",
136+
"page": 1
137+
}
138+
],
139+
"markdown-document": [
140+
{
141+
"mime": "text/markdown",
142+
"path": "index-code/MD/776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb.md"
143+
}
144+
],
145+
"json-document": {
146+
"mime": "application/json",
147+
"path": "index-code/JSONDocuments/776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb.json"
148+
},
149+
"json-meta": {
150+
"mime": "application/json",
151+
"path": "index-code/JSONDocuments/776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb.meta.json"
152+
},
153+
"glm-json-document": {
154+
"mime": "application/json",
155+
"path": "index-code/JSONDocuments/776531b533f5970d81de684e84b25cb13bafbab2cf520fddf7bac2ba25233dcb.glm.json"
156+
}
157+
},
158+
"type": null,
159+
"_content_hash": "84be138c500936cbbc70628ceb4e4f82",
160+
"identifiers": [
161+
{
162+
"_name": "esg_report#ibm_2023",
163+
"type": "esg_report",
164+
"value": "ibm_2023"
165+
}
166+
]
167+
}

0 commit comments

Comments
 (0)