diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock
index 7e2af778..50da82b7 100755
--- a/.speakeasy/gen.lock
+++ b/.speakeasy/gen.lock
@@ -1,26 +1,26 @@
lockVersion: 2.0.0
id: 8b5fa338-9106-4734-abf0-e30d67044a90
management:
- docChecksum: 903444f359d1dfa6342c692ae3e5c7ff
+ docChecksum: a112aea005467aa6818696fa4e99fcfe
docVersion: 0.0.1
speakeasyVersion: internal
- generationVersion: 2.250.19
- releaseVersion: 0.18.0
- configChecksum: 938a4a39baa5695a3140be3b858483d4
+ generationVersion: 2.277.0
+ releaseVersion: 0.21.0
+ configChecksum: c5e7c8526f43272d7585627468d8c4e5
repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git
repoSubDirectory: .
installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git
published: true
features:
python:
- core: 4.4.5
+ core: 4.5.0
examples: 2.81.3
- globalSecurity: 2.83.2
+ globalSecurity: 2.83.4
globalServerURLs: 2.82.1
nameOverrides: 2.81.1
retries: 2.82.1
serverIDs: 2.81.1
- unions: 2.82.5
+ unions: 2.82.6
generatedFiles:
- src/unstructured_client/sdkconfiguration.py
- src/unstructured_client/general.py
@@ -51,3 +51,6 @@ generatedFiles:
- docs/models/shared/security.md
- USAGE.md
- .gitattributes
+ - src/unstructured_client/_hooks/sdkhooks.py
+ - src/unstructured_client/_hooks/types.py
+ - src/unstructured_client/_hooks/__init__.py
diff --git a/RELEASES.md b/RELEASES.md
index 189b3d18..93856bc2 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -424,4 +424,34 @@ Based on:
### Generated
- [python v0.18.0] .
### Releases
-- [PyPI v0.18.0] https://pypi.org/project/unstructured-client/0.18.0 - .
\ No newline at end of file
+- [PyPI v0.18.0] https://pypi.org/project/unstructured-client/0.18.0 - .
+
+## 2024-02-19 00:19:41
+### Changes
+Based on:
+- OpenAPI Doc 0.0.64
+- Speakeasy CLI 1.183.2 (2.262.2) https://github.com/speakeasy-api/speakeasy
+### Generated
+- [python v0.19.0] .
+### Releases
+- [PyPI v0.19.0] https://pypi.org/project/unstructured-client/0.19.0 - .
+
+## 2024-02-22 00:18:37
+### Changes
+Based on:
+- OpenAPI Doc 0.0.1
+- Speakeasy CLI 1.189.0 (2.263.3) https://github.com/speakeasy-api/speakeasy
+### Generated
+- [python v0.20.0] .
+### Releases
+- [PyPI v0.20.0] https://pypi.org/project/unstructured-client/0.20.0 - .
+
+## 2024-03-01 23:20:07
+### Changes
+Based on:
+- OpenAPI Doc 0.0.1
+- Speakeasy CLI 1.200.0 (2.277.0) https://github.com/speakeasy-api/speakeasy
+### Generated
+- [python v0.21.0] .
+### Releases
+- [PyPI v0.21.0] https://pypi.org/project/unstructured-client/0.21.0 - .
\ No newline at end of file
diff --git a/USAGE.md b/USAGE.md
index c224a2a0..919a0d13 100644
--- a/USAGE.md
+++ b/USAGE.md
@@ -15,10 +15,6 @@ req = shared.PartitionParameters(
'image',
'table',
],
- files=shared.Files(
- content='0x2cC94b2FEF'.encode(),
- file_name='um.shtml',
- ),
gz_uncompressed_content_type='application/pdf',
hi_res_model_name='yolox',
languages=[
@@ -31,9 +27,8 @@ req = shared.PartitionParameters(
max_characters=1500,
new_after_n_chars=1500,
output_format='application/json',
- skip_infer_table_types=[
- 'pdf',
- ],
+ overlap=25,
+ overlap_all=True,
strategy='hi_res',
)
diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md
index 4e2b8f97..df9df34b 100644
--- a/docs/models/shared/partitionparameters.md
+++ b/docs/models/shared/partitionparameters.md
@@ -9,7 +9,7 @@
| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 |
| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | |
| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | utf-8 |
-| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | ["image","table"] |
+| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | [<br/>"image",<br/>"table"<br/>] |
| `files` | [Optional[shared.Files]](../../models/shared/files.md) | :heavy_minus_sign: | The file to extract | |
| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf |
| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox |
@@ -19,6 +19,8 @@
| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple pages. Default: true | |
| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | 1500 |
| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json |
+| `overlap` | *Optional[int]* | :heavy_minus_sign: | A prefix of this many trailing characters from the prior text-split chunk is applied to the second and later chunks formed from oversized elements by text-splitting. Default: None | 25 |
+| `overlap_all` | *Optional[bool]* | :heavy_minus_sign: | When True, overlap is also applied to 'normal' chunks formed by combining whole elements. Use with caution as this can introduce noise into otherwise clean semantic units. Default: None | true |
| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML