From d5375790f64ab1302013c157ca07e190b61ebf0c Mon Sep 17 00:00:00 2001 From: speakeasybot Date: Mon, 19 Feb 2024 00:19:48 +0000 Subject: [PATCH 1/3] ci: regenerated with OpenAPI Doc 0.0.64, Speakeasy CLI 1.183.2 --- .speakeasy/gen.lock | 30 ++++++----- RELEASES.md | 12 ++++- USAGE.md | 38 ++++---------- .../operations/partitionparametersrequest.md | 9 ++++ ...onse.md => partitionparametersresponse.md} | 4 +- docs/models/shared/bodypartitionparameters.md | 28 ++++++++++ docs/models/shared/element.md | 11 ++++ docs/models/shared/metadata.md | 7 +++ docs/models/shared/partitionparameters.md | 25 --------- docs/models/shared/strategy.md | 13 +++++ gen.yaml | 2 +- setup.py | 2 +- src/unstructured_client/general.py | 25 +++++---- .../models/operations/__init__.py | 4 +- .../models/operations/partition.py | 20 -------- .../models/operations/partition_parameters.py | 31 +++++++++++ .../models/shared/__init__.py | 5 +- ...meters.py => body_partition_parameters.py} | 51 ++++++++++++------- .../models/shared/element.py | 22 ++++++++ src/unstructured_client/sdk.py | 5 +- src/unstructured_client/sdkconfiguration.py | 19 +++---- src/unstructured_client/utils/retries.py | 3 -- 22 files changed, 226 insertions(+), 140 deletions(-) create mode 100644 docs/models/operations/partitionparametersrequest.md rename docs/models/operations/{partitionresponse.md => partitionparametersresponse.md} (89%) create mode 100644 docs/models/shared/bodypartitionparameters.md create mode 100644 docs/models/shared/element.md create mode 100644 docs/models/shared/metadata.md delete mode 100644 docs/models/shared/partitionparameters.md create mode 100644 docs/models/shared/strategy.md delete mode 100644 src/unstructured_client/models/operations/partition.py create mode 100644 src/unstructured_client/models/operations/partition_parameters.py rename src/unstructured_client/models/shared/{partition_parameters.py => body_partition_parameters.py} (52%) create mode 100644 
src/unstructured_client/models/shared/element.py diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index 7e2af778..a8649730 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,19 +1,20 @@ lockVersion: 2.0.0 id: 8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 903444f359d1dfa6342c692ae3e5c7ff - docVersion: 0.0.1 + docChecksum: 8e80e4d12e16961f9061ef746c01761a + docVersion: 0.0.64 speakeasyVersion: internal - generationVersion: 2.250.19 - releaseVersion: 0.18.0 - configChecksum: 938a4a39baa5695a3140be3b858483d4 + generationVersion: 2.262.2 + releaseVersion: 0.19.0 + configChecksum: bcbf9e2848a6a837e9453f70dbc10b07 repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git published: true features: python: - core: 4.4.5 + constsAndDefaults: 0.1.2 + core: 4.4.6 examples: 2.81.3 globalSecurity: 2.83.2 globalServerURLs: 2.82.1 @@ -33,21 +34,26 @@ generatedFiles: - src/unstructured_client/utils/utils.py - src/unstructured_client/models/errors/sdkerror.py - tests/helpers.py - - src/unstructured_client/models/operations/partition.py - - src/unstructured_client/models/errors/httpvalidationerror.py + - src/unstructured_client/models/operations/partition_parameters.py + - src/unstructured_client/models/shared/element.py - src/unstructured_client/models/shared/validationerror.py - - src/unstructured_client/models/shared/partition_parameters.py + - src/unstructured_client/models/shared/body_partition_parameters.py - src/unstructured_client/models/shared/security.py + - src/unstructured_client/models/errors/httpvalidationerror.py - src/unstructured_client/models/__init__.py - src/unstructured_client/models/errors/__init__.py - src/unstructured_client/models/operations/__init__.py - src/unstructured_client/models/shared/__init__.py - - docs/models/operations/partitionresponse.md - - docs/models/errors/httpvalidationerror.md + - 
docs/models/operations/partitionparametersrequest.md + - docs/models/operations/partitionparametersresponse.md + - docs/models/shared/metadata.md + - docs/models/shared/element.md - docs/models/shared/loc.md - docs/models/shared/validationerror.md - docs/models/shared/files.md - - docs/models/shared/partitionparameters.md + - docs/models/shared/strategy.md + - docs/models/shared/bodypartitionparameters.md - docs/models/shared/security.md + - docs/models/errors/httpvalidationerror.md - USAGE.md - .gitattributes diff --git a/RELEASES.md b/RELEASES.md index 189b3d18..17efc126 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -424,4 +424,14 @@ Based on: ### Generated - [python v0.18.0] . ### Releases -- [PyPI v0.18.0] https://pypi.org/project/unstructured-client/0.18.0 - . \ No newline at end of file +- [PyPI v0.18.0] https://pypi.org/project/unstructured-client/0.18.0 - . + +## 2024-02-19 00:19:41 +### Changes +Based on: +- OpenAPI Doc 0.0.64 +- Speakeasy CLI 1.183.2 (2.262.2) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.19.0] . +### Releases +- [PyPI v0.19.0] https://pypi.org/project/unstructured-client/0.19.0 - . 
\ No newline at end of file diff --git a/USAGE.md b/USAGE.md index c224a2a0..1c1cc7b2 100644 --- a/USAGE.md +++ b/USAGE.md @@ -1,45 +1,25 @@ ```python import unstructured_client -from unstructured_client.models import shared +from unstructured_client.models import operations, shared s = unstructured_client.UnstructuredClient( api_key_auth="YOUR_API_KEY", ) -req = shared.PartitionParameters( - chunking_strategy='by_title', - combine_under_n_chars=500, - encoding='utf-8', - extract_image_block_types=[ - 'image', - 'table', - ], - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='um.shtml', +req = operations.PartitionParametersRequest( + body_partition_parameters=shared.BodyPartitionParameters( + files=shared.Files( + content='0x2cC94b2FEF'.encode(), + file_name='um.shtml', + ), + strategy=shared.Strategy.HI_RES, ), - gz_uncompressed_content_type='application/pdf', - hi_res_model_name='yolox', - languages=[ - '[', - 'e', - 'n', - 'g', - ']', - ], - max_characters=1500, - new_after_n_chars=1500, - output_format='application/json', - skip_infer_table_types=[ - 'pdf', - ], - strategy='hi_res', ) res = s.general.partition(req) -if res.elements is not None: +if res.response_partition_parameters is not None: # handle response pass ``` diff --git a/docs/models/operations/partitionparametersrequest.md b/docs/models/operations/partitionparametersrequest.md new file mode 100644 index 00000000..019a2840 --- /dev/null +++ b/docs/models/operations/partitionparametersrequest.md @@ -0,0 +1,9 @@ +# PartitionParametersRequest + + +## Fields + +| Field | Type | Required | Description | +| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | +| `body_partition_parameters` | 
[shared.BodyPartitionParameters](../../models/shared/bodypartitionparameters.md) | :heavy_check_mark: | N/A | +| `unstructured_api_key` | *Optional[str]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/models/operations/partitionresponse.md b/docs/models/operations/partitionparametersresponse.md similarity index 89% rename from docs/models/operations/partitionresponse.md rename to docs/models/operations/partitionparametersresponse.md index b0dbc682..db64e639 100644 --- a/docs/models/operations/partitionresponse.md +++ b/docs/models/operations/partitionparametersresponse.md @@ -1,4 +1,4 @@ -# PartitionResponse +# PartitionParametersResponse ## Fields @@ -8,4 +8,4 @@ | `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation | | `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation | | `raw_response` | [requests.Response](https://requests.readthedocs.io/en/latest/api/#requests.Response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing | -| `elements` | List[*Any*] | :heavy_minus_sign: | Successful Response | \ No newline at end of file +| `response_partition_parameters` | List[[shared.Element](../../models/shared/element.md)] | :heavy_minus_sign: | Successful Response | \ No newline at end of file diff --git a/docs/models/shared/bodypartitionparameters.md b/docs/models/shared/bodypartitionparameters.md new file mode 100644 index 00000000..ed8a581f --- /dev/null +++ b/docs/models/shared/bodypartitionparameters.md @@ -0,0 +1,28 @@ +# BodyPartitionParameters + + +## Fields + +| Field | Type | Required | Description | Example | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| `files` | [shared.Files](../../models/shared/files.md) | :heavy_check_mark: | The file to extract | | +| `chunking_strategy` | *Optional[Any]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. Currently supports: by_title | | +| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | | +| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | +| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. 
Default: utf-8 | | +| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | | +| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | | +| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | | +| `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | | +| `languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | +| `max_characters` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500 | | +| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple pages. Default: true | | +| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | | +| `ocr_languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | +| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | | +| `overlap` | *Optional[int]* | :heavy_minus_sign: | Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. 
Default: 0 | | +| `overlap_all` | *Optional[bool]* | :heavy_minus_sign: | When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False | | +| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is just a transformation of the data into an HTML table. | | +| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | +| `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto, ocr_only. Default: auto | auto | +| `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml. 
| | \ No newline at end of file diff --git a/docs/models/shared/element.md b/docs/models/shared/element.md new file mode 100644 index 00000000..403252e8 --- /dev/null +++ b/docs/models/shared/element.md @@ -0,0 +1,11 @@ +# Element + + +## Fields + +| Field | Type | Required | Description | +| -------------------------------------------------- | -------------------------------------------------- | -------------------------------------------------- | -------------------------------------------------- | +| `element_id` | *str* | :heavy_check_mark: | N/A | +| `metadata` | [shared.Metadata](../../models/shared/metadata.md) | :heavy_check_mark: | N/A | +| `text` | *str* | :heavy_check_mark: | N/A | +| `type` | *str* | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/shared/metadata.md b/docs/models/shared/metadata.md new file mode 100644 index 00000000..e655f580 --- /dev/null +++ b/docs/models/shared/metadata.md @@ -0,0 +1,7 @@ +# Metadata + + +## Fields + +| Field | Type | Required | Description | +| ----------- | ----------- | ----------- | ----------- | \ No newline at end of file diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md deleted file mode 100644 index 4e2b8f97..00000000 --- a/docs/models/shared/partitionparameters.md +++ /dev/null @@ -1,25 +0,0 @@ -# PartitionParameters - - -## Fields - -| Field | Type | Required | Description | Example | -| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `chunking_strategy` | *Optional[str]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. Currently supports: by_title | by_title | -| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 | -| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | -| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. 
Default: utf-8 | utf-8 | -| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | ["image","table"] | -| `files` | [Optional[shared.Files]](../../models/shared/files.md) | :heavy_minus_sign: | The file to extract | | -| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf | -| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox | -| `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | | -| `languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | [eng] | -| `max_characters` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500 | 1500 | -| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple sections. Default: true | | -| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | 1500 | -| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json | -| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
. | | -| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | -| `strategy` | *Optional[str]* | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | hi_res | -| `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml. | | \ No newline at end of file diff --git a/docs/models/shared/strategy.md b/docs/models/shared/strategy.md new file mode 100644 index 00000000..2c6d2875 --- /dev/null +++ b/docs/models/shared/strategy.md @@ -0,0 +1,13 @@ +# Strategy + +The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto + + +## Values + +| Name | Value | +| ---------- | ---------- | +| `FAST` | fast | +| `HI_RES` | hi_res | +| `AUTO` | auto | +| `OCR_ONLY` | ocr_only | \ No newline at end of file diff --git a/gen.yaml b/gen.yaml index 9f6e1178..2dc9f53d 100644 --- a/gen.yaml +++ b/gen.yaml @@ -8,7 +8,7 @@ generation: parameterOrderingFeb2024: false requestResponseComponentNamesFeb2024: false python: - version: 0.18.0 + version: 0.19.0 author: Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK for Unstructured API diff --git a/setup.py b/setup.py index 2599d8c2..072d425c 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="unstructured-client", - version="0.18.0", + version="0.19.0", author="Unstructured", description="Python Client SDK for Unstructured API", license = "MIT", diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index 6d662b55..5ac651de 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -1,10 +1,9 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" from .sdkconfiguration import SDKConfiguration -from typing import Any, List, Optional +from typing import List, Optional from unstructured_client import utils from unstructured_client.models import errors, operations, shared -from unstructured_client.utils._human_utils import suggest_defining_url_if_401 # human code class General: sdk_configuration: SDKConfiguration @@ -13,16 +12,20 @@ def __init__(self, sdk_config: SDKConfiguration) -> None: self.sdk_configuration = sdk_config - @suggest_defining_url_if_401 # human code - def partition(self, request: Optional[shared.PartitionParameters], retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: - r"""Pipeline 1""" + + def partition(self, request: operations.PartitionParametersRequest, retries: Optional[utils.RetryConfig] = None) -> operations.PartitionParametersResponse: + r"""Summary + Description + """ base_url = utils.template_url(*self.sdk_configuration.get_server_details()) url = base_url + '/general/v0/general' - headers = {} - req_content_type, data, form = utils.serialize_request_body(request, Optional[shared.PartitionParameters], "request", False, True, 'multipart') + headers = utils.get_headers(request) + req_content_type, data, form = utils.serialize_request_body(request, operations.PartitionParametersRequest, "body_partition_parameters", False, False, 'multipart') if req_content_type not in ('multipart/form-data', 'multipart/mixed'): headers['content-type'] = req_content_type + if data is None and form is None: + raise Exception('request body is required') headers['Accept'] = 'application/json' headers['user-agent'] = self.sdk_configuration.user_agent @@ -47,12 +50,12 @@ def do_request(): ])) content_type = http_res.headers.get('Content-Type') - res = operations.PartitionResponse(status_code=http_res.status_code, content_type=content_type, raw_response=http_res) + res = operations.PartitionParametersResponse(status_code=http_res.status_code, 
content_type=content_type, raw_response=http_res) if http_res.status_code == 200: if utils.match_content_type(content_type, 'application/json'): - out = utils.unmarshal_json(http_res.text, Optional[List[Any]]) - res.elements = out + out = utils.unmarshal_json(http_res.text, Optional[List[shared.Element]]) + res.response_partition_parameters = out else: raise errors.SDKError(f'unknown content-type received: {content_type}', http_res.status_code, http_res.text, http_res) elif http_res.status_code == 422: @@ -67,4 +70,4 @@ def do_request(): return res - + \ No newline at end of file diff --git a/src/unstructured_client/models/operations/__init__.py b/src/unstructured_client/models/operations/__init__.py index 175065fa..5f1fcd9e 100644 --- a/src/unstructured_client/models/operations/__init__.py +++ b/src/unstructured_client/models/operations/__init__.py @@ -1,5 +1,5 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" -from .partition import * +from .partition_parameters import * -__all__ = ["PartitionResponse"] +__all__ = ["PartitionParametersRequest","PartitionParametersResponse"] diff --git a/src/unstructured_client/models/operations/partition.py b/src/unstructured_client/models/operations/partition.py deleted file mode 100644 index b5fc73c7..00000000 --- a/src/unstructured_client/models/operations/partition.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" - -from __future__ import annotations -import dataclasses -import requests as requests_http -from typing import Any, List, Optional - - -@dataclasses.dataclass -class PartitionResponse: - content_type: str = dataclasses.field() - r"""HTTP response content type for this operation""" - status_code: int = dataclasses.field() - r"""HTTP response status code for this operation""" - raw_response: requests_http.Response = dataclasses.field() - r"""Raw HTTP response; suitable for custom response parsing""" - elements: Optional[List[Any]] = dataclasses.field(default=None) - r"""Successful Response""" - - diff --git a/src/unstructured_client/models/operations/partition_parameters.py b/src/unstructured_client/models/operations/partition_parameters.py new file mode 100644 index 00000000..5f6b34f0 --- /dev/null +++ b/src/unstructured_client/models/operations/partition_parameters.py @@ -0,0 +1,31 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" + +from __future__ import annotations +import dataclasses +import requests as requests_http +from ...models.shared import body_partition_parameters as shared_body_partition_parameters +from ...models.shared import element as shared_element +from typing import List, Optional + + +@dataclasses.dataclass +class PartitionParametersRequest: + UNSET='__SPEAKEASY_UNSET__' + body_partition_parameters: shared_body_partition_parameters.BodyPartitionParameters = dataclasses.field(metadata={'request': { 'media_type': 'multipart/form-data' }}) + unstructured_api_key: Optional[str] = dataclasses.field(default=UNSET, metadata={'header': { 'field_name': 'unstructured-api-key', 'style': 'simple', 'explode': False }}) + + + + +@dataclasses.dataclass +class PartitionParametersResponse: + content_type: str = dataclasses.field() + r"""HTTP response content type for this operation""" + status_code: int = dataclasses.field() + r"""HTTP response status code for this operation""" + raw_response: requests_http.Response 
= dataclasses.field() + r"""Raw HTTP response; suitable for custom response parsing""" + response_partition_parameters: Optional[List[shared_element.Element]] = dataclasses.field(default=None) + r"""Successful Response""" + + diff --git a/src/unstructured_client/models/shared/__init__.py b/src/unstructured_client/models/shared/__init__.py index bb3fc976..c535b76f 100644 --- a/src/unstructured_client/models/shared/__init__.py +++ b/src/unstructured_client/models/shared/__init__.py @@ -1,7 +1,8 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" -from .partition_parameters import * +from .body_partition_parameters import * +from .element import * from .security import * from .validationerror import * -__all__ = ["Files","PartitionParameters","Security","ValidationError"] +__all__ = ["BodyPartitionParameters","Element","Files","Metadata","Security","Strategy","ValidationError"] diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/body_partition_parameters.py similarity index 52% rename from src/unstructured_client/models/shared/partition_parameters.py rename to src/unstructured_client/models/shared/body_partition_parameters.py index cd44e544..36158a95 100644 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/body_partition_parameters.py @@ -2,7 +2,8 @@ from __future__ import annotations import dataclasses -from typing import List, Optional +from enum import Enum +from typing import Any, List, Optional @dataclasses.dataclass @@ -12,44 +13,58 @@ class Files: +class Strategy(str, Enum): + r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. 
Default: auto""" + FAST = 'fast' + HI_RES = 'hi_res' + AUTO = 'auto' + OCR_ONLY = 'ocr_only' + @dataclasses.dataclass -class PartitionParameters: - chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) +class BodyPartitionParameters: + UNSET='__SPEAKEASY_UNSET__' + files: Files = dataclasses.field(metadata={'multipart_form': { 'file': True }}) + r"""The file to extract""" + chunking_strategy: Optional[Any] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title""" - combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) + combine_under_n_chars: Optional[int] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" - coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) + coordinates: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'coordinates' }}) r"""If true, return coordinates for each element. Default: false""" - encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) + encoding: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'encoding' }}) r"""The encoding method used to decode the text input. 
Default: utf-8""" extract_image_block_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'extract_image_block_types' }}) r"""The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields""" - files: Optional[Files] = dataclasses.field(default=None, metadata={'multipart_form': { 'file': True }}) - r"""The file to extract""" - gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) + gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) r"""If file is gzipped, use this content type after unzipping""" - hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) + hi_res_model_name: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) r"""The name of the inference model used when strategy is hi_res""" - include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) + include_page_breaks: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) r"""If True, the output will include page breaks if the filetype supports it. 
Default: false""" languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) r"""The languages present in the document, for use in partitioning and/or OCR""" - max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) + max_characters: Optional[int] = dataclasses.field(default=500, metadata={'multipart_form': { 'field_name': 'max_characters' }}) r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500""" - multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) + multipage_sections: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) r"""If chunking strategy is set, determines if sections can span multiple sections. Default: true""" - new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) + new_after_n_chars: Optional[int] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" - output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) + ocr_languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'ocr_languages' }}) + r"""The languages present in the document, for use in partitioning and/or OCR""" + output_format: Optional[str] = dataclasses.field(default='application/json', metadata={'multipart_form': { 'field_name': 'output_format' }}) r"""The format of the response. Supported formats are application/json and text/csv. 
Default: application/json.""" - pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) + overlap: Optional[int] = dataclasses.field(default=0, metadata={'multipart_form': { 'field_name': 'overlap' }}) + r"""Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0""" + overlap_all: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'overlap_all' }}) + r"""When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False""" + pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
.""" skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) r"""The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']""" - strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'strategy' }}) + strategy: Optional[Strategy] = dataclasses.field(default=Strategy.AUTO, metadata={'multipart_form': { 'field_name': 'strategy' }}) r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" - xml_keep_tags: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) + xml_keep_tags: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) r"""If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml.""" diff --git a/src/unstructured_client/models/shared/element.py b/src/unstructured_client/models/shared/element.py new file mode 100644 index 00000000..dddd4afe --- /dev/null +++ b/src/unstructured_client/models/shared/element.py @@ -0,0 +1,22 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" + +from __future__ import annotations +import dataclasses +from dataclasses_json import Undefined, dataclass_json +from unstructured_client import utils + + +@dataclasses.dataclass +class Metadata: + pass + + +@dataclass_json(undefined=Undefined.EXCLUDE) +@dataclasses.dataclass +class Element: + element_id: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('element_id') }}) + metadata: Metadata = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('metadata') }}) + text: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('text') }}) + type: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('type') }}) + + diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index 3cf032c4..c131ec49 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -6,15 +6,12 @@ from typing import Callable, Dict, Union from unstructured_client import utils from unstructured_client.models import shared -from unstructured_client.utils._human_utils import clean_server_url # human code class UnstructuredClient: - r"""Unstructured Pipeline API: Partition documents with the Unstructured library""" general: General sdk_configuration: SDKConfiguration - @clean_server_url # human code def __init__(self, api_key_auth: Union[str, Callable[[], str]], server: str = None, @@ -57,4 +54,4 @@ def security(): def _init_sdks(self): self.general = General(self.sdk_configuration) - + \ No newline at end of file diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index f5d9f83d..d604bbc5 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -1,10 +1,11 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" -import requests -from dataclasses import dataclass -from typing import Dict, Tuple, Callable, Union -from .utils.retries import RetryConfig + +import requests as requests_http from .utils import utils +from .utils.retries import RetryConfig +from dataclasses import dataclass +from typing import Callable, Dict, Tuple, Union from unstructured_client.models import shared @@ -21,15 +22,15 @@ @dataclass class SDKConfiguration: - client: requests.Session + client: requests_http.Session security: Union[shared.Security,Callable[[], shared.Security]] = None server_url: str = '' server: str = '' language: str = 'python' - openapi_doc_version: str = '0.0.1' - sdk_version: str = '0.18.0' - gen_version: str = '2.250.19' - user_agent: str = 'speakeasy-sdk/python 0.18.0 2.250.19 0.0.1 unstructured-client' + openapi_doc_version: str = '0.0.64' + sdk_version: str = '0.19.0' + gen_version: str = '2.262.2' + user_agent: str = 'speakeasy-sdk/python 0.19.0 2.262.2 0.0.64 unstructured-client' retry_config: RetryConfig = None def get_server_details(self) -> Tuple[str, Dict[str, str]]: diff --git a/src/unstructured_client/utils/retries.py b/src/unstructured_client/utils/retries.py index 6ffe8942..8eba0940 100644 --- a/src/unstructured_client/utils/retries.py +++ b/src/unstructured_client/utils/retries.py @@ -6,8 +6,6 @@ import requests -from unstructured_client.utils._human_utils import log_retries # human code - class BackoffStrategy: initial_interval: int @@ -118,6 +116,5 @@ def retry_with_backoff(func, initial_interval=500, max_interval=60000, exponent= exponent**retries + random.uniform(0, 1)) if sleep > max_interval/1000: sleep = max_interval/1000 - log_retries(retry_count=retries+1, sleep=sleep, exception=exception) # human code time.sleep(sleep) retries += 1 From e3f6de62989501c2537a8efa0a01e6db0a347469 Mon Sep 17 00:00:00 2001 From: speakeasybot Date: Thu, 22 Feb 2024 00:18:44 +0000 Subject: [PATCH 2/3] ci: regenerated with OpenAPI Doc 0.0.1, Speakeasy CLI 1.189.0 
--- .speakeasy/gen.lock | 28 ++++------ RELEASES.md | 12 ++++- USAGE.md | 33 ++++++++---- .../operations/partitionparametersrequest.md | 9 ---- ...metersresponse.md => partitionresponse.md} | 4 +- docs/models/shared/bodypartitionparameters.md | 28 ---------- docs/models/shared/element.md | 11 ---- docs/models/shared/metadata.md | 7 --- docs/models/shared/partitionparameters.md | 25 +++++++++ docs/models/shared/strategy.md | 13 ----- gen.yaml | 2 +- setup.py | 2 +- src/unstructured_client/general.py | 20 +++----- .../models/operations/__init__.py | 4 +- .../models/operations/partition.py | 20 ++++++++ .../models/operations/partition_parameters.py | 31 ----------- .../models/shared/__init__.py | 5 +- .../models/shared/element.py | 22 -------- ..._parameters.py => partition_parameters.py} | 51 +++++++------------ src/unstructured_client/sdk.py | 1 + src/unstructured_client/sdkconfiguration.py | 8 +-- 21 files changed, 129 insertions(+), 207 deletions(-) delete mode 100644 docs/models/operations/partitionparametersrequest.md rename docs/models/operations/{partitionparametersresponse.md => partitionresponse.md} (89%) delete mode 100644 docs/models/shared/bodypartitionparameters.md delete mode 100644 docs/models/shared/element.md delete mode 100644 docs/models/shared/metadata.md create mode 100644 docs/models/shared/partitionparameters.md delete mode 100644 docs/models/shared/strategy.md create mode 100644 src/unstructured_client/models/operations/partition.py delete mode 100644 src/unstructured_client/models/operations/partition_parameters.py delete mode 100644 src/unstructured_client/models/shared/element.py rename src/unstructured_client/models/shared/{body_partition_parameters.py => partition_parameters.py} (52%) diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index a8649730..a7f8b09b 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,19 +1,18 @@ lockVersion: 2.0.0 id: 8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 
8e80e4d12e16961f9061ef746c01761a - docVersion: 0.0.64 + docChecksum: 903444f359d1dfa6342c692ae3e5c7ff + docVersion: 0.0.1 speakeasyVersion: internal - generationVersion: 2.262.2 - releaseVersion: 0.19.0 - configChecksum: bcbf9e2848a6a837e9453f70dbc10b07 + generationVersion: 2.263.3 + releaseVersion: 0.20.0 + configChecksum: cc7d371a0d9a447399c02d889587222d repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git published: true features: python: - constsAndDefaults: 0.1.2 core: 4.4.6 examples: 2.81.3 globalSecurity: 2.83.2 @@ -34,26 +33,21 @@ generatedFiles: - src/unstructured_client/utils/utils.py - src/unstructured_client/models/errors/sdkerror.py - tests/helpers.py - - src/unstructured_client/models/operations/partition_parameters.py - - src/unstructured_client/models/shared/element.py + - src/unstructured_client/models/operations/partition.py + - src/unstructured_client/models/errors/httpvalidationerror.py - src/unstructured_client/models/shared/validationerror.py - - src/unstructured_client/models/shared/body_partition_parameters.py + - src/unstructured_client/models/shared/partition_parameters.py - src/unstructured_client/models/shared/security.py - - src/unstructured_client/models/errors/httpvalidationerror.py - src/unstructured_client/models/__init__.py - src/unstructured_client/models/errors/__init__.py - src/unstructured_client/models/operations/__init__.py - src/unstructured_client/models/shared/__init__.py - - docs/models/operations/partitionparametersrequest.md - - docs/models/operations/partitionparametersresponse.md - - docs/models/shared/metadata.md - - docs/models/shared/element.md + - docs/models/operations/partitionresponse.md + - docs/models/errors/httpvalidationerror.md - docs/models/shared/loc.md - docs/models/shared/validationerror.md - docs/models/shared/files.md - - docs/models/shared/strategy.md - - 
docs/models/shared/bodypartitionparameters.md + - docs/models/shared/partitionparameters.md - docs/models/shared/security.md - - docs/models/errors/httpvalidationerror.md - USAGE.md - .gitattributes diff --git a/RELEASES.md b/RELEASES.md index 17efc126..957246eb 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -434,4 +434,14 @@ Based on: ### Generated - [python v0.19.0] . ### Releases -- [PyPI v0.19.0] https://pypi.org/project/unstructured-client/0.19.0 - . \ No newline at end of file +- [PyPI v0.19.0] https://pypi.org/project/unstructured-client/0.19.0 - . + +## 2024-02-22 00:18:37 +### Changes +Based on: +- OpenAPI Doc 0.0.1 +- Speakeasy CLI 1.189.0 (2.263.3) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.20.0] . +### Releases +- [PyPI v0.20.0] https://pypi.org/project/unstructured-client/0.20.0 - . \ No newline at end of file diff --git a/USAGE.md b/USAGE.md index 1c1cc7b2..e7a36ae7 100644 --- a/USAGE.md +++ b/USAGE.md @@ -1,25 +1,38 @@ ```python import unstructured_client -from unstructured_client.models import operations, shared +from unstructured_client.models import shared s = unstructured_client.UnstructuredClient( api_key_auth="YOUR_API_KEY", ) -req = operations.PartitionParametersRequest( - body_partition_parameters=shared.BodyPartitionParameters( - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='um.shtml', - ), - strategy=shared.Strategy.HI_RES, - ), +req = shared.PartitionParameters( + chunking_strategy='by_title', + combine_under_n_chars=500, + encoding='utf-8', + extract_image_block_types=[ + 'image', + 'table', + ], + gz_uncompressed_content_type='application/pdf', + hi_res_model_name='yolox', + languages=[ + '[', + 'e', + 'n', + 'g', + ']', + ], + max_characters=1500, + new_after_n_chars=1500, + output_format='application/json', + strategy='hi_res', ) res = s.general.partition(req) -if res.response_partition_parameters is not None: +if res.elements is not None: # handle response pass ``` diff --git 
a/docs/models/operations/partitionparametersrequest.md b/docs/models/operations/partitionparametersrequest.md deleted file mode 100644 index 019a2840..00000000 --- a/docs/models/operations/partitionparametersrequest.md +++ /dev/null @@ -1,9 +0,0 @@ -# PartitionParametersRequest - - -## Fields - -| Field | Type | Required | Description | -| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------- | -| `body_partition_parameters` | [shared.BodyPartitionParameters](../../models/shared/bodypartitionparameters.md) | :heavy_check_mark: | N/A | -| `unstructured_api_key` | *Optional[str]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/models/operations/partitionparametersresponse.md b/docs/models/operations/partitionresponse.md similarity index 89% rename from docs/models/operations/partitionparametersresponse.md rename to docs/models/operations/partitionresponse.md index db64e639..b0dbc682 100644 --- a/docs/models/operations/partitionparametersresponse.md +++ b/docs/models/operations/partitionresponse.md @@ -1,4 +1,4 @@ -# PartitionParametersResponse +# PartitionResponse ## Fields @@ -8,4 +8,4 @@ | `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation | | `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation | | `raw_response` | [requests.Response](https://requests.readthedocs.io/en/latest/api/#requests.Response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing | -| `response_partition_parameters` | List[[shared.Element](../../models/shared/element.md)] | :heavy_minus_sign: | Successful Response | \ No newline at end of file +| `elements` | List[*Any*] | :heavy_minus_sign: | 
Successful Response | \ No newline at end of file diff --git a/docs/models/shared/bodypartitionparameters.md b/docs/models/shared/bodypartitionparameters.md deleted file mode 100644 index ed8a581f..00000000 --- a/docs/models/shared/bodypartitionparameters.md +++ /dev/null @@ -1,28 +0,0 @@ -# BodyPartitionParameters - - -## Fields - -| Field | Type | Required | Description | Example | -| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `files` | [shared.Files](../../models/shared/files.md) | :heavy_check_mark: | The file to extract | | -| `chunking_strategy` | *Optional[Any]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. 
Currently supports: by_title | | -| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | | -| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | -| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | | -| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | | -| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | | -| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | | -| `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | | -| `languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | -| `max_characters` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500 | | -| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple sections. Default: true | | -| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | | -| `ocr_languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | | -| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. 
Default: application/json. | | -| `overlap` | *Optional[int]* | :heavy_minus_sign: | Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0 | | -| `overlap_all` | *Optional[bool]* | :heavy_minus_sign: | When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False | | -| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
. | | -| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | -| `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | auto | -| `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml. | | \ No newline at end of file diff --git a/docs/models/shared/element.md b/docs/models/shared/element.md deleted file mode 100644 index 403252e8..00000000 --- a/docs/models/shared/element.md +++ /dev/null @@ -1,11 +0,0 @@ -# Element - - -## Fields - -| Field | Type | Required | Description | -| -------------------------------------------------- | -------------------------------------------------- | -------------------------------------------------- | -------------------------------------------------- | -| `element_id` | *str* | :heavy_check_mark: | N/A | -| `metadata` | [shared.Metadata](../../models/shared/metadata.md) | :heavy_check_mark: | N/A | -| `text` | *str* | :heavy_check_mark: | N/A | -| `type` | *str* | :heavy_check_mark: | N/A | \ No newline at end of file diff --git a/docs/models/shared/metadata.md b/docs/models/shared/metadata.md deleted file mode 100644 index e655f580..00000000 --- a/docs/models/shared/metadata.md +++ /dev/null @@ -1,7 +0,0 @@ -# Metadata - - -## Fields - -| Field | Type | Required | Description | -| ----------- | ----------- | ----------- | ----------- | \ No newline at end of file diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md new file mode 100644 index 00000000..4e2b8f97 --- /dev/null +++ b/docs/models/shared/partitionparameters.md @@ -0,0 +1,25 @@ +# PartitionParameters + + +## Fields + +| Field | Type | 
Required | Description | Example | +| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `chunking_strategy` | *Optional[str]* | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements. Currently supports: by_title | by_title | +| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 | +| `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | +| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. 
Default: utf-8 | utf-8 | +| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | ["image","table"] | +| `files` | [Optional[shared.Files]](../../models/shared/files.md) | :heavy_minus_sign: | The file to extract | | +| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf | +| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox | +| `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If True, the output will include page breaks if the filetype supports it. Default: false | | +| `languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR | [eng] | +| `max_characters` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500 | 1500 | +| `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple sections. Default: true | | +| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | 1500 | +| `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json | +| `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
. | | +| `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | +| `strategy` | *Optional[str]* | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | hi_res | +| `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml. | | \ No newline at end of file diff --git a/docs/models/shared/strategy.md b/docs/models/shared/strategy.md deleted file mode 100644 index 2c6d2875..00000000 --- a/docs/models/shared/strategy.md +++ /dev/null @@ -1,13 +0,0 @@ -# Strategy - -The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto - - -## Values - -| Name | Value | -| ---------- | ---------- | -| `FAST` | fast | -| `HI_RES` | hi_res | -| `AUTO` | auto | -| `OCR_ONLY` | ocr_only | \ No newline at end of file diff --git a/gen.yaml b/gen.yaml index 2dc9f53d..86ff4160 100644 --- a/gen.yaml +++ b/gen.yaml @@ -8,7 +8,7 @@ generation: parameterOrderingFeb2024: false requestResponseComponentNamesFeb2024: false python: - version: 0.19.0 + version: 0.20.0 author: Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK for Unstructured API diff --git a/setup.py b/setup.py index 072d425c..8924d912 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="unstructured-client", - version="0.19.0", + version="0.20.0", author="Unstructured", description="Python Client SDK for Unstructured API", license = "MIT", diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index 5ac651de..cbd14b1c 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -1,7 +1,7 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" from .sdkconfiguration import SDKConfiguration -from typing import List, Optional +from typing import Any, List, Optional from unstructured_client import utils from unstructured_client.models import errors, operations, shared @@ -13,19 +13,15 @@ def __init__(self, sdk_config: SDKConfiguration) -> None: - def partition(self, request: operations.PartitionParametersRequest, retries: Optional[utils.RetryConfig] = None) -> operations.PartitionParametersResponse: - r"""Summary - Description - """ + def partition(self, request: Optional[shared.PartitionParameters], retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: + r"""Pipeline 1""" base_url = utils.template_url(*self.sdk_configuration.get_server_details()) url = base_url + '/general/v0/general' - headers = utils.get_headers(request) - req_content_type, data, form = utils.serialize_request_body(request, operations.PartitionParametersRequest, "body_partition_parameters", False, False, 'multipart') + headers = {} + req_content_type, data, form = utils.serialize_request_body(request, Optional[shared.PartitionParameters], "request", False, True, 'multipart') if req_content_type not in ('multipart/form-data', 'multipart/mixed'): headers['content-type'] = req_content_type - if data is None and form is None: - raise Exception('request body is required') headers['Accept'] = 'application/json' headers['user-agent'] = self.sdk_configuration.user_agent @@ -50,12 +46,12 @@ def do_request(): ])) content_type = http_res.headers.get('Content-Type') - res = operations.PartitionParametersResponse(status_code=http_res.status_code, content_type=content_type, raw_response=http_res) + res = operations.PartitionResponse(status_code=http_res.status_code, content_type=content_type, raw_response=http_res) if http_res.status_code == 200: if utils.match_content_type(content_type, 'application/json'): - out = utils.unmarshal_json(http_res.text, Optional[List[shared.Element]]) - 
res.response_partition_parameters = out + out = utils.unmarshal_json(http_res.text, Optional[List[Any]]) + res.elements = out else: raise errors.SDKError(f'unknown content-type received: {content_type}', http_res.status_code, http_res.text, http_res) elif http_res.status_code == 422: diff --git a/src/unstructured_client/models/operations/__init__.py b/src/unstructured_client/models/operations/__init__.py index 5f1fcd9e..175065fa 100644 --- a/src/unstructured_client/models/operations/__init__.py +++ b/src/unstructured_client/models/operations/__init__.py @@ -1,5 +1,5 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" -from .partition_parameters import * +from .partition import * -__all__ = ["PartitionParametersRequest","PartitionParametersResponse"] +__all__ = ["PartitionResponse"] diff --git a/src/unstructured_client/models/operations/partition.py b/src/unstructured_client/models/operations/partition.py new file mode 100644 index 00000000..b5fc73c7 --- /dev/null +++ b/src/unstructured_client/models/operations/partition.py @@ -0,0 +1,20 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" + +from __future__ import annotations +import dataclasses +import requests as requests_http +from typing import Any, List, Optional + + +@dataclasses.dataclass +class PartitionResponse: + content_type: str = dataclasses.field() + r"""HTTP response content type for this operation""" + status_code: int = dataclasses.field() + r"""HTTP response status code for this operation""" + raw_response: requests_http.Response = dataclasses.field() + r"""Raw HTTP response; suitable for custom response parsing""" + elements: Optional[List[Any]] = dataclasses.field(default=None) + r"""Successful Response""" + + diff --git a/src/unstructured_client/models/operations/partition_parameters.py b/src/unstructured_client/models/operations/partition_parameters.py deleted file mode 100644 index 5f6b34f0..00000000 --- a/src/unstructured_client/models/operations/partition_parameters.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" - -from __future__ import annotations -import dataclasses -import requests as requests_http -from ...models.shared import body_partition_parameters as shared_body_partition_parameters -from ...models.shared import element as shared_element -from typing import List, Optional - - -@dataclasses.dataclass -class PartitionParametersRequest: - UNSET='__SPEAKEASY_UNSET__' - body_partition_parameters: shared_body_partition_parameters.BodyPartitionParameters = dataclasses.field(metadata={'request': { 'media_type': 'multipart/form-data' }}) - unstructured_api_key: Optional[str] = dataclasses.field(default=UNSET, metadata={'header': { 'field_name': 'unstructured-api-key', 'style': 'simple', 'explode': False }}) - - - - -@dataclasses.dataclass -class PartitionParametersResponse: - content_type: str = dataclasses.field() - r"""HTTP response content type for this operation""" - status_code: int = dataclasses.field() - r"""HTTP response status code for this operation""" - raw_response: 
requests_http.Response = dataclasses.field() - r"""Raw HTTP response; suitable for custom response parsing""" - response_partition_parameters: Optional[List[shared_element.Element]] = dataclasses.field(default=None) - r"""Successful Response""" - - diff --git a/src/unstructured_client/models/shared/__init__.py b/src/unstructured_client/models/shared/__init__.py index c535b76f..bb3fc976 100644 --- a/src/unstructured_client/models/shared/__init__.py +++ b/src/unstructured_client/models/shared/__init__.py @@ -1,8 +1,7 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" -from .body_partition_parameters import * -from .element import * +from .partition_parameters import * from .security import * from .validationerror import * -__all__ = ["BodyPartitionParameters","Element","Files","Metadata","Security","Strategy","ValidationError"] +__all__ = ["Files","PartitionParameters","Security","ValidationError"] diff --git a/src/unstructured_client/models/shared/element.py b/src/unstructured_client/models/shared/element.py deleted file mode 100644 index dddd4afe..00000000 --- a/src/unstructured_client/models/shared/element.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" - -from __future__ import annotations -import dataclasses -from dataclasses_json import Undefined, dataclass_json -from unstructured_client import utils - - -@dataclasses.dataclass -class Metadata: - pass - - -@dataclass_json(undefined=Undefined.EXCLUDE) -@dataclasses.dataclass -class Element: - element_id: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('element_id') }}) - metadata: Metadata = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('metadata') }}) - text: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('text') }}) - type: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('type') }}) - - diff --git a/src/unstructured_client/models/shared/body_partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py similarity index 52% rename from src/unstructured_client/models/shared/body_partition_parameters.py rename to src/unstructured_client/models/shared/partition_parameters.py index 36158a95..cd44e544 100644 --- a/src/unstructured_client/models/shared/body_partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -2,8 +2,7 @@ from __future__ import annotations import dataclasses -from enum import Enum -from typing import Any, List, Optional +from typing import List, Optional @dataclasses.dataclass @@ -13,58 +12,44 @@ class Files: -class Strategy(str, Enum): - r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. 
Default: auto""" - FAST = 'fast' - HI_RES = 'hi_res' - AUTO = 'auto' - OCR_ONLY = 'ocr_only' - @dataclasses.dataclass -class BodyPartitionParameters: - UNSET='__SPEAKEASY_UNSET__' - files: Files = dataclasses.field(metadata={'multipart_form': { 'file': True }}) - r"""The file to extract""" - chunking_strategy: Optional[Any] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) +class PartitionParameters: + chunking_strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) r"""Use one of the supported strategies to chunk the returned elements. Currently supports: by_title""" - combine_under_n_chars: Optional[int] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) + combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" - coordinates: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'coordinates' }}) + coordinates: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'coordinates' }}) r"""If true, return coordinates for each element. Default: false""" - encoding: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'encoding' }}) + encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) r"""The encoding method used to decode the text input. 
Default: utf-8""" extract_image_block_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'extract_image_block_types' }}) r"""The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields""" - gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) + files: Optional[Files] = dataclasses.field(default=None, metadata={'multipart_form': { 'file': True }}) + r"""The file to extract""" + gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) r"""If file is gzipped, use this content type after unzipping""" - hi_res_model_name: Optional[str] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) + hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) r"""The name of the inference model used when strategy is hi_res""" - include_page_breaks: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) + include_page_breaks: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) r"""If True, the output will include page breaks if the filetype supports it. 
Default: false""" languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) r"""The languages present in the document, for use in partitioning and/or OCR""" - max_characters: Optional[int] = dataclasses.field(default=500, metadata={'multipart_form': { 'field_name': 'max_characters' }}) + max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 1500""" - multipage_sections: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) + multipage_sections: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) r"""If chunking strategy is set, determines if sections can span multiple sections. Default: true""" - new_after_n_chars: Optional[int] = dataclasses.field(default=UNSET, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) + new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" - ocr_languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'ocr_languages' }}) - r"""The languages present in the document, for use in partitioning and/or OCR""" - output_format: Optional[str] = dataclasses.field(default='application/json', metadata={'multipart_form': { 'field_name': 'output_format' }}) + output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) r"""The format of the response. Supported formats are application/json and text/csv. 
Default: application/json.""" - overlap: Optional[int] = dataclasses.field(default=0, metadata={'multipart_form': { 'field_name': 'overlap' }}) - r"""Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0""" - overlap_all: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'overlap_all' }}) - r"""When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False""" - pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) + pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
.""" skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) r"""The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png']""" - strategy: Optional[Strategy] = dataclasses.field(default=Strategy.AUTO, metadata={'multipart_form': { 'field_name': 'strategy' }}) + strategy: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'strategy' }}) r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" - xml_keep_tags: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) + xml_keep_tags: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) r"""If True, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to partition_xml.""" diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index c131ec49..18fb5cb1 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -8,6 +8,7 @@ from unstructured_client.models import shared class UnstructuredClient: + r"""Unstructured Pipeline API: Partition documents with the Unstructured library""" general: General sdk_configuration: SDKConfiguration diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index d604bbc5..50de515b 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -27,10 +27,10 @@ class SDKConfiguration: server_url: str = '' server: str = '' language: str = 'python' - openapi_doc_version: str = '0.0.64' - sdk_version: str = '0.19.0' - gen_version: str = '2.262.2' - user_agent: str = 'speakeasy-sdk/python 0.19.0 2.262.2 0.0.64 unstructured-client' + openapi_doc_version: str = '0.0.1' + 
sdk_version: str = '0.20.0' + gen_version: str = '2.263.3' + user_agent: str = 'speakeasy-sdk/python 0.20.0 2.263.3 0.0.1 unstructured-client' retry_config: RetryConfig = None def get_server_details(self) -> Tuple[str, Dict[str, str]]: From 39edb7ec830dd42a61656b5e31f343abacdaf215 Mon Sep 17 00:00:00 2001 From: speakeasybot Date: Fri, 1 Mar 2024 23:20:15 +0000 Subject: [PATCH 3/3] ci: regenerated with OpenAPI Doc 0.0.1, Speakeasy CLI 1.200.0 --- .speakeasy/gen.lock | 17 +++-- RELEASES.md | 12 +++- USAGE.md | 2 + docs/models/shared/partitionparameters.md | 4 +- gen.yaml | 8 ++- pylintrc | 3 +- setup.py | 10 +-- src/unstructured_client/_hooks/__init__.py | 4 ++ src/unstructured_client/_hooks/sdkhooks.py | 55 +++++++++++++++ src/unstructured_client/_hooks/types.py | 70 +++++++++++++++++++ src/unstructured_client/general.py | 33 ++++++++- .../models/shared/partition_parameters.py | 4 ++ src/unstructured_client/sdk.py | 15 +++- src/unstructured_client/sdkconfiguration.py | 12 +++- src/unstructured_client/utils/retries.py | 3 + src/unstructured_client/utils/utils.py | 44 ++++++++---- 16 files changed, 260 insertions(+), 36 deletions(-) create mode 100644 src/unstructured_client/_hooks/__init__.py create mode 100644 src/unstructured_client/_hooks/sdkhooks.py create mode 100644 src/unstructured_client/_hooks/types.py diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index a7f8b09b..50da82b7 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,26 +1,26 @@ lockVersion: 2.0.0 id: 8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 903444f359d1dfa6342c692ae3e5c7ff + docChecksum: a112aea005467aa6818696fa4e99fcfe docVersion: 0.0.1 speakeasyVersion: internal - generationVersion: 2.263.3 - releaseVersion: 0.20.0 - configChecksum: cc7d371a0d9a447399c02d889587222d + generationVersion: 2.277.0 + releaseVersion: 0.21.0 + configChecksum: c5e7c8526f43272d7585627468d8c4e5 repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git 
repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git published: true features: python: - core: 4.4.6 + core: 4.5.0 examples: 2.81.3 - globalSecurity: 2.83.2 + globalSecurity: 2.83.4 globalServerURLs: 2.82.1 nameOverrides: 2.81.1 retries: 2.82.1 serverIDs: 2.81.1 - unions: 2.82.5 + unions: 2.82.6 generatedFiles: - src/unstructured_client/sdkconfiguration.py - src/unstructured_client/general.py @@ -51,3 +51,6 @@ generatedFiles: - docs/models/shared/security.md - USAGE.md - .gitattributes + - src/unstructured_client/_hooks/sdkhooks.py + - src/unstructured_client/_hooks/types.py + - src/unstructured_client/_hooks/__init__.py diff --git a/RELEASES.md b/RELEASES.md index 957246eb..93856bc2 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -444,4 +444,14 @@ Based on: ### Generated - [python v0.20.0] . ### Releases -- [PyPI v0.20.0] https://pypi.org/project/unstructured-client/0.20.0 - . \ No newline at end of file +- [PyPI v0.20.0] https://pypi.org/project/unstructured-client/0.20.0 - . + +## 2024-03-01 23:20:07 +### Changes +Based on: +- OpenAPI Doc 0.0.1 +- Speakeasy CLI 1.200.0 (2.277.0) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.21.0] . +### Releases +- [PyPI v0.21.0] https://pypi.org/project/unstructured-client/0.21.0 - . 
\ No newline at end of file diff --git a/USAGE.md b/USAGE.md index e7a36ae7..919a0d13 100644 --- a/USAGE.md +++ b/USAGE.md @@ -27,6 +27,8 @@ req = shared.PartitionParameters( max_characters=1500, new_after_n_chars=1500, output_format='application/json', + overlap=25, + overlap_all=True, strategy='hi_res', ) diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index 4e2b8f97..df9df34b 100644 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -9,7 +9,7 @@ | `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | 500 | | `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If true, return coordinates for each element. Default: false | | | `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | utf-8 | -| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | ["image","table"] | +| `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields | [
"image",
"table"
] | | `files` | [Optional[shared.Files]](../../models/shared/files.md) | :heavy_minus_sign: | The file to extract | | | `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping | application/pdf | | `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | yolox | @@ -19,6 +19,8 @@ | `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple sections. Default: true | | | `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | 1500 | | `output_format` | *Optional[str]* | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | application/json | +| `overlap` | *Optional[int]* | :heavy_minus_sign: | A prefix of this many trailing characters from prior text-split chunk is applied to second and later chunks formed from oversized elements by text-splitting. Default: None | 25 | +| `overlap_all` | *Optional[bool]* | :heavy_minus_sign: | When True, overlap is also applied to 'normal' chunks formed by combining whole elements. Use with caution as this can introduce noise into otherwise clean semantic units. Default: None | 1500 | | `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
. | | | `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: ['pdf', 'jpg', 'png'] | | | `strategy` | *Optional[str]* | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | hi_res | diff --git a/gen.yaml b/gen.yaml index 86ff4160..0ebedbc6 100644 --- a/gen.yaml +++ b/gen.yaml @@ -7,8 +7,14 @@ generation: nameResolutionDec2023: false parameterOrderingFeb2024: false requestResponseComponentNamesFeb2024: false + auth: + oAuth2ClientCredentialsEnabled: false python: - version: 0.20.0 + version: 0.21.0 + additionalDependencies: + dependencies: {} + extraDependencies: + dev: {} author: Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK for Unstructured API diff --git a/pylintrc b/pylintrc index bf0a55d2..8537761c 100644 --- a/pylintrc +++ b/pylintrc @@ -179,7 +179,8 @@ good-names=i, k, ex, Run, - _ + _, + e # Good variable names regexes, separated by a comma. 
If names match any regex, # they will always be accepted diff --git a/setup.py b/setup.py index 8924d912..f546a557 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setuptools.setup( name="unstructured-client", - version="0.20.0", + version="0.21.0", author="Unstructured", description="Python Client SDK for Unstructured API", license = "MIT", @@ -20,9 +20,9 @@ install_requires=[ "certifi>=2023.7.22", "charset-normalizer>=3.2.0", - "dataclasses-json-speakeasy>=0.5.11", + "dataclasses-json>=0.6.4", "idna>=3.4", - "jsonpath-python>=1.0.6 ", + "jsonpath-python>=1.0.6", "marshmallow>=3.19.0", "mypy-extensions>=1.0.0", "packaging>=23.1", @@ -34,7 +34,9 @@ "urllib3>=1.26.18", ], extras_require={ - "dev":["pylint==2.16.2"] + "dev": [ + "pylint==2.16.2", + ], }, package_dir={'': 'src'}, python_requires='>=3.8', diff --git a/src/unstructured_client/_hooks/__init__.py b/src/unstructured_client/_hooks/__init__.py new file mode 100644 index 00000000..b2ab14b3 --- /dev/null +++ b/src/unstructured_client/_hooks/__init__.py @@ -0,0 +1,4 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" + +from .sdkhooks import * +from .types import * diff --git a/src/unstructured_client/_hooks/sdkhooks.py b/src/unstructured_client/_hooks/sdkhooks.py new file mode 100644 index 00000000..a8f9a583 --- /dev/null +++ b/src/unstructured_client/_hooks/sdkhooks.py @@ -0,0 +1,55 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). 
DO NOT EDIT.""" + +import requests +from .types import SDKInitHook, BeforeRequestContext, BeforeRequestHook, AfterSuccessContext, AfterSuccessHook, AfterErrorContext, AfterErrorHook, Hooks +from typing import List, Optional, Tuple, Union + + +class SDKHooks(Hooks): + sdk_init_hooks: List[SDKInitHook] = [] + before_request_hooks: List[BeforeRequestHook] = [] + after_success_hooks: List[AfterSuccessHook] = [] + after_error_hooks: List[AfterErrorHook] = [] + + def __init__(self): + pass + + def register_sdk_init_hook(self, hook: SDKInitHook) -> None: + self.sdk_init_hooks.append(hook) + + def register_before_request_hook(self, hook: BeforeRequestHook) -> None: + self.before_request_hooks.append(hook) + + def register_after_success_hook(self, hook: AfterSuccessHook) -> None: + self.after_success_hooks.append(hook) + + def register_after_error_hook(self, hook: AfterErrorHook) -> None: + self.after_error_hooks.append(hook) + + def sdk_init(self, base_url: str, client: requests.Session) -> Tuple[str, requests.Session]: + for hook in self.sdk_init_hooks: + base_url, client = hook.sdk_init(base_url, client) + return base_url, client + + def before_request(self, hook_ctx: BeforeRequestContext, request: requests.PreparedRequest) -> Union[requests.PreparedRequest, Exception]: + for hook in self.before_request_hooks: + request = hook.before_request(hook_ctx, request) + if isinstance(request, Exception): + raise request + + return request + + def after_success(self, hook_ctx: AfterSuccessContext, response: requests.Response) -> requests.Response: + for hook in self.after_success_hooks: + response = hook.after_success(hook_ctx, response) + if isinstance(response, Exception): + raise response + return response + + def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests.Response], error: Optional[Exception]) -> Tuple[Optional[requests.Response], Optional[Exception]]: + for hook in self.after_error_hooks: + result = hook.after_error(hook_ctx, response, error) 
+ if isinstance(result, Exception): + raise result + response, error = result + return response, error diff --git a/src/unstructured_client/_hooks/types.py b/src/unstructured_client/_hooks/types.py new file mode 100644 index 00000000..d2fa8629 --- /dev/null +++ b/src/unstructured_client/_hooks/types.py @@ -0,0 +1,70 @@ +"""Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" + +import requests as requests_http +from abc import ABC, abstractmethod +from typing import Any, Callable, List, Optional, Tuple, Union + + +class HookContext: + operation_id: str + oauth2_scopes: Optional[List[str]] = None + security_source: Optional[Union[Any, Callable[[], Any]]] = None + + def __init__(self, operation_id: str, oauth2_scopes: Optional[List[str]], security_source: Optional[Union[Any, Callable[[], Any]]]): + self.operation_id = operation_id + self.oauth2_scopes = oauth2_scopes + self.security_source = security_source + + +class BeforeRequestContext(HookContext): + pass + + +class AfterSuccessContext(HookContext): + pass + + +class AfterErrorContext(HookContext): + pass + + +class SDKInitHook(ABC): + @abstractmethod + def sdk_init(self, base_url: str, client: requests_http.Session) -> Tuple[str, requests_http.Session]: + pass + + +class BeforeRequestHook(ABC): + @abstractmethod + def before_request(self, hook_ctx: BeforeRequestContext, request: requests_http.PreparedRequest) -> Union[requests_http.PreparedRequest, Exception]: + pass + + +class AfterSuccessHook(ABC): + @abstractmethod + def after_success(self, hook_ctx: AfterSuccessContext, response: requests_http.Response) -> Union[requests_http.PreparedRequest, Exception]: + pass + + +class AfterErrorHook(ABC): + @abstractmethod + def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests_http.Response], error: Optional[Exception]) -> Union[Tuple[Optional[requests_http.PreparedRequest], Optional[Exception]], Exception]: + pass + + +class Hooks(ABC): + @abstractmethod + def 
register_sdk_init_hook(self, hook: SDKInitHook): + pass + + @abstractmethod + def register_before_request_hook(self, hook: BeforeRequestHook): + pass + + @abstractmethod + def register_after_success_hook(self, hook: AfterSuccessHook): + pass + + @abstractmethod + def register_after_error_hook(self, hook: AfterErrorHook): + pass diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index cbd14b1c..431f7ec5 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -1,9 +1,12 @@ """Code generated by Speakeasy (https://speakeasyapi.dev). DO NOT EDIT.""" +import requests as requests_http from .sdkconfiguration import SDKConfiguration from typing import Any, List, Optional from unstructured_client import utils +from unstructured_client._hooks import HookContext from unstructured_client.models import errors, operations, shared +from unstructured_client.utils._human_utils import suggest_defining_url_if_401 # human code class General: sdk_configuration: SDKConfiguration @@ -13,14 +16,16 @@ def __init__(self, sdk_config: SDKConfiguration) -> None: + @suggest_defining_url_if_401 # human code def partition(self, request: Optional[shared.PartitionParameters], retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: r"""Pipeline 1""" + hook_ctx = HookContext(operation_id='partition', oauth2_scopes=[], security_source=self.sdk_configuration.security) base_url = utils.template_url(*self.sdk_configuration.get_server_details()) url = base_url + '/general/v0/general' headers = {} req_content_type, data, form = utils.serialize_request_body(request, Optional[shared.PartitionParameters], "request", False, True, 'multipart') - if req_content_type not in ('multipart/form-data', 'multipart/mixed'): + if req_content_type is not None and req_content_type not in ('multipart/form-data', 'multipart/mixed'): headers['content-type'] = req_content_type headers['Accept'] = 'application/json' headers['user-agent'] 
= self.sdk_configuration.user_agent @@ -30,6 +35,7 @@ def partition(self, request: Optional[shared.PartitionParameters], retries: Opti else: client = utils.configure_security_client(self.sdk_configuration.client, self.sdk_configuration.security) + global_retry_config = self.sdk_configuration.retry_config retry_config = retries if retry_config is None: @@ -39,11 +45,32 @@ def partition(self, request: Optional[shared.PartitionParameters], retries: Opti retry_config = utils.RetryConfig('backoff', utils.BackoffStrategy(500, 60000, 1.5, 900000), True) def do_request(): - return client.request('POST', url, data=data, files=form, headers=headers) + try: + req = self.sdk_configuration.get_hooks().before_request( + hook_ctx, + requests_http.Request('POST', url, data=data, files=form, headers=headers).prepare(), + ) + http_res = client.send(req) + except Exception as e: + _, e = self.sdk_configuration.get_hooks().after_error(hook_ctx, None, e) + raise e + + if utils.match_status_codes(['422','4XX','5XX'], http_res.status_code): + http_res, e = self.sdk_configuration.get_hooks().after_error(hook_ctx, http_res, None) + if e: + raise e + else: + result = self.sdk_configuration.get_hooks().after_success(hook_ctx, http_res) + if isinstance(result, Exception): + raise result + http_res = result + + return http_res http_res = utils.retry(do_request, utils.Retries(retry_config, [ '5xx' ])) + content_type = http_res.headers.get('Content-Type') res = operations.PartitionResponse(status_code=http_res.status_code, content_type=content_type, raw_response=http_res) @@ -66,4 +93,4 @@ def do_request(): return res - \ No newline at end of file + diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py index cd44e544..b6ec13c7 100644 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -43,6 +43,10 @@ class PartitionParameters: 
r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" output_format: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'output_format' }}) r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json.""" + overlap: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'overlap' }}) + r"""A prefix of this many trailing characters from prior text-split chunk is applied to second and later chunks formed from oversized elements by text-splitting. Default: None""" + overlap_all: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'overlap_all' }}) + r"""When True, overlap is also applied to 'normal' chunks formed by combining whole elements. Use with caution as this can introduce noise into otherwise clean semantic units. Default: None""" pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) r"""If True and strategy=hi_res, any Table Elements extracted from a PDF will include an additional metadata field, 'text_as_html', where the value (string) is a just a transformation of the data into an HTML
.""" skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index 18fb5cb1..4dd9b7ba 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -5,7 +5,9 @@ from .sdkconfiguration import SDKConfiguration from typing import Callable, Dict, Union from unstructured_client import utils +from unstructured_client._hooks import SDKHooks from unstructured_client.models import shared +from unstructured_client.utils._human_utils import clean_server_url # human code class UnstructuredClient: r"""Unstructured Pipeline API: Partition documents with the Unstructured library""" @@ -13,6 +15,7 @@ class UnstructuredClient: sdk_configuration: SDKConfiguration + @clean_server_url # human code def __init__(self, api_key_auth: Union[str, Callable[[], str]], server: str = None, @@ -50,9 +53,19 @@ def security(): server_url = utils.template_url(server_url, url_params) self.sdk_configuration = SDKConfiguration(client, security, server_url, server, retry_config=retry_config) + + hooks = SDKHooks() + + current_server_url, *_ = self.sdk_configuration.get_server_details() + server_url, self.sdk_configuration.client = hooks.sdk_init(current_server_url, self.sdk_configuration.client) + if current_server_url != server_url: + self.sdk_configuration.server_url = server_url + + # pylint: disable=protected-access + self.sdk_configuration._hooks=hooks self._init_sdks() def _init_sdks(self): self.general = General(self.sdk_configuration) - \ No newline at end of file + diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index 50de515b..573e64c9 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -2,6 +2,7 @@ import requests as requests_http +from ._hooks import SDKHooks from .utils import utils from 
.utils.retries import RetryConfig from dataclasses import dataclass @@ -28,10 +29,11 @@ class SDKConfiguration: server: str = '' language: str = 'python' openapi_doc_version: str = '0.0.1' - sdk_version: str = '0.20.0' - gen_version: str = '2.263.3' - user_agent: str = 'speakeasy-sdk/python 0.20.0 2.263.3 0.0.1 unstructured-client' + sdk_version: str = '0.21.0' + gen_version: str = '2.277.0' + user_agent: str = 'speakeasy-sdk/python 0.21.0 2.277.0 0.0.1 unstructured-client' retry_config: RetryConfig = None + _hooks: SDKHooks = None def get_server_details(self) -> Tuple[str, Dict[str, str]]: if self.server_url: @@ -40,3 +42,7 @@ def get_server_details(self) -> Tuple[str, Dict[str, str]]: self.server = SERVER_PROD return SERVERS[self.server], {} + + + def get_hooks(self) -> SDKHooks: + return self._hooks diff --git a/src/unstructured_client/utils/retries.py b/src/unstructured_client/utils/retries.py index 8eba0940..6ffe8942 100644 --- a/src/unstructured_client/utils/retries.py +++ b/src/unstructured_client/utils/retries.py @@ -6,6 +6,8 @@ import requests +from unstructured_client.utils._human_utils import log_retries # human code + class BackoffStrategy: initial_interval: int @@ -116,5 +118,6 @@ def retry_with_backoff(func, initial_interval=500, max_interval=60000, exponent= exponent**retries + random.uniform(0, 1)) if sleep > max_interval/1000: sleep = max_interval/1000 + log_retries(retry_count=retries+1, sleep=sleep, exception=exception) # human code time.sleep(sleep) retries += 1 diff --git a/src/unstructured_client/utils/utils.py b/src/unstructured_client/utils/utils.py index 52434025..691091c8 100644 --- a/src/unstructured_client/utils/utils.py +++ b/src/unstructured_client/utils/utils.py @@ -21,15 +21,16 @@ class SecurityClient: client: requests.Session query_params: Dict[str, str] = {} + headers: Dict[str, str] = {} def __init__(self, client: requests.Session): self.client = client - def request(self, method, url, **kwargs): - params = kwargs.get('params', 
{}) - kwargs["params"] = {**self.query_params, **params} + def send(self, request: requests.PreparedRequest, **kwargs): + request.prepare_url(url=request.url, params=self.query_params) + request.headers.update(self.headers) - return self.client.request(method, url, **kwargs) + return self.client.send(request, **kwargs) def configure_security_client(client: requests.Session, security: dataclass): @@ -102,20 +103,19 @@ def _parse_security_scheme_value(client: SecurityClient, scheme_metadata: Dict, if scheme_type == "apiKey": if sub_type == 'header': - client.client.headers[header_name] = value + client.headers[header_name] = value elif sub_type == 'query': client.query_params[header_name] = value - elif sub_type == 'cookie': - client.client.cookies[header_name] = value else: raise Exception('not supported') elif scheme_type == "openIdConnect": - client.client.headers[header_name] = _apply_bearer(value) + client.headers[header_name] = _apply_bearer(value) elif scheme_type == 'oauth2': - client.client.headers[header_name] = _apply_bearer(value) + if sub_type != 'client_credentials': + client.headers[header_name] = _apply_bearer(value) elif scheme_type == 'http': if sub_type == 'bearer': - client.client.headers[header_name] = _apply_bearer(value) + client.headers[header_name] = _apply_bearer(value) else: raise Exception('not supported') else: @@ -145,7 +145,7 @@ def _parse_basic_auth_scheme(client: SecurityClient, scheme: dataclass): password = value data = f'{username}:{password}'.encode() - client.client.headers['Authorization'] = f'Basic {base64.b64encode(data).decode()}' + client.headers['Authorization'] = f'Basic {base64.b64encode(data).decode()}' def generate_url(clazz: type, server_url: str, path: str, path_params: dataclass, @@ -264,7 +264,8 @@ def get_query_params(clazz: type, query_params: dataclass, gbls: Dict[str, Dict[ f_name = metadata.get("field_name") serialization = metadata.get('serialization', '') if serialization != '': - serialized_parms = 
_get_serialized_params(metadata, field.type, f_name, value) + serialized_parms = _get_serialized_params( + metadata, field.type, f_name, value) for key, value in serialized_parms.items(): if key in params: params[key].extend(value) @@ -312,7 +313,8 @@ def _get_serialized_params(metadata: Dict, field_type: type, field_name: str, ob serialization = metadata.get('serialization', '') if serialization == 'json': - params[metadata.get("field_name", field_name)] = marshal_json(obj, field_type) + params[metadata.get("field_name", field_name) + ] = marshal_json(obj, field_type) return params @@ -702,7 +704,8 @@ def unmarshal_json(data, typ, decoder=None): def marshal_json(val, typ, encoder=None): if not is_optional_type(typ) and val is None: - raise ValueError(f"Could not marshal None into non-optional type: {typ}") + raise ValueError( + f"Could not marshal None into non-optional type: {typ}") marshal = make_dataclass('Marshal', [('res', typ)], bases=(DataClassJsonMixin,)) @@ -732,6 +735,16 @@ def match_content_type(content_type: str, pattern: str) -> boolean: return False +def match_status_codes(status_codes: List[str], status_code: int) -> bool: + for code in status_codes: + if code == str(status_code): + return True + + if code.endswith("XX") and code.startswith(str(status_code)[:1]): + return True + return False + + def datetimeisoformat(optional: bool): def isoformatoptional(val): if optional and val is None: @@ -835,6 +848,7 @@ def list_decode(val: List): return list_decode + def union_encoder(all_encoders: Dict[str, Callable]): def selective_encoder(val: any): if type(val) in all_encoders: @@ -842,6 +856,7 @@ def selective_encoder(val: any): return val return selective_encoder + def union_decoder(all_decoders: List[Callable]): def selective_decoder(val: any): decoded = val @@ -854,6 +869,7 @@ def selective_decoder(val: any): return decoded return selective_decoder + def get_field_name(name): def override(_, _field_name=name): return _field_name