From 9f872a3d8cde39b2a25e5b3fe12396a066251928 Mon Sep 17 00:00:00 2001 From: Austin Walker Date: Wed, 14 Aug 2024 18:16:21 -0400 Subject: [PATCH] Generate SDK v2 --- .gitignore | 2 +- .speakeasy/gen.lock | 71 +- README.md | 187 ++- USAGE.md | 64 +- docs/models/operations/partitionrequest.md | 2 +- docs/models/operations/partitionresponse.md | 12 +- docs/models/shared/files.md | 9 +- docs/models/shared/partitionparameters.md | 22 +- gen.yaml | 6 +- pylintrc | 43 +- pyproject.toml | 50 + scripts/publish.sh | 8 +- setup.py | 61 - src/unstructured_client/__init__.py | 2 +- src/unstructured_client/_hooks/__init__.py | 2 +- src/unstructured_client/_hooks/sdkhooks.py | 16 +- src/unstructured_client/_hooks/types.py | 14 +- src/unstructured_client/basesdk.py | 213 ++++ src/unstructured_client/general.py | 246 ++-- src/unstructured_client/httpclient.py | 78 ++ src/unstructured_client/models/__init__.py | 4 - .../models/errors/__init__.py | 10 +- .../models/errors/httpvalidationerror.py | 27 +- .../models/errors/sdkerror.py | 26 +- .../models/errors/servererror.py | 21 +- .../models/operations/__init__.py | 6 +- .../models/operations/partition.py | 77 +- .../models/shared/__init__.py | 10 +- .../models/shared/partition_parameters.py | 206 ++- .../models/shared/security.py | 16 +- .../models/shared/validationerror.py | 31 +- src/unstructured_client/py.typed | 1 + src/unstructured_client/sdk.py | 91 +- src/unstructured_client/sdkconfiguration.py | 47 +- src/unstructured_client/types/__init__.py | 21 + src/unstructured_client/types/basemodel.py | 35 + src/unstructured_client/utils/__init__.py | 78 +- src/unstructured_client/utils/annotations.py | 19 + src/unstructured_client/utils/enums.py | 25 +- .../utils/eventstreaming.py | 179 +++ src/unstructured_client/utils/forms.py | 207 +++ src/unstructured_client/utils/headers.py | 136 ++ src/unstructured_client/utils/metadata.py | 118 ++ src/unstructured_client/utils/queryparams.py | 203 +++ .../utils/requestbodies.py | 66 + 
src/unstructured_client/utils/retries.py | 131 +- src/unstructured_client/utils/security.py | 168 +++ src/unstructured_client/utils/serializers.py | 158 +++ src/unstructured_client/utils/url.py | 150 +++ src/unstructured_client/utils/utils.py | 1116 ----------------- src/unstructured_client/utils/values.py | 128 ++ 51 files changed, 2985 insertions(+), 1634 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py create mode 100644 src/unstructured_client/basesdk.py create mode 100644 src/unstructured_client/httpclient.py delete mode 100644 src/unstructured_client/models/__init__.py create mode 100644 src/unstructured_client/py.typed create mode 100644 src/unstructured_client/types/__init__.py create mode 100644 src/unstructured_client/types/basemodel.py create mode 100644 src/unstructured_client/utils/annotations.py create mode 100644 src/unstructured_client/utils/eventstreaming.py create mode 100644 src/unstructured_client/utils/forms.py create mode 100644 src/unstructured_client/utils/headers.py create mode 100644 src/unstructured_client/utils/metadata.py create mode 100644 src/unstructured_client/utils/queryparams.py create mode 100644 src/unstructured_client/utils/requestbodies.py create mode 100644 src/unstructured_client/utils/security.py create mode 100644 src/unstructured_client/utils/serializers.py create mode 100644 src/unstructured_client/utils/url.py delete mode 100644 src/unstructured_client/utils/utils.py create mode 100644 src/unstructured_client/utils/values.py diff --git a/.gitignore b/.gitignore index 6545cd8f..f647ef8d 100755 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ +pyrightconfig.json venv/ src/*.egg-info/ __pycache__/ .pytest_cache/ .python-version .DS_Store - # human-added igore files .ipynb_checkpoints/ .idea/ diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index 925d1ce0..3218f261 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,57 +1,76 @@ lockVersion: 2.0.0 id: 
8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 17bd23e4247d7b65a92813afd1252693 + docChecksum: a6ff17ff485bb4b5884d75af244e18a1 docVersion: 1.0.44 - speakeasyVersion: 1.361.1 - generationVersion: 2.393.4 - releaseVersion: 0.25.5 - configChecksum: 6b4c1555edde75f4f1e422e49a07c208 + speakeasyVersion: 1.346.0 + generationVersion: 2.379.3 + releaseVersion: 0.26.0-beta + configChecksum: 96c9dbe127b795111b840819ebcc996d repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git published: true features: python: - additionalDependencies: 0.1.0 - constsAndDefaults: 0.1.4 - core: 4.8.4 - examples: 2.81.3 - globalSecurity: 2.83.7 - globalSecurityCallbacks: 0.1.0 - globalSecurityFlattening: 0.1.0 - globalServerURLs: 2.82.2 - nameOverrides: 2.81.2 - nullables: 0.1.0 - openEnums: 0.1.0 - responseFormat: 0.1.0 - retries: 2.82.2 - sdkHooks: 0.1.0 - serverIDs: 2.81.1 - unions: 2.82.9 + additionalDependencies: 1.0.0 + constsAndDefaults: 1.0.0 + core: 5.2.4 + defaultEnabledRetries: 0.2.0 + envVarSecurityUsage: 0.2.0 + examples: 3.0.0 + globalSecurity: 3.0.0 + globalSecurityCallbacks: 1.0.0 + globalSecurityFlattening: 1.0.0 + globalServerURLs: 3.0.0 + multipartFileContentType: 1.0.0 + nameOverrides: 3.0.0 + nullables: 1.0.0 + openEnums: 1.0.0 + responseFormat: 1.0.0 + retries: 3.0.0 + sdkHooks: 1.0.0 + serverIDs: 3.0.0 + unions: 3.0.1 + uploadStreams: 1.0.0 generatedFiles: - src/unstructured_client/sdkconfiguration.py - src/unstructured_client/general.py - src/unstructured_client/sdk.py + - .vscode/settings.json - py.typed - pylintrc + - pyproject.toml - scripts/publish.sh - - setup.py - src/unstructured_client/__init__.py + - src/unstructured_client/basesdk.py + - src/unstructured_client/httpclient.py + - src/unstructured_client/py.typed + - src/unstructured_client/types/__init__.py + - src/unstructured_client/types/basemodel.py - 
src/unstructured_client/utils/__init__.py + - src/unstructured_client/utils/annotations.py - src/unstructured_client/utils/enums.py + - src/unstructured_client/utils/eventstreaming.py + - src/unstructured_client/utils/forms.py + - src/unstructured_client/utils/headers.py + - src/unstructured_client/utils/metadata.py + - src/unstructured_client/utils/queryparams.py + - src/unstructured_client/utils/requestbodies.py - src/unstructured_client/utils/retries.py - - src/unstructured_client/utils/utils.py + - src/unstructured_client/utils/security.py + - src/unstructured_client/utils/serializers.py + - src/unstructured_client/utils/url.py + - src/unstructured_client/utils/values.py - src/unstructured_client/models/errors/sdkerror.py - src/unstructured_client/models/operations/partition.py + - src/unstructured_client/models/operations/__init__.py - src/unstructured_client/models/errors/httpvalidationerror.py - src/unstructured_client/models/errors/servererror.py + - src/unstructured_client/models/errors/__init__.py - src/unstructured_client/models/shared/validationerror.py - src/unstructured_client/models/shared/partition_parameters.py - src/unstructured_client/models/shared/security.py - - src/unstructured_client/models/__init__.py - - src/unstructured_client/models/errors/__init__.py - - src/unstructured_client/models/operations/__init__.py - src/unstructured_client/models/shared/__init__.py - docs/models/operations/partitionrequest.md - docs/models/operations/partitionresponse.md diff --git a/README.md b/README.md index 98ba146b..921323c3 100755 --- a/README.md +++ b/README.md @@ -30,9 +30,15 @@ Please refer to the [Unstructured docs](https://docs.unstructured.io/api-referen ## SDK Installation +PIP ```bash pip install unstructured-client ``` + +Poetry +```bash +poetry add unstructured-client +``` ## SDK Example Usage @@ -131,28 +137,30 @@ Some of the endpoints in this SDK support retries. 
If you use the SDK without an To change the default retry strategy for a single API call, simply provide a `RetryConfig` object to the call: ```python -import unstructured_client -from unstructured_client.models import operations, shared +from unstructured_client import UnstructuredClient +from unstructured_client.models import shared from unstructured_client.utils import BackoffStrategy, RetryConfig -s = unstructured_client.UnstructuredClient() +s = UnstructuredClient( + api_key_auth="YOUR_API_KEY", +) -res = s.general.partition(request=operations.PartitionRequest( - partition_parameters=shared.PartitionParameters( - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='your_file_here', - ), - chunking_strategy=shared.ChunkingStrategy.BY_TITLE, - split_pdf_page_range=[ +res = s.general.partition(request={ + "partition_parameters": { + "files": { + "content": open("", "rb"), + "file_name": "your_file_here", + }, + "chunking_strategy": shared.ChunkingStrategy.BY_TITLE, + "split_pdf_page_range": [ 1, 10, ], - strategy=shared.Strategy.HI_RES, - ), -), - RetryConfig('backoff', BackoffStrategy(1, 50, 1.1, 100), False)) + "strategy": shared.Strategy.HI_RES, + }, +}, + RetryConfig("backoff", BackoffStrategy(1, 50, 1.1, 100), False)) if res.elements is not None: # handle response @@ -162,29 +170,30 @@ if res.elements is not None: If you'd like to override the default retry strategy for all operations that support retries, you can use the `retry_config` optional parameter when initializing the SDK: ```python -import unstructured_client -from unstructured_client.models import operations, shared +from unstructured_client import UnstructuredClient +from unstructured_client.models import shared from unstructured_client.utils import BackoffStrategy, RetryConfig -s = unstructured_client.UnstructuredClient( - retry_config=RetryConfig('backoff', BackoffStrategy(1, 50, 1.1, 100), False), +s = UnstructuredClient( + retry_config=RetryConfig("backoff", BackoffStrategy(1, 
50, 1.1, 100), False), + api_key_auth="YOUR_API_KEY", ) -res = s.general.partition(request=operations.PartitionRequest( - partition_parameters=shared.PartitionParameters( - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='your_file_here', - ), - chunking_strategy=shared.ChunkingStrategy.BY_TITLE, - split_pdf_page_range=[ +res = s.general.partition(request={ + "partition_parameters": { + "files": { + "content": open("", "rb"), + "file_name": "your_file_here", + }, + "chunking_strategy": shared.ChunkingStrategy.BY_TITLE, + "split_pdf_page_range": [ 1, 10, ], - strategy=shared.Strategy.HI_RES, - ), -)) + "strategy": shared.Strategy.HI_RES, + }, +}) if res.elements is not None: # handle response @@ -196,16 +205,81 @@ if res.elements is not None: ## Custom HTTP Client -The Python SDK makes API calls using the [requests](https://pypi.org/project/requests/) HTTP library. In order to provide a convenient way to configure timeouts, cookies, proxies, custom headers, and other low-level configuration, you can initialize the SDK client with a custom `requests.Session` object. +The Python SDK makes API calls using the [httpx](https://www.python-httpx.org/) HTTP library. In order to provide a convenient way to configure timeouts, cookies, proxies, custom headers, and other low-level configuration, you can initialize the SDK client with your own HTTP client instance. +Depending on whether you are using the sync or async version of the SDK, you can pass an instance of `HttpClient` or `AsyncHttpClient` respectively, which are Protocol's ensuring that the client has the necessary methods to make API calls. +This allows you to wrap the client with your own custom logic, such as adding custom headers, logging, or error handling, or you can just pass an instance of `httpx.Client` or `httpx.AsyncClient` directly. 
For example, you could specify a header for every request that this sdk makes as follows: ```python -import unstructured_client -import requests +from unstructured_client import UnstructuredClient +import httpx + +http_client = httpx.Client(headers={"x-custom-header": "someValue"}) +s = UnstructuredClient(client=http_client) +``` -http_client = requests.Session() -http_client.headers.update({'x-custom-header': 'someValue'}) -s = unstructured_client.UnstructuredClient(client=http_client) +or you could wrap the client with your own custom logic: +```python +from unstructured_client import UnstructuredClient +from unstructured_client.httpclient import AsyncHttpClient +import httpx + +class CustomClient(AsyncHttpClient): + client: AsyncHttpClient + + def __init__(self, client: AsyncHttpClient): + self.client = client + + async def send( + self, + request: httpx.Request, + *, + stream: bool = False, + auth: Union[ + httpx._types.AuthTypes, httpx._client.UseClientDefault, None + ] = httpx.USE_CLIENT_DEFAULT, + follow_redirects: Union[ + bool, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + ) -> httpx.Response: + request.headers["Client-Level-Header"] = "added by client" + + return await self.client.send( + request, stream=stream, auth=auth, follow_redirects=follow_redirects + ) + + def build_request( + self, + method: str, + url: httpx._types.URLTypes, + *, + content: Optional[httpx._types.RequestContent] = None, + data: Optional[httpx._types.RequestData] = None, + files: Optional[httpx._types.RequestFiles] = None, + json: Optional[Any] = None, + params: Optional[httpx._types.QueryParamTypes] = None, + headers: Optional[httpx._types.HeaderTypes] = None, + cookies: Optional[httpx._types.CookieTypes] = None, + timeout: Union[ + httpx._types.TimeoutTypes, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + extensions: Optional[httpx._types.RequestExtensions] = None, + ) -> httpx.Request: + return self.client.build_request( + method, + url, + 
content=content, + data=data, + files=files, + json=json, + params=params, + headers=headers, + cookies=cookies, + timeout=timeout, + extensions=extensions, + ) + +s = UnstructuredClient(async_client=CustomClient(httpx.AsyncClient())) ``` @@ -216,6 +290,47 @@ s = unstructured_client.UnstructuredClient(client=http_client) + +## File uploads + +Certain SDK methods accept file objects as part of a request body or multi-part request. It is possible and typically recommended to upload files as a stream rather than reading the entire contents into memory. This avoids excessive memory consumption and potentially crashing with out-of-memory errors when working with very large files. The following example demonstrates how to attach a file stream to a request. + +> [!TIP] +> +> For endpoints that handle file uploads bytes arrays can also be used. However, using streams is recommended for large files. +> + +```python +from unstructured_client import UnstructuredClient +from unstructured_client.models import shared + +s = UnstructuredClient( + api_key_auth="YOUR_API_KEY", +) + + +res = s.general.partition(request={ + "partition_parameters": { + "files": { + "content": open("", "rb"), + "file_name": "your_file_here", + }, + "chunking_strategy": shared.ChunkingStrategy.BY_TITLE, + "split_pdf_page_range": [ + 1, + 10, + ], + "strategy": shared.Strategy.HI_RES, + }, +}) + +if res.elements is not None: + # handle response + pass + +``` + + ### Maturity diff --git a/USAGE.md b/USAGE.md index d5cb8629..63ba6954 100644 --- a/USAGE.md +++ b/USAGE.md @@ -1,29 +1,65 @@ ```python -import unstructured_client -from unstructured_client.models import operations, shared +# Synchronous Example +from unstructured_client import UnstructuredClient +from unstructured_client.models import shared -s = unstructured_client.UnstructuredClient() +s = UnstructuredClient( + api_key_auth="YOUR_API_KEY", +) -res = s.general.partition(request=operations.PartitionRequest( - 
partition_parameters=shared.PartitionParameters( - files=shared.Files( - content='0x2cC94b2FEF'.encode(), - file_name='your_file_here', - ), - chunking_strategy=shared.ChunkingStrategy.BY_TITLE, - split_pdf_page_range=[ +res = s.general.partition(request={ + "partition_parameters": { + "files": { + "content": open("", "rb"), + "file_name": "your_file_here", + }, + "chunking_strategy": shared.ChunkingStrategy.BY_TITLE, + "split_pdf_page_range": [ 1, 10, ], - strategy=shared.Strategy.HI_RES, - ), -)) + "strategy": shared.Strategy.HI_RES, + }, +}) if res.elements is not None: # handle response pass +``` + +


The same SDK client can also be used to make asynchronous requests by importing asyncio. +```python +# Asynchronous Example +import asyncio +from unstructured_client import UnstructuredClient +from unstructured_client.models import shared + +async def main(): + s = UnstructuredClient( + api_key_auth="YOUR_API_KEY", + ) + res = await s.general.partition_async(request={ + "partition_parameters": { + "files": { + "content": open("", "rb"), + "file_name": "your_file_here", + }, + "chunking_strategy": shared.ChunkingStrategy.BASIC, + "split_pdf_page_range": [ + 1, + 10, + ], + "strategy": shared.Strategy.AUTO, + }, + }) + if res.elements is not None: + # handle response + pass +asyncio.run(main()) ``` \ No newline at end of file diff --git a/docs/models/operations/partitionrequest.md b/docs/models/operations/partitionrequest.md index 1965803f..7e0f9e8d 100644 --- a/docs/models/operations/partitionrequest.md +++ b/docs/models/operations/partitionrequest.md @@ -6,4 +6,4 @@ | Field | Type | Required | Description | | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | ------------------------------------------------------------------------ | | `partition_parameters` | [shared.PartitionParameters](../../models/shared/partitionparameters.md) | :heavy_check_mark: | N/A | -| `unstructured_api_key` | *Optional[str]* | :heavy_minus_sign: | N/A | \ No newline at end of file +| `unstructured_api_key` | *OptionalNullable[str]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/models/operations/partitionresponse.md b/docs/models/operations/partitionresponse.md index f010516c..b2beb267 100644 --- a/docs/models/operations/partitionresponse.md +++ b/docs/models/operations/partitionresponse.md @@ -3,9 +3,9 @@ ## Fields -| Field | Type | Required | Description | -| 
------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------- | -| `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation | -| `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation | -| `raw_response` | [requests.Response](https://requests.readthedocs.io/en/latest/api/#requests.Response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing | -| `elements` | List[Dict[str, *Any*]] | :heavy_minus_sign: | Successful Response | \ No newline at end of file +| Field | Type | Required | Description | +| ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | ------------------------------------------------------------ | +| `content_type` | *str* | :heavy_check_mark: | HTTP response content type for this operation | +| `status_code` | *int* | :heavy_check_mark: | HTTP response status code for this operation | +| `raw_response` | [httpx.Response](https://www.python-httpx.org/api/#response) | :heavy_check_mark: | Raw HTTP response; suitable for custom response parsing | +| `elements` | List[Dict[str, *Any*]] | :heavy_minus_sign: | Successful Response | \ No newline at end of file diff --git a/docs/models/shared/files.md b/docs/models/shared/files.md index 27d480a0..bbe16fab 100644 --- a/docs/models/shared/files.md +++ b/docs/models/shared/files.md @@ -3,7 +3,8 @@ ## Fields -| Field | Type | Required | Description | -| ------------------ | ------------------ | ------------------ | ------------------ | -| `content` | *bytes* | :heavy_check_mark: | N/A | -| 
`file_name` | *str* | :heavy_check_mark: | N/A | \ No newline at end of file +| Field | Type | Required | Description | +| -------------------------------------------- | -------------------------------------------- | -------------------------------------------- | -------------------------------------------- | +| `content` | *Union[bytes, IO[bytes], io.BufferedReader]* | :heavy_check_mark: | N/A | +| `file_name` | *str* | :heavy_check_mark: | N/A | +| `content_type` | *Optional[str]* | :heavy_minus_sign: | N/A | \ No newline at end of file diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index 1f8369a7..f0c46215 100644 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -6,32 +6,32 @@ | Field | Type | Required | Description | Example | | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `files` | [shared.Files](../../models/shared/files.md) | :heavy_check_mark: | The file to extract | | -| `chunking_strategy` | [Optional[shared.ChunkingStrategy]](../../models/shared/chunkingstrategy.md) | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements after partitioning. When 'chunking_strategy' is not specified, no chunking is performed and any other chunking parameters provided are ignored. 
Supported strategies: 'basic', 'by_page', 'by_similarity', or 'by_title' | by_title | -| `combine_under_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | | -| `content_type` | *Optional[str]* | :heavy_minus_sign: | A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype. | | +| `chunking_strategy` | [OptionalNullable[shared.ChunkingStrategy]](../../models/shared/chunkingstrategy.md) | :heavy_minus_sign: | Use one of the supported strategies to chunk the returned elements after partitioning. When 'chunking_strategy' is not specified, no chunking is performed and any other chunking parameters provided are ignored. Supported strategies: 'basic', 'by_page', 'by_similarity', or 'by_title' | by_title | +| `combine_under_n_chars` | *OptionalNullable[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | | +| `content_type` | *OptionalNullable[str]* | :heavy_minus_sign: | A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype. | | | `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If `True`, return coordinates for each element extracted via OCR. Default: `False` | | -| `encoding` | *Optional[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | | +| `encoding` | *OptionalNullable[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | | | `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields. 
| | -| `gz_uncompressed_content_type` | *Optional[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping. | | -| `hi_res_model_name` | *Optional[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | | -| `include_orig_elements` | *Optional[bool]* | :heavy_minus_sign: | When a chunking strategy is specified, each returned chunk will include the elements consolidated to form that chunk as `.metadata.orig_elements`. Default: true. | | +| `gz_uncompressed_content_type` | *OptionalNullable[str]* | :heavy_minus_sign: | If file is gzipped, use this content type after unzipping. | | +| `hi_res_model_name` | *OptionalNullable[str]* | :heavy_minus_sign: | The name of the inference model used when strategy is hi_res | | +| `include_orig_elements` | *OptionalNullable[bool]* | :heavy_minus_sign: | When a chunking strategy is specified, each returned chunk will include the elements consolidated to form that chunk as `.metadata.orig_elements`. Default: true. | | | `include_page_breaks` | *Optional[bool]* | :heavy_minus_sign: | If true, the output will include page breaks if the filetype supports it. Default: false | | | `languages` | List[*str*] | :heavy_minus_sign: | The languages present in the document, for use in partitioning and/or OCR. See the Tesseract documentation for a full list of languages. | | -| `max_characters` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 500 | | +| `max_characters` | *OptionalNullable[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 500 | | | `multipage_sections` | *Optional[bool]* | :heavy_minus_sign: | If chunking strategy is set, determines if sections can span multiple sections. 
Default: true | | -| `new_after_n_chars` | *Optional[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | | +| `new_after_n_chars` | *OptionalNullable[int]* | :heavy_minus_sign: | If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500 | | | `ocr_languages` | List[*str*] | :heavy_minus_sign: | Deprecated! The languages present in the document, for use in partitioning and/or OCR | | | `output_format` | [Optional[shared.OutputFormat]](../../models/shared/outputformat.md) | :heavy_minus_sign: | The format of the response. Supported formats are application/json and text/csv. Default: application/json. | | | `overlap` | *Optional[int]* | :heavy_minus_sign: | Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0 | | | `overlap_all` | *Optional[bool]* | :heavy_minus_sign: | When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False | | | `pdf_infer_table_structure` | *Optional[bool]* | :heavy_minus_sign: | Deprecated! Use skip_infer_table_types to opt out of table extraction for any file type. If False and strategy=hi_res, no Table Elements will be extracted from pdf files regardless of skip_infer_table_types contents. | | -| `similarity_threshold` | *Optional[float]* | :heavy_minus_sign: | A value between 0.0 and 1.0 describing the minimum similarity two elements must have to be included in the same chunk. 
Note that similar elements may be separated to meet chunk-size criteria; this value can only guarantees that two elements with similarity below the threshold will appear in separate chunks. | | +| `similarity_threshold` | *OptionalNullable[float]* | :heavy_minus_sign: | A value between 0.0 and 1.0 describing the minimum similarity two elements must have to be included in the same chunk. Note that similar elements may be separated to meet chunk-size criteria; this value can only guarantees that two elements with similarity below the threshold will appear in separate chunks. | | | `skip_infer_table_types` | List[*str*] | :heavy_minus_sign: | The document types that you want to skip table extraction with. Default: [] | | | `split_pdf_allow_failed` | *Optional[bool]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages. | | | `split_pdf_concurrency_level` | *Optional[int]* | :heavy_minus_sign: | When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | | | `split_pdf_page` | *Optional[bool]* | :heavy_minus_sign: | This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend. | | | `split_pdf_page_range` | List[*int*] | :heavy_minus_sign: | When `split_pdf_page is set to `True`, this parameter selects a subset of the pdf to send to the API. 
The parameter is a list of 2 integers within the range [1, length_of_pdf]. A ValueError is thrown if the given range is invalid. It's an internal parameter for the Python client and is not sent to the backend. | [
1,
10
] | -| `starting_page_number` | *Optional[int]* | :heavy_minus_sign: | When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27. | | +| `starting_page_number` | *OptionalNullable[int]* | :heavy_minus_sign: | When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27. | | | `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto | auto | | `unique_element_ids` | *Optional[bool]* | :heavy_minus_sign: | When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False` | | | `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If `True`, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to XML documents. 
| | \ No newline at end of file diff --git a/gen.yaml b/gen.yaml index a4c6eff3..e70ccae6 100644 --- a/gen.yaml +++ b/gen.yaml @@ -10,7 +10,7 @@ generation: auth: oAuth2ClientCredentialsEnabled: false python: - version: 0.25.5 + version: 0.26.0-beta additionalDependencies: dev: {} main: @@ -23,7 +23,9 @@ python: - Unstructured clientServerStatusCodesAsErrors: true description: Python Client SDK for Unstructured API + enumFormat: enum flattenGlobalSecurity: true + flattenRequests: false imports: option: openapi paths: @@ -40,4 +42,4 @@ python: packageName: unstructured-client projectUrls: {} responseFormat: envelope - templateVersion: v2 \ No newline at end of file + templateVersion: v2 diff --git a/pylintrc b/pylintrc index a165907e..2b1e2c8f 100644 --- a/pylintrc +++ b/pylintrc @@ -59,10 +59,11 @@ ignore-paths= # Emacs file locks ignore-patterns=^\.# -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis). It -# supports qualified module names, as well as Unix pattern matching. +# List of module names for which member attributes should not be checked and +# will not be imported (useful for modules/projects where namespaces are +# manipulated during runtime and thus existing member attributes cannot be +# deduced by static analysis). It supports qualified module names, as well as +# Unix pattern matching. ignored-modules= # Python code to execute, usually for sys.path manipulation such as @@ -93,6 +94,12 @@ py-version=3.8 # Discover python modules and packages in the file system subtree. recursive=no +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. 
+source-roots=src + # When enabled, pylint would attempt to guess common misconfiguration and emit # user-friendly hints instead of false-positive error messages. suggestion-mode=yes @@ -224,6 +231,10 @@ no-docstring-rgx=^_ # These decorators are taken in consideration only for invalid-name. property-classes=abc.abstractproperty +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +typealias-rgx=.* + # Regular expression matching correct type variable names. If left empty, type # variable names will be checked with the set naming style. #typevar-rgx= @@ -246,15 +257,12 @@ check-protected-access-in-special-methods=no defining-attr-methods=__init__, __new__, setUp, + asyncSetUp, __post_init__ # List of member names, which should be excluded from the protected access # warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit # List of valid names for the first argument in a class method. valid-classmethod-first-arg=cls @@ -417,6 +425,8 @@ disable=raw-checker-failed, suppressed-message, useless-suppression, deprecated-pragma, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero, use-symbolic-message-instead, trailing-whitespace, line-too-long, @@ -442,13 +452,15 @@ disable=raw-checker-failed, too-many-boolean-expressions, no-else-raise, bare-except, - broad-exception-caught + broad-exception-caught, + fixme, + consider-using-from-import # Enable the message, report, category or checker with the given id(s). You can # either give multiple identifier separated by comma (,) or put this option # multiple time (only on the command line, not in the configuration file where # it should appear only once). See also the "--disable" option for examples. 
-enable=c-extension-no-member +enable= [METHOD_ARGS] @@ -494,8 +506,9 @@ evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor # used to format the message information. See doc for all details. msg-template= -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. +# Set the output format. Available formats are: text, parseable, colorized, +# json2 (improved json format), json (old json format) and msvs (visual +# studio). You can also give a reporter class, e.g. # mypackage.mymodule.MyReporterClass. #output-format= @@ -529,8 +542,8 @@ min-similarity-lines=4 # Limits count of emitted suggestions for spelling mistakes. max-spelling-suggestions=4 -# Spelling dictionary name. Available dictionaries: none. To make it work, -# install the 'python-enchant' package. +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. 
spelling-dict= # List of comma separated words that should be considered directives if they diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..d6d0d6ae --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,50 @@ +[tool.poetry] +name = "unstructured-client" +version = "0.26.0-beta" +description = "Python Client SDK for Unstructured API" +authors = ["Unstructured",] +readme = "README.md" +repository = "https://github.com/Unstructured-IO/unstructured-python-client.git" +license = "MIT" +packages = [ + { include = "unstructured_client", from = "src" } +] +include = ["py.typed", "src/unstructured_client/py.typed"] + +[tool.setuptools.package-data] +"*" = ["py.typed", "src/unstructured_client/py.typed"] + +[tool.poetry.dependencies] +python = "^3.8" +deepdiff = ">=6.0" +httpx = ">=0.27.0" +jsonpath-python = "^1.0.6" +nest-asyncio = ">=1.6.0" +pydantic = "~2.8.2" +pypdf = ">=4.0" +python-dateutil = "^2.9.0.post0" +requests-toolbelt = ">=1.0.0" +typing-inspect = "^0.9.0" + +[tool.poetry.group.dev.dependencies] +mypy = "==1.10.1" +pylint = "==3.2.3" +types-python-dateutil = "^2.9.0.20240316" + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" + +[tool.pytest.ini_options] +pythonpath = ["src"] + +[tool.mypy] +disable_error_code = "misc" + +[[tool.mypy.overrides]] +module = "typing_inspect" +ignore_missing_imports = true + +[[tool.mypy.overrides]] +module = "jsonpath" +ignore_missing_imports = true diff --git a/scripts/publish.sh b/scripts/publish.sh index ed45d8a9..6392f414 100755 --- a/scripts/publish.sh +++ b/scripts/publish.sh @@ -1,9 +1,5 @@ #!/usr/bin/env bash -export TWINE_USERNAME=__token__ -export TWINE_PASSWORD=${PYPI_TOKEN} +export POETRY_PYPI_TOKEN_PYPI=${PYPI_TOKEN} -python -m pip install --upgrade pip -pip install setuptools wheel twine -python setup.py sdist bdist_wheel -twine upload dist/* +poetry publish --build diff --git a/setup.py b/setup.py deleted file mode 100644 index b282502b..00000000 --- 
a/setup.py +++ /dev/null @@ -1,61 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" - -import setuptools -import re - -try: - with open('README.md', 'r') as fh: - long_description = fh.read() - GITHUB_URL = 'https://github.com/Unstructured-IO/unstructured-python-client.git' - GITHUB_URL = GITHUB_URL[: -len('.git')] if GITHUB_URL.endswith('.git') else GITHUB_URL - # links on PyPI should have absolute URLs - long_description = re.sub( - r'(\[[^\]]+\]\()((?!https?:)[^\)]+)(\))', - lambda m: m.group(1) + GITHUB_URL + '/blob/master/' + m.group(2) + m.group(3), - long_description, - ) -except FileNotFoundError: - long_description = '' - -setuptools.setup( - name='unstructured-client', - version='0.25.5', - author='Unstructured', - description='Python Client SDK for Unstructured API', - license = 'MIT', - url='https://github.com/Unstructured-IO/unstructured-python-client.git', - long_description=long_description, - long_description_content_type='text/markdown', - packages=setuptools.find_packages(where='src'), - install_requires=[ - "certifi>=2023.7.22", - "charset-normalizer>=3.2.0", - "dataclasses-json>=0.6.4", - "deepdiff>=6.0", - "httpx>=0.27.0", - "idna>=3.4", - "jsonpath-python>=1.0.6", - "marshmallow>=3.19.0", - "mypy-extensions>=1.0.0", - "nest-asyncio>=1.6.0", - "packaging>=23.1", - "pypdf>=4.0", - "python-dateutil>=2.8.2", - "requests>=2.31.0", - "requests-toolbelt>=1.0.0", - "six>=1.16.0", - "typing-inspect>=0.9.0", - "typing_extensions>=4.7.1", - "urllib3>=1.26.18", - ], - extras_require={ - "dev": [ - "pylint==3.1.0", - ], - }, - package_dir={'': 'src'}, - python_requires='>=3.8', - package_data={ - 'unstructured-client': ['py.typed'] - }, -) diff --git a/src/unstructured_client/__init__.py b/src/unstructured_client/__init__.py index d8d60c47..3ba186d9 100644 --- a/src/unstructured_client/__init__.py +++ b/src/unstructured_client/__init__.py @@ -1,4 +1,4 @@ -"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" from .sdk import * from .sdkconfiguration import * diff --git a/src/unstructured_client/_hooks/__init__.py b/src/unstructured_client/_hooks/__init__.py index 2ee66cdd..86ab3098 100644 --- a/src/unstructured_client/_hooks/__init__.py +++ b/src/unstructured_client/_hooks/__init__.py @@ -1,4 +1,4 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" from .sdkhooks import * from .types import * diff --git a/src/unstructured_client/_hooks/sdkhooks.py b/src/unstructured_client/_hooks/sdkhooks.py index 1fdae95a..f8bcb621 100644 --- a/src/unstructured_client/_hooks/sdkhooks.py +++ b/src/unstructured_client/_hooks/sdkhooks.py @@ -1,13 +1,13 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" -import requests +import httpx from .types import SDKInitHook, BeforeRequestContext, BeforeRequestHook, AfterSuccessContext, AfterSuccessHook, AfterErrorContext, AfterErrorHook, Hooks from .registration import init_hooks from typing import List, Optional, Tuple - +from unstructured_client.httpclient import HttpClient class SDKHooks(Hooks): - def __init__(self): + def __init__(self) -> None: self.sdk_init_hooks: List[SDKInitHook] = [] self.before_request_hooks: List[BeforeRequestHook] = [] self.after_success_hooks: List[AfterSuccessHook] = [] @@ -26,12 +26,12 @@ def register_after_success_hook(self, hook: AfterSuccessHook) -> None: def register_after_error_hook(self, hook: AfterErrorHook) -> None: self.after_error_hooks.append(hook) - def sdk_init(self, base_url: str, client: requests.Session) -> Tuple[str, requests.Session]: + def sdk_init(self, base_url: str, client: HttpClient) -> Tuple[str, HttpClient]: for hook in self.sdk_init_hooks: base_url, client = hook.sdk_init(base_url, client) return 
base_url, client - def before_request(self, hook_ctx: BeforeRequestContext, request: requests.PreparedRequest) -> requests.PreparedRequest: + def before_request(self, hook_ctx: BeforeRequestContext, request: httpx.Request) -> httpx.Request: for hook in self.before_request_hooks: out = hook.before_request(hook_ctx, request) if isinstance(out, Exception): @@ -40,7 +40,7 @@ def before_request(self, hook_ctx: BeforeRequestContext, request: requests.Prepa return request - def after_success(self, hook_ctx: AfterSuccessContext, response: requests.Response) -> requests.Response: + def after_success(self, hook_ctx: AfterSuccessContext, response: httpx.Response) -> httpx.Response: for hook in self.after_success_hooks: out = hook.after_success(hook_ctx, response) if isinstance(out, Exception): @@ -48,7 +48,7 @@ def after_success(self, hook_ctx: AfterSuccessContext, response: requests.Respon response = out return response - def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests.Response], error: Optional[Exception]) -> Tuple[Optional[requests.Response], Optional[Exception]]: + def after_error(self, hook_ctx: AfterErrorContext, response: Optional[httpx.Response], error: Optional[Exception]) -> Tuple[Optional[httpx.Response], Optional[Exception]]: for hook in self.after_error_hooks: result = hook.after_error(hook_ctx, response, error) if isinstance(result, Exception): diff --git a/src/unstructured_client/_hooks/types.py b/src/unstructured_client/_hooks/types.py index 72ab059b..844c0da6 100644 --- a/src/unstructured_client/_hooks/types.py +++ b/src/unstructured_client/_hooks/types.py @@ -1,8 +1,10 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + -import requests as requests_http from abc import ABC, abstractmethod +import httpx from typing import Any, Callable, List, Optional, Tuple, Union +from unstructured_client.httpclient import HttpClient class HookContext: @@ -34,25 +36,25 @@ def __init__(self, hook_ctx: HookContext): class SDKInitHook(ABC): @abstractmethod - def sdk_init(self, base_url: str, client: requests_http.Session) -> Tuple[str, requests_http.Session]: + def sdk_init(self, base_url: str, client: HttpClient) -> Tuple[str, HttpClient]: pass class BeforeRequestHook(ABC): @abstractmethod - def before_request(self, hook_ctx: BeforeRequestContext, request: requests_http.PreparedRequest) -> Union[requests_http.PreparedRequest, Exception]: + def before_request(self, hook_ctx: BeforeRequestContext, request: httpx.Request) -> Union[httpx.Request, Exception]: pass class AfterSuccessHook(ABC): @abstractmethod - def after_success(self, hook_ctx: AfterSuccessContext, response: requests_http.Response) -> Union[requests_http.Response, Exception]: + def after_success(self, hook_ctx: AfterSuccessContext, response: httpx.Response) -> Union[httpx.Response, Exception]: pass class AfterErrorHook(ABC): @abstractmethod - def after_error(self, hook_ctx: AfterErrorContext, response: Optional[requests_http.Response], error: Optional[Exception]) -> Union[Tuple[Optional[requests_http.Response], Optional[Exception]], Exception]: + def after_error(self, hook_ctx: AfterErrorContext, response: Optional[httpx.Response], error: Optional[Exception]) -> Union[Tuple[Optional[httpx.Response], Optional[Exception]], Exception]: pass diff --git a/src/unstructured_client/basesdk.py b/src/unstructured_client/basesdk.py new file mode 100644 index 00000000..c1df9f31 --- /dev/null +++ b/src/unstructured_client/basesdk.py @@ -0,0 +1,213 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from .sdkconfiguration import SDKConfiguration +import httpx +from typing import Callable, List, Optional, Tuple +from unstructured_client._hooks import AfterErrorContext, AfterSuccessContext, BeforeRequestContext +from unstructured_client.models import errors +import unstructured_client.utils as utils +from unstructured_client.utils import RetryConfig, SerializedRequestBody + +class BaseSDK: + sdk_configuration: SDKConfiguration + + def __init__(self, sdk_config: SDKConfiguration) -> None: + self.sdk_configuration = sdk_config + + def get_url(self, base_url, url_variables): + sdk_url, sdk_variables = self.sdk_configuration.get_server_details() + + if base_url is None: + base_url = sdk_url + + if url_variables is None: + url_variables = sdk_variables + + return utils.template_url(base_url, url_variables) + + def build_request( + self, + method, + path, + base_url, + url_variables, + request, + request_body_required, + request_has_path_params, + request_has_query_params, + user_agent_header, + accept_header_value, + _globals=None, + security=None, + timeout_ms: Optional[int] = None, + get_serialized_body: Optional[ + Callable[[], Optional[SerializedRequestBody]] + ] = None, + url_override: Optional[str] = None, + ) -> httpx.Request: + client = self.sdk_configuration.client + + query_params = {} + + url = url_override + if url is None: + url = utils.generate_url( + self.get_url(base_url, url_variables), + path, + request if request_has_path_params else None, + _globals if request_has_path_params else None, + ) + + query_params = utils.get_query_params( + request if request_has_query_params else None, + _globals if request_has_query_params else None, + ) + + headers = utils.get_headers(request, _globals) + headers["Accept"] = accept_header_value + headers[user_agent_header] = self.sdk_configuration.user_agent + + if security is not None: + if callable(security): + security = security() + + if security is not None: + security_headers, 
security_query_params = utils.get_security(security) + headers = {**headers, **security_headers} + query_params = {**query_params, **security_query_params} + + serialized_request_body = SerializedRequestBody("application/octet-stream") + if get_serialized_body is not None: + rb = get_serialized_body() + if request_body_required and rb is None: + raise ValueError("request body is required") + + if rb is not None: + serialized_request_body = rb + + if ( + serialized_request_body.media_type is not None + and serialized_request_body.media_type + not in ( + "multipart/form-data", + "multipart/mixed", + ) + ): + headers["content-type"] = serialized_request_body.media_type + + timeout = timeout_ms / 1000 if timeout_ms is not None else None + + return client.build_request( + method, + url, + params=query_params, + content=serialized_request_body.content, + data=serialized_request_body.data, + files=serialized_request_body.files, + headers=headers, + timeout=timeout, + ) + + def do_request( + self, + hook_ctx, + request, + error_status_codes, + retry_config: Optional[Tuple[RetryConfig, List[str]]] = None, + ) -> httpx.Response: + client = self.sdk_configuration.client + + def do(): + http_res = None + try: + req = self.sdk_configuration.get_hooks().before_request( + BeforeRequestContext(hook_ctx), request + ) + http_res = client.send(req) + except Exception as e: + _, e = self.sdk_configuration.get_hooks().after_error( + AfterErrorContext(hook_ctx), None, e + ) + if e is not None: + raise e + + if http_res is None: + raise errors.SDKError("No response received") + + if utils.match_status_codes(error_status_codes, http_res.status_code): + result, err = self.sdk_configuration.get_hooks().after_error( + AfterErrorContext(hook_ctx), http_res, None + ) + if err is not None: + raise err + if result is not None: + http_res = result + else: + raise errors.SDKError("Unexpected error occurred") + + return http_res + + if retry_config is not None: + http_res = utils.retry(do, 
utils.Retries(retry_config[0], retry_config[1])) + else: + http_res = do() + + if not utils.match_status_codes(error_status_codes, http_res.status_code): + http_res = self.sdk_configuration.get_hooks().after_success( + AfterSuccessContext(hook_ctx), http_res + ) + + return http_res + + async def do_request_async( + self, + hook_ctx, + request, + error_status_codes, + retry_config: Optional[Tuple[RetryConfig, List[str]]] = None, + ) -> httpx.Response: + client = self.sdk_configuration.async_client + + async def do(): + http_res = None + try: + req = self.sdk_configuration.get_hooks().before_request( + BeforeRequestContext(hook_ctx), request + ) + http_res = await client.send(req) + except Exception as e: + _, e = self.sdk_configuration.get_hooks().after_error( + AfterErrorContext(hook_ctx), None, e + ) + if e is not None: + raise e + + if http_res is None: + raise errors.SDKError("No response received") + + if utils.match_status_codes(error_status_codes, http_res.status_code): + result, err = self.sdk_configuration.get_hooks().after_error( + AfterErrorContext(hook_ctx), http_res, None + ) + if err is not None: + raise err + if result is not None: + http_res = result + else: + raise errors.SDKError("Unexpected error occurred") + + return http_res + + if retry_config is not None: + http_res = await utils.retry_async( + do, utils.Retries(retry_config[0], retry_config[1]) + ) + else: + http_res = await do() + + if not utils.match_status_codes(error_status_codes, http_res.status_code): + http_res = self.sdk_configuration.get_hooks().after_success( + AfterSuccessContext(hook_ctx), http_res + ) + + return http_res diff --git a/src/unstructured_client/general.py b/src/unstructured_client/general.py index e9d06a9d..3b8af5fd 100644 --- a/src/unstructured_client/general.py +++ b/src/unstructured_client/general.py @@ -1,117 +1,175 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -import requests as requests_http -from .sdkconfiguration import SDKConfiguration -from typing import Any, Dict, List, Optional -from unstructured_client import utils -from unstructured_client._hooks import AfterErrorContext, AfterSuccessContext, BeforeRequestContext, HookContext -from unstructured_client.models import errors, operations +from .basesdk import BaseSDK +from typing import Any, Dict, List, Optional, Union, cast +from unstructured_client._hooks import HookContext +from unstructured_client.models import errors, operations, shared +from unstructured_client.types import BaseModel, OptionalNullable, UNSET +import unstructured_client.utils as utils -class General: - sdk_configuration: SDKConfiguration - - def __init__(self, sdk_config: SDKConfiguration) -> None: - self.sdk_configuration = sdk_config - +class General(BaseSDK): - def partition(self, request: operations.PartitionRequest, retries: Optional[utils.RetryConfig] = None) -> operations.PartitionResponse: + def partition( + self, *, + request: Union[operations.PartitionRequest, operations.PartitionRequestTypedDict], + retries: OptionalNullable[utils.RetryConfig] = UNSET, + server_url: Optional[str] = None, + timeout_ms: Optional[int] = None, + ) -> operations.PartitionResponse: r"""Summary + Description + + :param request: The request object to send. 
+ :param retries: Override the default retry configuration for this method + :param server_url: Override the default server URL for this method + :param timeout_ms: Override the default request timeout configuration for this method in milliseconds """ - hook_ctx = HookContext(operation_id='partition', oauth2_scopes=[], security_source=self.sdk_configuration.security) - base_url = utils.template_url(*self.sdk_configuration.get_server_details()) + base_url = None + url_variables = None + if timeout_ms is None: + timeout_ms = self.sdk_configuration.timeout_ms - url = base_url + '/general/v0/general' + if server_url is not None: + base_url = server_url - if callable(self.sdk_configuration.security): - headers, query_params = utils.get_security(self.sdk_configuration.security()) - else: - headers, query_params = utils.get_security(self.sdk_configuration.security) + if not isinstance(request, BaseModel): + request = utils.unmarshal(request, operations.PartitionRequest) + request = cast(operations.PartitionRequest, request) - headers = { **utils.get_headers(request), **headers } - req_content_type, data, form = utils.serialize_request_body(request, operations.PartitionRequest, "partition_parameters", False, False, 'multipart') - if req_content_type is not None and req_content_type not in ('multipart/form-data', 'multipart/mixed'): - headers['content-type'] = req_content_type - if data is None and form is None: - raise Exception('request body is required') - headers['Accept'] = 'application/json' - headers['user-agent'] = self.sdk_configuration.user_agent - client = self.sdk_configuration.client + req = self.build_request( + method="POST", + path="/general/v0/general", + base_url=base_url, + url_variables=url_variables, + request=request, + request_body_required=True, + request_has_path_params=False, + request_has_query_params=True, + user_agent_header="user-agent", + accept_header_value="application/json", + security=self.sdk_configuration.security, + 
get_serialized_body=lambda: utils.serialize_request_body(request.partition_parameters, False, False, "multipart", shared.PartitionParameters), + timeout_ms=timeout_ms, + ) - global_retry_config = self.sdk_configuration.retry_config - retry_config = retries - if retry_config is None: - if global_retry_config: - retry_config = global_retry_config + if retries == UNSET: + if self.sdk_configuration.retry_config is not UNSET: + retries = self.sdk_configuration.retry_config else: - retry_config = utils.RetryConfig('backoff', utils.BackoffStrategy(500, 60000, 1.5, 900000), True) + retries = utils.RetryConfig("backoff", utils.BackoffStrategy(500, 60000, 1.5, 900000), True) - req = None - def do_request(): - nonlocal req - try: - req = client.prepare_request(requests_http.Request('POST', url, params=query_params, data=data, files=form, headers=headers)) - req = self.sdk_configuration.get_hooks().before_request(BeforeRequestContext(hook_ctx), req) - http_res = client.send(req) - except Exception as e: - _, err = self.sdk_configuration.get_hooks().after_error(AfterErrorContext(hook_ctx), None, e) - if err is not None: - raise err from e - raise e + retry_config = None + if isinstance(retries, utils.RetryConfig): + retry_config = (retries, [ + "502", + "503", + "504" + ]) + + http_res = self.do_request( + hook_ctx=HookContext(operation_id="partition", oauth2_scopes=[], security_source=self.sdk_configuration.security), + request=req, + error_status_codes=["422","4XX","5XX"], + retry_config=retry_config + ) + + data: Any = None + if utils.match_response(http_res, "200", "application/json"): + return operations.PartitionResponse(elements=utils.unmarshal_json(http_res.text, Optional[List[Dict[str, Any]]]), status_code=http_res.status_code, content_type=http_res.headers.get("Content-Type") or "", raw_response=http_res) + if utils.match_response(http_res, "422", "application/json"): + data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData) + raise 
errors.HTTPValidationError(data=data) + if utils.match_response(http_res, "4XX", "*"): + raise errors.SDKError("API error occurred", http_res.status_code, http_res.text, http_res) + if utils.match_response(http_res, "5XX", "application/json"): + data = utils.unmarshal_json(http_res.text, errors.ServerErrorData) + raise errors.ServerError(data=data) + + content_type = http_res.headers.get("Content-Type") + raise errors.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res) - if utils.match_status_codes(['422','4XX','5XX'], http_res.status_code): - result, e = self.sdk_configuration.get_hooks().after_error(AfterErrorContext(hook_ctx), http_res, None) - if e is not None: - raise e - if result is not None: - http_res = result - else: - raise errors.SDKError('Unexpected error occurred', -1, '', None) - else: - http_res = self.sdk_configuration.get_hooks().after_success(AfterSuccessContext(hook_ctx), http_res) + + + async def partition_async( + self, *, + request: Union[operations.PartitionRequest, operations.PartitionRequestTypedDict], + retries: OptionalNullable[utils.RetryConfig] = UNSET, + server_url: Optional[str] = None, + timeout_ms: Optional[int] = None, + ) -> operations.PartitionResponse: + r"""Summary - return http_res + Description - http_res = utils.retry(do_request, utils.Retries(retry_config, [ - '502', - '503', - '504' - ])) + :param request: The request object to send. 
+ :param retries: Override the default retry configuration for this method + :param server_url: Override the default server URL for this method + :param timeout_ms: Override the default request timeout configuration for this method in milliseconds + """ + base_url = None + url_variables = None + if timeout_ms is None: + timeout_ms = self.sdk_configuration.timeout_ms + if server_url is not None: + base_url = server_url - res = operations.PartitionResponse(status_code=http_res.status_code, content_type=http_res.headers.get('Content-Type') or '', raw_response=http_res) + if not isinstance(request, BaseModel): + request = utils.unmarshal(request, operations.PartitionRequest) + request = cast(operations.PartitionRequest, request) - if http_res.status_code == 200: - # pylint: disable=no-else-return - if utils.match_content_type(http_res.headers.get('Content-Type') or '', 'application/json'): - out = utils.unmarshal_json(http_res.text, Optional[List[Dict[str, Any]]]) - res.elements = out - else: - content_type = http_res.headers.get('Content-Type') - raise errors.SDKError(f'unknown content-type received: {content_type}', http_res.status_code, http_res.text, http_res) - elif http_res.status_code == 422: - # pylint: disable=no-else-return - if utils.match_content_type(http_res.headers.get('Content-Type') or '', 'application/json'): - out = utils.unmarshal_json(http_res.text, errors.HTTPValidationError) - raise out - else: - content_type = http_res.headers.get('Content-Type') - raise errors.SDKError(f'unknown content-type received: {content_type}', http_res.status_code, http_res.text, http_res) - elif http_res.status_code >= 400 and http_res.status_code < 500: - raise errors.SDKError('API error occurred', http_res.status_code, http_res.text, http_res) - elif http_res.status_code >= 500 and http_res.status_code < 600: - # pylint: disable=no-else-return - if utils.match_content_type(http_res.headers.get('Content-Type') or '', 'application/json'): - out = 
utils.unmarshal_json(http_res.text, errors.ServerError) - raise out + req = self.build_request( + method="POST", + path="/general/v0/general", + base_url=base_url, + url_variables=url_variables, + request=request, + request_body_required=True, + request_has_path_params=False, + request_has_query_params=True, + user_agent_header="user-agent", + accept_header_value="application/json", + security=self.sdk_configuration.security, + get_serialized_body=lambda: utils.serialize_request_body(request.partition_parameters, False, False, "multipart", shared.PartitionParameters), + timeout_ms=timeout_ms, + ) + + if retries == UNSET: + if self.sdk_configuration.retry_config is not UNSET: + retries = self.sdk_configuration.retry_config else: - content_type = http_res.headers.get('Content-Type') - raise errors.SDKError(f'unknown content-type received: {content_type}', http_res.status_code, http_res.text, http_res) - else: - raise errors.SDKError('unknown status code received', http_res.status_code, http_res.text, http_res) + retries = utils.RetryConfig("backoff", utils.BackoffStrategy(500, 60000, 1.5, 900000), True) - return res + retry_config = None + if isinstance(retries, utils.RetryConfig): + retry_config = (retries, [ + "502", + "503", + "504" + ]) + + http_res = await self.do_request_async( + hook_ctx=HookContext(operation_id="partition", oauth2_scopes=[], security_source=self.sdk_configuration.security), + request=req, + error_status_codes=["422","4XX","5XX"], + retry_config=retry_config + ) + + data: Any = None + if utils.match_response(http_res, "200", "application/json"): + return operations.PartitionResponse(elements=utils.unmarshal_json(http_res.text, Optional[List[Dict[str, Any]]]), status_code=http_res.status_code, content_type=http_res.headers.get("Content-Type") or "", raw_response=http_res) + if utils.match_response(http_res, "422", "application/json"): + data = utils.unmarshal_json(http_res.text, errors.HTTPValidationErrorData) + raise 
errors.HTTPValidationError(data=data) + if utils.match_response(http_res, "4XX", "*"): + raise errors.SDKError("API error occurred", http_res.status_code, http_res.text, http_res) + if utils.match_response(http_res, "5XX", "application/json"): + data = utils.unmarshal_json(http_res.text, errors.ServerErrorData) + raise errors.ServerError(data=data) + + content_type = http_res.headers.get("Content-Type") + raise errors.SDKError(f"Unexpected response received (code: {http_res.status_code}, type: {content_type})", http_res.status_code, http_res.text, http_res) - diff --git a/src/unstructured_client/httpclient.py b/src/unstructured_client/httpclient.py new file mode 100644 index 00000000..32704973 --- /dev/null +++ b/src/unstructured_client/httpclient.py @@ -0,0 +1,78 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +# pyright: reportReturnType = false +from typing_extensions import Protocol, runtime_checkable +import httpx +from typing import Any, Optional, Union + + +@runtime_checkable +class HttpClient(Protocol): + def send( + self, + request: httpx.Request, + *, + stream: bool = False, + auth: Union[ + httpx._types.AuthTypes, httpx._client.UseClientDefault, None + ] = httpx.USE_CLIENT_DEFAULT, + follow_redirects: Union[ + bool, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + ) -> httpx.Response: + pass + + def build_request( + self, + method: str, + url: httpx._types.URLTypes, + *, + content: Optional[httpx._types.RequestContent] = None, + data: Optional[httpx._types.RequestData] = None, + files: Optional[httpx._types.RequestFiles] = None, + json: Optional[Any] = None, + params: Optional[httpx._types.QueryParamTypes] = None, + headers: Optional[httpx._types.HeaderTypes] = None, + cookies: Optional[httpx._types.CookieTypes] = None, + timeout: Union[ + httpx._types.TimeoutTypes, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + extensions: Optional[httpx._types.RequestExtensions] = None, + ) -> 
httpx.Request: + pass + + +@runtime_checkable +class AsyncHttpClient(Protocol): + async def send( + self, + request: httpx.Request, + *, + stream: bool = False, + auth: Union[ + httpx._types.AuthTypes, httpx._client.UseClientDefault, None + ] = httpx.USE_CLIENT_DEFAULT, + follow_redirects: Union[ + bool, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + ) -> httpx.Response: + pass + + def build_request( + self, + method: str, + url: httpx._types.URLTypes, + *, + content: Optional[httpx._types.RequestContent] = None, + data: Optional[httpx._types.RequestData] = None, + files: Optional[httpx._types.RequestFiles] = None, + json: Optional[Any] = None, + params: Optional[httpx._types.QueryParamTypes] = None, + headers: Optional[httpx._types.HeaderTypes] = None, + cookies: Optional[httpx._types.CookieTypes] = None, + timeout: Union[ + httpx._types.TimeoutTypes, httpx._client.UseClientDefault + ] = httpx.USE_CLIENT_DEFAULT, + extensions: Optional[httpx._types.RequestExtensions] = None, + ) -> httpx.Request: + pass diff --git a/src/unstructured_client/models/__init__.py b/src/unstructured_client/models/__init__.py deleted file mode 100644 index 97d7cbfd..00000000 --- a/src/unstructured_client/models/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" - - -# package diff --git a/src/unstructured_client/models/errors/__init__.py b/src/unstructured_client/models/errors/__init__.py index 03839458..c3b549ec 100644 --- a/src/unstructured_client/models/errors/__init__.py +++ b/src/unstructured_client/models/errors/__init__.py @@ -1,7 +1,7 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -from .httpvalidationerror import * -from .sdkerror import * -from .servererror import * +from .httpvalidationerror import Detail, HTTPValidationError, HTTPValidationErrorData +from .sdkerror import SDKError +from .servererror import ServerError, ServerErrorData -__all__ = ["Detail","HTTPValidationError","SDKError","ServerError"] +__all__ = ["Detail", "HTTPValidationError", "HTTPValidationErrorData", "SDKError", "ServerError", "ServerErrorData"] diff --git a/src/unstructured_client/models/errors/httpvalidationerror.py b/src/unstructured_client/models/errors/httpvalidationerror.py index 9b943ef5..c4fc2e59 100644 --- a/src/unstructured_client/models/errors/httpvalidationerror.py +++ b/src/unstructured_client/models/errors/httpvalidationerror.py @@ -1,21 +1,28 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" from __future__ import annotations -import dataclasses -from ...models.shared import validationerror as shared_validationerror -from dataclasses_json import Undefined, dataclass_json from typing import List, Optional, Union -from unstructured_client import utils +from unstructured_client.models.shared import validationerror as shared_validationerror +from unstructured_client.types import BaseModel +import unstructured_client.utils as utils +class HTTPValidationErrorData(BaseModel): + detail: Optional[Detail] = None + -@dataclass_json(undefined=Undefined.EXCLUDE) -@dataclasses.dataclass class HTTPValidationError(Exception): - detail: Optional[Detail] = dataclasses.field(default=None, metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('detail'), 'exclude': lambda f: f is None }}) - + data: HTTPValidationErrorData + + def __init__(self, data: HTTPValidationErrorData): + self.data = data def __str__(self) -> str: - return utils.marshal_json(self, type(self)) + return utils.marshal_json(self.data, HTTPValidationErrorData) + + 
+DetailTypedDict = Union[List[shared_validationerror.ValidationErrorTypedDict], str] + Detail = Union[List[shared_validationerror.ValidationError], str] + diff --git a/src/unstructured_client/models/errors/sdkerror.py b/src/unstructured_client/models/errors/sdkerror.py index 2e7ef211..60a01a86 100644 --- a/src/unstructured_client/models/errors/sdkerror.py +++ b/src/unstructured_client/models/errors/sdkerror.py @@ -1,24 +1,22 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" -import requests as requests_http +from dataclasses import dataclass +from typing import Optional +import httpx +@dataclass class SDKError(Exception): """Represents an error returned by the API.""" - message: str - status_code: int - body: str - raw_response: requests_http.Response - def __init__(self, message: str, status_code: int, body: str, raw_response: requests_http.Response): - self.message = message - self.status_code = status_code - self.body = body - self.raw_response = raw_response + message: str + status_code: int = -1 + body: str = "" + raw_response: Optional[httpx.Response] = None def __str__(self): - body = '' + body = "" if len(self.body) > 0: - body = f'\n{self.body}' + body = f"\n{self.body}" - return f'{self.message}: Status {self.status_code}{body}' + return f"{self.message}: Status {self.status_code}{body}" diff --git a/src/unstructured_client/models/errors/servererror.py b/src/unstructured_client/models/errors/servererror.py index 14e1bd77..ad6a5d3f 100644 --- a/src/unstructured_client/models/errors/servererror.py +++ b/src/unstructured_client/models/errors/servererror.py @@ -1,18 +1,21 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" from __future__ import annotations -import dataclasses -from dataclasses_json import Undefined, dataclass_json from typing import Optional -from unstructured_client import utils +from unstructured_client.types import BaseModel +import unstructured_client.utils as utils +class ServerErrorData(BaseModel): + detail: Optional[str] = None + -@dataclass_json(undefined=Undefined.EXCLUDE) -@dataclasses.dataclass class ServerError(Exception): - detail: Optional[str] = dataclasses.field(default=None, metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('detail'), 'exclude': lambda f: f is None }}) - + data: ServerErrorData + + def __init__(self, data: ServerErrorData): + self.data = data def __str__(self) -> str: - return utils.marshal_json(self, type(self)) + return utils.marshal_json(self.data, ServerErrorData) + diff --git a/src/unstructured_client/models/operations/__init__.py b/src/unstructured_client/models/operations/__init__.py index 53ed8933..8bfbbe98 100644 --- a/src/unstructured_client/models/operations/__init__.py +++ b/src/unstructured_client/models/operations/__init__.py @@ -1,5 +1,5 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" -from .partition import * +from .partition import PartitionRequest, PartitionRequestTypedDict, PartitionResponse, PartitionResponseTypedDict -__all__ = ["PartitionRequest","PartitionResponse"] +__all__ = ["PartitionRequest", "PartitionRequestTypedDict", "PartitionResponse", "PartitionResponseTypedDict"] diff --git a/src/unstructured_client/models/operations/partition.py b/src/unstructured_client/models/operations/partition.py index cb462c2a..465bc3ab 100644 --- a/src/unstructured_client/models/operations/partition.py +++ b/src/unstructured_client/models/operations/partition.py @@ -1,30 +1,75 @@ -"""Code generated by Speakeasy (https://speakeasy.com). 
DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" from __future__ import annotations -import dataclasses -import requests as requests_http -from ...models.shared import partition_parameters as shared_partition_parameters -from typing import Any, Dict, List, Optional +import httpx +import pydantic +from pydantic import model_serializer +from typing import Any, Dict, List, Optional, TypedDict +from typing_extensions import Annotated, NotRequired +from unstructured_client.models.shared import partition_parameters as shared_partition_parameters +from unstructured_client.types import BaseModel, Nullable, OptionalNullable, UNSET, UNSET_SENTINEL +from unstructured_client.utils import FieldMetadata, HeaderMetadata, RequestMetadata -@dataclasses.dataclass -class PartitionRequest: - UNSET='__SPEAKEASY_UNSET__' - partition_parameters: shared_partition_parameters.PartitionParameters = dataclasses.field(metadata={'request': { 'media_type': 'multipart/form-data' }}) - unstructured_api_key: Optional[str] = dataclasses.field(default=UNSET, metadata={'header': { 'field_name': 'unstructured-api-key', 'style': 'simple', 'explode': False }}) +class PartitionRequestTypedDict(TypedDict): + partition_parameters: shared_partition_parameters.PartitionParametersTypedDict + unstructured_api_key: NotRequired[Nullable[str]] +class PartitionRequest(BaseModel): + partition_parameters: Annotated[shared_partition_parameters.PartitionParameters, FieldMetadata(request=RequestMetadata(media_type="multipart/form-data"))] + unstructured_api_key: Annotated[OptionalNullable[str], pydantic.Field(alias="unstructured-api-key"), FieldMetadata(header=HeaderMetadata(style="simple", explode=False))] = UNSET + + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["unstructured-api-key"] + nullable_fields = ["unstructured-api-key"] + null_default_fields = [] + + serialized = handler(self) + + m = {} + + for n, f in 
self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields + or ( + k in optional_fields + and k in nullable_fields + and ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + ) + ): + m[k] = val + return m + -@dataclasses.dataclass -class PartitionResponse: - content_type: str = dataclasses.field() +class PartitionResponseTypedDict(TypedDict): + content_type: str r"""HTTP response content type for this operation""" - status_code: int = dataclasses.field() + status_code: int r"""HTTP response status code for this operation""" - raw_response: requests_http.Response = dataclasses.field() + raw_response: httpx.Response r"""Raw HTTP response; suitable for custom response parsing""" - elements: Optional[List[Dict[str, Any]]] = dataclasses.field(default=None) + elements: NotRequired[List[Dict[str, Any]]] r"""Successful Response""" +class PartitionResponse(BaseModel): + content_type: str + r"""HTTP response content type for this operation""" + status_code: int + r"""HTTP response status code for this operation""" + raw_response: httpx.Response + r"""Raw HTTP response; suitable for custom response parsing""" + elements: Optional[List[Dict[str, Any]]] = None + r"""Successful Response""" + diff --git a/src/unstructured_client/models/shared/__init__.py b/src/unstructured_client/models/shared/__init__.py index b6a7718a..7c09195f 100644 --- a/src/unstructured_client/models/shared/__init__.py +++ b/src/unstructured_client/models/shared/__init__.py @@ -1,7 +1,7 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -from .partition_parameters import * -from .security import * -from .validationerror import * +from .partition_parameters import ChunkingStrategy, Files, FilesTypedDict, OutputFormat, PartitionParameters, PartitionParametersTypedDict, Strategy +from .security import Security, SecurityTypedDict +from .validationerror import Loc, LocTypedDict, ValidationError, ValidationErrorTypedDict -__all__ = ["ChunkingStrategy","Files","Loc","OutputFormat","PartitionParameters","Security","Strategy","ValidationError"] +__all__ = ["ChunkingStrategy", "Files", "FilesTypedDict", "Loc", "LocTypedDict", "OutputFormat", "PartitionParameters", "PartitionParametersTypedDict", "Security", "SecurityTypedDict", "Strategy", "ValidationError", "ValidationErrorTypedDict"] diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py index 69cbcf8d..898fad70 100644 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -1,103 +1,201 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" from __future__ import annotations -import dataclasses from enum import Enum -from typing import List, Optional -from unstructured_client import utils +import io +import pydantic +from pydantic import model_serializer +from pydantic.functional_validators import PlainValidator +from typing import IO, List, Optional, TypedDict, Union +from typing_extensions import Annotated, NotRequired +from unstructured_client.types import BaseModel, Nullable, OptionalNullable, UNSET_SENTINEL +import unstructured_client.utils as utils +from unstructured_client.utils import FieldMetadata, MultipartFormMetadata, validate_open_enum class ChunkingStrategy(str, Enum, metaclass=utils.OpenEnumMeta): - BASIC = 'basic' - BY_PAGE = 'by_page' - BY_SIMILARITY = 'by_similarity' - BY_TITLE = 'by_title' + BASIC = "basic" + BY_PAGE = "by_page" + BY_SIMILARITY = "by_similarity" + BY_TITLE = "by_title" - -@dataclasses.dataclass -class Files: - content: bytes = dataclasses.field(metadata={'multipart_form': { 'content': True }}) - file_name: str = dataclasses.field(metadata={'multipart_form': { 'field_name': 'files' }}) +class FilesTypedDict(TypedDict): + content: Union[bytes, IO[bytes], io.BufferedReader] + file_name: str + content_type: NotRequired[str] - +class Files(BaseModel): + content: Annotated[Union[bytes, IO[bytes], io.BufferedReader], pydantic.Field(alias=""), FieldMetadata(multipart=MultipartFormMetadata(content=True))] + file_name: Annotated[str, pydantic.Field(alias="files"), FieldMetadata(multipart=True)] + content_type: Annotated[Optional[str], pydantic.Field(alias="Content-Type"), FieldMetadata(multipart=True)] = None + class OutputFormat(str, Enum, metaclass=utils.OpenEnumMeta): r"""The format of the response. Supported formats are application/json and text/csv. 
Default: application/json.""" - APPLICATION_JSON = 'application/json' - TEXT_CSV = 'text/csv' - + APPLICATION_JSON = "application/json" + TEXT_CSV = "text/csv" class Strategy(str, Enum, metaclass=utils.OpenEnumMeta): r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" - FAST = 'fast' - HI_RES = 'hi_res' - AUTO = 'auto' - OCR_ONLY = 'ocr_only' + FAST = "fast" + HI_RES = "hi_res" + AUTO = "auto" + OCR_ONLY = "ocr_only" +class PartitionParametersTypedDict(TypedDict): + files: FilesTypedDict + r"""The file to extract""" + chunking_strategy: NotRequired[Nullable[ChunkingStrategy]] + r"""Use one of the supported strategies to chunk the returned elements after partitioning. When 'chunking_strategy' is not specified, no chunking is performed and any other chunking parameters provided are ignored. Supported strategies: 'basic', 'by_page', 'by_similarity', or 'by_title'""" + combine_under_n_chars: NotRequired[Nullable[int]] + r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" + content_type: NotRequired[Nullable[str]] + r"""A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype.""" + coordinates: NotRequired[bool] + r"""If `True`, return coordinates for each element extracted via OCR. Default: `False`""" + encoding: NotRequired[Nullable[str]] + r"""The encoding method used to decode the text input. 
Default: utf-8""" + extract_image_block_types: NotRequired[List[str]] + r"""The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields.""" + gz_uncompressed_content_type: NotRequired[Nullable[str]] + r"""If file is gzipped, use this content type after unzipping.""" + hi_res_model_name: NotRequired[Nullable[str]] + r"""The name of the inference model used when strategy is hi_res""" + include_orig_elements: NotRequired[Nullable[bool]] + r"""When a chunking strategy is specified, each returned chunk will include the elements consolidated to form that chunk as `.metadata.orig_elements`. Default: true.""" + include_page_breaks: NotRequired[bool] + r"""If true, the output will include page breaks if the filetype supports it. Default: false""" + languages: NotRequired[List[str]] + r"""The languages present in the document, for use in partitioning and/or OCR. See the Tesseract documentation for a full list of languages.""" + max_characters: NotRequired[Nullable[int]] + r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 500""" + multipage_sections: NotRequired[bool] + r"""If chunking strategy is set, determines if sections can span multiple sections. Default: true""" + new_after_n_chars: NotRequired[Nullable[int]] + r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" + ocr_languages: NotRequired[List[str]] + r"""Deprecated! The languages present in the document, for use in partitioning and/or OCR""" + output_format: NotRequired[OutputFormat] + r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json.""" + overlap: NotRequired[int] + r"""Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. 
By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0""" + overlap_all: NotRequired[bool] + r"""When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False""" + pdf_infer_table_structure: NotRequired[bool] + r"""Deprecated! Use skip_infer_table_types to opt out of table extraction for any file type. If False and strategy=hi_res, no Table Elements will be extracted from pdf files regardless of skip_infer_table_types contents.""" + similarity_threshold: NotRequired[Nullable[float]] + r"""A value between 0.0 and 1.0 describing the minimum similarity two elements must have to be included in the same chunk. Note that similar elements may be separated to meet chunk-size criteria; this value can only guarantees that two elements with similarity below the threshold will appear in separate chunks.""" + skip_infer_table_types: NotRequired[List[str]] + r"""The document types that you want to skip table extraction with. Default: []""" + split_pdf_allow_failed: NotRequired[bool] + r"""When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages.""" + split_pdf_concurrency_level: NotRequired[int] + r"""When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. 
It's an internal parameter for the Python client and is not sent to the backend.""" + split_pdf_page: NotRequired[bool] + r"""This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend.""" + split_pdf_page_range: NotRequired[List[int]] + r"""When `split_pdf_page is set to `True`, this parameter selects a subset of the pdf to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. A ValueError is thrown if the given range is invalid. It's an internal parameter for the Python client and is not sent to the backend.""" + starting_page_number: NotRequired[Nullable[int]] + r"""When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27.""" + strategy: NotRequired[Strategy] + r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" + unique_element_ids: NotRequired[bool] + r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" + xml_keep_tags: NotRequired[bool] + r"""If `True`, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. 
Only applies to XML documents.""" + -@dataclasses.dataclass -class PartitionParameters: - UNSET='__SPEAKEASY_UNSET__' - files: Files = dataclasses.field(metadata={'multipart_form': { 'file': True }}) +class PartitionParameters(BaseModel): + files: Annotated[Files, pydantic.Field(alias=""), FieldMetadata(multipart=MultipartFormMetadata(file=True))] r"""The file to extract""" - chunking_strategy: Optional[ChunkingStrategy] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'chunking_strategy' }}) + chunking_strategy: Annotated[Annotated[OptionalNullable[ChunkingStrategy], PlainValidator(validate_open_enum(False))], FieldMetadata(multipart=True)] = None r"""Use one of the supported strategies to chunk the returned elements after partitioning. When 'chunking_strategy' is not specified, no chunking is performed and any other chunking parameters provided are ignored. Supported strategies: 'basic', 'by_page', 'by_similarity', or 'by_title'""" - combine_under_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'combine_under_n_chars' }}) + combine_under_n_chars: Annotated[OptionalNullable[int], FieldMetadata(multipart=True)] = None r"""If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500""" - content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'content_type' }}) + content_type: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = None r"""A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype.""" - coordinates: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'coordinates' }}) + coordinates: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""If `True`, return coordinates for each element extracted via OCR. 
Default: `False`""" - encoding: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'encoding' }}) + encoding: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = None r"""The encoding method used to decode the text input. Default: utf-8""" - extract_image_block_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'extract_image_block_types' }}) + extract_image_block_types: Annotated[Optional[List[str]], FieldMetadata(multipart=True)] = None r"""The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields.""" - gz_uncompressed_content_type: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'gz_uncompressed_content_type' }}) + gz_uncompressed_content_type: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = None r"""If file is gzipped, use this content type after unzipping.""" - hi_res_model_name: Optional[str] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'hi_res_model_name' }}) + hi_res_model_name: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = None r"""The name of the inference model used when strategy is hi_res""" - include_orig_elements: Optional[bool] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'include_orig_elements' }}) + include_orig_elements: Annotated[OptionalNullable[bool], FieldMetadata(multipart=True)] = None r"""When a chunking strategy is specified, each returned chunk will include the elements consolidated to form that chunk as `.metadata.orig_elements`. 
Default: true.""" - include_page_breaks: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'include_page_breaks' }}) + include_page_breaks: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""If true, the output will include page breaks if the filetype supports it. Default: false""" - languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'languages' }}) + languages: Annotated[Optional[List[str]], FieldMetadata(multipart=True)] = None r"""The languages present in the document, for use in partitioning and/or OCR. See the Tesseract documentation for a full list of languages.""" - max_characters: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'max_characters' }}) + max_characters: Annotated[OptionalNullable[int], FieldMetadata(multipart=True)] = None r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (hard max). Default: 500""" - multipage_sections: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'multipage_sections' }}) + multipage_sections: Annotated[Optional[bool], FieldMetadata(multipart=True)] = True r"""If chunking strategy is set, determines if sections can span multiple sections. Default: true""" - new_after_n_chars: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'new_after_n_chars' }}) + new_after_n_chars: Annotated[OptionalNullable[int], FieldMetadata(multipart=True)] = None r"""If chunking strategy is set, cut off new sections after reaching a length of n chars (soft max). Default: 1500""" - ocr_languages: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'ocr_languages' }}) + ocr_languages: Annotated[Optional[List[str]], FieldMetadata(multipart=True)] = None r"""Deprecated! 
The languages present in the document, for use in partitioning and/or OCR""" - output_format: Optional[OutputFormat] = dataclasses.field(default=OutputFormat.APPLICATION_JSON, metadata={'multipart_form': { 'field_name': 'output_format' }}) + output_format: Annotated[Annotated[Optional[OutputFormat], PlainValidator(validate_open_enum(False))], FieldMetadata(multipart=True)] = OutputFormat.APPLICATION_JSON r"""The format of the response. Supported formats are application/json and text/csv. Default: application/json.""" - overlap: Optional[int] = dataclasses.field(default=0, metadata={'multipart_form': { 'field_name': 'overlap' }}) + overlap: Annotated[Optional[int], FieldMetadata(multipart=True)] = 0 r"""Specifies the length of a string ('tail') to be drawn from each chunk and prefixed to the next chunk as a context-preserving mechanism. By default, this only applies to split-chunks where an oversized element is divided into multiple chunks by text-splitting. Default: 0""" - overlap_all: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'overlap_all' }}) + overlap_all: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""When `True`, apply overlap between 'normal' chunks formed from whole elements and not subject to text-splitting. Use this with caution as it entails a certain level of 'pollution' of otherwise clean semantic chunk boundaries. Default: False""" - pdf_infer_table_structure: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'pdf_infer_table_structure' }}) + pdf_infer_table_structure: Annotated[Optional[bool], FieldMetadata(multipart=True)] = True r"""Deprecated! Use skip_infer_table_types to opt out of table extraction for any file type. 
If False and strategy=hi_res, no Table Elements will be extracted from pdf files regardless of skip_infer_table_types contents.""" - similarity_threshold: Optional[float] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'similarity_threshold' }}) + similarity_threshold: Annotated[OptionalNullable[float], FieldMetadata(multipart=True)] = None r"""A value between 0.0 and 1.0 describing the minimum similarity two elements must have to be included in the same chunk. Note that similar elements may be separated to meet chunk-size criteria; this value can only guarantees that two elements with similarity below the threshold will appear in separate chunks.""" - skip_infer_table_types: Optional[List[str]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'skip_infer_table_types' }}) + skip_infer_table_types: Annotated[Optional[List[str]], FieldMetadata(multipart=True)] = None r"""The document types that you want to skip table extraction with. Default: []""" - split_pdf_allow_failed: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'split_pdf_allow_failed' }}) + split_pdf_allow_failed: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""When `split_pdf_page` is set to `True`, this parameter defines the behavior when some of the parallel requests fail. By default `split_pdf_allow_failed` is set to `False` and any failed request send to the API will make the whole process break and raise an Exception. 
If `split_pdf_allow_failed` is set to `True`, the errors encountered while sending parallel requests will not break the processing - the resuling list of Elements will miss the data from errored pages.""" - split_pdf_concurrency_level: Optional[int] = dataclasses.field(default=5, metadata={'multipart_form': { 'field_name': 'split_pdf_concurrency_level' }}) + split_pdf_concurrency_level: Annotated[Optional[int], FieldMetadata(multipart=True)] = 5 r"""When `split_pdf_page` is set to `True`, this parameter specifies the number of workers used for sending requests when the PDF is split on the client side. It's an internal parameter for the Python client and is not sent to the backend.""" - split_pdf_page: Optional[bool] = dataclasses.field(default=True, metadata={'multipart_form': { 'field_name': 'split_pdf_page' }}) + split_pdf_page: Annotated[Optional[bool], FieldMetadata(multipart=True)] = True r"""This parameter determines if the PDF file should be split on the client side. It's an internal parameter for the Python client and is not sent to the backend.""" - split_pdf_page_range: Optional[List[int]] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'split_pdf_page_range' }}) + split_pdf_page_range: Annotated[Optional[List[int]], FieldMetadata(multipart=True)] = None r"""When `split_pdf_page is set to `True`, this parameter selects a subset of the pdf to send to the API. The parameter is a list of 2 integers within the range [1, length_of_pdf]. A ValueError is thrown if the given range is invalid. 
It's an internal parameter for the Python client and is not sent to the backend.""" - starting_page_number: Optional[int] = dataclasses.field(default=None, metadata={'multipart_form': { 'field_name': 'starting_page_number' }}) + starting_page_number: Annotated[OptionalNullable[int], FieldMetadata(multipart=True)] = None r"""When PDF is split into pages before sending it into the API, providing this information will allow the page number to be assigned correctly. Introduced in 1.0.27.""" - strategy: Optional[Strategy] = dataclasses.field(default=Strategy.AUTO, metadata={'multipart_form': { 'field_name': 'strategy' }}) + strategy: Annotated[Annotated[Optional[Strategy], PlainValidator(validate_open_enum(False))], FieldMetadata(multipart=True)] = Strategy.AUTO r"""The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: auto""" - unique_element_ids: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'unique_element_ids' }}) + unique_element_ids: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" - xml_keep_tags: Optional[bool] = dataclasses.field(default=False, metadata={'multipart_form': { 'field_name': 'xml_keep_tags' }}) + xml_keep_tags: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""If `True`, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. 
Only applies to XML documents.""" + @model_serializer(mode="wrap") + def serialize_model(self, handler): + optional_fields = ["chunking_strategy", "combine_under_n_chars", "content_type", "coordinates", "encoding", "extract_image_block_types", "gz_uncompressed_content_type", "hi_res_model_name", "include_orig_elements", "include_page_breaks", "languages", "max_characters", "multipage_sections", "new_after_n_chars", "ocr_languages", "output_format", "overlap", "overlap_all", "pdf_infer_table_structure", "similarity_threshold", "skip_infer_table_types", "split_pdf_allow_failed", "split_pdf_concurrency_level", "split_pdf_page", "split_pdf_page_range", "starting_page_number", "strategy", "unique_element_ids", "xml_keep_tags"] + nullable_fields = ["chunking_strategy", "combine_under_n_chars", "content_type", "encoding", "gz_uncompressed_content_type", "hi_res_model_name", "include_orig_elements", "max_characters", "new_after_n_chars", "similarity_threshold", "starting_page_number"] + null_default_fields = ["chunking_strategy", "combine_under_n_chars", "content_type", "encoding", "gz_uncompressed_content_type", "hi_res_model_name", "include_orig_elements", "max_characters", "new_after_n_chars", "similarity_threshold", "starting_page_number"] + + serialized = handler(self) + + m = {} + + for n, f in self.model_fields.items(): + k = f.alias or n + val = serialized.get(k) + + if val is not None and val != UNSET_SENTINEL: + m[k] = val + elif val != UNSET_SENTINEL and ( + not k in optional_fields + or ( + k in optional_fields + and k in nullable_fields + and ( + self.__pydantic_fields_set__.intersection({n}) + or k in null_default_fields + ) # pylint: disable=no-member + ) + ): + m[k] = val + return m + diff --git a/src/unstructured_client/models/shared/security.py b/src/unstructured_client/models/shared/security.py index 21a232a2..cfbd0384 100644 --- a/src/unstructured_client/models/shared/security.py +++ b/src/unstructured_client/models/shared/security.py @@ -1,12 +1,16 @@ 
-"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" from __future__ import annotations -import dataclasses -from typing import Optional +from typing import Optional, TypedDict +from typing_extensions import Annotated, NotRequired +from unstructured_client.types import BaseModel +from unstructured_client.utils import FieldMetadata, SecurityMetadata -@dataclasses.dataclass -class Security: - api_key_auth: Optional[str] = dataclasses.field(default=None, metadata={'security': { 'scheme': True, 'type': 'apiKey', 'sub_type': 'header', 'field_name': 'unstructured-api-key' }}) +class SecurityTypedDict(TypedDict): + api_key_auth: NotRequired[str] +class Security(BaseModel): + api_key_auth: Annotated[Optional[str], FieldMetadata(security=SecurityMetadata(scheme=True, scheme_type="apiKey", sub_type="header", field_name="unstructured-api-key"))] = None + diff --git a/src/unstructured_client/models/shared/validationerror.py b/src/unstructured_client/models/shared/validationerror.py index a7c35ec2..d5054f84 100644 --- a/src/unstructured_client/models/shared/validationerror.py +++ b/src/unstructured_client/models/shared/validationerror.py @@ -1,19 +1,24 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" from __future__ import annotations -import dataclasses -from dataclasses_json import Undefined, dataclass_json -from typing import List, Union -from unstructured_client import utils - - -@dataclass_json(undefined=Undefined.EXCLUDE) -@dataclasses.dataclass -class ValidationError: - loc: List[Loc] = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('loc') }}) - msg: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('msg') }}) - type: str = dataclasses.field(metadata={'dataclasses_json': { 'letter_case': utils.get_field_name('type') }}) +from typing import List, TypedDict, Union +from unstructured_client.types import BaseModel + + +class ValidationErrorTypedDict(TypedDict): + loc: List[LocTypedDict] + msg: str + type: str + + +class ValidationError(BaseModel): + loc: List[Loc] + msg: str + type: str +LocTypedDict = Union[str, int] + Loc = Union[str, int] + diff --git a/src/unstructured_client/py.typed b/src/unstructured_client/py.typed new file mode 100644 index 00000000..3e38f1a9 --- /dev/null +++ b/src/unstructured_client/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. The package enables type hints. diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index 91dfbef9..663b0aa8 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -1,48 +1,58 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -import requests as requests_http -from .general import General +from .basesdk import BaseSDK +from .httpclient import AsyncHttpClient, HttpClient from .sdkconfiguration import SDKConfiguration from .utils.retries import RetryConfig -from typing import Callable, Dict, Optional, Union -from unstructured_client import utils +import httpx +from typing import Any, Callable, Dict, Optional, Union from unstructured_client._hooks import SDKHooks +from unstructured_client.general import General from unstructured_client.models import shared +from unstructured_client.types import OptionalNullable, UNSET +import unstructured_client.utils as utils -class UnstructuredClient: +class UnstructuredClient(BaseSDK): general: General - - sdk_configuration: SDKConfiguration - - def __init__(self, - api_key_auth: Union[Optional[str], Callable[[], Optional[str]]] = None, - server: Optional[str] = None, - server_url: Optional[str] = None, - url_params: Optional[Dict[str, str]] = None, - client: Optional[requests_http.Session] = None, - retry_config: Optional[RetryConfig] = None - ) -> None: - """Instantiates the SDK configuring it with the provided parameters. + def __init__( + self, + api_key_auth: Optional[Union[Optional[str], Callable[[], Optional[str]]]] = None, + server: Optional[str] = None, + server_url: Optional[str] = None, + url_params: Optional[Dict[str, str]] = None, + client: Optional[HttpClient] = None, + async_client: Optional[AsyncHttpClient] = None, + retry_config: OptionalNullable[RetryConfig] = UNSET, + timeout_ms: Optional[int] = None + ) -> None: + r"""Instantiates the SDK configuring it with the provided parameters. 
:param api_key_auth: The api_key_auth required for authentication - :type api_key_auth: Union[Optional[str], Callable[[], Optional[str]]] - :param server: The server by name to use for all operations - :type server: str - :param server_url: The server URL to use for all operations - :type server_url: str + :param server: The server by name to use for all methods + :param server_url: The server URL to use for all methods :param url_params: Parameters to optionally template the server URL with - :type url_params: Dict[str, str] - :param client: The requests.Session HTTP client to use for all operations - :type client: requests_http.Session - :param retry_config: The utils.RetryConfig to use globally - :type retry_config: RetryConfig + :param client: The HTTP client to use for all synchronous methods + :param async_client: The Async HTTP client to use for all asynchronous methods + :param retry_config: The retry configuration to use for all supported methods + :param timeout_ms: Optional request timeout applied to each operation in milliseconds """ if client is None: - client = requests_http.Session() + client = httpx.Client() + + assert issubclass( + type(client), HttpClient + ), "The provided client must implement the HttpClient protocol." + if async_client is None: + async_client = httpx.AsyncClient() + + assert issubclass( + type(async_client), AsyncHttpClient + ), "The provided async_client must implement the AsyncHttpClient protocol." 
+ + security: Any = None if callable(api_key_auth): - def security(): - return shared.Security(api_key_auth = api_key_auth()) + security = lambda: shared.Security(api_key_auth = api_key_auth()) # pylint: disable=unnecessary-lambda-assignment else: security = shared.Security(api_key_auth = api_key_auth) @@ -51,13 +61,15 @@ def security(): server_url = utils.template_url(server_url, url_params) - self.sdk_configuration = SDKConfiguration( - client, - security, - server_url, - server, - retry_config=retry_config - ) + BaseSDK.__init__(self, SDKConfiguration( + client=client, + async_client=async_client, + security=security, + server_url=server_url, + server=server, + retry_config=retry_config, + timeout_ms=timeout_ms + )) hooks = SDKHooks() @@ -67,10 +79,11 @@ def security(): self.sdk_configuration.server_url = server_url # pylint: disable=protected-access - self.sdk_configuration.__dict__['_hooks'] = hooks + self.sdk_configuration.__dict__["_hooks"] = hooks self._init_sdks() def _init_sdks(self): self.general = General(self.sdk_configuration) + diff --git a/src/unstructured_client/sdkconfiguration.py b/src/unstructured_client/sdkconfiguration.py index a1da362f..239f64ab 100644 --- a/src/unstructured_client/sdkconfiguration.py +++ b/src/unstructured_client/sdkconfiguration.py @@ -1,48 +1,51 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -import requests as requests_http from ._hooks import SDKHooks -from .utils import utils -from .utils.retries import RetryConfig +from .httpclient import AsyncHttpClient, HttpClient +from .utils import RetryConfig, remove_suffix from dataclasses import dataclass +from pydantic import Field from typing import Callable, Dict, Optional, Tuple, Union from unstructured_client.models import shared +from unstructured_client.types import OptionalNullable, UNSET -SERVER_SAAS_API = 'saas-api' +SERVER_SAAS_API = "saas-api" r"""Serverless SaaS API""" -SERVER_FREE_API = 'free-api' +SERVER_FREE_API = "free-api" r"""Hosted API Free""" -SERVER_DEVELOPMENT = 'development' +SERVER_DEVELOPMENT = "development" r"""Development server""" SERVERS = { - SERVER_SAAS_API: 'https://api.unstructuredapp.io', - SERVER_FREE_API: 'https://api.unstructured.io', - SERVER_DEVELOPMENT: 'http://localhost:8000', + SERVER_SAAS_API: "https://api.unstructuredapp.io", + SERVER_FREE_API: "https://api.unstructured.io", + SERVER_DEVELOPMENT: "http://localhost:8000", } """Contains the list of servers available to the SDK""" @dataclass class SDKConfiguration: - client: requests_http.Session - security: Union[shared.Security,Callable[[], shared.Security]] = None - server_url: Optional[str] = '' - server: Optional[str] = '' - language: str = 'python' - openapi_doc_version: str = '1.0.44' - sdk_version: str = '0.25.5' - gen_version: str = '2.393.4' - user_agent: str = 'speakeasy-sdk/python 0.25.5 2.393.4 1.0.44 unstructured-client' - retry_config: Optional[RetryConfig] = None + client: HttpClient + async_client: AsyncHttpClient + security: Optional[Union[shared.Security,Callable[[], shared.Security]]] = None + server_url: Optional[str] = "" + server: Optional[str] = "" + language: str = "python" + openapi_doc_version: str = "1.0.44" + sdk_version: str = "0.26.0-beta" + gen_version: str = "2.379.3" + user_agent: str = "speakeasy-sdk/python 0.26.0-beta 2.379.3 1.0.44 unstructured-client" + 
retry_config: OptionalNullable[RetryConfig] = Field(default_factory=lambda: UNSET) + timeout_ms: Optional[int] = None def __post_init__(self): self._hooks = SDKHooks() def get_server_details(self) -> Tuple[str, Dict[str, str]]: - if self.server_url is not None and self.server_url != '': - return utils.remove_suffix(self.server_url, '/'), {} + if self.server_url is not None and self.server_url: + return remove_suffix(self.server_url, "/"), {} if not self.server: self.server = SERVER_SAAS_API diff --git a/src/unstructured_client/types/__init__.py b/src/unstructured_client/types/__init__.py new file mode 100644 index 00000000..28c55c1f --- /dev/null +++ b/src/unstructured_client/types/__init__.py @@ -0,0 +1,21 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +from .basemodel import ( + BaseModel, + Nullable, + OptionalNullable, + UnrecognizedInt, + UnrecognizedStr, + UNSET, + UNSET_SENTINEL, +) + +__all__ = [ + "BaseModel", + "Nullable", + "OptionalNullable", + "UnrecognizedInt", + "UnrecognizedStr", + "UNSET", + "UNSET_SENTINEL", +] diff --git a/src/unstructured_client/types/basemodel.py b/src/unstructured_client/types/basemodel.py new file mode 100644 index 00000000..b82525a7 --- /dev/null +++ b/src/unstructured_client/types/basemodel.py @@ -0,0 +1,35 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from pydantic import ConfigDict, model_serializer +from pydantic import BaseModel as PydanticBaseModel +from typing import Literal, Optional, TypeVar, Union, NewType +from typing_extensions import TypeAliasType + + +class BaseModel(PydanticBaseModel): + model_config = ConfigDict( + populate_by_name=True, arbitrary_types_allowed=True, protected_namespaces=() + ) + + +class Unset(BaseModel): + @model_serializer(mode="plain") + def serialize_model(self): + return UNSET_SENTINEL + + def __bool__(self) -> Literal[False]: + return False + + +UNSET = Unset() +UNSET_SENTINEL = "~?~unset~?~sentinel~?~" + + +T = TypeVar("T") +Nullable = TypeAliasType("Nullable", Union[T, None], type_params=(T,)) +OptionalNullable = TypeAliasType( + "OptionalNullable", Union[Optional[Nullable[T]], Unset], type_params=(T,) +) + +UnrecognizedInt = NewType("UnrecognizedInt", int) +UnrecognizedStr = NewType("UnrecognizedStr", str) diff --git a/src/unstructured_client/utils/__init__.py b/src/unstructured_client/utils/__init__.py index cbf886a7..3424e790 100644 --- a/src/unstructured_client/utils/__init__.py +++ b/src/unstructured_client/utils/__init__.py @@ -1,6 +1,76 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" -from .retries import * -from .utils import * -from .enums import * +from .annotations import get_discriminator +from .enums import OpenEnumMeta +from .headers import get_headers, get_response_headers +from .metadata import ( + FieldMetadata, + find_metadata, + FormMetadata, + HeaderMetadata, + MultipartFormMetadata, + PathParamMetadata, + QueryParamMetadata, + RequestMetadata, + SecurityMetadata, +) +from .queryparams import get_query_params +from .retries import BackoffStrategy, Retries, retry, retry_async, RetryConfig +from .requestbodies import serialize_request_body, SerializedRequestBody +from .security import get_security +from .serializers import ( + marshal_json, + unmarshal, + unmarshal_json, + serialize_decimal, + serialize_float, + serialize_int, + validate_decimal, + validate_float, + validate_int, + validate_open_enum, +) +from .url import generate_url, template_url, remove_suffix +from .values import get_global_from_env, match_content_type, match_status_codes, match_response +__all__ = [ + "BackoffStrategy", + "FieldMetadata", + "find_metadata", + "FormMetadata", + "generate_url", + "get_discriminator", + "get_global_from_env", + "get_headers", + "get_query_params", + "get_response_headers", + "get_security", + "HeaderMetadata", + "marshal_json", + "match_content_type", + "match_status_codes", + "match_response", + "MultipartFormMetadata", + "OpenEnumMeta", + "PathParamMetadata", + "QueryParamMetadata", + "remove_suffix", + "Retries", + "retry", + "retry_async", + "RetryConfig", + "RequestMetadata", + "SecurityMetadata", + "serialize_decimal", + "serialize_float", + "serialize_int", + "serialize_request_body", + "SerializedRequestBody", + "template_url", + "unmarshal", + "unmarshal_json", + "validate_decimal", + "validate_float", + "validate_int", + "validate_open_enum", +] diff --git a/src/unstructured_client/utils/annotations.py b/src/unstructured_client/utils/annotations.py new file mode 100644 index 00000000..a95637c5 --- 
/dev/null +++ b/src/unstructured_client/utils/annotations.py @@ -0,0 +1,19 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +from typing import Any + +def get_discriminator(model: Any, fieldname: str, key: str) -> str: + if isinstance(model, dict): + try: + return f'{model.get(key)}' + except AttributeError as e: + raise ValueError(f'Could not find discriminator key {key} in {model}') from e + + if hasattr(model, fieldname): + return f'{getattr(model, fieldname)}' + + fieldname = fieldname.upper() + if hasattr(model, fieldname): + return f'{getattr(model, fieldname)}' + + raise ValueError(f'Could not find discriminator field {fieldname} in {model}') diff --git a/src/unstructured_client/utils/enums.py b/src/unstructured_client/utils/enums.py index 4d3aec17..63a9ad4b 100644 --- a/src/unstructured_client/utils/enums.py +++ b/src/unstructured_client/utils/enums.py @@ -1,17 +1,34 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" import enum + class OpenEnumMeta(enum.EnumMeta): - def __call__(cls, value, names=None, *, module=None, qualname=None, type=None, start=1): + def __call__( + cls, value, names=None, *, module=None, qualname=None, type=None, start=1 + ): # The `type` kwarg also happens to be a built-in that pylint flags as # redeclared. Safe to ignore this lint rule with this scope. 
# pylint: disable=redefined-builtin if names is not None: - return super().__call__(value, names=names, module=module, qualname=qualname, type=type, start=start) + return super().__call__( + value, + names=names, + module=module, + qualname=qualname, + type=type, + start=start, + ) try: - return super().__call__(value, names=names, module=module, qualname=qualname, type=type, start=start) + return super().__call__( + value, + names=names, # pyright: ignore[reportArgumentType] + module=module, + qualname=qualname, + type=type, + start=start, + ) except ValueError: return value diff --git a/src/unstructured_client/utils/eventstreaming.py b/src/unstructured_client/utils/eventstreaming.py new file mode 100644 index 00000000..98dbeb3e --- /dev/null +++ b/src/unstructured_client/utils/eventstreaming.py @@ -0,0 +1,179 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +import re +import json +from typing import Callable, TypeVar, Optional, Generator, AsyncGenerator, Tuple +import httpx + +T = TypeVar("T") + + +class ServerEvent: + id: Optional[str] = None + event: Optional[str] = None + data: Optional[str] = None + retry: Optional[int] = None + + +MESSAGE_BOUNDARIES = [ + b"\r\n\r\n", + b"\n\n", + b"\r\r", +] + + +async def stream_events_async( + response: httpx.Response, + decoder: Callable[[str], T], + sentinel: Optional[str] = None, +) -> AsyncGenerator[T, None]: + buffer = bytearray() + position = 0 + discard = False + async for chunk in response.aiter_bytes(): + # We've encountered the sentinel value and should no longer process + # incoming data. Instead we throw new data away until the server closes + # the connection. 
+ if discard: + continue + + buffer += chunk + for i in range(position, len(buffer)): + char = buffer[i : i + 1] + seq: Optional[bytes] = None + if char in [b"\r", b"\n"]: + for boundary in MESSAGE_BOUNDARIES: + seq = _peek_sequence(i, buffer, boundary) + if seq is not None: + break + if seq is None: + continue + + block = buffer[position:i] + position = i + len(seq) + event, discard = _parse_event(block, decoder, sentinel) + if event is not None: + yield event + + if position > 0: + buffer = buffer[position:] + position = 0 + + event, discard = _parse_event(buffer, decoder, sentinel) + if event is not None: + yield event + + +def stream_events( + response: httpx.Response, + decoder: Callable[[str], T], + sentinel: Optional[str] = None, +) -> Generator[T, None, None]: + buffer = bytearray() + position = 0 + discard = False + for chunk in response.iter_bytes(): + # We've encountered the sentinel value and should no longer process + # incoming data. Instead we throw new data away until the server closes + # the connection. 
+ if discard: + continue + + buffer += chunk + for i in range(position, len(buffer)): + char = buffer[i : i + 1] + seq: Optional[bytes] = None + if char in [b"\r", b"\n"]: + for boundary in MESSAGE_BOUNDARIES: + seq = _peek_sequence(i, buffer, boundary) + if seq is not None: + break + if seq is None: + continue + + block = buffer[position:i] + position = i + len(seq) + event, discard = _parse_event(block, decoder, sentinel) + if event is not None: + yield event + + if position > 0: + buffer = buffer[position:] + position = 0 + + event, discard = _parse_event(buffer, decoder, sentinel) + if event is not None: + yield event + + +def _parse_event( + raw: bytearray, decoder: Callable[[str], T], sentinel: Optional[str] = None +) -> Tuple[Optional[T], bool]: + block = raw.decode() + lines = re.split(r"\r?\n|\r", block) + publish = False + event = ServerEvent() + data = "" + for line in lines: + if not line: + continue + + delim = line.find(":") + if delim <= 0: + continue + + field = line[0:delim] + value = line[delim + 1 :] if delim < len(line) - 1 else "" + if len(value) and value[0] == " ": + value = value[1:] + + if field == "event": + event.event = value + publish = True + elif field == "data": + data += value + "\n" + publish = True + elif field == "id": + event.id = value + publish = True + elif field == "retry": + event.retry = int(value) if value.isdigit() else None + publish = True + + if sentinel and data == f"{sentinel}\n": + return None, True + + if data: + data = data[:-1] + event.data = data + + if ( + data.isnumeric() + or data == "true" + or data == "false" + or data == "null" + or data.startswith("{") + or data.startswith("[") + or data.startswith('"') + ): + try: + event.data = json.loads(data) + except Exception: + pass + + out = None + if publish: + out = decoder(json.dumps(event.__dict__)) + + return out, False + + +def _peek_sequence(position: int, buffer: bytearray, sequence: bytes): + if len(sequence) > (len(buffer) - position): + return None + + 
for i, seq in enumerate(sequence): + if buffer[position + i] != seq: + return None + + return sequence diff --git a/src/unstructured_client/utils/forms.py b/src/unstructured_client/utils/forms.py new file mode 100644 index 00000000..686b74f2 --- /dev/null +++ b/src/unstructured_client/utils/forms.py @@ -0,0 +1,207 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +from typing import ( + Any, + Dict, + get_type_hints, + List, + Tuple, +) +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .serializers import marshal_json + +from .metadata import ( + FormMetadata, + MultipartFormMetadata, + find_field_metadata, +) +from .values import _val_to_string + + +def _populate_form( + field_name: str, + explode: bool, + obj: Any, + delimiter: str, + form: Dict[str, List[str]], +): + if obj is None: + return form + + if isinstance(obj, BaseModel): + items = [] + + obj_fields: Dict[str, FieldInfo] = obj.__class__.model_fields + for name in obj_fields: + obj_field = obj_fields[name] + obj_field_name = obj_field.alias if obj_field.alias is not None else name + if obj_field_name == "": + continue + + val = getattr(obj, name) + if val is None: + continue + + if explode: + form[obj_field_name] = [_val_to_string(val)] + else: + items.append(f"{obj_field_name}{delimiter}{_val_to_string(val)}") + + if len(items) > 0: + form[field_name] = [delimiter.join(items)] + elif isinstance(obj, Dict): + items = [] + for key, value in obj.items(): + if value is None: + continue + + if explode: + form[key] = [_val_to_string(value)] + else: + items.append(f"{key}{delimiter}{_val_to_string(value)}") + + if len(items) > 0: + form[field_name] = [delimiter.join(items)] + elif isinstance(obj, List): + items = [] + + for value in obj: + if value is None: + continue + + if explode: + if not field_name in form: + form[field_name] = [] + form[field_name].append(_val_to_string(value)) + else: + items.append(_val_to_string(value)) + + if len(items) > 0: 
+ form[field_name] = [delimiter.join([str(item) for item in items])] + else: + form[field_name] = [_val_to_string(obj)] + + return form + + +def serialize_multipart_form( + media_type: str, request: Any +) -> Tuple[str, Dict[str, Any], Dict[str, Any]]: + form: Dict[str, Any] = {} + files: Dict[str, Any] = {} + + if not isinstance(request, BaseModel): + raise TypeError("invalid request body type") + + request_fields: Dict[str, FieldInfo] = request.__class__.model_fields + request_field_types = get_type_hints(request.__class__) + + for name in request_fields: + field = request_fields[name] + + val = getattr(request, name) + if val is None: + continue + + field_metadata = find_field_metadata(field, MultipartFormMetadata) + if not field_metadata: + continue + + f_name = field.alias if field.alias is not None else name + + if field_metadata.file: + file_fields: Dict[str, FieldInfo] = val.__class__.model_fields + + file_name = "" + field_name = "" + content = None + content_type = None + + for file_field_name in file_fields: + file_field = file_fields[file_field_name] + + file_metadata = find_field_metadata(file_field, MultipartFormMetadata) + if file_metadata is None: + continue + + if file_metadata.content: + content = getattr(val, file_field_name, None) + elif file_field_name == "content_type": + content_type = getattr(val, file_field_name, None) + else: + field_name = ( + file_field.alias + if file_field.alias is not None + else file_field_name + ) + file_name = getattr(val, file_field_name) + + if field_name == "" or file_name == "" or content is None: + raise ValueError("invalid multipart/form-data file") + + if content_type is not None: + files[field_name] = (file_name, content, content_type) + else: + files[field_name] = (file_name, content) + elif field_metadata.json: + files[f_name] = ( + None, + marshal_json(val, request_field_types[name]), + "application/json", + ) + else: + if isinstance(val, List): + values = [] + + for value in val: + if value is None: + 
continue + values.append(_val_to_string(value)) + + form[f_name + "[]"] = values + else: + form[f_name] = _val_to_string(val) + return media_type, form, files + + +def serialize_form_data(data: Any) -> Dict[str, Any]: + form: Dict[str, List[str]] = {} + + if isinstance(data, BaseModel): + data_fields: Dict[str, FieldInfo] = data.__class__.model_fields + data_field_types = get_type_hints(data.__class__) + for name in data_fields: + field = data_fields[name] + + val = getattr(data, name) + if val is None: + continue + + metadata = find_field_metadata(field, FormMetadata) + if metadata is None: + continue + + f_name = field.alias if field.alias is not None else name + + if metadata.json: + form[f_name] = [marshal_json(val, data_field_types[name])] + else: + if metadata.style == "form": + _populate_form( + f_name, + metadata.explode, + val, + ",", + form, + ) + else: + raise ValueError(f"Invalid form style for field {name}") + elif isinstance(data, Dict): + for key, value in data.items(): + form[key] = [_val_to_string(value)] + else: + raise TypeError(f"Invalid request body type {type(data)} for form data") + + return form diff --git a/src/unstructured_client/utils/headers.py b/src/unstructured_client/utils/headers.py new file mode 100644 index 00000000..483f0bbe --- /dev/null +++ b/src/unstructured_client/utils/headers.py @@ -0,0 +1,136 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from typing import ( + Any, + Dict, + List, + Optional, +) +from httpx import Headers +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .metadata import ( + HeaderMetadata, + find_field_metadata, +) + +from .values import _populate_from_globals, _val_to_string + + +def get_headers(headers_params: Any, gbls: Optional[Any] = None) -> Dict[str, str]: + headers: Dict[str, str] = {} + + globals_already_populated = [] + if headers_params is not None: + globals_already_populated = _populate_headers(headers_params, gbls, headers, []) + if gbls is not None: + _populate_headers(gbls, None, headers, globals_already_populated) + + return headers + + +def _populate_headers( + headers_params: Any, + gbls: Any, + header_values: Dict[str, str], + skip_fields: List[str], +) -> List[str]: + globals_already_populated: List[str] = [] + + if not isinstance(headers_params, BaseModel): + return globals_already_populated + + param_fields: Dict[str, FieldInfo] = headers_params.__class__.model_fields + for name in param_fields: + if name in skip_fields: + continue + + field = param_fields[name] + f_name = field.alias if field.alias is not None else name + + metadata = find_field_metadata(field, HeaderMetadata) + if metadata is None: + continue + + value, global_found = _populate_from_globals( + name, getattr(headers_params, name), HeaderMetadata, gbls + ) + if global_found: + globals_already_populated.append(name) + value = _serialize_header(metadata.explode, value) + + if value != "": + header_values[f_name] = value + + return globals_already_populated + + +def _serialize_header(explode: bool, obj: Any) -> str: + if obj is None: + return "" + + if isinstance(obj, BaseModel): + items = [] + obj_fields: Dict[str, FieldInfo] = obj.__class__.model_fields + for name in obj_fields: + obj_field = obj_fields[name] + obj_param_metadata = find_field_metadata(obj_field, HeaderMetadata) + + if not obj_param_metadata: + continue + + f_name = 
obj_field.alias if obj_field.alias is not None else name + + val = getattr(obj, name) + if val is None: + continue + + if explode: + items.append(f"{f_name}={_val_to_string(val)}") + else: + items.append(f_name) + items.append(_val_to_string(val)) + + if len(items) > 0: + return ",".join(items) + elif isinstance(obj, Dict): + items = [] + + for key, value in obj.items(): + if value is None: + continue + + if explode: + items.append(f"{key}={_val_to_string(value)}") + else: + items.append(key) + items.append(_val_to_string(value)) + + if len(items) > 0: + return ",".join([str(item) for item in items]) + elif isinstance(obj, List): + items = [] + + for value in obj: + if value is None: + continue + + items.append(_val_to_string(value)) + + if len(items) > 0: + return ",".join(items) + else: + return f"{_val_to_string(obj)}" + + return "" + + +def get_response_headers(headers: Headers) -> Dict[str, List[str]]: + res: Dict[str, List[str]] = {} + for k, v in headers.items(): + if not k in res: + res[k] = [] + + res[k].append(v) + return res diff --git a/src/unstructured_client/utils/metadata.py b/src/unstructured_client/utils/metadata.py new file mode 100644 index 00000000..404d21e9 --- /dev/null +++ b/src/unstructured_client/utils/metadata.py @@ -0,0 +1,118 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from typing import Optional, Type, TypeVar, Union +from dataclasses import dataclass +from pydantic.fields import FieldInfo + + +T = TypeVar("T") + + +@dataclass +class SecurityMetadata: + option: bool = False + scheme: bool = False + scheme_type: Optional[str] = None + sub_type: Optional[str] = None + field_name: Optional[str] = None + + def get_field_name(self, default: str) -> str: + return self.field_name or default + + +@dataclass +class ParamMetadata: + serialization: Optional[str] = None + style: str = "simple" + explode: bool = False + + +@dataclass +class PathParamMetadata(ParamMetadata): + pass + + +@dataclass +class QueryParamMetadata(ParamMetadata): + style: str = "form" + explode: bool = True + + +@dataclass +class HeaderMetadata(ParamMetadata): + pass + + +@dataclass +class RequestMetadata: + media_type: str = "application/octet-stream" + + +@dataclass +class MultipartFormMetadata: + file: bool = False + content: bool = False + json: bool = False + + +@dataclass +class FormMetadata: + json: bool = False + style: str = "form" + explode: bool = True + + +class FieldMetadata: + security: Optional[SecurityMetadata] = None + path: Optional[PathParamMetadata] = None + query: Optional[QueryParamMetadata] = None + header: Optional[HeaderMetadata] = None + request: Optional[RequestMetadata] = None + form: Optional[FormMetadata] = None + multipart: Optional[MultipartFormMetadata] = None + + def __init__( + self, + security: Optional[SecurityMetadata] = None, + path: Optional[Union[PathParamMetadata, bool]] = None, + query: Optional[Union[QueryParamMetadata, bool]] = None, + header: Optional[Union[HeaderMetadata, bool]] = None, + request: Optional[Union[RequestMetadata, bool]] = None, + form: Optional[Union[FormMetadata, bool]] = None, + multipart: Optional[Union[MultipartFormMetadata, bool]] = None, + ): + self.security = security + self.path = PathParamMetadata() if isinstance(path, bool) else path + self.query = QueryParamMetadata() if 
isinstance(query, bool) else query + self.header = HeaderMetadata() if isinstance(header, bool) else header + self.request = RequestMetadata() if isinstance(request, bool) else request + self.form = FormMetadata() if isinstance(form, bool) else form + self.multipart = ( + MultipartFormMetadata() if isinstance(multipart, bool) else multipart + ) + + +def find_field_metadata(field_info: FieldInfo, metadata_type: Type[T]) -> Optional[T]: + metadata = find_metadata(field_info, FieldMetadata) + if not metadata: + return None + + fields = metadata.__dict__ + + for field in fields: + if isinstance(fields[field], metadata_type): + return fields[field] + + return None + + +def find_metadata(field_info: FieldInfo, metadata_type: Type[T]) -> Optional[T]: + metadata = field_info.metadata + if not metadata: + return None + + for md in metadata: + if isinstance(md, metadata_type): + return md + + return None diff --git a/src/unstructured_client/utils/queryparams.py b/src/unstructured_client/utils/queryparams.py new file mode 100644 index 00000000..130b31e2 --- /dev/null +++ b/src/unstructured_client/utils/queryparams.py @@ -0,0 +1,203 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from typing import ( + Any, + Dict, + get_type_hints, + List, + Optional, +) + +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .metadata import ( + QueryParamMetadata, + find_field_metadata, +) +from .values import _get_serialized_params, _populate_from_globals, _val_to_string +from .forms import _populate_form + + +def get_query_params( + query_params: Any, + gbls: Optional[Any] = None, +) -> Dict[str, List[str]]: + params: Dict[str, List[str]] = {} + + globals_already_populated = _populate_query_params(query_params, gbls, params, []) + if gbls is not None: + _populate_query_params(gbls, None, params, globals_already_populated) + + return params + + +def _populate_query_params( + query_params: Any, + gbls: Any, + query_param_values: Dict[str, List[str]], + skip_fields: List[str], +) -> List[str]: + globals_already_populated: List[str] = [] + + if not isinstance(query_params, BaseModel): + return globals_already_populated + + param_fields: Dict[str, FieldInfo] = query_params.__class__.model_fields + param_field_types = get_type_hints(query_params.__class__) + for name in param_fields: + if name in skip_fields: + continue + + field = param_fields[name] + + metadata = find_field_metadata(field, QueryParamMetadata) + if not metadata: + continue + + value = getattr(query_params, name) if query_params is not None else None + + value, global_found = _populate_from_globals( + name, value, QueryParamMetadata, gbls + ) + if global_found: + globals_already_populated.append(name) + + f_name = field.alias if field.alias is not None else name + serialization = metadata.serialization + if serialization is not None: + serialized_parms = _get_serialized_params( + metadata, f_name, value, param_field_types[name] + ) + for key, value in serialized_parms.items(): + if key in query_param_values: + query_param_values[key].extend(value) + else: + query_param_values[key] = [value] + else: + style = metadata.style + if style == 
"deepObject": + _populate_deep_object_query_params(f_name, value, query_param_values) + elif style == "form": + _populate_delimited_query_params( + metadata, f_name, value, ",", query_param_values + ) + elif style == "pipeDelimited": + _populate_delimited_query_params( + metadata, f_name, value, "|", query_param_values + ) + else: + raise NotImplementedError( + f"query param style {style} not yet supported" + ) + + return globals_already_populated + + +def _populate_deep_object_query_params( + field_name: str, + obj: Any, + params: Dict[str, List[str]], +): + if obj is None: + return + + if isinstance(obj, BaseModel): + _populate_deep_object_query_params_basemodel(field_name, obj, params) + elif isinstance(obj, Dict): + _populate_deep_object_query_params_dict(field_name, obj, params) + + +def _populate_deep_object_query_params_basemodel( + prior_params_key: str, + obj: Any, + params: Dict[str, List[str]], +): + if obj is None: + return + + if not isinstance(obj, BaseModel): + return + + obj_fields: Dict[str, FieldInfo] = obj.__class__.model_fields + for name in obj_fields: + obj_field = obj_fields[name] + + f_name = obj_field.alias if obj_field.alias is not None else name + + params_key = f"{prior_params_key}[{f_name}]" + + obj_param_metadata = find_field_metadata(obj_field, QueryParamMetadata) + if obj_param_metadata is None: + continue + + obj_val = getattr(obj, name) + if obj_val is None: + continue + + if isinstance(obj_val, BaseModel): + _populate_deep_object_query_params_basemodel(params_key, obj_val, params) + elif isinstance(obj_val, Dict): + _populate_deep_object_query_params_dict(params_key, obj_val, params) + elif isinstance(obj_val, List): + _populate_deep_object_query_params_list(params_key, obj_val, params) + else: + params[params_key] = [_val_to_string(obj_val)] + + +def _populate_deep_object_query_params_dict( + prior_params_key: str, + value: Dict, + params: Dict[str, List[str]], +): + if value is None: + return + + for key, val in value.items(): + 
if val is None: + continue + + params_key = f"{prior_params_key}[{key}]" + + if isinstance(val, BaseModel): + _populate_deep_object_query_params_basemodel(params_key, val, params) + elif isinstance(val, Dict): + _populate_deep_object_query_params_dict(params_key, val, params) + elif isinstance(val, List): + _populate_deep_object_query_params_list(params_key, val, params) + else: + params[params_key] = [_val_to_string(val)] + + +def _populate_deep_object_query_params_list( + params_key: str, + value: List, + params: Dict[str, List[str]], +): + if value is None: + return + + for val in value: + if val is None: + continue + + if params.get(params_key) is None: + params[params_key] = [] + + params[params_key].append(_val_to_string(val)) + + +def _populate_delimited_query_params( + metadata: QueryParamMetadata, + field_name: str, + obj: Any, + delimiter: str, + query_param_values: Dict[str, List[str]], +): + _populate_form( + field_name, + metadata.explode, + obj, + delimiter, + query_param_values, + ) diff --git a/src/unstructured_client/utils/requestbodies.py b/src/unstructured_client/utils/requestbodies.py new file mode 100644 index 00000000..0090892f --- /dev/null +++ b/src/unstructured_client/utils/requestbodies.py @@ -0,0 +1,66 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +import io +from dataclasses import dataclass +import re +from typing import ( + Any, + Optional, +) + +from .forms import serialize_form_data, serialize_multipart_form + +from .serializers import marshal_json + +SERIALIZATION_METHOD_TO_CONTENT_TYPE = { + "json": "application/json", + "form": "application/x-www-form-urlencoded", + "multipart": "multipart/form-data", + "raw": "application/octet-stream", + "string": "text/plain", +} + + +@dataclass +class SerializedRequestBody: + media_type: str + content: Optional[Any] = None + data: Optional[Any] = None + files: Optional[Any] = None + + +def serialize_request_body( + request_body: Any, + nullable: bool, + optional: bool, + serialization_method: str, + request_body_type, +) -> Optional[SerializedRequestBody]: + if request_body is None: + if not nullable and optional: + return None + + media_type = SERIALIZATION_METHOD_TO_CONTENT_TYPE[serialization_method] + + serialized_request_body = SerializedRequestBody(media_type) + + if re.match(r"(application|text)\/.*?\+*json.*", media_type) is not None: + serialized_request_body.content = marshal_json(request_body, request_body_type) + elif re.match(r"multipart\/.*", media_type) is not None: + ( + serialized_request_body.media_type, + serialized_request_body.data, + serialized_request_body.files, + ) = serialize_multipart_form(media_type, request_body) + elif re.match(r"application\/x-www-form-urlencoded.*", media_type) is not None: + serialized_request_body.data = serialize_form_data(request_body) + elif isinstance(request_body, (bytes, bytearray, io.BytesIO, io.BufferedReader)): + serialized_request_body.content = request_body + elif isinstance(request_body, str): + serialized_request_body.content = request_body + else: + raise TypeError( + f"invalid request body type {type(request_body)} for mediaType {media_type}" + ) + + return serialized_request_body diff --git a/src/unstructured_client/utils/retries.py b/src/unstructured_client/utils/retries.py index 
5edae538..03a59b03 100644 --- a/src/unstructured_client/utils/retries.py +++ b/src/unstructured_client/utils/retries.py @@ -1,10 +1,10 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" import random import time from typing import List -import requests +import httpx class BackoffStrategy: @@ -13,7 +13,13 @@ class BackoffStrategy: exponent: float max_elapsed_time: int - def __init__(self, initial_interval: int, max_interval: int, exponent: float, max_elapsed_time: int): + def __init__( + self, + initial_interval: int, + max_interval: int, + exponent: float, + max_elapsed_time: int, + ): self.initial_interval = initial_interval self.max_interval = max_interval self.exponent = exponent @@ -25,7 +31,9 @@ class RetryConfig: backoff: BackoffStrategy retry_connection_errors: bool - def __init__(self, strategy: str, backoff: BackoffStrategy, retry_connection_errors: bool): + def __init__( + self, strategy: str, backoff: BackoffStrategy, retry_connection_errors: bool + ): self.strategy = strategy self.backoff = backoff self.retry_connection_errors = retry_connection_errors @@ -41,9 +49,9 @@ def __init__(self, config: RetryConfig, status_codes: List[str]): class TemporaryError(Exception): - response: requests.Response + response: httpx.Response - def __init__(self, response: requests.Response): + def __init__(self, response: httpx.Response): self.response = response @@ -55,9 +63,10 @@ def __init__(self, inner: Exception): def retry(func, retries: Retries): - if retries.config.strategy == 'backoff': - def do_request(): - res: requests.Response + if retries.config.strategy == "backoff": + + def do_request() -> httpx.Response: + res: httpx.Response try: res = func() @@ -74,12 +83,12 @@ def do_request(): if res.status_code == parsed_code: raise TemporaryError(res) - except requests.exceptions.ConnectionError as exception: + except httpx.ConnectError as exception: if 
retries.config.retry_connection_errors: raise raise PermanentError(exception) from exception - except requests.exceptions.Timeout as exception: + except httpx.TimeoutException as exception: if retries.config.retry_connection_errors: raise @@ -91,13 +100,74 @@ def do_request(): return res - return retry_with_backoff(do_request, retries.config.backoff.initial_interval, retries.config.backoff.max_interval, retries.config.backoff.exponent, retries.config.backoff.max_elapsed_time) + return retry_with_backoff( + do_request, + retries.config.backoff.initial_interval, + retries.config.backoff.max_interval, + retries.config.backoff.exponent, + retries.config.backoff.max_elapsed_time, + ) return func() -def retry_with_backoff(func, initial_interval=500, max_interval=60000, exponent=1.5, max_elapsed_time=3600000): - start = round(time.time()*1000) +async def retry_async(func, retries: Retries): + if retries.config.strategy == "backoff": + + async def do_request() -> httpx.Response: + res: httpx.Response + try: + res = await func() + + for code in retries.status_codes: + if "X" in code.upper(): + code_range = int(code[0]) + + status_major = res.status_code / 100 + + if status_major >= code_range and status_major < code_range + 1: + raise TemporaryError(res) + else: + parsed_code = int(code) + + if res.status_code == parsed_code: + raise TemporaryError(res) + except httpx.ConnectError as exception: + if retries.config.retry_connection_errors: + raise + + raise PermanentError(exception) from exception + except httpx.TimeoutException as exception: + if retries.config.retry_connection_errors: + raise + + raise PermanentError(exception) from exception + except TemporaryError: + raise + except Exception as exception: + raise PermanentError(exception) from exception + + return res + + return await retry_with_backoff_async( + do_request, + retries.config.backoff.initial_interval, + retries.config.backoff.max_interval, + retries.config.backoff.exponent, + 
retries.config.backoff.max_elapsed_time, + ) + + return await func() + + +def retry_with_backoff( + func, + initial_interval=500, + max_interval=60000, + exponent=1.5, + max_elapsed_time=3600000, +): + start = round(time.time() * 1000) retries = 0 while True: @@ -106,14 +176,41 @@ def retry_with_backoff(func, initial_interval=500, max_interval=60000, exponent= except PermanentError as exception: raise exception.inner except Exception as exception: # pylint: disable=broad-exception-caught - now = round(time.time()*1000) + now = round(time.time() * 1000) + if now - start > max_elapsed_time: + if isinstance(exception, TemporaryError): + return exception.response + + raise + sleep = (initial_interval / 1000) * exponent**retries + random.uniform(0, 1) + sleep = min(sleep, max_interval / 1000) + time.sleep(sleep) + retries += 1 + + +async def retry_with_backoff_async( + func, + initial_interval=500, + max_interval=60000, + exponent=1.5, + max_elapsed_time=3600000, +): + start = round(time.time() * 1000) + retries = 0 + + while True: + try: + return await func() + except PermanentError as exception: + raise exception.inner + except Exception as exception: # pylint: disable=broad-exception-caught + now = round(time.time() * 1000) if now - start > max_elapsed_time: if isinstance(exception, TemporaryError): return exception.response raise - sleep = ((initial_interval/1000) * - exponent**retries + random.uniform(0, 1)) + sleep = (initial_interval / 1000) * exponent**retries + random.uniform(0, 1) sleep = min(sleep, max_interval / 1000) time.sleep(sleep) retries += 1 diff --git a/src/unstructured_client/utils/security.py b/src/unstructured_client/utils/security.py new file mode 100644 index 00000000..0e89cb8b --- /dev/null +++ b/src/unstructured_client/utils/security.py @@ -0,0 +1,168 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +import base64 +from typing import ( + Any, + Dict, + List, + Tuple, +) +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .metadata import ( + SecurityMetadata, + find_field_metadata, +) + + + +def get_security(security: Any) -> Tuple[Dict[str, str], Dict[str, List[str]]]: + headers: Dict[str, str] = {} + query_params: Dict[str, List[str]] = {} + + if security is None: + return headers, query_params + + if not isinstance(security, BaseModel): + raise TypeError("security must be a pydantic model") + + sec_fields: Dict[str, FieldInfo] = security.__class__.model_fields + for name in sec_fields: + sec_field = sec_fields[name] + + value = getattr(security, name) + if value is None: + continue + + metadata = find_field_metadata(sec_field, SecurityMetadata) + if metadata is None: + continue + if metadata.option: + _parse_security_option(headers, query_params, value) + return headers, query_params + if metadata.scheme: + # Special case for basic auth which could be a flattened model + if metadata.sub_type == "basic" and not isinstance(value, BaseModel): + _parse_security_scheme(headers, query_params, metadata, name, security) + else: + _parse_security_scheme(headers, query_params, metadata, name, value) + + return headers, query_params + + +def _parse_security_option( + headers: Dict[str, str], query_params: Dict[str, List[str]], option: Any +): + if not isinstance(option, BaseModel): + raise TypeError("security option must be a pydantic model") + + opt_fields: Dict[str, FieldInfo] = option.__class__.model_fields + for name in opt_fields: + opt_field = opt_fields[name] + + metadata = find_field_metadata(opt_field, SecurityMetadata) + if metadata is None or not metadata.scheme: + continue + _parse_security_scheme( + headers, query_params, metadata, name, getattr(option, name) + ) + + +def _parse_security_scheme( + headers: Dict[str, str], + query_params: Dict[str, List[str]], + scheme_metadata: SecurityMetadata, + field_name: 
str, + scheme: Any, +): + scheme_type = scheme_metadata.scheme_type + sub_type = scheme_metadata.sub_type + + if isinstance(scheme, BaseModel): + if scheme_type == "http" and sub_type == "basic": + _parse_basic_auth_scheme(headers, scheme) + return + + scheme_fields: Dict[str, FieldInfo] = scheme.__class__.model_fields + for name in scheme_fields: + scheme_field = scheme_fields[name] + + metadata = find_field_metadata(scheme_field, SecurityMetadata) + if metadata is None or metadata.field_name is None: + continue + + value = getattr(scheme, name) + + _parse_security_scheme_value( + headers, query_params, scheme_metadata, metadata, name, value + ) + else: + _parse_security_scheme_value( + headers, query_params, scheme_metadata, scheme_metadata, field_name, scheme + ) + + +def _parse_security_scheme_value( + headers: Dict[str, str], + query_params: Dict[str, List[str]], + scheme_metadata: SecurityMetadata, + security_metadata: SecurityMetadata, + field_name: str, + value: Any, +): + scheme_type = scheme_metadata.scheme_type + sub_type = scheme_metadata.sub_type + + header_name = security_metadata.get_field_name(field_name) + + if scheme_type == "apiKey": + if sub_type == "header": + headers[header_name] = value + elif sub_type == "query": + query_params[header_name] = [value] + else: + raise ValueError(f"sub type {sub_type} not supported") + elif scheme_type == "openIdConnect": + headers[header_name] = _apply_bearer(value) + elif scheme_type == "oauth2": + if sub_type != "client_credentials": + headers[header_name] = _apply_bearer(value) + elif scheme_type == "http": + if sub_type == "bearer": + headers[header_name] = _apply_bearer(value) + else: + raise ValueError(f"sub type {sub_type} not supported") + else: + raise ValueError(f"scheme type {scheme_type} not supported") + + +def _apply_bearer(token: str) -> str: + return token.lower().startswith("bearer ") and token or f"Bearer {token}" + + +def _parse_basic_auth_scheme(headers: Dict[str, str], scheme: Any): + 
username = "" + password = "" + + if not isinstance(scheme, BaseModel): + raise TypeError("basic auth scheme must be a pydantic model") + + scheme_fields: Dict[str, FieldInfo] = scheme.__class__.model_fields + for name in scheme_fields: + scheme_field = scheme_fields[name] + + metadata = find_field_metadata(scheme_field, SecurityMetadata) + if metadata is None or metadata.field_name is None: + continue + + field_name = metadata.field_name + value = getattr(scheme, name) + + if field_name == "username": + username = value + if field_name == "password": + password = value + + data = f"{username}:{password}".encode() + headers["Authorization"] = f"Basic {base64.b64encode(data).decode()}" diff --git a/src/unstructured_client/utils/serializers.py b/src/unstructured_client/utils/serializers.py new file mode 100644 index 00000000..3d427ad2 --- /dev/null +++ b/src/unstructured_client/utils/serializers.py @@ -0,0 +1,158 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +from decimal import Decimal +import json +from typing import Any, Union, get_args +from typing_extensions import get_origin +from pydantic import ConfigDict, create_model +from pydantic_core import from_json +from typing_inspect import is_optional_type + +from ..types.basemodel import Nullable, OptionalNullable + + +def serialize_decimal(as_str: bool): + def serialize(d): + if is_optional_type(type(d)) and d is None: + return None + + if not isinstance(d, Decimal): + raise ValueError("Expected Decimal object") + + return str(d) if as_str else float(d) + + return serialize + + +def validate_decimal(d): + if d is None: + return None + + if isinstance(d, Decimal): + return d + + if not isinstance(d, (str, int, float)): + raise ValueError("Expected string, int or float") + + return Decimal(str(d)) + + +def serialize_float(as_str: bool): + def serialize(f): + if is_optional_type(type(f)) and f is None: + return None + + if not isinstance(f, float): + raise ValueError("Expected 
float") + + return str(f) if as_str else f + + return serialize + + +def validate_float(f): + if f is None: + return None + + if isinstance(f, float): + return f + + if not isinstance(f, str): + raise ValueError("Expected string") + + return float(f) + + +def serialize_int(as_str: bool): + def serialize(b): + if is_optional_type(type(b)) and b is None: + return None + + if not isinstance(b, int): + raise ValueError("Expected int") + + return str(b) if as_str else b + + return serialize + + +def validate_int(b): + if b is None: + return None + + if isinstance(b, int): + return b + + if not isinstance(b, str): + raise ValueError("Expected string") + + return int(b) + + +def validate_open_enum(is_int: bool): + def validate(e): + if e is None: + return None + + if is_int: + if not isinstance(e, int): + raise ValueError("Expected int") + else: + if not isinstance(e, str): + raise ValueError("Expected string") + + return e + + return validate + + +def unmarshal_json(raw, typ: Any) -> Any: + return unmarshal(from_json(raw), typ) + + +def unmarshal(val, typ: Any) -> Any: + unmarshaller = create_model( + "Unmarshaller", + body=(typ, ...), + __config__=ConfigDict(populate_by_name=True, arbitrary_types_allowed=True), + ) + + m = unmarshaller(body=val) + + # pyright: ignore[reportAttributeAccessIssue] + return m.body # type: ignore + + +def marshal_json(val, typ): + if is_nullable(typ) and val is None: + return "null" + + marshaller = create_model( + "Marshaller", + body=(typ, ...), + __config__=ConfigDict(populate_by_name=True, arbitrary_types_allowed=True), + ) + + m = marshaller(body=val) + + d = m.model_dump(by_alias=True, mode="json", exclude_none=True) + + if len(d) == 0: + return "" + + return json.dumps(d[next(iter(d))], separators=(",", ":"), sort_keys=True) + + +def is_nullable(field): + origin = get_origin(field) + if origin is Nullable or origin is OptionalNullable: + return True + + if not origin is Union or type(None) not in get_args(field): + return False + + 
for arg in get_args(field): + if get_origin(arg) is Nullable or get_origin(arg) is OptionalNullable: + return True + + return False diff --git a/src/unstructured_client/utils/url.py b/src/unstructured_client/utils/url.py new file mode 100644 index 00000000..abcdb02d --- /dev/null +++ b/src/unstructured_client/utils/url.py @@ -0,0 +1,150 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). DO NOT EDIT.""" + +from decimal import Decimal +from typing import ( + Any, + Dict, + get_type_hints, + List, + Optional, + Union, + get_args, + get_origin, +) +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .metadata import ( + PathParamMetadata, + find_field_metadata, +) +from .values import _get_serialized_params, _populate_from_globals, _val_to_string + + +def generate_url( + server_url: str, + path: str, + path_params: Any, + gbls: Optional[Any] = None, +) -> str: + path_param_values: Dict[str, str] = {} + + globals_already_populated = _populate_path_params( + path_params, gbls, path_param_values, [] + ) + if gbls is not None: + _populate_path_params(gbls, None, path_param_values, globals_already_populated) + + for key, value in path_param_values.items(): + path = path.replace("{" + key + "}", value, 1) + + return remove_suffix(server_url, "/") + path + + +def _populate_path_params( + path_params: Any, + gbls: Any, + path_param_values: Dict[str, str], + skip_fields: List[str], +) -> List[str]: + globals_already_populated: List[str] = [] + + if not isinstance(path_params, BaseModel): + return globals_already_populated + + path_param_fields: Dict[str, FieldInfo] = path_params.__class__.model_fields + path_param_field_types = get_type_hints(path_params.__class__) + for name in path_param_fields: + if name in skip_fields: + continue + + field = path_param_fields[name] + + param_metadata = find_field_metadata(field, PathParamMetadata) + if param_metadata is None: + continue + + param = getattr(path_params, name) if path_params is not None 
else None + param, global_found = _populate_from_globals( + name, param, PathParamMetadata, gbls + ) + if global_found: + globals_already_populated.append(name) + + if param is None: + continue + + f_name = field.alias if field.alias is not None else name + serialization = param_metadata.serialization + if serialization is not None: + serialized_params = _get_serialized_params( + param_metadata, f_name, param, path_param_field_types[name] + ) + for key, value in serialized_params.items(): + path_param_values[key] = value + else: + pp_vals: List[str] = [] + if param_metadata.style == "simple": + if isinstance(param, List): + for pp_val in param: + if pp_val is None: + continue + pp_vals.append(_val_to_string(pp_val)) + path_param_values[f_name] = ",".join(pp_vals) + elif isinstance(param, Dict): + for pp_key in param: + if param[pp_key] is None: + continue + if param_metadata.explode: + pp_vals.append(f"{pp_key}={_val_to_string(param[pp_key])}") + else: + pp_vals.append(f"{pp_key},{_val_to_string(param[pp_key])}") + path_param_values[f_name] = ",".join(pp_vals) + elif not isinstance(param, (str, int, float, complex, bool, Decimal)): + param_fields: Dict[str, FieldInfo] = param.__class__.model_fields + for name in param_fields: + param_field = param_fields[name] + + param_value_metadata = find_field_metadata( + param_field, PathParamMetadata + ) + if param_value_metadata is None: + continue + + param_name = ( + param_field.alias if param_field.alias is not None else name + ) + + param_field_val = getattr(param, name) + if param_field_val is None: + continue + if param_metadata.explode: + pp_vals.append( + f"{param_name}={_val_to_string(param_field_val)}" + ) + else: + pp_vals.append( + f"{param_name},{_val_to_string(param_field_val)}" + ) + path_param_values[f_name] = ",".join(pp_vals) + else: + path_param_values[f_name] = _val_to_string(param) + + return globals_already_populated + + +def is_optional(field): + return get_origin(field) is Union and type(None) in 
get_args(field) + + +def template_url(url_with_params: str, params: Dict[str, str]) -> str: + for key, value in params.items(): + url_with_params = url_with_params.replace("{" + key + "}", value) + + return url_with_params + + +def remove_suffix(input_string, suffix): + if suffix and input_string.endswith(suffix): + return input_string[: -len(suffix)] + return input_string diff --git a/src/unstructured_client/utils/utils.py b/src/unstructured_client/utils/utils.py deleted file mode 100644 index f21a65d9..00000000 --- a/src/unstructured_client/utils/utils.py +++ /dev/null @@ -1,1116 +0,0 @@ -"""Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" - -import base64 -import json -import re -import sys -from dataclasses import Field, fields, is_dataclass, make_dataclass -from datetime import date, datetime -from decimal import Decimal -from email.message import Message -from enum import Enum -from typing import ( - Any, - Callable, - Dict, - List, - Optional, - Tuple, - Union, - get_args, - get_origin, -) -from xmlrpc.client import boolean -from typing_inspect import is_optional_type -import dateutil.parser -from dataclasses_json import DataClassJsonMixin - - -def get_security(security: Any) -> Tuple[Dict[str, str], Dict[str, str]]: - headers: Dict[str, str] = {} - query_params: Dict[str, str] = {} - - if security is None: - return headers, query_params - - sec_fields: Tuple[Field, ...] 
= fields(security) - for sec_field in sec_fields: - value = getattr(security, sec_field.name) - if value is None: - continue - - metadata = sec_field.metadata.get("security") - if metadata is None: - continue - if metadata.get("option"): - _parse_security_option(headers, query_params, value) - return headers, query_params - if metadata.get("scheme"): - # Special case for basic auth which could be a flattened struct - if metadata.get("sub_type") == "basic" and not is_dataclass(value): - _parse_security_scheme(headers, query_params, metadata, security) - else: - _parse_security_scheme(headers, query_params, metadata, value) - - return headers, query_params - - -def _parse_security_option( - headers: Dict[str, str], query_params: Dict[str, str], option: Any -): - opt_fields: Tuple[Field, ...] = fields(option) - for opt_field in opt_fields: - metadata = opt_field.metadata.get("security") - if metadata is None or metadata.get("scheme") is None: - continue - _parse_security_scheme( - headers, query_params, metadata, getattr(option, opt_field.name) - ) - - -def _parse_security_scheme( - headers: Dict[str, str], - query_params: Dict[str, str], - scheme_metadata: Dict, - scheme: Any, -): - scheme_type = scheme_metadata.get("type") - sub_type = scheme_metadata.get("sub_type") - - if is_dataclass(scheme): - if scheme_type == "http" and sub_type == "basic": - _parse_basic_auth_scheme(headers, scheme) - return - - scheme_fields: Tuple[Field, ...] 
= fields(scheme) - for scheme_field in scheme_fields: - metadata = scheme_field.metadata.get("security") - if metadata is None or metadata.get("field_name") is None: - continue - - value = getattr(scheme, scheme_field.name) - - _parse_security_scheme_value( - headers, query_params, scheme_metadata, metadata, value - ) - else: - _parse_security_scheme_value( - headers, query_params, scheme_metadata, scheme_metadata, scheme - ) - - -def _parse_security_scheme_value( - headers: Dict[str, str], - query_params: Dict[str, str], - scheme_metadata: Dict, - security_metadata: Dict, - value: Any, -): - scheme_type = scheme_metadata.get("type") - sub_type = scheme_metadata.get("sub_type") - - header_name = str(security_metadata.get("field_name")) - - if scheme_type == "apiKey": - if sub_type == "header": - headers[header_name] = value - elif sub_type == "query": - query_params[header_name] = value - else: - raise Exception("not supported") - elif scheme_type == "openIdConnect": - headers[header_name] = _apply_bearer(value) - elif scheme_type == "oauth2": - if sub_type != "client_credentials": - headers[header_name] = _apply_bearer(value) - elif scheme_type == "http": - if sub_type == "bearer": - headers[header_name] = _apply_bearer(value) - else: - raise Exception("not supported") - else: - raise Exception("not supported") - - -def _apply_bearer(token: str) -> str: - return token.lower().startswith("bearer ") and token or f"Bearer {token}" - - -def _parse_basic_auth_scheme(headers: Dict[str, str], scheme: Any): - username = "" - password = "" - - scheme_fields: Tuple[Field, ...] 
= fields(scheme) - for scheme_field in scheme_fields: - metadata = scheme_field.metadata.get("security") - if metadata is None or metadata.get("field_name") is None: - continue - - field_name = metadata.get("field_name") - value = getattr(scheme, scheme_field.name) - - if field_name == "username": - username = value - if field_name == "password": - password = value - - data = f"{username}:{password}".encode() - headers["Authorization"] = f"Basic {base64.b64encode(data).decode()}" - - -def generate_url( - server_url: str, - path: str, - path_params: Any, - gbls: Optional[Any] = None, -) -> str: - path_param_values: Dict[str, str] = {} - - globals_already_populated = _populate_path_params( - path_params, gbls, path_param_values, [] - ) - if gbls is not None: - _populate_path_params(gbls, None, path_param_values, globals_already_populated) - - for key, value in path_param_values.items(): - path = path.replace("{" + key + "}", value, 1) - - return remove_suffix(server_url, "/") + path - - -def _populate_path_params( - path_params: Any, - gbls: Any, - path_param_values: Dict[str, str], - skip_fields: List[str], -) -> List[str]: - globals_already_populated: List[str] = [] - - path_param_fields: Tuple[Field, ...] 
= fields(path_params) - for field in path_param_fields: - if field.name in skip_fields: - continue - - param_metadata = field.metadata.get("path_param") - if param_metadata is None: - continue - - param = getattr(path_params, field.name) if path_params is not None else None - param, global_found = _populate_from_globals( - field.name, param, "path_param", gbls - ) - if global_found: - globals_already_populated.append(field.name) - - if param is None: - continue - - f_name = param_metadata.get("field_name", field.name) - serialization = param_metadata.get("serialization", "") - if serialization != "": - serialized_params = _get_serialized_params( - param_metadata, field.type, f_name, param - ) - for key, value in serialized_params.items(): - path_param_values[key] = value - else: - if param_metadata.get("style", "simple") == "simple": - if isinstance(param, List): - pp_vals: List[str] = [] - for pp_val in param: - if pp_val is None: - continue - pp_vals.append(_val_to_string(pp_val)) - path_param_values[param_metadata.get("field_name", field.name)] = ( - ",".join(pp_vals) - ) - elif isinstance(param, Dict): - pp_vals: List[str] = [] - for pp_key in param: - if param[pp_key] is None: - continue - if param_metadata.get("explode"): - pp_vals.append(f"{pp_key}={_val_to_string(param[pp_key])}") - else: - pp_vals.append(f"{pp_key},{_val_to_string(param[pp_key])}") - path_param_values[param_metadata.get("field_name", field.name)] = ( - ",".join(pp_vals) - ) - elif not isinstance(param, (str, int, float, complex, bool, Decimal)): - pp_vals: List[str] = [] - param_fields: Tuple[Field, ...] 
= fields(param) - for param_field in param_fields: - param_value_metadata = param_field.metadata.get("path_param") - if not param_value_metadata: - continue - - param_name = param_value_metadata.get("field_name", field.name) - - param_field_val = getattr(param, param_field.name) - if param_field_val is None: - continue - if param_metadata.get("explode"): - pp_vals.append( - f"{param_name}={_val_to_string(param_field_val)}" - ) - else: - pp_vals.append( - f"{param_name},{_val_to_string(param_field_val)}" - ) - path_param_values[param_metadata.get("field_name", field.name)] = ( - ",".join(pp_vals) - ) - else: - path_param_values[param_metadata.get("field_name", field.name)] = ( - _val_to_string(param) - ) - - return globals_already_populated - - -def is_optional(field): - return get_origin(field) is Union and type(None) in get_args(field) - - -def template_url(url_with_params: str, params: Dict[str, str]) -> str: - for key, value in params.items(): - url_with_params = url_with_params.replace("{" + key + "}", value) - - return url_with_params - - -def get_query_params( - query_params: Any, - gbls: Optional[Any] = None, -) -> Dict[str, List[str]]: - params: Dict[str, List[str]] = {} - - globals_already_populated = _populate_query_params(query_params, gbls, params, []) - if gbls is not None: - _populate_query_params(gbls, None, params, globals_already_populated) - - return params - - -def _populate_query_params( - query_params: Any, - gbls: Any, - query_param_values: Dict[str, List[str]], - skip_fields: List[str], -) -> List[str]: - globals_already_populated: List[str] = [] - - param_fields: Tuple[Field, ...] 
= fields(query_params) - for field in param_fields: - if field.name in skip_fields: - continue - - metadata = field.metadata.get("query_param") - if not metadata: - continue - - param_name = field.name - value = getattr(query_params, param_name) if query_params is not None else None - - value, global_found = _populate_from_globals( - param_name, value, "query_param", gbls - ) - if global_found: - globals_already_populated.append(param_name) - - f_name = metadata.get("field_name") - serialization = metadata.get("serialization", "") - if serialization != "": - serialized_parms = _get_serialized_params( - metadata, field.type, f_name, value - ) - for key, value in serialized_parms.items(): - if key in query_param_values: - query_param_values[key].extend(value) - else: - query_param_values[key] = [value] - else: - style = metadata.get("style", "form") - if style == "deepObject": - _populate_deep_object_query_params( - metadata, f_name, value, query_param_values - ) - elif style == "form": - _populate_delimited_query_params( - metadata, f_name, value, ",", query_param_values - ) - elif style == "pipeDelimited": - _populate_delimited_query_params( - metadata, f_name, value, "|", query_param_values - ) - else: - raise Exception("not yet implemented") - - return globals_already_populated - - -def get_headers(headers_params: Any, gbls: Optional[Any] = None) -> Dict[str, str]: - headers: Dict[str, str] = {} - - globals_already_populated = [] - if headers_params is not None: - globals_already_populated = _populate_headers(headers_params, gbls, headers, []) - if gbls is not None: - _populate_headers(gbls, None, headers, globals_already_populated) - - return headers - - -def _populate_headers( - headers_params: Any, - gbls: Any, - header_values: Dict[str, str], - skip_fields: List[str], -) -> List[str]: - globals_already_populated: List[str] = [] - - param_fields: Tuple[Field, ...] 
= fields(headers_params) - for field in param_fields: - if field.name in skip_fields: - continue - - metadata = field.metadata.get("header") - if not metadata: - continue - - value, global_found = _populate_from_globals( - field.name, getattr(headers_params, field.name), "header", gbls - ) - if global_found: - globals_already_populated.append(field.name) - value = _serialize_header(metadata.get("explode", False), value) - - if value != "": - header_values[metadata.get("field_name", field.name)] = value - - return globals_already_populated - - -def _get_serialized_params( - metadata: Dict, field_type: type, field_name: str, obj: Any -) -> Dict[str, str]: - params: Dict[str, str] = {} - - serialization = metadata.get("serialization", "") - if serialization == "json": - params[metadata.get("field_name", field_name)] = marshal_json(obj, field_type) - - return params - - -def _populate_deep_object_query_params( - metadata: Dict, field_name: str, obj: Any, params: Dict[str, List[str]] -): - if obj is None: - return - - if is_dataclass(obj): - _populate_deep_object_query_params_dataclass(metadata.get("field_name", field_name), obj, params) - elif isinstance(obj, Dict): - _populate_deep_object_query_params_dict(metadata.get("field_name", field_name), obj, params) - - -def _populate_deep_object_query_params_dataclass( - prior_params_key: str, obj: Any, params: Dict[str, List[str]] -): - if obj is None: - return - - if not is_dataclass(obj): - return - - obj_fields: Tuple[Field, ...] 
= fields(obj) - for obj_field in obj_fields: - obj_param_metadata = obj_field.metadata.get("query_param") - if not obj_param_metadata: - continue - - obj_val = getattr(obj, obj_field.name) - if obj_val is None: - continue - - params_key = f'{prior_params_key}[{obj_param_metadata.get("field_name", obj_field.name)}]' - - if is_dataclass(obj_val): - _populate_deep_object_query_params_dataclass(params_key, obj_val, params) - elif isinstance(obj_val, Dict): - _populate_deep_object_query_params_dict(params_key, obj_val, params) - elif isinstance(obj_val, List): - _populate_deep_object_query_params_list(params_key, obj_val, params) - else: - params[params_key] = [_val_to_string(obj_val)] - - -def _populate_deep_object_query_params_dict( - prior_params_key: str, value: Dict, params: Dict[str, List[str]] -): - if value is None: - return - - for key, val in value.items(): - if val is None: - continue - - params_key = f'{prior_params_key}[{key}]' - - if is_dataclass(val): - _populate_deep_object_query_params_dataclass(params_key, val, params) - elif isinstance(val, Dict): - _populate_deep_object_query_params_dict(params_key, val, params) - elif isinstance(val, List): - _populate_deep_object_query_params_list(params_key, val, params) - else: - params[params_key] = [_val_to_string(val)] - - -def _populate_deep_object_query_params_list( - params_key: str, value: List, params: Dict[str, List[str]] -): - if value is None: - return - - for val in value: - if val is None: - continue - - if params.get(params_key) is None: - params[params_key] = [] - - params[params_key].append(_val_to_string(val)) - - -def _get_query_param_field_name(obj_field: Field) -> str: - obj_param_metadata = obj_field.metadata.get("query_param") - - if not obj_param_metadata: - return "" - - return obj_param_metadata.get("field_name", obj_field.name) - - -def _populate_delimited_query_params( - metadata: Dict, - field_name: str, - obj: Any, - delimiter: str, - query_param_values: Dict[str, List[str]], -): - 
_populate_form( - field_name, - metadata.get("explode", True), - obj, - _get_query_param_field_name, - delimiter, - query_param_values, - ) - - -SERIALIZATION_METHOD_TO_CONTENT_TYPE = { - "json": "application/json", - "form": "application/x-www-form-urlencoded", - "multipart": "multipart/form-data", - "raw": "application/octet-stream", - "string": "text/plain", -} - - -def serialize_request_body( - request: Any, - request_type: type, - request_field_name: str, - nullable: bool, - optional: bool, - serialization_method: str, - encoder=None, -) -> Tuple[Optional[str], Optional[Any], Optional[Any]]: - if request is None: - if not nullable and optional: - return None, None, None - - if not is_dataclass(request) or not hasattr(request, request_field_name): - return serialize_content_type( - request_field_name, - request_type, - SERIALIZATION_METHOD_TO_CONTENT_TYPE[serialization_method], - request, - encoder, - ) - - request_val = getattr(request, request_field_name) - - if request_val is None: - if not nullable and optional: - return None, None, None - - request_fields: Tuple[Field, ...] 
= fields(request) - request_metadata = None - - for field in request_fields: - if field.name == request_field_name: - request_metadata = field.metadata.get("request") - break - - if request_metadata is None: - raise Exception("invalid request type") - - return serialize_content_type( - request_field_name, - request_type, - request_metadata.get("media_type", "application/octet-stream"), - request_val, - ) - - -def serialize_content_type( - field_name: str, request_type: Any, media_type: str, request: Any, encoder=None -) -> Tuple[Optional[str], Optional[Any], Optional[List[List[Any]]]]: - if re.match(r"(application|text)\/.*?\+*json.*", media_type) is not None: - return media_type, marshal_json(request, request_type, encoder), None - if re.match(r"multipart\/.*", media_type) is not None: - return serialize_multipart_form(media_type, request) - if re.match(r"application\/x-www-form-urlencoded.*", media_type) is not None: - return media_type, serialize_form_data(field_name, request), None - if isinstance(request, (bytes, bytearray)): - return media_type, request, None - if isinstance(request, str): - return media_type, request, None - - raise Exception( - f"invalid request body type {type(request)} for mediaType {media_type}" - ) - - -def serialize_multipart_form( - media_type: str, request: Any -) -> Tuple[str, Any, List[List[Any]]]: - form: List[List[Any]] = [] - request_fields = fields(request) - - for field in request_fields: - val = getattr(request, field.name) - if val is None: - continue - - field_metadata = field.metadata.get("multipart_form") - if not field_metadata: - continue - - if field_metadata.get("file") is True: - file_fields = fields(val) - - file_name = "" - field_name = "" - content = bytes() - - for file_field in file_fields: - file_metadata = file_field.metadata.get("multipart_form") - if file_metadata is None: - continue - - if file_metadata.get("content") is True: - content = getattr(val, file_field.name) - else: - field_name = 
file_metadata.get("field_name", file_field.name) - file_name = getattr(val, file_field.name) - if field_name == "" or file_name == "" or content == bytes(): - raise Exception("invalid multipart/form-data file") - - form.append([field_name, [file_name, content]]) - elif field_metadata.get("json") is True: - to_append = [ - field_metadata.get("field_name", field.name), - [None, marshal_json(val, field.type), "application/json"], - ] - form.append(to_append) - else: - field_name = field_metadata.get("field_name", field.name) - if isinstance(val, List): - for value in val: - if value is None: - continue - form.append([field_name + "[]", [None, _val_to_string(value)]]) - else: - form.append([field_name, [None, _val_to_string(val)]]) - return media_type, None, form - - -def serialize_dict( - original: Dict, explode: bool, field_name, existing: Optional[Dict[str, List[str]]] -) -> Dict[str, List[str]]: - if existing is None: - existing = {} - - if explode is True: - for key, val in original.items(): - if key not in existing: - existing[key] = [] - existing[key].append(val) - else: - temp = [] - for key, val in original.items(): - temp.append(str(key)) - temp.append(str(val)) - if field_name not in existing: - existing[field_name] = [] - existing[field_name].append(",".join(temp)) - return existing - - -def serialize_form_data(field_name: str, data: Any) -> Dict[str, Any]: - form: Dict[str, List[str]] = {} - - if is_dataclass(data): - for field in fields(data): - val = getattr(data, field.name) - if val is None: - continue - - metadata = field.metadata.get("form") - if metadata is None: - continue - - field_name = metadata.get("field_name", field.name) - - if metadata.get("json"): - form[field_name] = [marshal_json(val, field.type)] - else: - if metadata.get("style", "form") == "form": - _populate_form( - field_name, - metadata.get("explode", True), - val, - _get_form_field_name, - ",", - form, - ) - else: - raise Exception(f"Invalid form style for field {field.name}") - 
elif isinstance(data, Dict): - for key, value in data.items(): - form[key] = [_val_to_string(value)] - else: - raise Exception(f"Invalid request body type for field {field_name}") - - return form - - -def _get_form_field_name(obj_field: Field) -> str: - obj_param_metadata = obj_field.metadata.get("form") - - if not obj_param_metadata: - return "" - - return obj_param_metadata.get("field_name", obj_field.name) - - -def _populate_form( - field_name: str, - explode: boolean, - obj: Any, - get_field_name_func: Callable, - delimiter: str, - form: Dict[str, List[str]], -): - if obj is None: - return form - - if is_dataclass(obj): - items = [] - - obj_fields: Tuple[Field, ...] = fields(obj) - for obj_field in obj_fields: - obj_field_name = get_field_name_func(obj_field) - if obj_field_name == "": - continue - - val = getattr(obj, obj_field.name) - if val is None: - continue - - if explode: - form[obj_field_name] = [_val_to_string(val)] - else: - items.append(f"{obj_field_name}{delimiter}{_val_to_string(val)}") - - if len(items) > 0: - form[field_name] = [delimiter.join(items)] - elif isinstance(obj, Dict): - items = [] - for key, value in obj.items(): - if value is None: - continue - - if explode: - form[key] = [_val_to_string(value)] - else: - items.append(f"{key}{delimiter}{_val_to_string(value)}") - - if len(items) > 0: - form[field_name] = [delimiter.join(items)] - elif isinstance(obj, List): - items = [] - - for value in obj: - if value is None: - continue - - if explode: - if not field_name in form: - form[field_name] = [] - form[field_name].append(_val_to_string(value)) - else: - items.append(_val_to_string(value)) - - if len(items) > 0: - form[field_name] = [delimiter.join([str(item) for item in items])] - else: - form[field_name] = [_val_to_string(obj)] - - return form - - -def _serialize_header(explode: bool, obj: Any) -> str: - if obj is None: - return "" - - if is_dataclass(obj): - items = [] - obj_fields: Tuple[Field, ...] 
= fields(obj) - for obj_field in obj_fields: - obj_param_metadata = obj_field.metadata.get("header") - - if not obj_param_metadata: - continue - - obj_field_name = obj_param_metadata.get("field_name", obj_field.name) - if obj_field_name == "": - continue - - val = getattr(obj, obj_field.name) - if val is None: - continue - - if explode: - items.append(f"{obj_field_name}={_val_to_string(val)}") - else: - items.append(obj_field_name) - items.append(_val_to_string(val)) - - if len(items) > 0: - return ",".join(items) - elif isinstance(obj, Dict): - items = [] - - for key, value in obj.items(): - if value is None: - continue - - if explode: - items.append(f"{key}={_val_to_string(value)}") - else: - items.append(key) - items.append(_val_to_string(value)) - - if len(items) > 0: - return ",".join([str(item) for item in items]) - elif isinstance(obj, List): - items = [] - - for value in obj: - if value is None: - continue - - items.append(_val_to_string(value)) - - if len(items) > 0: - return ",".join(items) - else: - return f"{_val_to_string(obj)}" - - return "" - - -def unmarshal_json(data, typ, decoder=None, infer_missing=False): - unmarshal = make_dataclass("Unmarshal", [("res", typ)], bases=(DataClassJsonMixin,)) - json_dict = json.loads(data) - try: - out = unmarshal.from_dict({"res": json_dict}, infer_missing=infer_missing) - except AttributeError as attr_err: - raise AttributeError( - f"unable to unmarshal {data} as {typ} - {attr_err}" - ) from attr_err - - return out.res if decoder is None else decoder(out.res) - - -def marshal_json(val, typ, encoder=None): - if not is_optional_type(typ) and val is None: - raise ValueError(f"Could not marshal None into non-optional type: {typ}") - - marshal = make_dataclass("Marshal", [("res", typ)], bases=(DataClassJsonMixin,)) - marshaller = marshal(res=val) - json_dict = marshaller.to_dict() - val = json_dict["res"] if encoder is None else encoder(json_dict["res"]) - - return json.dumps(val, separators=(",", ":"), 
sort_keys=True) - - -def match_content_type(content_type: str, pattern: str) -> boolean: - if pattern in (content_type, "*", "*/*"): - return True - - msg = Message() - msg["content-type"] = content_type - media_type = msg.get_content_type() - - if media_type == pattern: - return True - - parts = media_type.split("/") - if len(parts) == 2: - if pattern in (f"{parts[0]}/*", f"*/{parts[1]}"): - return True - - return False - - -def match_status_codes(status_codes: List[str], status_code: int) -> bool: - for code in status_codes: - if code == str(status_code): - return True - - if code.endswith("XX") and code.startswith(str(status_code)[:1]): - return True - return False - - -def datetimeisoformat(optional: bool): - def isoformatoptional(val): - if optional and val is None: - return None - return _val_to_string(val) - - return isoformatoptional - - -def dateisoformat(optional: bool): - def isoformatoptional(val): - if optional and val is None: - return None - return date.isoformat(val) - - return isoformatoptional - - -def datefromisoformat(date_str: str): - return dateutil.parser.parse(date_str).date() - - -def bigintencoder(optional: bool): - def bigintencode(val: int): - if optional and val is None: - return None - return str(val) - - return bigintencode - - -def bigintdecoder(val): - if val is None: - return None - - if isinstance(val, float): - raise ValueError(f"{val} is a float") - return int(val) - -def integerstrencoder(optional: bool): - def integerstrencode(val: int): - if optional and val is None: - return None - return str(val) - - return integerstrencode - - -def integerstrdecoder(val): - if val is None: - return None - - if isinstance(val, float): - raise ValueError(f"{val} is a float") - return int(val) - - -def numberstrencoder(optional: bool): - def numberstrencode(val: float): - if optional and val is None: - return None - return str(val) - - return numberstrencode - - -def numberstrdecoder(val): - if val is None: - return None - - return float(val) 
- - -def decimalencoder(optional: bool, as_str: bool): - def decimalencode(val: Decimal): - if optional and val is None: - return None - - if as_str: - return str(val) - - return float(val) - - return decimalencode - - -def decimaldecoder(val): - if val is None: - return None - - return Decimal(str(val)) - - -def map_encoder(optional: bool, value_encoder: Callable): - def map_encode(val: Dict): - if optional and val is None: - return None - - encoded = {} - for key, value in val.items(): - encoded[key] = value_encoder(value) - - return encoded - - return map_encode - - -def map_decoder(value_decoder: Callable): - def map_decode(val: Dict): - decoded = {} - for key, value in val.items(): - decoded[key] = value_decoder(value) - - return decoded - - return map_decode - - -def list_encoder(optional: bool, value_encoder: Callable): - def list_encode(val: List): - if optional and val is None: - return None - - encoded = [] - for value in val: - encoded.append(value_encoder(value)) - - return encoded - - return list_encode - - -def list_decoder(value_decoder: Callable): - def list_decode(val: List): - decoded = [] - for value in val: - decoded.append(value_decoder(value)) - - return decoded - - return list_decode - - -def union_encoder(all_encoders: Dict[str, Callable]): - def selective_encoder(val: Any): - if type(val) in all_encoders: - return all_encoders[type(val)](val) - return val - - return selective_encoder - - -def union_decoder(all_decoders: List[Callable]): - def selective_decoder(val: Any): - decoded = val - for decoder in all_decoders: - try: - decoded = decoder(val) - break - except (TypeError, ValueError): - continue - return decoded - - return selective_decoder - - -def get_field_name(name): - def override(_, _field_name=name): - return _field_name - - return override - - -def _val_to_string(val) -> str: - if isinstance(val, bool): - return str(val).lower() - if isinstance(val, datetime): - return str(val.isoformat().replace("+00:00", "Z")) - if 
isinstance(val, Enum): - return str(val.value) - - return str(val) - - -def _populate_from_globals( - param_name: str, value: Any, param_type: str, gbls: Any -) -> Tuple[Any, bool]: - if gbls is None: - return value, False - - global_fields = fields(gbls) - - found = False - for field in global_fields: - if field.name is not param_name: - continue - - found = True - - if value is not None: - return value, True - - global_value = getattr(gbls, field.name) - - param_metadata = field.metadata.get(param_type) - if param_metadata is None: - return value, True - - return global_value, True - - return value, found - - -def decoder_with_discriminator(field_name): - def decode_fx(obj): - kls = getattr(sys.modules["sdk.models.shared"], obj[field_name]) - return unmarshal_json(json.dumps(obj), kls) - - return decode_fx - - -def remove_suffix(input_string, suffix): - if suffix and input_string.endswith(suffix): - return input_string[: -len(suffix)] - return input_string diff --git a/src/unstructured_client/utils/values.py b/src/unstructured_client/utils/values.py new file mode 100644 index 00000000..db883b41 --- /dev/null +++ b/src/unstructured_client/utils/values.py @@ -0,0 +1,128 @@ +"""Code generated by Speakeasy (https://speakeasyapi.com). 
DO NOT EDIT.""" + +from datetime import datetime +from enum import Enum +from email.message import Message +import os +from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar, Union + +from httpx import Response +from pydantic import BaseModel +from pydantic.fields import FieldInfo + +from .serializers import marshal_json + +from .metadata import ParamMetadata, find_field_metadata + + +def match_content_type(content_type: str, pattern: str) -> bool: + if pattern in (content_type, "*", "*/*"): + return True + + msg = Message() + msg["content-type"] = content_type + media_type = msg.get_content_type() + + if media_type == pattern: + return True + + parts = media_type.split("/") + if len(parts) == 2: + if pattern in (f"{parts[0]}/*", f"*/{parts[1]}"): + return True + + return False + + +def match_status_codes(status_codes: List[str], status_code: int) -> bool: + if "default" in status_codes: + return True + + for code in status_codes: + if code == str(status_code): + return True + + if code.endswith("XX") and code.startswith(str(status_code)[:1]): + return True + return False + + +T = TypeVar("T") + + +def get_global_from_env( + value: Optional[T], env_key: str, type_cast: Callable[[str], T] +) -> Optional[T]: + if value is not None: + return value + env_value = os.getenv(env_key) + if env_value is not None: + try: + return type_cast(env_value) + except ValueError: + pass + return None + + +def match_response( + response: Response, code: Union[str, List[str]], content_type: str +) -> bool: + codes = code if isinstance(code, list) else [code] + return match_status_codes(codes, response.status_code) and match_content_type( + response.headers.get("content-type", "application/octet-stream"), content_type + ) + + +def _populate_from_globals( + param_name: str, value: Any, param_metadata_type: type, gbls: Any +) -> Tuple[Any, bool]: + if gbls is None: + return value, False + + if not isinstance(gbls, BaseModel): + raise TypeError("globals must be a pydantic 
model") + + global_fields: Dict[str, FieldInfo] = gbls.__class__.model_fields + found = False + for name in global_fields: + field = global_fields[name] + if name is not param_name: + continue + + found = True + + if value is not None: + return value, True + + global_value = getattr(gbls, name) + + param_metadata = find_field_metadata(field, param_metadata_type) + if param_metadata is None: + return value, True + + return global_value, True + + return value, found + + +def _val_to_string(val) -> str: + if isinstance(val, bool): + return str(val).lower() + if isinstance(val, datetime): + return str(val.isoformat().replace("+00:00", "Z")) + if isinstance(val, Enum): + return str(val.value) + + return str(val) + + +def _get_serialized_params( + metadata: ParamMetadata, field_name: str, obj: Any, typ: type +) -> Dict[str, str]: + params: Dict[str, str] = {} + + serialization = metadata.serialization + if serialization == "json": + params[field_name] = marshal_json(obj, typ) + + return params