diff --git a/.codegen.json b/.codegen.json index 2d2c90e4e..f98f968ab 100644 --- a/.codegen.json +++ b/.codegen.json @@ -2,7 +2,7 @@ "mode": "py_v0", "api_changelog": true, "version": { - "databricks/sdk/version.py": "__version__ = '$VERSION'" + "databricks/sdk/version.py": "__version__ = \"$VERSION\"" }, "toolchain": { "required": ["python3"], diff --git a/.codegen/_openapi_sha b/.codegen/_openapi_sha index baefa0efc..12fb465ab 100644 --- a/.codegen/_openapi_sha +++ b/.codegen/_openapi_sha @@ -1 +1 @@ -bdd8536d26484460f450b1d17722c01c5a6a50a9 \ No newline at end of file +94dc3e7289a19a90b167adf27316bd703a86f0eb \ No newline at end of file diff --git a/.release_metadata.json b/.release_metadata.json index a8b18a761..5202186bf 100644 --- a/.release_metadata.json +++ b/.release_metadata.json @@ -1,3 +1,3 @@ { - "timestamp": "2025-03-06 11:35:45+0000" + "timestamp": "2025-03-21 07:12:02+0000" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index bc307bbdf..4d07a4f7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,61 @@ # Version changelog +## Release v0.47.0 + +### Bug Fixes + + * Ensure that refresh tokens are returned when using the `external-browser` credentials strategy. + +### API Changes +* Added `abfss`, `dbfs`, `error_message`, `execution_duration_seconds`, `file`, `gcs`, `s3`, `status`, `volumes` and `workspace` fields for `databricks.sdk.service.compute.InitScriptInfoAndExecutionDetails`. +* [Breaking] Added `forecast_granularity` field for `databricks.sdk.service.ml.CreateForecastingExperimentRequest`. +* Added `jwks_uri` field for `databricks.sdk.service.oauth2.OidcFederationPolicy`. +* Added `fallback_config` field for `databricks.sdk.service.serving.AiGatewayConfig`. +* Added `custom_provider_config` field for `databricks.sdk.service.serving.ExternalModel`. +* Added `fallback_config` field for `databricks.sdk.service.serving.PutAiGatewayRequest`. +* Added `fallback_config` field for `databricks.sdk.service.serving.PutAiGatewayResponse`. +* Added `aliases`, `comment`, `data_type`, `dependency_list`, `full_data_type`, `id`, `input_params`, `name`, `properties`, `routine_definition`, `schema`, `securable_kind`, `share`, `share_id`, `storage_location` and `tags` fields for `databricks.sdk.service.sharing.DeltaSharingFunction`. 
+* Added `access_token_failure`, `allocation_timeout`, `allocation_timeout_node_daemon_not_ready`, `allocation_timeout_no_healthy_clusters`, `allocation_timeout_no_matched_clusters`, `allocation_timeout_no_ready_clusters`, `allocation_timeout_no_unallocated_clusters`, `allocation_timeout_no_warmed_up_clusters`, `aws_inaccessible_kms_key_failure`, `aws_instance_profile_update_failure`, `aws_invalid_key_pair`, `aws_invalid_kms_key_state`, `aws_resource_quota_exceeded`, `azure_packed_deployment_partial_failure`, `bootstrap_timeout_due_to_misconfig`, `budget_policy_limit_enforcement_activated`, `budget_policy_resolution_failure`, `cloud_account_setup_failure`, `cloud_operation_cancelled`, `cloud_provider_instance_not_launched`, `cloud_provider_launch_failure_due_to_misconfig`, `cloud_provider_resource_stockout_due_to_misconfig`, `cluster_operation_throttled`, `cluster_operation_timeout`, `control_plane_request_failure_due_to_misconfig`, `data_access_config_changed`, `disaster_recovery_replication`, `driver_eviction`, `driver_launch_timeout`, `driver_node_unreachable`, `driver_out_of_disk`, `driver_out_of_memory`, `driver_pod_creation_failure`, `driver_unexpected_failure`, `dynamic_spark_conf_size_exceeded`, `eos_spark_image`, `executor_pod_unscheduled`, `gcp_api_rate_quota_exceeded`, `gcp_forbidden`, `gcp_iam_timeout`, `gcp_inaccessible_kms_key_failure`, `gcp_insufficient_capacity`, `gcp_ip_space_exhausted`, `gcp_kms_key_permission_denied`, `gcp_not_found`, `gcp_resource_quota_exceeded`, `gcp_service_account_access_denied`, `gcp_service_account_not_found`, `gcp_subnet_not_ready`, `gcp_trusted_image_projects_violated`, `gke_based_cluster_termination`, `init_container_not_finished`, `instance_pool_max_capacity_reached`, `instance_pool_not_found`, `instance_unreachable_due_to_misconfig`, `internal_capacity_failure`, `invalid_aws_parameter`, `invalid_instance_placement_protocol`, `invalid_worker_image_failure`, `in_penalty_box`, `lazy_allocation_timeout`, `maintenance_mode`, `netvisor_setup_timeout`, `no_matched_k8s`, `no_matched_k8s_testing_tag`, `pod_assignment_failure`, `pod_scheduling_failure`, `resource_usage_blocked`, `secret_creation_failure`, `serverless_long_running_terminated`, `spark_image_download_throttled`, `spark_image_not_found`, `ssh_bootstrap_failure`, `storage_download_failure_due_to_misconfig`, `storage_download_failure_slow`, `storage_download_failure_throttled`, `unexpected_pod_recreation`, `user_initiated_vm_termination` and `workspace_update` enum values for `databricks.sdk.service.compute.TerminationReasonCode`. +* Added `generated_sql_query_too_long_exception` and `missing_sql_query_exception` enum values for `databricks.sdk.service.dashboards.MessageErrorType`. +* Added `balanced` enum value for `databricks.sdk.service.jobs.PerformanceTarget`. +* Added `listing_resource` enum value for `databricks.sdk.service.marketplace.FileParentType`. +* Added `app` enum value for `databricks.sdk.service.marketplace.MarketplaceFileType`. +* Added `custom` enum value for `databricks.sdk.service.serving.ExternalModelProvider`. +* [Breaking] Changed `create_experiment()` method for [w.forecasting](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/ml/forecasting.html) workspace-level service with new required argument order. +* Changed `instance_type_id` field for `databricks.sdk.service.compute.NodeInstanceType` to be required. +* Changed `category` field for `databricks.sdk.service.compute.NodeType` to be required. 
+* [Breaking] Changed `functions` field for `databricks.sdk.service.sharing.ListProviderShareAssetsResponse` to type `databricks.sdk.service.sharing.DeltaSharingFunctionList` dataclass. +* [Breaking] Changed waiter for [ClustersAPI.create](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.create) method. +* [Breaking] Changed waiter for [ClustersAPI.delete](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.delete) method. +* [Breaking] Changed waiter for [ClustersAPI.edit](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.edit) method. +* [Breaking] Changed waiter for [ClustersAPI.get](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.get) method. +* [Breaking] Changed waiter for [ClustersAPI.resize](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.resize) method. +* [Breaking] Changed waiter for [ClustersAPI.restart](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.restart) method. +* [Breaking] Changed waiter for [ClustersAPI.start](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.start) method. +* [Breaking] Changed waiter for [ClustersAPI.update](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/compute/clusters.html#databricks.sdk.service.compute.ClustersAPI.update) method. +* [Breaking] Removed `execution_details` and `script` fields for `databricks.sdk.service.compute.InitScriptInfoAndExecutionDetails`. +* [Breaking] Removed `supports_elastic_disk` field for `databricks.sdk.service.compute.NodeType`. +* [Breaking] Removed `data_granularity_quantity` and `data_granularity_unit` fields for `databricks.sdk.service.ml.CreateForecastingExperimentRequest`. +* [Breaking] Removed `aliases`, `comment`, `data_type`, `dependency_list`, `full_data_type`, `id`, `input_params`, `name`, `properties`, `routine_definition`, `schema`, `securable_kind`, `share`, `share_id`, `storage_location` and `tags` fields for `databricks.sdk.service.sharing.Function`. + + +## Release v0.46.0 + +### New Features and Improvements +* [Experimental] Add support for async token refresh ([#916](https://github.com/databricks/databricks-sdk-py/pull/916)). + This can be enabled by setting the following environment variable: + ``` + export DATABRICKS_ENABLE_EXPERIMENTAL_ASYNC_TOKEN_REFRESH=1 + ``` + This feature and its setting are experimental and may be removed in future releases (a usage sketch follows below). + +### API Changes +* Added [w.forecasting](https://databricks-sdk-py.readthedocs.io/en/latest/workspace/ml/forecasting.html) workspace-level service. +* Added `statement_id` field for `databricks.sdk.service.dashboards.GenieQueryAttachment`. +* Added `could_not_get_model_deployments_exception` enum value for `databricks.sdk.service.dashboards.MessageErrorType`. +* [Breaking] Removed `jwks_uri` field for `databricks.sdk.service.oauth2.OidcFederationPolicy`.
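The experimental async token refresh flag described in the v0.46.0 notes above can be exercised with a few lines of Python. This is a minimal sketch, not part of the diff: it assumes workspace credentials are already configured in the environment, and it uses only the `DATABRICKS_ENABLE_EXPERIMENTAL_ASYNC_TOKEN_REFRESH` variable from the changelog together with the matching `Config.enable_experimental_async_token_refresh` attribute that this change adds in `databricks/sdk/config.py`.

```python
import os

from databricks.sdk import WorkspaceClient

# Opt in before constructing the client; Config picks the flag up from the
# DATABRICKS_ENABLE_EXPERIMENTAL_ASYNC_TOKEN_REFRESH environment variable.
# The feature is experimental and may be removed in a future release.
os.environ["DATABRICKS_ENABLE_EXPERIMENTAL_ASYNC_TOKEN_REFRESH"] = "1"

w = WorkspaceClient()

# Any authenticated call works; with the flag set, OAuth token refreshes go
# through the SDK's async refresh path instead of the default blocking refresh.
print(w.current_user.me().user_name)
```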
+ + ## Release v0.45.0 ### New Features and Improvements diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index a835be1fa..395a1a32c 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -1,6 +1,6 @@ # NEXT CHANGELOG -## Release v0.46.0 +## Release v0.48.0 ### New Features and Improvements diff --git a/databricks/sdk/__init__.py b/databricks/sdk/__init__.py index 141e7e44d..806d8c584 100755 --- a/databricks/sdk/__init__.py +++ b/databricks/sdk/__init__.py @@ -8,6 +8,7 @@ import databricks.sdk.service as service from databricks.sdk import azure from databricks.sdk.credentials_provider import CredentialsStrategy +from databricks.sdk.data_plane import DataPlaneTokenSource from databricks.sdk.mixins.compute import ClustersExt from databricks.sdk.mixins.files import DbfsExt, FilesExt from databricks.sdk.mixins.jobs import JobsExt @@ -63,7 +64,8 @@ ProviderExchangeFiltersAPI, ProviderExchangesAPI, ProviderFilesAPI, ProviderListingsAPI, ProviderPersonalizationRequestsAPI, ProviderProviderAnalyticsDashboardsAPI, ProviderProvidersAPI) -from databricks.sdk.service.ml import ExperimentsAPI, ModelRegistryAPI +from databricks.sdk.service.ml import (ExperimentsAPI, ForecastingAPI, + ModelRegistryAPI) from databricks.sdk.service.oauth2 import (AccountFederationPolicyAPI, CustomAppIntegrationAPI, OAuthPublishedAppsAPI, @@ -284,8 +286,11 @@ def __init__( self._secrets = service.workspace.SecretsAPI(self._api_client) self._service_principals = service.iam.ServicePrincipalsAPI(self._api_client) self._serving_endpoints = serving_endpoints + serving_endpoints_data_plane_token_source = DataPlaneTokenSource( + self._config.host, self._config.oauth_token, not self._config.enable_experimental_async_token_refresh + ) self._serving_endpoints_data_plane = service.serving.ServingEndpointsDataPlaneAPI( - self._api_client, serving_endpoints + self._api_client, serving_endpoints, serving_endpoints_data_plane_token_source ) self._settings = service.settings.SettingsAPI(self._api_client) self._shares = service.sharing.SharesAPI(self._api_client) @@ -305,6 +310,7 @@ def __init__( self._workspace = WorkspaceExt(self._api_client) self._workspace_bindings = service.catalog.WorkspaceBindingsAPI(self._api_client) self._workspace_conf = service.settings.WorkspaceConfAPI(self._api_client) + self._forecasting = service.ml.ForecastingAPI(self._api_client) @property def config(self) -> client.Config: @@ -808,6 +814,11 @@ def workspace_conf(self) -> service.settings.WorkspaceConfAPI: """This API allows updating known workspace settings for advanced users.""" return self._workspace_conf + @property + def forecasting(self) -> service.ml.ForecastingAPI: + """The Forecasting API allows you to create and get serverless forecasting experiments.""" + return self._forecasting + def get_workspace_id(self) -> int: """Get the workspace ID of the workspace that this client is connected to.""" response = self._api_client.do("GET", "/api/2.0/preview/scim/v2/Me", response_headers=["X-Databricks-Org-Id"]) diff --git a/databricks/sdk/config.py b/databricks/sdk/config.py index 591aafc44..2a05cf6ba 100644 --- a/databricks/sdk/config.py +++ b/databricks/sdk/config.py @@ -95,6 +95,10 @@ class Config: max_connections_per_pool: int = ConfigAttribute() databricks_environment: Optional[DatabricksEnvironment] = None + enable_experimental_async_token_refresh: bool = ConfigAttribute( + env="DATABRICKS_ENABLE_EXPERIMENTAL_ASYNC_TOKEN_REFRESH" + ) + enable_experimental_files_api_client: bool = 
ConfigAttribute(env="DATABRICKS_ENABLE_EXPERIMENTAL_FILES_API_CLIENT") files_api_client_download_max_total_recovers = None files_api_client_download_max_total_recovers_without_progressing = 1 diff --git a/databricks/sdk/credentials_provider.py b/databricks/sdk/credentials_provider.py index 86acac86c..eac7c9697 100644 --- a/databricks/sdk/credentials_provider.py +++ b/databricks/sdk/credentials_provider.py @@ -191,6 +191,7 @@ def oauth_service_principal(cfg: "Config") -> Optional[CredentialsProvider]: token_url=oidc.token_endpoint, scopes=["all-apis"], use_header=True, + disable_async=not cfg.enable_experimental_async_token_refresh, ) def inner() -> Dict[str, str]: @@ -290,6 +291,7 @@ def token_source_for(resource: str) -> TokenSource: token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", endpoint_params={"resource": resource}, use_params=True, + disable_async=not cfg.enable_experimental_async_token_refresh, ) _ensure_host_present(cfg, token_source_for) @@ -355,6 +357,7 @@ def github_oidc_azure(cfg: "Config") -> Optional[CredentialsProvider]: token_url=f"{aad_endpoint}{cfg.azure_tenant_id}/oauth2/token", endpoint_params=params, use_params=True, + disable_async=not cfg.enable_experimental_async_token_refresh, ) def refreshed_headers() -> Dict[str, str]: @@ -458,8 +461,9 @@ def __init__( token_type_field: str, access_token_field: str, expiry_field: str, + disable_async: bool = True, ): - super().__init__() + super().__init__(disable_async=disable_async) self._cmd = cmd self._token_type_field = token_type_field self._access_token_field = access_token_field @@ -690,6 +694,7 @@ def __init__(self, cfg: "Config"): token_type_field="token_type", access_token_field="access_token", expiry_field="expiry", + disable_async=not cfg.enable_experimental_async_token_refresh, ) @staticmethod diff --git a/databricks/sdk/data_plane.py b/databricks/sdk/data_plane.py index 3c059ecf2..aa772edcc 100644 --- a/databricks/sdk/data_plane.py +++ b/databricks/sdk/data_plane.py @@ -2,7 +2,7 @@ import threading from dataclasses import dataclass -from typing import Callable, List, Optional +from typing import Callable, Optional from urllib import parse from databricks.sdk import oauth @@ -88,61 +88,3 @@ class DataPlaneDetails: """URL used to query the endpoint through the DataPlane.""" token: Token """Token to query the DataPlane endpoint.""" - - -## Old implementation. #TODO: Remove after the new implementation is used - - -class DataPlaneService: - """Helper class to fetch and manage DataPlane details.""" - - from .service.serving import DataPlaneInfo - - def __init__(self): - self._data_plane_info = {} - self._tokens = {} - self._lock = threading.Lock() - - def get_data_plane_details( - self, - method: str, - params: List[str], - info_getter: Callable[[], DataPlaneInfo], - refresh: Callable[[str], Token], - ): - """Get and cache information required to query a Data Plane endpoint using the provided methods. - - Returns a cached DataPlaneDetails if the details have already been fetched previously and are still valid. - If not, it uses the provided functions to fetch the details. - - :param method: method name. Used to construct a unique key for the cache. - :param params: path params used in the "get" operation which uniquely determine the object. Used to construct a unique key for the cache. - :param info_getter: function which returns the DataPlaneInfo. It will only be called if the information is not already present in the cache. - :param refresh: function to refresh the token. 
It will only be called if the token is missing or expired. - """ - all_elements = params.copy() - all_elements.insert(0, method) - map_key = "/".join(all_elements) - info = self._data_plane_info.get(map_key) - if not info: - self._lock.acquire() - try: - info = self._data_plane_info.get(map_key) - if not info: - info = info_getter() - self._data_plane_info[map_key] = info - finally: - self._lock.release() - - token = self._tokens.get(map_key) - if not token or not token.valid: - self._lock.acquire() - token = self._tokens.get(map_key) - try: - if not token or not token.valid: - token = refresh(info.authorization_details) - self._tokens[map_key] = token - finally: - self._lock.release() - - return DataPlaneDetails(endpoint_url=info.endpoint_url, token=token) diff --git a/databricks/sdk/oauth.py b/databricks/sdk/oauth.py index d2df2f0f5..e099dbf07 100644 --- a/databricks/sdk/oauth.py +++ b/databricks/sdk/oauth.py @@ -426,12 +426,16 @@ def __init__( client_id: str, client_secret: str = None, redirect_url: str = None, + disable_async: bool = True, ): self._token_endpoint = token_endpoint self._client_id = client_id self._client_secret = client_secret self._redirect_url = redirect_url - super().__init__(token) + super().__init__( + token=token, + disable_async=disable_async, + ) def as_dict(self) -> dict: return {"token": self.token().as_dict()} @@ -625,7 +629,11 @@ def __init__( ): if not scopes: - scopes = ["all-apis"] + # all-apis ensures that the returned OAuth token can be used with all APIs, aside + # from direct-to-dataplane APIs. + # offline_access ensures that the response from the Authorization server includes + # a refresh token. + scopes = ["all-apis", "offline_access"] self.redirect_url = redirect_url self._client_id = client_id @@ -650,8 +658,6 @@ def noop_credentials(_: any): return lambda: {} config = Config(host=host, credentials_strategy=noop_credentials) - if not scopes: - scopes = ["all-apis"] oidc = config.oidc_endpoints if not oidc: raise ValueError(f"{host} does not support OAuth") @@ -708,9 +714,10 @@ class ClientCredentials(Refreshable): scopes: List[str] = None use_params: bool = False use_header: bool = False + disable_async: bool = True def __post_init__(self): - super().__init__() + super().__init__(disable_async=self.disable_async) def refresh(self) -> Token: params = {"grant_type": "client_credentials"} diff --git a/databricks/sdk/service/catalog.py b/databricks/sdk/service/catalog.py index a790b1b6e..99bae81dd 100755 --- a/databricks/sdk/service/catalog.py +++ b/databricks/sdk/service/catalog.py @@ -9471,6 +9471,8 @@ def from_dict(cls, d: Dict[str, Any]) -> UpdateWorkspaceBindingsParameters: @dataclass class ValidateCredentialRequest: + """Next ID: 17""" + aws_iam_role: Optional[AwsIamRole] = None """The AWS IAM role configuration""" diff --git a/databricks/sdk/service/compute.py b/databricks/sdk/service/compute.py index 1a46811bf..b4d0b3394 100755 --- a/databricks/sdk/service/compute.py +++ b/databricks/sdk/service/compute.py @@ -103,6 +103,8 @@ def from_dict(cls, d: Dict[str, Any]) -> AddResponse: @dataclass class Adlsgen2Info: + """A storage location in Adls Gen2""" + destination: str """abfss destination, e.g. 
`abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`.""" @@ -163,6 +165,8 @@ def from_dict(cls, d: Dict[str, Any]) -> AutoScale: @dataclass class AwsAttributes: + """Attributes set during cluster creation which are related to Amazon Web Services.""" + availability: Optional[AwsAvailability] = None """Availability type used for all subsequent nodes past the `first_on_demand` ones. @@ -216,9 +220,7 @@ class AwsAttributes: profile must have previously been added to the Databricks environment by an account administrator. - This feature may only be available to certain customer plans. - - If this field is ommitted, we will pull in the default from the conf if it exists.""" + This feature may only be available to certain customer plans.""" spot_bid_price_percent: Optional[int] = None """The bid price for AWS spot instances, as a percentage of the corresponding instance type's @@ -227,10 +229,7 @@ class AwsAttributes: instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field - will be considered. Note that, for safety, we enforce this field to be no more than 10000. - - The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.""" + will be considered. Note that, for safety, we enforce this field to be no more than 10000.""" zone_id: Optional[str] = None """Identifier for the availability zone/datacenter in which the cluster resides. This string will @@ -239,8 +238,10 @@ class AwsAttributes: deployment resides in the "us-east-1" region. This is an optional field at cluster creation, and if not specified, a default zone will be used. If the zone specified is "auto", will try to place cluster in a zone with high availability, and will retry placement in a different AZ if - there is not enough capacity. The list of available zones as well as the default value can be - found by using the `List Zones` method.""" + there is not enough capacity. + + The list of available zones as well as the default value can be found by using the `List Zones` + method.""" def as_dict(self) -> dict: """Serializes the AwsAttributes into a dictionary suitable for use as a JSON request body.""" @@ -321,10 +322,11 @@ class AwsAvailability(Enum): @dataclass class AzureAttributes: + """Attributes set during cluster creation which are related to Microsoft Azure.""" + availability: Optional[AzureAvailability] = None """Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If - `first_on_demand` is zero (which only happens on pool clusters), this availability type will be - used for the entire cluster.""" + `first_on_demand` is zero, this availability type will be used for the entire cluster.""" first_on_demand: Optional[int] = None """The first `first_on_demand` nodes of the cluster will be placed on on-demand instances. This @@ -383,8 +385,7 @@ def from_dict(cls, d: Dict[str, Any]) -> AzureAttributes: class AzureAvailability(Enum): """Availability type used for all subsequent nodes past the `first_on_demand` ones. 
Note: If - `first_on_demand` is zero (which only happens on pool clusters), this availability type will be - used for the entire cluster.""" + `first_on_demand` is zero, this availability type will be used for the entire cluster.""" ON_DEMAND_AZURE = "ON_DEMAND_AZURE" SPOT_AZURE = "SPOT_AZURE" @@ -452,7 +453,6 @@ def from_dict(cls, d: Dict[str, Any]) -> CancelResponse: @dataclass class ChangeClusterOwner: cluster_id: str - """<needs content added>""" owner_username: str """New owner of the cluster_id after this RPC.""" @@ -559,6 +559,7 @@ def from_dict(cls, d: Dict[str, Any]) -> CloneCluster: @dataclass class CloudProviderNodeInfo: status: Optional[List[CloudProviderNodeStatus]] = None + """Status as reported by the cloud provider""" def as_dict(self) -> dict: """Serializes the CloudProviderNodeInfo into a dictionary suitable for use as a JSON request body.""" @@ -698,6 +699,9 @@ def from_dict(cls, d: Dict[str, Any]) -> ClusterAccessControlResponse: @dataclass class ClusterAttributes: + """Common set of attributes set during cluster creation. These attributes cannot be changed over + the lifetime of a cluster.""" + spark_version: str """The Spark version of the cluster, e.g. `3.3.x-scala2.11`. A list of available Spark versions can be retrieved by using the :method:clusters/sparkVersions API call.""" @@ -763,6 +767,7 @@ class ClusterAttributes: doesn’t have UC nor passthrough enabled.""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster @@ -770,7 +775,11 @@ class ClusterAttributes: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -864,6 +873,7 @@ class ClusterAttributes: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the ClusterAttributes into a dictionary suitable for use as a JSON request body.""" @@ -1064,6 +1074,8 @@ def from_dict(cls, d: Dict[str, Any]) -> ClusterCompliance: @dataclass class ClusterDetails: + """Describes all of the metadata about a single Spark cluster in Databricks.""" + autoscale: Optional[AutoScale] = None """Parameters needed in order to automatically scale clusters up and down based on load. Note: autoscaling works best with DB runtime versions 3.0 or later.""" @@ -1110,7 +1122,7 @@ class ClusterDetails: cluster_source: Optional[ClusterSource] = None """Determines whether the cluster was created by a user through the UI, created by the Databricks - Jobs Scheduler, or through an API request. This is the same as cluster_creator, but read only.""" + Jobs Scheduler, or through an API request.""" creator_user_name: Optional[str] = None """Creator user name. 
The field won't be included in the response if the user has already been @@ -1165,6 +1177,7 @@ class ClusterDetails: - Name: <Databricks internal use>""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver: Optional[SparkNode] = None """Node on which the Spark driver resides. The driver node contains the Spark master and the @@ -1176,7 +1189,11 @@ class ClusterDetails: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -1291,9 +1308,8 @@ class ClusterDetails: be retrieved by using the :method:clusters/sparkVersions API call.""" spec: Optional[ClusterSpec] = None - """`spec` contains a snapshot of the field values that were used to create or edit this cluster. - The contents of `spec` can be used in the body of a create cluster request. This field might not - be populated for older clusters. Note: not included in the response of the ListClusters API.""" + """The spec contains a snapshot of the latest user specified settings that were used to create/edit + the cluster. Note: not included in the response of the ListClusters API.""" ssh_public_keys: Optional[List[str]] = None """SSH public key contents that will be added to each Spark node in this cluster. The corresponding @@ -1325,6 +1341,7 @@ class ClusterDetails: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the ClusterDetails into a dictionary suitable for use as a JSON request body.""" @@ -1586,13 +1603,10 @@ def from_dict(cls, d: Dict[str, Any]) -> ClusterDetails: @dataclass class ClusterEvent: cluster_id: str - """<needs content added>""" data_plane_event_details: Optional[DataPlaneEventDetails] = None - """<needs content added>""" details: Optional[EventDetails] = None - """<needs content added>""" timestamp: Optional[int] = None """The timestamp when the event occurred, stored as the number of milliseconds since the Unix @@ -1679,6 +1693,8 @@ def from_dict(cls, d: Dict[str, Any]) -> ClusterLibraryStatuses: @dataclass class ClusterLogConf: + """Cluster log delivery config""" + dbfs: Optional[DbfsStorageInfo] = None """destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`""" @@ -1690,7 +1706,7 @@ class ClusterLogConf: write data to the s3 destination.""" volumes: Optional[VolumesStorageInfo] = None - """destination needs to be provided. e.g. `{ "volumes" : { "destination" : + """destination needs to be provided, e.g. 
`{ "volumes": { "destination": "/Volumes/catalog/schema/volume/cluster_log" } }`""" def as_dict(self) -> dict: @@ -2250,6 +2266,9 @@ class ClusterSource(Enum): @dataclass class ClusterSpec: + """Contains a snapshot of the latest user specified settings that were used to create/edit the + cluster.""" + apply_policy_default_values: Optional[bool] = None """When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.""" @@ -2319,6 +2338,7 @@ class ClusterSpec: doesn’t have UC nor passthrough enabled.""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster @@ -2326,7 +2346,11 @@ class ClusterSpec: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -2434,6 +2458,7 @@ class ClusterSpec: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the ClusterSpec into a dictionary suitable for use as a JSON request body.""" @@ -2816,6 +2841,7 @@ class CreateCluster: doesn’t have UC nor passthrough enabled.""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster @@ -2823,7 +2849,11 @@ class CreateCluster: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. 
If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -2927,6 +2957,7 @@ class CreateCluster: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the CreateCluster into a dictionary suitable for use as a JSON request body.""" @@ -3531,16 +3562,12 @@ def from_dict(cls, d: Dict[str, Any]) -> CustomPolicyTag: @dataclass class DataPlaneEventDetails: event_type: Optional[DataPlaneEventDetailsEventType] = None - """<needs content added>""" executor_failures: Optional[int] = None - """<needs content added>""" host_id: Optional[str] = None - """<needs content added>""" timestamp: Optional[int] = None - """<needs content added>""" def as_dict(self) -> dict: """Serializes the DataPlaneEventDetails into a dictionary suitable for use as a JSON request body.""" @@ -3580,7 +3607,6 @@ def from_dict(cls, d: Dict[str, Any]) -> DataPlaneEventDetails: class DataPlaneEventDetailsEventType(Enum): - """<needs content added>""" NODE_BLACKLISTED = "NODE_BLACKLISTED" NODE_EXCLUDED_DECOMMISSIONED = "NODE_EXCLUDED_DECOMMISSIONED" @@ -3626,6 +3652,8 @@ class DataSecurityMode(Enum): @dataclass class DbfsStorageInfo: + """A storage location in DBFS""" + destination: str """dbfs destination, e.g. `dbfs:/my/path`""" @@ -3846,6 +3874,10 @@ def from_dict(cls, d: Dict[str, Any]) -> DestroyResponse: @dataclass class DiskSpec: + """Describes the disks that are launched for each instance in the spark cluster. For example, if + the cluster has 3 instances, each instance is configured to launch 2 disks, 100 GiB each, then + Databricks will launch a total of 6 disks, 100 GiB each, for this cluster.""" + disk_count: Optional[int] = None """The number of disks launched for each instance: - This feature is only enabled for supported node types. - Users can choose up to the limit of the disks supported by the node type. - For @@ -3920,9 +3952,15 @@ def from_dict(cls, d: Dict[str, Any]) -> DiskSpec: @dataclass class DiskType: + """Describes the disk type.""" + azure_disk_volume_type: Optional[DiskTypeAzureDiskVolumeType] = None + """All Azure Disk types that Databricks supports. See + https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks""" ebs_volume_type: Optional[DiskTypeEbsVolumeType] = None + """All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for + details.""" def as_dict(self) -> dict: """Serializes the DiskType into a dictionary suitable for use as a JSON request body.""" @@ -3952,12 +3990,16 @@ def from_dict(cls, d: Dict[str, Any]) -> DiskType: class DiskTypeAzureDiskVolumeType(Enum): + """All Azure Disk types that Databricks supports. See + https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks""" PREMIUM_LRS = "PREMIUM_LRS" STANDARD_LRS = "STANDARD_LRS" class DiskTypeEbsVolumeType(Enum): + """All EBS volume types that Databricks supports. 
See https://aws.amazon.com/ebs/details/ for + details.""" GENERAL_PURPOSE_SSD = "GENERAL_PURPOSE_SSD" THROUGHPUT_OPTIMIZED_HDD = "THROUGHPUT_OPTIMIZED_HDD" @@ -3998,6 +4040,7 @@ def from_dict(cls, d: Dict[str, Any]) -> DockerBasicAuth: @dataclass class DockerImage: basic_auth: Optional[DockerBasicAuth] = None + """Basic auth with username and password""" url: Optional[str] = None """URL of the docker image.""" @@ -4027,7 +4070,8 @@ def from_dict(cls, d: Dict[str, Any]) -> DockerImage: class EbsVolumeType(Enum): - """The type of EBS volumes that will be launched with this cluster.""" + """All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for + details.""" GENERAL_PURPOSE_SSD = "GENERAL_PURPOSE_SSD" THROUGHPUT_OPTIMIZED_HDD = "THROUGHPUT_OPTIMIZED_HDD" @@ -4111,6 +4155,7 @@ class EditCluster: doesn’t have UC nor passthrough enabled.""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster @@ -4118,7 +4163,11 @@ class EditCluster: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -4222,6 +4271,7 @@ class EditCluster: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the EditCluster into a dictionary suitable for use as a JSON request body.""" @@ -4781,7 +4831,6 @@ class EventDetails: """The current number of nodes in the cluster.""" did_not_expand_reason: Optional[str] = None - """<needs content added>""" disk_size: Optional[int] = None """Current disk size in bytes""" @@ -4793,7 +4842,6 @@ class EventDetails: """Whether or not a blocklisted node should be terminated. 
For ClusterEventType NODE_BLACKLISTED.""" free_space: Optional[int] = None - """<needs content added>""" init_scripts: Optional[InitScriptEventDetails] = None """List of global and cluster init scripts associated with this cluster event.""" @@ -4988,12 +5036,14 @@ class EventType(Enum): @dataclass class GcpAttributes: + """Attributes set during cluster creation which are related to GCP.""" + availability: Optional[GcpAvailability] = None - """This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or - preemptible VMs with a fallback to on-demand VMs if the former is unavailable.""" + """This field determines whether the spark executors will be scheduled to run on preemptible VMs, + on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.""" boot_disk_size: Optional[int] = None - """boot disk size in GB""" + """Boot disk size in GB""" google_service_account: Optional[str] = None """If provided, the cluster will impersonate the google service account when accessing gcloud @@ -5010,12 +5060,12 @@ class GcpAttributes: use_preemptible_executors: Optional[bool] = None """This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default). Note: Soon - to be deprecated, use the availability field instead.""" + to be deprecated, use the 'availability' field instead.""" zone_id: Optional[str] = None """Identifier for the availability zone in which the cluster resides. This can be one of the following: - "HA" => High availability, spread nodes across availability zones for a Databricks - deployment region [default] - "AUTO" => Databricks picks an availability zone to schedule the + deployment region [default]. - "AUTO" => Databricks picks an availability zone to schedule the cluster on. - A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.""" @@ -5077,6 +5127,8 @@ class GcpAvailability(Enum): @dataclass class GcsStorageInfo: + """A storage location in Google Cloud Platform's GCS""" + destination: str """GCS destination/URI, e.g. `gs://my-bucket/some-prefix`""" @@ -5264,7 +5316,6 @@ def from_dict(cls, d: Dict[str, Any]) -> GetEvents: class GetEventsOrder(Enum): - """The order to list events in; either "ASC" or "DESC". Defaults to "DESC".""" ASC = "ASC" DESC = "DESC" @@ -5273,7 +5324,6 @@ class GetEventsOrder(Enum): @dataclass class GetEventsResponse: events: Optional[List[ClusterEvent]] = None - """<content needs to be added>""" next_page: Optional[GetEvents] = None """The parameters required to retrieve the next page of events. 
Omitted if there are no more events @@ -5334,7 +5384,7 @@ class GetInstancePool: - Currently, Databricks allows at most 45 custom tags""" default_tags: Optional[Dict[str, str]] = None - """Tags that are added by Databricks regardless of any `custom_tags`, including: + """Tags that are added by Databricks regardless of any ``custom_tags``, including: - Vendor: Databricks @@ -5861,13 +5911,17 @@ def from_dict(cls, d: Dict[str, Any]) -> GlobalInitScriptUpdateRequest: @dataclass class InitScriptEventDetails: cluster: Optional[List[InitScriptInfoAndExecutionDetails]] = None - """The cluster scoped init scripts associated with this cluster event""" + """The cluster scoped init scripts associated with this cluster event.""" global_: Optional[List[InitScriptInfoAndExecutionDetails]] = None - """The global init scripts associated with this cluster event""" + """The global init scripts associated with this cluster event.""" reported_for_node: Optional[str] = None - """The private ip address of the node where the init scripts were run.""" + """The private ip of the node we are reporting init script execution details for (we will select + the execution details from only one node rather than reporting the execution details from every + node to keep these event details small) + + This should only be defined for the INIT_SCRIPTS_FINISHED event""" def as_dict(self) -> dict: """Serializes the InitScriptEventDetails into a dictionary suitable for use as a JSON request body.""" @@ -5901,54 +5955,12 @@ def from_dict(cls, d: Dict[str, Any]) -> InitScriptEventDetails: ) -@dataclass -class InitScriptExecutionDetails: - error_message: Optional[str] = None - """Addition details regarding errors.""" - - execution_duration_seconds: Optional[int] = None - """The duration of the script execution in seconds.""" - - status: Optional[InitScriptExecutionDetailsStatus] = None - """The current status of the script""" - - def as_dict(self) -> dict: - """Serializes the InitScriptExecutionDetails into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.error_message is not None: - body["error_message"] = self.error_message - if self.execution_duration_seconds is not None: - body["execution_duration_seconds"] = self.execution_duration_seconds - if self.status is not None: - body["status"] = self.status.value - return body - - def as_shallow_dict(self) -> dict: - """Serializes the InitScriptExecutionDetails into a shallow dictionary of its immediate attributes.""" - body = {} - if self.error_message is not None: - body["error_message"] = self.error_message - if self.execution_duration_seconds is not None: - body["execution_duration_seconds"] = self.execution_duration_seconds - if self.status is not None: - body["status"] = self.status - return body - - @classmethod - def from_dict(cls, d: Dict[str, Any]) -> InitScriptExecutionDetails: - """Deserializes the InitScriptExecutionDetails from a dictionary.""" - return cls( - error_message=d.get("error_message", None), - execution_duration_seconds=d.get("execution_duration_seconds", None), - status=_enum(d, "status", InitScriptExecutionDetailsStatus), - ) - - -class InitScriptExecutionDetailsStatus(Enum): - """The current status of the script""" +class InitScriptExecutionDetailsInitScriptExecutionStatus(Enum): + """Result of attempted script execution""" FAILED_EXECUTION = "FAILED_EXECUTION" FAILED_FETCH = "FAILED_FETCH" + FUSE_MOUNT_FAILED = "FUSE_MOUNT_FAILED" NOT_EXECUTED = "NOT_EXECUTED" SKIPPED = "SKIPPED" SUCCEEDED = "SUCCEEDED" @@ -5957,34 +5969,35 @@ 
class InitScriptExecutionDetailsStatus(Enum): @dataclass class InitScriptInfo: + """Config for an individual init script Next ID: 11""" + abfss: Optional[Adlsgen2Info] = None - """destination needs to be provided. e.g. `{ "abfss" : { "destination" : - "abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>" } }""" + """destination needs to be provided, e.g. + `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`""" dbfs: Optional[DbfsStorageInfo] = None - """destination needs to be provided. e.g. `{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } + """destination needs to be provided. e.g. `{ "dbfs": { "destination" : "dbfs:/home/cluster_log" } }`""" file: Optional[LocalFileInfo] = None - """destination needs to be provided. e.g. `{ "file" : { "destination" : "file:/my/local/file.sh" } - }`""" + """destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`""" gcs: Optional[GcsStorageInfo] = None - """destination needs to be provided. e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`""" + """destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`""" s3: Optional[S3StorageInfo] = None - """destination and either the region or endpoint need to be provided. e.g. `{ "s3": { "destination" - : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }` Cluster iam role is used to - access s3, please make sure the cluster iam role in `instance_profile_arn` has permission to - write data to the s3 destination.""" + """destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { + \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam + role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has + permission to write data to the s3 destination.""" volumes: Optional[VolumesStorageInfo] = None - """destination needs to be provided. e.g. `{ "volumes" : { "destination" : "/Volumes/my-init.sh" } - }`""" + """destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : + \"/Volumes/my-init.sh\" } }`""" workspace: Optional[WorkspaceStorageInfo] = None - """destination needs to be provided. e.g. `{ "workspace" : { "destination" : - "/Users/user1@databricks.com/my-init.sh" } }`""" + """destination needs to be provided, e.g. `{ "workspace": { "destination": + "/cluster-init-scripts/setup-datadog.sh" } }`""" def as_dict(self) -> dict: """Serializes the InitScriptInfo into a dictionary suitable for use as a JSON request body.""" @@ -6040,36 +6053,109 @@ def from_dict(cls, d: Dict[str, Any]) -> InitScriptInfo: @dataclass class InitScriptInfoAndExecutionDetails: - execution_details: Optional[InitScriptExecutionDetails] = None - """Details about the script""" + abfss: Optional[Adlsgen2Info] = None + """destination needs to be provided, e.g. + `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`""" + + dbfs: Optional[DbfsStorageInfo] = None + """destination needs to be provided. e.g. `{ "dbfs": { "destination" : "dbfs:/home/cluster_log" } + }`""" + + error_message: Optional[str] = None + """Additional details regarding errors (such as a file not found message if the status is + FAILED_FETCH). 
This field should only be used to provide *additional* information to the status + field, not duplicate it.""" - script: Optional[InitScriptInfo] = None - """The script""" + execution_duration_seconds: Optional[int] = None + """The number duration of the script execution in seconds""" + + file: Optional[LocalFileInfo] = None + """destination needs to be provided, e.g. `{ "file": { "destination": "file:/my/local/file.sh" } }`""" + + gcs: Optional[GcsStorageInfo] = None + """destination needs to be provided, e.g. `{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`""" + + s3: Optional[S3StorageInfo] = None + """destination and either the region or endpoint need to be provided. e.g. `{ \"s3\": { + \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }` Cluster iam + role is used to access s3, please make sure the cluster iam role in `instance_profile_arn` has + permission to write data to the s3 destination.""" + + status: Optional[InitScriptExecutionDetailsInitScriptExecutionStatus] = None + """The current status of the script""" + + volumes: Optional[VolumesStorageInfo] = None + """destination needs to be provided. e.g. `{ \"volumes\" : { \"destination\" : + \"/Volumes/my-init.sh\" } }`""" + + workspace: Optional[WorkspaceStorageInfo] = None + """destination needs to be provided, e.g. `{ "workspace": { "destination": + "/cluster-init-scripts/setup-datadog.sh" } }`""" def as_dict(self) -> dict: """Serializes the InitScriptInfoAndExecutionDetails into a dictionary suitable for use as a JSON request body.""" body = {} - if self.execution_details: - body["execution_details"] = self.execution_details.as_dict() - if self.script: - body["script"] = self.script.as_dict() + if self.abfss: + body["abfss"] = self.abfss.as_dict() + if self.dbfs: + body["dbfs"] = self.dbfs.as_dict() + if self.error_message is not None: + body["error_message"] = self.error_message + if self.execution_duration_seconds is not None: + body["execution_duration_seconds"] = self.execution_duration_seconds + if self.file: + body["file"] = self.file.as_dict() + if self.gcs: + body["gcs"] = self.gcs.as_dict() + if self.s3: + body["s3"] = self.s3.as_dict() + if self.status is not None: + body["status"] = self.status.value + if self.volumes: + body["volumes"] = self.volumes.as_dict() + if self.workspace: + body["workspace"] = self.workspace.as_dict() return body def as_shallow_dict(self) -> dict: """Serializes the InitScriptInfoAndExecutionDetails into a shallow dictionary of its immediate attributes.""" body = {} - if self.execution_details: - body["execution_details"] = self.execution_details - if self.script: - body["script"] = self.script + if self.abfss: + body["abfss"] = self.abfss + if self.dbfs: + body["dbfs"] = self.dbfs + if self.error_message is not None: + body["error_message"] = self.error_message + if self.execution_duration_seconds is not None: + body["execution_duration_seconds"] = self.execution_duration_seconds + if self.file: + body["file"] = self.file + if self.gcs: + body["gcs"] = self.gcs + if self.s3: + body["s3"] = self.s3 + if self.status is not None: + body["status"] = self.status + if self.volumes: + body["volumes"] = self.volumes + if self.workspace: + body["workspace"] = self.workspace return body @classmethod def from_dict(cls, d: Dict[str, Any]) -> InitScriptInfoAndExecutionDetails: """Deserializes the InitScriptInfoAndExecutionDetails from a dictionary.""" return cls( - execution_details=_from_dict(d, "execution_details", InitScriptExecutionDetails), - 
script=_from_dict(d, "script", InitScriptInfo), + abfss=_from_dict(d, "abfss", Adlsgen2Info), + dbfs=_from_dict(d, "dbfs", DbfsStorageInfo), + error_message=d.get("error_message", None), + execution_duration_seconds=d.get("execution_duration_seconds", None), + file=_from_dict(d, "file", LocalFileInfo), + gcs=_from_dict(d, "gcs", GcsStorageInfo), + s3=_from_dict(d, "s3", S3StorageInfo), + status=_enum(d, "status", InitScriptExecutionDetailsInitScriptExecutionStatus), + volumes=_from_dict(d, "volumes", VolumesStorageInfo), + workspace=_from_dict(d, "workspace", WorkspaceStorageInfo), ) @@ -6250,7 +6336,7 @@ class InstancePoolAndStats: - Currently, Databricks allows at most 45 custom tags""" default_tags: Optional[Dict[str, str]] = None - """Tags that are added by Databricks regardless of any `custom_tags`, including: + """Tags that are added by Databricks regardless of any ``custom_tags``, including: - Vendor: Databricks @@ -6427,10 +6513,10 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAndStats: @dataclass class InstancePoolAwsAttributes: + """Attributes set during instance pool creation which are related to Amazon Web Services.""" + availability: Optional[InstancePoolAwsAttributesAvailability] = None - """Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability""" + """Availability type used for the spot nodes.""" spot_bid_price_percent: Optional[int] = None """Calculates the bid price for AWS spot instances, as a percentage of the corresponding instance @@ -6439,10 +6525,7 @@ class InstancePoolAwsAttributes: instances. Similarly, if this field is set to 200, the bid price is twice the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose bid price percentage matches this field - will be considered. Note that, for safety, we enforce this field to be no more than 10000. - - The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.""" + will be considered. Note that, for safety, we enforce this field to be no more than 10000.""" zone_id: Optional[str] = None """Identifier for the availability zone/datacenter in which the cluster resides. This string will @@ -6485,9 +6568,7 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAwsAttributes: class InstancePoolAwsAttributesAvailability(Enum): - """Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability""" + """The set of AWS availability types supported when setting up nodes for a cluster.""" ON_DEMAND = "ON_DEMAND" SPOT = "SPOT" @@ -6495,14 +6576,16 @@ class InstancePoolAwsAttributesAvailability(Enum): @dataclass class InstancePoolAzureAttributes: + """Attributes set during instance pool creation which are related to Azure.""" + availability: Optional[InstancePoolAzureAttributesAvailability] = None - """Shows the Availability type used for the spot nodes. 
- - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability""" + """Availability type used for the spot nodes.""" spot_bid_max_price: Optional[float] = None - """The default value and documentation here should be kept consistent with - CommonConf.defaultSpotBidMaxPrice.""" + """With variable pricing, you have option to set a max price, in US dollars (USD) For example, the + value 2 would be a max price of $2.00 USD per hour. If you set the max price to be -1, the VM + won't be evicted based on price. The price for the VM will be the current price for spot or the + price for a standard VM, which ever is less, as long as there is capacity and quota available.""" def as_dict(self) -> dict: """Serializes the InstancePoolAzureAttributes into a dictionary suitable for use as a JSON request body.""" @@ -6532,9 +6615,7 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolAzureAttributes: class InstancePoolAzureAttributesAvailability(Enum): - """Shows the Availability type used for the spot nodes. - - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability""" + """The set of Azure availability types supported when setting up nodes for a cluster.""" ON_DEMAND_AZURE = "ON_DEMAND_AZURE" SPOT_AZURE = "SPOT_AZURE" @@ -6542,6 +6623,8 @@ class InstancePoolAzureAttributesAvailability(Enum): @dataclass class InstancePoolGcpAttributes: + """Attributes set during instance pool creation which are related to GCP.""" + gcp_availability: Optional[GcpAvailability] = None """This field determines whether the instance pool will contain preemptible VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.""" @@ -6756,7 +6839,10 @@ def from_dict(cls, d: Dict[str, Any]) -> InstancePoolPermissionsRequest: class InstancePoolState(Enum): - """Current state of the instance pool.""" + """The state of a Cluster. The current allowable state transitions are as follows: + + - ``ACTIVE`` -> ``STOPPED`` - ``ACTIVE`` -> ``DELETED`` - ``STOPPED`` -> ``ACTIVE`` - + ``STOPPED`` -> ``DELETED``""" ACTIVE = "ACTIVE" DELETED = "DELETED" @@ -7099,7 +7185,7 @@ def from_dict(cls, d: Dict[str, Any]) -> ListAllClusterLibraryStatusesResponse: @dataclass class ListAvailableZonesResponse: default_zone: Optional[str] = None - """The availability zone if no `zone_id` is provided in the cluster creation request.""" + """The availability zone if no ``zone_id`` is provided in the cluster creation request.""" zones: Optional[List[str]] = None """The list of available zones (e.g., ['us-west-2c', 'us-east-2']).""" @@ -7227,7 +7313,6 @@ def from_dict(cls, d: Dict[str, Any]) -> ListClustersFilterBy: @dataclass class ListClustersResponse: clusters: Optional[List[ClusterDetails]] = None - """<needs content added>""" next_page_token: Optional[str] = None """This field represents the pagination token to retrieve the next page of results. If the value is @@ -7306,15 +7391,12 @@ def from_dict(cls, d: Dict[str, Any]) -> ListClustersSortBy: class ListClustersSortByDirection(Enum): - """The direction to sort by.""" ASC = "ASC" DESC = "DESC" class ListClustersSortByField(Enum): - """The sorting criteria. 
By default, clusters are sorted by 3 columns from highest to lowest - precedence: cluster state, pinned or unpinned, then cluster name.""" CLUSTER_NAME = "CLUSTER_NAME" DEFAULT = "DEFAULT" @@ -7486,7 +7568,6 @@ class ListSortColumn(Enum): class ListSortOrder(Enum): - """A generic ordering enum for list-based queries.""" ASC = "ASC" DESC = "DESC" @@ -7520,10 +7601,8 @@ def from_dict(cls, d: Dict[str, Any]) -> LocalFileInfo: @dataclass class LogAnalyticsInfo: log_analytics_primary_key: Optional[str] = None - """<needs content added>""" log_analytics_workspace_id: Optional[str] = None - """<needs content added>""" def as_dict(self) -> dict: """Serializes the LogAnalyticsInfo into a dictionary suitable for use as a JSON request body.""" @@ -7554,6 +7633,8 @@ def from_dict(cls, d: Dict[str, Any]) -> LogAnalyticsInfo: @dataclass class LogSyncStatus: + """The log delivery status""" + last_attempted: Optional[int] = None """The timestamp of last attempt. If the last attempt fails, `last_exception` will contain the exception in the last attempt.""" @@ -7633,15 +7714,24 @@ def from_dict(cls, d: Dict[str, Any]) -> MavenLibrary: @dataclass class NodeInstanceType: - instance_type_id: Optional[str] = None + """This structure embodies the machine type that hosts spark containers Note: this should be an + internal data structure for now It is defined in proto in case we want to send it over the wire + in the future (which is likely)""" + + instance_type_id: str + """Unique identifier across instance types""" local_disk_size_gb: Optional[int] = None + """Size of the individual local disks attached to this instance (i.e. per local disk).""" local_disks: Optional[int] = None + """Number of local disks that are present on this instance.""" local_nvme_disk_size_gb: Optional[int] = None + """Size of the individual local nvme disks attached to this instance (i.e. per local disk).""" local_nvme_disks: Optional[int] = None + """Number of local nvme disks that are present on this instance.""" def as_dict(self) -> dict: """Serializes the NodeInstanceType into a dictionary suitable for use as a JSON request body.""" @@ -7687,6 +7777,9 @@ def from_dict(cls, d: Dict[str, Any]) -> NodeInstanceType: @dataclass class NodeType: + """A description of a Spark node type including both the dimensions of the node and the instance + type on which it will be hosted.""" + node_type_id: str """Unique identifier for this node type.""" @@ -7704,9 +7797,13 @@ class NodeType: instance_type_id: str """An identifier for the type of hardware that this node runs on, e.g., "r3.2xlarge" in AWS.""" - category: Optional[str] = None + category: str + """A descriptive category for this node type. Examples include "Memory Optimized" and "Compute + Optimized".""" display_order: Optional[int] = None + """An optional hint at the display order of node types in the UI. Within a node type category, + lowest numbers come first.""" is_deprecated: Optional[bool] = None """Whether the node type is deprecated. 
Non-deprecated node types offer greater performance.""" @@ -7716,30 +7813,36 @@ class NodeType: workloads.""" is_graviton: Optional[bool] = None + """Whether this is an Arm-based instance.""" is_hidden: Optional[bool] = None + """Whether this node is hidden from presentation in the UI.""" is_io_cache_enabled: Optional[bool] = None + """Whether this node comes with IO cache enabled by default.""" node_info: Optional[CloudProviderNodeInfo] = None + """A collection of node type info reported by the cloud provider""" node_instance_type: Optional[NodeInstanceType] = None + """The NodeInstanceType object corresponding to instance_type_id""" num_gpus: Optional[int] = None + """Number of GPUs available for this node type.""" photon_driver_capable: Optional[bool] = None photon_worker_capable: Optional[bool] = None support_cluster_tags: Optional[bool] = None + """Whether this node type support cluster tags.""" support_ebs_volumes: Optional[bool] = None + """Whether this node type support EBS volumes. EBS volumes is disabled for node types that we could + place multiple corresponding containers on the same hosting instance.""" support_port_forwarding: Optional[bool] = None - - supports_elastic_disk: Optional[bool] = None - """Indicates if this node type can be used for an instance pool or cluster with elastic disk - enabled. This is true for most node types.""" + """Whether this node type supports port forwarding.""" def as_dict(self) -> dict: """Serializes the NodeType into a dictionary suitable for use as a JSON request body.""" @@ -7784,8 +7887,6 @@ def as_dict(self) -> dict: body["support_ebs_volumes"] = self.support_ebs_volumes if self.support_port_forwarding is not None: body["support_port_forwarding"] = self.support_port_forwarding - if self.supports_elastic_disk is not None: - body["supports_elastic_disk"] = self.supports_elastic_disk return body def as_shallow_dict(self) -> dict: @@ -7831,8 +7932,6 @@ def as_shallow_dict(self) -> dict: body["support_ebs_volumes"] = self.support_ebs_volumes if self.support_port_forwarding is not None: body["support_port_forwarding"] = self.support_port_forwarding - if self.supports_elastic_disk is not None: - body["supports_elastic_disk"] = self.supports_elastic_disk return body @classmethod @@ -7859,12 +7958,13 @@ def from_dict(cls, d: Dict[str, Any]) -> NodeType: support_cluster_tags=d.get("support_cluster_tags", None), support_ebs_volumes=d.get("support_ebs_volumes", None), support_port_forwarding=d.get("support_port_forwarding", None), - supports_elastic_disk=d.get("supports_elastic_disk", None), ) @dataclass class PendingInstanceError: + """Error message of a failed pending instances""" + instance_id: Optional[str] = None message: Optional[str] = None @@ -7939,7 +8039,6 @@ def from_dict(cls, d: Dict[str, Any]) -> PermanentDeleteClusterResponse: @dataclass class PinCluster: cluster_id: str - """<needs content added>""" def as_dict(self) -> dict: """Serializes the PinCluster into a dictionary suitable for use as a JSON request body.""" @@ -8341,7 +8440,6 @@ class RestartCluster: """The cluster to be started.""" restart_user: Optional[str] = None - """<needs content added>""" def as_dict(self) -> dict: """Serializes the RestartCluster into a dictionary suitable for use as a JSON request body.""" @@ -8491,13 +8589,6 @@ def from_dict(cls, d: Dict[str, Any]) -> Results: class RuntimeEngine(Enum): - """Determines the cluster's runtime engine, either standard or Photon. - - This field is not compatible with legacy `spark_version` values that contain `-photon-`. 
Remove - `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. - - If left unspecified, the runtime engine defaults to standard unless the spark_version contains - -photon-, in which case Photon will be used.""" NULL = "NULL" PHOTON = "PHOTON" @@ -8506,6 +8597,8 @@ class RuntimeEngine(Enum): @dataclass class S3StorageInfo: + """A storage location in Amazon S3""" + destination: str """S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using cluster iam role, please make sure you set cluster iam role and the role has write access to the @@ -8593,6 +8686,8 @@ def from_dict(cls, d: Dict[str, Any]) -> S3StorageInfo: @dataclass class SparkNode: + """Describes a specific Spark driver or executor.""" + host_private_ip: Optional[str] = None """The private IP address of the host instance.""" @@ -8612,16 +8707,10 @@ class SparkNode: public_dns: Optional[str] = None """Public DNS address of this node. This address can be used to access the Spark JDBC server on the driver node. To communicate with the JDBC server, traffic must be manually authorized by adding - security group rules to the "worker-unmanaged" security group via the AWS console. - - Actually it's the public DNS address of the host instance.""" + security group rules to the "worker-unmanaged" security group via the AWS console.""" start_timestamp: Optional[int] = None - """The timestamp (in millisecond) when the Spark node is launched. - - The start_timestamp is set right before the container is being launched. The timestamp when the - container is placed on the ResourceManager, before its launch and setup by the NodeDaemon. This - timestamp is the same as the creation timestamp in the database.""" + """The timestamp (in millisecond) when the Spark node is launched.""" def as_dict(self) -> dict: """Serializes the SparkNode into a dictionary suitable for use as a JSON request body.""" @@ -8677,6 +8766,8 @@ def from_dict(cls, d: Dict[str, Any]) -> SparkNode: @dataclass class SparkNodeAwsAttributes: + """Attributes specific to AWS for a Spark node.""" + is_spot: Optional[bool] = None """Whether this node is on an Amazon spot instance.""" @@ -8779,7 +8870,12 @@ def from_dict(cls, d: Dict[str, Any]) -> StartClusterResponse: class State(Enum): - """Current state of the cluster.""" + """The state of a Cluster. 
The current allowable state transitions are as follows: + + - `PENDING` -> `RUNNING` - `PENDING` -> `TERMINATING` - `RUNNING` -> `RESIZING` - `RUNNING` -> + `RESTARTING` - `RUNNING` -> `TERMINATING` - `RESTARTING` -> `RUNNING` - `RESTARTING` -> + `TERMINATING` - `RESIZING` -> `RUNNING` - `RESIZING` -> `TERMINATING` - `TERMINATING` -> + `TERMINATED`""" ERROR = "ERROR" PENDING = "PENDING" @@ -8835,20 +8931,34 @@ def from_dict(cls, d: Dict[str, Any]) -> TerminationReason: class TerminationReasonCode(Enum): - """status code indicating why the cluster was terminated""" + """The status code indicating why the cluster was terminated""" ABUSE_DETECTED = "ABUSE_DETECTED" + ACCESS_TOKEN_FAILURE = "ACCESS_TOKEN_FAILURE" + ALLOCATION_TIMEOUT = "ALLOCATION_TIMEOUT" + ALLOCATION_TIMEOUT_NODE_DAEMON_NOT_READY = "ALLOCATION_TIMEOUT_NODE_DAEMON_NOT_READY" + ALLOCATION_TIMEOUT_NO_HEALTHY_CLUSTERS = "ALLOCATION_TIMEOUT_NO_HEALTHY_CLUSTERS" + ALLOCATION_TIMEOUT_NO_MATCHED_CLUSTERS = "ALLOCATION_TIMEOUT_NO_MATCHED_CLUSTERS" + ALLOCATION_TIMEOUT_NO_READY_CLUSTERS = "ALLOCATION_TIMEOUT_NO_READY_CLUSTERS" + ALLOCATION_TIMEOUT_NO_UNALLOCATED_CLUSTERS = "ALLOCATION_TIMEOUT_NO_UNALLOCATED_CLUSTERS" + ALLOCATION_TIMEOUT_NO_WARMED_UP_CLUSTERS = "ALLOCATION_TIMEOUT_NO_WARMED_UP_CLUSTERS" ATTACH_PROJECT_FAILURE = "ATTACH_PROJECT_FAILURE" AWS_AUTHORIZATION_FAILURE = "AWS_AUTHORIZATION_FAILURE" + AWS_INACCESSIBLE_KMS_KEY_FAILURE = "AWS_INACCESSIBLE_KMS_KEY_FAILURE" + AWS_INSTANCE_PROFILE_UPDATE_FAILURE = "AWS_INSTANCE_PROFILE_UPDATE_FAILURE" AWS_INSUFFICIENT_FREE_ADDRESSES_IN_SUBNET_FAILURE = "AWS_INSUFFICIENT_FREE_ADDRESSES_IN_SUBNET_FAILURE" AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE = "AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE" + AWS_INVALID_KEY_PAIR = "AWS_INVALID_KEY_PAIR" + AWS_INVALID_KMS_KEY_STATE = "AWS_INVALID_KMS_KEY_STATE" AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE = "AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE" AWS_REQUEST_LIMIT_EXCEEDED = "AWS_REQUEST_LIMIT_EXCEEDED" + AWS_RESOURCE_QUOTA_EXCEEDED = "AWS_RESOURCE_QUOTA_EXCEEDED" AWS_UNSUPPORTED_FAILURE = "AWS_UNSUPPORTED_FAILURE" AZURE_BYOK_KEY_PERMISSION_FAILURE = "AZURE_BYOK_KEY_PERMISSION_FAILURE" AZURE_EPHEMERAL_DISK_FAILURE = "AZURE_EPHEMERAL_DISK_FAILURE" AZURE_INVALID_DEPLOYMENT_TEMPLATE = "AZURE_INVALID_DEPLOYMENT_TEMPLATE" AZURE_OPERATION_NOT_ALLOWED_EXCEPTION = "AZURE_OPERATION_NOT_ALLOWED_EXCEPTION" + AZURE_PACKED_DEPLOYMENT_PARTIAL_FAILURE = "AZURE_PACKED_DEPLOYMENT_PARTIAL_FAILURE" AZURE_QUOTA_EXCEEDED_EXCEPTION = "AZURE_QUOTA_EXCEEDED_EXCEPTION" AZURE_RESOURCE_MANAGER_THROTTLING = "AZURE_RESOURCE_MANAGER_THROTTLING" AZURE_RESOURCE_PROVIDER_THROTTLING = "AZURE_RESOURCE_PROVIDER_THROTTLING" @@ -8857,65 +8967,130 @@ class TerminationReasonCode(Enum): AZURE_VNET_CONFIGURATION_FAILURE = "AZURE_VNET_CONFIGURATION_FAILURE" BOOTSTRAP_TIMEOUT = "BOOTSTRAP_TIMEOUT" BOOTSTRAP_TIMEOUT_CLOUD_PROVIDER_EXCEPTION = "BOOTSTRAP_TIMEOUT_CLOUD_PROVIDER_EXCEPTION" + BOOTSTRAP_TIMEOUT_DUE_TO_MISCONFIG = "BOOTSTRAP_TIMEOUT_DUE_TO_MISCONFIG" + BUDGET_POLICY_LIMIT_ENFORCEMENT_ACTIVATED = "BUDGET_POLICY_LIMIT_ENFORCEMENT_ACTIVATED" + BUDGET_POLICY_RESOLUTION_FAILURE = "BUDGET_POLICY_RESOLUTION_FAILURE" + CLOUD_ACCOUNT_SETUP_FAILURE = "CLOUD_ACCOUNT_SETUP_FAILURE" + CLOUD_OPERATION_CANCELLED = "CLOUD_OPERATION_CANCELLED" CLOUD_PROVIDER_DISK_SETUP_FAILURE = "CLOUD_PROVIDER_DISK_SETUP_FAILURE" + CLOUD_PROVIDER_INSTANCE_NOT_LAUNCHED = "CLOUD_PROVIDER_INSTANCE_NOT_LAUNCHED" CLOUD_PROVIDER_LAUNCH_FAILURE = "CLOUD_PROVIDER_LAUNCH_FAILURE" + 
CLOUD_PROVIDER_LAUNCH_FAILURE_DUE_TO_MISCONFIG = "CLOUD_PROVIDER_LAUNCH_FAILURE_DUE_TO_MISCONFIG" CLOUD_PROVIDER_RESOURCE_STOCKOUT = "CLOUD_PROVIDER_RESOURCE_STOCKOUT" + CLOUD_PROVIDER_RESOURCE_STOCKOUT_DUE_TO_MISCONFIG = "CLOUD_PROVIDER_RESOURCE_STOCKOUT_DUE_TO_MISCONFIG" CLOUD_PROVIDER_SHUTDOWN = "CLOUD_PROVIDER_SHUTDOWN" + CLUSTER_OPERATION_THROTTLED = "CLUSTER_OPERATION_THROTTLED" + CLUSTER_OPERATION_TIMEOUT = "CLUSTER_OPERATION_TIMEOUT" COMMUNICATION_LOST = "COMMUNICATION_LOST" CONTAINER_LAUNCH_FAILURE = "CONTAINER_LAUNCH_FAILURE" CONTROL_PLANE_REQUEST_FAILURE = "CONTROL_PLANE_REQUEST_FAILURE" + CONTROL_PLANE_REQUEST_FAILURE_DUE_TO_MISCONFIG = "CONTROL_PLANE_REQUEST_FAILURE_DUE_TO_MISCONFIG" DATABASE_CONNECTION_FAILURE = "DATABASE_CONNECTION_FAILURE" + DATA_ACCESS_CONFIG_CHANGED = "DATA_ACCESS_CONFIG_CHANGED" DBFS_COMPONENT_UNHEALTHY = "DBFS_COMPONENT_UNHEALTHY" + DISASTER_RECOVERY_REPLICATION = "DISASTER_RECOVERY_REPLICATION" DOCKER_IMAGE_PULL_FAILURE = "DOCKER_IMAGE_PULL_FAILURE" + DRIVER_EVICTION = "DRIVER_EVICTION" + DRIVER_LAUNCH_TIMEOUT = "DRIVER_LAUNCH_TIMEOUT" + DRIVER_NODE_UNREACHABLE = "DRIVER_NODE_UNREACHABLE" + DRIVER_OUT_OF_DISK = "DRIVER_OUT_OF_DISK" + DRIVER_OUT_OF_MEMORY = "DRIVER_OUT_OF_MEMORY" + DRIVER_POD_CREATION_FAILURE = "DRIVER_POD_CREATION_FAILURE" + DRIVER_UNEXPECTED_FAILURE = "DRIVER_UNEXPECTED_FAILURE" DRIVER_UNREACHABLE = "DRIVER_UNREACHABLE" DRIVER_UNRESPONSIVE = "DRIVER_UNRESPONSIVE" + DYNAMIC_SPARK_CONF_SIZE_EXCEEDED = "DYNAMIC_SPARK_CONF_SIZE_EXCEEDED" + EOS_SPARK_IMAGE = "EOS_SPARK_IMAGE" EXECUTION_COMPONENT_UNHEALTHY = "EXECUTION_COMPONENT_UNHEALTHY" + EXECUTOR_POD_UNSCHEDULED = "EXECUTOR_POD_UNSCHEDULED" + GCP_API_RATE_QUOTA_EXCEEDED = "GCP_API_RATE_QUOTA_EXCEEDED" + GCP_FORBIDDEN = "GCP_FORBIDDEN" + GCP_IAM_TIMEOUT = "GCP_IAM_TIMEOUT" + GCP_INACCESSIBLE_KMS_KEY_FAILURE = "GCP_INACCESSIBLE_KMS_KEY_FAILURE" + GCP_INSUFFICIENT_CAPACITY = "GCP_INSUFFICIENT_CAPACITY" + GCP_IP_SPACE_EXHAUSTED = "GCP_IP_SPACE_EXHAUSTED" + GCP_KMS_KEY_PERMISSION_DENIED = "GCP_KMS_KEY_PERMISSION_DENIED" + GCP_NOT_FOUND = "GCP_NOT_FOUND" GCP_QUOTA_EXCEEDED = "GCP_QUOTA_EXCEEDED" + GCP_RESOURCE_QUOTA_EXCEEDED = "GCP_RESOURCE_QUOTA_EXCEEDED" + GCP_SERVICE_ACCOUNT_ACCESS_DENIED = "GCP_SERVICE_ACCOUNT_ACCESS_DENIED" GCP_SERVICE_ACCOUNT_DELETED = "GCP_SERVICE_ACCOUNT_DELETED" + GCP_SERVICE_ACCOUNT_NOT_FOUND = "GCP_SERVICE_ACCOUNT_NOT_FOUND" + GCP_SUBNET_NOT_READY = "GCP_SUBNET_NOT_READY" + GCP_TRUSTED_IMAGE_PROJECTS_VIOLATED = "GCP_TRUSTED_IMAGE_PROJECTS_VIOLATED" + GKE_BASED_CLUSTER_TERMINATION = "GKE_BASED_CLUSTER_TERMINATION" GLOBAL_INIT_SCRIPT_FAILURE = "GLOBAL_INIT_SCRIPT_FAILURE" HIVE_METASTORE_PROVISIONING_FAILURE = "HIVE_METASTORE_PROVISIONING_FAILURE" IMAGE_PULL_PERMISSION_DENIED = "IMAGE_PULL_PERMISSION_DENIED" INACTIVITY = "INACTIVITY" + INIT_CONTAINER_NOT_FINISHED = "INIT_CONTAINER_NOT_FINISHED" INIT_SCRIPT_FAILURE = "INIT_SCRIPT_FAILURE" INSTANCE_POOL_CLUSTER_FAILURE = "INSTANCE_POOL_CLUSTER_FAILURE" + INSTANCE_POOL_MAX_CAPACITY_REACHED = "INSTANCE_POOL_MAX_CAPACITY_REACHED" + INSTANCE_POOL_NOT_FOUND = "INSTANCE_POOL_NOT_FOUND" INSTANCE_UNREACHABLE = "INSTANCE_UNREACHABLE" + INSTANCE_UNREACHABLE_DUE_TO_MISCONFIG = "INSTANCE_UNREACHABLE_DUE_TO_MISCONFIG" + INTERNAL_CAPACITY_FAILURE = "INTERNAL_CAPACITY_FAILURE" INTERNAL_ERROR = "INTERNAL_ERROR" INVALID_ARGUMENT = "INVALID_ARGUMENT" + INVALID_AWS_PARAMETER = "INVALID_AWS_PARAMETER" + INVALID_INSTANCE_PLACEMENT_PROTOCOL = "INVALID_INSTANCE_PLACEMENT_PROTOCOL" INVALID_SPARK_IMAGE = "INVALID_SPARK_IMAGE" + 
INVALID_WORKER_IMAGE_FAILURE = "INVALID_WORKER_IMAGE_FAILURE" + IN_PENALTY_BOX = "IN_PENALTY_BOX" IP_EXHAUSTION_FAILURE = "IP_EXHAUSTION_FAILURE" JOB_FINISHED = "JOB_FINISHED" K8S_AUTOSCALING_FAILURE = "K8S_AUTOSCALING_FAILURE" K8S_DBR_CLUSTER_LAUNCH_TIMEOUT = "K8S_DBR_CLUSTER_LAUNCH_TIMEOUT" + LAZY_ALLOCATION_TIMEOUT = "LAZY_ALLOCATION_TIMEOUT" + MAINTENANCE_MODE = "MAINTENANCE_MODE" METASTORE_COMPONENT_UNHEALTHY = "METASTORE_COMPONENT_UNHEALTHY" NEPHOS_RESOURCE_MANAGEMENT = "NEPHOS_RESOURCE_MANAGEMENT" + NETVISOR_SETUP_TIMEOUT = "NETVISOR_SETUP_TIMEOUT" NETWORK_CONFIGURATION_FAILURE = "NETWORK_CONFIGURATION_FAILURE" NFS_MOUNT_FAILURE = "NFS_MOUNT_FAILURE" + NO_MATCHED_K8S = "NO_MATCHED_K8S" + NO_MATCHED_K8S_TESTING_TAG = "NO_MATCHED_K8S_TESTING_TAG" NPIP_TUNNEL_SETUP_FAILURE = "NPIP_TUNNEL_SETUP_FAILURE" NPIP_TUNNEL_TOKEN_FAILURE = "NPIP_TUNNEL_TOKEN_FAILURE" + POD_ASSIGNMENT_FAILURE = "POD_ASSIGNMENT_FAILURE" + POD_SCHEDULING_FAILURE = "POD_SCHEDULING_FAILURE" REQUEST_REJECTED = "REQUEST_REJECTED" REQUEST_THROTTLED = "REQUEST_THROTTLED" + RESOURCE_USAGE_BLOCKED = "RESOURCE_USAGE_BLOCKED" + SECRET_CREATION_FAILURE = "SECRET_CREATION_FAILURE" SECRET_RESOLUTION_ERROR = "SECRET_RESOLUTION_ERROR" SECURITY_DAEMON_REGISTRATION_EXCEPTION = "SECURITY_DAEMON_REGISTRATION_EXCEPTION" SELF_BOOTSTRAP_FAILURE = "SELF_BOOTSTRAP_FAILURE" + SERVERLESS_LONG_RUNNING_TERMINATED = "SERVERLESS_LONG_RUNNING_TERMINATED" SKIPPED_SLOW_NODES = "SKIPPED_SLOW_NODES" SLOW_IMAGE_DOWNLOAD = "SLOW_IMAGE_DOWNLOAD" SPARK_ERROR = "SPARK_ERROR" SPARK_IMAGE_DOWNLOAD_FAILURE = "SPARK_IMAGE_DOWNLOAD_FAILURE" + SPARK_IMAGE_DOWNLOAD_THROTTLED = "SPARK_IMAGE_DOWNLOAD_THROTTLED" + SPARK_IMAGE_NOT_FOUND = "SPARK_IMAGE_NOT_FOUND" SPARK_STARTUP_FAILURE = "SPARK_STARTUP_FAILURE" SPOT_INSTANCE_TERMINATION = "SPOT_INSTANCE_TERMINATION" + SSH_BOOTSTRAP_FAILURE = "SSH_BOOTSTRAP_FAILURE" STORAGE_DOWNLOAD_FAILURE = "STORAGE_DOWNLOAD_FAILURE" + STORAGE_DOWNLOAD_FAILURE_DUE_TO_MISCONFIG = "STORAGE_DOWNLOAD_FAILURE_DUE_TO_MISCONFIG" + STORAGE_DOWNLOAD_FAILURE_SLOW = "STORAGE_DOWNLOAD_FAILURE_SLOW" + STORAGE_DOWNLOAD_FAILURE_THROTTLED = "STORAGE_DOWNLOAD_FAILURE_THROTTLED" STS_CLIENT_SETUP_FAILURE = "STS_CLIENT_SETUP_FAILURE" SUBNET_EXHAUSTED_FAILURE = "SUBNET_EXHAUSTED_FAILURE" TEMPORARILY_UNAVAILABLE = "TEMPORARILY_UNAVAILABLE" TRIAL_EXPIRED = "TRIAL_EXPIRED" UNEXPECTED_LAUNCH_FAILURE = "UNEXPECTED_LAUNCH_FAILURE" + UNEXPECTED_POD_RECREATION = "UNEXPECTED_POD_RECREATION" UNKNOWN = "UNKNOWN" UNSUPPORTED_INSTANCE_TYPE = "UNSUPPORTED_INSTANCE_TYPE" UPDATE_INSTANCE_PROFILE_FAILURE = "UPDATE_INSTANCE_PROFILE_FAILURE" + USER_INITIATED_VM_TERMINATION = "USER_INITIATED_VM_TERMINATION" USER_REQUEST = "USER_REQUEST" WORKER_SETUP_FAILURE = "WORKER_SETUP_FAILURE" WORKSPACE_CANCELLED_ERROR = "WORKSPACE_CANCELLED_ERROR" WORKSPACE_CONFIGURATION_ERROR = "WORKSPACE_CONFIGURATION_ERROR" + WORKSPACE_UPDATE = "WORKSPACE_UPDATE" class TerminationReasonType(Enum): @@ -8980,7 +9155,6 @@ def from_dict(cls, d: Dict[str, Any]) -> UninstallLibrariesResponse: @dataclass class UnpinCluster: cluster_id: str - """<needs content added>""" def as_dict(self) -> dict: """Serializes the UnpinCluster into a dictionary suitable for use as a JSON request body.""" @@ -9026,10 +9200,18 @@ class UpdateCluster: """ID of the cluster.""" update_mask: str - """Specifies which fields of the cluster will be updated. This is required in the POST request. The - update mask should be supplied as a single string. To specify multiple fields, separate them - with commas (no spaces). 
To delete a field from a cluster configuration, add it to the - `update_mask` string but omit it from the `cluster` object.""" + """Used to specify which cluster attributes and size fields to update. See + https://google.aip.dev/161 for more details. + + The field mask must be a single string, with multiple fields separated by commas (no spaces). + The field path is relative to the resource object, using a dot (`.`) to navigate sub-fields + (e.g., `author.given_name`). Specification of elements in sequence or map fields is not allowed, + as only the entire collection field can be specified. Field names must exactly match the + resource field names. + + A field mask of `*` indicates full replacement. It’s recommended to always explicitly list the + fields being updated and avoid using `*` wildcards, as it can lead to unintended results if the + API changes in the future.""" cluster: Optional[UpdateClusterResource] = None """The cluster to be updated.""" @@ -9133,6 +9315,7 @@ class UpdateClusterResource: doesn’t have UC nor passthrough enabled.""" docker_image: Optional[DockerImage] = None + """Custom docker image BYOC""" driver_instance_pool_id: Optional[str] = None """The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster @@ -9140,7 +9323,11 @@ class UpdateClusterResource: driver_node_type_id: Optional[str] = None """The node type of the Spark driver. Note that this field is optional; if unset, the driver node - type will be set as the same value as `node_type_id` defined above.""" + type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id + and node_type_id take precedence.""" enable_elastic_disk: Optional[bool] = None """Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk @@ -9248,6 +9435,7 @@ class UpdateClusterResource: `use_ml_runtime`, and whether `node_type_id` is gpu node or not.""" workload_type: Optional[WorkloadType] = None + """Cluster Attributes showing for clusters workload types.""" def as_dict(self) -> dict: """Serializes the UpdateClusterResource into a dictionary suitable for use as a JSON request body.""" @@ -9449,8 +9637,11 @@ def from_dict(cls, d: Dict[str, Any]) -> UpdateResponse: @dataclass class VolumesStorageInfo: + """A storage location back by UC Volumes.""" + destination: str - """Unity Catalog volumes file destination, e.g. `/Volumes/catalog/schema/volume/dir/file`""" + """UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh` or + `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh`""" def as_dict(self) -> dict: """Serializes the VolumesStorageInfo into a dictionary suitable for use as a JSON request body.""" @@ -9474,6 +9665,8 @@ def from_dict(cls, d: Dict[str, Any]) -> VolumesStorageInfo: @dataclass class WorkloadType: + """Cluster Attributes showing for clusters workload types.""" + clients: ClientsTypes """defined what type of clients can use the cluster. E.g. Notebooks, Jobs""" @@ -9499,8 +9692,10 @@ def from_dict(cls, d: Dict[str, Any]) -> WorkloadType: @dataclass class WorkspaceStorageInfo: + """A storage location in Workspace Filesystem (WSFS)""" + destination: str - """workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`""" + """wsfs destination, e.g. 
`workspace:/cluster-init-scripts/setup-datadog.sh`""" def as_dict(self) -> dict: """Serializes the WorkspaceStorageInfo into a dictionary suitable for use as a JSON request body.""" @@ -9954,7 +10149,6 @@ def change_owner(self, cluster_id: str, owner_username: str): `owner_username`. :param cluster_id: str - <needs content added> :param owner_username: str New owner of the cluster_id after this RPC. @@ -10010,8 +10204,11 @@ def create( """Create new cluster. Creates a new Spark cluster. This method will acquire new instances from the cloud provider if - necessary. Note: Databricks may not be able to acquire some of the requested nodes, due to cloud - provider limitations (account limits, spot price, etc.) or transient network issues. + necessary. This method is asynchronous; the returned ``cluster_id`` can be used to poll the cluster + status. When this method returns, the cluster will be in a ``PENDING`` state. The cluster will be + usable once it enters a ``RUNNING`` state. Note: Databricks may not be able to acquire some of the + requested nodes, due to cloud provider limitations (account limits, spot price, etc.) or transient + network issues. If Databricks acquires at least 85% of the requested on-demand nodes, cluster creation will succeed. Otherwise the cluster will terminate with an informative error message. @@ -10084,12 +10281,17 @@ def create( standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. :param docker_image: :class:`DockerImage` (optional) + Custom docker image BYOC :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. :param driver_node_type_id: str (optional) The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and + node_type_id take precedence. :param enable_elastic_disk: bool (optional) Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions @@ -10176,6 +10378,7 @@ def create( `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. :param workload_type: :class:`WorkloadType` (optional) + Cluster Attributes showing for clusters workload types. :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -10470,12 +10673,17 @@ def edit( standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. :param docker_image: :class:`DockerImage` (optional) + Custom docker image BYOC :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. :param driver_node_type_id: str (optional) The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. 
+ + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and + node_type_id take precedence. :param enable_elastic_disk: bool (optional) Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions @@ -10562,6 +10770,7 @@ def edit( `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. :param workload_type: :class:`WorkloadType` (optional) + Cluster Attributes showing for clusters workload types. :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -10724,8 +10933,7 @@ def events( """List cluster activity events. Retrieves a list of events about the activity of a cluster. This API is paginated. If there are more - events to read, the response includes all the nparameters necessary to request the next page of - events. + events to read, the response includes all the parameters necessary to request the next page of events. :param cluster_id: str The ID of the cluster to retrieve events about. @@ -10944,7 +11152,6 @@ def pin(self, cluster_id: str): cluster that is already pinned will have no effect. This API can only be called by workspace admins. :param cluster_id: str - <needs content added> """ @@ -11021,7 +11228,6 @@ def restart(self, cluster_id: str, *, restart_user: Optional[str] = None) -> Wai :param cluster_id: str The cluster to be started. :param restart_user: str (optional) - <needs content added> :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -11091,11 +11297,10 @@ def start(self, cluster_id: str) -> Wait[ClusterDetails]: """Start terminated cluster. Starts a terminated Spark cluster with the supplied ID. This works similar to `createCluster` except: - - * The previous cluster id and attributes are preserved. * The cluster starts with the last specified - cluster size. * If the previous cluster was an autoscaling cluster, the current cluster starts with - the minimum number of nodes. * If the cluster is not currently in a `TERMINATED` state, nothing will - happen. * Clusters launched to run a job cannot be started. + - The previous cluster id and attributes are preserved. - The cluster starts with the last specified + cluster size. - If the previous cluster was an autoscaling cluster, the current cluster starts with + the minimum number of nodes. - If the cluster is not currently in a ``TERMINATED`` state, nothing will + happen. - Clusters launched to run a job cannot be started. :param cluster_id: str The cluster to be started. @@ -11128,7 +11333,6 @@ def unpin(self, cluster_id: str): admins. :param cluster_id: str - <needs content added> """ @@ -11159,10 +11363,18 @@ def update( :param cluster_id: str ID of the cluster. :param update_mask: str - Specifies which fields of the cluster will be updated. This is required in the POST request. The - update mask should be supplied as a single string. To specify multiple fields, separate them with - commas (no spaces). To delete a field from a cluster configuration, add it to the `update_mask` - string but omit it from the `cluster` object. + Used to specify which cluster attributes and size fields to update. See https://google.aip.dev/161 + for more details. 
+ + The field mask must be a single string, with multiple fields separated by commas (no spaces). The + field path is relative to the resource object, using a dot (`.`) to navigate sub-fields (e.g., + `author.given_name`). Specification of elements in sequence or map fields is not allowed, as only + the entire collection field can be specified. Field names must exactly match the resource field + names. + + A field mask of `*` indicates full replacement. It’s recommended to always explicitly list the + fields being updated and avoid using `*` wildcards, as it can lead to unintended results if the API + changes in the future. :param cluster: :class:`UpdateClusterResource` (optional) The cluster to be updated. diff --git a/databricks/sdk/service/dashboards.py b/databricks/sdk/service/dashboards.py index b1e915640..a85c0269b 100755 --- a/databricks/sdk/service/dashboards.py +++ b/databricks/sdk/service/dashboards.py @@ -594,12 +594,15 @@ class GenieMessage: `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response - generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is - completed. Results are in the `attachments` field. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `SUBMITTED`: Message has - been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to - rerun the query. * `CANCELLED`: Message has been cancelled.""" + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: + Message processing is completed. Results are in the `attachments` field. Get the SQL query + result by calling + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available + anymore. The user needs to rerun the query. Rerun the SQL query result by calling + [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: + Message has been cancelled.""" user_id: Optional[int] = None """ID of the user who created the message""" @@ -697,6 +700,10 @@ class GenieQueryAttachment: query_result_metadata: Optional[GenieResultMetadata] = None """Metadata associated with the query result.""" + statement_id: Optional[str] = None + """Statement Execution API statement id. 
Use [Get status, manifest, and result first + chunk](:method:statementexecution/getstatement) to get the full result data.""" + title: Optional[str] = None """Name of the query""" @@ -713,6 +720,8 @@ def as_dict(self) -> dict: body["query"] = self.query if self.query_result_metadata: body["query_result_metadata"] = self.query_result_metadata.as_dict() + if self.statement_id is not None: + body["statement_id"] = self.statement_id if self.title is not None: body["title"] = self.title return body @@ -730,6 +739,8 @@ def as_shallow_dict(self) -> dict: body["query"] = self.query if self.query_result_metadata: body["query_result_metadata"] = self.query_result_metadata + if self.statement_id is not None: + body["statement_id"] = self.statement_id if self.title is not None: body["title"] = self.title return body @@ -743,6 +754,7 @@ def from_dict(cls, d: Dict[str, Any]) -> GenieQueryAttachment: last_updated_timestamp=d.get("last_updated_timestamp", None), query=d.get("query", None), query_result_metadata=_from_dict(d, "query_result_metadata", GenieResultMetadata), + statement_id=d.get("statement_id", None), title=d.get("title", None), ) @@ -1062,6 +1074,7 @@ class MessageErrorType(Enum): CHAT_COMPLETION_NETWORK_EXCEPTION = "CHAT_COMPLETION_NETWORK_EXCEPTION" CONTENT_FILTER_EXCEPTION = "CONTENT_FILTER_EXCEPTION" CONTEXT_EXCEEDED_EXCEPTION = "CONTEXT_EXCEEDED_EXCEPTION" + COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION = "COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION" COULD_NOT_GET_UC_SCHEMA_EXCEPTION = "COULD_NOT_GET_UC_SCHEMA_EXCEPTION" DEPLOYMENT_NOT_FOUND_EXCEPTION = "DEPLOYMENT_NOT_FOUND_EXCEPTION" FUNCTIONS_NOT_AVAILABLE_EXCEPTION = "FUNCTIONS_NOT_AVAILABLE_EXCEPTION" @@ -1069,6 +1082,7 @@ class MessageErrorType(Enum): FUNCTION_ARGUMENTS_INVALID_JSON_EXCEPTION = "FUNCTION_ARGUMENTS_INVALID_JSON_EXCEPTION" FUNCTION_ARGUMENTS_INVALID_TYPE_EXCEPTION = "FUNCTION_ARGUMENTS_INVALID_TYPE_EXCEPTION" FUNCTION_CALL_MISSING_PARAMETER_EXCEPTION = "FUNCTION_CALL_MISSING_PARAMETER_EXCEPTION" + GENERATED_SQL_QUERY_TOO_LONG_EXCEPTION = "GENERATED_SQL_QUERY_TOO_LONG_EXCEPTION" GENERIC_CHAT_COMPLETION_EXCEPTION = "GENERIC_CHAT_COMPLETION_EXCEPTION" GENERIC_CHAT_COMPLETION_SERVICE_EXCEPTION = "GENERIC_CHAT_COMPLETION_SERVICE_EXCEPTION" GENERIC_SQL_EXEC_API_CALL_EXCEPTION = "GENERIC_SQL_EXEC_API_CALL_EXCEPTION" @@ -1083,6 +1097,7 @@ class MessageErrorType(Enum): MESSAGE_CANCELLED_WHILE_EXECUTING_EXCEPTION = "MESSAGE_CANCELLED_WHILE_EXECUTING_EXCEPTION" MESSAGE_DELETED_WHILE_EXECUTING_EXCEPTION = "MESSAGE_DELETED_WHILE_EXECUTING_EXCEPTION" MESSAGE_UPDATED_WHILE_EXECUTING_EXCEPTION = "MESSAGE_UPDATED_WHILE_EXECUTING_EXCEPTION" + MISSING_SQL_QUERY_EXCEPTION = "MISSING_SQL_QUERY_EXCEPTION" NO_DEPLOYMENTS_AVAILABLE_TO_WORKSPACE = "NO_DEPLOYMENTS_AVAILABLE_TO_WORKSPACE" NO_QUERY_TO_VISUALIZE_EXCEPTION = "NO_QUERY_TO_VISUALIZE_EXCEPTION" NO_TABLES_TO_QUERY_EXCEPTION = "NO_TABLES_TO_QUERY_EXCEPTION" @@ -1107,12 +1122,15 @@ class MessageStatus(Enum): `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response - generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is - completed. Results are in the `attachments` field. Get the SQL query result by calling - [getMessageQueryResult](:method:genie/getMessageQueryResult) API. 
* `SUBMITTED`: Message has - been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to - rerun the query. * `CANCELLED`: Message has been cancelled.""" + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: + Message processing is completed. Results are in the `attachments` field. Get the SQL query + result by calling + [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * + `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available + anymore. The user needs to rerun the query. Rerun the SQL query result by calling + [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: + Message has been cancelled.""" ASKING_AI = "ASKING_AI" CANCELLED = "CANCELLED" @@ -1917,7 +1935,8 @@ def execute_message_attachment_query( ) -> GenieGetMessageQueryResultResponse: """Execute message attachment SQL query. - Execute the SQL for a message query attachment. + Execute the SQL for a message query attachment. Use this API when the query attachment has expired and + needs to be re-executed. :param space_id: str Genie space ID @@ -1945,7 +1964,7 @@ def execute_message_attachment_query( def execute_message_query( self, space_id: str, conversation_id: str, message_id: str ) -> GenieGetMessageQueryResultResponse: - """Execute SQL query in a conversation message. + """[Deprecated] Execute SQL query in a conversation message. Execute the SQL query in the message. @@ -2059,7 +2078,7 @@ def get_message_query_result( def get_message_query_result_by_attachment( self, space_id: str, conversation_id: str, message_id: str, attachment_id: str ) -> GenieGetMessageQueryResultResponse: - """[deprecated] Get conversation message SQL query result. + """[Deprecated] Get conversation message SQL query result. Get the result of SQL query if the message has a query attachment. This is only available if a message has a query attachment and the message status is `EXECUTING_QUERY` OR `COMPLETED`. @@ -2088,9 +2107,9 @@ def get_message_query_result_by_attachment( return GenieGetMessageQueryResultResponse.from_dict(res) def get_space(self, space_id: str) -> GenieSpace: - """Get details of a Genie Space. + """Get Genie Space. - Get a Genie Space. + Get details of a Genie Space. 
:param space_id: str The ID associated with the Genie space diff --git a/databricks/sdk/service/files.py b/databricks/sdk/service/files.py index 394aa8697..8d60b842f 100755 --- a/databricks/sdk/service/files.py +++ b/databricks/sdk/service/files.py @@ -314,12 +314,14 @@ def from_dict(cls, d: Dict[str, Any]) -> DirectoryEntry: @dataclass class DownloadResponse: content_length: Optional[int] = None + """The length of the HTTP response body in bytes.""" content_type: Optional[str] = None contents: Optional[BinaryIO] = None last_modified: Optional[str] = None + """The last modified time of the file in HTTP-date (RFC 7231) format.""" def as_dict(self) -> dict: """Serializes the DownloadResponse into a dictionary suitable for use as a JSON request body.""" @@ -430,10 +432,12 @@ def from_dict(cls, d: Dict[str, Any]) -> GetDirectoryMetadataResponse: @dataclass class GetMetadataResponse: content_length: Optional[int] = None + """The length of the HTTP response body in bytes.""" content_type: Optional[str] = None last_modified: Optional[str] = None + """The last modified time of the file in HTTP-date (RFC 7231) format.""" def as_dict(self) -> dict: """Serializes the GetMetadataResponse into a dictionary suitable for use as a JSON request body.""" diff --git a/databricks/sdk/service/iam.py b/databricks/sdk/service/iam.py index 1dd81aaed..d5fe5645e 100755 --- a/databricks/sdk/service/iam.py +++ b/databricks/sdk/service/iam.py @@ -846,7 +846,7 @@ def from_dict(cls, d: Dict[str, Any]) -> ObjectPermissions: @dataclass class PartialUpdate: id: Optional[str] = None - """Unique ID for a user in the Databricks workspace.""" + """Unique ID in the Databricks workspace.""" operations: Optional[List[Patch]] = None @@ -1918,8 +1918,7 @@ class User: groups: Optional[List[ComplexValue]] = None id: Optional[str] = None - """Databricks user ID. This is automatically set by Databricks. Any value provided by the client - will be ignored.""" + """Databricks user ID.""" name: Optional[Name] = None @@ -2480,7 +2479,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates the details of a group. :param id: str - Unique ID for a group in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -2493,7 +2492,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -2557,7 +2555,6 @@ def update( if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -2765,7 +2762,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates the details of a single service principal in the Databricks account. :param id: str - Unique ID for a service principal in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. 
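The SCIM `patch` endpoints touched above all accept the same `Patch` operations and `PatchSchema` value. As a minimal sketch of that calling convention (a workspace-level user is shown, and the ID is a placeholder), assuming the standard `WorkspaceClient` accessors:

from databricks.sdk import WorkspaceClient
from databricks.sdk.service import iam

w = WorkspaceClient()

# Deactivate a user by patching only the `active` attribute.
# "123456" is a hypothetical user ID; the operation follows SCIM PatchOp semantics.
w.users.patch(
    id="123456",
    operations=[iam.Patch(op=iam.PatchOp.REPLACE, path="active", value=False)],
    schemas=[iam.PatchSchema.URN_IETF_PARAMS_SCIM_API_MESSAGES_2_0_PATCH_OP],
)

The account-level `service_principals` and `groups` services follow the same pattern with their own IDs.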
@@ -2778,7 +2775,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -2848,7 +2844,6 @@ def update( if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -2912,8 +2907,7 @@ def create( External ID is not currently supported. It is reserved for future use. :param groups: List[:class:`ComplexValue`] (optional) :param id: str (optional) - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param name: :class:`Name` (optional) :param roles: List[:class:`ComplexValue`] (optional) Corresponds to AWS instance profile/arn role. @@ -3123,7 +3117,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates a user resource by applying the supplied operations on specific user attributes. :param id: str - Unique ID for a user in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -3136,7 +3130,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -3164,8 +3157,7 @@ def update( Replaces a user's information with the data supplied in request. :param id: str - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param active: bool (optional) If this user is active :param display_name: str (optional) @@ -3215,7 +3207,6 @@ def update( if user_name is not None: body["userName"] = user_name headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -3434,7 +3425,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates the details of a group. :param id: str - Unique ID for a group in the Databricks workspace. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -3447,7 +3438,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -3509,7 +3499,6 @@ def update( if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -3922,7 +3911,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates the details of a single service principal in the Databricks workspace. :param id: str - Unique ID for a service principal in the Databricks workspace. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. 
Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -3935,7 +3924,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -4000,7 +3988,6 @@ def update( if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -4059,8 +4046,7 @@ def create( External ID is not currently supported. It is reserved for future use. :param groups: List[:class:`ComplexValue`] (optional) :param id: str (optional) - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param name: :class:`Name` (optional) :param roles: List[:class:`ComplexValue`] (optional) Corresponds to AWS instance profile/arn role. @@ -4294,7 +4280,7 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O Partially updates a user resource by applying the supplied operations on specific user attributes. :param id: str - Unique ID for a user in the Databricks workspace. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -4307,7 +4293,6 @@ def patch(self, id: str, *, operations: Optional[List[Patch]] = None, schemas: O if schemas is not None: body["schemas"] = [v.value for v in schemas] headers = { - "Accept": "application/json", "Content-Type": "application/json", } @@ -4356,8 +4341,7 @@ def update( Replaces a user's information with the data supplied in request. :param id: str - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param active: bool (optional) If this user is active :param display_name: str (optional) @@ -4407,7 +4391,6 @@ def update( if user_name is not None: body["userName"] = user_name headers = { - "Accept": "application/json", "Content-Type": "application/json", } diff --git a/databricks/sdk/service/jobs.py b/databricks/sdk/service/jobs.py index 6a19b8980..5be08ce72 100755 --- a/databricks/sdk/service/jobs.py +++ b/databricks/sdk/service/jobs.py @@ -3659,6 +3659,7 @@ class PerformanceTarget(Enum): on serverless compute should be. 
The performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager (see cluster-common PerformanceTarget).""" + BALANCED = "BALANCED" COST_OPTIMIZED = "COST_OPTIMIZED" PERFORMANCE_OPTIMIZED = "PERFORMANCE_OPTIMIZED" diff --git a/databricks/sdk/service/marketplace.py b/databricks/sdk/service/marketplace.py index 1851bf1d6..41992fd69 100755 --- a/databricks/sdk/service/marketplace.py +++ b/databricks/sdk/service/marketplace.py @@ -1192,6 +1192,7 @@ def from_dict(cls, d: Dict[str, Any]) -> FileParent: class FileParentType(Enum): LISTING = "LISTING" + LISTING_RESOURCE = "LISTING_RESOURCE" PROVIDER = "PROVIDER" @@ -2452,6 +2453,7 @@ class ListingType(Enum): class MarketplaceFileType(Enum): + APP = "APP" EMBEDDED_NOTEBOOK = "EMBEDDED_NOTEBOOK" PROVIDER_ICON = "PROVIDER_ICON" diff --git a/databricks/sdk/service/ml.py b/databricks/sdk/service/ml.py index 61a7b1bc7..88a807ee4 100755 --- a/databricks/sdk/service/ml.py +++ b/databricks/sdk/service/ml.py @@ -3,11 +3,15 @@ from __future__ import annotations import logging +import random +import time from dataclasses import dataclass +from datetime import timedelta from enum import Enum -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Callable, Dict, Iterator, List, Optional -from ._internal import _enum, _from_dict, _repeated_dict, _repeated_enum +from ..errors import OperationFailed +from ._internal import Wait, _enum, _from_dict, _repeated_dict, _repeated_enum _LOG = logging.getLogger("databricks.sdk") @@ -482,6 +486,184 @@ def from_dict(cls, d: Dict[str, Any]) -> CreateExperimentResponse: return cls(experiment_id=d.get("experiment_id", None)) +@dataclass +class CreateForecastingExperimentRequest: + train_data_path: str + """The three-level (fully qualified) name of a unity catalog table. This table serves as the + training data for the forecasting model.""" + + target_column: str + """Name of the column in the input training table that serves as the prediction target. The values + in this column will be used as the ground truth for model training.""" + + time_column: str + """Name of the column in the input training table that represents the timestamp of each row.""" + + forecast_granularity: str + """The granularity of the forecast. This defines the time interval between consecutive rows in the + time series data. Possible values: '1 second', '1 minute', '5 minutes', '10 minutes', '15 + minutes', '30 minutes', 'Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'.""" + + forecast_horizon: int + """The number of time steps into the future for which predictions should be made. This value + represents a multiple of forecast_granularity determining how far ahead the model will forecast.""" + + custom_weights_column: Optional[str] = None + """Name of the column in the input training table used to customize the weight for each time series + to calculate weighted metrics.""" + + experiment_path: Optional[str] = None + """The path to the created experiment. This is the path where the experiment will be stored in the + workspace.""" + + holiday_regions: Optional[List[str]] = None + """Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now.""" + + max_runtime: Optional[int] = None + """The maximum duration in minutes for which the experiment is allowed to run. 
If the experiment + exceeds this time limit it will be stopped automatically.""" + + prediction_data_path: Optional[str] = None + """The three-level (fully qualified) path to a unity catalog table. This table path serves to store + the predictions.""" + + primary_metric: Optional[str] = None + """The evaluation metric used to optimize the forecasting model.""" + + register_to: Optional[str] = None + """The three-level (fully qualified) path to a unity catalog model. This model path serves to store + the best model.""" + + split_column: Optional[str] = None + """Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to.""" + + timeseries_identifier_columns: Optional[List[str]] = None + """Name of the column in the input training table used to group the dataset to predict individual + time series""" + + training_frameworks: Optional[List[str]] = None + """The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', + 'DeepAR'. An empty list will include all supported frameworks.""" + + def as_dict(self) -> dict: + """Serializes the CreateForecastingExperimentRequest into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.custom_weights_column is not None: + body["custom_weights_column"] = self.custom_weights_column + if self.experiment_path is not None: + body["experiment_path"] = self.experiment_path + if self.forecast_granularity is not None: + body["forecast_granularity"] = self.forecast_granularity + if self.forecast_horizon is not None: + body["forecast_horizon"] = self.forecast_horizon + if self.holiday_regions: + body["holiday_regions"] = [v for v in self.holiday_regions] + if self.max_runtime is not None: + body["max_runtime"] = self.max_runtime + if self.prediction_data_path is not None: + body["prediction_data_path"] = self.prediction_data_path + if self.primary_metric is not None: + body["primary_metric"] = self.primary_metric + if self.register_to is not None: + body["register_to"] = self.register_to + if self.split_column is not None: + body["split_column"] = self.split_column + if self.target_column is not None: + body["target_column"] = self.target_column + if self.time_column is not None: + body["time_column"] = self.time_column + if self.timeseries_identifier_columns: + body["timeseries_identifier_columns"] = [v for v in self.timeseries_identifier_columns] + if self.train_data_path is not None: + body["train_data_path"] = self.train_data_path + if self.training_frameworks: + body["training_frameworks"] = [v for v in self.training_frameworks] + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CreateForecastingExperimentRequest into a shallow dictionary of its immediate attributes.""" + body = {} + if self.custom_weights_column is not None: + body["custom_weights_column"] = self.custom_weights_column + if self.experiment_path is not None: + body["experiment_path"] = self.experiment_path + if self.forecast_granularity is not None: + body["forecast_granularity"] = self.forecast_granularity + if self.forecast_horizon is not None: + body["forecast_horizon"] = self.forecast_horizon + if self.holiday_regions: + body["holiday_regions"] = self.holiday_regions + if self.max_runtime is not None: + body["max_runtime"] = self.max_runtime + if self.prediction_data_path is not None: + body["prediction_data_path"] = self.prediction_data_path + if self.primary_metric is not None: + 
body["primary_metric"] = self.primary_metric + if self.register_to is not None: + body["register_to"] = self.register_to + if self.split_column is not None: + body["split_column"] = self.split_column + if self.target_column is not None: + body["target_column"] = self.target_column + if self.time_column is not None: + body["time_column"] = self.time_column + if self.timeseries_identifier_columns: + body["timeseries_identifier_columns"] = self.timeseries_identifier_columns + if self.train_data_path is not None: + body["train_data_path"] = self.train_data_path + if self.training_frameworks: + body["training_frameworks"] = self.training_frameworks + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CreateForecastingExperimentRequest: + """Deserializes the CreateForecastingExperimentRequest from a dictionary.""" + return cls( + custom_weights_column=d.get("custom_weights_column", None), + experiment_path=d.get("experiment_path", None), + forecast_granularity=d.get("forecast_granularity", None), + forecast_horizon=d.get("forecast_horizon", None), + holiday_regions=d.get("holiday_regions", None), + max_runtime=d.get("max_runtime", None), + prediction_data_path=d.get("prediction_data_path", None), + primary_metric=d.get("primary_metric", None), + register_to=d.get("register_to", None), + split_column=d.get("split_column", None), + target_column=d.get("target_column", None), + time_column=d.get("time_column", None), + timeseries_identifier_columns=d.get("timeseries_identifier_columns", None), + train_data_path=d.get("train_data_path", None), + training_frameworks=d.get("training_frameworks", None), + ) + + +@dataclass +class CreateForecastingExperimentResponse: + experiment_id: Optional[str] = None + """The unique ID of the created forecasting experiment""" + + def as_dict(self) -> dict: + """Serializes the CreateForecastingExperimentResponse into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CreateForecastingExperimentResponse into a shallow dictionary of its immediate attributes.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CreateForecastingExperimentResponse: + """Deserializes the CreateForecastingExperimentResponse from a dictionary.""" + return cls(experiment_id=d.get("experiment_id", None)) + + @dataclass class CreateModelRequest: name: str @@ -1800,6 +1982,60 @@ def from_dict(cls, d: Dict[str, Any]) -> FileInfo: return cls(file_size=d.get("file_size", None), is_dir=d.get("is_dir", None), path=d.get("path", None)) +@dataclass +class ForecastingExperiment: + """Represents a forecasting experiment with its unique identifier, URL, and state.""" + + experiment_id: Optional[str] = None + """The unique ID for the forecasting experiment.""" + + experiment_page_url: Optional[str] = None + """The URL to the forecasting experiment page.""" + + state: Optional[ForecastingExperimentState] = None + """The current state of the forecasting experiment.""" + + def as_dict(self) -> dict: + """Serializes the ForecastingExperiment into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + if self.experiment_page_url is not None: + body["experiment_page_url"] = self.experiment_page_url + if 
self.state is not None: + body["state"] = self.state.value + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ForecastingExperiment into a shallow dictionary of its immediate attributes.""" + body = {} + if self.experiment_id is not None: + body["experiment_id"] = self.experiment_id + if self.experiment_page_url is not None: + body["experiment_page_url"] = self.experiment_page_url + if self.state is not None: + body["state"] = self.state + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ForecastingExperiment: + """Deserializes the ForecastingExperiment from a dictionary.""" + return cls( + experiment_id=d.get("experiment_id", None), + experiment_page_url=d.get("experiment_page_url", None), + state=_enum(d, "state", ForecastingExperimentState), + ) + + +class ForecastingExperimentState(Enum): + + CANCELLED = "CANCELLED" + FAILED = "FAILED" + PENDING = "PENDING" + RUNNING = "RUNNING" + SUCCEEDED = "SUCCEEDED" + + @dataclass class GetExperimentByNameResponse: experiment: Optional[Experiment] = None @@ -6705,6 +6941,219 @@ def update_run( return UpdateRunResponse.from_dict(res) +class ForecastingAPI: + """The Forecasting API allows you to create and get serverless forecasting experiments""" + + def __init__(self, api_client): + self._api = api_client + + def wait_get_experiment_forecasting_succeeded( + self, + experiment_id: str, + timeout=timedelta(minutes=120), + callback: Optional[Callable[[ForecastingExperiment], None]] = None, + ) -> ForecastingExperiment: + deadline = time.time() + timeout.total_seconds() + target_states = (ForecastingExperimentState.SUCCEEDED,) + failure_states = ( + ForecastingExperimentState.FAILED, + ForecastingExperimentState.CANCELLED, + ) + status_message = "polling..." + attempt = 1 + while time.time() < deadline: + poll = self.get_experiment(experiment_id=experiment_id) + status = poll.state + status_message = f"current status: {status}" + if status in target_states: + return poll + if callback: + callback(poll) + if status in failure_states: + msg = f"failed to reach SUCCEEDED, got {status}: {status_message}" + raise OperationFailed(msg) + prefix = f"experiment_id={experiment_id}" + sleep = attempt + if sleep > 10: + # sleep 10s max per attempt + sleep = 10 + _LOG.debug(f"{prefix}: ({status}) {status_message} (sleeping ~{sleep}s)") + time.sleep(sleep + random.random()) + attempt += 1 + raise TimeoutError(f"timed out after {timeout}: {status_message}") + + def create_experiment( + self, + train_data_path: str, + target_column: str, + time_column: str, + forecast_granularity: str, + forecast_horizon: int, + *, + custom_weights_column: Optional[str] = None, + experiment_path: Optional[str] = None, + holiday_regions: Optional[List[str]] = None, + max_runtime: Optional[int] = None, + prediction_data_path: Optional[str] = None, + primary_metric: Optional[str] = None, + register_to: Optional[str] = None, + split_column: Optional[str] = None, + timeseries_identifier_columns: Optional[List[str]] = None, + training_frameworks: Optional[List[str]] = None, + ) -> Wait[ForecastingExperiment]: + """Create a forecasting experiment. + + Creates a serverless forecasting experiment. Returns the experiment ID. + + :param train_data_path: str + The three-level (fully qualified) name of a unity catalog table. This table serves as the training + data for the forecasting model. + :param target_column: str + Name of the column in the input training table that serves as the prediction target. 
The values in + this column will be used as the ground truth for model training. + :param time_column: str + Name of the column in the input training table that represents the timestamp of each row. + :param forecast_granularity: str + The granularity of the forecast. This defines the time interval between consecutive rows in the time + series data. Possible values: '1 second', '1 minute', '5 minutes', '10 minutes', '15 minutes', '30 + minutes', 'Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'. + :param forecast_horizon: int + The number of time steps into the future for which predictions should be made. This value represents + a multiple of forecast_granularity determining how far ahead the model will forecast. + :param custom_weights_column: str (optional) + Name of the column in the input training table used to customize the weight for each time series to + calculate weighted metrics. + :param experiment_path: str (optional) + The path to the created experiment. This is the path where the experiment will be stored in the + workspace. + :param holiday_regions: List[str] (optional) + Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now. + :param max_runtime: int (optional) + The maximum duration in minutes for which the experiment is allowed to run. If the experiment + exceeds this time limit it will be stopped automatically. + :param prediction_data_path: str (optional) + The three-level (fully qualified) path to a unity catalog table. This table path serves to store the + predictions. + :param primary_metric: str (optional) + The evaluation metric used to optimize the forecasting model. + :param register_to: str (optional) + The three-level (fully qualified) path to a unity catalog model. This model path serves to store the + best model. + :param split_column: str (optional) + Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to. + :param timeseries_identifier_columns: List[str] (optional) + Name of the column in the input training table used to group the dataset to predict individual time + series + :param training_frameworks: List[str] (optional) + The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', 'DeepAR'. + An empty list will include all supported frameworks. + + :returns: + Long-running operation waiter for :class:`ForecastingExperiment`. + See :method:wait_get_experiment_forecasting_succeeded for more details. 
+ """ + body = {} + if custom_weights_column is not None: + body["custom_weights_column"] = custom_weights_column + if experiment_path is not None: + body["experiment_path"] = experiment_path + if forecast_granularity is not None: + body["forecast_granularity"] = forecast_granularity + if forecast_horizon is not None: + body["forecast_horizon"] = forecast_horizon + if holiday_regions is not None: + body["holiday_regions"] = [v for v in holiday_regions] + if max_runtime is not None: + body["max_runtime"] = max_runtime + if prediction_data_path is not None: + body["prediction_data_path"] = prediction_data_path + if primary_metric is not None: + body["primary_metric"] = primary_metric + if register_to is not None: + body["register_to"] = register_to + if split_column is not None: + body["split_column"] = split_column + if target_column is not None: + body["target_column"] = target_column + if time_column is not None: + body["time_column"] = time_column + if timeseries_identifier_columns is not None: + body["timeseries_identifier_columns"] = [v for v in timeseries_identifier_columns] + if train_data_path is not None: + body["train_data_path"] = train_data_path + if training_frameworks is not None: + body["training_frameworks"] = [v for v in training_frameworks] + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + + op_response = self._api.do("POST", "/api/2.0/automl/create-forecasting-experiment", body=body, headers=headers) + return Wait( + self.wait_get_experiment_forecasting_succeeded, + response=CreateForecastingExperimentResponse.from_dict(op_response), + experiment_id=op_response["experiment_id"], + ) + + def create_experiment_and_wait( + self, + train_data_path: str, + target_column: str, + time_column: str, + forecast_granularity: str, + forecast_horizon: int, + *, + custom_weights_column: Optional[str] = None, + experiment_path: Optional[str] = None, + holiday_regions: Optional[List[str]] = None, + max_runtime: Optional[int] = None, + prediction_data_path: Optional[str] = None, + primary_metric: Optional[str] = None, + register_to: Optional[str] = None, + split_column: Optional[str] = None, + timeseries_identifier_columns: Optional[List[str]] = None, + training_frameworks: Optional[List[str]] = None, + timeout=timedelta(minutes=120), + ) -> ForecastingExperiment: + return self.create_experiment( + custom_weights_column=custom_weights_column, + experiment_path=experiment_path, + forecast_granularity=forecast_granularity, + forecast_horizon=forecast_horizon, + holiday_regions=holiday_regions, + max_runtime=max_runtime, + prediction_data_path=prediction_data_path, + primary_metric=primary_metric, + register_to=register_to, + split_column=split_column, + target_column=target_column, + time_column=time_column, + timeseries_identifier_columns=timeseries_identifier_columns, + train_data_path=train_data_path, + training_frameworks=training_frameworks, + ).result(timeout=timeout) + + def get_experiment(self, experiment_id: str) -> ForecastingExperiment: + """Get a forecasting experiment. 
+ + Public RPC to get forecasting experiment + + :param experiment_id: str + The unique ID of a forecasting experiment + + :returns: :class:`ForecastingExperiment` + """ + + headers = { + "Accept": "application/json", + } + + res = self._api.do("GET", f"/api/2.0/automl/get-forecasting-experiment/{experiment_id}", headers=headers) + return ForecastingExperiment.from_dict(res) + + class ModelRegistryAPI: """Note: This API reference documents APIs for the Workspace Model Registry. Databricks recommends using [Models in Unity Catalog](/api/workspace/registeredmodels) instead. Models in Unity Catalog provides diff --git a/databricks/sdk/service/pipelines.py b/databricks/sdk/service/pipelines.py index 36e74b8fd..5f0cc834a 100755 --- a/databricks/sdk/service/pipelines.py +++ b/databricks/sdk/service/pipelines.py @@ -69,7 +69,7 @@ class CreatePipeline: ingestion_definition: Optional[IngestionPipelineDefinition] = None """The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings.""" + 'libraries', 'schema', 'target', or 'catalog' settings.""" libraries: Optional[List[PipelineLibrary]] = None """Libraries or code needed by this deployment.""" @@ -95,8 +95,7 @@ class CreatePipeline: is thrown.""" schema: Optional[str] = None - """The default schema (database) where tables are read from or published to. The presence of this - field implies that the pipeline is in direct publishing mode.""" + """The default schema (database) where tables are read from or published to.""" serverless: Optional[bool] = None """Whether serverless compute is enabled for this pipeline.""" @@ -105,9 +104,9 @@ class CreatePipeline: """DBFS root directory for storing checkpoints and tables.""" target: Optional[str] = None - """Target schema (database) to add tables in this pipeline to. If not specified, no data is - published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify - `catalog`.""" + """Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` + must be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is + deprecated for pipeline creation in favor of the `schema` field.""" trigger: Optional[PipelineTrigger] = None """Which pipeline trigger to use. Deprecated: Use `continuous` instead.""" @@ -443,7 +442,7 @@ class EditPipeline: ingestion_definition: Optional[IngestionPipelineDefinition] = None """The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings.""" + 'libraries', 'schema', 'target', or 'catalog' settings.""" libraries: Optional[List[PipelineLibrary]] = None """Libraries or code needed by this deployment.""" @@ -472,8 +471,7 @@ class EditPipeline: is thrown.""" schema: Optional[str] = None - """The default schema (database) where tables are read from or published to. The presence of this - field implies that the pipeline is in direct publishing mode.""" + """The default schema (database) where tables are read from or published to.""" serverless: Optional[bool] = None """Whether serverless compute is enabled for this pipeline.""" @@ -482,9 +480,9 @@ class EditPipeline: """DBFS root directory for storing checkpoints and tables.""" target: Optional[str] = None - """Target schema (database) to add tables in this pipeline to. If not specified, no data is - published to the Hive metastore or Unity Catalog. 
To publish to Unity Catalog, also specify - `catalog`.""" + """Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` + must be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is + deprecated for pipeline creation in favor of the `schema` field.""" trigger: Optional[PipelineTrigger] = None """Which pipeline trigger to use. Deprecated: Use `continuous` instead.""" @@ -2218,7 +2216,7 @@ class PipelineSpec: ingestion_definition: Optional[IngestionPipelineDefinition] = None """The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings.""" + 'libraries', 'schema', 'target', or 'catalog' settings.""" libraries: Optional[List[PipelineLibrary]] = None """Libraries or code needed by this deployment.""" @@ -2236,8 +2234,7 @@ class PipelineSpec: """Restart window of this pipeline.""" schema: Optional[str] = None - """The default schema (database) where tables are read from or published to. The presence of this - field implies that the pipeline is in direct publishing mode.""" + """The default schema (database) where tables are read from or published to.""" serverless: Optional[bool] = None """Whether serverless compute is enabled for this pipeline.""" @@ -2246,9 +2243,9 @@ class PipelineSpec: """DBFS root directory for storing checkpoints and tables.""" target: Optional[str] = None - """Target schema (database) to add tables in this pipeline to. If not specified, no data is - published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify - `catalog`.""" + """Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` + must be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is + deprecated for pipeline creation in favor of the `schema` field.""" trigger: Optional[PipelineTrigger] = None """Which pipeline trigger to use. Deprecated: Use `continuous` instead.""" @@ -3458,7 +3455,7 @@ def create( Unique identifier for this pipeline. :param ingestion_definition: :class:`IngestionPipelineDefinition` (optional) The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings. + 'libraries', 'schema', 'target', or 'catalog' settings. :param libraries: List[:class:`PipelineLibrary`] (optional) Libraries or code needed by this deployment. :param name: str (optional) @@ -3476,15 +3473,15 @@ def create( Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. :param schema: str (optional) - The default schema (database) where tables are read from or published to. The presence of this field - implies that the pipeline is in direct publishing mode. + The default schema (database) where tables are read from or published to. :param serverless: bool (optional) Whether serverless compute is enabled for this pipeline. :param storage: str (optional) DBFS root directory for storing checkpoints and tables. :param target: str (optional) - Target schema (database) to add tables in this pipeline to. If not specified, no data is published - to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must + be specified. To publish to Unity Catalog, also specify `catalog`. 
This legacy field is deprecated + for pipeline creation in favor of the `schema` field. :param trigger: :class:`PipelineTrigger` (optional) Which pipeline trigger to use. Deprecated: Use `continuous` instead. @@ -3962,7 +3959,7 @@ def update( Unique identifier for this pipeline. :param ingestion_definition: :class:`IngestionPipelineDefinition` (optional) The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings. + 'libraries', 'schema', 'target', or 'catalog' settings. :param libraries: List[:class:`PipelineLibrary`] (optional) Libraries or code needed by this deployment. :param name: str (optional) @@ -3980,15 +3977,15 @@ def update( Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. :param schema: str (optional) - The default schema (database) where tables are read from or published to. The presence of this field - implies that the pipeline is in direct publishing mode. + The default schema (database) where tables are read from or published to. :param serverless: bool (optional) Whether serverless compute is enabled for this pipeline. :param storage: str (optional) DBFS root directory for storing checkpoints and tables. :param target: str (optional) - Target schema (database) to add tables in this pipeline to. If not specified, no data is published - to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must + be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is deprecated + for pipeline creation in favor of the `schema` field. :param trigger: :class:`PipelineTrigger` (optional) Which pipeline trigger to use. Deprecated: Use `continuous` instead. 
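
The ml.py and pipelines.py hunks above add a serverless forecasting service and steer pipeline creation toward the new `schema` field, with the legacy `target` field deprecated for creation. The snippet below is a minimal usage sketch rather than part of this change set: it assumes an authenticated `WorkspaceClient` that exposes the new service as `w.forecasting`, and the catalog, schema, table names, pipeline name, and notebook path are illustrative placeholders.

    from databricks.sdk import WorkspaceClient
    from databricks.sdk.service.pipelines import NotebookLibrary, PipelineLibrary

    w = WorkspaceClient()

    # Create a serverless forecasting experiment and block until it reaches
    # SUCCEEDED, using the long-running-operation waiter added in this release
    # (the waiter's timeout defaults to 120 minutes).
    experiment = w.forecasting.create_experiment_and_wait(
        train_data_path="main.demo.sales_train",  # placeholder Unity Catalog table
        target_column="units_sold",
        time_column="week",
        forecast_granularity="Weekly",
        forecast_horizon=12,
    )
    print(experiment.experiment_page_url, experiment.state)

    # Create a pipeline that publishes to a Unity Catalog schema. Per the updated
    # docstrings, pass `schema` here; the legacy `target` field is deprecated for
    # pipeline creation.
    w.pipelines.create(
        name="sales-etl",  # placeholder pipeline name
        catalog="main",
        schema="forecast_demo",
        serverless=True,
        libraries=[PipelineLibrary(notebook=NotebookLibrary(path="/Repos/demo/etl"))],
    )
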
diff --git a/databricks/sdk/service/serving.py b/databricks/sdk/service/serving.py index fccad9b79..1dff8e1e9 100755 --- a/databricks/sdk/service/serving.py +++ b/databricks/sdk/service/serving.py @@ -4,6 +4,7 @@ import logging import random +import threading import time from dataclasses import dataclass from datetime import timedelta @@ -62,6 +63,10 @@ def from_dict(cls, d: Dict[str, Any]) -> Ai21LabsConfig: @dataclass class AiGatewayConfig: + fallback_config: Optional[FallbackConfig] = None + """Configuration for traffic fallback which auto fallbacks to other served entities if the request + to a served entity fails with certain error codes, to increase availability.""" + guardrails: Optional[AiGatewayGuardrails] = None """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses.""" @@ -80,6 +85,8 @@ class AiGatewayConfig: def as_dict(self) -> dict: """Serializes the AiGatewayConfig into a dictionary suitable for use as a JSON request body.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config.as_dict() if self.guardrails: body["guardrails"] = self.guardrails.as_dict() if self.inference_table_config: @@ -93,6 +100,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the AiGatewayConfig into a shallow dictionary of its immediate attributes.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config if self.guardrails: body["guardrails"] = self.guardrails if self.inference_table_config: @@ -107,6 +116,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> AiGatewayConfig: """Deserializes the AiGatewayConfig from a dictionary.""" return cls( + fallback_config=_from_dict(d, "fallback_config", FallbackConfig), guardrails=_from_dict(d, "guardrails", AiGatewayGuardrails), inference_table_config=_from_dict(d, "inference_table_config", AiGatewayInferenceTableConfig), rate_limits=_repeated_dict(d, "rate_limits", AiGatewayRateLimit), @@ -505,6 +515,47 @@ def from_dict(cls, d: Dict[str, Any]) -> AnthropicConfig: ) +@dataclass +class ApiKeyAuth: + key: str + """The name of the API key parameter used for authentication.""" + + value: Optional[str] = None + """The Databricks secret key reference for an API Key. If you prefer to paste your token directly, + see `value_plaintext`.""" + + value_plaintext: Optional[str] = None + """The API Key provided as a plaintext string. 
If you prefer to reference your token using + Databricks Secrets, see `value`.""" + + def as_dict(self) -> dict: + """Serializes the ApiKeyAuth into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.key is not None: + body["key"] = self.key + if self.value is not None: + body["value"] = self.value + if self.value_plaintext is not None: + body["value_plaintext"] = self.value_plaintext + return body + + def as_shallow_dict(self) -> dict: + """Serializes the ApiKeyAuth into a shallow dictionary of its immediate attributes.""" + body = {} + if self.key is not None: + body["key"] = self.key + if self.value is not None: + body["value"] = self.value + if self.value_plaintext is not None: + body["value_plaintext"] = self.value_plaintext + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> ApiKeyAuth: + """Deserializes the ApiKeyAuth from a dictionary.""" + return cls(key=d.get("key", None), value=d.get("value", None), value_plaintext=d.get("value_plaintext", None)) + + @dataclass class AutoCaptureConfigInput: catalog_name: Optional[str] = None @@ -644,6 +695,40 @@ def from_dict(cls, d: Dict[str, Any]) -> AutoCaptureState: return cls(payload_table=_from_dict(d, "payload_table", PayloadTable)) +@dataclass +class BearerTokenAuth: + token: Optional[str] = None + """The Databricks secret key reference for a token. If you prefer to paste your token directly, see + `token_plaintext`.""" + + token_plaintext: Optional[str] = None + """The token provided as a plaintext string. If you prefer to reference your token using Databricks + Secrets, see `token`.""" + + def as_dict(self) -> dict: + """Serializes the BearerTokenAuth into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.token is not None: + body["token"] = self.token + if self.token_plaintext is not None: + body["token_plaintext"] = self.token_plaintext + return body + + def as_shallow_dict(self) -> dict: + """Serializes the BearerTokenAuth into a shallow dictionary of its immediate attributes.""" + body = {} + if self.token is not None: + body["token"] = self.token + if self.token_plaintext is not None: + body["token_plaintext"] = self.token_plaintext + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> BearerTokenAuth: + """Deserializes the BearerTokenAuth from a dictionary.""" + return cls(token=d.get("token", None), token_plaintext=d.get("token_plaintext", None)) + + @dataclass class BuildLogsResponse: logs: str @@ -835,6 +920,53 @@ def from_dict(cls, d: Dict[str, Any]) -> CreateServingEndpoint: ) +@dataclass +class CustomProviderConfig: + """Configs needed to create a custom provider model route.""" + + custom_provider_url: str + """This is a field to provide the URL of the custom provider API.""" + + api_key_auth: Optional[ApiKeyAuth] = None + """This is a field to provide API key authentication for the custom provider API. You can only + specify one authentication method.""" + + bearer_token_auth: Optional[BearerTokenAuth] = None + """This is a field to provide bearer token authentication for the custom provider API. 
You can only + specify one authentication method.""" + + def as_dict(self) -> dict: + """Serializes the CustomProviderConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.api_key_auth: + body["api_key_auth"] = self.api_key_auth.as_dict() + if self.bearer_token_auth: + body["bearer_token_auth"] = self.bearer_token_auth.as_dict() + if self.custom_provider_url is not None: + body["custom_provider_url"] = self.custom_provider_url + return body + + def as_shallow_dict(self) -> dict: + """Serializes the CustomProviderConfig into a shallow dictionary of its immediate attributes.""" + body = {} + if self.api_key_auth: + body["api_key_auth"] = self.api_key_auth + if self.bearer_token_auth: + body["bearer_token_auth"] = self.bearer_token_auth + if self.custom_provider_url is not None: + body["custom_provider_url"] = self.custom_provider_url + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> CustomProviderConfig: + """Deserializes the CustomProviderConfig from a dictionary.""" + return cls( + api_key_auth=_from_dict(d, "api_key_auth", ApiKeyAuth), + bearer_token_auth=_from_dict(d, "bearer_token_auth", BearerTokenAuth), + custom_provider_url=d.get("custom_provider_url", None), + ) + + @dataclass class DataPlaneInfo: """Details necessary to query this object's API through the DataPlane APIs.""" @@ -1494,6 +1626,9 @@ class ExternalModel: cohere_config: Optional[CohereConfig] = None """Cohere Config. Only required if the provider is 'cohere'.""" + custom_provider_config: Optional[CustomProviderConfig] = None + """Custom Provider Config. Only required if the provider is 'custom'.""" + databricks_model_serving_config: Optional[DatabricksModelServingConfig] = None """Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.""" @@ -1517,6 +1652,8 @@ def as_dict(self) -> dict: body["anthropic_config"] = self.anthropic_config.as_dict() if self.cohere_config: body["cohere_config"] = self.cohere_config.as_dict() + if self.custom_provider_config: + body["custom_provider_config"] = self.custom_provider_config.as_dict() if self.databricks_model_serving_config: body["databricks_model_serving_config"] = self.databricks_model_serving_config.as_dict() if self.google_cloud_vertex_ai_config: @@ -1544,6 +1681,8 @@ def as_shallow_dict(self) -> dict: body["anthropic_config"] = self.anthropic_config if self.cohere_config: body["cohere_config"] = self.cohere_config + if self.custom_provider_config: + body["custom_provider_config"] = self.custom_provider_config if self.databricks_model_serving_config: body["databricks_model_serving_config"] = self.databricks_model_serving_config if self.google_cloud_vertex_ai_config: @@ -1568,6 +1707,7 @@ def from_dict(cls, d: Dict[str, Any]) -> ExternalModel: amazon_bedrock_config=_from_dict(d, "amazon_bedrock_config", AmazonBedrockConfig), anthropic_config=_from_dict(d, "anthropic_config", AnthropicConfig), cohere_config=_from_dict(d, "cohere_config", CohereConfig), + custom_provider_config=_from_dict(d, "custom_provider_config", CustomProviderConfig), databricks_model_serving_config=_from_dict( d, "databricks_model_serving_config", DatabricksModelServingConfig ), @@ -1586,6 +1726,7 @@ class ExternalModelProvider(Enum): AMAZON_BEDROCK = "amazon-bedrock" ANTHROPIC = "anthropic" COHERE = "cohere" + CUSTOM = "custom" DATABRICKS_MODEL_SERVING = "databricks-model-serving" GOOGLE_CLOUD_VERTEX_AI = "google-cloud-vertex-ai" OPENAI = "openai" @@ -1635,6 +1776,35 @@ def from_dict(cls, d: Dict[str, Any]) -> 
ExternalModelUsageElement: ) +@dataclass +class FallbackConfig: + enabled: bool + """Whether to enable traffic fallback. When a served entity in the serving endpoint returns + specific error codes (e.g. 500), the request will automatically be round-robin attempted with + other served entities in the same endpoint, following the order of served entity list, until a + successful response is returned. If all attempts fail, return the last response with the error + code.""" + + def as_dict(self) -> dict: + """Serializes the FallbackConfig into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.enabled is not None: + body["enabled"] = self.enabled + return body + + def as_shallow_dict(self) -> dict: + """Serializes the FallbackConfig into a shallow dictionary of its immediate attributes.""" + body = {} + if self.enabled is not None: + body["enabled"] = self.enabled + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> FallbackConfig: + """Deserializes the FallbackConfig from a dictionary.""" + return cls(enabled=d.get("enabled", None)) + + @dataclass class FoundationModel: """All fields are not sensitive as they are hard-coded in the system and made available to @@ -2123,6 +2293,10 @@ def from_dict(cls, d: Dict[str, Any]) -> PayloadTable: @dataclass class PutAiGatewayRequest: + fallback_config: Optional[FallbackConfig] = None + """Configuration for traffic fallback which auto fallbacks to other served entities if the request + to a served entity fails with certain error codes, to increase availability.""" + guardrails: Optional[AiGatewayGuardrails] = None """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses.""" @@ -2144,6 +2318,8 @@ class PutAiGatewayRequest: def as_dict(self) -> dict: """Serializes the PutAiGatewayRequest into a dictionary suitable for use as a JSON request body.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config.as_dict() if self.guardrails: body["guardrails"] = self.guardrails.as_dict() if self.inference_table_config: @@ -2159,6 +2335,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the PutAiGatewayRequest into a shallow dictionary of its immediate attributes.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config if self.guardrails: body["guardrails"] = self.guardrails if self.inference_table_config: @@ -2175,6 +2353,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> PutAiGatewayRequest: """Deserializes the PutAiGatewayRequest from a dictionary.""" return cls( + fallback_config=_from_dict(d, "fallback_config", FallbackConfig), guardrails=_from_dict(d, "guardrails", AiGatewayGuardrails), inference_table_config=_from_dict(d, "inference_table_config", AiGatewayInferenceTableConfig), name=d.get("name", None), @@ -2185,6 +2364,10 @@ def from_dict(cls, d: Dict[str, Any]) -> PutAiGatewayRequest: @dataclass class PutAiGatewayResponse: + fallback_config: Optional[FallbackConfig] = None + """Configuration for traffic fallback which auto fallbacks to other served entities if the request + to a served entity fails with certain error codes, to increase availability.""" + guardrails: Optional[AiGatewayGuardrails] = None """Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses.""" @@ -2203,6 +2386,8 @@ class PutAiGatewayResponse: def as_dict(self) -> dict: """Serializes the PutAiGatewayResponse into a dictionary 
suitable for use as a JSON request body.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config.as_dict() if self.guardrails: body["guardrails"] = self.guardrails.as_dict() if self.inference_table_config: @@ -2216,6 +2401,8 @@ def as_dict(self) -> dict: def as_shallow_dict(self) -> dict: """Serializes the PutAiGatewayResponse into a shallow dictionary of its immediate attributes.""" body = {} + if self.fallback_config: + body["fallback_config"] = self.fallback_config if self.guardrails: body["guardrails"] = self.guardrails if self.inference_table_config: @@ -2230,6 +2417,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> PutAiGatewayResponse: """Deserializes the PutAiGatewayResponse from a dictionary.""" return cls( + fallback_config=_from_dict(d, "fallback_config", FallbackConfig), guardrails=_from_dict(d, "guardrails", AiGatewayGuardrails), inference_table_config=_from_dict(d, "inference_table_config", AiGatewayInferenceTableConfig), rate_limits=_repeated_dict(d, "rate_limits", AiGatewayRateLimit), @@ -4368,6 +4556,7 @@ def put_ai_gateway( self, name: str, *, + fallback_config: Optional[FallbackConfig] = None, guardrails: Optional[AiGatewayGuardrails] = None, inference_table_config: Optional[AiGatewayInferenceTableConfig] = None, rate_limits: Optional[List[AiGatewayRateLimit]] = None, @@ -4380,6 +4569,9 @@ def put_ai_gateway( :param name: str The name of the serving endpoint whose AI Gateway is being updated. This field is required. + :param fallback_config: :class:`FallbackConfig` (optional) + Configuration for traffic fallback which auto fallbacks to other served entities if the request to a + served entity fails with certain error codes, to increase availability. :param guardrails: :class:`AiGatewayGuardrails` (optional) Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. 
:param inference_table_config: :class:`AiGatewayInferenceTableConfig` (optional) @@ -4394,6 +4586,8 @@ def put_ai_gateway( :returns: :class:`PutAiGatewayResponse` """ body = {} + if fallback_config is not None: + body["fallback_config"] = fallback_config.as_dict() if guardrails is not None: body["guardrails"] = guardrails.as_dict() if inference_table_config is not None: @@ -4657,12 +4851,31 @@ class ServingEndpointsDataPlaneAPI: """Serving endpoints DataPlane provides a set of operations to interact with data plane endpoints for Serving endpoints service.""" - def __init__(self, api_client, control_plane): + def __init__(self, api_client, control_plane_service, dpts): self._api = api_client - self._control_plane = control_plane - from ..data_plane import DataPlaneService - - self._data_plane_service = DataPlaneService() + self._lock = threading.Lock() + self._control_plane_service = control_plane_service + self._dpts = dpts + self._data_plane_details = {} + + def _data_plane_info_query(self, name: str) -> DataPlaneInfo: + key = "query" + "/".join( + [ + str(name), + ] + ) + with self._lock: + if key in self._data_plane_details: + return self._data_plane_details[key] + response = self._control_plane_service.get( + name=name, + ) + if response.data_plane_info is None: + raise Exception("Resource does not support direct Data Plane access") + result = response.data_plane_info.query_info + with self._lock: + self._data_plane_details[key] = result + return result def query( self, @@ -4757,22 +4970,10 @@ def query( body["stream"] = stream if temperature is not None: body["temperature"] = temperature - - def info_getter(): - response = self._control_plane.get( - name=name, - ) - if response.data_plane_info is None: - raise Exception("Resource does not support direct Data Plane access") - return response.data_plane_info.query_info - - get_params = [ - name, - ] - data_plane_details = self._data_plane_service.get_data_plane_details( - "query", get_params, info_getter, self._api.get_oauth_token + data_plane_info = self._data_plane_info_query( + name=name, ) - token = data_plane_details.token + token = self._dpts.token(data_plane_info.endpoint_url, data_plane_info.authorization_details) def auth(r: requests.PreparedRequest) -> requests.PreparedRequest: authorization = f"{token.token_type} {token.access_token}" @@ -4788,7 +4989,7 @@ def auth(r: requests.PreparedRequest) -> requests.PreparedRequest: ] res = self._api.do( "POST", - url=data_plane_details.endpoint_url, + url=data_plane_info.endpoint_url, body=body, headers=headers, response_headers=response_headers, diff --git a/databricks/sdk/service/sharing.py b/databricks/sdk/service/sharing.py index ab6360b41..7325e5fdd 100755 --- a/databricks/sdk/service/sharing.py +++ b/databricks/sdk/service/sharing.py @@ -324,71 +324,7 @@ def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingDependencyList: @dataclass -class DeltaSharingFunctionDependency: - """A Function in UC as a dependency.""" - - function_name: Optional[str] = None - - schema_name: Optional[str] = None - - def as_dict(self) -> dict: - """Serializes the DeltaSharingFunctionDependency into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.function_name is not None: - body["function_name"] = self.function_name - if self.schema_name is not None: - body["schema_name"] = self.schema_name - return body - - def as_shallow_dict(self) -> dict: - """Serializes the DeltaSharingFunctionDependency into a shallow dictionary of its immediate attributes.""" - body = {} - if 
self.function_name is not None: - body["function_name"] = self.function_name - if self.schema_name is not None: - body["schema_name"] = self.schema_name - return body - - @classmethod - def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingFunctionDependency: - """Deserializes the DeltaSharingFunctionDependency from a dictionary.""" - return cls(function_name=d.get("function_name", None), schema_name=d.get("schema_name", None)) - - -@dataclass -class DeltaSharingTableDependency: - """A Table in UC as a dependency.""" - - schema_name: Optional[str] = None - - table_name: Optional[str] = None - - def as_dict(self) -> dict: - """Serializes the DeltaSharingTableDependency into a dictionary suitable for use as a JSON request body.""" - body = {} - if self.schema_name is not None: - body["schema_name"] = self.schema_name - if self.table_name is not None: - body["table_name"] = self.table_name - return body - - def as_shallow_dict(self) -> dict: - """Serializes the DeltaSharingTableDependency into a shallow dictionary of its immediate attributes.""" - body = {} - if self.schema_name is not None: - body["schema_name"] = self.schema_name - if self.table_name is not None: - body["table_name"] = self.table_name - return body - - @classmethod - def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingTableDependency: - """Deserializes the DeltaSharingTableDependency from a dictionary.""" - return cls(schema_name=d.get("schema_name", None), table_name=d.get("table_name", None)) - - -@dataclass -class Function: +class DeltaSharingFunction: aliases: Optional[List[RegisteredModelAlias]] = None """The aliass of registered model.""" @@ -438,7 +374,7 @@ class Function: """The tags of the function.""" def as_dict(self) -> dict: - """Serializes the Function into a dictionary suitable for use as a JSON request body.""" + """Serializes the DeltaSharingFunction into a dictionary suitable for use as a JSON request body.""" body = {} if self.aliases: body["aliases"] = [v.as_dict() for v in self.aliases] @@ -475,7 +411,7 @@ def as_dict(self) -> dict: return body def as_shallow_dict(self) -> dict: - """Serializes the Function into a shallow dictionary of its immediate attributes.""" + """Serializes the DeltaSharingFunction into a shallow dictionary of its immediate attributes.""" body = {} if self.aliases: body["aliases"] = self.aliases @@ -512,8 +448,8 @@ def as_shallow_dict(self) -> dict: return body @classmethod - def from_dict(cls, d: Dict[str, Any]) -> Function: - """Deserializes the Function from a dictionary.""" + def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingFunction: + """Deserializes the DeltaSharingFunction from a dictionary.""" return cls( aliases=_repeated_dict(d, "aliases", RegisteredModelAlias), comment=d.get("comment", None), @@ -534,6 +470,70 @@ def from_dict(cls, d: Dict[str, Any]) -> Function: ) +@dataclass +class DeltaSharingFunctionDependency: + """A Function in UC as a dependency.""" + + function_name: Optional[str] = None + + schema_name: Optional[str] = None + + def as_dict(self) -> dict: + """Serializes the DeltaSharingFunctionDependency into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.function_name is not None: + body["function_name"] = self.function_name + if self.schema_name is not None: + body["schema_name"] = self.schema_name + return body + + def as_shallow_dict(self) -> dict: + """Serializes the DeltaSharingFunctionDependency into a shallow dictionary of its immediate attributes.""" + body = {} + if self.function_name is not None: + 
body["function_name"] = self.function_name + if self.schema_name is not None: + body["schema_name"] = self.schema_name + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingFunctionDependency: + """Deserializes the DeltaSharingFunctionDependency from a dictionary.""" + return cls(function_name=d.get("function_name", None), schema_name=d.get("schema_name", None)) + + +@dataclass +class DeltaSharingTableDependency: + """A Table in UC as a dependency.""" + + schema_name: Optional[str] = None + + table_name: Optional[str] = None + + def as_dict(self) -> dict: + """Serializes the DeltaSharingTableDependency into a dictionary suitable for use as a JSON request body.""" + body = {} + if self.schema_name is not None: + body["schema_name"] = self.schema_name + if self.table_name is not None: + body["table_name"] = self.table_name + return body + + def as_shallow_dict(self) -> dict: + """Serializes the DeltaSharingTableDependency into a shallow dictionary of its immediate attributes.""" + body = {} + if self.schema_name is not None: + body["schema_name"] = self.schema_name + if self.table_name is not None: + body["table_name"] = self.table_name + return body + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> DeltaSharingTableDependency: + """Deserializes the DeltaSharingTableDependency from a dictionary.""" + return cls(schema_name=d.get("schema_name", None), table_name=d.get("table_name", None)) + + @dataclass class FunctionParameterInfo: """Represents a parameter of a function. The same message is used for both input and output @@ -809,7 +809,7 @@ def from_dict(cls, d: Dict[str, Any]) -> IpAccessList: class ListProviderShareAssetsResponse: """Response to ListProviderShareAssets, which contains the list of assets of a share.""" - functions: Optional[List[Function]] = None + functions: Optional[List[DeltaSharingFunction]] = None """The list of functions in the share.""" notebooks: Optional[List[NotebookFile]] = None @@ -851,7 +851,7 @@ def as_shallow_dict(self) -> dict: def from_dict(cls, d: Dict[str, Any]) -> ListProviderShareAssetsResponse: """Deserializes the ListProviderShareAssetsResponse from a dictionary.""" return cls( - functions=_repeated_dict(d, "functions", Function), + functions=_repeated_dict(d, "functions", DeltaSharingFunction), notebooks=_repeated_dict(d, "notebooks", NotebookFile), tables=_repeated_dict(d, "tables", Table), volumes=_repeated_dict(d, "volumes", Volume), diff --git a/databricks/sdk/version.py b/databricks/sdk/version.py index 4d8afa5ba..bf97bc409 100644 --- a/databricks/sdk/version.py +++ b/databricks/sdk/version.py @@ -1 +1 @@ -__version__ = "0.45.0" +__version__ = "0.47.0" diff --git a/docs/account/iam/groups.rst b/docs/account/iam/groups.rst index adb23f7d7..d005f7930 100644 --- a/docs/account/iam/groups.rst +++ b/docs/account/iam/groups.rst @@ -99,7 +99,7 @@ Partially updates the details of a group. :param id: str - Unique ID for a group in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. diff --git a/docs/account/iam/service_principals.rst b/docs/account/iam/service_principals.rst index 2823c8d31..e0fd8577a 100644 --- a/docs/account/iam/service_principals.rst +++ b/docs/account/iam/service_principals.rst @@ -178,7 +178,7 @@ Partially updates the details of a single service principal in the Databricks account. 
:param id: str - Unique ID for a service principal in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. diff --git a/docs/account/iam/users.rst b/docs/account/iam/users.rst index 54a9f1af8..7e527ec45 100644 --- a/docs/account/iam/users.rst +++ b/docs/account/iam/users.rst @@ -58,8 +58,7 @@ External ID is not currently supported. It is reserved for future use. :param groups: List[:class:`ComplexValue`] (optional) :param id: str (optional) - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param name: :class:`Name` (optional) :param roles: List[:class:`ComplexValue`] (optional) Corresponds to AWS instance profile/arn role. @@ -223,7 +222,7 @@ Partially updates a user resource by applying the supplied operations on specific user attributes. :param id: str - Unique ID for a user in the Databricks account. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -238,8 +237,7 @@ Replaces a user's information with the data supplied in request. :param id: str - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param active: bool (optional) If this user is active :param display_name: str (optional) diff --git a/docs/dbdataclasses/compute.rst b/docs/dbdataclasses/compute.rst index 3be80b79c..2424cf4cf 100644 --- a/docs/dbdataclasses/compute.rst +++ b/docs/dbdataclasses/compute.rst @@ -44,7 +44,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: AzureAvailability - Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability type will be used for the entire cluster. + Availability type used for all subsequent nodes past the `first_on_demand` ones. Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster. .. py:attribute:: ON_DEMAND_AZURE :value: "ON_DEMAND_AZURE" @@ -309,8 +309,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DataPlaneEventDetailsEventType - <needs content added> - .. py:attribute:: NODE_BLACKLISTED :value: "NODE_BLACKLISTED" @@ -405,6 +403,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DiskTypeAzureDiskVolumeType + All Azure Disk types that Databricks supports. See https://docs.microsoft.com/en-us/azure/storage/storage-about-disks-and-vhds-linux#types-of-disks + .. py:attribute:: PREMIUM_LRS :value: "PREMIUM_LRS" @@ -413,6 +413,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: DiskTypeEbsVolumeType + All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. + .. py:attribute:: GENERAL_PURPOSE_SSD :value: "GENERAL_PURPOSE_SSD" @@ -429,7 +431,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: EbsVolumeType - The type of EBS volumes that will be launched with this cluster. 
+ All EBS volume types that Databricks supports. See https://aws.amazon.com/ebs/details/ for details. .. py:attribute:: GENERAL_PURPOSE_SSD :value: "GENERAL_PURPOSE_SSD" @@ -625,8 +627,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: GetEventsOrder - The order to list events in; either "ASC" or "DESC". Defaults to "DESC". - .. py:attribute:: ASC :value: "ASC" @@ -669,13 +669,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: -.. autoclass:: InitScriptExecutionDetails - :members: - :undoc-members: - -.. py:class:: InitScriptExecutionDetailsStatus +.. py:class:: InitScriptExecutionDetailsInitScriptExecutionStatus - The current status of the script + Result of attempted script execution .. py:attribute:: FAILED_EXECUTION :value: "FAILED_EXECUTION" @@ -683,6 +679,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: FAILED_FETCH :value: "FAILED_FETCH" + .. py:attribute:: FUSE_MOUNT_FAILED + :value: "FUSE_MOUNT_FAILED" + .. py:attribute:: NOT_EXECUTED :value: "NOT_EXECUTED" @@ -729,8 +728,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolAwsAttributesAvailability - Availability type used for the spot nodes. - The default value is defined by InstancePoolConf.instancePoolDefaultAwsAvailability + The set of AWS availability types supported when setting up nodes for a cluster. .. py:attribute:: ON_DEMAND :value: "ON_DEMAND" @@ -744,8 +742,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolAzureAttributesAvailability - Shows the Availability type used for the spot nodes. - The default value is defined by InstancePoolConf.instancePoolDefaultAzureAvailability + The set of Azure availability types supported when setting up nodes for a cluster. .. py:attribute:: ON_DEMAND_AZURE :value: "ON_DEMAND_AZURE" @@ -785,7 +782,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: InstancePoolState - Current state of the instance pool. + The state of a Cluster. The current allowable state transitions are as follows: + - ``ACTIVE`` -> ``STOPPED`` - ``ACTIVE`` -> ``DELETED`` - ``STOPPED`` -> ``ACTIVE`` - ``STOPPED`` -> ``DELETED`` .. py:attribute:: ACTIVE :value: "ACTIVE" @@ -892,8 +890,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: ListClustersSortByDirection - The direction to sort by. - .. py:attribute:: ASC :value: "ASC" @@ -902,8 +898,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: ListClustersSortByField - The sorting criteria. By default, clusters are sorted by 3 columns from highest to lowest precedence: cluster state, pinned or unpinned, then cluster name. - .. py:attribute:: CLUSTER_NAME :value: "CLUSTER_NAME" @@ -944,8 +938,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: ListSortOrder - A generic ordering enum for list-based queries. - .. py:attribute:: ASC :value: "ASC" @@ -1059,10 +1051,6 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: RuntimeEngine - Determines the cluster's runtime engine, either standard or Photon. - This field is not compatible with legacy `spark_version` values that contain `-photon-`. 
Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`. - If left unspecified, the runtime engine defaults to standard unless the spark_version contains -photon-, in which case Photon will be used. - .. py:attribute:: NULL :value: "NULL" @@ -1098,7 +1086,8 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: State - Current state of the cluster. + The state of a Cluster. The current allowable state transitions are as follows: + - `PENDING` -> `RUNNING` - `PENDING` -> `TERMINATING` - `RUNNING` -> `RESIZING` - `RUNNING` -> `RESTARTING` - `RUNNING` -> `TERMINATING` - `RESTARTING` -> `RUNNING` - `RESTARTING` -> `TERMINATING` - `RESIZING` -> `RUNNING` - `RESIZING` -> `TERMINATING` - `TERMINATING` -> `TERMINATED` .. py:attribute:: ERROR :value: "ERROR" @@ -1130,29 +1119,68 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: TerminationReasonCode - status code indicating why the cluster was terminated + The status code indicating why the cluster was terminated .. py:attribute:: ABUSE_DETECTED :value: "ABUSE_DETECTED" + .. py:attribute:: ACCESS_TOKEN_FAILURE + :value: "ACCESS_TOKEN_FAILURE" + + .. py:attribute:: ALLOCATION_TIMEOUT + :value: "ALLOCATION_TIMEOUT" + + .. py:attribute:: ALLOCATION_TIMEOUT_NODE_DAEMON_NOT_READY + :value: "ALLOCATION_TIMEOUT_NODE_DAEMON_NOT_READY" + + .. py:attribute:: ALLOCATION_TIMEOUT_NO_HEALTHY_CLUSTERS + :value: "ALLOCATION_TIMEOUT_NO_HEALTHY_CLUSTERS" + + .. py:attribute:: ALLOCATION_TIMEOUT_NO_MATCHED_CLUSTERS + :value: "ALLOCATION_TIMEOUT_NO_MATCHED_CLUSTERS" + + .. py:attribute:: ALLOCATION_TIMEOUT_NO_READY_CLUSTERS + :value: "ALLOCATION_TIMEOUT_NO_READY_CLUSTERS" + + .. py:attribute:: ALLOCATION_TIMEOUT_NO_UNALLOCATED_CLUSTERS + :value: "ALLOCATION_TIMEOUT_NO_UNALLOCATED_CLUSTERS" + + .. py:attribute:: ALLOCATION_TIMEOUT_NO_WARMED_UP_CLUSTERS + :value: "ALLOCATION_TIMEOUT_NO_WARMED_UP_CLUSTERS" + .. py:attribute:: ATTACH_PROJECT_FAILURE :value: "ATTACH_PROJECT_FAILURE" .. py:attribute:: AWS_AUTHORIZATION_FAILURE :value: "AWS_AUTHORIZATION_FAILURE" + .. py:attribute:: AWS_INACCESSIBLE_KMS_KEY_FAILURE + :value: "AWS_INACCESSIBLE_KMS_KEY_FAILURE" + + .. py:attribute:: AWS_INSTANCE_PROFILE_UPDATE_FAILURE + :value: "AWS_INSTANCE_PROFILE_UPDATE_FAILURE" + .. py:attribute:: AWS_INSUFFICIENT_FREE_ADDRESSES_IN_SUBNET_FAILURE :value: "AWS_INSUFFICIENT_FREE_ADDRESSES_IN_SUBNET_FAILURE" .. py:attribute:: AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE :value: "AWS_INSUFFICIENT_INSTANCE_CAPACITY_FAILURE" + .. py:attribute:: AWS_INVALID_KEY_PAIR + :value: "AWS_INVALID_KEY_PAIR" + + .. py:attribute:: AWS_INVALID_KMS_KEY_STATE + :value: "AWS_INVALID_KMS_KEY_STATE" + .. py:attribute:: AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE :value: "AWS_MAX_SPOT_INSTANCE_COUNT_EXCEEDED_FAILURE" .. py:attribute:: AWS_REQUEST_LIMIT_EXCEEDED :value: "AWS_REQUEST_LIMIT_EXCEEDED" + .. py:attribute:: AWS_RESOURCE_QUOTA_EXCEEDED + :value: "AWS_RESOURCE_QUOTA_EXCEEDED" + .. py:attribute:: AWS_UNSUPPORTED_FAILURE :value: "AWS_UNSUPPORTED_FAILURE" @@ -1168,6 +1196,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: AZURE_OPERATION_NOT_ALLOWED_EXCEPTION :value: "AZURE_OPERATION_NOT_ALLOWED_EXCEPTION" + .. py:attribute:: AZURE_PACKED_DEPLOYMENT_PARTIAL_FAILURE + :value: "AZURE_PACKED_DEPLOYMENT_PARTIAL_FAILURE" + .. 
py:attribute:: AZURE_QUOTA_EXCEEDED_EXCEPTION :value: "AZURE_QUOTA_EXCEEDED_EXCEPTION" @@ -1192,18 +1223,48 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: BOOTSTRAP_TIMEOUT_CLOUD_PROVIDER_EXCEPTION :value: "BOOTSTRAP_TIMEOUT_CLOUD_PROVIDER_EXCEPTION" + .. py:attribute:: BOOTSTRAP_TIMEOUT_DUE_TO_MISCONFIG + :value: "BOOTSTRAP_TIMEOUT_DUE_TO_MISCONFIG" + + .. py:attribute:: BUDGET_POLICY_LIMIT_ENFORCEMENT_ACTIVATED + :value: "BUDGET_POLICY_LIMIT_ENFORCEMENT_ACTIVATED" + + .. py:attribute:: BUDGET_POLICY_RESOLUTION_FAILURE + :value: "BUDGET_POLICY_RESOLUTION_FAILURE" + + .. py:attribute:: CLOUD_ACCOUNT_SETUP_FAILURE + :value: "CLOUD_ACCOUNT_SETUP_FAILURE" + + .. py:attribute:: CLOUD_OPERATION_CANCELLED + :value: "CLOUD_OPERATION_CANCELLED" + .. py:attribute:: CLOUD_PROVIDER_DISK_SETUP_FAILURE :value: "CLOUD_PROVIDER_DISK_SETUP_FAILURE" + .. py:attribute:: CLOUD_PROVIDER_INSTANCE_NOT_LAUNCHED + :value: "CLOUD_PROVIDER_INSTANCE_NOT_LAUNCHED" + .. py:attribute:: CLOUD_PROVIDER_LAUNCH_FAILURE :value: "CLOUD_PROVIDER_LAUNCH_FAILURE" + .. py:attribute:: CLOUD_PROVIDER_LAUNCH_FAILURE_DUE_TO_MISCONFIG + :value: "CLOUD_PROVIDER_LAUNCH_FAILURE_DUE_TO_MISCONFIG" + .. py:attribute:: CLOUD_PROVIDER_RESOURCE_STOCKOUT :value: "CLOUD_PROVIDER_RESOURCE_STOCKOUT" + .. py:attribute:: CLOUD_PROVIDER_RESOURCE_STOCKOUT_DUE_TO_MISCONFIG + :value: "CLOUD_PROVIDER_RESOURCE_STOCKOUT_DUE_TO_MISCONFIG" + .. py:attribute:: CLOUD_PROVIDER_SHUTDOWN :value: "CLOUD_PROVIDER_SHUTDOWN" + .. py:attribute:: CLUSTER_OPERATION_THROTTLED + :value: "CLUSTER_OPERATION_THROTTLED" + + .. py:attribute:: CLUSTER_OPERATION_TIMEOUT + :value: "CLUSTER_OPERATION_TIMEOUT" + .. py:attribute:: COMMUNICATION_LOST :value: "COMMUNICATION_LOST" @@ -1213,30 +1274,111 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: CONTROL_PLANE_REQUEST_FAILURE :value: "CONTROL_PLANE_REQUEST_FAILURE" + .. py:attribute:: CONTROL_PLANE_REQUEST_FAILURE_DUE_TO_MISCONFIG + :value: "CONTROL_PLANE_REQUEST_FAILURE_DUE_TO_MISCONFIG" + .. py:attribute:: DATABASE_CONNECTION_FAILURE :value: "DATABASE_CONNECTION_FAILURE" + .. py:attribute:: DATA_ACCESS_CONFIG_CHANGED + :value: "DATA_ACCESS_CONFIG_CHANGED" + .. py:attribute:: DBFS_COMPONENT_UNHEALTHY :value: "DBFS_COMPONENT_UNHEALTHY" + .. py:attribute:: DISASTER_RECOVERY_REPLICATION + :value: "DISASTER_RECOVERY_REPLICATION" + .. py:attribute:: DOCKER_IMAGE_PULL_FAILURE :value: "DOCKER_IMAGE_PULL_FAILURE" + .. py:attribute:: DRIVER_EVICTION + :value: "DRIVER_EVICTION" + + .. py:attribute:: DRIVER_LAUNCH_TIMEOUT + :value: "DRIVER_LAUNCH_TIMEOUT" + + .. py:attribute:: DRIVER_NODE_UNREACHABLE + :value: "DRIVER_NODE_UNREACHABLE" + + .. py:attribute:: DRIVER_OUT_OF_DISK + :value: "DRIVER_OUT_OF_DISK" + + .. py:attribute:: DRIVER_OUT_OF_MEMORY + :value: "DRIVER_OUT_OF_MEMORY" + + .. py:attribute:: DRIVER_POD_CREATION_FAILURE + :value: "DRIVER_POD_CREATION_FAILURE" + + .. py:attribute:: DRIVER_UNEXPECTED_FAILURE + :value: "DRIVER_UNEXPECTED_FAILURE" + .. py:attribute:: DRIVER_UNREACHABLE :value: "DRIVER_UNREACHABLE" .. py:attribute:: DRIVER_UNRESPONSIVE :value: "DRIVER_UNRESPONSIVE" + .. py:attribute:: DYNAMIC_SPARK_CONF_SIZE_EXCEEDED + :value: "DYNAMIC_SPARK_CONF_SIZE_EXCEEDED" + + .. py:attribute:: EOS_SPARK_IMAGE + :value: "EOS_SPARK_IMAGE" + .. py:attribute:: EXECUTION_COMPONENT_UNHEALTHY :value: "EXECUTION_COMPONENT_UNHEALTHY" + .. py:attribute:: EXECUTOR_POD_UNSCHEDULED + :value: "EXECUTOR_POD_UNSCHEDULED" + + .. 
py:attribute:: GCP_API_RATE_QUOTA_EXCEEDED + :value: "GCP_API_RATE_QUOTA_EXCEEDED" + + .. py:attribute:: GCP_FORBIDDEN + :value: "GCP_FORBIDDEN" + + .. py:attribute:: GCP_IAM_TIMEOUT + :value: "GCP_IAM_TIMEOUT" + + .. py:attribute:: GCP_INACCESSIBLE_KMS_KEY_FAILURE + :value: "GCP_INACCESSIBLE_KMS_KEY_FAILURE" + + .. py:attribute:: GCP_INSUFFICIENT_CAPACITY + :value: "GCP_INSUFFICIENT_CAPACITY" + + .. py:attribute:: GCP_IP_SPACE_EXHAUSTED + :value: "GCP_IP_SPACE_EXHAUSTED" + + .. py:attribute:: GCP_KMS_KEY_PERMISSION_DENIED + :value: "GCP_KMS_KEY_PERMISSION_DENIED" + + .. py:attribute:: GCP_NOT_FOUND + :value: "GCP_NOT_FOUND" + .. py:attribute:: GCP_QUOTA_EXCEEDED :value: "GCP_QUOTA_EXCEEDED" + .. py:attribute:: GCP_RESOURCE_QUOTA_EXCEEDED + :value: "GCP_RESOURCE_QUOTA_EXCEEDED" + + .. py:attribute:: GCP_SERVICE_ACCOUNT_ACCESS_DENIED + :value: "GCP_SERVICE_ACCOUNT_ACCESS_DENIED" + .. py:attribute:: GCP_SERVICE_ACCOUNT_DELETED :value: "GCP_SERVICE_ACCOUNT_DELETED" + .. py:attribute:: GCP_SERVICE_ACCOUNT_NOT_FOUND + :value: "GCP_SERVICE_ACCOUNT_NOT_FOUND" + + .. py:attribute:: GCP_SUBNET_NOT_READY + :value: "GCP_SUBNET_NOT_READY" + + .. py:attribute:: GCP_TRUSTED_IMAGE_PROJECTS_VIOLATED + :value: "GCP_TRUSTED_IMAGE_PROJECTS_VIOLATED" + + .. py:attribute:: GKE_BASED_CLUSTER_TERMINATION + :value: "GKE_BASED_CLUSTER_TERMINATION" + .. py:attribute:: GLOBAL_INIT_SCRIPT_FAILURE :value: "GLOBAL_INIT_SCRIPT_FAILURE" @@ -1249,24 +1391,51 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: INACTIVITY :value: "INACTIVITY" + .. py:attribute:: INIT_CONTAINER_NOT_FINISHED + :value: "INIT_CONTAINER_NOT_FINISHED" + .. py:attribute:: INIT_SCRIPT_FAILURE :value: "INIT_SCRIPT_FAILURE" .. py:attribute:: INSTANCE_POOL_CLUSTER_FAILURE :value: "INSTANCE_POOL_CLUSTER_FAILURE" + .. py:attribute:: INSTANCE_POOL_MAX_CAPACITY_REACHED + :value: "INSTANCE_POOL_MAX_CAPACITY_REACHED" + + .. py:attribute:: INSTANCE_POOL_NOT_FOUND + :value: "INSTANCE_POOL_NOT_FOUND" + .. py:attribute:: INSTANCE_UNREACHABLE :value: "INSTANCE_UNREACHABLE" + .. py:attribute:: INSTANCE_UNREACHABLE_DUE_TO_MISCONFIG + :value: "INSTANCE_UNREACHABLE_DUE_TO_MISCONFIG" + + .. py:attribute:: INTERNAL_CAPACITY_FAILURE + :value: "INTERNAL_CAPACITY_FAILURE" + .. py:attribute:: INTERNAL_ERROR :value: "INTERNAL_ERROR" .. py:attribute:: INVALID_ARGUMENT :value: "INVALID_ARGUMENT" + .. py:attribute:: INVALID_AWS_PARAMETER + :value: "INVALID_AWS_PARAMETER" + + .. py:attribute:: INVALID_INSTANCE_PLACEMENT_PROTOCOL + :value: "INVALID_INSTANCE_PLACEMENT_PROTOCOL" + .. py:attribute:: INVALID_SPARK_IMAGE :value: "INVALID_SPARK_IMAGE" + .. py:attribute:: INVALID_WORKER_IMAGE_FAILURE + :value: "INVALID_WORKER_IMAGE_FAILURE" + + .. py:attribute:: IN_PENALTY_BOX + :value: "IN_PENALTY_BOX" + .. py:attribute:: IP_EXHAUSTION_FAILURE :value: "IP_EXHAUSTION_FAILURE" @@ -1279,30 +1448,57 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: K8S_DBR_CLUSTER_LAUNCH_TIMEOUT :value: "K8S_DBR_CLUSTER_LAUNCH_TIMEOUT" + .. py:attribute:: LAZY_ALLOCATION_TIMEOUT + :value: "LAZY_ALLOCATION_TIMEOUT" + + .. py:attribute:: MAINTENANCE_MODE + :value: "MAINTENANCE_MODE" + .. py:attribute:: METASTORE_COMPONENT_UNHEALTHY :value: "METASTORE_COMPONENT_UNHEALTHY" .. py:attribute:: NEPHOS_RESOURCE_MANAGEMENT :value: "NEPHOS_RESOURCE_MANAGEMENT" + .. py:attribute:: NETVISOR_SETUP_TIMEOUT + :value: "NETVISOR_SETUP_TIMEOUT" + .. 
py:attribute:: NETWORK_CONFIGURATION_FAILURE :value: "NETWORK_CONFIGURATION_FAILURE" .. py:attribute:: NFS_MOUNT_FAILURE :value: "NFS_MOUNT_FAILURE" + .. py:attribute:: NO_MATCHED_K8S + :value: "NO_MATCHED_K8S" + + .. py:attribute:: NO_MATCHED_K8S_TESTING_TAG + :value: "NO_MATCHED_K8S_TESTING_TAG" + .. py:attribute:: NPIP_TUNNEL_SETUP_FAILURE :value: "NPIP_TUNNEL_SETUP_FAILURE" .. py:attribute:: NPIP_TUNNEL_TOKEN_FAILURE :value: "NPIP_TUNNEL_TOKEN_FAILURE" + .. py:attribute:: POD_ASSIGNMENT_FAILURE + :value: "POD_ASSIGNMENT_FAILURE" + + .. py:attribute:: POD_SCHEDULING_FAILURE + :value: "POD_SCHEDULING_FAILURE" + .. py:attribute:: REQUEST_REJECTED :value: "REQUEST_REJECTED" .. py:attribute:: REQUEST_THROTTLED :value: "REQUEST_THROTTLED" + .. py:attribute:: RESOURCE_USAGE_BLOCKED + :value: "RESOURCE_USAGE_BLOCKED" + + .. py:attribute:: SECRET_CREATION_FAILURE + :value: "SECRET_CREATION_FAILURE" + .. py:attribute:: SECRET_RESOLUTION_ERROR :value: "SECRET_RESOLUTION_ERROR" @@ -1312,6 +1508,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SELF_BOOTSTRAP_FAILURE :value: "SELF_BOOTSTRAP_FAILURE" + .. py:attribute:: SERVERLESS_LONG_RUNNING_TERMINATED + :value: "SERVERLESS_LONG_RUNNING_TERMINATED" + .. py:attribute:: SKIPPED_SLOW_NODES :value: "SKIPPED_SLOW_NODES" @@ -1324,15 +1523,33 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: SPARK_IMAGE_DOWNLOAD_FAILURE :value: "SPARK_IMAGE_DOWNLOAD_FAILURE" + .. py:attribute:: SPARK_IMAGE_DOWNLOAD_THROTTLED + :value: "SPARK_IMAGE_DOWNLOAD_THROTTLED" + + .. py:attribute:: SPARK_IMAGE_NOT_FOUND + :value: "SPARK_IMAGE_NOT_FOUND" + .. py:attribute:: SPARK_STARTUP_FAILURE :value: "SPARK_STARTUP_FAILURE" .. py:attribute:: SPOT_INSTANCE_TERMINATION :value: "SPOT_INSTANCE_TERMINATION" + .. py:attribute:: SSH_BOOTSTRAP_FAILURE + :value: "SSH_BOOTSTRAP_FAILURE" + .. py:attribute:: STORAGE_DOWNLOAD_FAILURE :value: "STORAGE_DOWNLOAD_FAILURE" + .. py:attribute:: STORAGE_DOWNLOAD_FAILURE_DUE_TO_MISCONFIG + :value: "STORAGE_DOWNLOAD_FAILURE_DUE_TO_MISCONFIG" + + .. py:attribute:: STORAGE_DOWNLOAD_FAILURE_SLOW + :value: "STORAGE_DOWNLOAD_FAILURE_SLOW" + + .. py:attribute:: STORAGE_DOWNLOAD_FAILURE_THROTTLED + :value: "STORAGE_DOWNLOAD_FAILURE_THROTTLED" + .. py:attribute:: STS_CLIENT_SETUP_FAILURE :value: "STS_CLIENT_SETUP_FAILURE" @@ -1348,6 +1565,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: UNEXPECTED_LAUNCH_FAILURE :value: "UNEXPECTED_LAUNCH_FAILURE" + .. py:attribute:: UNEXPECTED_POD_RECREATION + :value: "UNEXPECTED_POD_RECREATION" + .. py:attribute:: UNKNOWN :value: "UNKNOWN" @@ -1357,6 +1577,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: UPDATE_INSTANCE_PROFILE_FAILURE :value: "UPDATE_INSTANCE_PROFILE_FAILURE" + .. py:attribute:: USER_INITIATED_VM_TERMINATION + :value: "USER_INITIATED_VM_TERMINATION" + .. py:attribute:: USER_REQUEST :value: "USER_REQUEST" @@ -1369,6 +1592,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: WORKSPACE_CONFIGURATION_ERROR :value: "WORKSPACE_CONFIGURATION_ERROR" + .. py:attribute:: WORKSPACE_UPDATE + :value: "WORKSPACE_UPDATE" + .. 
py:class:: TerminationReasonType type of the termination diff --git a/docs/dbdataclasses/dashboards.rst b/docs/dbdataclasses/dashboards.rst index b68b1f1b6..776aac603 100644 --- a/docs/dbdataclasses/dashboards.rst +++ b/docs/dbdataclasses/dashboards.rst @@ -133,6 +133,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: CONTEXT_EXCEEDED_EXCEPTION :value: "CONTEXT_EXCEEDED_EXCEPTION" + .. py:attribute:: COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION + :value: "COULD_NOT_GET_MODEL_DEPLOYMENTS_EXCEPTION" + .. py:attribute:: COULD_NOT_GET_UC_SCHEMA_EXCEPTION :value: "COULD_NOT_GET_UC_SCHEMA_EXCEPTION" @@ -154,6 +157,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: FUNCTION_CALL_MISSING_PARAMETER_EXCEPTION :value: "FUNCTION_CALL_MISSING_PARAMETER_EXCEPTION" + .. py:attribute:: GENERATED_SQL_QUERY_TOO_LONG_EXCEPTION + :value: "GENERATED_SQL_QUERY_TOO_LONG_EXCEPTION" + .. py:attribute:: GENERIC_CHAT_COMPLETION_EXCEPTION :value: "GENERIC_CHAT_COMPLETION_EXCEPTION" @@ -196,6 +202,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: MESSAGE_UPDATED_WHILE_EXECUTING_EXCEPTION :value: "MESSAGE_UPDATED_WHILE_EXECUTING_EXCEPTION" + .. py:attribute:: MISSING_SQL_QUERY_EXCEPTION + :value: "MISSING_SQL_QUERY_EXCEPTION" + .. py:attribute:: NO_DEPLOYMENTS_AVAILABLE_TO_WORKSPACE :value: "NO_DEPLOYMENTS_AVAILABLE_TO_WORKSPACE" @@ -246,7 +255,7 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: MessageStatus - MessageStatus. The possible values are: * `FETCHING_METADATA`: Fetching metadata from the data sources. * `FILTERING_CONTEXT`: Running smart context step to determine relevant context. * `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is completed. Results are in the `attachments` field. Get the SQL query result by calling [getMessageQueryResult](:method:genie/getMessageQueryResult) API. * `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to rerun the query. * `CANCELLED`: Message has been cancelled. + MessageStatus. The possible values are: * `FETCHING_METADATA`: Fetching metadata from the data sources. * `FILTERING_CONTEXT`: Running smart context step to determine relevant context. * `ASKING_AI`: Waiting for the LLM to respond to the user's question. * `PENDING_WAREHOUSE`: Waiting for warehouse before the SQL query can start executing. * `EXECUTING_QUERY`: Executing a generated SQL query. Get the SQL query result by calling [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * `FAILED`: The response generation or query execution failed. See `error` field. * `COMPLETED`: Message processing is completed. Results are in the `attachments` field. Get the SQL query result by calling [getMessageAttachmentQueryResult](:method:genie/getMessageAttachmentQueryResult) API. * `SUBMITTED`: Message has been submitted. * `QUERY_RESULT_EXPIRED`: SQL result is not available anymore. The user needs to rerun the query. 
Rerun the SQL query result by calling [executeMessageAttachmentQuery](:method:genie/executeMessageAttachmentQuery) API. * `CANCELLED`: Message has been cancelled. .. py:attribute:: ASKING_AI :value: "ASKING_AI" diff --git a/docs/dbdataclasses/jobs.rst b/docs/dbdataclasses/jobs.rst index 19f1a2208..fa5af4189 100644 --- a/docs/dbdataclasses/jobs.rst +++ b/docs/dbdataclasses/jobs.rst @@ -482,6 +482,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be. The performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager (see cluster-common PerformanceTarget). + .. py:attribute:: BALANCED + :value: "BALANCED" + .. py:attribute:: COST_OPTIMIZED :value: "COST_OPTIMIZED" diff --git a/docs/dbdataclasses/marketplace.rst b/docs/dbdataclasses/marketplace.rst index 222c5065c..02e48c381 100644 --- a/docs/dbdataclasses/marketplace.rst +++ b/docs/dbdataclasses/marketplace.rst @@ -274,6 +274,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: LISTING :value: "LISTING" + .. py:attribute:: LISTING_RESOURCE + :value: "LISTING_RESOURCE" + .. py:attribute:: PROVIDER :value: "PROVIDER" @@ -461,6 +464,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:class:: MarketplaceFileType + .. py:attribute:: APP + :value: "APP" + .. py:attribute:: EMBEDDED_NOTEBOOK :value: "EMBEDDED_NOTEBOOK" diff --git a/docs/dbdataclasses/ml.rst b/docs/dbdataclasses/ml.rst index b176e56c4..860a4ffbc 100644 --- a/docs/dbdataclasses/ml.rst +++ b/docs/dbdataclasses/ml.rst @@ -92,6 +92,14 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: CreateForecastingExperimentRequest + :members: + :undoc-members: + +.. autoclass:: CreateForecastingExperimentResponse + :members: + :undoc-members: + .. autoclass:: CreateModelRequest :members: :undoc-members: @@ -263,6 +271,27 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: ForecastingExperiment + :members: + :undoc-members: + +.. py:class:: ForecastingExperimentState + + .. py:attribute:: CANCELLED + :value: "CANCELLED" + + .. py:attribute:: FAILED + :value: "FAILED" + + .. py:attribute:: PENDING + :value: "PENDING" + + .. py:attribute:: RUNNING + :value: "RUNNING" + + .. py:attribute:: SUCCEEDED + :value: "SUCCEEDED" + .. autoclass:: GetExperimentByNameResponse :members: :undoc-members: diff --git a/docs/dbdataclasses/serving.rst b/docs/dbdataclasses/serving.rst index abaeb5355..367f41b90 100644 --- a/docs/dbdataclasses/serving.rst +++ b/docs/dbdataclasses/serving.rst @@ -79,6 +79,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: ApiKeyAuth + :members: + :undoc-members: + .. autoclass:: AutoCaptureConfigInput :members: :undoc-members: @@ -91,6 +95,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: BearerTokenAuth + :members: + :undoc-members: + .. autoclass:: BuildLogsResponse :members: :undoc-members: @@ -120,6 +128,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: CustomProviderConfig + :members: + :undoc-members: + .. 
autoclass:: DataPlaneInfo :members: :undoc-members: @@ -240,6 +252,9 @@ These dataclasses are used in the SDK to represent API requests and responses fo .. py:attribute:: COHERE :value: "COHERE" + .. py:attribute:: CUSTOM + :value: "CUSTOM" + .. py:attribute:: DATABRICKS_MODEL_SERVING :value: "DATABRICKS_MODEL_SERVING" @@ -256,6 +271,10 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: +.. autoclass:: FallbackConfig + :members: + :undoc-members: + .. autoclass:: FoundationModel :members: :undoc-members: diff --git a/docs/dbdataclasses/sharing.rst b/docs/dbdataclasses/sharing.rst index 2e4437ef6..f72c59b21 100644 --- a/docs/dbdataclasses/sharing.rst +++ b/docs/dbdataclasses/sharing.rst @@ -111,15 +111,15 @@ These dataclasses are used in the SDK to represent API requests and responses fo :members: :undoc-members: -.. autoclass:: DeltaSharingFunctionDependency +.. autoclass:: DeltaSharingFunction :members: :undoc-members: -.. autoclass:: DeltaSharingTableDependency +.. autoclass:: DeltaSharingFunctionDependency :members: :undoc-members: -.. autoclass:: Function +.. autoclass:: DeltaSharingTableDependency :members: :undoc-members: diff --git a/docs/workspace/compute/clusters.rst b/docs/workspace/compute/clusters.rst index 528cff321..e4423bc98 100644 --- a/docs/workspace/compute/clusters.rst +++ b/docs/workspace/compute/clusters.rst @@ -66,7 +66,6 @@ `owner_username`. :param cluster_id: str - <needs content added> :param owner_username: str New owner of the cluster_id after this RPC. @@ -105,8 +104,11 @@ Create new cluster. Creates a new Spark cluster. This method will acquire new instances from the cloud provider if - necessary. Note: Databricks may not be able to acquire some of the requested nodes, due to cloud - provider limitations (account limits, spot price, etc.) or transient network issues. + necessary. This method is asynchronous; the returned ``cluster_id`` can be used to poll the cluster + status. When this method returns, the cluster will be in a ``PENDING`` state. The cluster will be + usable once it enters a ``RUNNING`` state. Note: Databricks may not be able to acquire some of the + requested nodes, due to cloud provider limitations (account limits, spot price, etc.) or transient + network issues. If Databricks acquires at least 85% of the requested on-demand nodes, cluster creation will succeed. Otherwise the cluster will terminate with an informative error message. @@ -179,12 +181,17 @@ standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. :param docker_image: :class:`DockerImage` (optional) + Custom docker image BYOC :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. :param driver_node_type_id: str (optional) The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and + node_type_id take precedence. 
:param enable_elastic_disk: bool (optional) Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions @@ -271,6 +278,7 @@ `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. :param workload_type: :class:`WorkloadType` (optional) + Cluster Attributes showing for clusters workload types. :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -443,12 +451,17 @@ standard clusters. * `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled. :param docker_image: :class:`DockerImage` (optional) + Custom docker image BYOC :param driver_instance_pool_id: str (optional) The optional ID of the instance pool for the driver of the cluster belongs. The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not assigned. :param driver_node_type_id: str (optional) The node type of the Spark driver. Note that this field is optional; if unset, the driver node type will be set as the same value as `node_type_id` defined above. + + This field, along with node_type_id, should not be set if virtual_cluster_size is set. If both + driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and + node_type_id take precedence. :param enable_elastic_disk: bool (optional) Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk space when its Spark workers are running low on disk space. This feature requires specific AWS permissions @@ -535,6 +548,7 @@ `effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not. :param workload_type: :class:`WorkloadType` (optional) + Cluster Attributes showing for clusters workload types. :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -603,8 +617,7 @@ List cluster activity events. Retrieves a list of events about the activity of a cluster. This API is paginated. If there are more - events to read, the response includes all the nparameters necessary to request the next page of - events. + events to read, the response includes all the parameters necessary to request the next page of events. :param cluster_id: str The ID of the cluster to retrieve events about. @@ -808,7 +821,6 @@ cluster that is already pinned will have no effect. This API can only be called by workspace admins. :param cluster_id: str - <needs content added> @@ -911,7 +923,6 @@ :param cluster_id: str The cluster to be started. :param restart_user: str (optional) - <needs content added> :returns: Long-running operation waiter for :class:`ClusterDetails`. @@ -1039,11 +1050,10 @@ Start terminated cluster. Starts a terminated Spark cluster with the supplied ID. This works similar to `createCluster` except: - - * The previous cluster id and attributes are preserved. * The cluster starts with the last specified - cluster size. * If the previous cluster was an autoscaling cluster, the current cluster starts with - the minimum number of nodes. * If the cluster is not currently in a `TERMINATED` state, nothing will - happen. * Clusters launched to run a job cannot be started. + - The previous cluster id and attributes are preserved. - The cluster starts with the last specified + cluster size. 
- If the previous cluster was an autoscaling cluster, the current cluster starts with + the minimum number of nodes. - If the cluster is not currently in a ``TERMINATED`` state, nothing will + happen. - Clusters launched to run a job cannot be started. :param cluster_id: str The cluster to be started. @@ -1094,7 +1104,6 @@ admins. :param cluster_id: str - <needs content added> @@ -1115,10 +1124,18 @@ :param cluster_id: str ID of the cluster. :param update_mask: str - Specifies which fields of the cluster will be updated. This is required in the POST request. The - update mask should be supplied as a single string. To specify multiple fields, separate them with - commas (no spaces). To delete a field from a cluster configuration, add it to the `update_mask` - string but omit it from the `cluster` object. + Used to specify which cluster attributes and size fields to update. See https://google.aip.dev/161 + for more details. + + The field mask must be a single string, with multiple fields separated by commas (no spaces). The + field path is relative to the resource object, using a dot (`.`) to navigate sub-fields (e.g., + `author.given_name`). Specification of elements in sequence or map fields is not allowed, as only + the entire collection field can be specified. Field names must exactly match the resource field + names. + + A field mask of `*` indicates full replacement. It’s recommended to always explicitly list the + fields being updated and avoid using `*` wildcards, as it can lead to unintended results if the API + changes in the future. :param cluster: :class:`UpdateClusterResource` (optional) The cluster to be updated. diff --git a/docs/workspace/dashboards/genie.rst b/docs/workspace/dashboards/genie.rst index 6c0e91751..eb92d299f 100644 --- a/docs/workspace/dashboards/genie.rst +++ b/docs/workspace/dashboards/genie.rst @@ -35,7 +35,8 @@ Execute message attachment SQL query. - Execute the SQL for a message query attachment. + Execute the SQL for a message query attachment. Use this API when the query attachment has expired and + needs to be re-executed. :param space_id: str Genie space ID @@ -51,7 +52,7 @@ .. py:method:: execute_message_query(space_id: str, conversation_id: str, message_id: str) -> GenieGetMessageQueryResultResponse - Execute SQL query in a conversation message. + [Deprecated] Execute SQL query in a conversation message. Execute the SQL query in the message. @@ -119,7 +120,7 @@ .. py:method:: get_message_query_result_by_attachment(space_id: str, conversation_id: str, message_id: str, attachment_id: str) -> GenieGetMessageQueryResultResponse - [deprecated] Get conversation message SQL query result. + [Deprecated] Get conversation message SQL query result. Get the result of SQL query if the message has a query attachment. This is only available if a message has a query attachment and the message status is `EXECUTING_QUERY` OR `COMPLETED`. @@ -138,9 +139,9 @@ .. py:method:: get_space(space_id: str) -> GenieSpace - Get details of a Genie Space. + Get Genie Space. - Get a Genie Space. + Get details of a Genie Space. :param space_id: str The ID associated with the Genie space diff --git a/docs/workspace/iam/groups.rst b/docs/workspace/iam/groups.rst index 8eb4ccbe2..0b62b675a 100644 --- a/docs/workspace/iam/groups.rst +++ b/docs/workspace/iam/groups.rst @@ -187,7 +187,7 @@ Partially updates the details of a group. :param id: str - Unique ID for a group in the Databricks workspace. + Unique ID in the Databricks workspace. 
:param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. diff --git a/docs/workspace/iam/service_principals.rst b/docs/workspace/iam/service_principals.rst index ec893c807..74a498b00 100644 --- a/docs/workspace/iam/service_principals.rst +++ b/docs/workspace/iam/service_principals.rst @@ -176,7 +176,7 @@ Partially updates the details of a single service principal in the Databricks workspace. :param id: str - Unique ID for a service principal in the Databricks workspace. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. diff --git a/docs/workspace/iam/users.rst b/docs/workspace/iam/users.rst index 5edacca5f..76837ac54 100644 --- a/docs/workspace/iam/users.rst +++ b/docs/workspace/iam/users.rst @@ -55,8 +55,7 @@ External ID is not currently supported. It is reserved for future use. :param groups: List[:class:`ComplexValue`] (optional) :param id: str (optional) - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param name: :class:`Name` (optional) :param roles: List[:class:`ComplexValue`] (optional) Corresponds to AWS instance profile/arn role. @@ -240,7 +239,7 @@ Partially updates a user resource by applying the supplied operations on specific user attributes. :param id: str - Unique ID for a user in the Databricks workspace. + Unique ID in the Databricks workspace. :param operations: List[:class:`Patch`] (optional) :param schemas: List[:class:`PatchSchema`] (optional) The schema of the patch request. Must be ["urn:ietf:params:scim:api:messages:2.0:PatchOp"]. @@ -285,8 +284,7 @@ Replaces a user's information with the data supplied in request. :param id: str - Databricks user ID. This is automatically set by Databricks. Any value provided by the client will - be ignored. + Databricks user ID. :param active: bool (optional) If this user is active :param display_name: str (optional) diff --git a/docs/workspace/ml/forecasting.rst b/docs/workspace/ml/forecasting.rst new file mode 100644 index 000000000..bb667b3fc --- /dev/null +++ b/docs/workspace/ml/forecasting.rst @@ -0,0 +1,80 @@ +``w.forecasting``: Forecasting +============================== +.. currentmodule:: databricks.sdk.service.ml + +.. py:class:: ForecastingAPI + + The Forecasting API allows you to create and get serverless forecasting experiments + + .. py:method:: create_experiment(train_data_path: str, target_column: str, time_column: str, forecast_granularity: str, forecast_horizon: int [, custom_weights_column: Optional[str], experiment_path: Optional[str], holiday_regions: Optional[List[str]], max_runtime: Optional[int], prediction_data_path: Optional[str], primary_metric: Optional[str], register_to: Optional[str], split_column: Optional[str], timeseries_identifier_columns: Optional[List[str]], training_frameworks: Optional[List[str]]]) -> Wait[ForecastingExperiment] + + Create a forecasting experiment. + + Creates a serverless forecasting experiment. Returns the experiment ID. + + :param train_data_path: str + The three-level (fully qualified) name of a unity catalog table. This table serves as the training + data for the forecasting model. 
+ :param target_column: str + Name of the column in the input training table that serves as the prediction target. The values in + this column will be used as the ground truth for model training. + :param time_column: str + Name of the column in the input training table that represents the timestamp of each row. + :param forecast_granularity: str + The granularity of the forecast. This defines the time interval between consecutive rows in the time + series data. Possible values: '1 second', '1 minute', '5 minutes', '10 minutes', '15 minutes', '30 + minutes', 'Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'. + :param forecast_horizon: int + The number of time steps into the future for which predictions should be made. This value represents + a multiple of forecast_granularity determining how far ahead the model will forecast. + :param custom_weights_column: str (optional) + Name of the column in the input training table used to customize the weight for each time series to + calculate weighted metrics. + :param experiment_path: str (optional) + The path to the created experiment. This is the path where the experiment will be stored in the + workspace. + :param holiday_regions: List[str] (optional) + Region code(s) to consider when automatically adding holiday features. When empty, no holiday + features are added. Only supports 1 holiday region for now. + :param max_runtime: int (optional) + The maximum duration in minutes for which the experiment is allowed to run. If the experiment + exceeds this time limit it will be stopped automatically. + :param prediction_data_path: str (optional) + The three-level (fully qualified) path to a unity catalog table. This table path serves to store the + predictions. + :param primary_metric: str (optional) + The evaluation metric used to optimize the forecasting model. + :param register_to: str (optional) + The three-level (fully qualified) path to a unity catalog model. This model path serves to store the + best model. + :param split_column: str (optional) + Name of the column in the input training table used for custom data splits. The values in this + column must be "train", "validate", or "test" to indicate which split each row belongs to. + :param timeseries_identifier_columns: List[str] (optional) + Name of the column in the input training table used to group the dataset to predict individual time + series + :param training_frameworks: List[str] (optional) + The list of frameworks to include for model tuning. Possible values: 'Prophet', 'ARIMA', 'DeepAR'. + An empty list will include all supported frameworks. + + :returns: + Long-running operation waiter for :class:`ForecastingExperiment`. + See :method:wait_get_experiment_forecasting_succeeded for more details. + + + .. py:method:: create_experiment_and_wait(train_data_path: str, target_column: str, time_column: str, forecast_granularity: str, forecast_horizon: int [, custom_weights_column: Optional[str], experiment_path: Optional[str], holiday_regions: Optional[List[str]], max_runtime: Optional[int], prediction_data_path: Optional[str], primary_metric: Optional[str], register_to: Optional[str], split_column: Optional[str], timeseries_identifier_columns: Optional[List[str]], training_frameworks: Optional[List[str]], timeout: datetime.timedelta = 2:00:00]) -> ForecastingExperiment + + + .. py:method:: get_experiment(experiment_id: str) -> ForecastingExperiment + + Get a forecasting experiment. 
+ + Public RPC to get forecasting experiment + + :param experiment_id: str + The unique ID of a forecasting experiment + + :returns: :class:`ForecastingExperiment` + + + .. py:method:: wait_get_experiment_forecasting_succeeded(experiment_id: str, timeout: datetime.timedelta = 2:00:00, callback: Optional[Callable[[ForecastingExperiment], None]]) -> ForecastingExperiment diff --git a/docs/workspace/ml/index.rst b/docs/workspace/ml/index.rst index 1a713eb57..9114a2f19 100644 --- a/docs/workspace/ml/index.rst +++ b/docs/workspace/ml/index.rst @@ -8,4 +8,5 @@ Create and manage experiments, features, and other machine learning artifacts :maxdepth: 1 experiments + forecasting model_registry \ No newline at end of file diff --git a/docs/workspace/pipelines/pipelines.rst b/docs/workspace/pipelines/pipelines.rst index 38f440147..935724d82 100644 --- a/docs/workspace/pipelines/pipelines.rst +++ b/docs/workspace/pipelines/pipelines.rst @@ -87,7 +87,7 @@ Unique identifier for this pipeline. :param ingestion_definition: :class:`IngestionPipelineDefinition` (optional) The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings. + 'libraries', 'schema', 'target', or 'catalog' settings. :param libraries: List[:class:`PipelineLibrary`] (optional) Libraries or code needed by this deployment. :param name: str (optional) @@ -105,15 +105,15 @@ Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. :param schema: str (optional) - The default schema (database) where tables are read from or published to. The presence of this field - implies that the pipeline is in direct publishing mode. + The default schema (database) where tables are read from or published to. :param serverless: bool (optional) Whether serverless compute is enabled for this pipeline. :param storage: str (optional) DBFS root directory for storing checkpoints and tables. :param target: str (optional) - Target schema (database) to add tables in this pipeline to. If not specified, no data is published - to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must + be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is deprecated + for pipeline creation in favor of the `schema` field. :param trigger: :class:`PipelineTrigger` (optional) Which pipeline trigger to use. Deprecated: Use `continuous` instead. @@ -485,7 +485,7 @@ Unique identifier for this pipeline. :param ingestion_definition: :class:`IngestionPipelineDefinition` (optional) The configuration for a managed ingestion pipeline. These settings cannot be used with the - 'libraries', 'target' or 'catalog' settings. + 'libraries', 'schema', 'target', or 'catalog' settings. :param libraries: List[:class:`PipelineLibrary`] (optional) Libraries or code needed by this deployment. :param name: str (optional) @@ -503,15 +503,15 @@ Only `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown. :param schema: str (optional) - The default schema (database) where tables are read from or published to. The presence of this field - implies that the pipeline is in direct publishing mode. + The default schema (database) where tables are read from or published to. :param serverless: bool (optional) Whether serverless compute is enabled for this pipeline. 
:param storage: str (optional) DBFS root directory for storing checkpoints and tables. :param target: str (optional) - Target schema (database) to add tables in this pipeline to. If not specified, no data is published - to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`. + Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must + be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is deprecated + for pipeline creation in favor of the `schema` field. :param trigger: :class:`PipelineTrigger` (optional) Which pipeline trigger to use. Deprecated: Use `continuous` instead. diff --git a/docs/workspace/serving/serving_endpoints.rst b/docs/workspace/serving/serving_endpoints.rst index 83609fc09..ad99bfc30 100644 --- a/docs/workspace/serving/serving_endpoints.rst +++ b/docs/workspace/serving/serving_endpoints.rst @@ -209,7 +209,7 @@ :returns: :class:`PutResponse` - .. py:method:: put_ai_gateway(name: str [, guardrails: Optional[AiGatewayGuardrails], inference_table_config: Optional[AiGatewayInferenceTableConfig], rate_limits: Optional[List[AiGatewayRateLimit]], usage_tracking_config: Optional[AiGatewayUsageTrackingConfig]]) -> PutAiGatewayResponse + .. py:method:: put_ai_gateway(name: str [, fallback_config: Optional[FallbackConfig], guardrails: Optional[AiGatewayGuardrails], inference_table_config: Optional[AiGatewayInferenceTableConfig], rate_limits: Optional[List[AiGatewayRateLimit]], usage_tracking_config: Optional[AiGatewayUsageTrackingConfig]]) -> PutAiGatewayResponse Update AI Gateway of a serving endpoint. @@ -218,6 +218,9 @@ :param name: str The name of the serving endpoint whose AI Gateway is being updated. This field is required. + :param fallback_config: :class:`FallbackConfig` (optional) + Configuration for traffic fallback which auto fallbacks to other served entities if the request to a + served entity fails with certain error codes, to increase availability. :param guardrails: :class:`AiGatewayGuardrails` (optional) Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. 
:param inference_table_config: :class:`AiGatewayInferenceTableConfig` (optional) diff --git a/pyproject.toml b/pyproject.toml index 72dab7d59..60c33f0e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ dev = [ "build", # some integration tests depend on the databricks-sdk-py wheel ] notebook = [ - "ipython>=8,<9", + "ipython>=8,<10", "ipywidgets>=8,<9", ] openai = [ diff --git a/setup.cfg b/setup.cfg index 2a1d9ab5e..3e5658a34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,59 +15,4 @@ max-line-length = 120 [black] line-length = 120 -target-version = ['py37', 'py38', 'py39', 'py310', 'py311','py312','py313'] - -COLUMN_LIMIT = 110 -INDENT_WIDTH = 4 -CONTINUATION_INDENT_WIDTH = 4 -ALIGN_CLOSING_BRACKET_WITH_VISUAL_INDENT = true -ALLOW_SPLIT_BEFORE_DEFAULT_OR_NAMED_ASSIGNS = false -COALESCE_BRACKETS = true -DEDENT_CLOSING_BRACKETS = false -INDENT_CLOSING_BRACKETS = false -JOIN_MULTIPLE_LINES = true -SPACES_BEFORE_COMMENT = 1 -ALLOW_MULTILINE_LAMBDAS = true -DISABLE_ENDING_COMMA_HEURISTIC = true - -BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true -BLANK_LINES_AROUND_TOP_LEVEL_DEFINITION = 2 -BLANK_LINES_BETWEEN_TOP_LEVEL_IMPORTS_AND_VARIABLES = 1 -EACH_DICT_ENTRY_ON_SEPARATE_LINE = true -FORCE_MULTILINE_DICT = false -INDENT_DICTIONARY_VALUE = false -INDENT_BLANK_LINES = false - -SPACE_BETWEEN_ENDING_COMMA_AND_CLOSING_BRACKET = true -SPACE_INSIDE_BRACKETS = false -SPACES_AROUND_POWER_OPERATOR = false -SPACES_AROUND_DEFAULT_OR_NAMED_ASSIGN = false -SPACES_AROUND_DICT_DELIMITERS = false -SPACES_AROUND_LIST_DELIMITERS = false -SPACES_AROUND_SUBSCRIPT_COLON = false -SPACES_AROUND_TUPLE_DELIMITERS = false - -SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED = false -SPLIT_ALL_COMMA_SEPARATED_VALUES = false -SPLIT_ALL_TOP_LEVEL_COMMA_SEPARATED_VALUES = false -SPLIT_BEFORE_ARITHMETIC_OPERATOR = false -SPLIT_BEFORE_BITWISE_OPERATOR = true -SPLIT_BEFORE_CLOSING_BRACKET = true -SPLIT_BEFORE_DICT_SET_GENERATOR = true -SPLIT_BEFORE_DOT = false -SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = false -SPLIT_BEFORE_FIRST_ARGUMENT = false -SPLIT_BEFORE_LOGICAL_OPERATOR = true -SPLIT_BEFORE_NAMED_ASSIGNS = true -SPLIT_COMPLEX_COMPREHENSION = false - -SPLIT_PENALTY_AFTER_OPENING_BRACKET = 300 -SPLIT_PENALTY_AFTER_UNARY_OPERATOR = 10000 -SPLIT_PENALTY_ARITHMETIC_OPERATOR = 300 -SPLIT_PENALTY_BEFORE_IF_EXPR = 0 -SPLIT_PENALTY_BITWISE_OPERATOR = 300 -SPLIT_PENALTY_COMPREHENSION = 80 -SPLIT_PENALTY_EXCESS_CHARACTER = 7000 -SPLIT_PENALTY_FOR_ADDED_LINE_SPLIT = 30 -SPLIT_PENALTY_IMPORT_NAMES = 0 -SPLIT_PENALTY_LOGICAL_OPERATOR = 300 +target-version = ['py37', 'py38', 'py39', 'py310', 'py311','py312','py313'] \ No newline at end of file diff --git a/tagging.py b/tagging.py index c57621fb4..5504bdd0e 100644 --- a/tagging.py +++ b/tagging.py @@ -14,6 +14,7 @@ NEXT_CHANGELOG_FILE_NAME = "NEXT_CHANGELOG.md" CHANGELOG_FILE_NAME = "CHANGELOG.md" PACKAGE_FILE_NAME = ".package.json" +CODEGEN_FILE_NAME = ".codegen.json" """ This script tags the release of the SDKs using a combination of the GitHub API and Git commands. It reads the local repository to determine necessary changes, updates changelogs, and creates tags. @@ -153,14 +154,14 @@ def update_version_references(tag_info: TagInfo) -> None: Code references are defined in .package.json files. 
""" - # Load version patterns from '.package.json' file - package_file_path = os.path.join(os.getcwd(), tag_info.package.path, PACKAGE_FILE_NAME) + # Load version patterns from '.codegen.json' file at the top level of the repository + package_file_path = os.path.join(os.getcwd(), CODEGEN_FILE_NAME) with open(package_file_path, 'r') as file: package_file = json.load(file) version = package_file.get('version') if not version: - print(f"Version not found in .package.json. Nothing to update.") + print(f"`version` not found in .codegen.json. Nothing to update.") return # Update the versions diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index efcc78e95..55114bd84 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -39,7 +39,7 @@ def pytest_configure(config): def pytest_collection_modifyitems(items): # safer to refer to fixture fns instead of strings - client_fixtures = [x.__name__ for x in [a, w, ucws]] + client_fixtures = [x.__name__ for x in [a, w, ucws, ucacct]] for item in items: current_fixtures = getattr(item, "fixturenames", ()) for requires_client in client_fixtures: diff --git a/tests/integration/test_data_plane.py b/tests/integration/test_data_plane.py index 0062a7ed0..338366667 100644 --- a/tests/integration/test_data_plane.py +++ b/tests/integration/test_data_plane.py @@ -13,3 +13,10 @@ def test_data_plane_token_source(ucws, env_or_skip): dp_token = ts.token(info.endpoint_url, info.authorization_details) assert dp_token.valid + + +def test_model_serving_data_plane(ucws, env_or_skip): + endpoint = env_or_skip("SERVING_ENDPOINT_NAME") + serving_endpoints = ucws.serving_endpoints_data_plane + response = serving_endpoints.query(name=endpoint, dataframe_records=[{"col": 1.0}]) + assert response is not None diff --git a/tests/test_data_plane.py b/tests/test_data_plane.py index 54ace9ba7..d5956be57 100644 --- a/tests/test_data_plane.py +++ b/tests/test_data_plane.py @@ -3,9 +3,7 @@ from urllib import parse from databricks.sdk import data_plane, oauth -from databricks.sdk.data_plane import DataPlaneService from databricks.sdk.oauth import Token -from databricks.sdk.service.serving import DataPlaneInfo cp_token = Token(access_token="control plane token", token_type="type", expiry=datetime.now() + timedelta(hours=1)) dp_token = Token(access_token="data plane token", token_type="type", expiry=datetime.now() + timedelta(hours=1)) @@ -63,7 +61,7 @@ def token(self): def test_token_source_get_token_existing(config): another_token = Token(access_token="another token", token_type="type", expiry=datetime.now() + timedelta(hours=1)) - token_source = data_plane.DataPlaneTokenSource(config.host, success_callable(token), disable_async=True) + token_source = data_plane.DataPlaneTokenSource(config.host, success_callable(cp_token), disable_async=True) token_source._token_sources["endpoint:authDetails"] = MockEndpointTokenSource(another_token) with patch("databricks.sdk.oauth.retrieve_token", return_value=dp_token) as retrieve_token: @@ -71,80 +69,3 @@ def test_token_source_get_token_existing(config): retrieve_token.assert_not_called() assert result_token.access_token == another_token.access_token - - -## These tests are for the old implementation. 
#TODO: Remove after the new implementation is used - -info = DataPlaneInfo(authorization_details="authDetails", endpoint_url="url") - -token = Token( - access_token="token", - token_type="type", - expiry=datetime.now() + timedelta(hours=1), -) - - -class MockRefresher: - - def __init__(self, expected: str): - self._expected = expected - - def __call__(self, auth_details: str) -> Token: - assert self._expected == auth_details - return token - - -def throw_exception(): - raise Exception("Expected value to be cached") - - -def test_not_cached(): - data_plane = DataPlaneService() - res = data_plane.get_data_plane_details( - "method", - ["params"], - lambda: info, - lambda a: MockRefresher(info.authorization_details).__call__(a), - ) - assert res.endpoint_url == info.endpoint_url - assert res.token == token - - -def test_token_expired(): - expired = Token( - access_token="expired", - token_type="type", - expiry=datetime.now() + timedelta(hours=-1), - ) - data_plane = DataPlaneService() - data_plane._tokens["method/params"] = expired - res = data_plane.get_data_plane_details( - "method", - ["params"], - lambda: info, - lambda a: MockRefresher(info.authorization_details).__call__(a), - ) - assert res.endpoint_url == info.endpoint_url - assert res.token == token - - -def test_info_cached(): - data_plane = DataPlaneService() - data_plane._data_plane_info["method/params"] = info - res = data_plane.get_data_plane_details( - "method", - ["params"], - throw_exception, - lambda a: MockRefresher(info.authorization_details).__call__(a), - ) - assert res.endpoint_url == info.endpoint_url - assert res.token == token - - -def test_token_cached(): - data_plane = DataPlaneService() - data_plane._data_plane_info["method/params"] = info - data_plane._tokens["method/params"] = token - res = data_plane.get_data_plane_details("method", ["params"], throw_exception, throw_exception) - assert res.endpoint_url == info.endpoint_url - assert res.token == token
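
A minimal usage sketch for the new `w.forecasting` API added in `docs/workspace/ml/forecasting.rst` above. It is not taken from the diff itself: the Unity Catalog table name and the column names are hypothetical placeholders, while the method name and required arguments follow the documented signature of `create_experiment_and_wait`.

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# Create a serverless forecasting experiment and block until it reaches a
# terminal state (create_experiment_and_wait wraps the long-running waiter).
experiment = w.forecasting.create_experiment_and_wait(
    train_data_path="main.demo.sales_train",  # hypothetical UC table (catalog.schema.table)
    target_column="units_sold",               # hypothetical prediction target column
    time_column="order_date",                 # hypothetical timestamp column
    forecast_granularity="Daily",
    forecast_horizon=30,                      # 30 steps of the chosen granularity
)
# A finished experiment can later be re-fetched with w.forecasting.get_experiment(...)
print(experiment)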
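
In the same spirit, a sketch of the asynchronous cluster creation flow described in the updated `clusters.create` docstring: the call returns while the cluster is still `PENDING`, and the returned waiter polls until it is `RUNNING`. The cluster name, Spark version and node type below are placeholders.

from datetime import timedelta

from databricks.sdk import WorkspaceClient

w = WorkspaceClient()

# create() returns a long-running-operation waiter; result() blocks until the
# cluster leaves PENDING and becomes RUNNING (or raises if it terminates).
cluster = w.clusters.create(
    cluster_name="sdk-docs-example",    # placeholder name
    spark_version="15.4.x-scala2.12",   # placeholder DBR version
    node_type_id="i3.xlarge",           # placeholder cloud node type
    num_workers=1,
    autotermination_minutes=30,
).result(timeout=timedelta(minutes=20))

print(cluster.state)  # expected to be RUNNING at this point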
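
Finally, a sketch of the partial-update semantics now documented for `clusters.update`: only the fields listed in `update_mask` are modified, and an explicit mask is preferred over the `*` wildcard, which means full replacement. The cluster ID is a placeholder, and `autotermination_minutes` is assumed to be one of the updatable fields on `UpdateClusterResource`.

from databricks.sdk import WorkspaceClient
from databricks.sdk.service.compute import UpdateClusterResource

w = WorkspaceClient()

# Only the field named in update_mask is changed; every other cluster
# attribute is left untouched (see the field-mask notes above).
w.clusters.update(
    cluster_id="0123-456789-abcdefgh",       # placeholder cluster ID
    update_mask="autotermination_minutes",
    cluster=UpdateClusterResource(autotermination_minutes=60),  # field name assumed
)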