[Python] Add generated resource code (#2487)

## Changes
Add Python code generated from `databricks bundle schema`

## Tests
Add a few tests for special cases in the generated code
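For illustration, a round-trip test over one of the generated models might look like the sketch below. This is a hypothetical example, not necessarily one of the tests added in this commit, and it assumes `from_dict`/`as_dict` perform a straightforward field-by-field conversion:

```python
from databricks.bundles.compute._models.auto_scale import AutoScale


def test_auto_scale_round_trip():
    # Build the dataclass from its TypedDict twin.
    auto_scale = AutoScale.from_dict({"min_workers": 1, "max_workers": 4})

    assert auto_scale.min_workers == 1
    assert auto_scale.max_workers == 4

    # Serializing back should yield the same JSON-like dict,
    # with unset optional fields left out.
    assert auto_scale.as_dict() == {"min_workers": 1, "max_workers": 4}
```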
Gleb Kanterov 2025-03-17 10:03:17 +01:00 committed by GitHub
parent 164b6d404d
commit 7c1604405f
84 changed files with 6306 additions and 4 deletions

View File

@@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Adlsgen2Info:
""""""
destination: VariableOr[str]
"""
abfss destination, e.g. `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`.
"""
@classmethod
def from_dict(cls, value: "Adlsgen2InfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "Adlsgen2InfoDict":
return _transform_to_json_value(self) # type:ignore
class Adlsgen2InfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
abfss destination, e.g. `abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>`.
"""
Adlsgen2InfoParam = Adlsgen2InfoDict | Adlsgen2Info
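Each generated model follows the same three-part shape seen in `Adlsgen2Info` above: a `kw_only` dataclass, a `TypedDict` twin, and a `*Param` union that accepts either form wherever the model is nested. A minimal usage sketch (the destination value is illustrative, and equality of the two construction paths assumes `_transform` does a plain field-by-field copy):

```python
from databricks.bundles.compute._models.adlsgen2_info import Adlsgen2Info

# Construct the dataclass directly...
info = Adlsgen2Info(
    destination="abfss://container@account.dfs.core.windows.net/scripts"
)

# ...or start from the TypedDict form, which is what Adlsgen2InfoParam accepts.
info_from_dict = Adlsgen2Info.from_dict(
    {"destination": "abfss://container@account.dfs.core.windows.net/scripts"}
)

assert info == info_from_dict
```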

View File

@@ -0,0 +1,52 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class AutoScale:
""""""
max_workers: VariableOrOptional[int] = None
"""
The maximum number of workers to which the cluster can scale up when overloaded.
Note that `max_workers` must be strictly greater than `min_workers`.
"""
min_workers: VariableOrOptional[int] = None
"""
The minimum number of workers to which the cluster can scale down when underutilized.
It is also the initial number of workers the cluster will have after creation.
"""
@classmethod
def from_dict(cls, value: "AutoScaleDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "AutoScaleDict":
return _transform_to_json_value(self) # type:ignore
class AutoScaleDict(TypedDict, total=False):
""""""
max_workers: VariableOrOptional[int]
"""
The maximum number of workers to which the cluster can scale up when overloaded.
Note that `max_workers` must be strictly greater than `min_workers`.
"""
min_workers: VariableOrOptional[int]
"""
The minimum number of workers to which the cluster can scale down when underutilized.
It is also the initial number of workers the cluster will have after creation.
"""
AutoScaleParam = AutoScaleDict | AutoScale

View File

@@ -0,0 +1,222 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.aws_availability import (
AwsAvailability,
AwsAvailabilityParam,
)
from databricks.bundles.compute._models.ebs_volume_type import (
EbsVolumeType,
EbsVolumeTypeParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class AwsAttributes:
""""""
availability: VariableOrOptional[AwsAvailability] = None
ebs_volume_count: VariableOrOptional[int] = None
"""
The number of volumes launched for each instance. Users can choose up to 10 volumes.
This feature is only enabled for supported node types. Legacy node types cannot specify
custom EBS volumes.
For node types with no instance store, at least one EBS volume needs to be specified;
otherwise, cluster creation will fail.
These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc.
Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc.
If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for
scratch storage because heterogeneously sized scratch devices can lead to inefficient disk
utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance
store volumes.
Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`
will be overridden.
"""
ebs_volume_iops: VariableOrOptional[int] = None
"""
If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""
ebs_volume_size: VariableOrOptional[int] = None
"""
The size of each EBS volume (in GiB) launched for each instance. For general purpose
SSD, this value must be within the range 100 - 4096. For throughput optimized HDD,
this value must be within the range 500 - 4096.
"""
ebs_volume_throughput: VariableOrOptional[int] = None
"""
If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""
ebs_volume_type: VariableOrOptional[EbsVolumeType] = None
first_on_demand: VariableOrOptional[int] = None
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
If this value is greater than 0, the cluster driver node in particular will be placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""
instance_profile_arn: VariableOrOptional[str] = None
"""
Nodes for this cluster will only be placed on AWS instances with this instance profile. If
omitted, nodes will be placed on instances without an IAM instance profile. The instance
profile must have previously been added to the Databricks environment by an account
administrator.
This feature may only be available to certain customer plans.
If this field is omitted, we will pull in the default from the conf if it exists.
"""
spot_bid_price_percent: VariableOrOptional[int] = None
"""
The bid price for AWS spot instances, as a percentage of the corresponding instance type's
on-demand price.
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot
instance, then the bid price is half of the price of
on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice
the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.
When spot instances are requested for this cluster, only spot instances whose bid price
percentage matches this field will be considered.
Note that, for safety, we enforce this field to be no more than 10000.
The default value and documentation here should be kept consistent with
CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.
"""
zone_id: VariableOrOptional[str] = None
"""
Identifier for the availability zone/datacenter in which the cluster resides.
This string will be of a form like "us-west-2a". The provided availability
zone must be in the same region as the Databricks deployment. For example, "us-west-2a"
is not a valid zone id if the Databricks deployment resides in the "us-east-1" region.
This is an optional field at cluster creation, and if not specified, a default zone will be used.
If the zone specified is "auto", will try to place cluster in a zone with high availability,
and will retry placement in a different AZ if there is not enough capacity.
The list of available zones as well as the default value can be found by using the
`List Zones` method.
"""
@classmethod
def from_dict(cls, value: "AwsAttributesDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "AwsAttributesDict":
return _transform_to_json_value(self) # type:ignore
class AwsAttributesDict(TypedDict, total=False):
""""""
availability: VariableOrOptional[AwsAvailabilityParam]
ebs_volume_count: VariableOrOptional[int]
"""
The number of volumes launched for each instance. Users can choose up to 10 volumes.
This feature is only enabled for supported node types. Legacy node types cannot specify
custom EBS volumes.
For node types with no instance store, at least one EBS volume needs to be specified;
otherwise, cluster creation will fail.
These EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc.
Instance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc.
If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for
scratch storage because heterogeneously sized scratch devices can lead to inefficient disk
utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance
store volumes.
Please note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`
will be overridden.
"""
ebs_volume_iops: VariableOrOptional[int]
"""
If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""
ebs_volume_size: VariableOrOptional[int]
"""
The size of each EBS volume (in GiB) launched for each instance. For general purpose
SSD, this value must be within the range 100 - 4096. For throughput optimized HDD,
this value must be within the range 500 - 4096.
"""
ebs_volume_throughput: VariableOrOptional[int]
"""
If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.
"""
ebs_volume_type: VariableOrOptional[EbsVolumeTypeParam]
first_on_demand: VariableOrOptional[int]
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
If this value is greater than 0, the cluster driver node in particular will be placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""
instance_profile_arn: VariableOrOptional[str]
"""
Nodes for this cluster will only be placed on AWS instances with this instance profile. If
omitted, nodes will be placed on instances without an IAM instance profile. The instance
profile must have previously been added to the Databricks environment by an account
administrator.
This feature may only be available to certain customer plans.
If this field is omitted, we will pull in the default from the conf if it exists.
"""
spot_bid_price_percent: VariableOrOptional[int]
"""
The bid price for AWS spot instances, as a percentage of the corresponding instance type's
on-demand price.
For example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot
instance, then the bid price is half of the price of
on-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice
the price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.
When spot instances are requested for this cluster, only spot instances whose bid price
percentage matches this field will be considered.
Note that, for safety, we enforce this field to be no more than 10000.
The default value and documentation here should be kept consistent with
CommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.
"""
zone_id: VariableOrOptional[str]
"""
Identifier for the availability zone/datacenter in which the cluster resides.
This string will be of a form like "us-west-2a". The provided availability
zone must be in the same region as the Databricks deployment. For example, "us-west-2a"
is not a valid zone id if the Databricks deployment resides in the "us-east-1" region.
This is an optional field at cluster creation, and if not specified, a default zone will be used.
If the zone specified is "auto", will try to place cluster in a zone with high availability,
and will retry placement in a different AZ if there is not enough capacity.
The list of available zones as well as the default value can be found by using the
`List Zones` method.
"""
AwsAttributesParam = AwsAttributesDict | AwsAttributes

View File

@@ -0,0 +1,20 @@
from enum import Enum
from typing import Literal
class AwsAvailability(Enum):
"""
Availability type used for all subsequent nodes past the `first_on_demand` ones.
Note: If `first_on_demand` is zero, this availability type will be used for the entire cluster.
"""
SPOT = "SPOT"
ON_DEMAND = "ON_DEMAND"
SPOT_WITH_FALLBACK = "SPOT_WITH_FALLBACK"
AwsAvailabilityParam = (
Literal["SPOT", "ON_DEMAND", "SPOT_WITH_FALLBACK"] | AwsAvailability
)
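For enums, the `*Param` alias accepts either the enum member or its string literal, so dict-based configuration can stay plain strings. A sketch, assuming `from_dict` normalizes literals into enum members:

```python
from databricks.bundles.compute._models.aws_attributes import AwsAttributes
from databricks.bundles.compute._models.aws_availability import AwsAvailability

# The TypedDict form can use the string literal for the availability field...
attrs = AwsAttributes.from_dict(
    {"availability": "SPOT_WITH_FALLBACK", "first_on_demand": 1}
)

# ...which is expected to come back as the enum member on the dataclass.
assert attrs.availability == AwsAvailability.SPOT_WITH_FALLBACK
```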

View File

@@ -0,0 +1,88 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.azure_availability import (
AzureAvailability,
AzureAvailabilityParam,
)
from databricks.bundles.compute._models.log_analytics_info import (
LogAnalyticsInfo,
LogAnalyticsInfoParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class AzureAttributes:
""""""
availability: VariableOrOptional[AzureAvailability] = None
first_on_demand: VariableOrOptional[int] = None
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
This value should be greater than 0, to make sure the cluster driver node is placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""
log_analytics_info: VariableOrOptional[LogAnalyticsInfo] = None
"""
Defines values necessary to configure and run Azure Log Analytics agent
"""
spot_bid_max_price: VariableOrOptional[float] = None
"""
The max bid price to be used for Azure spot instances.
The Max price for the bid cannot be higher than the on-demand price of the instance.
If not specified, the default value is -1, which specifies that the instance cannot be evicted
on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1.
"""
@classmethod
def from_dict(cls, value: "AzureAttributesDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "AzureAttributesDict":
return _transform_to_json_value(self) # type:ignore
class AzureAttributesDict(TypedDict, total=False):
""""""
availability: VariableOrOptional[AzureAvailabilityParam]
first_on_demand: VariableOrOptional[int]
"""
The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.
This value should be greater than 0, to make sure the cluster driver node is placed on an
on-demand instance. If this value is greater than or equal to the current cluster size, all
nodes will be placed on on-demand instances. If this value is less than the current cluster
size, `first_on_demand` nodes will be placed on on-demand instances and the remainder will
be placed on `availability` instances. Note that this value does not affect
cluster size and cannot currently be mutated over the lifetime of a cluster.
"""
log_analytics_info: VariableOrOptional[LogAnalyticsInfoParam]
"""
Defines values necessary to configure and run Azure Log Analytics agent
"""
spot_bid_max_price: VariableOrOptional[float]
"""
The max bid price to be used for Azure spot instances.
The Max price for the bid cannot be higher than the on-demand price of the instance.
If not specified, the default value is -1, which specifies that the instance cannot be evicted
on the basis of price, and only on the basis of availability. Further, the value should be > 0 or -1.
"""
AzureAttributesParam = AzureAttributesDict | AzureAttributes

View File

@@ -0,0 +1,20 @@
from enum import Enum
from typing import Literal
class AzureAvailability(Enum):
"""
Availability type used for all subsequent nodes past the `first_on_demand` ones.
Note: If `first_on_demand` is zero (which only happens on pool clusters), this availability
type will be used for the entire cluster.
"""
SPOT_AZURE = "SPOT_AZURE"
ON_DEMAND_AZURE = "ON_DEMAND_AZURE"
SPOT_WITH_FALLBACK_AZURE = "SPOT_WITH_FALLBACK_AZURE"
AzureAvailabilityParam = (
Literal["SPOT_AZURE", "ON_DEMAND_AZURE", "SPOT_WITH_FALLBACK_AZURE"]
| AzureAvailability
)

View File

@@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class ClientsTypes:
""""""
jobs: VariableOrOptional[bool] = None
"""
With jobs set, the cluster can be used for jobs
"""
notebooks: VariableOrOptional[bool] = None
"""
With notebooks set, this cluster can be used for notebooks
"""
@classmethod
def from_dict(cls, value: "ClientsTypesDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ClientsTypesDict":
return _transform_to_json_value(self) # type:ignore
class ClientsTypesDict(TypedDict, total=False):
""""""
jobs: VariableOrOptional[bool]
"""
With jobs set, the cluster can be used for jobs
"""
notebooks: VariableOrOptional[bool]
"""
With notebooks set, this cluster can be used for notebooks
"""
ClientsTypesParam = ClientsTypesDict | ClientsTypes

View File

@@ -0,0 +1,64 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.dbfs_storage_info import (
DbfsStorageInfo,
DbfsStorageInfoParam,
)
from databricks.bundles.compute._models.s3_storage_info import (
S3StorageInfo,
S3StorageInfoParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class ClusterLogConf:
""""""
dbfs: VariableOrOptional[DbfsStorageInfo] = None
"""
destination needs to be provided. e.g.
`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
"""
s3: VariableOrOptional[S3StorageInfo] = None
"""
destination and either the region or endpoint need to be provided. e.g.
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
Cluster iam role is used to access s3, please make sure the cluster iam role in
`instance_profile_arn` has permission to write data to the s3 destination.
"""
@classmethod
def from_dict(cls, value: "ClusterLogConfDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ClusterLogConfDict":
return _transform_to_json_value(self) # type:ignore
class ClusterLogConfDict(TypedDict, total=False):
""""""
dbfs: VariableOrOptional[DbfsStorageInfoParam]
"""
destination needs to be provided. e.g.
`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
"""
s3: VariableOrOptional[S3StorageInfoParam]
"""
destination and either the region or endpoint need to be provided. e.g.
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
Cluster iam role is used to access s3, please make sure the cluster iam role in
`instance_profile_arn` has permission to write data to the s3 destination.
"""
ClusterLogConfParam = ClusterLogConfDict | ClusterLogConf

View File

@@ -0,0 +1,458 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.auto_scale import (
AutoScale,
AutoScaleParam,
)
from databricks.bundles.compute._models.aws_attributes import (
AwsAttributes,
AwsAttributesParam,
)
from databricks.bundles.compute._models.azure_attributes import (
AzureAttributes,
AzureAttributesParam,
)
from databricks.bundles.compute._models.cluster_log_conf import (
ClusterLogConf,
ClusterLogConfParam,
)
from databricks.bundles.compute._models.data_security_mode import (
DataSecurityMode,
DataSecurityModeParam,
)
from databricks.bundles.compute._models.docker_image import (
DockerImage,
DockerImageParam,
)
from databricks.bundles.compute._models.gcp_attributes import (
GcpAttributes,
GcpAttributesParam,
)
from databricks.bundles.compute._models.init_script_info import (
InitScriptInfo,
InitScriptInfoParam,
)
from databricks.bundles.compute._models.runtime_engine import (
RuntimeEngine,
RuntimeEngineParam,
)
from databricks.bundles.compute._models.workload_type import (
WorkloadType,
WorkloadTypeParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOrDict,
VariableOrList,
VariableOrOptional,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class ClusterSpec:
""""""
apply_policy_default_values: VariableOrOptional[bool] = None
"""
When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.
"""
autoscale: VariableOrOptional[AutoScale] = None
"""
Parameters needed in order to automatically scale clusters up and down based on load.
Note: autoscaling works best with DB runtime versions 3.0 or later.
"""
autotermination_minutes: VariableOrOptional[int] = None
"""
Automatically terminates the cluster after it is inactive for this time in minutes. If not set,
this cluster will not be automatically terminated. If specified, the threshold must be between
10 and 10000 minutes.
Users can also set this value to 0 to explicitly disable automatic termination.
"""
aws_attributes: VariableOrOptional[AwsAttributes] = None
"""
Attributes related to clusters running on Amazon Web Services.
If not specified at cluster creation, a set of default values will be used.
"""
azure_attributes: VariableOrOptional[AzureAttributes] = None
"""
Attributes related to clusters running on Microsoft Azure.
If not specified at cluster creation, a set of default values will be used.
"""
cluster_log_conf: VariableOrOptional[ClusterLogConf] = None
"""
The configuration for delivering spark logs to a long-term storage destination.
Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified
for one cluster. If the conf is given, the logs will be delivered to the destination every
`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while
the destination of executor logs is `$destination/$clusterId/executor`.
"""
cluster_name: VariableOrOptional[str] = None
"""
Cluster name requested by the user. This doesn't have to be unique.
If not specified at creation, the cluster name will be an empty string.
"""
custom_tags: VariableOrDict[str] = field(default_factory=dict)
"""
Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS
instances and EBS volumes) with these tags in addition to `default_tags`. Notes:
- Currently, Databricks allows at most 45 custom tags
- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags
"""
data_security_mode: VariableOrOptional[DataSecurityMode] = None
docker_image: VariableOrOptional[DockerImage] = None
driver_instance_pool_id: VariableOrOptional[str] = None
"""
The optional ID of the instance pool to which the driver of the cluster belongs.
The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not
assigned.
"""
driver_node_type_id: VariableOrOptional[str] = None
"""
The node type of the Spark driver. Note that this field is optional;
if unset, the driver node type will be set as the same value
as `node_type_id` defined above.
"""
enable_elastic_disk: VariableOrOptional[bool] = None
"""
Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk
space when its Spark workers are running low on disk space. This feature requires specific AWS
permissions to function correctly - refer to the User Guide for more details.
"""
enable_local_disk_encryption: VariableOrOptional[bool] = None
"""
Whether to enable LUKS on cluster VMs' local disks
"""
gcp_attributes: VariableOrOptional[GcpAttributes] = None
"""
Attributes related to clusters running on Google Cloud Platform.
If not specified at cluster creation, a set of default values will be used.
"""
init_scripts: VariableOrList[InitScriptInfo] = field(default_factory=list)
"""
The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `<destination>/<cluster-ID>/init_scripts`.
"""
instance_pool_id: VariableOrOptional[str] = None
"""
The optional ID of the instance pool to which the cluster belongs.
"""
is_single_node: VariableOrOptional[bool] = None
"""
This field can only be used with `kind`.
When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers`
"""
node_type_id: VariableOrOptional[str] = None
"""
This field encodes, through a single value, the resources available to each of
the Spark nodes in this cluster. For example, the Spark nodes can be provisioned
and optimized for memory or compute intensive workloads. A list of available node
types can be retrieved by using the :method:clusters/listNodeTypes API call.
"""
num_workers: VariableOrOptional[int] = None
"""
Number of worker nodes that this cluster should have. A cluster has one Spark Driver
and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.
Note: When reading the properties of a cluster, this field reflects the desired number
of workers rather than the actual current number of workers. For instance, if a cluster
is resized from 5 to 10 workers, this field will immediately be updated to reflect
the target size of 10 workers, whereas the workers listed in `spark_info` will gradually
increase from 5 to 10 as the new nodes are provisioned.
"""
policy_id: VariableOrOptional[str] = None
"""
The ID of the cluster policy used to create the cluster if applicable.
"""
runtime_engine: VariableOrOptional[RuntimeEngine] = None
single_user_name: VariableOrOptional[str] = None
"""
Single user name if data_security_mode is `SINGLE_USER`
"""
spark_conf: VariableOrDict[str] = field(default_factory=dict)
"""
An object containing a set of optional, user-specified Spark configuration key-value pairs.
Users can also pass in a string of extra JVM options to the driver and the executors via
`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.
"""
spark_env_vars: VariableOrDict[str] = field(default_factory=dict)
"""
An object containing a set of optional, user-specified environment variable key-value pairs.
Please note that key-value pair of the form (X,Y) will be exported as is (i.e.,
`export X='Y'`) while launching the driver and workers.
In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending
them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all
default databricks managed environmental variables are included as well.
Example Spark environment variables:
`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}`
"""
spark_version: VariableOrOptional[str] = None
"""
The Spark version of the cluster, e.g. `3.3.x-scala2.11`.
A list of available Spark versions can be retrieved by using
the :method:clusters/sparkVersions API call.
"""
ssh_public_keys: VariableOrList[str] = field(default_factory=list)
"""
SSH public key contents that will be added to each Spark node in this cluster. The
corresponding private keys can be used to login with the user name `ubuntu` on port `2200`.
Up to 10 keys can be specified.
"""
use_ml_runtime: VariableOrOptional[bool] = None
"""
This field can only be used with `kind`.
`effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not.
"""
workload_type: VariableOrOptional[WorkloadType] = None
@classmethod
def from_dict(cls, value: "ClusterSpecDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ClusterSpecDict":
return _transform_to_json_value(self) # type:ignore
class ClusterSpecDict(TypedDict, total=False):
""""""
apply_policy_default_values: VariableOrOptional[bool]
"""
When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.
"""
autoscale: VariableOrOptional[AutoScaleParam]
"""
Parameters needed in order to automatically scale clusters up and down based on load.
Note: autoscaling works best with DB runtime versions 3.0 or later.
"""
autotermination_minutes: VariableOrOptional[int]
"""
Automatically terminates the cluster after it is inactive for this time in minutes. If not set,
this cluster will not be automatically terminated. If specified, the threshold must be between
10 and 10000 minutes.
Users can also set this value to 0 to explicitly disable automatic termination.
"""
aws_attributes: VariableOrOptional[AwsAttributesParam]
"""
Attributes related to clusters running on Amazon Web Services.
If not specified at cluster creation, a set of default values will be used.
"""
azure_attributes: VariableOrOptional[AzureAttributesParam]
"""
Attributes related to clusters running on Microsoft Azure.
If not specified at cluster creation, a set of default values will be used.
"""
cluster_log_conf: VariableOrOptional[ClusterLogConfParam]
"""
The configuration for delivering spark logs to a long-term storage destination.
Two kinds of destinations (dbfs and s3) are supported. Only one destination can be specified
for one cluster. If the conf is given, the logs will be delivered to the destination every
`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while
the destination of executor logs is `$destination/$clusterId/executor`.
"""
cluster_name: VariableOrOptional[str]
"""
Cluster name requested by the user. This doesn't have to be unique.
If not specified at creation, the cluster name will be an empty string.
"""
custom_tags: VariableOrDict[str]
"""
Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS
instances and EBS volumes) with these tags in addition to `default_tags`. Notes:
- Currently, Databricks allows at most 45 custom tags
- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags
"""
data_security_mode: VariableOrOptional[DataSecurityModeParam]
docker_image: VariableOrOptional[DockerImageParam]
driver_instance_pool_id: VariableOrOptional[str]
"""
The optional ID of the instance pool to which the driver of the cluster belongs.
The pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not
assigned.
"""
driver_node_type_id: VariableOrOptional[str]
"""
The node type of the Spark driver. Note that this field is optional;
if unset, the driver node type will be set as the same value
as `node_type_id` defined above.
"""
enable_elastic_disk: VariableOrOptional[bool]
"""
Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk
space when its Spark workers are running low on disk space. This feature requires specific AWS
permissions to function correctly - refer to the User Guide for more details.
"""
enable_local_disk_encryption: VariableOrOptional[bool]
"""
Whether to enable LUKS on cluster VMs' local disks
"""
gcp_attributes: VariableOrOptional[GcpAttributesParam]
"""
Attributes related to clusters running on Google Cloud Platform.
If not specified at cluster creation, a set of default values will be used.
"""
init_scripts: VariableOrList[InitScriptInfoParam]
"""
The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `<destination>/<cluster-ID>/init_scripts`.
"""
instance_pool_id: VariableOrOptional[str]
"""
The optional ID of the instance pool to which the cluster belongs.
"""
is_single_node: VariableOrOptional[bool]
"""
This field can only be used with `kind`.
When set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers`
"""
node_type_id: VariableOrOptional[str]
"""
This field encodes, through a single value, the resources available to each of
the Spark nodes in this cluster. For example, the Spark nodes can be provisioned
and optimized for memory or compute intensive workloads. A list of available node
types can be retrieved by using the :method:clusters/listNodeTypes API call.
"""
num_workers: VariableOrOptional[int]
"""
Number of worker nodes that this cluster should have. A cluster has one Spark Driver
and `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.
Note: When reading the properties of a cluster, this field reflects the desired number
of workers rather than the actual current number of workers. For instance, if a cluster
is resized from 5 to 10 workers, this field will immediately be updated to reflect
the target size of 10 workers, whereas the workers listed in `spark_info` will gradually
increase from 5 to 10 as the new nodes are provisioned.
"""
policy_id: VariableOrOptional[str]
"""
The ID of the cluster policy used to create the cluster if applicable.
"""
runtime_engine: VariableOrOptional[RuntimeEngineParam]
single_user_name: VariableOrOptional[str]
"""
Single user name if data_security_mode is `SINGLE_USER`
"""
spark_conf: VariableOrDict[str]
"""
An object containing a set of optional, user-specified Spark configuration key-value pairs.
Users can also pass in a string of extra JVM options to the driver and the executors via
`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.
"""
spark_env_vars: VariableOrDict[str]
"""
An object containing a set of optional, user-specified environment variable key-value pairs.
Please note that key-value pair of the form (X,Y) will be exported as is (i.e.,
`export X='Y'`) while launching the driver and workers.
In order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending
them to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all
default databricks managed environmental variables are included as well.
Example Spark environment variables:
`{"SPARK_WORKER_MEMORY": "28000m", "SPARK_LOCAL_DIRS": "/local_disk0"}` or
`{"SPARK_DAEMON_JAVA_OPTS": "$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true"}`
"""
spark_version: VariableOrOptional[str]
"""
The Spark version of the cluster, e.g. `3.3.x-scala2.11`.
A list of available Spark versions can be retrieved by using
the :method:clusters/sparkVersions API call.
"""
ssh_public_keys: VariableOrList[str]
"""
SSH public key contents that will be added to each Spark node in this cluster. The
corresponding private keys can be used to login with the user name `ubuntu` on port `2200`.
Up to 10 keys can be specified.
"""
use_ml_runtime: VariableOrOptional[bool]
"""
This field can only be used with `kind`.
`effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not.
"""
workload_type: VariableOrOptional[WorkloadTypeParam]
ClusterSpecParam = ClusterSpecDict | ClusterSpec
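Because every nested field of `ClusterSpecDict` is typed with a `*Param` union, a whole cluster spec can be expressed as plain nested dictionaries and converted in one call. A sketch with illustrative values, assuming the module lives at `databricks.bundles.compute._models.cluster_spec` like the other generated files:

```python
from databricks.bundles.compute._models.cluster_spec import ClusterSpec

cluster_spec = ClusterSpec.from_dict(
    {
        "spark_version": "15.4.x-scala2.12",
        "node_type_id": "i3.xlarge",
        # Nested models are written in their TypedDict form.
        "autoscale": {"min_workers": 1, "max_workers": 4},
        "aws_attributes": {"availability": "SPOT_WITH_FALLBACK", "first_on_demand": 1},
        "custom_tags": {"team": "data-platform"},
    }
)

assert cluster_spec.node_type_id == "i3.xlarge"
```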

View File

@@ -0,0 +1,56 @@
from enum import Enum
from typing import Literal
class DataSecurityMode(Enum):
"""
Data security mode decides what data governance model to use when accessing data
from a cluster.
The following modes can only be used with `kind`.
* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.
* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.
* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.
The following modes can be used regardless of `kind`.
* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.
* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.
* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. But programming languages and cluster features might be limited.
The following modes are deprecated starting with Databricks Runtime 15.0 and
will be removed for future Databricks Runtime versions:
* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.
* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.
* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.
* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn't have UC nor passthrough enabled.
"""
DATA_SECURITY_MODE_AUTO = "DATA_SECURITY_MODE_AUTO"
DATA_SECURITY_MODE_STANDARD = "DATA_SECURITY_MODE_STANDARD"
DATA_SECURITY_MODE_DEDICATED = "DATA_SECURITY_MODE_DEDICATED"
NONE = "NONE"
SINGLE_USER = "SINGLE_USER"
USER_ISOLATION = "USER_ISOLATION"
LEGACY_TABLE_ACL = "LEGACY_TABLE_ACL"
LEGACY_PASSTHROUGH = "LEGACY_PASSTHROUGH"
LEGACY_SINGLE_USER = "LEGACY_SINGLE_USER"
LEGACY_SINGLE_USER_STANDARD = "LEGACY_SINGLE_USER_STANDARD"
DataSecurityModeParam = (
Literal[
"DATA_SECURITY_MODE_AUTO",
"DATA_SECURITY_MODE_STANDARD",
"DATA_SECURITY_MODE_DEDICATED",
"NONE",
"SINGLE_USER",
"USER_ISOLATION",
"LEGACY_TABLE_ACL",
"LEGACY_PASSTHROUGH",
"LEGACY_SINGLE_USER",
"LEGACY_SINGLE_USER_STANDARD",
]
| DataSecurityMode
)

View File

@@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class DbfsStorageInfo:
""""""
destination: VariableOr[str]
"""
dbfs destination, e.g. `dbfs:/my/path`
"""
@classmethod
def from_dict(cls, value: "DbfsStorageInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "DbfsStorageInfoDict":
return _transform_to_json_value(self) # type:ignore
class DbfsStorageInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
dbfs destination, e.g. `dbfs:/my/path`
"""
DbfsStorageInfoParam = DbfsStorageInfoDict | DbfsStorageInfo

View File

@@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class DockerBasicAuth:
""""""
password: VariableOrOptional[str] = None
"""
Password of the user
"""
username: VariableOrOptional[str] = None
"""
Name of the user
"""
@classmethod
def from_dict(cls, value: "DockerBasicAuthDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "DockerBasicAuthDict":
return _transform_to_json_value(self) # type:ignore
class DockerBasicAuthDict(TypedDict, total=False):
""""""
password: VariableOrOptional[str]
"""
Password of the user
"""
username: VariableOrOptional[str]
"""
Name of the user
"""
DockerBasicAuthParam = DockerBasicAuthDict | DockerBasicAuth

View File

@@ -0,0 +1,46 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.docker_basic_auth import (
DockerBasicAuth,
DockerBasicAuthParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class DockerImage:
""""""
basic_auth: VariableOrOptional[DockerBasicAuth] = None
url: VariableOrOptional[str] = None
"""
URL of the docker image.
"""
@classmethod
def from_dict(cls, value: "DockerImageDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "DockerImageDict":
return _transform_to_json_value(self) # type:ignore
class DockerImageDict(TypedDict, total=False):
""""""
basic_auth: VariableOrOptional[DockerBasicAuthParam]
url: VariableOrOptional[str]
"""
URL of the docker image.
"""
DockerImageParam = DockerImageDict | DockerImage

View File

@@ -0,0 +1,16 @@
from enum import Enum
from typing import Literal
class EbsVolumeType(Enum):
"""
The type of EBS volumes that will be launched with this cluster.
"""
GENERAL_PURPOSE_SSD = "GENERAL_PURPOSE_SSD"
THROUGHPUT_OPTIMIZED_HDD = "THROUGHPUT_OPTIMIZED_HDD"
EbsVolumeTypeParam = (
Literal["GENERAL_PURPOSE_SSD", "THROUGHPUT_OPTIMIZED_HDD"] | EbsVolumeType
)

View File

@@ -0,0 +1,63 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrList
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Environment:
"""
The environment entity used to preserve serverless environment side panel and jobs' environment for non-notebook task.
In this minimal environment spec, only pip dependencies are supported.
"""
client: VariableOr[str]
"""
Client version used by the environment
The client is the user-facing environment of the runtime.
Each client comes with a specific set of pre-installed libraries.
The version is a string, consisting of the major client version.
"""
dependencies: VariableOrList[str] = field(default_factory=list)
"""
List of pip dependencies, as supported by the version of pip in this environment.
Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/
Allowed dependency could be <requirement specifier>, <archive url/path>, <local project path>(WSFS or Volumes in Databricks), <vcs project url>
E.g. dependencies: ["foo==0.0.1", "-r /Workspace/test/requirements.txt"]
"""
@classmethod
def from_dict(cls, value: "EnvironmentDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "EnvironmentDict":
return _transform_to_json_value(self) # type:ignore
class EnvironmentDict(TypedDict, total=False):
""""""
client: VariableOr[str]
"""
Client version used by the environment
The client is the user-facing environment of the runtime.
Each client comes with a specific set of pre-installed libraries.
The version is a string, consisting of the major client version.
"""
dependencies: VariableOrList[str]
"""
List of pip dependencies, as supported by the version of pip in this environment.
Each dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/
Allowed dependency could be <requirement specifier>, <archive url/path>, <local project path>(WSFS or Volumes in Databricks), <vcs project url>
E.g. dependencies: ["foo==0.0.1", "-r /Workspace/test/requirements.txt"]
"""
EnvironmentParam = EnvironmentDict | Environment
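For example, a serverless environment for a non-notebook task could be declared like this (a sketch; the values are illustrative and the module path is assumed to follow the same `_models` naming convention as the other files):

```python
from databricks.bundles.compute._models.environment import Environment

env = Environment.from_dict(
    {
        "client": "1",
        "dependencies": [
            "simplejson==3.19.2",
            "-r /Workspace/test/requirements.txt",
        ],
    }
)

assert env.client == "1"
```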

View File

@@ -0,0 +1,102 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.gcp_availability import (
GcpAvailability,
GcpAvailabilityParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class GcpAttributes:
""""""
availability: VariableOrOptional[GcpAvailability] = None
boot_disk_size: VariableOrOptional[int] = None
"""
boot disk size in GB
"""
google_service_account: VariableOrOptional[str] = None
"""
If provided, the cluster will impersonate the google service account when accessing
gcloud services (like GCS). The google service account
must have previously been added to the Databricks environment by an account
administrator.
"""
local_ssd_count: VariableOrOptional[int] = None
"""
If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.
"""
use_preemptible_executors: VariableOrOptional[bool] = None
"""
This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).
Note: Soon to be deprecated, use the availability field instead.
"""
zone_id: VariableOrOptional[str] = None
"""
Identifier for the availability zone in which the cluster resides.
This can be one of the following:
- "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]
- "AUTO" => Databricks picks an availability zone to schedule the cluster on.
- A GCP availability zone => Pick one of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.
"""
@classmethod
def from_dict(cls, value: "GcpAttributesDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "GcpAttributesDict":
return _transform_to_json_value(self) # type:ignore
class GcpAttributesDict(TypedDict, total=False):
""""""
availability: VariableOrOptional[GcpAvailabilityParam]
boot_disk_size: VariableOrOptional[int]
"""
boot disk size in GB
"""
google_service_account: VariableOrOptional[str]
"""
If provided, the cluster will impersonate the google service account when accessing
gcloud services (like GCS). The google service account
must have previously been added to the Databricks environment by an account
administrator.
"""
local_ssd_count: VariableOrOptional[int]
"""
If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.
"""
use_preemptible_executors: VariableOrOptional[bool]
"""
This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).
Note: Soon to be deprecated, use the availability field instead.
"""
zone_id: VariableOrOptional[str]
"""
Identifier for the availability zone in which the cluster resides.
This can be one of the following:
- "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]
- "AUTO" => Databricks picks an availability zone to schedule the cluster on.
- A GCP availability zone => Pick one of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.
"""
GcpAttributesParam = GcpAttributesDict | GcpAttributes

View File

@@ -0,0 +1,19 @@
from enum import Enum
from typing import Literal
class GcpAvailability(Enum):
"""
This field determines whether the instance pool will contain preemptible
VMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.
"""
PREEMPTIBLE_GCP = "PREEMPTIBLE_GCP"
ON_DEMAND_GCP = "ON_DEMAND_GCP"
PREEMPTIBLE_WITH_FALLBACK_GCP = "PREEMPTIBLE_WITH_FALLBACK_GCP"
GcpAvailabilityParam = (
Literal["PREEMPTIBLE_GCP", "ON_DEMAND_GCP", "PREEMPTIBLE_WITH_FALLBACK_GCP"]
| GcpAvailability
)

View File

@@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class GcsStorageInfo:
""""""
destination: VariableOr[str]
"""
GCS destination/URI, e.g. `gs://my-bucket/some-prefix`
"""
@classmethod
def from_dict(cls, value: "GcsStorageInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "GcsStorageInfoDict":
return _transform_to_json_value(self) # type:ignore
class GcsStorageInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
GCS destination/URI, e.g. `gs://my-bucket/some-prefix`
"""
GcsStorageInfoParam = GcsStorageInfoDict | GcsStorageInfo

View File

@@ -0,0 +1,144 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.adlsgen2_info import (
Adlsgen2Info,
Adlsgen2InfoParam,
)
from databricks.bundles.compute._models.dbfs_storage_info import (
DbfsStorageInfo,
DbfsStorageInfoParam,
)
from databricks.bundles.compute._models.gcs_storage_info import (
GcsStorageInfo,
GcsStorageInfoParam,
)
from databricks.bundles.compute._models.local_file_info import (
LocalFileInfo,
LocalFileInfoParam,
)
from databricks.bundles.compute._models.s3_storage_info import (
S3StorageInfo,
S3StorageInfoParam,
)
from databricks.bundles.compute._models.volumes_storage_info import (
VolumesStorageInfo,
VolumesStorageInfoParam,
)
from databricks.bundles.compute._models.workspace_storage_info import (
WorkspaceStorageInfo,
WorkspaceStorageInfoParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class InitScriptInfo:
""""""
abfss: VariableOrOptional[Adlsgen2Info] = None
"""
destination needs to be provided. e.g.
`{ "abfss" : { "destination" : "abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>" } }`
"""
dbfs: VariableOrOptional[DbfsStorageInfo] = None
"""
destination needs to be provided. e.g.
`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
"""
file: VariableOrOptional[LocalFileInfo] = None
"""
destination needs to be provided. e.g.
`{ "file" : { "destination" : "file:/my/local/file.sh" } }`
"""
gcs: VariableOrOptional[GcsStorageInfo] = None
"""
destination needs to be provided. e.g.
`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`
"""
s3: VariableOrOptional[S3StorageInfo] = None
"""
destination and either the region or endpoint need to be provided. e.g.
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
Cluster iam role is used to access s3, please make sure the cluster iam role in
`instance_profile_arn` has permission to write data to the s3 destination.
"""
volumes: VariableOrOptional[VolumesStorageInfo] = None
"""
destination needs to be provided. e.g.
`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`
"""
workspace: VariableOrOptional[WorkspaceStorageInfo] = None
"""
destination needs to be provided. e.g.
`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`
"""
@classmethod
def from_dict(cls, value: "InitScriptInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "InitScriptInfoDict":
return _transform_to_json_value(self) # type:ignore
class InitScriptInfoDict(TypedDict, total=False):
""""""
abfss: VariableOrOptional[Adlsgen2InfoParam]
"""
destination needs to be provided. e.g.
`{ "abfss" : { "destination" : "abfss://<container-name>@<storage-account-name>.dfs.core.windows.net/<directory-name>" } }`
"""
dbfs: VariableOrOptional[DbfsStorageInfoParam]
"""
destination needs to be provided. e.g.
`{ "dbfs" : { "destination" : "dbfs:/home/cluster_log" } }`
"""
file: VariableOrOptional[LocalFileInfoParam]
"""
destination needs to be provided. e.g.
`{ "file" : { "destination" : "file:/my/local/file.sh" } }`
"""
gcs: VariableOrOptional[GcsStorageInfoParam]
"""
destination needs to be provided. e.g.
`{ "gcs": { "destination": "gs://my-bucket/file.sh" } }`
"""
s3: VariableOrOptional[S3StorageInfoParam]
"""
destination and either the region or endpoint need to be provided. e.g.
`{ "s3": { "destination" : "s3://cluster_log_bucket/prefix", "region" : "us-west-2" } }`
Cluster iam role is used to access s3, please make sure the cluster iam role in
`instance_profile_arn` has permission to write data to the s3 destination.
"""
volumes: VariableOrOptional[VolumesStorageInfoParam]
"""
destination needs to be provided. e.g.
`{ "volumes" : { "destination" : "/Volumes/my-init.sh" } }`
"""
workspace: VariableOrOptional[WorkspaceStorageInfoParam]
"""
destination needs to be provided. e.g.
`{ "workspace" : { "destination" : "/Users/user1@databricks.com/my-init.sh" } }`
"""
InitScriptInfoParam = InitScriptInfoDict | InitScriptInfo
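An init script entry is usually given exactly one destination; a Unity Catalog volume, for instance, can be written like this (a sketch; the volume path is illustrative):

```python
from databricks.bundles.compute._models.init_script_info import InitScriptInfo

init_script = InitScriptInfo.from_dict(
    {"volumes": {"destination": "/Volumes/main/default/scripts/my-init.sh"}}
)

assert init_script.as_dict() == {
    "volumes": {"destination": "/Volumes/main/default/scripts/my-init.sh"}
}
```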

View File

@@ -0,0 +1,132 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.maven_library import (
MavenLibrary,
MavenLibraryParam,
)
from databricks.bundles.compute._models.python_py_pi_library import (
PythonPyPiLibrary,
PythonPyPiLibraryParam,
)
from databricks.bundles.compute._models.r_cran_library import (
RCranLibrary,
RCranLibraryParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Library:
""""""
cran: VariableOrOptional[RCranLibrary] = None
"""
Specification of a CRAN library to be installed as part of the library
"""
egg: VariableOrOptional[str] = None
"""
Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above.
"""
jar: VariableOrOptional[str] = None
"""
URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.
For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or
`{ "jar": "s3://my-bucket/library.jar" }`.
If S3 is used, please make sure the cluster has read access on the library. You may need to
launch the cluster with an IAM role to access the S3 URI.
"""
maven: VariableOrOptional[MavenLibrary] = None
"""
Specification of a maven library to be installed. For example:
`{ "coordinates": "org.jsoup:jsoup:1.7.2" }`
"""
pypi: VariableOrOptional[PythonPyPiLibrary] = None
"""
Specification of a PyPi library to be installed. For example:
`{ "package": "simplejson" }`
"""
requirements: VariableOrOptional[str] = None
"""
URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.
For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }`
"""
whl: VariableOrOptional[str] = None
"""
URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.
For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or
`{ "whl": "s3://my-bucket/library.whl" }`.
If S3 is used, please make sure the cluster has read access on the library. You may need to
launch the cluster with an IAM role to access the S3 URI.
"""
@classmethod
def from_dict(cls, value: "LibraryDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "LibraryDict":
return _transform_to_json_value(self) # type:ignore
class LibraryDict(TypedDict, total=False):
""""""
cran: VariableOrOptional[RCranLibraryParam]
"""
Specification of a CRAN library to be installed as part of the library
"""
egg: VariableOrOptional[str]
"""
Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above.
"""
jar: VariableOrOptional[str]
"""
URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.
For example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or
`{ "jar": "s3://my-bucket/library.jar" }`.
If S3 is used, please make sure the cluster has read access on the library. You may need to
launch the cluster with an IAM role to access the S3 URI.
"""
maven: VariableOrOptional[MavenLibraryParam]
"""
Specification of a maven library to be installed. For example:
`{ "coordinates": "org.jsoup:jsoup:1.7.2" }`
"""
pypi: VariableOrOptional[PythonPyPiLibraryParam]
"""
Specification of a PyPi library to be installed. For example:
`{ "package": "simplejson" }`
"""
requirements: VariableOrOptional[str]
"""
URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.
For example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }`
"""
whl: VariableOrOptional[str]
"""
URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.
For example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or
`{ "whl": "s3://my-bucket/library.whl" }`.
If S3 is used, please make sure the cluster has read access on the library. You may need to
launch the cluster with an IAM role to access the S3 URI.
"""
LibraryParam = LibraryDict | Library
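A hedged usage sketch (editorial, not part of the generated file): constructing a `Library` for a PyPI package and converting it through the generated dict helpers. Import paths follow the new `_models` modules added in this commit; the wheel and jar fields are used the same way.

# Usage sketch only; assumes the module layout introduced in this commit.
from databricks.bundles.compute._models.library import Library
from databricks.bundles.compute._models.python_py_pi_library import PythonPyPiLibrary

lib = Library(pypi=PythonPyPiLibrary(package="simplejson==3.8.0"))
lib_dict = lib.as_dict()            # TypedDict form of the same library specification
roundtrip = Library.from_dict(lib_dict)  # back to the dataclass via _transform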

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class LocalFileInfo:
""""""
destination: VariableOr[str]
"""
local file destination, e.g. `file:/my/local/file.sh`
"""
@classmethod
def from_dict(cls, value: "LocalFileInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "LocalFileInfoDict":
return _transform_to_json_value(self) # type:ignore
class LocalFileInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
local file destination, e.g. `file:/my/local/file.sh`
"""
LocalFileInfoParam = LocalFileInfoDict | LocalFileInfo

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class LogAnalyticsInfo:
""""""
log_analytics_primary_key: VariableOrOptional[str] = None
"""
The primary key of the Log Analytics workspace.
"""
log_analytics_workspace_id: VariableOrOptional[str] = None
"""
The workspace ID of the Log Analytics workspace.
"""
@classmethod
def from_dict(cls, value: "LogAnalyticsInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "LogAnalyticsInfoDict":
return _transform_to_json_value(self) # type:ignore
class LogAnalyticsInfoDict(TypedDict, total=False):
""""""
log_analytics_primary_key: VariableOrOptional[str]
"""
The primary key of the Log Analytics workspace.
"""
log_analytics_workspace_id: VariableOrOptional[str]
"""
The workspace ID of the Log Analytics workspace.
"""
LogAnalyticsInfoParam = LogAnalyticsInfoDict | LogAnalyticsInfo

View File

@ -0,0 +1,70 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class MavenLibrary:
""""""
coordinates: VariableOr[str]
"""
Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2".
"""
exclusions: VariableOrList[str] = field(default_factory=list)
"""
List of dependencies to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`.
Maven dependency exclusions:
https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.
"""
repo: VariableOrOptional[str] = None
"""
Maven repo to install the Maven package from. If omitted, both Maven Central Repository
and Spark Packages are searched.
"""
@classmethod
def from_dict(cls, value: "MavenLibraryDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "MavenLibraryDict":
return _transform_to_json_value(self) # type:ignore
class MavenLibraryDict(TypedDict, total=False):
""""""
coordinates: VariableOr[str]
"""
Gradle-style maven coordinates. For example: "org.jsoup:jsoup:1.7.2".
"""
exclusions: VariableOrList[str]
"""
List of dependencies to exclude. For example: `["slf4j:slf4j", "*:hadoop-client"]`.
Maven dependency exclusions:
https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.
"""
repo: VariableOrOptional[str]
"""
Maven repo to install the Maven package from. If omitted, both Maven Central Repository
and Spark Packages are searched.
"""
MavenLibraryParam = MavenLibraryDict | MavenLibrary
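A brief, hedged usage sketch (editorial): `coordinates` is the only required field, and `exclusions` defaults to an empty list via `field(default_factory=list)`. The values mirror the examples in the docstrings above.

# Usage sketch only; values come from the docstring examples.
from databricks.bundles.compute._models.maven_library import MavenLibrary

mvn = MavenLibrary(
    coordinates="org.jsoup:jsoup:1.7.2",
    exclusions=["slf4j:slf4j", "*:hadoop-client"],
)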

View File

@ -0,0 +1,52 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class PythonPyPiLibrary:
""""""
package: VariableOr[str]
"""
The name of the pypi package to install. An optional exact version specification is also
supported. Examples: "simplejson" and "simplejson==3.8.0".
"""
repo: VariableOrOptional[str] = None
"""
The repository where the package can be found. If not specified, the default pip index is
used.
"""
@classmethod
def from_dict(cls, value: "PythonPyPiLibraryDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "PythonPyPiLibraryDict":
return _transform_to_json_value(self) # type:ignore
class PythonPyPiLibraryDict(TypedDict, total=False):
""""""
package: VariableOr[str]
"""
The name of the pypi package to install. An optional exact version specification is also
supported. Examples: "simplejson" and "simplejson==3.8.0".
"""
repo: VariableOrOptional[str]
"""
The repository where the package can be found. If not specified, the default pip index is
used.
"""
PythonPyPiLibraryParam = PythonPyPiLibraryDict | PythonPyPiLibrary

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class RCranLibrary:
""""""
package: VariableOr[str]
"""
The name of the CRAN package to install.
"""
repo: VariableOrOptional[str] = None
"""
The repository where the package can be found. If not specified, the default CRAN repo is used.
"""
@classmethod
def from_dict(cls, value: "RCranLibraryDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "RCranLibraryDict":
return _transform_to_json_value(self) # type:ignore
class RCranLibraryDict(TypedDict, total=False):
""""""
package: VariableOr[str]
"""
The name of the CRAN package to install.
"""
repo: VariableOrOptional[str]
"""
The repository where the package can be found. If not specified, the default CRAN repo is used.
"""
RCranLibraryParam = RCranLibraryDict | RCranLibrary

View File

@ -0,0 +1,22 @@
from enum import Enum
from typing import Literal
class RuntimeEngine(Enum):
"""
Determines the cluster's runtime engine, either standard or Photon.
This field is not compatible with legacy `spark_version` values that contain `-photon-`.
Remove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.
If left unspecified, the runtime engine defaults to standard unless the `spark_version`
contains `-photon-`, in which case Photon will be used.
"""
NULL = "NULL"
STANDARD = "STANDARD"
PHOTON = "PHOTON"
RuntimeEngineParam = Literal["NULL", "STANDARD", "PHOTON"] | RuntimeEngine
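A hedged illustration (editorial) of the `*Param` convention used throughout these modules: the alias accepts either the enum member or its literal string form.

from databricks.bundles.compute._models.runtime_engine import (
    RuntimeEngine,
    RuntimeEngineParam,
)

engine: RuntimeEngineParam = RuntimeEngine.PHOTON
engine_from_literal: RuntimeEngineParam = "PHOTON"  # also valid per the Literal union above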

View File

@ -0,0 +1,120 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class S3StorageInfo:
""""""
destination: VariableOr[str]
"""
S3 destination, e.g. `s3://my-bucket/some-prefix`. Note that logs will be delivered using the
cluster IAM role; please make sure you set the cluster IAM role and that the role has write access to the
destination. Please also note that you cannot use AWS keys to deliver logs.
"""
canned_acl: VariableOrOptional[str] = None
"""
(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.
If `canned_acl` is set, please make sure the cluster IAM role has `s3:PutObjectAcl` permission on
the destination bucket and prefix. The full list of possible canned ACLs can be found at
http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.
Please also note that by default only the object owner gets full control. If you are using a cross-account
role for writing data, you may want to set `bucket-owner-full-control` to make the bucket owner able to
read the logs.
"""
enable_encryption: VariableOrOptional[bool] = None
"""
(Optional) Flag to enable server side encryption, `false` by default.
"""
encryption_type: VariableOrOptional[str] = None
"""
(Optional) The encryption type; it can be `sse-s3` or `sse-kms`. It is used only when
encryption is enabled, and the default type is `sse-s3`.
"""
endpoint: VariableOrOptional[str] = None
"""
S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.
If both are set, endpoint will be used.
"""
kms_key: VariableOrOptional[str] = None
"""
(Optional) KMS key that will be used if encryption is enabled and the encryption type is set to `sse-kms`.
"""
region: VariableOrOptional[str] = None
"""
S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,
endpoint will be used.
"""
@classmethod
def from_dict(cls, value: "S3StorageInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "S3StorageInfoDict":
return _transform_to_json_value(self) # type:ignore
class S3StorageInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
S3 destination, e.g. `s3://my-bucket/some-prefix`. Note that logs will be delivered using the
cluster IAM role; please make sure you set the cluster IAM role and that the role has write access to the
destination. Please also note that you cannot use AWS keys to deliver logs.
"""
canned_acl: VariableOrOptional[str]
"""
(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.
If `canned_acl` is set, please make sure the cluster IAM role has `s3:PutObjectAcl` permission on
the destination bucket and prefix. The full list of possible canned ACLs can be found at
http://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.
Please also note that by default only the object owner gets full control. If you are using a cross-account
role for writing data, you may want to set `bucket-owner-full-control` to make the bucket owner able to
read the logs.
"""
enable_encryption: VariableOrOptional[bool]
"""
(Optional) Flag to enable server side encryption, `false` by default.
"""
encryption_type: VariableOrOptional[str]
"""
(Optional) The encryption type; it can be `sse-s3` or `sse-kms`. It is used only when
encryption is enabled, and the default type is `sse-s3`.
"""
endpoint: VariableOrOptional[str]
"""
S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.
If both are set, endpoint will be used.
"""
kms_key: VariableOrOptional[str]
"""
(Optional) KMS key that will be used if encryption is enabled and the encryption type is set to `sse-kms`.
"""
region: VariableOrOptional[str]
"""
S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,
endpoint will be used.
"""
S3StorageInfoParam = S3StorageInfoDict | S3StorageInfo
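A usage sketch (editorial; the bucket name and prefix are hypothetical): per the docstrings above, `destination` plus either `region` or `endpoint` must be provided when delivering cluster logs to S3.

from databricks.bundles.compute._models.s3_storage_info import S3StorageInfo

log_destination = S3StorageInfo(
    destination="s3://cluster-log-bucket/prefix",  # hypothetical bucket/prefix
    region="us-west-2",
    canned_acl="bucket-owner-full-control",
)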

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class VolumesStorageInfo:
""""""
destination: VariableOr[str]
"""
Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`
"""
@classmethod
def from_dict(cls, value: "VolumesStorageInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "VolumesStorageInfoDict":
return _transform_to_json_value(self) # type:ignore
class VolumesStorageInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`
"""
VolumesStorageInfoParam = VolumesStorageInfoDict | VolumesStorageInfo

View File

@ -0,0 +1,42 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.clients_types import (
ClientsTypes,
ClientsTypesParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class WorkloadType:
""""""
clients: VariableOr[ClientsTypes]
"""
Defines what type of clients can use the cluster, e.g. Notebooks, Jobs.
"""
@classmethod
def from_dict(cls, value: "WorkloadTypeDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "WorkloadTypeDict":
return _transform_to_json_value(self) # type:ignore
class WorkloadTypeDict(TypedDict, total=False):
""""""
clients: VariableOr[ClientsTypesParam]
"""
Defines what type of clients can use the cluster, e.g. Notebooks, Jobs.
"""
WorkloadTypeParam = WorkloadTypeDict | WorkloadType

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class WorkspaceStorageInfo:
""""""
destination: VariableOr[str]
"""
workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`
"""
@classmethod
def from_dict(cls, value: "WorkspaceStorageInfoDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "WorkspaceStorageInfoDict":
return _transform_to_json_value(self) # type:ignore
class WorkspaceStorageInfoDict(TypedDict, total=False):
""""""
destination: VariableOr[str]
"""
workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`
"""
WorkspaceStorageInfoParam = WorkspaceStorageInfoDict | WorkspaceStorageInfo

View File

@ -1,9 +1,593 @@
__all__ = [
"Adlsgen2Info",
"Adlsgen2InfoDict",
"Adlsgen2InfoParam",
"AutoScale",
"AutoScaleDict",
"AutoScaleParam",
"AwsAttributes",
"AwsAttributesDict",
"AwsAttributesParam",
"AwsAvailability",
"AwsAvailabilityParam",
"AzureAttributes",
"AzureAttributesDict",
"AzureAttributesParam",
"AzureAvailability",
"AzureAvailabilityParam",
"ClientsTypes",
"ClientsTypesDict",
"ClientsTypesParam",
"ClusterLogConf",
"ClusterLogConfDict",
"ClusterLogConfParam",
"ClusterSpec",
"ClusterSpecDict",
"ClusterSpecParam",
"Condition",
"ConditionParam",
"ConditionTask",
"ConditionTaskDict",
"ConditionTaskOp",
"ConditionTaskOpParam",
"ConditionTaskParam",
"Continuous",
"ContinuousDict",
"ContinuousParam",
"CronSchedule",
"CronScheduleDict",
"CronScheduleParam",
"DataSecurityMode",
"DataSecurityModeParam",
"DbfsStorageInfo",
"DbfsStorageInfoDict",
"DbfsStorageInfoParam",
"DbtTask",
"DbtTaskDict",
"DbtTaskParam",
"DockerBasicAuth",
"DockerBasicAuthDict",
"DockerBasicAuthParam",
"DockerImage",
"DockerImageDict",
"DockerImageParam",
"EbsVolumeType",
"EbsVolumeTypeParam",
"Environment",
"EnvironmentDict",
"EnvironmentParam",
"FileArrivalTriggerConfiguration",
"FileArrivalTriggerConfigurationDict",
"FileArrivalTriggerConfigurationParam",
"ForEachTask",
"ForEachTaskDict",
"ForEachTaskParam",
"GcpAttributes",
"GcpAttributesDict",
"GcpAttributesParam",
"GcpAvailability",
"GcpAvailabilityParam",
"GcsStorageInfo",
"GcsStorageInfoDict",
"GcsStorageInfoParam",
"GitProvider",
"GitProviderParam",
"GitSnapshot",
"GitSnapshotDict",
"GitSnapshotParam",
"GitSource",
"GitSourceDict",
"GitSourceParam",
"InitScriptInfo",
"InitScriptInfoDict",
"InitScriptInfoParam",
"Job", "Job",
"JobCluster",
"JobClusterDict",
"JobClusterParam",
"JobDict", "JobDict",
"JobEmailNotifications",
"JobEmailNotificationsDict",
"JobEmailNotificationsParam",
"JobEnvironment",
"JobEnvironmentDict",
"JobEnvironmentParam",
"JobNotificationSettings",
"JobNotificationSettingsDict",
"JobNotificationSettingsParam",
"JobParam",
"JobParameterDefinition",
"JobParameterDefinitionDict",
"JobParameterDefinitionParam",
"JobRunAs",
"JobRunAsDict",
"JobRunAsParam",
"JobsHealthMetric",
"JobsHealthMetricParam",
"JobsHealthOperator",
"JobsHealthOperatorParam",
"JobsHealthRule",
"JobsHealthRuleDict",
"JobsHealthRuleParam",
"JobsHealthRules",
"JobsHealthRulesDict",
"JobsHealthRulesParam",
"Library",
"LibraryDict",
"LibraryParam",
"LocalFileInfo",
"LocalFileInfoDict",
"LocalFileInfoParam",
"LogAnalyticsInfo",
"LogAnalyticsInfoDict",
"LogAnalyticsInfoParam",
"MavenLibrary",
"MavenLibraryDict",
"MavenLibraryParam",
"NotebookTask",
"NotebookTaskDict",
"NotebookTaskParam",
"PauseStatus",
"PauseStatusParam",
"PeriodicTriggerConfiguration",
"PeriodicTriggerConfigurationDict",
"PeriodicTriggerConfigurationParam",
"PeriodicTriggerConfigurationTimeUnit",
"PeriodicTriggerConfigurationTimeUnitParam",
"Permission",
"PermissionDict",
"PermissionParam",
"PipelineTask",
"PipelineTaskDict",
"PipelineTaskParam",
"PythonPyPiLibrary",
"PythonPyPiLibraryDict",
"PythonPyPiLibraryParam",
"PythonWheelTask",
"PythonWheelTaskDict",
"PythonWheelTaskParam",
"QueueSettings",
"QueueSettingsDict",
"QueueSettingsParam",
"RCranLibrary",
"RCranLibraryDict",
"RCranLibraryParam",
"RunIf",
"RunIfParam",
"RunJobTask",
"RunJobTaskDict",
"RunJobTaskParam",
"RuntimeEngine",
"RuntimeEngineParam",
"S3StorageInfo",
"S3StorageInfoDict",
"S3StorageInfoParam",
"Source",
"SourceParam",
"SparkJarTask",
"SparkJarTaskDict",
"SparkJarTaskParam",
"SparkPythonTask",
"SparkPythonTaskDict",
"SparkPythonTaskParam",
"SparkSubmitTask",
"SparkSubmitTaskDict",
"SparkSubmitTaskParam",
"SqlTask",
"SqlTaskAlert",
"SqlTaskAlertDict",
"SqlTaskAlertParam",
"SqlTaskDashboard",
"SqlTaskDashboardDict",
"SqlTaskDashboardParam",
"SqlTaskDict",
"SqlTaskFile",
"SqlTaskFileDict",
"SqlTaskFileParam",
"SqlTaskParam",
"SqlTaskQuery",
"SqlTaskQueryDict",
"SqlTaskQueryParam",
"SqlTaskSubscription",
"SqlTaskSubscriptionDict",
"SqlTaskSubscriptionParam",
"TableUpdateTriggerConfiguration",
"TableUpdateTriggerConfigurationDict",
"TableUpdateTriggerConfigurationParam",
"Task",
"TaskDependency",
"TaskDependencyDict",
"TaskDependencyParam",
"TaskDict",
"TaskEmailNotifications",
"TaskEmailNotificationsDict",
"TaskEmailNotificationsParam",
"TaskNotificationSettings",
"TaskNotificationSettingsDict",
"TaskNotificationSettingsParam",
"TaskParam",
"TriggerSettings",
"TriggerSettingsDict",
"TriggerSettingsParam",
"VolumesStorageInfo",
"VolumesStorageInfoDict",
"VolumesStorageInfoParam",
"Webhook",
"WebhookDict",
"WebhookNotifications",
"WebhookNotificationsDict",
"WebhookNotificationsParam",
"WebhookParam",
"WorkloadType",
"WorkloadTypeDict",
"WorkloadTypeParam",
"WorkspaceStorageInfo",
"WorkspaceStorageInfoDict",
"WorkspaceStorageInfoParam",
]
from databricks.bundles.compute._models.adlsgen2_info import (
Adlsgen2Info,
Adlsgen2InfoDict,
Adlsgen2InfoParam,
)
from databricks.bundles.compute._models.auto_scale import (
AutoScale,
AutoScaleDict,
AutoScaleParam,
)
from databricks.bundles.compute._models.aws_attributes import (
AwsAttributes,
AwsAttributesDict,
AwsAttributesParam,
)
from databricks.bundles.compute._models.aws_availability import (
AwsAvailability,
AwsAvailabilityParam,
)
from databricks.bundles.compute._models.azure_attributes import (
AzureAttributes,
AzureAttributesDict,
AzureAttributesParam,
)
from databricks.bundles.compute._models.azure_availability import (
AzureAvailability,
AzureAvailabilityParam,
)
from databricks.bundles.compute._models.clients_types import (
ClientsTypes,
ClientsTypesDict,
ClientsTypesParam,
)
from databricks.bundles.compute._models.cluster_log_conf import (
ClusterLogConf,
ClusterLogConfDict,
ClusterLogConfParam,
)
from databricks.bundles.compute._models.cluster_spec import (
ClusterSpec,
ClusterSpecDict,
ClusterSpecParam,
)
from databricks.bundles.compute._models.data_security_mode import (
DataSecurityMode,
DataSecurityModeParam,
)
from databricks.bundles.compute._models.dbfs_storage_info import (
DbfsStorageInfo,
DbfsStorageInfoDict,
DbfsStorageInfoParam,
)
from databricks.bundles.compute._models.docker_basic_auth import (
DockerBasicAuth,
DockerBasicAuthDict,
DockerBasicAuthParam,
)
from databricks.bundles.compute._models.docker_image import (
DockerImage,
DockerImageDict,
DockerImageParam,
)
from databricks.bundles.compute._models.ebs_volume_type import (
EbsVolumeType,
EbsVolumeTypeParam,
)
from databricks.bundles.compute._models.environment import (
Environment,
EnvironmentDict,
EnvironmentParam,
)
from databricks.bundles.compute._models.gcp_attributes import (
GcpAttributes,
GcpAttributesDict,
GcpAttributesParam,
)
from databricks.bundles.compute._models.gcp_availability import (
GcpAvailability,
GcpAvailabilityParam,
)
from databricks.bundles.compute._models.gcs_storage_info import (
GcsStorageInfo,
GcsStorageInfoDict,
GcsStorageInfoParam,
)
from databricks.bundles.compute._models.init_script_info import (
InitScriptInfo,
InitScriptInfoDict,
InitScriptInfoParam,
)
from databricks.bundles.compute._models.library import (
Library,
LibraryDict,
LibraryParam,
)
from databricks.bundles.compute._models.local_file_info import (
LocalFileInfo,
LocalFileInfoDict,
LocalFileInfoParam,
)
from databricks.bundles.compute._models.log_analytics_info import (
LogAnalyticsInfo,
LogAnalyticsInfoDict,
LogAnalyticsInfoParam,
)
from databricks.bundles.compute._models.maven_library import (
MavenLibrary,
MavenLibraryDict,
MavenLibraryParam,
)
from databricks.bundles.compute._models.python_py_pi_library import (
PythonPyPiLibrary,
PythonPyPiLibraryDict,
PythonPyPiLibraryParam,
)
from databricks.bundles.compute._models.r_cran_library import (
RCranLibrary,
RCranLibraryDict,
RCranLibraryParam,
)
from databricks.bundles.compute._models.runtime_engine import (
RuntimeEngine,
RuntimeEngineParam,
)
from databricks.bundles.compute._models.s3_storage_info import (
S3StorageInfo,
S3StorageInfoDict,
S3StorageInfoParam,
)
from databricks.bundles.compute._models.volumes_storage_info import (
VolumesStorageInfo,
VolumesStorageInfoDict,
VolumesStorageInfoParam,
)
from databricks.bundles.compute._models.workload_type import (
WorkloadType,
WorkloadTypeDict,
WorkloadTypeParam,
)
from databricks.bundles.compute._models.workspace_storage_info import (
WorkspaceStorageInfo,
WorkspaceStorageInfoDict,
WorkspaceStorageInfoParam,
)
from databricks.bundles.jobs._models.condition import Condition, ConditionParam
from databricks.bundles.jobs._models.condition_task import (
ConditionTask,
ConditionTaskDict,
ConditionTaskParam,
)
from databricks.bundles.jobs._models.condition_task_op import (
ConditionTaskOp,
ConditionTaskOpParam,
)
from databricks.bundles.jobs._models.continuous import (
Continuous,
ContinuousDict,
ContinuousParam,
)
from databricks.bundles.jobs._models.cron_schedule import (
CronSchedule,
CronScheduleDict,
CronScheduleParam,
)
from databricks.bundles.jobs._models.dbt_task import DbtTask, DbtTaskDict, DbtTaskParam
from databricks.bundles.jobs._models.file_arrival_trigger_configuration import (
FileArrivalTriggerConfiguration,
FileArrivalTriggerConfigurationDict,
FileArrivalTriggerConfigurationParam,
)
from databricks.bundles.jobs._models.for_each_task import (
ForEachTask,
ForEachTaskDict,
ForEachTaskParam,
)
from databricks.bundles.jobs._models.git_provider import GitProvider, GitProviderParam
from databricks.bundles.jobs._models.git_snapshot import (
GitSnapshot,
GitSnapshotDict,
GitSnapshotParam,
)
from databricks.bundles.jobs._models.git_source import (
GitSource,
GitSourceDict,
GitSourceParam,
)
from databricks.bundles.jobs._models.job import Job, JobDict, JobParam
from databricks.bundles.jobs._models.job_cluster import (
JobCluster,
JobClusterDict,
JobClusterParam,
)
from databricks.bundles.jobs._models.job_email_notifications import (
JobEmailNotifications,
JobEmailNotificationsDict,
JobEmailNotificationsParam,
)
from databricks.bundles.jobs._models.job_environment import (
JobEnvironment,
JobEnvironmentDict,
JobEnvironmentParam,
)
from databricks.bundles.jobs._models.job_notification_settings import (
JobNotificationSettings,
JobNotificationSettingsDict,
JobNotificationSettingsParam,
)
from databricks.bundles.jobs._models.job_parameter_definition import (
JobParameterDefinition,
JobParameterDefinitionDict,
JobParameterDefinitionParam,
)
from databricks.bundles.jobs._models.job_run_as import (
JobRunAs,
JobRunAsDict,
JobRunAsParam,
)
from databricks.bundles.jobs._models.jobs_health_metric import (
JobsHealthMetric,
JobsHealthMetricParam,
)
from databricks.bundles.jobs._models.jobs_health_operator import (
JobsHealthOperator,
JobsHealthOperatorParam,
)
from databricks.bundles.jobs._models.jobs_health_rule import (
JobsHealthRule,
JobsHealthRuleDict,
JobsHealthRuleParam,
)
from databricks.bundles.jobs._models.jobs_health_rules import (
JobsHealthRules,
JobsHealthRulesDict,
JobsHealthRulesParam,
)
from databricks.bundles.jobs._models.notebook_task import (
NotebookTask,
NotebookTaskDict,
NotebookTaskParam,
)
from databricks.bundles.jobs._models.pause_status import PauseStatus, PauseStatusParam
from databricks.bundles.jobs._models.periodic_trigger_configuration import (
PeriodicTriggerConfiguration,
PeriodicTriggerConfigurationDict,
PeriodicTriggerConfigurationParam,
)
from databricks.bundles.jobs._models.periodic_trigger_configuration_time_unit import (
PeriodicTriggerConfigurationTimeUnit,
PeriodicTriggerConfigurationTimeUnitParam,
)
from databricks.bundles.jobs._models.permission import (
Permission,
PermissionDict,
PermissionParam,
)
from databricks.bundles.jobs._models.pipeline_task import (
PipelineTask,
PipelineTaskDict,
PipelineTaskParam,
)
from databricks.bundles.jobs._models.python_wheel_task import (
PythonWheelTask,
PythonWheelTaskDict,
PythonWheelTaskParam,
)
from databricks.bundles.jobs._models.queue_settings import (
QueueSettings,
QueueSettingsDict,
QueueSettingsParam,
)
from databricks.bundles.jobs._models.run_if import RunIf, RunIfParam
from databricks.bundles.jobs._models.run_job_task import (
RunJobTask,
RunJobTaskDict,
RunJobTaskParam,
)
from databricks.bundles.jobs._models.source import Source, SourceParam
from databricks.bundles.jobs._models.spark_jar_task import (
SparkJarTask,
SparkJarTaskDict,
SparkJarTaskParam,
)
from databricks.bundles.jobs._models.spark_python_task import (
SparkPythonTask,
SparkPythonTaskDict,
SparkPythonTaskParam,
)
from databricks.bundles.jobs._models.spark_submit_task import (
SparkSubmitTask,
SparkSubmitTaskDict,
SparkSubmitTaskParam,
)
from databricks.bundles.jobs._models.sql_task import SqlTask, SqlTaskDict, SqlTaskParam
from databricks.bundles.jobs._models.sql_task_alert import (
SqlTaskAlert,
SqlTaskAlertDict,
SqlTaskAlertParam,
)
from databricks.bundles.jobs._models.sql_task_dashboard import (
SqlTaskDashboard,
SqlTaskDashboardDict,
SqlTaskDashboardParam,
)
from databricks.bundles.jobs._models.sql_task_file import (
SqlTaskFile,
SqlTaskFileDict,
SqlTaskFileParam,
)
from databricks.bundles.jobs._models.sql_task_query import (
SqlTaskQuery,
SqlTaskQueryDict,
SqlTaskQueryParam,
)
from databricks.bundles.jobs._models.sql_task_subscription import (
SqlTaskSubscription,
SqlTaskSubscriptionDict,
SqlTaskSubscriptionParam,
)
from databricks.bundles.jobs._models.table_update_trigger_configuration import (
TableUpdateTriggerConfiguration,
TableUpdateTriggerConfigurationDict,
TableUpdateTriggerConfigurationParam,
)
from databricks.bundles.jobs._models.task import Task, TaskDict, TaskParam
from databricks.bundles.jobs._models.task_dependency import (
TaskDependency,
TaskDependencyDict,
TaskDependencyParam,
)
from databricks.bundles.jobs._models.task_email_notifications import (
TaskEmailNotifications,
TaskEmailNotificationsDict,
TaskEmailNotificationsParam,
)
from databricks.bundles.jobs._models.task_notification_settings import (
TaskNotificationSettings,
TaskNotificationSettingsDict,
TaskNotificationSettingsParam,
)
from databricks.bundles.jobs._models.trigger_settings import (
TriggerSettings,
TriggerSettingsDict,
TriggerSettingsParam,
)
from databricks.bundles.jobs._models.webhook import Webhook, WebhookDict, WebhookParam
from databricks.bundles.jobs._models.webhook_notifications import (
WebhookNotifications,
WebhookNotificationsDict,
WebhookNotificationsParam,
)
def _resolve_recursive_imports():
import typing
from databricks.bundles.core._variable import VariableOr
ForEachTask.__annotations__ = typing.get_type_hints(
ForEachTask,
globalns={"Task": Task, "VariableOr": VariableOr},
)
_resolve_recursive_imports()
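A minimal hedged sketch of what these re-exports enable, assuming this `__init__.py` is `databricks/bundles/jobs/__init__.py` (its imports pull from `databricks.bundles.jobs._models` and re-export the compute models). The job name and cron expression are illustrative.

from databricks.bundles.jobs import CronSchedule, Job  # assumed public package path

job = Job(
    name="nightly-refresh",  # hypothetical job name
    schedule=CronSchedule(quartz_cron_expression="0 0 2 * * ?"),  # daily at 02:00
)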

View File

@ -0,0 +1,10 @@
from enum import Enum
from typing import Literal
class Condition(Enum):
ANY_UPDATED = "ANY_UPDATED"
ALL_UPDATED = "ALL_UPDATED"
ConditionParam = Literal["ANY_UPDATED", "ALL_UPDATED"] | Condition

View File

@ -0,0 +1,68 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
from databricks.bundles.jobs._models.condition_task_op import (
ConditionTaskOp,
ConditionTaskOpParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class ConditionTask:
""""""
left: VariableOr[str]
"""
The left operand of the condition task. Can be either a string value or a job state or parameter reference.
"""
op: VariableOr[ConditionTaskOp]
"""
* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `12.0 == 12` will evaluate to `false`.
* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `12.0 >= 12` will evaluate to `true`, `10.0 >= 12` will evaluate to `false`.
The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `true` or `false` for the comparison.
"""
right: VariableOr[str]
"""
The right operand of the condition task. Can be either a string value or a job state or parameter reference.
"""
@classmethod
def from_dict(cls, value: "ConditionTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ConditionTaskDict":
return _transform_to_json_value(self) # type:ignore
class ConditionTaskDict(TypedDict, total=False):
""""""
left: VariableOr[str]
"""
The left operand of the condition task. Can be either a string value or a job state or parameter reference.
"""
op: VariableOr[ConditionTaskOpParam]
"""
* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `12.0 == 12` will evaluate to `false`.
* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `12.0 >= 12` will evaluate to `true`, `10.0 >= 12` will evaluate to `false`.
The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `true` or `false` for the comparison.
"""
right: VariableOr[str]
"""
The right operand of the condition task. Can be either a string value or a job state or parameter reference.
"""
ConditionTaskParam = ConditionTaskDict | ConditionTask
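A usage sketch (editorial; the `{{...}}` value is a hypothetical job parameter reference): `op` accepts the `ConditionTaskOp` enum imported at the top of this module, or its literal string through the `*Param` alias.

from databricks.bundles.jobs._models.condition_task import ConditionTask
from databricks.bundles.jobs._models.condition_task_op import ConditionTaskOp

check_env = ConditionTask(
    left="{{job.parameters.environment}}",  # hypothetical parameter reference
    op=ConditionTaskOp.EQUAL_TO,
    right="production",
)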

View File

@ -0,0 +1,31 @@
from enum import Enum
from typing import Literal
class ConditionTaskOp(Enum):
"""
* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `12.0 == 12` will evaluate to `false`.
* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `12.0 >= 12` will evaluate to `true`, `10.0 >= 12` will evaluate to `false`.
The boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `true` or `false` for the comparison.
"""
EQUAL_TO = "EQUAL_TO"
GREATER_THAN = "GREATER_THAN"
GREATER_THAN_OR_EQUAL = "GREATER_THAN_OR_EQUAL"
LESS_THAN = "LESS_THAN"
LESS_THAN_OR_EQUAL = "LESS_THAN_OR_EQUAL"
NOT_EQUAL = "NOT_EQUAL"
ConditionTaskOpParam = (
Literal[
"EQUAL_TO",
"GREATER_THAN",
"GREATER_THAN_OR_EQUAL",
"LESS_THAN",
"LESS_THAN_OR_EQUAL",
"NOT_EQUAL",
]
| ConditionTaskOp
)

View File

@ -0,0 +1,39 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
from databricks.bundles.jobs._models.pause_status import PauseStatus, PauseStatusParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Continuous:
""""""
pause_status: VariableOrOptional[PauseStatus] = None
"""
Indicates whether the continuous execution of the job is paused. Defaults to UNPAUSED.
"""
@classmethod
def from_dict(cls, value: "ContinuousDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ContinuousDict":
return _transform_to_json_value(self) # type:ignore
class ContinuousDict(TypedDict, total=False):
""""""
pause_status: VariableOrOptional[PauseStatusParam]
"""
Indicates whether the continuous execution of the job is paused. Defaults to UNPAUSED.
"""
ContinuousParam = ContinuousDict | Continuous

View File

@ -0,0 +1,59 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
from databricks.bundles.jobs._models.pause_status import PauseStatus, PauseStatusParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class CronSchedule:
""""""
quartz_cron_expression: VariableOr[str]
"""
A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.
"""
pause_status: VariableOrOptional[PauseStatus] = None
"""
Indicates whether this schedule is paused.
"""
timezone_id: VariableOr[str] = "UTC"
"""
A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.
"""
@classmethod
def from_dict(cls, value: "CronScheduleDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "CronScheduleDict":
return _transform_to_json_value(self) # type:ignore
class CronScheduleDict(TypedDict, total=False):
""""""
quartz_cron_expression: VariableOr[str]
"""
A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.
"""
pause_status: VariableOrOptional[PauseStatusParam]
"""
Indicates whether this schedule is paused.
"""
timezone_id: VariableOr[str]
"""
A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.
"""
CronScheduleParam = CronScheduleDict | CronSchedule
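A usage sketch (editorial): only `quartz_cron_expression` is required; `timezone_id` falls back to the `"UTC"` default declared above, and `pause_status` is left unset.

from databricks.bundles.jobs._models.cron_schedule import CronSchedule

morning_schedule = CronSchedule(
    quartz_cron_expression="0 30 7 * * ?",  # every day at 07:30 in the schedule's timezone
)
# timezone_id keeps its "UTC" default from the dataclass definition above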

View File

@ -0,0 +1,111 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList, VariableOrOptional
from databricks.bundles.jobs._models.source import Source, SourceParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class DbtTask:
""""""
commands: VariableOrList[str]
"""
A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of 10 commands can be provided.
"""
catalog: VariableOrOptional[str] = None
"""
Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1.
"""
profiles_directory: VariableOrOptional[str] = None
"""
Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used.
"""
project_directory: VariableOrOptional[str] = None
"""
Path to the project directory. Optional for Git sourced tasks, in which
case if no value is provided, the root of the Git repository is used.
"""
schema: VariableOrOptional[str] = None
"""
Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.
"""
source: VariableOrOptional[Source] = None
"""
Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved
from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Project is located in Databricks workspace.
* `GIT`: Project is located in cloud Git provider.
"""
warehouse_id: VariableOrOptional[str] = None
"""
ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.
"""
@classmethod
def from_dict(cls, value: "DbtTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "DbtTaskDict":
return _transform_to_json_value(self) # type:ignore
class DbtTaskDict(TypedDict, total=False):
""""""
commands: VariableOrList[str]
"""
A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of 10 commands can be provided.
"""
catalog: VariableOrOptional[str]
"""
Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks >= 1.1.1.
"""
profiles_directory: VariableOrOptional[str]
"""
Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. If no warehouse_id is specified and this folder is unset, the root directory is used.
"""
project_directory: VariableOrOptional[str]
"""
Path to the project directory. Optional for Git sourced tasks, in which
case if no value is provided, the root of the Git repository is used.
"""
schema: VariableOrOptional[str]
"""
Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.
"""
source: VariableOrOptional[SourceParam]
"""
Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved
from the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Project is located in Databricks workspace.
* `GIT`: Project is located in cloud Git provider.
"""
warehouse_id: VariableOrOptional[str]
"""
ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.
"""
DbtTaskParam = DbtTaskDict | DbtTask
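A usage sketch (editorial; the warehouse ID and schema are hypothetical): `commands` is the only required field, and every command must start with `dbt`.

from databricks.bundles.jobs._models.dbt_task import DbtTask

dbt_task = DbtTask(
    commands=["dbt deps", "dbt run"],
    warehouse_id="1234567890abcdef",  # hypothetical SQL warehouse ID
    schema="analytics",               # hypothetical target schema
)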

View File

@ -0,0 +1,66 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class FileArrivalTriggerConfiguration:
""""""
url: VariableOr[str]
"""
URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.
"""
min_time_between_triggers_seconds: VariableOrOptional[int] = None
"""
If set, the trigger starts a run only after the specified amount of time passed since
the last time the trigger fired. The minimum allowed value is 60 seconds
"""
wait_after_last_change_seconds: VariableOrOptional[int] = None
"""
If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.
This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The
minimum allowed value is 60 seconds.
"""
@classmethod
def from_dict(cls, value: "FileArrivalTriggerConfigurationDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "FileArrivalTriggerConfigurationDict":
return _transform_to_json_value(self) # type:ignore
class FileArrivalTriggerConfigurationDict(TypedDict, total=False):
""""""
url: VariableOr[str]
"""
URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.
"""
min_time_between_triggers_seconds: VariableOrOptional[int]
"""
If set, the trigger starts a run only after the specified amount of time passed since
the last time the trigger fired. The minimum allowed value is 60 seconds
"""
wait_after_last_change_seconds: VariableOrOptional[int]
"""
If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.
This makes it possible to wait for a batch of incoming files to arrive before triggering a run. The
minimum allowed value is 60 seconds.
"""
FileArrivalTriggerConfigurationParam = (
FileArrivalTriggerConfigurationDict | FileArrivalTriggerConfiguration
)
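A usage sketch (editorial; the URL is a hypothetical external location subpath): both wait settings are optional and, per the docstrings above, must be at least 60 seconds when set.

from databricks.bundles.jobs._models.file_arrival_trigger_configuration import (
    FileArrivalTriggerConfiguration,
)

file_trigger = FileArrivalTriggerConfiguration(
    url="s3://landing-bucket/incoming/",  # hypothetical external location subpath
    wait_after_last_change_seconds=120,   # wait for a batch of files to settle before triggering
)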

View File

@ -0,0 +1,64 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
from databricks.bundles.jobs._models.task import Task, TaskParam
@dataclass(kw_only=True)
class ForEachTask:
""""""
inputs: VariableOr[str]
"""
Array for task to iterate on. This can be a JSON string or a reference to
an array parameter.
"""
task: VariableOr["Task"]
"""
Configuration for the task that will be run for each element in the array
"""
concurrency: VariableOrOptional[int] = None
"""
An optional maximum allowed number of concurrent runs of the task.
Set this value if you want to be able to execute multiple runs of the task concurrently.
"""
@classmethod
def from_dict(cls, value: "ForEachTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "ForEachTaskDict":
return _transform_to_json_value(self) # type:ignore
class ForEachTaskDict(TypedDict, total=False):
""""""
inputs: VariableOr[str]
"""
Array for task to iterate on. This can be a JSON string or a reference to
an array parameter.
"""
task: VariableOr["TaskParam"]
"""
Configuration for the task that will be run for each element in the array
"""
concurrency: VariableOrOptional[int]
"""
An optional maximum allowed number of concurrent runs of the task.
Set this value if you want to be able to execute multiple runs of the task concurrently.
"""
ForEachTaskParam = ForEachTaskDict | ForEachTask

View File

@ -0,0 +1,28 @@
from enum import Enum
from typing import Literal
class GitProvider(Enum):
GIT_HUB = "gitHub"
BITBUCKET_CLOUD = "bitbucketCloud"
AZURE_DEV_OPS_SERVICES = "azureDevOpsServices"
GIT_HUB_ENTERPRISE = "gitHubEnterprise"
BITBUCKET_SERVER = "bitbucketServer"
GIT_LAB = "gitLab"
GIT_LAB_ENTERPRISE_EDITION = "gitLabEnterpriseEdition"
AWS_CODE_COMMIT = "awsCodeCommit"
GitProviderParam = (
Literal[
"gitHub",
"bitbucketCloud",
"azureDevOpsServices",
"gitHubEnterprise",
"bitbucketServer",
"gitLab",
"gitLabEnterpriseEdition",
"awsCodeCommit",
]
| GitProvider
)

View File

@ -0,0 +1,40 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class GitSnapshot:
"""
Read-only state of the remote repository at the time the job was run. This field is only included on job runs.
"""
used_commit: VariableOrOptional[str] = None
"""
Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to.
"""
@classmethod
def from_dict(cls, value: "GitSnapshotDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "GitSnapshotDict":
return _transform_to_json_value(self) # type:ignore
class GitSnapshotDict(TypedDict, total=False):
""""""
used_commit: VariableOrOptional[str]
"""
Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to.
"""
GitSnapshotParam = GitSnapshotDict | GitSnapshot

View File

@ -0,0 +1,85 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
from databricks.bundles.jobs._models.git_provider import GitProvider, GitProviderParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class GitSource:
"""
An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.
If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.
Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.
"""
git_provider: VariableOr[GitProvider]
"""
Unique identifier of the service used to host the Git repository. The value is case insensitive.
"""
git_url: VariableOr[str]
"""
URL of the repository to be cloned by this job.
"""
git_branch: VariableOrOptional[str] = None
"""
Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit.
"""
git_commit: VariableOrOptional[str] = None
"""
Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag.
"""
git_tag: VariableOrOptional[str] = None
"""
Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit.
"""
@classmethod
def from_dict(cls, value: "GitSourceDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "GitSourceDict":
return _transform_to_json_value(self) # type:ignore
class GitSourceDict(TypedDict, total=False):
""""""
git_provider: VariableOr[GitProviderParam]
"""
Unique identifier of the service used to host the Git repository. The value is case insensitive.
"""
git_url: VariableOr[str]
"""
URL of the repository to be cloned by this job.
"""
git_branch: VariableOrOptional[str]
"""
Name of the branch to be checked out and used by this job. This field cannot be specified in conjunction with git_tag or git_commit.
"""
git_commit: VariableOrOptional[str]
"""
Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag.
"""
git_tag: VariableOrOptional[str]
"""
Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit.
"""
GitSourceParam = GitSourceDict | GitSource
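A usage sketch (editorial; the repository URL is hypothetical): `git_provider` and `git_url` are required, and per the docstrings only one of `git_branch`, `git_tag`, or `git_commit` may be supplied.

from databricks.bundles.jobs._models.git_provider import GitProvider
from databricks.bundles.jobs._models.git_source import GitSource

source = GitSource(
    git_provider=GitProvider.GIT_HUB,
    git_url="https://github.com/example/pipelines",  # hypothetical repository
    git_branch="main",
)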

View File

@ -1,12 +1,59 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._resource import Resource
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOrDict,
VariableOrList,
VariableOrOptional,
)
from databricks.bundles.jobs._models.continuous import (
Continuous,
ContinuousParam,
)
from databricks.bundles.jobs._models.cron_schedule import (
CronSchedule,
CronScheduleParam,
)
from databricks.bundles.jobs._models.git_source import GitSource, GitSourceParam
from databricks.bundles.jobs._models.job_cluster import JobCluster, JobClusterParam
from databricks.bundles.jobs._models.job_email_notifications import (
JobEmailNotifications,
JobEmailNotificationsParam,
)
from databricks.bundles.jobs._models.job_environment import (
JobEnvironment,
JobEnvironmentParam,
)
from databricks.bundles.jobs._models.job_notification_settings import (
JobNotificationSettings,
JobNotificationSettingsParam,
)
from databricks.bundles.jobs._models.job_parameter_definition import (
JobParameterDefinition,
JobParameterDefinitionParam,
)
from databricks.bundles.jobs._models.job_run_as import JobRunAs, JobRunAsParam
from databricks.bundles.jobs._models.jobs_health_rules import (
JobsHealthRules,
JobsHealthRulesParam,
)
from databricks.bundles.jobs._models.permission import Permission, PermissionParam
from databricks.bundles.jobs._models.queue_settings import (
QueueSettings,
QueueSettingsParam,
)
from databricks.bundles.jobs._models.task import Task, TaskParam
from databricks.bundles.jobs._models.trigger_settings import (
TriggerSettings,
TriggerSettingsParam,
)
from databricks.bundles.jobs._models.webhook_notifications import (
WebhookNotifications,
WebhookNotificationsParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@ -16,16 +63,116 @@ if TYPE_CHECKING:
class Job(Resource):
""""""
budget_policy_id: VariableOrOptional[str] = None
"""
The id of the user specified budget policy to use for this job.
If not specified, a default budget policy may be applied when creating or modifying the job.
See `effective_budget_policy_id` for the budget policy used by this workload.
"""
continuous: VariableOrOptional[Continuous] = None
"""
An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.
"""
description: VariableOrOptional[str] = None
"""
An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding.
"""
email_notifications: VariableOrOptional[JobEmailNotifications] = None
"""
An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.
"""
environments: VariableOrList[JobEnvironment] = field(default_factory=list)
"""
A list of task execution environment specifications that can be referenced by serverless tasks of this job.
An environment is required to be present for serverless tasks.
For serverless notebook tasks, the environment is accessible in the notebook environment panel.
For other serverless tasks, the task environment is required to be specified using environment_key in the task settings.
"""
git_source: VariableOrOptional[GitSource] = None
"""
An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.
If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.
Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.
"""
health: VariableOrOptional[JobsHealthRules] = None
job_clusters: VariableOrList[JobCluster] = field(default_factory=list)
"""
A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.
"""
max_concurrent_runs: VariableOrOptional[int] = None
"""
An optional maximum allowed number of concurrent runs of the job.
Set this value if you want to be able to execute multiple runs of the same job concurrently.
This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.
This setting affects only new runs. For example, suppose the job's concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won't kill any of the active runs.
However, from then on, new runs are skipped unless there are fewer than 3 active runs.
This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.
"""
name: VariableOrOptional[str] = None
"""
An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding.
"""
notification_settings: VariableOrOptional[JobNotificationSettings] = None
"""
Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.
"""
parameters: VariableOrList[JobParameterDefinition] = field(default_factory=list)
"""
Job-level parameter definitions
"""
permissions: VariableOrList[Permission] = field(default_factory=list)
queue: VariableOrOptional[QueueSettings] = None
"""
The queue settings of the job.
"""
run_as: VariableOrOptional[JobRunAs] = None
schedule: VariableOrOptional[CronSchedule] = None
"""
An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking Run Now in the Jobs UI or sending an API request to `runNow`.
"""
tags: VariableOrDict[str] = field(default_factory=dict)
"""
A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.
"""
tasks: VariableOrList[Task] = field(default_factory=list)
"""
A list of task specifications to be executed by this job.
"""
timeout_seconds: VariableOrOptional[int] = None
"""
An optional timeout applied to each run of this job. A value of `0` means no timeout.
"""
trigger: VariableOrOptional[TriggerSettings] = None
"""
A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking Run Now in the Jobs UI or sending an API request to `runNow`.
"""
webhook_notifications: VariableOrOptional[WebhookNotifications] = None
"""
A collection of system notification IDs to notify when runs of this job begin or complete.
"""
@classmethod
def from_dict(cls, value: "JobDict") -> "Self":
return _transform(cls, value)
@ -37,15 +184,115 @@ class Job(Resource):
class JobDict(TypedDict, total=False):
""""""
budget_policy_id: VariableOrOptional[str]
"""
The id of the user specified budget policy to use for this job.
If not specified, a default budget policy may be applied when creating or modifying the job.
See `effective_budget_policy_id` for the budget policy used by this workload.
"""
continuous: VariableOrOptional[ContinuousParam]
"""
An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.
"""
description: VariableOrOptional[str]
"""
An optional description for the job. The maximum length is 27700 characters in UTF-8 encoding.
"""
email_notifications: VariableOrOptional[JobEmailNotificationsParam]
"""
An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.
"""
environments: VariableOrList[JobEnvironmentParam]
"""
A list of task execution environment specifications that can be referenced by serverless tasks of this job.
An environment is required to be present for serverless tasks.
For serverless notebook tasks, the environment is accessible in the notebook environment panel.
For other serverless tasks, the task environment is required to be specified using environment_key in the task settings.
"""
git_source: VariableOrOptional[GitSourceParam]
"""
An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.
If `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.
Note: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.
"""
health: VariableOrOptional[JobsHealthRulesParam]
job_clusters: VariableOrList[JobClusterParam]
"""
A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.
"""
max_concurrent_runs: VariableOrOptional[int]
"""
An optional maximum allowed number of concurrent runs of the job.
Set this value if you want to be able to execute multiple runs of the same job concurrently.
This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.
This setting affects only new runs. For example, suppose the job's concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won't kill any of the active runs.
However, from then on, new runs are skipped unless there are fewer than 3 active runs.
This value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.
"""
name: VariableOrOptional[str]
"""
An optional name for the job. The maximum length is 4096 bytes in UTF-8 encoding.
"""
notification_settings: VariableOrOptional[JobNotificationSettingsParam]
"""
Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.
"""
parameters: VariableOrList[JobParameterDefinitionParam]
"""
Job-level parameter definitions
"""
permissions: VariableOrList[PermissionParam]
queue: VariableOrOptional[QueueSettingsParam]
"""
The queue settings of the job.
"""
run_as: VariableOrOptional[JobRunAsParam]
schedule: VariableOrOptional[CronScheduleParam]
"""
An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking Run Now in the Jobs UI or sending an API request to `runNow`.
"""
tags: VariableOrDict[str]
"""
A map of tags associated with the job. These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.
"""
tasks: VariableOrList[TaskParam]
"""
A list of task specifications to be executed by this job.
"""
timeout_seconds: VariableOrOptional[int]
"""
An optional timeout applied to each run of this job. A value of `0` means no timeout.
"""
trigger: VariableOrOptional[TriggerSettingsParam]
"""
A configuration to trigger a run when certain conditions are met. The default behavior is that the job runs only when triggered by clicking Run Now in the Jobs UI or sending an API request to `runNow`.
"""
webhook_notifications: VariableOrOptional[WebhookNotificationsParam]
"""
A collection of system notification IDs to notify when runs of this job begin or complete.
"""
JobParam = JobDict | Job
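Taken together, the generated `Job`, `JobDict`, and `JobParam` types allow either dataclass-style or plain-dict construction. A minimal sketch of the dict round trip follows; the `databricks.bundles.jobs` import path and all field values are assumptions for illustration and are not part of this diff.

from databricks.bundles.jobs import Job  # assumed public re-export; the generated module lives under databricks.bundles.jobs._models

job = Job.from_dict(
    {
        "name": "nightly-refresh",  # <= 4096 bytes, per the docstring above
        "max_concurrent_runs": 1,
        "tags": {"team": "data-eng"},  # forwarded to job clusters as cluster tags
        "parameters": [{"name": "env", "default": "dev"}],
        "queue": {"enabled": True},
        "email_notifications": {"on_failure": ["owner@example.com"]},
    }
)

# as_dict() is expected to produce the JSON-ready JobDict representation again.
assert job.as_dict()["name"] == "nightly-refresh"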

View File

@ -0,0 +1,54 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.cluster_spec import (
ClusterSpec,
ClusterSpecParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobCluster:
""""""
job_cluster_key: VariableOr[str]
"""
A unique name for the job cluster. This field is required and must be unique within the job.
`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.
"""
new_cluster: VariableOr[ClusterSpec]
"""
If new_cluster, a description of a cluster that is created for each task.
"""
@classmethod
def from_dict(cls, value: "JobClusterDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobClusterDict":
return _transform_to_json_value(self) # type:ignore
class JobClusterDict(TypedDict, total=False):
""""""
job_cluster_key: VariableOr[str]
"""
A unique name for the job cluster. This field is required and must be unique within the job.
`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.
"""
new_cluster: VariableOr[ClusterSpecParam]
"""
If new_cluster, a description of a cluster that is created for each task.
"""
JobClusterParam = JobClusterDict | JobCluster
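As a quick illustration of how `job_cluster_key` ties a shared cluster to tasks, here is a hedged sketch. The `ClusterSpec` field names (`spark_version`, `node_type_id`, `num_workers`) come from the standard Databricks clusters API rather than this excerpt, the values are illustrative, and the import path is assumed.

from databricks.bundles.jobs import JobCluster  # assumed public re-export

shared_cluster = JobCluster.from_dict(
    {
        "job_cluster_key": "main",  # tasks reference this key to reuse the cluster
        "new_cluster": {
            # ClusterSpec passed as a plain dict; names and values are illustrative.
            "spark_version": "15.4.x-scala2.12",
            "node_type_id": "i3.xlarge",
            "num_workers": 2,
        },
    }
)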

View File

@ -0,0 +1,96 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobEmailNotifications:
""""""
no_alert_for_skipped_runs: VariableOrOptional[bool] = None
"""
If true, do not send email to recipients specified in `on_failure` if the run is skipped.
This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field.
"""
on_duration_warning_threshold_exceeded: VariableOrList[str] = field(
default_factory=list
)
"""
A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.
"""
on_failure: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_start: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_streaming_backlog_exceeded: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
"""
on_success: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
@classmethod
def from_dict(cls, value: "JobEmailNotificationsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobEmailNotificationsDict":
return _transform_to_json_value(self) # type:ignore
class JobEmailNotificationsDict(TypedDict, total=False):
""""""
no_alert_for_skipped_runs: VariableOrOptional[bool]
"""
If true, do not send email to recipients specified in `on_failure` if the run is skipped.
This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field.
"""
on_duration_warning_threshold_exceeded: VariableOrList[str]
"""
A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.
"""
on_failure: VariableOrList[str]
"""
A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_start: VariableOrList[str]
"""
A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_streaming_backlog_exceeded: VariableOrList[str]
"""
A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
"""
on_success: VariableOrList[str]
"""
A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
JobEmailNotificationsParam = JobEmailNotificationsDict | JobEmailNotifications
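A short sketch of the notification model: the list fields default to empty, so only the recipients of interest need to be set. The recipient address is illustrative and the import path is assumed.

from databricks.bundles.jobs import JobEmailNotifications  # assumed public re-export

notifications = JobEmailNotifications(
    on_failure=["oncall@example.com"],  # illustrative recipient
    no_alert_for_skipped_runs=True,  # deprecated; prefer notification_settings
)

assert notifications.on_start == []  # unset list fields default to empty lists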

View File

@ -0,0 +1,43 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.environment import Environment, EnvironmentParam
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobEnvironment:
""""""
environment_key: VariableOr[str]
"""
The key of an environment. It has to be unique within a job.
"""
spec: VariableOrOptional[Environment] = None
@classmethod
def from_dict(cls, value: "JobEnvironmentDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobEnvironmentDict":
return _transform_to_json_value(self) # type:ignore
class JobEnvironmentDict(TypedDict, total=False):
""""""
environment_key: VariableOr[str]
"""
The key of an environment. It has to be unique within a job.
"""
spec: VariableOrOptional[EnvironmentParam]
JobEnvironmentParam = JobEnvironmentDict | JobEnvironment

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobNotificationSettings:
""""""
no_alert_for_canceled_runs: VariableOrOptional[bool] = None
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.
"""
no_alert_for_skipped_runs: VariableOrOptional[bool] = None
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.
"""
@classmethod
def from_dict(cls, value: "JobNotificationSettingsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobNotificationSettingsDict":
return _transform_to_json_value(self) # type:ignore
class JobNotificationSettingsDict(TypedDict, total=False):
""""""
no_alert_for_canceled_runs: VariableOrOptional[bool]
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.
"""
no_alert_for_skipped_runs: VariableOrOptional[bool]
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.
"""
JobNotificationSettingsParam = JobNotificationSettingsDict | JobNotificationSettings

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobParameterDefinition:
""""""
default: VariableOr[str]
"""
Default value of the parameter.
"""
name: VariableOr[str]
"""
The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`
"""
@classmethod
def from_dict(cls, value: "JobParameterDefinitionDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobParameterDefinitionDict":
return _transform_to_json_value(self) # type:ignore
class JobParameterDefinitionDict(TypedDict, total=False):
""""""
default: VariableOr[str]
"""
Default value of the parameter.
"""
name: VariableOr[str]
"""
The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`
"""
JobParameterDefinitionParam = JobParameterDefinitionDict | JobParameterDefinition

View File

@ -0,0 +1,63 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobRunAs:
"""
Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.
Either `user_name` or `service_principal_name` should be specified. If not, an error is thrown.
"""
service_principal_name: VariableOrOptional[str] = None
"""
Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.
"""
user_name: VariableOrOptional[str] = None
"""
The email of an active workspace user. Non-admin users can only set this field to their own email.
"""
def __post_init__(self):
union_fields = [
self.user_name,
self.service_principal_name,
]
if sum(f is not None for f in union_fields) != 1:
raise ValueError(
"JobRunAs must specify exactly one of 'user_name', 'service_principal_name'"
)
@classmethod
def from_dict(cls, value: "JobRunAsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobRunAsDict":
return _transform_to_json_value(self) # type:ignore
class JobRunAsDict(TypedDict, total=False):
""""""
service_principal_name: VariableOrOptional[str]
"""
Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.
"""
user_name: VariableOrOptional[str]
"""
The email of an active workspace user. Non-admin users can only set this field to their own email.
"""
JobRunAsParam = JobRunAsDict | JobRunAs
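Because `__post_init__` enforces that exactly one of `user_name` and `service_principal_name` is set, constructing a `JobRunAs` with both (or neither) raises `ValueError`. A small sketch, with an assumed import path and a placeholder application ID:

from databricks.bundles.jobs import JobRunAs  # assumed public re-export

run_as = JobRunAs(
    service_principal_name="00000000-0000-0000-0000-000000000000"  # placeholder application ID
)

try:
    JobRunAs()  # neither field set, so the exactly-one-of check fails
except ValueError as err:
    print(err)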

View File

@ -0,0 +1,32 @@
from enum import Enum
from typing import Literal
class JobsHealthMetric(Enum):
"""
Specifies the health metric that is being evaluated for a particular health rule.
* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.
* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.
* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.
* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.
* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.
"""
RUN_DURATION_SECONDS = "RUN_DURATION_SECONDS"
STREAMING_BACKLOG_BYTES = "STREAMING_BACKLOG_BYTES"
STREAMING_BACKLOG_RECORDS = "STREAMING_BACKLOG_RECORDS"
STREAMING_BACKLOG_SECONDS = "STREAMING_BACKLOG_SECONDS"
STREAMING_BACKLOG_FILES = "STREAMING_BACKLOG_FILES"
JobsHealthMetricParam = (
Literal[
"RUN_DURATION_SECONDS",
"STREAMING_BACKLOG_BYTES",
"STREAMING_BACKLOG_RECORDS",
"STREAMING_BACKLOG_SECONDS",
"STREAMING_BACKLOG_FILES",
]
| JobsHealthMetric
)

View File

@ -0,0 +1,13 @@
from enum import Enum
from typing import Literal
class JobsHealthOperator(Enum):
"""
Specifies the operator used to compare the health metric value with the specified threshold.
"""
GREATER_THAN = "GREATER_THAN"
JobsHealthOperatorParam = Literal["GREATER_THAN"] | JobsHealthOperator

View File

@ -0,0 +1,54 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
from databricks.bundles.jobs._models.jobs_health_metric import (
JobsHealthMetric,
JobsHealthMetricParam,
)
from databricks.bundles.jobs._models.jobs_health_operator import (
JobsHealthOperator,
JobsHealthOperatorParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobsHealthRule:
""""""
metric: VariableOr[JobsHealthMetric]
op: VariableOr[JobsHealthOperator]
value: VariableOr[int]
"""
Specifies the threshold value that the health metric should obey to satisfy the health rule.
"""
@classmethod
def from_dict(cls, value: "JobsHealthRuleDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobsHealthRuleDict":
return _transform_to_json_value(self) # type:ignore
class JobsHealthRuleDict(TypedDict, total=False):
""""""
metric: VariableOr[JobsHealthMetricParam]
op: VariableOr[JobsHealthOperatorParam]
value: VariableOr[int]
"""
Specifies the threshold value that the health metric should obey to satisfy the health rule.
"""
JobsHealthRuleParam = JobsHealthRuleDict | JobsHealthRule

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList
from databricks.bundles.jobs._models.jobs_health_rule import (
JobsHealthRule,
JobsHealthRuleParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class JobsHealthRules:
"""
An optional set of health rules that can be defined for this job.
"""
rules: VariableOrList[JobsHealthRule] = field(default_factory=list)
@classmethod
def from_dict(cls, value: "JobsHealthRulesDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "JobsHealthRulesDict":
return _transform_to_json_value(self) # type:ignore
class JobsHealthRulesDict(TypedDict, total=False):
""""""
rules: VariableOrList[JobsHealthRuleParam]
JobsHealthRulesParam = JobsHealthRulesDict | JobsHealthRules
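A hedged sketch of a health rule that alerts once a run exceeds one hour, using the metric and operator enums defined above; the public import path is assumed.

from databricks.bundles.jobs import (  # assumed public re-exports
    JobsHealthMetric,
    JobsHealthOperator,
    JobsHealthRule,
    JobsHealthRules,
)

health = JobsHealthRules(
    rules=[
        JobsHealthRule(
            metric=JobsHealthMetric.RUN_DURATION_SECONDS,
            op=JobsHealthOperator.GREATER_THAN,
            value=3600,  # threshold in seconds
        )
    ]
)

# The dict form accepts the string literals instead of the enum members:
same_health = JobsHealthRules.from_dict(
    {"rules": [{"metric": "RUN_DURATION_SECONDS", "op": "GREATER_THAN", "value": 3600}]}
)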

View File

@ -0,0 +1,107 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrDict,
VariableOrOptional,
)
from databricks.bundles.jobs._models.source import Source, SourceParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class NotebookTask:
""""""
notebook_path: VariableOr[str]
"""
The path of the notebook to be run in the Databricks workspace or remote repository.
For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.
For notebooks stored in a remote repository, the path must be relative. This field is required.
"""
base_parameters: VariableOrDict[str] = field(default_factory=dict)
"""
Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run
Now with parameters specified, the two parameters maps are merged. If the same key is specified in
`base_parameters` and in `run-now`, the value from `run-now` is used.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
If the notebook takes a parameter that is not specified in the job's `base_parameters` or the `run-now` override parameters,
the default value from the notebook is used.
Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).
The JSON representation of this field cannot exceed 1MB.
"""
source: VariableOrOptional[Source] = None
"""
Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Notebook is located in Databricks workspace.
* `GIT`: Notebook is located in cloud Git provider.
"""
warehouse_id: VariableOrOptional[str] = None
"""
Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.
Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.
"""
@classmethod
def from_dict(cls, value: "NotebookTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "NotebookTaskDict":
return _transform_to_json_value(self) # type:ignore
class NotebookTaskDict(TypedDict, total=False):
""""""
notebook_path: VariableOr[str]
"""
The path of the notebook to be run in the Databricks workspace or remote repository.
For notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.
For notebooks stored in a remote repository, the path must be relative. This field is required.
"""
base_parameters: VariableOrDict[str]
"""
Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/run
Now with parameters specified, the two parameters maps are merged. If the same key is specified in
`base_parameters` and in `run-now`, the value from `run-now` is used.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
If the notebook takes a parameter that is not specified in the job's `base_parameters` or the `run-now` override parameters,
the default value from the notebook is used.
Retrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).
The JSON representation of this field cannot exceed 1MB.
"""
source: VariableOrOptional[SourceParam]
"""
Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: Notebook is located in Databricks workspace.
* `GIT`: Notebook is located in cloud Git provider.
"""
warehouse_id: VariableOrOptional[str]
"""
Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.
Note that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.
"""
NotebookTaskParam = NotebookTaskDict | NotebookTask
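A minimal sketch of a workspace notebook task; the notebook path and parameters are illustrative, and the import path is assumed.

from databricks.bundles.jobs import NotebookTask, Source  # assumed public re-exports

ingest_task = NotebookTask(
    notebook_path="/Workspace/Repos/example/etl/ingest",  # absolute workspace path
    base_parameters={"env": "dev"},  # merged with (and overridden by) run-now parameters
    source=Source.WORKSPACE,
)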

View File

@ -0,0 +1,10 @@
from enum import Enum
from typing import Literal
class PauseStatus(Enum):
UNPAUSED = "UNPAUSED"
PAUSED = "PAUSED"
PauseStatusParam = Literal["UNPAUSED", "PAUSED"] | PauseStatus

View File

@ -0,0 +1,54 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
from databricks.bundles.jobs._models.periodic_trigger_configuration_time_unit import (
PeriodicTriggerConfigurationTimeUnit,
PeriodicTriggerConfigurationTimeUnitParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class PeriodicTriggerConfiguration:
""""""
interval: VariableOr[int]
"""
The interval at which the trigger should run.
"""
unit: VariableOr[PeriodicTriggerConfigurationTimeUnit]
"""
The unit of time for the interval.
"""
@classmethod
def from_dict(cls, value: "PeriodicTriggerConfigurationDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "PeriodicTriggerConfigurationDict":
return _transform_to_json_value(self) # type:ignore
class PeriodicTriggerConfigurationDict(TypedDict, total=False):
""""""
interval: VariableOr[int]
"""
The interval at which the trigger should run.
"""
unit: VariableOr[PeriodicTriggerConfigurationTimeUnitParam]
"""
The unit of time for the interval.
"""
PeriodicTriggerConfigurationParam = (
PeriodicTriggerConfigurationDict | PeriodicTriggerConfiguration
)

View File

@ -0,0 +1,13 @@
from enum import Enum
from typing import Literal
class PeriodicTriggerConfigurationTimeUnit(Enum):
HOURS = "HOURS"
DAYS = "DAYS"
WEEKS = "WEEKS"
PeriodicTriggerConfigurationTimeUnitParam = (
Literal["HOURS", "DAYS", "WEEKS"] | PeriodicTriggerConfigurationTimeUnit
)

View File

@ -0,0 +1,80 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Permission:
""""""
level: VariableOr[str]
"""
The allowed permission for user, group, service principal defined for this permission.
"""
group_name: VariableOrOptional[str] = None
"""
The name of the group that has the permission set in level.
"""
service_principal_name: VariableOrOptional[str] = None
"""
The name of the service principal that has the permission set in level.
"""
user_name: VariableOrOptional[str] = None
"""
The name of the user that has the permission set in level.
"""
def __post_init__(self):
union_fields = [
self.user_name,
self.service_principal_name,
self.group_name,
]
if sum(f is not None for f in union_fields) != 1:
raise ValueError(
"Permission must specify exactly one of 'user_name', 'service_principal_name', 'group_name'"
)
@classmethod
def from_dict(cls, value: "PermissionDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "PermissionDict":
return _transform_to_json_value(self) # type:ignore
class PermissionDict(TypedDict, total=False):
""""""
level: VariableOr[str]
"""
The allowed permission for user, group, service principal defined for this permission.
"""
group_name: VariableOrOptional[str]
"""
The name of the group that has the permission set in level.
"""
service_principal_name: VariableOrOptional[str]
"""
The name of the service principal that has the permission set in level.
"""
user_name: VariableOrOptional[str]
"""
The name of the user that has the permission set in level.
"""
PermissionParam = PermissionDict | Permission
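Like `JobRunAs`, `Permission` enforces exactly one principal field in `__post_init__`. A sketch with an assumed import path and illustrative level and principal names:

from databricks.bundles.jobs import Permission  # assumed public re-export

perm = Permission(level="CAN_MANAGE", group_name="data-eng")  # illustrative values

try:
    Permission(level="CAN_VIEW", user_name="a@example.com", group_name="data-eng")
except ValueError:
    pass  # more than one principal field set, rejected by __post_init__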

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class PipelineTask:
""""""
pipeline_id: VariableOr[str]
"""
The full name of the pipeline task to execute.
"""
full_refresh: VariableOrOptional[bool] = None
"""
If true, triggers a full refresh on the delta live table.
"""
@classmethod
def from_dict(cls, value: "PipelineTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "PipelineTaskDict":
return _transform_to_json_value(self) # type:ignore
class PipelineTaskDict(TypedDict, total=False):
""""""
pipeline_id: VariableOr[str]
"""
The full name of the pipeline task to execute.
"""
full_refresh: VariableOrOptional[bool]
"""
If true, triggers a full refresh on the delta live table.
"""
PipelineTaskParam = PipelineTaskDict | PipelineTask

View File

@ -0,0 +1,68 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrDict, VariableOrList
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class PythonWheelTask:
""""""
entry_point: VariableOr[str]
"""
Named entry point to use. If it does not exist in the metadata of the package, the function is executed from the package directly using `$packageName.$entryPoint()`.
"""
package_name: VariableOr[str]
"""
Name of the package to execute
"""
named_parameters: VariableOrDict[str] = field(default_factory=dict)
"""
Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null.
"""
parameters: VariableOrList[str] = field(default_factory=list)
"""
Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.
"""
@classmethod
def from_dict(cls, value: "PythonWheelTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "PythonWheelTaskDict":
return _transform_to_json_value(self) # type:ignore
class PythonWheelTaskDict(TypedDict, total=False):
""""""
entry_point: VariableOr[str]
"""
Named entry point to use. If it does not exist in the metadata of the package, the function is executed from the package directly using `$packageName.$entryPoint()`.
"""
package_name: VariableOr[str]
"""
Name of the package to execute
"""
named_parameters: VariableOrDict[str]
"""
Command-line parameters passed to Python wheel task in the form of `["--name=task", "--data=dbfs:/path/to/data.json"]`. Leave it empty if `parameters` is not null.
"""
parameters: VariableOrList[str]
"""
Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.
"""
PythonWheelTaskParam = PythonWheelTaskDict | PythonWheelTask
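A sketch showing the named-parameter style described above (mutually exclusive with `parameters`); the package, entry point, and import path are assumptions for illustration.

from databricks.bundles.jobs import PythonWheelTask  # assumed public re-export

wheel_task = PythonWheelTask(
    package_name="my_pipeline",  # illustrative package and entry point
    entry_point="main",
    named_parameters={"env": "dev"},  # leave `parameters` empty when using named_parameters
)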

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class QueueSettings:
""""""
enabled: VariableOr[bool]
"""
If true, enable queueing for the job. This is a required field.
"""
@classmethod
def from_dict(cls, value: "QueueSettingsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "QueueSettingsDict":
return _transform_to_json_value(self) # type:ignore
class QueueSettingsDict(TypedDict, total=False):
""""""
enabled: VariableOr[bool]
"""
If true, enable queueing for the job. This is a required field.
"""
QueueSettingsParam = QueueSettingsDict | QueueSettings

View File

@ -0,0 +1,36 @@
from enum import Enum
from typing import Literal
class RunIf(Enum):
"""
An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.
Possible values are:
* `ALL_SUCCESS`: All dependencies have executed and succeeded
* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded
* `NONE_FAILED`: None of the dependencies have failed and at least one was executed
* `ALL_DONE`: All dependencies have been completed
* `AT_LEAST_ONE_FAILED`: At least one dependency failed
* `ALL_FAILED`: All dependencies have failed
"""
ALL_SUCCESS = "ALL_SUCCESS"
ALL_DONE = "ALL_DONE"
NONE_FAILED = "NONE_FAILED"
AT_LEAST_ONE_SUCCESS = "AT_LEAST_ONE_SUCCESS"
ALL_FAILED = "ALL_FAILED"
AT_LEAST_ONE_FAILED = "AT_LEAST_ONE_FAILED"
RunIfParam = (
Literal[
"ALL_SUCCESS",
"ALL_DONE",
"NONE_FAILED",
"AT_LEAST_ONE_SUCCESS",
"ALL_FAILED",
"AT_LEAST_ONE_FAILED",
]
| RunIf
)
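Every generated enum in this package follows the same pattern: the `*Param` alias accepts either the enum member or its string literal. A sketch, assuming a public re-export of `RunIf` (the `_models.run_if` module path is the one shown in this diff):

from databricks.bundles.jobs import RunIf  # assumed public re-export
from databricks.bundles.jobs._models.run_if import RunIfParam  # module path shown in this diff

# Both forms satisfy RunIfParam; the dict/transform path is expected to accept the literal.
strict: RunIfParam = RunIf.ALL_SUCCESS
lenient: RunIfParam = "AT_LEAST_ONE_SUCCESS"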

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrDict
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class RunJobTask:
""""""
job_id: VariableOr[int]
"""
ID of the job to trigger.
"""
job_parameters: VariableOrDict[str] = field(default_factory=dict)
"""
Job-level parameters used to trigger the job.
"""
@classmethod
def from_dict(cls, value: "RunJobTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "RunJobTaskDict":
return _transform_to_json_value(self) # type:ignore
class RunJobTaskDict(TypedDict, total=False):
""""""
job_id: VariableOr[int]
"""
ID of the job to trigger.
"""
job_parameters: VariableOrDict[str]
"""
Job-level parameters used to trigger the job.
"""
RunJobTaskParam = RunJobTaskDict | RunJobTask

View File

@ -0,0 +1,19 @@
from enum import Enum
from typing import Literal
class Source(Enum):
"""
Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved
from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: SQL file is located in Databricks workspace.
* `GIT`: SQL file is located in cloud Git provider.
"""
WORKSPACE = "WORKSPACE"
GIT = "GIT"
SourceParam = Literal["WORKSPACE", "GIT"] | Source

View File

@ -0,0 +1,70 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SparkJarTask:
""""""
main_class_name: VariableOr[str]
"""
The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.
The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail.
"""
jar_uri: VariableOrOptional[str] = None
"""
Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.
"""
parameters: VariableOrList[str] = field(default_factory=list)
"""
Parameters passed to the main method.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
@classmethod
def from_dict(cls, value: "SparkJarTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SparkJarTaskDict":
return _transform_to_json_value(self) # type:ignore
class SparkJarTaskDict(TypedDict, total=False):
""""""
main_class_name: VariableOr[str]
"""
The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.
The code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail.
"""
jar_uri: VariableOrOptional[str]
"""
Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.
"""
parameters: VariableOrList[str]
"""
Parameters passed to the main method.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
SparkJarTaskParam = SparkJarTaskDict | SparkJarTask

View File

@ -0,0 +1,77 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
from databricks.bundles.jobs._models.source import Source, SourceParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SparkPythonTask:
""""""
python_file: VariableOr[str]
"""
The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.
"""
parameters: VariableOrList[str] = field(default_factory=list)
"""
Command line parameters passed to the Python file.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
source: VariableOrOptional[Source] = None
"""
Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local
Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,
the Python file will be retrieved from a Git repository defined in `git_source`.
* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.
* `GIT`: The Python file is located in a remote Git repository.
"""
@classmethod
def from_dict(cls, value: "SparkPythonTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SparkPythonTaskDict":
return _transform_to_json_value(self) # type:ignore
class SparkPythonTaskDict(TypedDict, total=False):
""""""
python_file: VariableOr[str]
"""
The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.
"""
parameters: VariableOrList[str]
"""
Command line parameters passed to the Python file.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
source: VariableOrOptional[SourceParam]
"""
Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local
Databricks workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,
the Python file will be retrieved from a Git repository defined in `git_source`.
* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.
* `GIT`: The Python file is located in a remote Git repository.
"""
SparkPythonTaskParam = SparkPythonTaskDict | SparkPythonTask
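A short sketch of a Git-sourced Python file task; the file path and parameters are illustrative, and the import path is assumed.

from databricks.bundles.jobs import SparkPythonTask, Source  # assumed public re-exports

py_task = SparkPythonTask(
    python_file="jobs/etl/main.py",  # relative path because the file comes from git_source
    parameters=["--date", "2025-01-01"],
    source=Source.GIT,  # requires git_source to be defined on the job
)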

View File

@ -0,0 +1,42 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SparkSubmitTask:
""""""
parameters: VariableOrList[str] = field(default_factory=list)
"""
Command-line parameters passed to spark submit.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
@classmethod
def from_dict(cls, value: "SparkSubmitTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SparkSubmitTaskDict":
return _transform_to_json_value(self) # type:ignore
class SparkSubmitTaskDict(TypedDict, total=False):
""""""
parameters: VariableOrList[str]
"""
Command-line parameters passed to spark submit.
Use [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.
"""
SparkSubmitTaskParam = SparkSubmitTaskDict | SparkSubmitTask

View File

@ -0,0 +1,105 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrDict,
VariableOrOptional,
)
from databricks.bundles.jobs._models.sql_task_alert import (
SqlTaskAlert,
SqlTaskAlertParam,
)
from databricks.bundles.jobs._models.sql_task_dashboard import (
SqlTaskDashboard,
SqlTaskDashboardParam,
)
from databricks.bundles.jobs._models.sql_task_file import SqlTaskFile, SqlTaskFileParam
from databricks.bundles.jobs._models.sql_task_query import (
SqlTaskQuery,
SqlTaskQueryParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTask:
""""""
warehouse_id: VariableOr[str]
"""
The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs.
"""
alert: VariableOrOptional[SqlTaskAlert] = None
"""
If alert, indicates that this job must refresh a SQL alert.
"""
dashboard: VariableOrOptional[SqlTaskDashboard] = None
"""
If dashboard, indicates that this job must refresh a SQL dashboard.
"""
file: VariableOrOptional[SqlTaskFile] = None
"""
If file, indicates that this job runs a SQL file in a remote Git repository.
"""
parameters: VariableOrDict[str] = field(default_factory=dict)
"""
Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.
"""
query: VariableOrOptional[SqlTaskQuery] = None
"""
If query, indicates that this job must execute a SQL query.
"""
@classmethod
def from_dict(cls, value: "SqlTaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskDict(TypedDict, total=False):
""""""
warehouse_id: VariableOr[str]
"""
The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs.
"""
alert: VariableOrOptional[SqlTaskAlertParam]
"""
If alert, indicates that this job must refresh a SQL alert.
"""
dashboard: VariableOrOptional[SqlTaskDashboardParam]
"""
If dashboard, indicates that this job must refresh a SQL dashboard.
"""
file: VariableOrOptional[SqlTaskFileParam]
"""
If file, indicates that this job runs a SQL file in a remote Git repository.
"""
parameters: VariableOrDict[str]
"""
Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.
"""
query: VariableOrOptional[SqlTaskQueryParam]
"""
If query, indicates that this job must execute a SQL query.
"""
SqlTaskParam = SqlTaskDict | SqlTask
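A sketch of the query variant of `SqlTask`; only one of `alert`, `dashboard`, `file`, or `query` would normally be set. The warehouse and query IDs are placeholders and the import path is assumed.

from databricks.bundles.jobs import SqlTask, SqlTaskQuery  # assumed public re-exports

sql_task = SqlTask(
    warehouse_id="1234567890abcdef",  # illustrative serverless or pro warehouse ID
    query=SqlTaskQuery(query_id="00000000-0000-0000-0000-000000000000"),  # placeholder ID
    parameters={"run_date": "2025-01-01"},
)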

View File

@ -0,0 +1,66 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
from databricks.bundles.jobs._models.sql_task_subscription import (
SqlTaskSubscription,
SqlTaskSubscriptionParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTaskAlert:
""""""
alert_id: VariableOr[str]
"""
The canonical identifier of the SQL alert.
"""
subscriptions: VariableOrList[SqlTaskSubscription]
"""
If specified, alert notifications are sent to subscribers.
"""
pause_subscriptions: VariableOrOptional[bool] = None
"""
If true, the alert notifications are not sent to subscribers.
"""
@classmethod
def from_dict(cls, value: "SqlTaskAlertDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskAlertDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskAlertDict(TypedDict, total=False):
""""""
alert_id: VariableOr[str]
"""
The canonical identifier of the SQL alert.
"""
subscriptions: VariableOrList[SqlTaskSubscriptionParam]
"""
If specified, alert notifications are sent to subscribers.
"""
pause_subscriptions: VariableOrOptional[bool]
"""
If true, the alert notifications are not sent to subscribers.
"""
SqlTaskAlertParam = SqlTaskAlertDict | SqlTaskAlert

View File

@ -0,0 +1,76 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
from databricks.bundles.jobs._models.sql_task_subscription import (
SqlTaskSubscription,
SqlTaskSubscriptionParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTaskDashboard:
""""""
dashboard_id: VariableOr[str]
"""
The canonical identifier of the SQL dashboard.
"""
custom_subject: VariableOrOptional[str] = None
"""
Subject of the email sent to subscribers of this task.
"""
pause_subscriptions: VariableOrOptional[bool] = None
"""
If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.
"""
subscriptions: VariableOrList[SqlTaskSubscription] = field(default_factory=list)
"""
If specified, dashboard snapshots are sent to subscriptions.
"""
@classmethod
def from_dict(cls, value: "SqlTaskDashboardDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskDashboardDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskDashboardDict(TypedDict, total=False):
""""""
dashboard_id: VariableOr[str]
"""
The canonical identifier of the SQL dashboard.
"""
custom_subject: VariableOrOptional[str]
"""
Subject of the email sent to subscribers of this task.
"""
pause_subscriptions: VariableOrOptional[bool]
"""
If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.
"""
subscriptions: VariableOrList[SqlTaskSubscriptionParam]
"""
If specified, dashboard snapshots are sent to subscriptions.
"""
SqlTaskDashboardParam = SqlTaskDashboardDict | SqlTaskDashboard

View File

@ -0,0 +1,59 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
from databricks.bundles.jobs._models.source import Source, SourceParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTaskFile:
""""""
path: VariableOr[str]
"""
Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths.
"""
source: VariableOrOptional[Source] = None
"""
Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved
from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: SQL file is located in Databricks workspace.
* `GIT`: SQL file is located in cloud Git provider.
"""
@classmethod
def from_dict(cls, value: "SqlTaskFileDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskFileDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskFileDict(TypedDict, total=False):
""""""
path: VariableOr[str]
"""
Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths.
"""
source: VariableOrOptional[SourceParam]
"""
Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved
from the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository
defined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.
* `WORKSPACE`: SQL file is located in Databricks workspace.
* `GIT`: SQL file is located in cloud Git provider.
"""
SqlTaskFileParam = SqlTaskFileDict | SqlTaskFile

View File

@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTaskQuery:
""""""
query_id: VariableOr[str]
"""
The canonical identifier of the SQL query.
"""
@classmethod
def from_dict(cls, value: "SqlTaskQueryDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskQueryDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskQueryDict(TypedDict, total=False):
""""""
query_id: VariableOr[str]
"""
The canonical identifier of the SQL query.
"""
SqlTaskQueryParam = SqlTaskQueryDict | SqlTaskQuery

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class SqlTaskSubscription:
""""""
destination_id: VariableOrOptional[str] = None
"""
The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications.
"""
user_name: VariableOrOptional[str] = None
"""
The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications.
"""
@classmethod
def from_dict(cls, value: "SqlTaskSubscriptionDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "SqlTaskSubscriptionDict":
return _transform_to_json_value(self) # type:ignore
class SqlTaskSubscriptionDict(TypedDict, total=False):
""""""
destination_id: VariableOrOptional[str]
"""
The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications.
"""
user_name: VariableOrOptional[str]
"""
The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. You cannot set both destination_id and user_name for subscription notifications.
"""
SqlTaskSubscriptionParam = SqlTaskSubscriptionDict | SqlTaskSubscription

View File

@ -0,0 +1,77 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList, VariableOrOptional
from databricks.bundles.jobs._models.condition import Condition, ConditionParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class TableUpdateTriggerConfiguration:
""""""
condition: VariableOrOptional[Condition] = None
"""
The table(s) condition based on which to trigger a job run.
"""
min_time_between_triggers_seconds: VariableOrOptional[int] = None
"""
If set, the trigger starts a run only after the specified amount of time has passed since
the last time the trigger fired. The minimum allowed value is 60 seconds.
"""
table_names: VariableOrList[str] = field(default_factory=list)
"""
A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.
"""
wait_after_last_change_seconds: VariableOrOptional[int] = None
"""
If set, the trigger starts a run only after no table updates have occurred for the specified time
and can be used to wait for a series of table updates before triggering a run. The
minimum allowed value is 60 seconds.
"""
@classmethod
def from_dict(cls, value: "TableUpdateTriggerConfigurationDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TableUpdateTriggerConfigurationDict":
return _transform_to_json_value(self) # type:ignore
class TableUpdateTriggerConfigurationDict(TypedDict, total=False):
""""""
condition: VariableOrOptional[ConditionParam]
"""
The table(s) condition based on which to trigger a job run.
"""
min_time_between_triggers_seconds: VariableOrOptional[int]
"""
If set, the trigger starts a run only after the specified amount of time has passed since
the last time the trigger fired. The minimum allowed value is 60 seconds.
"""
table_names: VariableOrList[str]
"""
A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.
"""
wait_after_last_change_seconds: VariableOrOptional[int]
"""
If set, the trigger starts a run only after no table updates have occurred for the specified time
and can be used to wait for a series of table updates before triggering a run. The
minimum allowed value is 60 seconds.
"""
TableUpdateTriggerConfigurationParam = (
TableUpdateTriggerConfigurationDict | TableUpdateTriggerConfiguration
)
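A minimal usage sketch; the table name and thresholds are illustrative, and the import path mirrors the generated module layout referenced by the trigger settings file below.
from databricks.bundles.jobs._models.table_update_trigger_configuration import (
    TableUpdateTriggerConfiguration,
)

# Build the trigger configuration from a plain dict, as done for other models in the tests.
config = TableUpdateTriggerConfiguration.from_dict(
    {
        "table_names": ["main.analytics.orders"],
        "min_time_between_triggers_seconds": 60,
        "wait_after_last_change_seconds": 120,
    }
)
assert config.table_names == ["main.analytics.orders"]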

View File

@ -0,0 +1,444 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.compute._models.cluster_spec import (
ClusterSpec,
ClusterSpecParam,
)
from databricks.bundles.compute._models.library import (
Library,
LibraryParam,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import (
VariableOr,
VariableOrList,
VariableOrOptional,
)
from databricks.bundles.jobs._models.condition_task import (
ConditionTask,
ConditionTaskParam,
)
from databricks.bundles.jobs._models.dbt_task import DbtTask, DbtTaskParam
from databricks.bundles.jobs._models.for_each_task import (
ForEachTask,
ForEachTaskParam,
)
from databricks.bundles.jobs._models.jobs_health_rules import (
JobsHealthRules,
JobsHealthRulesParam,
)
from databricks.bundles.jobs._models.notebook_task import (
NotebookTask,
NotebookTaskParam,
)
from databricks.bundles.jobs._models.pipeline_task import (
PipelineTask,
PipelineTaskParam,
)
from databricks.bundles.jobs._models.python_wheel_task import (
PythonWheelTask,
PythonWheelTaskParam,
)
from databricks.bundles.jobs._models.run_if import RunIf, RunIfParam
from databricks.bundles.jobs._models.run_job_task import (
RunJobTask,
RunJobTaskParam,
)
from databricks.bundles.jobs._models.spark_jar_task import (
SparkJarTask,
SparkJarTaskParam,
)
from databricks.bundles.jobs._models.spark_python_task import (
SparkPythonTask,
SparkPythonTaskParam,
)
from databricks.bundles.jobs._models.spark_submit_task import (
SparkSubmitTask,
SparkSubmitTaskParam,
)
from databricks.bundles.jobs._models.sql_task import SqlTask, SqlTaskParam
from databricks.bundles.jobs._models.task_dependency import (
TaskDependency,
TaskDependencyParam,
)
from databricks.bundles.jobs._models.task_email_notifications import (
TaskEmailNotifications,
TaskEmailNotificationsParam,
)
from databricks.bundles.jobs._models.task_notification_settings import (
TaskNotificationSettings,
TaskNotificationSettingsParam,
)
from databricks.bundles.jobs._models.webhook_notifications import (
WebhookNotifications,
WebhookNotificationsParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Task:
""""""
task_key: VariableOr[str]
"""
A unique name for the task. This field is used to refer to this task from other tasks.
This field is required and must be unique within its parent job.
On Update or Reset, this field is used to reference the tasks to be updated or reset.
"""
condition_task: VariableOrOptional[ConditionTask] = None
"""
The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present.
The condition task does not require a cluster to execute and does not support retries or notifications.
"""
dbt_task: VariableOrOptional[DbtTask] = None
"""
The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.
"""
depends_on: VariableOrList[TaskDependency] = field(default_factory=list)
"""
An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.
The key is `task_key`, and the value is the name assigned to the dependent task.
"""
description: VariableOrOptional[str] = None
"""
An optional description for this task.
"""
disable_auto_optimization: VariableOrOptional[bool] = None
"""
An option to disable auto optimization in serverless
"""
email_notifications: VariableOrOptional[TaskEmailNotifications] = None
"""
An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.
"""
environment_key: VariableOrOptional[str] = None
"""
The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute.
"""
existing_cluster_id: VariableOrOptional[str] = None
"""
If existing_cluster_id, the ID of an existing cluster that is used for all runs.
When running jobs or tasks on an existing cluster, you may need to manually restart
the cluster if it stops responding. We suggest running jobs and tasks on new clusters for
greater reliability.
"""
for_each_task: VariableOrOptional[ForEachTask] = None
"""
The task executes a nested task for every input provided when the `for_each_task` field is present.
"""
health: VariableOrOptional[JobsHealthRules] = None
job_cluster_key: VariableOrOptional[str] = None
"""
If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`.
"""
libraries: VariableOrList[Library] = field(default_factory=list)
"""
An optional list of libraries to be installed on the cluster.
The default value is an empty list.
"""
max_retries: VariableOrOptional[int] = None
"""
An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry.
"""
min_retry_interval_millis: VariableOrOptional[int] = None
"""
An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried.
"""
new_cluster: VariableOrOptional[ClusterSpec] = None
"""
If new_cluster, a description of a new cluster that is created for each run.
"""
notebook_task: VariableOrOptional[NotebookTask] = None
"""
The task runs a notebook when the `notebook_task` field is present.
"""
notification_settings: VariableOrOptional[TaskNotificationSettings] = None
"""
Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.
"""
pipeline_task: VariableOrOptional[PipelineTask] = None
"""
The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported.
"""
python_wheel_task: VariableOrOptional[PythonWheelTask] = None
"""
The task runs a Python wheel when the `python_wheel_task` field is present.
"""
retry_on_timeout: VariableOrOptional[bool] = None
"""
An optional policy to specify whether to retry a job when it times out. The default behavior
is to not retry on timeout.
"""
run_if: VariableOrOptional[RunIf] = None
"""
An optional value specifying the condition determining whether the task is run once its dependencies have been completed.
* `ALL_SUCCESS`: All dependencies have executed and succeeded
* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded
* `NONE_FAILED`: None of the dependencies have failed and at least one was executed
* `ALL_DONE`: All dependencies have been completed
* `AT_LEAST_ONE_FAILED`: At least one dependency failed
* `ALL_FAILED`: All dependencies have failed
"""
run_job_task: VariableOrOptional[RunJobTask] = None
"""
The task triggers another job when the `run_job_task` field is present.
"""
spark_jar_task: VariableOrOptional[SparkJarTask] = None
"""
The task runs a JAR when the `spark_jar_task` field is present.
"""
spark_python_task: VariableOrOptional[SparkPythonTask] = None
"""
The task runs a Python file when the `spark_python_task` field is present.
"""
spark_submit_task: VariableOrOptional[SparkSubmitTask] = None
"""
(Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute.
In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.
`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.
By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory` and `--executor-memory` to a smaller value to leave some room for off-heap usage.
The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.
"""
sql_task: VariableOrOptional[SqlTask] = None
"""
The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present.
"""
timeout_seconds: VariableOrOptional[int] = None
"""
An optional timeout applied to each run of this job task. A value of `0` means no timeout.
"""
webhook_notifications: VariableOrOptional[WebhookNotifications] = None
"""
A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.
"""
def __post_init__(self):
union_fields = [
self.new_cluster,
self.job_cluster_key,
self.environment_key,
self.existing_cluster_id,
]
if sum(f is not None for f in union_fields) > 1:
raise ValueError(
"Only one of 'new_cluster', 'job_cluster_key', 'environment_key', 'existing_cluster_id' can be specified in Task"
)
@classmethod
def from_dict(cls, value: "TaskDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TaskDict":
return _transform_to_json_value(self) # type:ignore
class TaskDict(TypedDict, total=False):
""""""
task_key: VariableOr[str]
"""
A unique name for the task. This field is used to refer to this task from other tasks.
This field is required and must be unique within its parent job.
On Update or Reset, this field is used to reference the tasks to be updated or reset.
"""
condition_task: VariableOrOptional[ConditionTaskParam]
"""
The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present.
The condition task does not require a cluster to execute and does not support retries or notifications.
"""
dbt_task: VariableOrOptional[DbtTaskParam]
"""
The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.
"""
depends_on: VariableOrList[TaskDependencyParam]
"""
An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.
The key is `task_key`, and the value is the name assigned to the dependent task.
"""
description: VariableOrOptional[str]
"""
An optional description for this task.
"""
disable_auto_optimization: VariableOrOptional[bool]
"""
An option to disable auto optimization in serverless
"""
email_notifications: VariableOrOptional[TaskEmailNotificationsParam]
"""
An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.
"""
environment_key: VariableOrOptional[str]
"""
The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute.
"""
existing_cluster_id: VariableOrOptional[str]
"""
If existing_cluster_id, the ID of an existing cluster that is used for all runs.
When running jobs or tasks on an existing cluster, you may need to manually restart
the cluster if it stops responding. We suggest running jobs and tasks on new clusters for
greater reliability.
"""
for_each_task: VariableOrOptional[ForEachTaskParam]
"""
The task executes a nested task for every input provided when the `for_each_task` field is present.
"""
health: VariableOrOptional[JobsHealthRulesParam]
job_cluster_key: VariableOrOptional[str]
"""
If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`.
"""
libraries: VariableOrList[LibraryParam]
"""
An optional list of libraries to be installed on the cluster.
The default value is an empty list.
"""
max_retries: VariableOrOptional[int]
"""
An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry.
"""
min_retry_interval_millis: VariableOrOptional[int]
"""
An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried.
"""
new_cluster: VariableOrOptional[ClusterSpecParam]
"""
If new_cluster, a description of a new cluster that is created for each run.
"""
notebook_task: VariableOrOptional[NotebookTaskParam]
"""
The task runs a notebook when the `notebook_task` field is present.
"""
notification_settings: VariableOrOptional[TaskNotificationSettingsParam]
"""
Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.
"""
pipeline_task: VariableOrOptional[PipelineTaskParam]
"""
The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported.
"""
python_wheel_task: VariableOrOptional[PythonWheelTaskParam]
"""
The task runs a Python wheel when the `python_wheel_task` field is present.
"""
retry_on_timeout: VariableOrOptional[bool]
"""
An optional policy to specify whether to retry a job when it times out. The default behavior
is to not retry on timeout.
"""
run_if: VariableOrOptional[RunIfParam]
"""
An optional value specifying the condition determining whether the task is run once its dependencies have been completed.
* `ALL_SUCCESS`: All dependencies have executed and succeeded
* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded
* `NONE_FAILED`: None of the dependencies have failed and at least one was executed
* `ALL_DONE`: All dependencies have been completed
* `AT_LEAST_ONE_FAILED`: At least one dependency failed
* `ALL_FAILED`: All dependencies have failed
"""
run_job_task: VariableOrOptional[RunJobTaskParam]
"""
The task triggers another job when the `run_job_task` field is present.
"""
spark_jar_task: VariableOrOptional[SparkJarTaskParam]
"""
The task runs a JAR when the `spark_jar_task` field is present.
"""
spark_python_task: VariableOrOptional[SparkPythonTaskParam]
"""
The task runs a Python file when the `spark_python_task` field is present.
"""
spark_submit_task: VariableOrOptional[SparkSubmitTaskParam]
"""
(Legacy) The task runs the spark-submit script when the `spark_submit_task` field is present. This task can run only on new clusters and is not compatible with serverless compute.
In the `new_cluster` specification, `libraries` and `spark_conf` are not supported. Instead, use `--jars` and `--py-files` to add Java and Python libraries and `--conf` to set the Spark configurations.
`master`, `deploy-mode`, and `executor-cores` are automatically configured by Databricks; you _cannot_ specify them in parameters.
By default, the Spark submit job uses all available memory (excluding reserved memory for Databricks services). You can set `--driver-memory` and `--executor-memory` to a smaller value to leave some room for off-heap usage.
The `--jars`, `--py-files`, `--files` arguments support DBFS and S3 paths.
"""
sql_task: VariableOrOptional[SqlTaskParam]
"""
The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present.
"""
timeout_seconds: VariableOrOptional[int]
"""
An optional timeout applied to each run of this job task. A value of `0` means no timeout.
"""
webhook_notifications: VariableOrOptional[WebhookNotificationsParam]
"""
A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.
"""
TaskParam = TaskDict | Task
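A minimal usage sketch built from the classes exercised in the tests below; the notebook path and cluster key are illustrative.
from databricks.bundles.jobs import NotebookTask, Task

task = Task(
    task_key="ingest",
    notebook_task=NotebookTask(notebook_path="notebooks/ingest.ipynb"),
    job_cluster_key="default_cluster",
    max_retries=2,
)

# __post_init__ allows at most one of the compute selectors.
try:
    Task(
        task_key="bad",
        job_cluster_key="default_cluster",
        existing_cluster_id="1234-567890-abcde123",
    )
except ValueError as err:
    print(err)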

View File

@ -0,0 +1,48 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class TaskDependency:
""""""
task_key: VariableOr[str]
"""
The name of the task this task depends on.
"""
outcome: VariableOrOptional[str] = None
"""
Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run.
"""
@classmethod
def from_dict(cls, value: "TaskDependencyDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TaskDependencyDict":
return _transform_to_json_value(self) # type:ignore
class TaskDependencyDict(TypedDict, total=False):
""""""
task_key: VariableOr[str]
"""
The name of the task this task depends on.
"""
outcome: VariableOrOptional[str]
"""
Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run.
"""
TaskDependencyParam = TaskDependencyDict | TaskDependency
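A minimal usage sketch; `TaskDependency` is imported here from the generated module path referenced by the task model above, and `outcome` is only meaningful for dependencies on condition tasks.
from databricks.bundles.jobs import NotebookTask, Task
from databricks.bundles.jobs._models.task_dependency import TaskDependency

report = Task(
    task_key="report",
    notebook_task=NotebookTask(notebook_path="notebooks/report.ipynb"),
    depends_on=[
        TaskDependency(task_key="ingest"),
        TaskDependency(task_key="is_weekday", outcome="true"),
    ],
)
assert report.depends_on[0].task_key == "ingest"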

View File

@ -0,0 +1,96 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList, VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class TaskEmailNotifications:
""""""
no_alert_for_skipped_runs: VariableOrOptional[bool] = None
"""
If true, do not send email to recipients specified in `on_failure` if the run is skipped.
This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field.
"""
on_duration_warning_threshold_exceeded: VariableOrList[str] = field(
default_factory=list
)
"""
A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.
"""
on_failure: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_start: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_streaming_backlog_exceeded: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
"""
on_success: VariableOrList[str] = field(default_factory=list)
"""
A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
@classmethod
def from_dict(cls, value: "TaskEmailNotificationsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TaskEmailNotificationsDict":
return _transform_to_json_value(self) # type:ignore
class TaskEmailNotificationsDict(TypedDict, total=False):
""""""
no_alert_for_skipped_runs: VariableOrOptional[bool]
"""
If true, do not send email to recipients specified in `on_failure` if the run is skipped.
This field is `deprecated`. Please use the `notification_settings.no_alert_for_skipped_runs` field.
"""
on_duration_warning_threshold_exceeded: VariableOrList[str]
"""
A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.
"""
on_failure: VariableOrList[str]
"""
A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED` or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_start: VariableOrList[str]
"""
A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
on_streaming_backlog_exceeded: VariableOrList[str]
"""
A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
"""
on_success: VariableOrList[str]
"""
A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.
"""
TaskEmailNotificationsParam = TaskEmailNotificationsDict | TaskEmailNotifications
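A minimal usage sketch with illustrative addresses, using the generated module path referenced by the task model above.
from databricks.bundles.jobs._models.task_email_notifications import (
    TaskEmailNotifications,
)

notifications = TaskEmailNotifications.from_dict(
    {
        "on_failure": ["oncall@example.com"],
        "on_success": ["team@example.com"],
    }
)
# List fields that are not provided default to empty lists.
assert notifications.on_start == []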

View File

@ -0,0 +1,58 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class TaskNotificationSettings:
""""""
alert_on_last_attempt: VariableOrOptional[bool] = None
"""
If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.
"""
no_alert_for_canceled_runs: VariableOrOptional[bool] = None
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.
"""
no_alert_for_skipped_runs: VariableOrOptional[bool] = None
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.
"""
@classmethod
def from_dict(cls, value: "TaskNotificationSettingsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TaskNotificationSettingsDict":
return _transform_to_json_value(self) # type:ignore
class TaskNotificationSettingsDict(TypedDict, total=False):
""""""
alert_on_last_attempt: VariableOrOptional[bool]
"""
If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.
"""
no_alert_for_canceled_runs: VariableOrOptional[bool]
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.
"""
no_alert_for_skipped_runs: VariableOrOptional[bool]
"""
If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.
"""
TaskNotificationSettingsParam = TaskNotificationSettingsDict | TaskNotificationSettings
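A minimal usage sketch using the generated module path referenced by the task model above.
from databricks.bundles.jobs._models.task_notification_settings import (
    TaskNotificationSettings,
)

settings = TaskNotificationSettings(
    alert_on_last_attempt=True,
    no_alert_for_canceled_runs=True,
)
# Unset optionals default to None.
assert settings.no_alert_for_skipped_runs is None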

View File

@ -0,0 +1,87 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrOptional
from databricks.bundles.jobs._models.file_arrival_trigger_configuration import (
FileArrivalTriggerConfiguration,
FileArrivalTriggerConfigurationParam,
)
from databricks.bundles.jobs._models.pause_status import PauseStatus, PauseStatusParam
from databricks.bundles.jobs._models.periodic_trigger_configuration import (
PeriodicTriggerConfiguration,
PeriodicTriggerConfigurationParam,
)
from databricks.bundles.jobs._models.table_update_trigger_configuration import (
TableUpdateTriggerConfiguration,
TableUpdateTriggerConfigurationParam,
)
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class TriggerSettings:
""""""
file_arrival: VariableOrOptional[FileArrivalTriggerConfiguration] = None
"""
File arrival trigger settings.
"""
pause_status: VariableOrOptional[PauseStatus] = None
"""
Whether this trigger is paused or not.
"""
periodic: VariableOrOptional[PeriodicTriggerConfiguration] = None
"""
Periodic trigger settings.
"""
table_update: VariableOrOptional[TableUpdateTriggerConfiguration] = None
def __post_init__(self):
union_fields = [
self.file_arrival,
self.periodic,
self.table_update,
]
if sum(f is not None for f in union_fields) != 1:
raise ValueError(
"TriggerSettings must specify exactly one of 'file_arrival', 'periodic', 'table_update'"
)
@classmethod
def from_dict(cls, value: "TriggerSettingsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "TriggerSettingsDict":
return _transform_to_json_value(self) # type:ignore
class TriggerSettingsDict(TypedDict, total=False):
""""""
file_arrival: VariableOrOptional[FileArrivalTriggerConfigurationParam]
"""
File arrival trigger settings.
"""
pause_status: VariableOrOptional[PauseStatusParam]
"""
Whether this trigger is paused or not.
"""
periodic: VariableOrOptional[PeriodicTriggerConfigurationParam]
"""
Periodic trigger settings.
"""
table_update: VariableOrOptional[TableUpdateTriggerConfigurationParam]
TriggerSettingsParam = TriggerSettingsDict | TriggerSettings
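A minimal usage sketch; the `trigger_settings` import path is an assumption that follows the same `_models` layout as the imports above, and the table name is illustrative.
from databricks.bundles.jobs._models.table_update_trigger_configuration import (
    TableUpdateTriggerConfiguration,
)
from databricks.bundles.jobs._models.trigger_settings import TriggerSettings

trigger = TriggerSettings(
    table_update=TableUpdateTriggerConfiguration(
        table_names=["main.analytics.orders"],
    ),
)

# __post_init__ requires exactly one of 'file_arrival', 'periodic', 'table_update'.
try:
    TriggerSettings()
except ValueError as err:
    print(err)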

View File

@ -0,0 +1,32 @@
from dataclasses import dataclass
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOr
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class Webhook:
""""""
id: VariableOr[str]
@classmethod
def from_dict(cls, value: "WebhookDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "WebhookDict":
return _transform_to_json_value(self) # type:ignore
class WebhookDict(TypedDict, total=False):
""""""
id: VariableOr[str]
WebhookParam = WebhookDict | Webhook
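A minimal usage sketch; the ID is illustrative, and the import path mirrors the one used by the webhook notifications module below.
from databricks.bundles.jobs._models.webhook import Webhook

webhook = Webhook(id="notification-destination-id")
assert webhook == Webhook.from_dict({"id": "notification-destination-id"})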

View File

@ -0,0 +1,87 @@
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, TypedDict
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_value
from databricks.bundles.core._variable import VariableOrList
from databricks.bundles.jobs._models.webhook import Webhook, WebhookParam
if TYPE_CHECKING:
from typing_extensions import Self
@dataclass(kw_only=True)
class WebhookNotifications:
""""""
on_duration_warning_threshold_exceeded: VariableOrList[Webhook] = field(
default_factory=list
)
"""
An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.
"""
on_failure: VariableOrList[Webhook] = field(default_factory=list)
"""
An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.
"""
on_start: VariableOrList[Webhook] = field(default_factory=list)
"""
An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.
"""
on_streaming_backlog_exceeded: VariableOrList[Webhook] = field(default_factory=list)
"""
An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.
"""
on_success: VariableOrList[Webhook] = field(default_factory=list)
"""
An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.
"""
@classmethod
def from_dict(cls, value: "WebhookNotificationsDict") -> "Self":
return _transform(cls, value)
def as_dict(self) -> "WebhookNotificationsDict":
return _transform_to_json_value(self) # type:ignore
class WebhookNotificationsDict(TypedDict, total=False):
""""""
on_duration_warning_threshold_exceeded: VariableOrList[WebhookParam]
"""
An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.
"""
on_failure: VariableOrList[WebhookParam]
"""
An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.
"""
on_start: VariableOrList[WebhookParam]
"""
An optional list of system notification IDs to call when the run starts. A maximum of 3 destinations can be specified for the `on_start` property.
"""
on_streaming_backlog_exceeded: VariableOrList[WebhookParam]
"""
An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.
Streaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.
Alerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.
A maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.
"""
on_success: VariableOrList[WebhookParam]
"""
An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.
"""
WebhookNotificationsParam = WebhookNotificationsDict | WebhookNotifications
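A minimal usage sketch; the destination ID is illustrative, and both import paths are taken from the module layout shown above.
from databricks.bundles.jobs._models.webhook import Webhook
from databricks.bundles.jobs._models.webhook_notifications import WebhookNotifications

notifications = WebhookNotifications(
    on_failure=[Webhook(id="oncall-destination-id")],  # up to 3 destinations per property
)
assert notifications == WebhookNotifications.from_dict(
    {"on_failure": [{"id": "oncall-destination-id"}]}
)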

View File

@ -11,6 +11,16 @@ from databricks.bundles.core import (
VariableOrOptional,
)
from databricks.bundles.core._transform import _transform
from databricks.bundles.core._transform_to_json import _transform_to_json_object
from databricks.bundles.jobs import (
ClusterSpec,
ClusterSpecDict,
CronSchedule,
ForEachTask,
NotebookTask,
PauseStatus,
Task,
)
class Color(Enum):
@ -188,6 +198,82 @@ def test_transform_forward_ref():
assert out == ForwardRefA(b=ForwardRefB(value=42))
def test_complex_cluster_spec_roundtrip():
# this is what is pre-populated in clusters created from UI
cluster_spec_dict: ClusterSpecDict = {
"autoscale": {"min_workers": 1, "max_workers": 2},
"cluster_name": "test cluster",
"spark_version": "13.3.x-scala2.12",
"aws_attributes": {
"first_on_demand": 1,
"availability": "SPOT_WITH_FALLBACK",
"zone_id": "auto",
"spot_bid_price_percent": 100,
"ebs_volume_count": 0,
},
"node_type_id": "i3.xlarge",
"driver_node_type_id": "i3.xlarge",
"spark_env_vars": {"PYSPARK_PYTHON": "/databricks/python3/bin/python3"},
"autotermination_minutes": 120,
"enable_elastic_disk": False,
"enable_local_disk_encryption": False,
"data_security_mode": "USER_ISOLATION",
"runtime_engine": "PHOTON",
}
cluster_spec = _transform(ClusterSpec, cluster_spec_dict)
cluster_spec_dict_2 = _transform_to_json_object(cluster_spec)
assert cluster_spec_dict == cluster_spec_dict_2
def test_cron_schedule():
cron_schedule = _transform(
CronSchedule,
{
"quartz_cron_expression": "0 0 0 * * ?",
"pause_status": Variable(path="var.pause_status", type=str),
},
)
assert cron_schedule == CronSchedule(
quartz_cron_expression="0 0 0 * * ?",
pause_status=Variable(path="var.pause_status", type=PauseStatus),
)
def test_for_each_task():
"""
Test the special case of recursive data class.
"""
task = _transform(
Task,
{
"task_key": "loop",
"for_each_task": {
"inputs": "[1, 2, 3]",
"task": {
"task_key": "loop_iteration",
"notebook_task": {"notebook_path": "notebooks/foo.ipynb"},
},
},
},
)
assert task == Task(
task_key="loop",
for_each_task=ForEachTask(
inputs="[1, 2, 3]",
task=Task(
task_key="loop_iteration",
notebook_task=NotebookTask(notebook_path="notebooks/foo.ipynb"),
),
),
)
def test_transform_dict_keys():
@dataclass
class Fake:

View File

@ -0,0 +1,36 @@
import pytest
from databricks.bundles.jobs import Permission
def test_oneof_one():
permission = Permission(
level="CAN_VIEW",
user_name="test@example.com",
)
assert permission
def test_oneof_none():
with pytest.raises(ValueError) as exc_info:
Permission(level="CAN_VIEW") # FIXME should be enum
assert exc_info.exconly() == (
"ValueError: Permission must specify exactly one of 'user_name', "
"'service_principal_name', 'group_name'"
)
def test_oneof_both():
with pytest.raises(ValueError) as exc_info:
Permission(
level="CAN_VIEW", # FIXME should be enum
user_name="test@example.com",
service_principal_name="secret",
)
assert exc_info.exconly() == (
"ValueError: Permission must specify exactly one of 'user_name', "
"'service_principal_name', 'group_name'"
)