From 057f3288791e3d9e246ee429aa2cf7986351fe5e Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Fri, 7 Jul 2023 15:00:12 +0200 Subject: [PATCH] Update inline JSON schema documentation (#557) ## Changes Add docs for experiments and models to the json schema. Update the schema to the latest openapi spec. ## Tests Manually --- bundle/schema/docs/bundle_descriptions.json | 862 ++++++++++++++++---- bundle/schema/openapi.go | 46 +- 2 files changed, 755 insertions(+), 153 deletions(-) diff --git a/bundle/schema/docs/bundle_descriptions.json b/bundle/schema/docs/bundle_descriptions.json index ea780418..7734614e 100644 --- a/bundle/schema/docs/bundle_descriptions.json +++ b/bundle/schema/docs/bundle_descriptions.json @@ -20,6 +20,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." } @@ -49,6 +60,17 @@ "bundle": { "description": "The details for this bundle.", "properties": { + "git": { + "description": "", + "properties": { + "branch": { + "description": "" + }, + "origin_url": { + "description": "" + } + } + }, "name": { "description": "The name of the bundle." } @@ -66,22 +88,22 @@ "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." }, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." }, "permissions": { "description": "", @@ -104,15 +126,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -121,15 +143,34 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "A list of compute requirements that can be referenced by tasks of this job.", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "The kind of compute described by this compute specification." + } + } + } + } + } + }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." 
} } }, @@ -140,7 +181,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -152,7 +193,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -199,7 +240,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -287,7 +328,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -326,6 +367,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -349,6 +412,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." 
+ }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -367,6 +483,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -415,6 +534,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." + } + } + }, + "parameters": { + "description": "Job-level parameter definitions", + "items": { + "description": "", + "properties": { + "default": { + "description": "Default value of the parameter." + }, + "name": { + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" + } + } + } + }, "permissions": { "description": "", "items": { @@ -435,11 +579,22 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. 
This field is required.\"\n" @@ -460,6 +615,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." + }, + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -487,12 +659,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." + }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -501,13 +676,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." }, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. 
A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -519,7 +691,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -595,7 +767,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -683,7 +855,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -722,6 +894,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." 
+ } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -745,6 +939,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. 
Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -763,6 +1010,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -815,7 +1065,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" + } + } + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." } } }, @@ -856,11 +1120,14 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed\n" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. 
Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed. This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -883,7 +1150,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -954,6 +1224,14 @@ } } }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", + "properties": { + "path": { + "description": "Relative path of the SQL file in the remote Git repository." + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -986,24 +1264,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { - "min_time_between_trigger_seconds": { - "description": "" + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." 
} } }, @@ -1054,74 +1332,74 @@ "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." }, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." }, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." }, "permissions": { "description": "", @@ -1144,32 +1422,32 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). 
If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." @@ -1229,7 +1507,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -1254,7 +1532,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -1265,7 +1543,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -1315,6 +1593,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." } } }, @@ -1322,7 +1603,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -1395,18 +1676,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -1423,7 +1704,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -1431,7 +1712,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -1462,11 +1743,14 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline." + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." }, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." 
}, "trigger": { "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", @@ -1492,6 +1776,12 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "" + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { @@ -1554,22 +1844,22 @@ "description": "", "properties": { "artifact_location": { - "description": "" + "description": "Location where artifacts for the experiment are stored." }, "creation_time": { - "description": "" + "description": "Creation time" }, "experiment_id": { - "description": "" + "description": "Unique identifier for the experiment." }, "last_update_time": { - "description": "" + "description": "Last update time" }, "lifecycle_stage": { - "description": "" + "description": "Current life cycle stage of the experiment: \"active\" or \"deleted\".\nDeleted experiments are not returned by APIs." }, "name": { - "description": "" + "description": "Human readable name that identifies the experiment." }, "permissions": { "description": "", @@ -1592,15 +1882,15 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } @@ -1609,15 +1899,34 @@ } }, "jobs": { - "description": "List of job definations", + "description": "List of Databricks jobs", "additionalproperties": { "description": "", "properties": { + "compute": { + "description": "A list of compute requirements that can be referenced by tasks of this job.", + "items": { + "description": "", + "properties": { + "compute_key": { + "description": "A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution." + }, + "spec": { + "description": "", + "properties": { + "kind": { + "description": "The kind of compute described by this compute specification." + } + } + } + } + } + }, "continuous": { - "description": "", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", "properties": { "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -1628,7 +1937,7 @@ "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. 
If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1640,7 +1949,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -1687,7 +1996,7 @@ "description": "A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -1775,7 +2084,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -1814,6 +2123,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." 
}, @@ -1837,6 +2168,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. 
e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -1855,6 +2239,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -1903,6 +2290,31 @@ "name": { "description": "An optional name for the job." }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "properties": { + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." + } + } + }, + "parameters": { + "description": "Job-level parameter definitions", + "items": { + "description": "", + "properties": { + "default": { + "description": "Default value of the parameter." + }, + "name": { + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`" + } + } + } + }, "permissions": { "description": "", "items": { @@ -1923,11 +2335,22 @@ } } }, + "run_as": { + "description": "", + "properties": { + "service_principal_name": { + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role." + }, + "user_name": { + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email." + } + } + }, "schedule": { "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "pause_status": { - "description": "Indicate whether this schedule is paused or not." + "description": "Whether this trigger is paused or not." }, "quartz_cron_expression": { "description": "A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required.\"\n" @@ -1948,6 +2371,23 @@ "items": { "description": "", "properties": { + "compute_key": { + "description": "The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task." + }, + "condition_task": { + "description": "If condition_task, specifies a condition with an outcome that can be used to control the execution of other tasks. Does not require a cluster to execute and does not support retries or notifications.", + "properties": { + "left": { + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference." + }, + "op": { + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. 
This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n" + }, + "right": { + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference." + } + } + }, "dbt_task": { "description": "If dbt_task, indicates that this must execute a dbt task. It requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", "properties": { @@ -1975,12 +2415,15 @@ } }, "depends_on": { - "description": "", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete successfully before executing this task.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n", "items": { "description": "", "properties": { + "outcome": { + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run." + }, "task_key": { - "description": "" + "description": "The name of the task this task depends on." } } } @@ -1989,13 +2432,10 @@ "description": "An optional description for this task.\nThe maximum length is 4096 bytes." }, "email_notifications": { - "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted. The default behavior is to not send any emails.", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", "properties": { - "no_alert_for_skipped_runs": { - "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped." - }, "on_failure": { - "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `SKIPPED`, `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2007,7 +2447,7 @@ } }, "on_success": { - "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESSFUL` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "description": "A list of email addresses to be notified when a run successfully completes. 
A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", "items": { "description": "" } @@ -2083,7 +2523,7 @@ "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried." }, "new_cluster": { - "description": "If new_cluster, a description of a cluster that is created for each task.", + "description": "If new_cluster, a description of a cluster that is created for only for this task.", "properties": { "autoscale": { "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", @@ -2171,7 +2611,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -2210,6 +2650,28 @@ "description": "" } }, + "data_security_mode": { + "description": "" + }, + "docker_image": { + "description": "", + "properties": { + "basic_auth": { + "description": "", + "properties": { + "password": { + "description": "Password of the user" + }, + "username": { + "description": "Name of the user" + } + } + }, + "url": { + "description": "URL of the docker image." + } + } + }, "driver_instance_pool_id": { "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned." }, @@ -2233,6 +2695,59 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." + } + } + }, + "init_scripts": { + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "items": { + "description": "", + "properties": { + "dbfs": { + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "properties": { + "destination": { + "description": "dbfs destination, e.g. `dbfs:/my/path`" + } + } + }, + "s3": { + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "properties": { + "canned_acl": { + "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." + }, + "destination": { + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs." + }, + "enable_encryption": { + "description": "(Optional) Flag to enable server side encryption, `false` by default." + }, + "encryption_type": { + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`." + }, + "endpoint": { + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used." + }, + "kms_key": { + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`." + }, + "region": { + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used." + } + } + }, + "workspace": { + "description": "destination needs to be provided. e.g.\n`{ \"workspace\" : { \"destination\" : \"/Users/user1@databricks.com/my-init.sh\" } }`", + "properties": { + "destination": { + "description": "workspace files destination, e.g. 
`/Users/user1@databricks.com/my-init.sh`" + } + } + } } } }, @@ -2251,6 +2766,9 @@ "runtime_engine": { "description": "" }, + "single_user_name": { + "description": "Single user name if data_security_mode is `SINGLE_USER`" + }, "spark_conf": { "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.\n", "additionalproperties": { @@ -2303,7 +2821,21 @@ "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.\n" }, "source": { - "description": "This describes an enum" + "description": "Optional location type of the notebook. When set to `WORKSPACE` or not specified, the notebook will be retrieved\nfrom the local \u003cDatabricks\u003e workspace. When set to `GIT`,\nthe notebook will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The notebook is located in a \u003cDatabricks\u003e workspace.\n* `GIT`: The notebook is located in a remote Git repository.\n" + } + } + }, + "notification_settings": { + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` for this task.", + "properties": { + "alert_on_last_attempt": { + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run." + }, + "no_alert_for_canceled_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled." + }, + "no_alert_for_skipped_runs": { + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped." + } + } + }, @@ -2344,11 +2876,14 @@ "retry_on_timeout": { "description": "An optional policy to specify whether to retry a task when it times out. The default behavior is to not retry on timeout." }, + "run_if": { + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies completed and at least one was executed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed\n" + }, "spark_jar_task": { "description": "If spark_jar_task, indicates that this task must run a JAR.", "properties": { "jar_uri": { - "description": "Deprecated since 04/2016\\\\. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" + "description": "Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n" }, "main_class_name": { "description": "The full name of the class containing the main method to be executed.
This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail." @@ -2371,7 +2906,10 @@ } }, "python_file": { - "description": "" + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required." + }, + "source": { + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local \u003cDatabricks\u003e workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a \u003cDatabricks\u003e workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n" } } }, @@ -2442,6 +2980,14 @@ } } }, + "file": { + "description": "If file, indicates that this job runs a SQL file in a remote Git repository. Only one SQL statement is supported in a file. Multiple SQL statements separated by semicolons (;) are not permitted.", + "properties": { + "path": { + "description": "Relative path of the SQL file in the remote Git repository." + } + } + }, "parameters": { "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", "additionalproperties": { @@ -2474,24 +3020,24 @@ "description": "An optional timeout applied to each run of this job. The default behavior is to have no timeout." }, "trigger": { - "description": "", + "description": "Trigger settings for the job. Can be used to trigger a run when new files arrive in an external location. The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", "properties": { "file_arrival": { - "description": "", + "description": "File arrival trigger settings.", "properties": { - "min_time_between_trigger_seconds": { - "description": "" + "min_time_between_triggers_seconds": { + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n" }, "url": { - "description": "" + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location." }, "wait_after_last_change_seconds": { - "description": "" + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.\n" } } }, "pause_status": { - "description": "" + "description": "Whether this trigger is paused or not." } } }, @@ -2542,74 +3088,74 @@ "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `registered_model` was created." }, "description": { - "description": "" + "description": "Description of this `registered_model`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `registered_model` was last updated." 
}, "latest_versions": { - "description": "", + "description": "Collection of latest model versions for each stage.\nOnly contains models with current `READY` status.", "items": { "description": "", "properties": { "creation_timestamp": { - "description": "" + "description": "Timestamp recorded when this `model_version` was created." }, "current_stage": { - "description": "" + "description": "Current stage for this `model_version`." }, "description": { - "description": "" + "description": "Description of this `model_version`." }, "last_updated_timestamp": { - "description": "" + "description": "Timestamp recorded when metadata for this `model_version` was last updated." }, "name": { - "description": "" + "description": "Unique name of the model" }, "run_id": { - "description": "" + "description": "MLflow run ID used when creating `model_version`, if `source` was generated by an\nexperiment run stored in MLflow tracking server." }, "run_link": { - "description": "" + "description": "Run Link: Direct link to the run that generated this version" }, "source": { - "description": "" + "description": "URI indicating the location of the source model artifacts, used when creating `model_version`" }, "status": { - "description": "" + "description": "Current status of `model_version`" }, "status_message": { - "description": "" + "description": "Details on current `status`, if it is pending or failed." }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `model_version`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `model_version`." }, "version": { - "description": "" + "description": "Model's version number." } } } }, "name": { - "description": "" + "description": "Unique name for the model." }, "permissions": { "description": "", @@ -2632,32 +3178,32 @@ } }, "tags": { - "description": "", + "description": "Tags: Additional metadata key-value pairs for this `registered_model`.", "items": { "description": "", "properties": { "key": { - "description": "" + "description": "The tag key." }, "value": { - "description": "" + "description": "The tag value." } } } }, "user_id": { - "description": "" + "description": "User that created this `registered_model`" } } } }, "pipelines": { - "description": "List of pipeline definations", + "description": "List of DLT pipelines", "additionalproperties": { "description": "", "properties": { "catalog": { - "description": "Catalog in UC to add tables to. If target is specified, tables in this pipeline will be\npublished to a \"target\" schema inside catalog (i.e. \u003ccatalog\u003e.\u003ctarget\u003e.\u003ctable\u003e)." + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). If `target` is not specified, no data is published to Unity Catalog." }, "channel": { "description": "DLT Release Channel that specifies which version to use." 
@@ -2717,7 +3263,7 @@ } }, "azure_attributes": { - "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", "properties": { "availability": { "description": "" @@ -2742,7 +3288,7 @@ } }, "cluster_log_conf": { - "description": "The configuration for delivering spark logs to a long-term storage destination.\nTwo kinds of destinations (dbfs and s3) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.\n", "properties": { "dbfs": { "description": "destination needs to be provided. e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", @@ -2753,7 +3299,7 @@ } }, "s3": { - "description": "destination and either region or endpoint should also be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", "properties": { "canned_acl": { "description": "(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs." @@ -2803,6 +3349,9 @@ }, "google_service_account": { "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator." + }, + "local_ssd_count": { + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type." 
} } }, @@ -2810,7 +3359,7 @@ "description": "The optional ID of the instance pool to which the cluster belongs." }, "label": { - "description": "Cluster label" + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`." }, "node_type_id": { "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n" @@ -2883,18 +3432,18 @@ "description": "", "properties": { "file": { - "description": "", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.\n", "properties": { "path": { - "description": "" + "description": "The absolute path of the file." } } }, "jar": { - "description": "URI of the jar to be installed. Currently only DBFS and S3 URIs are supported.\nFor example: `{ \"jar\": \"dbfs:/mnt/databricks/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the jar to be installed. Currently only DBFS is supported.\n" }, "maven": { - "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "description": "Specification of a maven library to be installed.\n", "properties": { "coordinates": { "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\"." @@ -2911,7 +3460,7 @@ } }, "notebook": { - "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.\nFor example: `{ \"notebook\" : { \"path\" : \"/my-pipeline-notebook-path\" } }`.\nCurrently, only Scala notebooks are supported, and pipelines must be defined in a package\ncell.", + "description": "The path to a notebook that defines a pipeline and is stored in the \u003cDatabricks\u003e workspace.\n", "properties": { "path": { "description": "The absolute path of the notebook." @@ -2919,7 +3468,7 @@ } }, "whl": { - "description": "URI of the wheel to be installed.\nFor example: `{ \"whl\": \"dbfs:/my/whl\" }` or `{ \"whl\": \"s3://my-bucket/whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI." + "description": "URI of the wheel to be installed.\n" } } } @@ -2950,11 +3499,14 @@ "photon": { "description": "Whether Photon is enabled for this pipeline." }, + "serverless": { + "description": "Whether serverless compute is enabled for this pipeline." + }, "storage": { "description": "DBFS root directory for storing checkpoints and tables." }, "target": { - "description": "Target schema (database) to add tables in this pipeline to." + "description": "Target schema (database) to add tables in this pipeline to. If not specified, no data is published to the Hive metastore or Unity Catalog. To publish to Unity Catalog, also specify `catalog`." }, "trigger": { "description": "Which pipeline trigger to use. 
Deprecated: Use `continuous` instead.", @@ -2980,6 +3532,20 @@ } } }, + "variables": { + "description": "", + "additionalproperties": { + "description": "", + "properties": { + "default": { + "description": "" + }, + "description": { + "description": "" + } + } + } + }, "workspace": { "description": "Configures which workspace to connect to and locations for files, state, and similar locations within the workspace file tree.", "properties": { diff --git a/bundle/schema/openapi.go b/bundle/schema/openapi.go index 6c2944aa..9b4b27dd 100644 --- a/bundle/schema/openapi.go +++ b/bundle/schema/openapi.go @@ -162,7 +162,7 @@ func (reader *OpenapiReader) jobsDocs() (*Docs, error) { // TODO: add description for id if needed. // Tracked in https://github.com/databricks/cli/issues/242 jobsDocs := &Docs{ - Description: "List of job definations", + Description: "List of Databricks jobs", AdditionalProperties: jobDocs, } return jobsDocs, nil @@ -177,12 +177,38 @@ func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) { // TODO: Two fields in resources.Pipeline have the json tag id. Clarify the // semantics and then add a description if needed. (https://github.com/databricks/cli/issues/242) pipelinesDocs := &Docs{ - Description: "List of pipeline definations", + Description: "List of DLT pipelines", AdditionalProperties: pipelineDocs, } return pipelinesDocs, nil } +func (reader *OpenapiReader) experimentsDocs() (*Docs, error) { + experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment") + if err != nil { + return nil, err + } + experimentDocs := schemaToDocs(experimentSpecSchema) + experimentsDocs := &Docs{ + Description: "List of MLflow experiments", + AdditionalProperties: experimentDocs, + } + return experimentsDocs, nil +} + +func (reader *OpenapiReader) modelsDocs() (*Docs, error) { + modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model") + if err != nil { + return nil, err + } + modelDocs := schemaToDocs(modelSpecSchema) + modelsDocs := &Docs{ + Description: "List of MLflow models", + AdditionalProperties: modelDocs, + } + return modelsDocs, nil +} + func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { jobsDocs, err := reader.jobsDocs() if err != nil { @@ -192,12 +218,22 @@ func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) { if err != nil { return nil, err } + experimentsDocs, err := reader.experimentsDocs() + if err != nil { + return nil, err + } + modelsDocs, err := reader.modelsDocs() + if err != nil { + return nil, err + } return &Docs{ - Description: "Specification of databricks resources to instantiate", + Description: "Collection of Databricks resources to deploy.", Properties: map[string]*Docs{ - "jobs": jobsDocs, - "pipelines": pipelinesDocs, + "jobs": jobsDocs, + "pipelines": pipelinesDocs, + "experiments": experimentsDocs, + "models": modelsDocs, }, }, nil }
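For reference, once `ResourcesDocs` includes the two new readers, the generated `resources` node in `bundle_descriptions.json` gains `experiments` and `models` entries shaped like the heavily abridged sketch below; the full generated entries, with per-field descriptions resolved from the `ml.Experiment` and `ml.Model` schemas, appear earlier in this patch:

```json
{
  "resources": {
    "description": "Collection of Databricks resources to deploy.",
    "properties": {
      "experiments": {
        "description": "List of MLflow experiments",
        "additionalproperties": {
          "description": ""
        }
      },
      "models": {
        "description": "List of MLflow models",
        "additionalproperties": {
          "description": "",
          "properties": {
            "name": {
              "description": "Unique name for the model."
            }
          }
        }
      }
    }
  }
}
```

Each `additionalproperties` node is produced by `schemaToDocs` from the resolved OpenAPI schema, so the per-field descriptions track the spec rather than being maintained by hand.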