From 1fbec371e51e5c8e374e3df45c4a8b546c25def3 Mon Sep 17 00:00:00 2001 From: Ilya Kuznetsov Date: Fri, 10 Jan 2025 17:47:06 +0100 Subject: [PATCH] fix: Link --- bundle/internal/schema/annotations.go | 12 +- .../schema/annotations_openapi_overrides.yml | 229 +++++++++++++++++- bundle/internal/schema/annotations_test.go | 52 ++-- bundle/schema/jsonschema.json | 35 ++- 4 files changed, 286 insertions(+), 42 deletions(-) diff --git a/bundle/internal/schema/annotations.go b/bundle/internal/schema/annotations.go index 3313f7b1f..b857fddda 100644 --- a/bundle/internal/schema/annotations.go +++ b/bundle/internal/schema/annotations.go @@ -184,15 +184,17 @@ func convertLinksToAbsoluteUrl(s string) string { referencePage := "/dev-tools/bundles/reference.html" // Regular expression to match Markdown-style links like [_](link) - re := regexp.MustCompile(`\[_\]\(([^)]+)\)`) + re := regexp.MustCompile(`\[(.*)\]\(([^)]+)\)`) result := re.ReplaceAllStringFunc(s, func(match string) string { matches := re.FindStringSubmatch(match) if len(matches) < 2 { return match } - link := matches[1] - var text, absoluteURL string + originalText := matches[1] + link := matches[2] + + var text, absoluteURL string if strings.HasPrefix(link, "#") { text = strings.TrimPrefix(link, "#") absoluteURL = fmt.Sprintf("%s%s%s", base, referencePage, link) @@ -210,6 +212,10 @@ func convertLinksToAbsoluteUrl(s string) string { return match } + if originalText != "_" { + text = originalText + } + return fmt.Sprintf("[%s](%s)", text, absoluteURL) }) diff --git a/bundle/internal/schema/annotations_openapi_overrides.yml b/bundle/internal/schema/annotations_openapi_overrides.yml index 21e8217b2..36314b777 100644 --- a/bundle/internal/schema/annotations_openapi_overrides.yml +++ b/bundle/internal/schema/annotations_openapi_overrides.yml @@ -1,4 +1,34 @@ github.com/databricks/cli/bundle/config/resources.Cluster: + "_": + "markdown_description": |- + The cluster resource defines an [all-purpose cluster](/api/workspace/clusters/create). + + "markdown_examples": |- + The following example creates a cluster named `my_cluster` and sets that as the cluster to use to run the notebook in `my_job`: + + ```yaml + bundle: + name: clusters + + resources: + clusters: + my_cluster: + num_workers: 2 + node_type_id: "i3.xlarge" + autoscale: + min_workers: 2 + max_workers: 7 + spark_version: "13.3.x-scala2.12" + spark_conf: + "spark.executor.memory": "2g" + + jobs: + my_job: + tasks: + - task_key: test_task + notebook_task: + notebook_path: "./src/my_notebook.py" + ``` "data_security_mode": "description": |- PLACEHOLDER @@ -18,6 +48,24 @@ github.com/databricks/cli/bundle/config/resources.Cluster: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Dashboard: + "_": + "markdown_description": |- + The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](/dashboards/index.md). + "markdown_examples": |- + The following example includes and deploys the sample __NYC Taxi Trip Analysis__ dashboard to the Databricks workspace. + + ``` yaml + resources: + dashboards: + nyc_taxi_trip_analysis: + display_name: "NYC Taxi Trip Analysis" + file_path: ../src/nyc_taxi_trip_analysis.lvdash.json + warehouse_id: ${var.warehouse_id} + ``` + If you use the UI to modify the dashboard, modifications made through the UI are not applied to the dashboard JSON file in the local bundle unless you explicitly update it using `bundle generate`. You can use the `--watch` option to continuously poll and retrieve changes to the dashboard. See [_](/dev-tools/cli/bundle-commands.md#generate). + + In addition, if you attempt to deploy a bundle that contains a dashboard JSON file that is different than the one in the remote workspace, an error will occur. To force the deploy and overwrite the dashboard in the remote workspace with the local one, use the `--force` option. See [_](/dev-tools/cli/bundle-commands.md#deploy). + "embed_credentials": "description": |- PLACEHOLDER @@ -28,6 +76,24 @@ github.com/databricks/cli/bundle/config/resources.Dashboard: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Job: + "_": + "markdown_description": |- + The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a template to create a job, see [_](/dev-tools/bundles/jobs-tutorial.md). + "markdown_examples": |- + The following example defines a job with the resource key `hello-job` with one notebook task: + + ```yaml + resources: + jobs: + hello-job: + name: hello-job + tasks: + - task_key: hello-task + notebook_task: + notebook_path: ./hello.py + ``` + + For information about defining job tasks and overriding job settings, see [_](/dev-tools/bundles/job-task-types.md), [_](/dev-tools/bundles/job-task-override.md), and [_](/dev-tools/bundles/cluster-override.md). "health": "description": |- PLACEHOLDER @@ -38,6 +104,22 @@ github.com/databricks/cli/bundle/config/resources.Job: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.MlflowExperiment: + "_": + "markdown_description": |- + The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](/mlflow/experiments.md). + "markdown_examples": |- + The following example defines an experiment that all users can view: + + ```yaml + resources: + experiments: + experiment: + name: my_ml_experiment + permissions: + - level: CAN_READ + group_name: users + description: MLflow experiment used to track runs + ``` "permissions": "description": |- PLACEHOLDER @@ -46,22 +128,159 @@ github.com/databricks/cli/bundle/config/resources.MlflowModel: "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint: + "_": + "markdown_description": |- + The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](/machine-learning/model-serving/manage-serving-endpoints.md). + "markdown_examples": |- + The following example defines a model serving endpoint: + + ```yaml + resources: + model_serving_endpoints: + uc_model_serving_endpoint: + name: "uc-model-endpoint" + config: + served_entities: + - entity_name: "myCatalog.mySchema.my-ads-model" + entity_version: "10" + workload_size: "Small" + scale_to_zero_enabled: "true" + traffic_config: + routes: + - served_model_name: "my-ads-model-10" + traffic_percentage: "100" + tags: + - key: "team" + value: "data science" + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Pipeline: + "_": + "markdown_description": |- + The pipeline resource allows you to create [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the template to create a pipeline, see [_](/dev-tools/bundles/pipelines-tutorial.md). + "markdown_examples": |- + The following example defines a pipeline with the resource key `hello-pipeline`: + + ```yaml + resources: + pipelines: + hello-pipeline: + name: hello-pipeline + clusters: + - label: default + num_workers: 1 + development: true + continuous: false + channel: CURRENT + edition: CORE + photon: false + libraries: + - notebook: + path: ./pipeline.py + ``` "permissions": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.QualityMonitor: + "_": + "markdown_description": |- + The quality_monitor resource allows you to define a [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](/machine-learning/model-serving/monitor-diagnose-endpoints.md). + "markdown_examples": |- + The following example defines a quality monitor: + + ```yaml + resources: + quality_monitors: + my_quality_monitor: + table_name: dev.mlops_schema.predictions + output_schema_name: ${bundle.target}.mlops_schema + assets_dir: /Users/${workspace.current_user.userName}/databricks_lakehouse_monitoring + inference_log: + granularities: [1 day] + model_id_col: model_id + prediction_col: prediction + label_col: price + problem_type: PROBLEM_TYPE_REGRESSION + timestamp_col: timestamp + schedule: + quartz_cron_expression: 0 0 8 * * ? # Run Every day at 8am + timezone_id: UTC + ``` "table_name": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.RegisteredModel: + "_": + "markdown_description": |- + The registered model resource allows you to define models in . For information about [registered models](/api/workspace/registeredmodels/create), see [_](/machine-learning/manage-model-lifecycle/index.md). + "markdown_examples": |- + The following example defines a registered model in : + + ```yaml + resources: + registered_models: + model: + name: my_model + catalog_name: ${bundle.target} + schema_name: mlops_schema + comment: Registered model in Unity Catalog for ${bundle.target} deployment target + grants: + - privileges: + - EXECUTE + principal: account users + ``` "grants": "description": |- PLACEHOLDER github.com/databricks/cli/bundle/config/resources.Schema: + "_": + "markdown_description": |- + The schema resource type allows you to define [schemas](/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations: + + - The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema. + - Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](/api/workspace/schemas/update). + "markdown_examples": |- + The following example defines a pipeline with the resource key `my_pipeline` that creates a schema with the key `my_schema` as the target: + + ```yaml + resources: + pipelines: + my_pipeline: + name: test-pipeline-{{.unique_id}} + libraries: + - notebook: + path: ./nb.sql + development: true + catalog: main + target: ${resources.schemas.my_schema.id} + + schemas: + my_schema: + name: test-schema-{{.unique_id}} + catalog_name: main + comment: This schema was created by DABs. + ``` + + A top-level grants mapping is not supported by , so if you want to set grants for a schema, define the grants for the schema within the `schemas` mapping. For more information about grants, see [_](/data-governance/unity-catalog/manage-privileges/index.md#grant). + + The following example defines a schema with grants: + + ```yaml + resources: + schemas: + my_schema: + name: test-schema + grants: + - principal: users + privileges: + - CAN_MANAGE + - principal: my_team + privileges: + - CAN_READ + catalog_name: main + ``` "grants": "description": |- PLACEHOLDER @@ -71,14 +290,14 @@ github.com/databricks/cli/bundle/config/resources.Schema: github.com/databricks/cli/bundle/config/resources.Volume: "_": "markdown_description": |- - The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: + The volume resource type allows you to define and create [volumes](/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that: - * A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path`` in subsequent deployments. + - A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments. - * Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development`` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets) + - Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [_](/dev-tools/bundles/deployment-modes.md#custom-presets). "markdown_examples": |- - The following example creates a Unity Catalog volume with the key `my_volume``: + The following example creates a volume with the key `my_volume`: ```yaml resources: @@ -88,6 +307,8 @@ github.com/databricks/cli/bundle/config/resources.Volume: name: my_volume schema_name: my_schema ``` + + For an example bundle that runs a job that writes to a file in volume, see the [bundle-examples GitHub repository](https://github.com/databricks/bundle-examples/tree/main/knowledge_base/write_from_job_to_volume). "grants": "description": |- PLACEHOLDER diff --git a/bundle/internal/schema/annotations_test.go b/bundle/internal/schema/annotations_test.go index d7e2fea7c..782d2d634 100644 --- a/bundle/internal/schema/annotations_test.go +++ b/bundle/internal/schema/annotations_test.go @@ -9,29 +9,37 @@ func TestConvertLinksToAbsoluteUrl(t *testing.T) { input string expected string }{ + // { + // input: "", + // expected: "", + // }, + // { + // input: "Some text (not a link)", + // expected: "Some text (not a link)", + // }, + // { + // input: "This is a link to [_](#section)", + // expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", + // }, + // { + // input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", + // expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", + // }, + // { + // input: "This is a link to [_](/dev-tools/bundles/resources.html)", + // expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", + // }, + // { + // input: "This is a link to [external](https://external.com)", + // expected: "This is a link to [external](https://external.com)", + // }, + // { + // input: "This is a link to [pipelines](/api/workspace/pipelines/create)", + // expected: "This is a link to [pipelines](https://docs.databricks.com/api/workspace/pipelines/create)", + // }, { - input: "", - expected: "", - }, - { - input: "Some text (not a link)", - expected: "Some text (not a link)", - }, - { - input: "This is a link to [_](#section)", - expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)", - }, - { - input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)", - expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)", - }, - { - input: "This is a link to [_](/dev-tools/bundles/resources.html)", - expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)", - }, - { - input: "This is a link to [external](https://external.com)", - expected: "This is a link to [external](https://external.com)", + input: "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", + expected: "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), [registered models 2](/api/workspace/registeredmodels/create)", }, } diff --git a/bundle/schema/jsonschema.json b/bundle/schema/jsonschema.json index 5f172ee55..4cc8d0d46 100644 --- a/bundle/schema/jsonschema.json +++ b/bundle/schema/jsonschema.json @@ -183,7 +183,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/compute.WorkloadType" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The cluster resource defines an [all-purpose cluster](https://docs.databricks.com/api/workspace/clusters/create)." }, { "type": "string", @@ -246,7 +247,8 @@ "$ref": "#/$defs/string" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The dashboard resource allows you to manage [AI/BI dashboards](/api/workspace/lakeview/create) in a bundle. For information about AI/BI dashboards, see [_](https://docs.databricks.com/dashboards/index.html)." }, { "type": "string", @@ -367,7 +369,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/jobs.WebhookNotifications" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The job resource allows you to define [jobs and their corresponding tasks](/api/workspace/jobs/create) in your bundle. For information about jobs, see [_](/jobs/index.md). For a tutorial that uses a \u003cDABS\u003e template to create a job, see [_](https://docs.databricks.com/dev-tools/bundles/jobs-tutorial.html)." }, { "type": "string", @@ -412,7 +415,8 @@ "$ref": "#/$defs/slice/github.com/databricks/databricks-sdk-go/service/ml.ExperimentTag" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The experiment resource allows you to define [MLflow experiments](/api/workspace/experiments/createexperiment) in a bundle. For information about MLflow experiments, see [_](https://docs.databricks.com/mlflow/experiments.html)." }, { "type": "string", @@ -502,7 +506,8 @@ "required": [ "config", "name" - ] + ], + "markdownDescription": "The model_serving_endpoint resource allows you to define [model serving endpoints](/api/workspace/servingendpoints/create). See [_](https://docs.databricks.com/machine-learning/model-serving/manage-serving-endpoints.html)." }, { "type": "string", @@ -644,7 +649,8 @@ "$ref": "#/$defs/github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger" } }, - "additionalProperties": false + "additionalProperties": false, + "markdownDescription": "The pipeline resource allows you to create \u003cDLT\u003e [pipelines](/api/workspace/pipelines/create). For information about pipelines, see [_](/delta-live-tables/index.md). For a tutorial that uses the \u003cDABS\u003e template to create a pipeline, see [_](https://docs.databricks.com/dev-tools/bundles/pipelines-tutorial.html)." }, { "type": "string", @@ -718,7 +724,8 @@ "table_name", "assets_dir", "output_schema_name" - ] + ], + "markdownDescription": "The quality_monitor resource allows you to define a \u003cUC\u003e [table monitor](/api/workspace/qualitymonitors/create). For information about monitors, see [_](https://docs.databricks.com/machine-learning/model-serving/monitor-diagnose-endpoints.html)." }, { "type": "string", @@ -760,7 +767,8 @@ "catalog_name", "name", "schema_name" - ] + ], + "markdownDescription": "The registered model resource allows you to define models in \u003cUC\u003e. For information about \u003cUC\u003e [registered models](/api/workspace/registeredmodels/create), see [_](https://docs.databricks.com/machine-learning/manage-model-lifecycle/index.html)." }, { "type": "string", @@ -800,7 +808,8 @@ "required": [ "catalog_name", "name" - ] + ], + "markdownDescription": "The schema resource type allows you to define \u003cUC\u003e [schemas](https://docs.databricks.com/api/workspace/schemas/create) for tables and other assets in your workflows and pipelines created as part of a bundle. A schema, different from other resource types, has the following limitations:\n\n- The owner of a schema resource is always the deployment user, and cannot be changed. If `run_as` is specified in the bundle, it will be ignored by operations on the schema.\n- Only fields supported by the corresponding [Schemas object create API](/api/workspace/schemas/create) are available for the schema resource. For example, `enable_predictive_optimization` is not supported as it is only available on the [update API](https://docs.databricks.com/api/workspace/schemas/update)." }, { "type": "string", @@ -846,7 +855,7 @@ "name", "schema_name" ], - "markdownDescription": "The volume resource type allows you to define and create Unity Catalog [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n* A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use Databricks Asset Bundles to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path`` in subsequent deployments.\n\n* Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development`` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)" + "markdownDescription": "The volume resource type allows you to define and create \u003cUC\u003e [volumes](https://docs.databricks.com/api/workspace/volumes/create) as part of a bundle. When deploying a bundle with a volume defined, note that:\n\n- A volume cannot be referenced in the `artifact_path` for the bundle until it exists in the workspace. Hence, if you want to use \u003cDABS\u003e to create the volume, you must first define the volume in the bundle, deploy it to create the volume, then reference it in the `artifact_path` in subsequent deployments.\n\n- Volumes in the bundle are not prepended with the `dev_${workspace.current_user.short_name}` prefix when the deployment target has `mode: development` configured. However, you can manually configure this prefix. See [custom-presets](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html#custom-presets)." }, { "type": "string", @@ -1039,12 +1048,12 @@ "deployment": { "description": "The definition of the bundle deployment", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Deployment", - "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [deployment](https://docs.databricks.com/dev-tools/bundles/reference.html#deployment) and [link](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." + "markdownDescription": "The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)." }, "git": { "description": "The Git version control details that are associated with your bundle.", "$ref": "#/$defs/github.com/databricks/cli/bundle/config.Git", - "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [git](https://docs.databricks.com/dev-tools/bundles/reference.html#git) and [git](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." + "markdownDescription": "The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](https://docs.databricks.com/dev-tools/bundles/settings.html#git)." }, "name": { "description": "The name of the bundle.", @@ -6424,7 +6433,7 @@ "permissions": { "description": "Defines a permission for a specific entity.", "$ref": "#/$defs/slice/github.com/databricks/cli/bundle/config/resources.Permission", - "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [permissions](https://docs.databricks.com/dev-tools/bundles/settings.html#permissions) and [link](https://docs.databricks.com/dev-tools/bundles/permissions.html)." + "markdownDescription": "A Sequence that defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle, where each item in the sequence is a permission for a specific entity.\n\nSee [_](/dev-tools/bundles/settings.md#permissions) and [_](https://docs.databricks.com/dev-tools/bundles/permissions.html)." }, "presets": { "description": "Defines bundle deployment presets.",