Custom annotations for bundle-specific JSON schema fields (#1957)

## Changes

Adds annotations to json-schema for fields which are not covered by
OpenAPI spec.

Custom descriptions were copy-pasted from documentation PR which is
still WIP so descriptions for some fields are missing

Further improvements:
* documentation autogen based on json-schema
* fix missing descriptions

## Tests

This script is not part of CLI package so I didn't test all corner
cases. Few high-level tests were added to be sure that schema
annotations is in sync with actual config

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Ilya Kuznetsov 2024-12-18 11:19:14 +01:00 committed by GitHub
parent 5b84856b17
commit 042c8d88c6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 4143 additions and 273 deletions

View File

@ -11,7 +11,7 @@
"required": ["go"],
"post_generate": [
"go test -timeout 240s -run TestConsistentDatabricksSdkVersion github.com/databricks/cli/internal/build",
"go run ./bundle/internal/schema/*.go ./bundle/schema/jsonschema.json",
"make schema",
"echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes",
"echo 'go.sum linguist-generated=true' >> ./.gitattributes",
"echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes"

View File

@ -99,14 +99,19 @@ jobs:
# By default the ajv-cli runs in strict mode which will fail if the schema
# itself is not valid. Strict mode is more strict than the JSON schema
# specification. See for details: https://ajv.js.org/options.html#strict-mode-options
# The ajv-cli is configured to use the markdownDescription keyword which is not part of the JSON schema specification,
# but is used in editors like VSCode to render markdown in the description field
- name: Validate bundle schema
run: |
go run main.go bundle schema > schema.json
# Add markdownDescription keyword to ajv
echo "module.exports=function(a){a.addKeyword('markdownDescription')}" >> keywords.js
for file in ./bundle/internal/schema/testdata/pass/*.yml; do
ajv test -s schema.json -d $file --valid
ajv test -s schema.json -d $file --valid -c=./keywords.js
done
for file in ./bundle/internal/schema/testdata/fail/*.yml; do
ajv test -s schema.json -d $file --invalid
ajv test -s schema.json -d $file --invalid -c=./keywords.js
done

View File

@ -29,6 +29,10 @@ snapshot:
vendor:
@echo "✓ Filling vendor folder with library code ..."
@go mod vendor
schema:
@echo "✓ Generating json-schema ..."
@go run ./bundle/internal/schema ./bundle/internal/schema ./bundle/schema/jsonschema.json
INTEGRATION = gotestsum --format github-actions --rerun-fails --jsonfile output.json --packages "./integration/..." -- -parallel 4 -timeout=2h
@ -38,4 +42,4 @@ integration:
integration-short:
$(INTEGRATION) -short
.PHONY: lint lintcheck test testonly coverage build snapshot vendor integration integration-short
.PHONY: lint lintcheck test testonly coverage build snapshot vendor schema integration integration-short

View File

@ -0,0 +1,209 @@
package main
import (
"bytes"
"fmt"
"os"
"reflect"
"regexp"
"strings"
yaml3 "gopkg.in/yaml.v3"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/convert"
"github.com/databricks/cli/libs/dyn/merge"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/dyn/yamlsaver"
"github.com/databricks/cli/libs/jsonschema"
)
type annotation struct {
Description string `json:"description,omitempty"`
MarkdownDescription string `json:"markdown_description,omitempty"`
Title string `json:"title,omitempty"`
Default any `json:"default,omitempty"`
Enum []any `json:"enum,omitempty"`
}
type annotationHandler struct {
// Annotations read from all annotation files including all overrides
parsedAnnotations annotationFile
// Missing annotations for fields that are found in config that need to be added to the annotation file
missingAnnotations annotationFile
}
/**
* Parsed file with annotations, expected format:
* github.com/databricks/cli/bundle/config.Bundle:
* cluster_id:
* description: "Description"
*/
type annotationFile map[string]map[string]annotation
const Placeholder = "PLACEHOLDER"
// Adds annotations to the JSON schema reading from the annotation files.
// More details https://json-schema.org/understanding-json-schema/reference/annotations
func newAnnotationHandler(sources []string) (*annotationHandler, error) {
prev := dyn.NilValue
for _, path := range sources {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
}
generated, err := yamlloader.LoadYAML(path, bytes.NewBuffer(b))
if err != nil {
return nil, err
}
prev, err = merge.Merge(prev, generated)
if err != nil {
return nil, err
}
}
var data annotationFile
err := convert.ToTyped(&data, prev)
if err != nil {
return nil, err
}
d := &annotationHandler{}
d.parsedAnnotations = data
d.missingAnnotations = annotationFile{}
return d, nil
}
func (d *annotationHandler) addAnnotations(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
refPath := getPath(typ)
shouldHandle := strings.HasPrefix(refPath, "github.com")
if !shouldHandle {
return s
}
annotations := d.parsedAnnotations[refPath]
if annotations == nil {
annotations = map[string]annotation{}
}
rootTypeAnnotation, ok := annotations[RootTypeKey]
if ok {
assignAnnotation(&s, rootTypeAnnotation)
}
for k, v := range s.Properties {
item := annotations[k]
if item.Description == "" {
item.Description = Placeholder
emptyAnnotations := d.missingAnnotations[refPath]
if emptyAnnotations == nil {
emptyAnnotations = map[string]annotation{}
d.missingAnnotations[refPath] = emptyAnnotations
}
emptyAnnotations[k] = item
}
assignAnnotation(v, item)
}
return s
}
// Writes missing annotations with placeholder values back to the annotation file
func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error {
existingFile, err := os.ReadFile(outputPath)
if err != nil {
return err
}
existing, err := yamlloader.LoadYAML("", bytes.NewBuffer(existingFile))
if err != nil {
return err
}
missingAnnotations, err := convert.FromTyped(&d.missingAnnotations, dyn.NilValue)
if err != nil {
return err
}
output, err := merge.Merge(existing, missingAnnotations)
if err != nil {
return err
}
err = saveYamlWithStyle(outputPath, output)
if err != nil {
return err
}
return nil
}
func getPath(typ reflect.Type) string {
return typ.PkgPath() + "." + typ.Name()
}
func assignAnnotation(s *jsonschema.Schema, a annotation) {
if a.Description != Placeholder {
s.Description = a.Description
}
if a.Default != nil {
s.Default = a.Default
}
s.MarkdownDescription = convertLinksToAbsoluteUrl(a.MarkdownDescription)
s.Title = a.Title
s.Enum = a.Enum
}
func saveYamlWithStyle(outputPath string, input dyn.Value) error {
style := map[string]yaml3.Style{}
file, _ := input.AsMap()
for _, v := range file.Keys() {
style[v.MustString()] = yaml3.LiteralStyle
}
saver := yamlsaver.NewSaverWithStyle(style)
err := saver.SaveAsYAML(file, outputPath, true)
if err != nil {
return err
}
return nil
}
func convertLinksToAbsoluteUrl(s string) string {
if s == "" {
return s
}
base := "https://docs.databricks.com"
referencePage := "/dev-tools/bundles/reference.html"
// Regular expression to match Markdown-style links like [_](link)
re := regexp.MustCompile(`\[_\]\(([^)]+)\)`)
result := re.ReplaceAllStringFunc(s, func(match string) string {
matches := re.FindStringSubmatch(match)
if len(matches) < 2 {
return match
}
link := matches[1]
var text, absoluteURL string
if strings.HasPrefix(link, "#") {
text = strings.TrimPrefix(link, "#")
absoluteURL = fmt.Sprintf("%s%s%s", base, referencePage, link)
// Handle relative paths like /dev-tools/bundles/resources.html#dashboard
} else if strings.HasPrefix(link, "/") {
absoluteURL = strings.ReplaceAll(fmt.Sprintf("%s%s", base, link), ".md", ".html")
if strings.Contains(link, "#") {
parts := strings.Split(link, "#")
text = parts[1]
} else {
text = "link"
}
} else {
return match
}
return fmt.Sprintf("[%s](%s)", text, absoluteURL)
})
return result
}

View File

@ -0,0 +1,501 @@
github.com/databricks/cli/bundle/config.Artifact:
"build":
"description": |-
An optional set of non-default build commands that you want to run locally before deployment.
For Python wheel builds, the Databricks CLI assumes that it can find a local install of the Python wheel package to run builds, and it runs the command python setup.py bdist_wheel by default during each bundle deployment.
To specify multiple build commands, separate each command with double-ampersand (&&) characters.
"executable":
"description": |-
The executable type.
"files":
"description": |-
The source files for the artifact.
"markdown_description": |-
The source files for the artifact, defined as an [_](#artifact_file).
"path":
"description": |-
The location where the built artifact will be saved.
"type":
"description": |-
The type of the artifact.
"markdown_description": |-
The type of the artifact. Valid values are `wheel` or `jar`
github.com/databricks/cli/bundle/config.ArtifactFile:
"source":
"description": |-
The path of the files used to build the artifact.
github.com/databricks/cli/bundle/config.Bundle:
"cluster_id":
"description": |-
The ID of a cluster to use to run the bundle.
"markdown_description": |-
The ID of a cluster to use to run the bundle. See [_](/dev-tools/bundles/settings.md#cluster_id).
"compute_id":
"description": |-
PLACEHOLDER
"databricks_cli_version":
"description": |-
The Databricks CLI version to use for the bundle.
"markdown_description": |-
The Databricks CLI version to use for the bundle. See [_](/dev-tools/bundles/settings.md#databricks_cli_version).
"deployment":
"description": |-
The definition of the bundle deployment
"markdown_description": |-
The definition of the bundle deployment. For supported attributes, see [_](#deployment) and [_](/dev-tools/bundles/deployment-modes.md).
"git":
"description": |-
The Git version control details that are associated with your bundle.
"markdown_description": |-
The Git version control details that are associated with your bundle. For supported attributes, see [_](#git) and [_](/dev-tools/bundles/settings.md#git).
"name":
"description": |-
The name of the bundle.
"uuid":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config.Deployment:
"fail_on_active_runs":
"description": |-
Whether to fail on active runs. If this is set to true a deployment that is running can be interrupted.
"lock":
"description": |-
The deployment lock attributes.
"markdown_description": |-
The deployment lock attributes. See [_](#lock).
github.com/databricks/cli/bundle/config.Experimental:
"pydabs":
"description": |-
The PyDABs configuration.
"python_wheel_wrapper":
"description": |-
Whether to use a Python wheel wrapper
"scripts":
"description": |-
The commands to run
"use_legacy_run_as":
"description": |-
Whether to use the legacy run_as behavior
github.com/databricks/cli/bundle/config.Git:
"branch":
"description": |-
The Git branch name.
"markdown_description": |-
The Git branch name. See [_](/dev-tools/bundles/settings.md#git).
"origin_url":
"description": |-
The origin URL of the repository.
"markdown_description": |-
The origin URL of the repository. See [_](/dev-tools/bundles/settings.md#git).
github.com/databricks/cli/bundle/config.Lock:
"enabled":
"description": |-
Whether this lock is enabled.
"force":
"description": |-
Whether to force this lock if it is enabled.
github.com/databricks/cli/bundle/config.Presets:
"jobs_max_concurrent_runs":
"description": |-
The maximum concurrent runs for a job.
"name_prefix":
"description": |-
The prefix for job runs of the bundle.
"pipelines_development":
"description": |-
Whether pipeline deployments should be locked in development mode.
"source_linked_deployment":
"description": |-
Whether to link the deployment to the bundle source.
"tags":
"description": |-
The tags for the bundle deployment.
"trigger_pause_status":
"description": |-
A pause status to apply to all job triggers and schedules. Valid values are PAUSED or UNPAUSED.
github.com/databricks/cli/bundle/config.PyDABs:
"enabled":
"description": |-
Whether or not PyDABs (Private Preview) is enabled
"import":
"description": |-
The PyDABs project to import to discover resources, resource generator and mutators
"venv_path":
"description": |-
The Python virtual environment path
github.com/databricks/cli/bundle/config.Resources:
"clusters":
"description": |-
The cluster definitions for the bundle.
"markdown_description": |-
The cluster definitions for the bundle. See [_](/dev-tools/bundles/resources.md#cluster)
"dashboards":
"description": |-
The dashboard definitions for the bundle.
"markdown_description": |-
The dashboard definitions for the bundle. See [_](/dev-tools/bundles/resources.md#dashboard)
"experiments":
"description": |-
The experiment definitions for the bundle.
"markdown_description": |-
The experiment definitions for the bundle. See [_](/dev-tools/bundles/resources.md#experiment)
"jobs":
"description": |-
The job definitions for the bundle.
"markdown_description": |-
The job definitions for the bundle. See [_](/dev-tools/bundles/resources.md#job)
"model_serving_endpoints":
"description": |-
The model serving endpoint definitions for the bundle.
"markdown_description": |-
The model serving endpoint definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model_serving_endpoint)
"models":
"description": |-
The model definitions for the bundle.
"markdown_description": |-
The model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#model)
"pipelines":
"description": |-
The pipeline definitions for the bundle.
"markdown_description": |-
The pipeline definitions for the bundle. See [_](/dev-tools/bundles/resources.md#pipeline)
"quality_monitors":
"description": |-
The quality monitor definitions for the bundle.
"markdown_description": |-
The quality monitor definitions for the bundle. See [_](/dev-tools/bundles/resources.md#quality_monitor)
"registered_models":
"description": |-
The registered model definitions for the bundle.
"markdown_description": |-
The registered model definitions for the bundle. See [_](/dev-tools/bundles/resources.md#registered_model)
"schemas":
"description": |-
The schema definitions for the bundle.
"markdown_description": |-
The schema definitions for the bundle. See [_](/dev-tools/bundles/resources.md#schema)
"volumes":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config.Root:
"artifacts":
"description": |-
Defines the attributes to build an artifact
"bundle":
"description": |-
The attributes of the bundle.
"markdown_description": |-
The attributes of the bundle. See [_](/dev-tools/bundles/settings.md#bundle)
"experimental":
"description": |-
Defines attributes for experimental features.
"include":
"description": |-
Specifies a list of path globs that contain configuration files to include within the bundle.
"markdown_description": |-
Specifies a list of path globs that contain configuration files to include within the bundle. See [_](/dev-tools/bundles/settings.md#include)
"permissions":
"description": |-
Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle
"markdown_description": |-
Defines the permissions to apply to experiments, jobs, pipelines, and models defined in the bundle. See [_](/dev-tools/bundles/settings.md#permissions) and [_](/dev-tools/bundles/permissions.md).
"presets":
"description": |-
Defines bundle deployment presets.
"markdown_description": |-
Defines bundle deployment presets. See [_](/dev-tools/bundles/deployment-modes.md#presets).
"resources":
"description": |-
Specifies information about the Databricks resources used by the bundle
"markdown_description": |-
Specifies information about the Databricks resources used by the bundle. See [_](/dev-tools/bundles/resources.md).
"run_as":
"description": |-
The identity to use to run the bundle.
"sync":
"description": |-
The files and file paths to include or exclude in the bundle.
"markdown_description": |-
The files and file paths to include or exclude in the bundle. See [_](/dev-tools/bundles/)
"targets":
"description": |-
Defines deployment targets for the bundle.
"variables":
"description": |-
A Map that defines the custom variables for the bundle, where each key is the name of the variable, and the value is a Map that defines the variable.
"workspace":
"description": |-
Defines the Databricks workspace for the bundle.
github.com/databricks/cli/bundle/config.Sync:
"exclude":
"description": |-
A list of files or folders to exclude from the bundle.
"include":
"description": |-
A list of files or folders to include in the bundle.
"paths":
"description": |-
The local folder paths, which can be outside the bundle root, to synchronize to the workspace when the bundle is deployed.
github.com/databricks/cli/bundle/config.Target:
"artifacts":
"description": |-
The artifacts to include in the target deployment.
"markdown_description": |-
The artifacts to include in the target deployment. See [_](#artifact)
"bundle":
"description": |-
The name of the bundle when deploying to this target.
"cluster_id":
"description": |-
The ID of the cluster to use for this target.
"compute_id":
"description": |-
Deprecated. The ID of the compute to use for this target.
"default":
"description": |-
Whether this target is the default target.
"git":
"description": |-
The Git version control settings for the target.
"markdown_description": |-
The Git version control settings for the target. See [_](#git).
"mode":
"description": |-
The deployment mode for the target.
"markdown_description": |-
The deployment mode for the target. Valid values are `development` or `production`. See [_](/dev-tools/bundles/deployment-modes.md).
"permissions":
"description": |-
The permissions for deploying and running the bundle in the target.
"markdown_description": |-
The permissions for deploying and running the bundle in the target. See [_](#permission).
"presets":
"description": |-
The deployment presets for the target.
"markdown_description": |-
The deployment presets for the target. See [_](#preset).
"resources":
"description": |-
The resource definitions for the target.
"markdown_description": |-
The resource definitions for the target. See [_](#resources).
"run_as":
"description": |-
The identity to use to run the bundle.
"markdown_description": |-
The identity to use to run the bundle. See [_](#job_run_as) and [_](/dev-tools/bundles/run_as.md).
"sync":
"description": |-
The local paths to sync to the target workspace when a bundle is run or deployed.
"markdown_description": |-
The local paths to sync to the target workspace when a bundle is run or deployed. See [_](#sync).
"variables":
"description": |-
The custom variable definitions for the target.
"markdown_description": |-
The custom variable definitions for the target. See [_](/dev-tools/bundles/settings.md#variables) and [_](/dev-tools/bundles/variables.md).
"workspace":
"description": |-
The Databricks workspace for the target.
"markdown_description": |-
The Databricks workspace for the target. [_](#workspace)
github.com/databricks/cli/bundle/config.Workspace:
"artifact_path":
"description": |-
The artifact path to use within the workspace for both deployments and workflow runs
"auth_type":
"description": |-
The authentication type.
"azure_client_id":
"description": |-
The Azure client ID
"azure_environment":
"description": |-
The Azure environment
"azure_login_app_id":
"description": |-
The Azure login app ID
"azure_tenant_id":
"description": |-
The Azure tenant ID
"azure_use_msi":
"description": |-
Whether to use MSI for Azure
"azure_workspace_resource_id":
"description": |-
The Azure workspace resource ID
"client_id":
"description": |-
The client ID for the workspace
"file_path":
"description": |-
The file path to use within the workspace for both deployments and workflow runs
"google_service_account":
"description": |-
The Google service account name
"host":
"description": |-
The Databricks workspace host URL
"profile":
"description": |-
The Databricks workspace profile name
"resource_path":
"description": |-
The workspace resource path
"root_path":
"description": |-
The Databricks workspace root path
"state_path":
"description": |-
The workspace state path
github.com/databricks/cli/bundle/config/resources.Grant:
"principal":
"description": |-
The name of the principal that will be granted privileges
"privileges":
"description": |-
The privileges to grant to the specified entity
github.com/databricks/cli/bundle/config/resources.Permission:
"group_name":
"description": |-
The name of the group that has the permission set in level.
"level":
"description": |-
The allowed permission for user, group, service principal defined for this permission.
"service_principal_name":
"description": |-
The name of the service principal that has the permission set in level.
"user_name":
"description": |-
The name of the user that has the permission set in level.
github.com/databricks/cli/bundle/config/variable.Lookup:
"alert":
"description": |-
PLACEHOLDER
"cluster":
"description": |-
PLACEHOLDER
"cluster_policy":
"description": |-
PLACEHOLDER
"dashboard":
"description": |-
PLACEHOLDER
"instance_pool":
"description": |-
PLACEHOLDER
"job":
"description": |-
PLACEHOLDER
"metastore":
"description": |-
PLACEHOLDER
"notification_destination":
"description": |-
PLACEHOLDER
"pipeline":
"description": |-
PLACEHOLDER
"query":
"description": |-
PLACEHOLDER
"service_principal":
"description": |-
PLACEHOLDER
"warehouse":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/variable.TargetVariable:
"default":
"description": |-
PLACEHOLDER
"description":
"description": |-
The description of the variable.
"lookup":
"description": |-
The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID.
"type":
"description": |-
The type of the variable.
"markdown_description":
"description": |-
The type of the variable.
github.com/databricks/cli/bundle/config/variable.Variable:
"default":
"description": |-
PLACEHOLDER
"description":
"description": |-
The description of the variable
"lookup":
"description": |-
The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID.
"markdown_description": |-
The name of the `alert`, `cluster_policy`, `cluster`, `dashboard`, `instance_pool`, `job`, `metastore`, `pipeline`, `query`, `service_principal`, or `warehouse` object for which to retrieve an ID."
"type":
"description": |-
The type of the variable.
github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig:
"ai21labs_api_key":
"description": |-
PLACEHOLDER
"ai21labs_api_key_plaintext":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig:
"private_key":
"description": |-
PLACEHOLDER
"private_key_plaintext":
"description": |-
PLACEHOLDER
"project_id":
"description": |-
PLACEHOLDER
"region":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig:
"microsoft_entra_client_id":
"description": |-
PLACEHOLDER
"microsoft_entra_client_secret":
"description": |-
PLACEHOLDER
"microsoft_entra_client_secret_plaintext":
"description": |-
PLACEHOLDER
"microsoft_entra_tenant_id":
"description": |-
PLACEHOLDER
"openai_api_base":
"description": |-
PLACEHOLDER
"openai_api_key":
"description": |-
PLACEHOLDER
"openai_api_key_plaintext":
"description": |-
PLACEHOLDER
"openai_api_type":
"description": |-
PLACEHOLDER
"openai_api_version":
"description": |-
PLACEHOLDER
"openai_deployment_name":
"description": |-
PLACEHOLDER
"openai_organization":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig:
"palm_api_key":
"description": |-
PLACEHOLDER
"palm_api_key_plaintext":
"description": |-
PLACEHOLDER

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,155 @@
github.com/databricks/cli/bundle/config/resources.Cluster:
"data_security_mode":
"description": |-
PLACEHOLDER
"docker_image":
"description": |-
PLACEHOLDER
"permissions":
"description": |-
PLACEHOLDER
"runtime_engine":
"description": |-
PLACEHOLDER
"workload_type":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.Dashboard:
"embed_credentials":
"description": |-
PLACEHOLDER
"file_path":
"description": |-
PLACEHOLDER
"permissions":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.Job:
"health":
"description": |-
PLACEHOLDER
"permissions":
"description": |-
PLACEHOLDER
"run_as":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.MlflowExperiment:
"permissions":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.MlflowModel:
"permissions":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.ModelServingEndpoint:
"permissions":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.Pipeline:
"permissions":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.QualityMonitor:
"table_name":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.RegisteredModel:
"grants":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.Schema:
"grants":
"description": |-
PLACEHOLDER
"properties":
"description": |-
PLACEHOLDER
github.com/databricks/cli/bundle/config/resources.Volume:
"grants":
"description": |-
PLACEHOLDER
"volume_type":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/compute.AwsAttributes:
"availability":
"description": |-
PLACEHOLDER
"ebs_volume_type":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/compute.AzureAttributes:
"availability":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/compute.ClusterSpec:
"data_security_mode":
"description": |-
PLACEHOLDER
"docker_image":
"description": |-
PLACEHOLDER
"runtime_engine":
"description": |-
PLACEHOLDER
"workload_type":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/compute.DockerImage:
"basic_auth":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/compute.GcpAttributes:
"availability":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.GitSource:
"git_snapshot":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.JobEnvironment:
"spec":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRule:
"metric":
"description": |-
PLACEHOLDER
"op":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.JobsHealthRules:
"rules":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.RunJobTask:
"python_named_params":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.Task:
"health":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.TriggerSettings:
"table_update":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/jobs.Webhook:
"id":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/pipelines.CronTrigger:
"quartz_cron_schedule":
"description": |-
PLACEHOLDER
"timezone_id":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/pipelines.PipelineTrigger:
"cron":
"description": |-
PLACEHOLDER
"manual":
"description": |-
PLACEHOLDER

View File

@ -0,0 +1,44 @@
package main
import (
"testing"
)
func TestConvertLinksToAbsoluteUrl(t *testing.T) {
tests := []struct {
input string
expected string
}{
{
input: "",
expected: "",
},
{
input: "Some text (not a link)",
expected: "Some text (not a link)",
},
{
input: "This is a link to [_](#section)",
expected: "This is a link to [section](https://docs.databricks.com/dev-tools/bundles/reference.html#section)",
},
{
input: "This is a link to [_](/dev-tools/bundles/resources.html#dashboard)",
expected: "This is a link to [dashboard](https://docs.databricks.com/dev-tools/bundles/resources.html#dashboard)",
},
{
input: "This is a link to [_](/dev-tools/bundles/resources.html)",
expected: "This is a link to [link](https://docs.databricks.com/dev-tools/bundles/resources.html)",
},
{
input: "This is a link to [external](https://external.com)",
expected: "This is a link to [external](https://external.com)",
},
}
for _, test := range tests {
result := convertLinksToAbsoluteUrl(test.input)
if result != test.expected {
t.Errorf("For input '%s', expected '%s', but got '%s'", test.input, test.expected, result)
}
}
}

View File

@ -5,6 +5,7 @@ import (
"fmt"
"log"
"os"
"path/filepath"
"reflect"
"github.com/databricks/cli/bundle/config"
@ -43,7 +44,8 @@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema.
case jsonschema.ArrayType, jsonschema.ObjectType:
// arrays and objects can have complex variable values specified.
return jsonschema.Schema{
AnyOf: []jsonschema.Schema{
// OneOf is used because we don't expect more than 1 match and schema-based auto-complete works better with OneOf
OneOf: []jsonschema.Schema{
s,
{
Type: jsonschema.StringType,
@ -55,7 +57,7 @@ func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema.
// primitives can have variable values, or references like ${bundle.xyz}
// or ${workspace.xyz}
return jsonschema.Schema{
AnyOf: []jsonschema.Schema{
OneOf: []jsonschema.Schema{
s,
{Type: jsonschema.StringType, Pattern: interpolationPattern("resources")},
{Type: jsonschema.StringType, Pattern: interpolationPattern("bundle")},
@ -113,37 +115,60 @@ func makeVolumeTypeOptional(typ reflect.Type, s jsonschema.Schema) jsonschema.Sc
}
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: go run main.go <output-file>")
if len(os.Args) != 3 {
fmt.Println("Usage: go run main.go <work-dir> <output-file>")
os.Exit(1)
}
// Directory with annotation files
workdir := os.Args[1]
// Output file, where the generated JSON schema will be written to.
outputFile := os.Args[1]
outputFile := os.Args[2]
generateSchema(workdir, outputFile)
}
func generateSchema(workdir, outputFile string) {
annotationsPath := filepath.Join(workdir, "annotations.yml")
annotationsOpenApiPath := filepath.Join(workdir, "annotations_openapi.yml")
annotationsOpenApiOverridesPath := filepath.Join(workdir, "annotations_openapi_overrides.yml")
// Input file, the databricks openapi spec.
inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC")
if inputFile == "" {
log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set")
if inputFile != "" {
p, err := newParser(inputFile)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Writing OpenAPI annotations to %s\n", annotationsOpenApiPath)
err = p.extractAnnotations(reflect.TypeOf(config.Root{}), annotationsOpenApiPath, annotationsOpenApiOverridesPath)
if err != nil {
log.Fatal(err)
}
}
p, err := newParser(inputFile)
a, err := newAnnotationHandler([]string{annotationsOpenApiPath, annotationsOpenApiOverridesPath, annotationsPath})
if err != nil {
log.Fatal(err)
}
// Generate the JSON schema from the bundle Go struct.
s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
p.addDescriptions,
p.addEnums,
removeJobsFields,
makeVolumeTypeOptional,
a.addAnnotations,
addInterpolationPatterns,
})
if err != nil {
log.Fatal(err)
}
// Overwrite the input annotation file, adding missing annotations
err = a.syncWithMissingAnnotations(annotationsPath)
if err != nil {
log.Fatal(err)
}
b, err := json.MarshalIndent(s, "", " ")
if err != nil {
log.Fatal(err)

View File

@ -0,0 +1,125 @@
package main
import (
"bytes"
"fmt"
"io"
"os"
"path"
"reflect"
"strings"
"testing"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/merge"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/jsonschema"
"github.com/ghodss/yaml"
"github.com/stretchr/testify/assert"
)
func copyFile(src, dst string) error {
in, err := os.Open(src)
if err != nil {
return err
}
defer in.Close()
out, err := os.Create(dst)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, in)
if err != nil {
return err
}
return out.Close()
}
// Checks whether descriptions are added for new config fields in the annotations.yml file
// If this test fails either manually add descriptions to the `annotations.yml` or do the following:
// 1. run `make schema` from the repository root to add placeholder descriptions
// 2. replace all "PLACEHOLDER" values with the actual descriptions if possible
// 3. run `make schema` again to regenerate the schema with acutal descriptions
func TestRequiredAnnotationsForNewFields(t *testing.T) {
workdir := t.TempDir()
annotationsPath := path.Join(workdir, "annotations.yml")
annotationsOpenApiPath := path.Join(workdir, "annotations_openapi.yml")
annotationsOpenApiOverridesPath := path.Join(workdir, "annotations_openapi_overrides.yml")
// Copy existing annotation files from the same folder as this test
err := copyFile("annotations.yml", annotationsPath)
assert.NoError(t, err)
err = copyFile("annotations_openapi.yml", annotationsOpenApiPath)
assert.NoError(t, err)
err = copyFile("annotations_openapi_overrides.yml", annotationsOpenApiOverridesPath)
assert.NoError(t, err)
generateSchema(workdir, path.Join(t.TempDir(), "schema.json"))
originalFile, err := os.ReadFile("annotations.yml")
assert.NoError(t, err)
currentFile, err := os.ReadFile(annotationsPath)
assert.NoError(t, err)
original, err := yamlloader.LoadYAML("", bytes.NewBuffer(originalFile))
assert.NoError(t, err)
current, err := yamlloader.LoadYAML("", bytes.NewBuffer(currentFile))
assert.NoError(t, err)
// Collect added paths.
var updatedFieldPaths []string
_, err = merge.Override(original, current, merge.OverrideVisitor{
VisitInsert: func(basePath dyn.Path, right dyn.Value) (dyn.Value, error) {
updatedFieldPaths = append(updatedFieldPaths, basePath.String())
return right, nil
},
})
assert.NoError(t, err)
assert.Empty(t, updatedFieldPaths, fmt.Sprintf("Missing JSON-schema descriptions for new config fields in bundle/internal/schema/annotations.yml:\n%s", strings.Join(updatedFieldPaths, "\n")))
}
// Checks whether types in annotation files are still present in Config type
func TestNoDetachedAnnotations(t *testing.T) {
files := []string{
"annotations.yml",
"annotations_openapi.yml",
"annotations_openapi_overrides.yml",
}
types := map[string]bool{}
for _, file := range files {
annotations, err := getAnnotations(file)
assert.NoError(t, err)
for k := range annotations {
types[k] = false
}
}
_, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
delete(types, getPath(typ))
return s
},
})
assert.NoError(t, err)
for typ := range types {
t.Errorf("Type `%s` in annotations file is not found in `root.Config` type", typ)
}
assert.Empty(t, types, "Detached annotations found, regenerate schema and check for package path changes")
}
func getAnnotations(path string) (annotationFile, error) {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
}
var data annotationFile
err = yaml.Unmarshal(b, &data)
return data, err
}

View File

@ -1,6 +1,7 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"os"
@ -8,6 +9,9 @@ import (
"reflect"
"strings"
"github.com/ghodss/yaml"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/jsonschema"
)
@ -23,6 +27,8 @@ type openapiParser struct {
ref map[string]jsonschema.Schema
}
const RootTypeKey = "_"
func newParser(path string) (*openapiParser, error) {
b, err := os.ReadFile(path)
if err != nil {
@ -89,35 +95,95 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) {
}
// Use the OpenAPI spec to load descriptions for the given type.
func (p *openapiParser) addDescriptions(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
ref, ok := p.findRef(typ)
if !ok {
return s
func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error {
annotations := annotationFile{}
overrides := annotationFile{}
b, err := os.ReadFile(overridesPath)
if err != nil {
return err
}
err = yaml.Unmarshal(b, &overrides)
if err != nil {
return err
}
if overrides == nil {
overrides = annotationFile{}
}
s.Description = ref.Description
for k, v := range s.Properties {
if refProp, ok := ref.Properties[k]; ok {
v.Description = refProp.Description
}
_, err = jsonschema.FromType(typ, []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
func(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
ref, ok := p.findRef(typ)
if !ok {
return s
}
basePath := getPath(typ)
pkg := map[string]annotation{}
annotations[basePath] = pkg
if ref.Description != "" || ref.Enum != nil {
pkg[RootTypeKey] = annotation{Description: ref.Description, Enum: ref.Enum}
}
for k := range s.Properties {
if refProp, ok := ref.Properties[k]; ok {
pkg[k] = annotation{Description: refProp.Description, Enum: refProp.Enum}
if refProp.Description == "" {
addEmptyOverride(k, basePath, overrides)
}
} else {
addEmptyOverride(k, basePath, overrides)
}
}
return s
},
})
if err != nil {
return err
}
return s
b, err = yaml.Marshal(overrides)
if err != nil {
return err
}
o, err := yamlloader.LoadYAML("", bytes.NewBuffer(b))
if err != nil {
return err
}
err = saveYamlWithStyle(overridesPath, o)
if err != nil {
return err
}
b, err = yaml.Marshal(annotations)
if err != nil {
return err
}
b = bytes.Join([][]byte{[]byte("# This file is auto-generated. DO NOT EDIT."), b}, []byte("\n"))
err = os.WriteFile(outputPath, b, 0o644)
if err != nil {
return err
}
return nil
}
// Use the OpenAPI spec add enum values for the given type.
func (p *openapiParser) addEnums(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
ref, ok := p.findRef(typ)
func addEmptyOverride(key, pkg string, overridesFile annotationFile) {
if overridesFile[pkg] == nil {
overridesFile[pkg] = map[string]annotation{}
}
overrides := overridesFile[pkg]
if overrides[key].Description == "" {
overrides[key] = annotation{Description: Placeholder}
}
a, ok := overrides[key]
if !ok {
return s
a = annotation{}
}
s.Enum = append(s.Enum, ref.Enum...)
for k, v := range s.Properties {
if refProp, ok := ref.Properties[k]; ok {
v.Enum = append(v.Enum, refProp.Enum...)
}
if a.Description == "" {
a.Description = Placeholder
}
return s
overrides[key] = a
}

View File

@ -0,0 +1,5 @@
targets:
production:
variables:
myvar:
default: true

View File

@ -41,21 +41,21 @@ func TestJsonSchema(t *testing.T) {
resourceJob := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Job")
fields := []string{"name", "continuous", "tasks", "trigger"}
for _, field := range fields {
assert.NotEmpty(t, resourceJob.AnyOf[0].Properties[field].Description)
assert.NotEmpty(t, resourceJob.OneOf[0].Properties[field].Description)
}
// Assert descriptions were also loaded for a job task definition.
jobTask := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.Task")
fields = []string{"notebook_task", "spark_jar_task", "spark_python_task", "spark_submit_task", "description", "depends_on", "environment_key", "for_each_task", "existing_cluster_id"}
for _, field := range fields {
assert.NotEmpty(t, jobTask.AnyOf[0].Properties[field].Description)
assert.NotEmpty(t, jobTask.OneOf[0].Properties[field].Description)
}
// Assert descriptions are loaded for pipelines
pipeline := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Pipeline")
fields = []string{"name", "catalog", "clusters", "channel", "continuous", "development"}
for _, field := range fields {
assert.NotEmpty(t, pipeline.AnyOf[0].Properties[field].Description)
assert.NotEmpty(t, pipeline.OneOf[0].Properties[field].Description)
}
providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider")

File diff suppressed because it is too large Load Diff

View File

@ -34,4 +34,10 @@ type Extension struct {
// Version of the schema. This is used to determine if the schema is
// compatible with the current CLI version.
Version *int `json:"version,omitempty"`
// This field is not in JSON schema spec, but it is supported in VSCode and in the Databricks Workspace
// It is used to provide a rich description of the field in the hover tooltip.
// https://code.visualstudio.com/docs/languages/json#_use-rich-formatting-in-hovers
// Also it can be used in documentation generation.
MarkdownDescription string `json:"markdownDescription,omitempty"`
}

View File

@ -69,6 +69,13 @@ type Schema struct {
// Schema that must match any of the schemas in the array
AnyOf []Schema `json:"anyOf,omitempty"`
// Schema that must match one of the schemas in the array
OneOf []Schema `json:"oneOf,omitempty"`
// Title of the object, rendered as inline documentation in the IDE.
// https://json-schema.org/understanding-json-schema/reference/annotations
Title string `json:"title,omitempty"`
}
// Default value defined in a JSON Schema, represented as a string.