Merge remote-tracking branch 'origin/main' into dashboards

This commit is contained in:
Pieter Noordhuis 2024-09-12 16:03:58 +02:00
commit 6dadf667b5
No known key found for this signature in database
GPG Key ID: 12ACCCC104CF2930
62 changed files with 7322 additions and 9903 deletions

View File

@ -11,10 +11,10 @@
"toolchain": {
"required": ["go"],
"post_generate": [
"go run ./bundle/internal/bundle/schema/main.go ./bundle/schema/docs/bundle_descriptions.json",
"go run ./bundle/internal/schema/*.go ./bundle/schema/jsonschema.json",
"echo 'bundle/internal/tf/schema/\\*.go linguist-generated=true' >> ./.gitattributes",
"echo 'go.sum linguist-generated=true' >> ./.gitattributes",
"echo 'bundle/schema/docs/bundle_descriptions.json linguist-generated=true' >> ./.gitattributes"
"echo 'bundle/schema/jsonschema.json linguist-generated=true' >> ./.gitattributes"
]
}
}

View File

@ -1 +1 @@
3eae49b444cac5a0118a3503e5b7ecef7f96527a
d05898328669a3f8ab0c2ecee37db2673d3ea3f7

2
.gitattributes vendored
View File

@ -120,4 +120,4 @@ cmd/workspace/workspace-conf/workspace-conf.go linguist-generated=true
cmd/workspace/workspace/workspace.go linguist-generated=true
bundle/internal/tf/schema/\*.go linguist-generated=true
go.sum linguist-generated=true
bundle/schema/docs/bundle_descriptions.json linguist-generated=true
bundle/schema/jsonschema.json linguist-generated=true

View File

@ -107,11 +107,18 @@ jobs:
run: npm install -g ajv-cli@5.0.0
# Assert that the generated bundle schema is a valid JSON schema by using
# ajv-cli to validate it against a sample configuration file.
# ajv-cli to validate it against bundle configuration files.
# By default the ajv-cli runs in strict mode which will fail if the schema
# itself is not valid. Strict mode is more strict than the JSON schema
# specification. See for details: https://ajv.js.org/options.html#strict-mode-options
- name: Validate bundle schema
run: |
go run main.go bundle schema > schema.json
ajv -s schema.json -d ./bundle/tests/basic/databricks.yml
for file in ./bundle/internal/schema/testdata/pass/*.yml; do
ajv test -s schema.json -d $file --valid
done
for file in ./bundle/internal/schema/testdata/fail/*.yml; do
ajv test -s schema.json -d $file --invalid
done

View File

@ -25,6 +25,20 @@ func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) {
value["tasks"] = dyn.NewValue(tasks, []dyn.Location{{Line: jobOrder.Get("tasks")}})
}
// We're processing job.Settings.Parameters separately to retain empty default values.
if len(job.Settings.Parameters) > 0 {
params := make([]dyn.Value, 0)
for _, parameter := range job.Settings.Parameters {
p := map[string]dyn.Value{
"name": dyn.NewValue(parameter.Name, []dyn.Location{{Line: 0}}), // We use Line: 0 to ensure that the name goes first.
"default": dyn.NewValue(parameter.Default, []dyn.Location{{Line: 1}}),
}
params = append(params, dyn.NewValue(p, []dyn.Location{}))
}
value["parameters"] = dyn.NewValue(params, []dyn.Location{{Line: jobOrder.Get("parameters")}})
}
return yamlsaver.ConvertToMapValue(job.Settings, jobOrder, []string{"format", "new_cluster", "existing_cluster_id"}, value)
}

View File

@ -139,7 +139,7 @@ func TestRootMergeTargetOverridesWithVariables(t *testing.T) {
},
Targets: map[string]*Target{
"development": {
Variables: map[string]*variable.Variable{
Variables: map[string]*variable.TargetVariable{
"foo": {
Default: "bar",
Description: "wrong",

View File

@ -38,7 +38,26 @@ type Target struct {
// Override default values or lookup name for defined variables
// Does not permit defining new variables or redefining existing ones
// in the scope of an target
Variables map[string]*variable.Variable `json:"variables,omitempty"`
//
// There are two valid ways to define a variable override in a target:
// 1. Direct value override. We normalize this to the variable.Variable
// struct format when loading the configuration YAML:
//
// variables:
// foo: "value"
//
// 2. Override matching the variable.Variable struct.
//
// variables:
// foo:
// default: "value"
//
// OR
//
// variables:
// foo:
// lookup: "resource_name"
Variables map[string]*variable.TargetVariable `json:"variables,omitempty"`
Git Git `json:"git,omitempty"`

View File

@ -16,6 +16,11 @@ const (
VariableTypeComplex VariableType = "complex"
)
// We alias it here to override the JSON schema associated with a variable value
// in a target override. This is because we allow for directly specifying the value
// in addition to the variable.Variable struct format in a target override.
type TargetVariable Variable
// An input variable for the bundle config
type Variable struct {
// A type of the variable. This is used to validate the value of the variable

View File

@ -1,42 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"log"
"os"
"github.com/databricks/cli/bundle/schema"
)
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: go run main.go <output-file>")
os.Exit(1)
}
// Output file, to write the generated schema descriptions to.
outputFile := os.Args[1]
// Input file, the databricks openapi spec.
inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC")
if inputFile == "" {
log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set")
}
// Generate the schema descriptions.
docs, err := schema.UpdateBundleDescriptions(inputFile)
if err != nil {
log.Fatal(err)
}
result, err := json.MarshalIndent(docs, "", " ")
if err != nil {
log.Fatal(err)
}
// Write the schema descriptions to the output file.
err = os.WriteFile(outputFile, result, 0644)
if err != nil {
log.Fatal(err)
}
}

View File

@ -0,0 +1,109 @@
package main
import (
"encoding/json"
"fmt"
"log"
"os"
"reflect"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/variable"
"github.com/databricks/cli/libs/jsonschema"
)
func interpolationPattern(s string) string {
return fmt.Sprintf(`\$\{(%s(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)+)\}`, s)
}
func addInterpolationPatterns(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
if typ == reflect.TypeOf(config.Root{}) || typ == reflect.TypeOf(variable.Variable{}) {
return s
}
// The variables block in a target override allows for directly specifying
// the value of the variable.
if typ == reflect.TypeOf(variable.TargetVariable{}) {
return jsonschema.Schema{
AnyOf: []jsonschema.Schema{
// We keep the original schema so that autocomplete suggestions
// continue to work.
s,
// All values are valid for a variable value, be it primitive types
// like string/bool or complex ones like objects/arrays. Thus we override
// the schema to allow all valid JSON values.
{},
},
}
}
switch s.Type {
case jsonschema.ArrayType, jsonschema.ObjectType:
// arrays and objects can have complex variable values specified.
return jsonschema.Schema{
AnyOf: []jsonschema.Schema{
s,
{
Type: jsonschema.StringType,
Pattern: interpolationPattern("var"),
}},
}
case jsonschema.IntegerType, jsonschema.NumberType, jsonschema.BooleanType:
// primitives can have variable values, or references like ${bundle.xyz}
// or ${workspace.xyz}
return jsonschema.Schema{
AnyOf: []jsonschema.Schema{
s,
{Type: jsonschema.StringType, Pattern: interpolationPattern("resources")},
{Type: jsonschema.StringType, Pattern: interpolationPattern("bundle")},
{Type: jsonschema.StringType, Pattern: interpolationPattern("workspace")},
{Type: jsonschema.StringType, Pattern: interpolationPattern("artifacts")},
{Type: jsonschema.StringType, Pattern: interpolationPattern("var")},
},
}
default:
return s
}
}
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: go run main.go <output-file>")
os.Exit(1)
}
// Output file, where the generated JSON schema will be written to.
outputFile := os.Args[1]
// Input file, the databricks openapi spec.
inputFile := os.Getenv("DATABRICKS_OPENAPI_SPEC")
if inputFile == "" {
log.Fatal("DATABRICKS_OPENAPI_SPEC environment variable not set")
}
p, err := newParser(inputFile)
if err != nil {
log.Fatal(err)
}
// Generate the JSON schema from the bundle Go struct.
s, err := jsonschema.FromType(reflect.TypeOf(config.Root{}), []func(reflect.Type, jsonschema.Schema) jsonschema.Schema{
p.addDescriptions,
p.addEnums,
addInterpolationPatterns,
})
if err != nil {
log.Fatal(err)
}
b, err := json.MarshalIndent(s, "", " ")
if err != nil {
log.Fatal(err)
}
// Write the schema descriptions to the output file.
err = os.WriteFile(outputFile, b, 0644)
if err != nil {
log.Fatal(err)
}
}

View File

@ -0,0 +1,123 @@
package main
import (
"encoding/json"
"fmt"
"os"
"path"
"reflect"
"strings"
"github.com/databricks/cli/libs/jsonschema"
)
type Components struct {
Schemas map[string]jsonschema.Schema `json:"schemas,omitempty"`
}
type Specification struct {
Components Components `json:"components"`
}
type openapiParser struct {
ref map[string]jsonschema.Schema
}
func newParser(path string) (*openapiParser, error) {
b, err := os.ReadFile(path)
if err != nil {
return nil, err
}
spec := Specification{}
err = json.Unmarshal(b, &spec)
if err != nil {
return nil, err
}
p := &openapiParser{}
p.ref = spec.Components.Schemas
return p, nil
}
// This function checks if the input type:
// 1. Is a Databricks Go SDK type.
// 2. Has a Databricks Go SDK type embedded in it.
//
// If the above conditions are met, the function returns the JSON schema
// corresponding to the Databricks Go SDK type from the OpenAPI spec.
func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) {
typs := []reflect.Type{typ}
// Check for embedded Databricks Go SDK types.
if typ.Kind() == reflect.Struct {
for i := 0; i < typ.NumField(); i++ {
if !typ.Field(i).Anonymous {
continue
}
// Deference current type if it's a pointer.
ctyp := typ.Field(i).Type
for ctyp.Kind() == reflect.Ptr {
ctyp = ctyp.Elem()
}
typs = append(typs, ctyp)
}
}
for _, ctyp := range typs {
// Skip if it's not a Go SDK type.
if !strings.HasPrefix(ctyp.PkgPath(), "github.com/databricks/databricks-sdk-go") {
continue
}
pkgName := path.Base(ctyp.PkgPath())
k := fmt.Sprintf("%s.%s", pkgName, ctyp.Name())
// Skip if the type is not in the openapi spec.
_, ok := p.ref[k]
if !ok {
continue
}
// Return the first Go SDK type found in the openapi spec.
return p.ref[k], true
}
return jsonschema.Schema{}, false
}
// Use the OpenAPI spec to load descriptions for the given type.
func (p *openapiParser) addDescriptions(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
ref, ok := p.findRef(typ)
if !ok {
return s
}
s.Description = ref.Description
for k, v := range s.Properties {
if refProp, ok := ref.Properties[k]; ok {
v.Description = refProp.Description
}
}
return s
}
// Use the OpenAPI spec add enum values for the given type.
func (p *openapiParser) addEnums(typ reflect.Type, s jsonschema.Schema) jsonschema.Schema {
ref, ok := p.findRef(typ)
if !ok {
return s
}
s.Enum = append(s.Enum, ref.Enum...)
for k, v := range s.Properties {
if refProp, ok := ref.Properties[k]; ok {
v.Enum = append(v.Enum, refProp.Enum...)
}
}
return s
}

View File

@ -0,0 +1,3 @@
bundle:
# expected type is 'string'
name: 1234

View File

@ -0,0 +1,4 @@
resources:
jobs:
myjob:
format: INVALID_VALUE

View File

@ -0,0 +1,6 @@
resources:
models:
mymodel:
latest_versions:
- creation_timestamp: 123
status: INVALID_VALUE

View File

@ -0,0 +1,8 @@
resources:
jobs:
outer:
name: outer job
tasks:
- task_key: run job task 1
run_job_task:
job_id: ${invalid.reference}

View File

@ -0,0 +1,5 @@
resources:
models:
mymodel:
latest_versions:
- creation_timestamp: ${invalid.reference}

View File

@ -0,0 +1,9 @@
resources:
jobs:
foo:
name: my job
tasks:
# All tasks need to have a task_key.
- notebook_task:
notebook_path: /Users/abc/notebooks/inner
existing_cluster_id: abcd

View File

@ -0,0 +1,5 @@
resources:
jobs:
myjob:
# unknown fields should cause schema failure.
unknown_field: "value"

View File

@ -0,0 +1,6 @@
resources:
models:
mymodel:
creation_timestamp: 123
description: "my model"
unknown: "value"

View File

@ -0,0 +1 @@
unknown: value

View File

@ -0,0 +1,11 @@
artifacts:
abc:
path: /Workspace/a/b/c
type: wheel
files:
- source: ./x.whl
resources:
jobs:
foo:
name: ${artifacts.abc.type}

View File

@ -0,0 +1,2 @@
bundle:
name: basic

View File

@ -0,0 +1,4 @@
targets:
development:
variables:
myvar: value

View File

@ -0,0 +1,63 @@
bundle:
name: a job
workspace:
host: "https://myworkspace.com"
root_path: /abc
presets:
name_prefix: "[DEV]"
jobs_max_concurrent_runs: 10
variables:
simplevar:
default: true
description: "simplevar description"
complexvar:
default:
key1: value1
key2: value2
key3:
- value3
- value4
description: "complexvar description"
run_as:
service_principal_name: myserviceprincipal
resources:
jobs:
myjob:
name: myjob
continuous:
pause_status: PAUSED
edit_mode: EDITABLE
max_concurrent_runs: 10
description: "my job description"
email_notifications:
no_alert_for_skipped_runs: true
environments:
- environment_key: venv
spec:
dependencies:
- python=3.7
client: "myclient"
format: MULTI_TASK
tags:
foo: bar
bar: baz
tasks:
- task_key: mytask
notebook_task:
notebook_path: ${var.simplevar}
existing_cluster_id: abcd
- task_key: mytask2
for_each_task:
inputs: av
concurrency: 10
task:
task_key: inside_for_each
notebook_task:
notebook_path: ${var.complexvar.key3[0]}
- ${var.complexvar}

View File

@ -0,0 +1,72 @@
bundle:
name: ML
workspace:
host: "https://myworkspace.com"
root_path: /abc
presets:
name_prefix: "[DEV]"
jobs_max_concurrent_runs: 10
variables:
simplevar:
default: "true"
description: "simplevar description"
complexvar:
default:
key1: value1
key2: value2
key3:
- value3
- value4
description: "complexvar description"
resources:
models:
mymodel:
creation_timestamp: 123
description: "my model"
latest_versions:
- creation_timestamp: 123
tags: ${var.complexvar.key1}
status: READY
permissions:
- service_principal_name: myserviceprincipal
level: CAN_MANAGE
experiments:
myexperiment:
artifact_location: /dbfs/myexperiment
last_update_time: ${var.complexvar.key2}
lifecycle_stage: ${var.simplevar}
permissions:
- service_principal_name: myserviceprincipal
level: CAN_MANAGE
model_serving_endpoints:
myendpoint:
config:
served_models:
- model_name: ${resources.models.mymodel.name}
model_version: abc
scale_to_zero_enabled: true
workload_size: Large
name: myendpoint
schemas:
myschema:
catalog_name: mycatalog
name: myschema
registered_models:
myregisteredmodel:
catalog_name: mycatalog
name: myregisteredmodel
schema_name: ${resources.schemas.myschema.name}
grants:
- principal: abcd
privileges:
- SELECT
- INSERT

View File

@ -0,0 +1,54 @@
bundle:
name: a pipeline
workspace:
host: "https://myworkspace.com"
root_path: /abc
presets:
name_prefix: "[DEV]"
jobs_max_concurrent_runs: 10
variables:
simplevar:
default: true
description: "simplevar description"
complexvar:
default:
key1: value1
key2: value2
key3:
- value3
- value4
description: "complexvar description"
artifacts:
mywheel:
path: ./mywheel.whl
type: WHEEL
run_as:
service_principal_name: myserviceprincipal
resources:
jobs:
myjob:
name: myjob
tasks:
- task_key: ${bundle.name} pipeline trigger
pipeline_task:
pipeline_id: ${resources.mypipeline.id}
pipelines:
mypipeline:
name: mypipeline
libraries:
- whl: ./mywheel.whl
catalog: 3{var.complexvar.key2}
development: true
clusters:
- autoscale:
mode: ENHANCED
max_workers: 10
min_workers: 1

View File

@ -0,0 +1,16 @@
bundle:
name: quality_monitor
resources:
quality_monitors:
myqualitymonitor:
inference_log:
granularities:
- a
- b
model_id_col: a
prediction_col: b
timestamp_col: c
problem_type: PROBLEM_TYPE_CLASSIFICATION
assets_dir: /dbfs/mnt/abc
output_schema_name: default

View File

@ -0,0 +1,56 @@
bundle:
name: a run job task
databricks_cli_version: 0.200.0
compute_id: "mycompute"
variables:
simplevar:
default: 5678
description: "simplevar description"
complexvar:
default:
key1: 1234
key2: value2
key3:
- value3
- 9999
description: "complexvar description"
resources:
jobs:
inner:
permissions:
- user_name: user1
level: CAN_MANAGE
name: inner job
tasks:
- task_key: inner notebook task
notebook_task:
notebook_path: /Users/abc/notebooks/inner
existing_cluster_id: abcd
outer:
name: outer job
tasks:
- task_key: run job task 1
run_job_task:
job_id: 1234
- task_key: run job task 2
run_job_task:
job_id: ${var.complexvar.key1}
- task_key: run job task 3
run_job_task:
job_id: ${var.simplevar}
- task_key: run job task 4
run_job_task:
job_id: ${resources.inner.id}
- task_key: run job task 5
run_job_task:
job_id: ${var.complexvar.key3[1]}

View File

@ -0,0 +1,24 @@
bundle:
name: basic
variables:
complexvar:
default:
key1: 1234
key2: value2
key3:
- value3
- 9999
description: complexvar description
resources:
schemas:
myschema:
name: myschema
catalog_name: main
grants:
- ${var.complexvar}
- principal: ${workspace.current_user.me}
privileges:
- ${var.complexvar.key3[0]}
- ${var.complexvar.key2}

View File

@ -1,18 +0,0 @@
### Overview
`docs/bundle_descriptions.json` contains both autogenerated as well as manually written
descriptions for the json schema. Specifically
1. `resources` : almost all descriptions are autogenerated from the OpenAPI spec
2. `targets` : almost all descriptions are copied over from root level entities (eg: `bundle`, `artifacts`)
3. `bundle` : manually editted
4. `include` : manually editted
5. `workspace` : manually editted
6. `artifacts` : manually editted
These descriptions are rendered in the inline documentation in an IDE
### SOP: Add schema descriptions for new fields in bundle config
Manually edit bundle_descriptions.json to add your descriptions. Note that the
descriptions in `resources` block is generated from the OpenAPI spec, and thus
any changes there will be overwritten.

View File

@ -1,109 +0,0 @@
package schema
import (
_ "embed"
"encoding/json"
"fmt"
"os"
"reflect"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/jsonschema"
)
// A subset of Schema struct
type Docs struct {
Description string `json:"description"`
Properties map[string]*Docs `json:"properties,omitempty"`
Items *Docs `json:"items,omitempty"`
AdditionalProperties *Docs `json:"additionalproperties,omitempty"`
}
//go:embed docs/bundle_descriptions.json
var bundleDocs []byte
func (docs *Docs) refreshTargetsDocs() error {
targetsDocs, ok := docs.Properties["targets"]
if !ok || targetsDocs.AdditionalProperties == nil ||
targetsDocs.AdditionalProperties.Properties == nil {
return fmt.Errorf("invalid targets descriptions")
}
targetProperties := targetsDocs.AdditionalProperties.Properties
propertiesToCopy := []string{"artifacts", "bundle", "resources", "workspace"}
for _, p := range propertiesToCopy {
targetProperties[p] = docs.Properties[p]
}
return nil
}
func LoadBundleDescriptions() (*Docs, error) {
embedded := Docs{}
err := json.Unmarshal(bundleDocs, &embedded)
return &embedded, err
}
func UpdateBundleDescriptions(openapiSpecPath string) (*Docs, error) {
embedded, err := LoadBundleDescriptions()
if err != nil {
return nil, err
}
// Generate schema from the embedded descriptions, and convert it back to docs.
// This creates empty descriptions for any properties that were missing in the
// embedded descriptions.
schema, err := New(reflect.TypeOf(config.Root{}), embedded)
if err != nil {
return nil, err
}
docs := schemaToDocs(schema)
// Load the Databricks OpenAPI spec
openapiSpec, err := os.ReadFile(openapiSpecPath)
if err != nil {
return nil, err
}
spec := &Specification{}
err = json.Unmarshal(openapiSpec, spec)
if err != nil {
return nil, err
}
openapiReader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
// Generate descriptions for the "resources" field
resourcesDocs, err := openapiReader.ResourcesDocs()
if err != nil {
return nil, err
}
resourceSchema, err := New(reflect.TypeOf(config.Resources{}), resourcesDocs)
if err != nil {
return nil, err
}
docs.Properties["resources"] = schemaToDocs(resourceSchema)
docs.refreshTargetsDocs()
return docs, nil
}
// *Docs are a subset of *Schema, this function selects that subset
func schemaToDocs(jsonSchema *jsonschema.Schema) *Docs {
// terminate recursion if schema is nil
if jsonSchema == nil {
return nil
}
docs := &Docs{
Description: jsonSchema.Description,
}
if len(jsonSchema.Properties) > 0 {
docs.Properties = make(map[string]*Docs)
}
for k, v := range jsonSchema.Properties {
docs.Properties[k] = schemaToDocs(v)
}
docs.Items = schemaToDocs(jsonSchema.Items)
if additionalProperties, ok := jsonSchema.AdditionalProperties.(*jsonschema.Schema); ok {
docs.AdditionalProperties = schemaToDocs(additionalProperties)
}
return docs
}

File diff suppressed because it is too large Load Diff

View File

@ -1,62 +0,0 @@
package schema
import (
"encoding/json"
"testing"
"github.com/databricks/cli/libs/jsonschema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSchemaToDocs(t *testing.T) {
jsonSchema := &jsonschema.Schema{
Type: "object",
Description: "root doc",
Properties: map[string]*jsonschema.Schema{
"foo": {Type: "number", Description: "foo doc"},
"bar": {Type: "string"},
"octave": {
Type: "object",
AdditionalProperties: &jsonschema.Schema{Type: "number"},
Description: "octave docs",
},
"scales": {
Type: "object",
Description: "scale docs",
Items: &jsonschema.Schema{Type: "string"},
},
},
}
docs := schemaToDocs(jsonSchema)
docsJson, err := json.MarshalIndent(docs, " ", " ")
require.NoError(t, err)
expected :=
`{
"description": "root doc",
"properties": {
"bar": {
"description": ""
},
"foo": {
"description": "foo doc"
},
"octave": {
"description": "octave docs",
"additionalproperties": {
"description": ""
}
},
"scales": {
"description": "scale docs",
"items": {
"description": ""
}
}
}
}`
t.Log("[DEBUG] actual: ", string(docsJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(docsJson))
}

6
bundle/schema/embed.go Normal file
View File

@ -0,0 +1,6 @@
package schema
import _ "embed"
//go:embed jsonschema.json
var Bytes []byte

View File

@ -0,0 +1,71 @@
package schema_test
import (
"encoding/json"
"testing"
"github.com/databricks/cli/bundle/schema"
"github.com/databricks/cli/libs/jsonschema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func walk(defs map[string]any, p ...string) jsonschema.Schema {
v, ok := defs[p[0]]
if !ok {
panic("not found: " + p[0])
}
if len(p) == 1 {
b, err := json.Marshal(v)
if err != nil {
panic(err)
}
res := jsonschema.Schema{}
err = json.Unmarshal(b, &res)
if err != nil {
panic(err)
}
return res
}
return walk(v.(map[string]any), p[1:]...)
}
func TestJsonSchema(t *testing.T) {
s := jsonschema.Schema{}
err := json.Unmarshal(schema.Bytes, &s)
require.NoError(t, err)
// Assert job fields have their descriptions loaded.
resourceJob := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Job")
fields := []string{"name", "continuous", "deployment", "tasks", "trigger"}
for _, field := range fields {
assert.NotEmpty(t, resourceJob.AnyOf[0].Properties[field].Description)
}
// Assert descriptions were also loaded for a job task definition.
jobTask := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.Task")
fields = []string{"notebook_task", "spark_jar_task", "spark_python_task", "spark_submit_task", "description", "depends_on", "environment_key", "for_each_task", "existing_cluster_id"}
for _, field := range fields {
assert.NotEmpty(t, jobTask.AnyOf[0].Properties[field].Description)
}
// Assert descriptions are loaded for pipelines
pipeline := walk(s.Definitions, "github.com", "databricks", "cli", "bundle", "config", "resources.Pipeline")
fields = []string{"name", "catalog", "clusters", "channel", "continuous", "deployment", "development"}
for _, field := range fields {
assert.NotEmpty(t, pipeline.AnyOf[0].Properties[field].Description)
}
// Assert enum values are loaded
schedule := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "catalog.MonitorCronSchedule")
assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "PAUSED")
assert.Contains(t, schedule.AnyOf[0].Properties["pause_status"].Enum, "UNPAUSED")
providers := walk(s.Definitions, "github.com", "databricks", "databricks-sdk-go", "service", "jobs.GitProvider")
assert.Contains(t, providers.Enum, "gitHub")
assert.Contains(t, providers.Enum, "bitbucketCloud")
assert.Contains(t, providers.Enum, "gitHubEnterprise")
assert.Contains(t, providers.Enum, "bitbucketServer")
}

5561
bundle/schema/jsonschema.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,293 +0,0 @@
package schema
import (
"encoding/json"
"fmt"
"strings"
"github.com/databricks/cli/libs/jsonschema"
)
type OpenapiReader struct {
// OpenAPI spec to read schemas from.
OpenapiSpec *Specification
// In-memory cache of schemas read from the OpenAPI spec.
memo map[string]jsonschema.Schema
}
const SchemaPathPrefix = "#/components/schemas/"
// Read a schema directly from the OpenAPI spec.
func (reader *OpenapiReader) readOpenapiSchema(path string) (jsonschema.Schema, error) {
schemaKey := strings.TrimPrefix(path, SchemaPathPrefix)
// return early if we already have a computed schema
memoSchema, ok := reader.memo[schemaKey]
if ok {
return memoSchema, nil
}
// check path is present in openapi spec
openapiSchema, ok := reader.OpenapiSpec.Components.Schemas[schemaKey]
if !ok {
return jsonschema.Schema{}, fmt.Errorf("schema with path %s not found in openapi spec", path)
}
// convert openapi schema to the native schema struct
bytes, err := json.Marshal(*openapiSchema)
if err != nil {
return jsonschema.Schema{}, err
}
jsonSchema := jsonschema.Schema{}
err = json.Unmarshal(bytes, &jsonSchema)
if err != nil {
return jsonschema.Schema{}, err
}
// A hack to convert a map[string]interface{} to *Schema
// We rely on the type of a AdditionalProperties in downstream functions
// to do reference interpolation
_, ok = jsonSchema.AdditionalProperties.(map[string]interface{})
if ok {
b, err := json.Marshal(jsonSchema.AdditionalProperties)
if err != nil {
return jsonschema.Schema{}, err
}
additionalProperties := &jsonschema.Schema{}
err = json.Unmarshal(b, additionalProperties)
if err != nil {
return jsonschema.Schema{}, err
}
jsonSchema.AdditionalProperties = additionalProperties
}
// store read schema into memo
reader.memo[schemaKey] = jsonSchema
return jsonSchema, nil
}
// Resolve all nested "$ref" references in the schema. This function unrolls a single
// level of "$ref" in the schema and calls into traverseSchema to resolve nested references.
// Thus this function and traverseSchema are mutually recursive.
//
// This function is safe against reference loops. If a reference loop is detected, an error
// is returned.
func (reader *OpenapiReader) safeResolveRefs(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) {
if root.Reference == nil {
return reader.traverseSchema(root, tracker)
}
key := *root.Reference
// HACK to unblock CLI release (13th Feb 2024). This is temporary until proper
// support for recursive types is added to the docs generator. PR: https://github.com/databricks/cli/pull/1204
if strings.Contains(key, "ForEachTask") {
return root, nil
}
if tracker.hasCycle(key) {
// self reference loops can be supported however the logic is non-trivial because
// cross refernce loops are not allowed (see: http://json-schema.org/understanding-json-schema/structuring.html#recursion)
return nil, fmt.Errorf("references loop detected")
}
ref := *root.Reference
description := root.Description
tracker.push(ref, ref)
// Mark reference nil, so we do not traverse this again. This is tracked
// in the memo
root.Reference = nil
// unroll one level of reference.
selfRef, err := reader.readOpenapiSchema(ref)
if err != nil {
return nil, err
}
root = &selfRef
root.Description = description
// traverse again to find new references
root, err = reader.traverseSchema(root, tracker)
if err != nil {
return nil, err
}
tracker.pop(ref)
return root, err
}
// Traverse the nested properties of the schema to resolve "$ref" references. This function
// and safeResolveRefs are mutually recursive.
func (reader *OpenapiReader) traverseSchema(root *jsonschema.Schema, tracker *tracker) (*jsonschema.Schema, error) {
// case primitive (or invalid)
if root.Type != jsonschema.ObjectType && root.Type != jsonschema.ArrayType {
return root, nil
}
// only root references are resolved
if root.Reference != nil {
return reader.safeResolveRefs(root, tracker)
}
// case struct
if len(root.Properties) > 0 {
for k, v := range root.Properties {
childSchema, err := reader.safeResolveRefs(v, tracker)
if err != nil {
return nil, err
}
root.Properties[k] = childSchema
}
}
// case array
if root.Items != nil {
itemsSchema, err := reader.safeResolveRefs(root.Items, tracker)
if err != nil {
return nil, err
}
root.Items = itemsSchema
}
// case map
additionalProperties, ok := root.AdditionalProperties.(*jsonschema.Schema)
if ok && additionalProperties != nil {
valueSchema, err := reader.safeResolveRefs(additionalProperties, tracker)
if err != nil {
return nil, err
}
root.AdditionalProperties = valueSchema
}
return root, nil
}
func (reader *OpenapiReader) readResolvedSchema(path string) (*jsonschema.Schema, error) {
root, err := reader.readOpenapiSchema(path)
if err != nil {
return nil, err
}
tracker := newTracker()
tracker.push(path, path)
resolvedRoot, err := reader.safeResolveRefs(&root, tracker)
if err != nil {
return nil, tracker.errWithTrace(err.Error(), "")
}
return resolvedRoot, nil
}
func (reader *OpenapiReader) jobsDocs() (*Docs, error) {
jobSettingsSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "jobs.JobSettings")
if err != nil {
return nil, err
}
jobDocs := schemaToDocs(jobSettingsSchema)
// TODO: add description for id if needed.
// Tracked in https://github.com/databricks/cli/issues/242
jobsDocs := &Docs{
Description: "List of Databricks jobs",
AdditionalProperties: jobDocs,
}
return jobsDocs, nil
}
func (reader *OpenapiReader) pipelinesDocs() (*Docs, error) {
pipelineSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "pipelines.PipelineSpec")
if err != nil {
return nil, err
}
pipelineDocs := schemaToDocs(pipelineSpecSchema)
// TODO: Two fields in resources.Pipeline have the json tag id. Clarify the
// semantics and then add a description if needed. (https://github.com/databricks/cli/issues/242)
pipelinesDocs := &Docs{
Description: "List of DLT pipelines",
AdditionalProperties: pipelineDocs,
}
return pipelinesDocs, nil
}
func (reader *OpenapiReader) experimentsDocs() (*Docs, error) {
experimentSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Experiment")
if err != nil {
return nil, err
}
experimentDocs := schemaToDocs(experimentSpecSchema)
experimentsDocs := &Docs{
Description: "List of MLflow experiments",
AdditionalProperties: experimentDocs,
}
return experimentsDocs, nil
}
func (reader *OpenapiReader) modelsDocs() (*Docs, error) {
modelSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "ml.Model")
if err != nil {
return nil, err
}
modelDocs := schemaToDocs(modelSpecSchema)
modelsDocs := &Docs{
Description: "List of MLflow models",
AdditionalProperties: modelDocs,
}
return modelsDocs, nil
}
func (reader *OpenapiReader) modelServingEndpointsDocs() (*Docs, error) {
modelServingEndpointsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "serving.CreateServingEndpoint")
if err != nil {
return nil, err
}
modelServingEndpointsDocs := schemaToDocs(modelServingEndpointsSpecSchema)
modelServingEndpointsAllDocs := &Docs{
Description: "List of Model Serving Endpoints",
AdditionalProperties: modelServingEndpointsDocs,
}
return modelServingEndpointsAllDocs, nil
}
func (reader *OpenapiReader) registeredModelDocs() (*Docs, error) {
registeredModelsSpecSchema, err := reader.readResolvedSchema(SchemaPathPrefix + "catalog.CreateRegisteredModelRequest")
if err != nil {
return nil, err
}
registeredModelsDocs := schemaToDocs(registeredModelsSpecSchema)
registeredModelsAllDocs := &Docs{
Description: "List of Registered Models",
AdditionalProperties: registeredModelsDocs,
}
return registeredModelsAllDocs, nil
}
func (reader *OpenapiReader) ResourcesDocs() (*Docs, error) {
jobsDocs, err := reader.jobsDocs()
if err != nil {
return nil, err
}
pipelinesDocs, err := reader.pipelinesDocs()
if err != nil {
return nil, err
}
experimentsDocs, err := reader.experimentsDocs()
if err != nil {
return nil, err
}
modelsDocs, err := reader.modelsDocs()
if err != nil {
return nil, err
}
modelServingEndpointsDocs, err := reader.modelServingEndpointsDocs()
if err != nil {
return nil, err
}
registeredModelsDocs, err := reader.registeredModelDocs()
if err != nil {
return nil, err
}
return &Docs{
Description: "Collection of Databricks resources to deploy.",
Properties: map[string]*Docs{
"jobs": jobsDocs,
"pipelines": pipelinesDocs,
"experiments": experimentsDocs,
"models": modelsDocs,
"model_serving_endpoints": modelServingEndpointsDocs,
"registered_models": registeredModelsDocs,
},
}, nil
}

View File

@ -1,493 +0,0 @@
package schema
import (
"encoding/json"
"testing"
"github.com/databricks/cli/libs/jsonschema"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestReadSchemaForObject(t *testing.T) {
specString := `
{
"components": {
"schemas": {
"foo": {
"type": "number"
},
"fruits": {
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mango": {
"type": "object",
"description": "a mango for my schema",
"$ref": "#/components/schemas/mango"
}
}
},
"mango": {
"type": "object",
"properties": {
"foo": {
"$ref": "#/components/schemas/foo"
}
}
}
}
}
}
`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mango": {
"type": "object",
"description": "a mango for my schema",
"properties": {
"foo": {
"type": "number"
}
}
}
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestReadSchemaForArray(t *testing.T) {
specString := `
{
"components": {
"schemas": {
"fruits": {
"type": "object",
"description": "fruits that are cool",
"items": {
"description": "some papayas, because papayas are fruits too",
"$ref": "#/components/schemas/papaya"
}
},
"papaya": {
"type": "number"
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "fruits that are cool",
"items": {
"type": "number",
"description": "some papayas, because papayas are fruits too"
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestReadSchemaForMap(t *testing.T) {
specString := `{
"components": {
"schemas": {
"fruits": {
"type": "object",
"description": "fruits that are meh",
"additionalProperties": {
"description": "watermelons. watermelons.",
"$ref": "#/components/schemas/watermelon"
}
},
"watermelon": {
"type": "number"
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "fruits that are meh",
"additionalProperties": {
"type": "number",
"description": "watermelons. watermelons."
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestRootReferenceIsResolved(t *testing.T) {
specString := `{
"components": {
"schemas": {
"foo": {
"type": "object",
"description": "this description is ignored",
"properties": {
"abc": {
"type": "string"
}
}
},
"fruits": {
"type": "object",
"description": "foo fighters fighting fruits",
"$ref": "#/components/schemas/foo"
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
schema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(schema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "foo fighters fighting fruits",
"properties": {
"abc": {
"type": "string"
}
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestSelfReferenceLoopErrors(t *testing.T) {
specString := `{
"components": {
"schemas": {
"foo": {
"type": "object",
"description": "this description is ignored",
"properties": {
"bar": {
"type": "object",
"$ref": "#/components/schemas/foo"
}
}
},
"fruits": {
"type": "object",
"description": "foo fighters fighting fruits",
"$ref": "#/components/schemas/foo"
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
_, err = reader.readResolvedSchema("#/components/schemas/fruits")
assert.ErrorContains(t, err, "references loop detected. traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo")
}
func TestCrossReferenceLoopErrors(t *testing.T) {
specString := `{
"components": {
"schemas": {
"foo": {
"type": "object",
"description": "this description is ignored",
"properties": {
"bar": {
"type": "object",
"$ref": "#/components/schemas/fruits"
}
}
},
"fruits": {
"type": "object",
"description": "foo fighters fighting fruits",
"$ref": "#/components/schemas/foo"
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
_, err = reader.readResolvedSchema("#/components/schemas/fruits")
assert.ErrorContains(t, err, "references loop detected. traversal trace: -> #/components/schemas/fruits -> #/components/schemas/foo")
}
func TestReferenceResolutionForMapInObject(t *testing.T) {
specString := `
{
"components": {
"schemas": {
"foo": {
"type": "number"
},
"fruits": {
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mangos": {
"type": "object",
"description": "multiple mangos",
"$ref": "#/components/schemas/mango"
}
}
},
"mango": {
"type": "object",
"additionalProperties": {
"description": "a single mango",
"$ref": "#/components/schemas/foo"
}
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mangos": {
"type": "object",
"description": "multiple mangos",
"additionalProperties": {
"type": "number",
"description": "a single mango"
}
}
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestReferenceResolutionForArrayInObject(t *testing.T) {
specString := `{
"components": {
"schemas": {
"foo": {
"type": "number"
},
"fruits": {
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mangos": {
"type": "object",
"description": "multiple mangos",
"$ref": "#/components/schemas/mango"
}
}
},
"mango": {
"type": "object",
"items": {
"description": "a single mango",
"$ref": "#/components/schemas/foo"
}
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"description": "fruits that are cool",
"properties": {
"guava": {
"type": "string",
"description": "a guava for my schema"
},
"mangos": {
"type": "object",
"description": "multiple mangos",
"items": {
"type": "number",
"description": "a single mango"
}
}
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}
func TestReferenceResolutionDoesNotOverwriteDescriptions(t *testing.T) {
specString := `{
"components": {
"schemas": {
"foo": {
"type": "number"
},
"fruits": {
"type": "object",
"properties": {
"guava": {
"type": "object",
"description": "Guava is a fruit",
"$ref": "#/components/schemas/foo"
},
"mango": {
"type": "object",
"description": "What is a mango?",
"$ref": "#/components/schemas/foo"
}
}
}
}
}
}`
spec := &Specification{}
reader := &OpenapiReader{
OpenapiSpec: spec,
memo: make(map[string]jsonschema.Schema),
}
err := json.Unmarshal([]byte(specString), spec)
require.NoError(t, err)
fruitsSchema, err := reader.readResolvedSchema("#/components/schemas/fruits")
require.NoError(t, err)
fruitsSchemaJson, err := json.MarshalIndent(fruitsSchema, " ", " ")
require.NoError(t, err)
expected := `{
"type": "object",
"properties": {
"guava": {
"type": "number",
"description": "Guava is a fruit"
},
"mango": {
"type": "number",
"description": "What is a mango?"
}
}
}`
t.Log("[DEBUG] actual: ", string(fruitsSchemaJson))
t.Log("[DEBUG] expected: ", expected)
assert.Equal(t, expected, string(fruitsSchemaJson))
}

View File

@ -1,287 +0,0 @@
package schema
import (
"container/list"
"fmt"
"reflect"
"strings"
"github.com/databricks/cli/libs/dyn/dynvar"
"github.com/databricks/cli/libs/jsonschema"
)
// Fields tagged "readonly" should not be emitted in the schema as they are
// computed at runtime, and should not be assigned a value by the bundle author.
const readonlyTag = "readonly"
// Annotation for internal bundle fields that should not be exposed to customers.
// Fields can be tagged as "internal" to remove them from the generated schema.
const internalTag = "internal"
// Annotation for bundle fields that have been deprecated.
// Fields tagged as "deprecated" are removed/omitted from the generated schema.
const deprecatedTag = "deprecated"
// This function translates golang types into json schema. Here is the mapping
// between json schema types and golang types
//
// - GolangType -> Javascript type / Json Schema2
//
// - bool -> boolean
//
// - string -> string
//
// - int (all variants) -> number
//
// - float (all variants) -> number
//
// - map[string]MyStruct -> { type: object, additionalProperties: {}}
// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#additional-properties
//
// - []MyStruct -> {type: array, items: {}}
// for details visit: https://json-schema.org/understanding-json-schema/reference/array.html#items
//
// - []MyStruct -> {type: object, properties: {}, additionalProperties: false}
// for details visit: https://json-schema.org/understanding-json-schema/reference/object.html#properties
func New(golangType reflect.Type, docs *Docs) (*jsonschema.Schema, error) {
tracker := newTracker()
schema, err := safeToSchema(golangType, docs, "", tracker)
if err != nil {
return nil, tracker.errWithTrace(err.Error(), "root")
}
return schema, nil
}
func jsonSchemaType(golangType reflect.Type) (jsonschema.Type, error) {
switch golangType.Kind() {
case reflect.Bool:
return jsonschema.BooleanType, nil
case reflect.String:
return jsonschema.StringType, nil
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
reflect.Float32, reflect.Float64:
return jsonschema.NumberType, nil
case reflect.Struct:
return jsonschema.ObjectType, nil
case reflect.Map:
if golangType.Key().Kind() != reflect.String {
return jsonschema.InvalidType, fmt.Errorf("only strings map keys are valid. key type: %v", golangType.Key().Kind())
}
return jsonschema.ObjectType, nil
case reflect.Array, reflect.Slice:
return jsonschema.ArrayType, nil
default:
return jsonschema.InvalidType, fmt.Errorf("unhandled golang type: %s", golangType)
}
}
// A wrapper over toSchema function to:
// 1. Detect cycles in the bundle config struct.
// 2. Update tracker
//
// params:
//
// - golangType: Golang type to generate json schema for
//
// - docs: Contains documentation to be injected into the generated json schema
//
// - traceId: An identifier for the current type, to trace recursive traversal.
// Its value is the first json tag in case of struct fields and "" in other cases
// like array, map or no json tags
//
// - tracker: Keeps track of types / traceIds seen during recursive traversal
func safeToSchema(golangType reflect.Type, docs *Docs, traceId string, tracker *tracker) (*jsonschema.Schema, error) {
// HACK to unblock CLI release (13th Feb 2024). This is temporary until proper
// support for recursive types is added to the schema generator. PR: https://github.com/databricks/cli/pull/1204
if traceId == "for_each_task" {
return &jsonschema.Schema{
Type: jsonschema.ObjectType,
}, nil
}
// WE ERROR OUT IF THERE ARE CYCLES IN THE JSON SCHEMA
// There are mechanisms to deal with cycles though recursive identifiers in json
// schema. However if we use them, we would need to make sure we are able to detect
// cycles where two properties (directly or indirectly) pointing to each other
//
// see: https://json-schema.org/understanding-json-schema/structuring.html#recursion
// for details
if tracker.hasCycle(golangType) {
return nil, fmt.Errorf("cycle detected")
}
tracker.push(golangType, traceId)
props, err := toSchema(golangType, docs, tracker)
if err != nil {
return nil, err
}
tracker.pop(golangType)
return props, nil
}
// This function returns all member fields of the provided type.
// If the type has embedded (aka anonymous) fields, this function traverses
// those in a breadth first manner
func getStructFields(golangType reflect.Type) []reflect.StructField {
fields := []reflect.StructField{}
bfsQueue := list.New()
for i := 0; i < golangType.NumField(); i++ {
bfsQueue.PushBack(golangType.Field(i))
}
for bfsQueue.Len() > 0 {
front := bfsQueue.Front()
field := front.Value.(reflect.StructField)
bfsQueue.Remove(front)
if !field.Anonymous {
fields = append(fields, field)
continue
}
fieldType := field.Type
if fieldType.Kind() == reflect.Pointer {
fieldType = fieldType.Elem()
}
for i := 0; i < fieldType.NumField(); i++ {
bfsQueue.PushBack(fieldType.Field(i))
}
}
return fields
}
func toSchema(golangType reflect.Type, docs *Docs, tracker *tracker) (*jsonschema.Schema, error) {
// *Struct and Struct generate identical json schemas
if golangType.Kind() == reflect.Pointer {
return safeToSchema(golangType.Elem(), docs, "", tracker)
}
if golangType.Kind() == reflect.Interface {
return &jsonschema.Schema{}, nil
}
rootJavascriptType, err := jsonSchemaType(golangType)
if err != nil {
return nil, err
}
jsonSchema := &jsonschema.Schema{Type: rootJavascriptType}
// If the type is a non-string primitive, then we allow it to be a string
// provided it's a pure variable reference (ie only a single variable reference).
if rootJavascriptType == jsonschema.BooleanType || rootJavascriptType == jsonschema.NumberType {
jsonSchema = &jsonschema.Schema{
AnyOf: []*jsonschema.Schema{
{
Type: rootJavascriptType,
},
{
Type: jsonschema.StringType,
Pattern: dynvar.VariableRegex,
},
},
}
}
if docs != nil {
jsonSchema.Description = docs.Description
}
// case array/slice
if golangType.Kind() == reflect.Array || golangType.Kind() == reflect.Slice {
elemGolangType := golangType.Elem()
elemJavascriptType, err := jsonSchemaType(elemGolangType)
if err != nil {
return nil, err
}
var childDocs *Docs
if docs != nil {
childDocs = docs.Items
}
elemProps, err := safeToSchema(elemGolangType, childDocs, "", tracker)
if err != nil {
return nil, err
}
jsonSchema.Items = &jsonschema.Schema{
Type: elemJavascriptType,
Properties: elemProps.Properties,
AdditionalProperties: elemProps.AdditionalProperties,
Items: elemProps.Items,
Required: elemProps.Required,
}
}
// case map
if golangType.Kind() == reflect.Map {
if golangType.Key().Kind() != reflect.String {
return nil, fmt.Errorf("only string keyed maps allowed")
}
var childDocs *Docs
if docs != nil {
childDocs = docs.AdditionalProperties
}
jsonSchema.AdditionalProperties, err = safeToSchema(golangType.Elem(), childDocs, "", tracker)
if err != nil {
return nil, err
}
}
// case struct
if golangType.Kind() == reflect.Struct {
children := getStructFields(golangType)
properties := map[string]*jsonschema.Schema{}
required := []string{}
for _, child := range children {
bundleTag := child.Tag.Get("bundle")
// Fields marked as "readonly", "internal" or "deprecated" are skipped
// while generating the schema
if bundleTag == readonlyTag || bundleTag == internalTag || bundleTag == deprecatedTag {
continue
}
// get child json tags
childJsonTag := strings.Split(child.Tag.Get("json"), ",")
childName := childJsonTag[0]
// skip children that have no json tags, the first json tag is ""
// or the first json tag is "-"
if childName == "" || childName == "-" {
continue
}
// get docs for the child if they exist
var childDocs *Docs
if docs != nil {
if val, ok := docs.Properties[childName]; ok {
childDocs = val
}
}
// compute if the child is a required field. Determined by the
// presence of "omitempty" in the json tags
hasOmitEmptyTag := false
for i := 1; i < len(childJsonTag); i++ {
if childJsonTag[i] == "omitempty" {
hasOmitEmptyTag = true
}
}
if !hasOmitEmptyTag {
required = append(required, childName)
}
// compute Schema.Properties for the child recursively
fieldProps, err := safeToSchema(child.Type, childDocs, childName, tracker)
if err != nil {
return nil, err
}
properties[childName] = fieldProps
}
jsonSchema.AdditionalProperties = false
jsonSchema.Properties = properties
jsonSchema.Required = required
}
return jsonSchema, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -1,11 +0,0 @@
package schema
import "github.com/databricks/cli/libs/jsonschema"
type Specification struct {
Components *Components `json:"components"`
}
type Components struct {
Schemas map[string]*jsonschema.Schema `json:"schemas,omitempty"`
}

View File

@ -1,53 +0,0 @@
package schema
import (
"container/list"
"fmt"
)
type tracker struct {
// Nodes encountered in current path during the recursive traversal. Used to
// check for cycles
seenNodes map[interface{}]struct{}
// List of node names encountered in order in current path during the recursive traversal.
// Used to hydrate errors with path to the exact node where error occured.
//
// NOTE: node and node names can be the same
listOfNodes *list.List
}
func newTracker() *tracker {
return &tracker{
seenNodes: map[interface{}]struct{}{},
listOfNodes: list.New(),
}
}
func (t *tracker) errWithTrace(prefix string, initTrace string) error {
traceString := initTrace
curr := t.listOfNodes.Front()
for curr != nil {
if curr.Value.(string) != "" {
traceString += " -> " + curr.Value.(string)
}
curr = curr.Next()
}
return fmt.Errorf(prefix + ". traversal trace: " + traceString)
}
func (t *tracker) hasCycle(node interface{}) bool {
_, ok := t.seenNodes[node]
return ok
}
func (t *tracker) push(node interface{}, name string) {
t.seenNodes[node] = struct{}{}
t.listOfNodes.PushBack(name)
}
func (t *tracker) pop(nodeType interface{}) {
back := t.listOfNodes.Back()
t.listOfNodes.Remove(back)
delete(t.seenNodes, nodeType)
}

View File

@ -152,6 +152,12 @@ func TestGenerateJobCommand(t *testing.T) {
},
},
},
Parameters: []jobs.JobParameterDefinition{
{
Name: "empty",
Default: "",
},
},
},
}, nil)
@ -198,6 +204,9 @@ func TestGenerateJobCommand(t *testing.T) {
- task_key: notebook_task
notebook_task:
notebook_path: %s
parameters:
- name: empty
default: ""
`, filepath.Join("..", "src", "notebook.py")), string(data))
data, err = os.ReadFile(filepath.Join(srcDir, "notebook.py"))

View File

@ -1,13 +1,8 @@
package bundle
import (
"encoding/json"
"reflect"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/schema"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/jsonschema"
"github.com/spf13/cobra"
)
@ -19,33 +14,9 @@ func newSchemaCommand() *cobra.Command {
}
cmd.RunE = func(cmd *cobra.Command, args []string) error {
// Load embedded schema descriptions.
docs, err := schema.LoadBundleDescriptions()
if err != nil {
_, err := cmd.OutOrStdout().Write(schema.Bytes)
return err
}
// Generate the JSON schema from the bundle configuration struct in Go.
schema, err := schema.New(reflect.TypeOf(config.Root{}), docs)
if err != nil {
return err
}
// Target variable value overrides can be primitives, maps or sequences.
// Set an empty schema for them.
err = schema.SetByPath("targets.*.variables.*", jsonschema.Schema{})
if err != nil {
return err
}
// Print the JSON schema to stdout.
result, err := json.MarshalIndent(schema, "", " ")
if err != nil {
return err
}
cmd.OutOrStdout().Write(result)
return nil
}
return cmd
}

View File

@ -941,7 +941,12 @@ func newListArtifacts() *cobra.Command {
cmd.Long = `Get all artifacts.
List artifacts for a run. Takes an optional artifact_path prefix. If it is
specified, the response contains only artifacts with the specified prefix.",`
specified, the response contains only artifacts with the specified prefix.
This API does not support pagination when listing artifacts in UC Volumes. A
maximum of 1000 artifacts will be retrieved for UC Volumes. Please call
/api/2.0/fs/directories{directory_path} for listing artifacts in UC Volumes,
which supports pagination. See [List directory contents | Files
API](/api/workspace/files/listdirectorycontents).`
cmd.Annotations = make(map[string]string)

View File

@ -88,7 +88,9 @@ func newAssign() *cobra.Command {
Arguments:
WORKSPACE_ID: A workspace ID.
METASTORE_ID: The unique ID of the metastore.
DEFAULT_CATALOG_NAME: The name of the default catalog in the metastore.`
DEFAULT_CATALOG_NAME: The name of the default catalog in the metastore. This field is depracted.
Please use "Default Namespace API" to configure the default catalog for a
Databricks workspace.`
cmd.Annotations = make(map[string]string)
@ -665,7 +667,7 @@ func newUpdateAssignment() *cobra.Command {
// TODO: short flags
cmd.Flags().Var(&updateAssignmentJson, "json", `either inline JSON string or @path/to/file.json with request body`)
cmd.Flags().StringVar(&updateAssignmentReq.DefaultCatalogName, "default-catalog-name", updateAssignmentReq.DefaultCatalogName, `The name of the default catalog for the metastore.`)
cmd.Flags().StringVar(&updateAssignmentReq.DefaultCatalogName, "default-catalog-name", updateAssignmentReq.DefaultCatalogName, `The name of the default catalog in the metastore.`)
cmd.Flags().StringVar(&updateAssignmentReq.MetastoreId, "metastore-id", updateAssignmentReq.MetastoreId, `The unique ID of the metastore.`)
cmd.Use = "update-assignment WORKSPACE_ID"

View File

@ -117,9 +117,10 @@ func newGet() *cobra.Command {
Arguments:
REQUEST_OBJECT_TYPE: The type of the request object. Can be one of the following: alerts,
authorization, clusters, cluster-policies, dbsql-dashboards, directories,
experiments, files, instance-pools, jobs, notebooks, pipelines, queries,
registered-models, repos, serving-endpoints, or warehouses.
authorization, clusters, cluster-policies, dashboards, dbsql-dashboards,
directories, experiments, files, instance-pools, jobs, notebooks,
pipelines, queries, registered-models, repos, serving-endpoints, or
warehouses.
REQUEST_OBJECT_ID: The id of the request object.`
cmd.Annotations = make(map[string]string)
@ -245,9 +246,10 @@ func newSet() *cobra.Command {
Arguments:
REQUEST_OBJECT_TYPE: The type of the request object. Can be one of the following: alerts,
authorization, clusters, cluster-policies, dbsql-dashboards, directories,
experiments, files, instance-pools, jobs, notebooks, pipelines, queries,
registered-models, repos, serving-endpoints, or warehouses.
authorization, clusters, cluster-policies, dashboards, dbsql-dashboards,
directories, experiments, files, instance-pools, jobs, notebooks,
pipelines, queries, registered-models, repos, serving-endpoints, or
warehouses.
REQUEST_OBJECT_ID: The id of the request object.`
cmd.Annotations = make(map[string]string)
@ -319,9 +321,10 @@ func newUpdate() *cobra.Command {
Arguments:
REQUEST_OBJECT_TYPE: The type of the request object. Can be one of the following: alerts,
authorization, clusters, cluster-policies, dbsql-dashboards, directories,
experiments, files, instance-pools, jobs, notebooks, pipelines, queries,
registered-models, repos, serving-endpoints, or warehouses.
authorization, clusters, cluster-policies, dashboards, dbsql-dashboards,
directories, experiments, files, instance-pools, jobs, notebooks,
pipelines, queries, registered-models, repos, serving-endpoints, or
warehouses.
REQUEST_OBJECT_ID: The id of the request object.`
cmd.Annotations = make(map[string]string)

View File

@ -41,6 +41,7 @@ func New() *cobra.Command {
cmd.AddCommand(newGet())
cmd.AddCommand(newGetRefresh())
cmd.AddCommand(newListRefreshes())
cmd.AddCommand(newRegenerateDashboard())
cmd.AddCommand(newRunRefresh())
cmd.AddCommand(newUpdate())
@ -503,6 +504,87 @@ func newListRefreshes() *cobra.Command {
return cmd
}
// start regenerate-dashboard command
// Slice with functions to override default command behavior.
// Functions can be added from the `init()` function in manually curated files in this directory.
var regenerateDashboardOverrides []func(
*cobra.Command,
*catalog.RegenerateDashboardRequest,
)
func newRegenerateDashboard() *cobra.Command {
cmd := &cobra.Command{}
var regenerateDashboardReq catalog.RegenerateDashboardRequest
var regenerateDashboardJson flags.JsonFlag
// TODO: short flags
cmd.Flags().Var(&regenerateDashboardJson, "json", `either inline JSON string or @path/to/file.json with request body`)
cmd.Flags().StringVar(&regenerateDashboardReq.WarehouseId, "warehouse-id", regenerateDashboardReq.WarehouseId, `Optional argument to specify the warehouse for dashboard regeneration.`)
cmd.Use = "regenerate-dashboard TABLE_NAME"
cmd.Short = `Regenerate a monitoring dashboard.`
cmd.Long = `Regenerate a monitoring dashboard.
Regenerates the monitoring dashboard for the specified table.
The caller must either: 1. be an owner of the table's parent catalog 2. have
**USE_CATALOG** on the table's parent catalog and be an owner of the table's
parent schema 3. have the following permissions: - **USE_CATALOG** on the
table's parent catalog - **USE_SCHEMA** on the table's parent schema - be an
owner of the table
The call must be made from the workspace where the monitor was created. The
dashboard will be regenerated in the assets directory that was specified when
the monitor was created.
Arguments:
TABLE_NAME: Full name of the table.`
// This command is being previewed; hide from help output.
cmd.Hidden = true
cmd.Annotations = make(map[string]string)
cmd.Args = func(cmd *cobra.Command, args []string) error {
check := root.ExactArgs(1)
return check(cmd, args)
}
cmd.PreRunE = root.MustWorkspaceClient
cmd.RunE = func(cmd *cobra.Command, args []string) (err error) {
ctx := cmd.Context()
w := root.WorkspaceClient(ctx)
if cmd.Flags().Changed("json") {
err = regenerateDashboardJson.Unmarshal(&regenerateDashboardReq)
if err != nil {
return err
}
}
regenerateDashboardReq.TableName = args[0]
response, err := w.QualityMonitors.RegenerateDashboard(ctx, regenerateDashboardReq)
if err != nil {
return err
}
return cmdio.Render(ctx, response)
}
// Disable completions since they are not applicable.
// Can be overridden by manual implementation in `override.go`.
cmd.ValidArgsFunction = cobra.NoFileCompletions
// Apply optional overrides to this command.
for _, fn := range regenerateDashboardOverrides {
fn(cmd, &regenerateDashboardReq)
}
return cmd
}
// start run-refresh command
// Slice with functions to override default command behavior.

2
go.mod
View File

@ -5,7 +5,7 @@ go 1.22
require (
github.com/Masterminds/semver/v3 v3.3.0 // MIT
github.com/briandowns/spinner v1.23.1 // Apache 2.0
github.com/databricks/databricks-sdk-go v0.45.0 // Apache 2.0
github.com/databricks/databricks-sdk-go v0.46.0 // Apache 2.0
github.com/fatih/color v1.17.0 // MIT
github.com/ghodss/yaml v1.0.0 // MIT + NOTICE
github.com/google/uuid v1.6.0 // BSD-3-Clause

4
go.sum generated
View File

@ -32,8 +32,8 @@ github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGX
github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/cyphar/filepath-securejoin v0.2.4 h1:Ugdm7cg7i6ZK6x3xDF1oEu1nfkyfH53EtKeQYTC3kyg=
github.com/cyphar/filepath-securejoin v0.2.4/go.mod h1:aPGpWjXOXUn2NCNjFvBE6aRxGGx79pTxQpKOJNYHHl4=
github.com/databricks/databricks-sdk-go v0.45.0 h1:wdx5Wm/ESrahdHeq62WrjLeGjV4r722LLanD8ahI0Mo=
github.com/databricks/databricks-sdk-go v0.45.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU=
github.com/databricks/databricks-sdk-go v0.46.0 h1:D0TxmtSVAOsdnfzH4OGtAmcq+8TyA7Z6fA6JEYhupeY=
github.com/databricks/databricks-sdk-go v0.46.0/go.mod h1:ds+zbv5mlQG7nFEU5ojLtgN/u0/9YzZmKQES/CfedzU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

View File

@ -37,6 +37,7 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
b := &bundle.Bundle{
RootPath: dir,
SyncRootPath: dir,
Config: config.Root{
Bundle: config.Bundle{
Target: "whatever",
@ -101,6 +102,7 @@ func TestAccUploadArtifactFileToCorrectRemotePathWithEnvironments(t *testing.T)
b := &bundle.Bundle{
RootPath: dir,
SyncRootPath: dir,
Config: config.Root{
Bundle: config.Bundle{
Target: "whatever",
@ -170,6 +172,7 @@ func TestAccUploadArtifactFileToCorrectRemotePathForVolumes(t *testing.T) {
b := &bundle.Bundle{
RootPath: dir,
SyncRootPath: dir,
Config: config.Root{
Bundle: config.Bundle{
Target: "whatever",

View File

@ -6,9 +6,7 @@ import (
"github.com/databricks/cli/libs/dyn"
)
const VariableRegex = `\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`
var re = regexp.MustCompile(VariableRegex)
var re = regexp.MustCompile(`\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\[[0-9]+\])*)*(\[[0-9]+\])*)\}`)
// ref represents a variable reference.
// It is a string [dyn.Value] contained in a larger [dyn.Value].

View File

@ -151,6 +151,8 @@ func isScalarValueInString(v dyn.Value) bool {
switch v.MustString() {
case "true", "false":
return true
case "":
return true
default:
_, err := parseNumber(v.MustString())
return err == nil

View File

@ -0,0 +1,356 @@
package jsonschema
import (
"container/list"
"fmt"
"maps"
"path"
"reflect"
"slices"
"strings"
)
var skipTags = []string{
// Fields tagged "readonly" should not be emitted in the schema as they are
// computed at runtime, and should not be assigned a value by the bundle author.
"readonly",
// Annotation for internal bundle fields that should not be exposed to customers.
// Fields can be tagged as "internal" to remove them from the generated schema.
"internal",
// Annotation for bundle fields that have been deprecated.
// Fields tagged as "deprecated" are omitted from the generated schema.
"deprecated",
}
type constructor struct {
// Map of typ.PkgPath() + "." + typ.Name() to the schema for that type.
// Example key: github.com/databricks/databricks-sdk-go/service/jobs.JobSettings
definitions map[string]Schema
// Map of typ.PkgPath() + "." + typ.Name() to the corresponding type. Used to
// track types that have been seen to avoid infinite recursion.
seen map[string]reflect.Type
// The root type for which the schema is being generated.
root reflect.Type
}
// JSON pointers use "/" as a delimiter to represent nested objects. This means
// we would instead need to use "~1" to represent "/" if we wish to refer to a
// key in a JSON object with a "/" in it. Instead of doing that we replace "/" with an
// additional level of nesting in the output map. Thus the $refs in the generated
// JSON schema can contain "/" without any issues.
// see: https://datatracker.ietf.org/doc/html/rfc6901
//
// For example:
// {"a/b/c": "value"} is converted to {"a": {"b": {"c": "value"}}}
// the $ref for "value" would be "#/$defs/a/b/c" in the generated JSON schema.
func (c *constructor) Definitions() map[string]any {
defs := maps.Clone(c.definitions)
// Remove the root type from the definitions. We don't need to include it in
// the definitions because it will be inlined as the root of the generated JSON schema.
delete(defs, typePath(c.root))
if len(defs) == 0 {
return nil
}
res := make(map[string]any)
for k, v := range defs {
parts := strings.Split(k, "/")
cur := res
for i, p := range parts {
// Set the value for the last part.
if i == len(parts)-1 {
cur[p] = v
break
}
// For all but the last part, create a new map value to add a level
// of nesting.
if _, ok := cur[p]; !ok {
cur[p] = make(map[string]any)
}
cur = cur[p].(map[string]any)
}
}
return res
}
// FromType converts a [reflect.Type] to a [Schema]. Nodes in the final JSON
// schema are guaranteed to be one level deep, which is done using defining $defs
// for every Go type and referring them using $ref in the corresponding node in
// the JSON schema.
//
// fns is a list of transformation functions that will be applied in order to all $defs
// in the schema.
func FromType(typ reflect.Type, fns []func(typ reflect.Type, s Schema) Schema) (Schema, error) {
c := constructor{
definitions: make(map[string]Schema),
seen: make(map[string]reflect.Type),
root: typ,
}
_, err := c.walk(typ)
if err != nil {
return Schema{}, err
}
for _, fn := range fns {
for k := range c.definitions {
c.definitions[k] = fn(c.seen[k], c.definitions[k])
}
}
res := c.definitions[typePath(typ)]
res.Definitions = c.Definitions()
return res, nil
}
// typePath computes a unique string representation of the type. $ref in the generated
// JSON schema will refer to this path. See TestTypePath for examples outputs.
func typePath(typ reflect.Type) string {
// Pointers have a typ.Name() of "". Dereference them to get the underlying type.
for typ.Kind() == reflect.Ptr {
typ = typ.Elem()
}
if typ.Kind() == reflect.Interface {
return "interface"
}
// Recursively call typePath, to handle slices of slices / maps.
if typ.Kind() == reflect.Slice {
return path.Join("slice", typePath(typ.Elem()))
}
if typ.Kind() == reflect.Map {
if typ.Key().Kind() != reflect.String {
panic(fmt.Sprintf("found map with non-string key: %v", typ.Key()))
}
// Recursively call typePath, to handle maps of maps / slices.
return path.Join("map", typePath(typ.Elem()))
}
switch {
case typ.PkgPath() != "" && typ.Name() != "":
return typ.PkgPath() + "." + typ.Name()
case typ.Name() != "":
return typ.Name()
default:
// Invariant. This function should return a non-empty string
// for all types.
panic("unexpected empty type name for type: " + typ.String())
}
}
// Walk the Go type, generating $defs for every type encountered, and populating
// the corresponding $ref in the JSON schema.
func (c *constructor) walk(typ reflect.Type) (string, error) {
// Dereference pointers if necessary.
for typ.Kind() == reflect.Ptr {
typ = typ.Elem()
}
typPath := typePath(typ)
// Return early if the type has already been seen, to avoid infinite recursion.
if _, ok := c.seen[typPath]; ok {
return typPath, nil
}
c.seen[typPath] = typ
var s Schema
var err error
switch typ.Kind() {
case reflect.Struct:
s, err = c.fromTypeStruct(typ)
case reflect.Slice:
s, err = c.fromTypeSlice(typ)
case reflect.Map:
s, err = c.fromTypeMap(typ)
case reflect.String:
s = Schema{Type: StringType}
case reflect.Bool:
s = Schema{Type: BooleanType}
case reflect.Int, reflect.Int32, reflect.Int64:
s = Schema{Type: IntegerType}
case reflect.Float32, reflect.Float64:
s = Schema{Type: NumberType}
case reflect.Interface:
// We cannot determine the schema for fields of interface type just based
// on the type information. Thus we'll set the empty schema here and allow
// arbitrary values.
s = Schema{}
default:
return "", fmt.Errorf("unsupported type: %s", typ.Kind())
}
if err != nil {
return "", err
}
// Store the computed JSON schema for the type.
c.definitions[typPath] = s
return typPath, nil
}
// This function returns all member fields of the provided type.
// If the type has embedded (aka anonymous) fields, this function traverses
// those in a breadth first manner
//
// BFS is important because we want the a field defined at a higher level embedded
// struct to be given preference over a field with the same name defined at a lower
// level embedded struct. For example see: TestHigherLevelEmbeddedFieldIsInSchema
func getStructFields(typ reflect.Type) []reflect.StructField {
fields := []reflect.StructField{}
bfsQueue := list.New()
for i := 0; i < typ.NumField(); i++ {
bfsQueue.PushBack(typ.Field(i))
}
for bfsQueue.Len() > 0 {
front := bfsQueue.Front()
field := front.Value.(reflect.StructField)
bfsQueue.Remove(front)
if !field.Anonymous {
fields = append(fields, field)
continue
}
fieldType := field.Type
// Embedded types can only be struct{} or pointer to struct{}. Multiple
// levels of pointers are not allowed by the Go compiler. So we only
// dereference pointers once.
if fieldType.Kind() == reflect.Pointer {
fieldType = fieldType.Elem()
}
for i := 0; i < fieldType.NumField(); i++ {
bfsQueue.PushBack(fieldType.Field(i))
}
}
return fields
}
func (c *constructor) fromTypeStruct(typ reflect.Type) (Schema, error) {
if typ.Kind() != reflect.Struct {
return Schema{}, fmt.Errorf("expected struct, got %s", typ.Kind())
}
res := Schema{
Type: ObjectType,
Properties: make(map[string]*Schema),
Required: []string{},
AdditionalProperties: false,
}
structFields := getStructFields(typ)
for _, structField := range structFields {
bundleTags := strings.Split(structField.Tag.Get("bundle"), ",")
// Fields marked as "readonly", "internal" or "deprecated" are skipped
// while generating the schema
skip := false
for _, tag := range skipTags {
if slices.Contains(bundleTags, tag) {
skip = true
break
}
}
if skip {
continue
}
jsonTags := strings.Split(structField.Tag.Get("json"), ",")
fieldName := jsonTags[0]
// Do not include fields in the schema that will not be serialized during
// JSON marshalling.
if fieldName == "" || fieldName == "-" || !structField.IsExported() {
continue
}
// Skip property if it is already present in the schema.
// This can happen if the same field is defined multiple times across
// a tree of embedded structs. For example see: TestHigherLevelEmbeddedFieldIsInSchema
if _, ok := res.Properties[fieldName]; ok {
continue
}
// "omitempty" tags in the Go SDK structs represent fields that not are
// required to be present in the API payload. Thus its absence in the
// tags list indicates that the field is required.
if !slices.Contains(jsonTags, "omitempty") {
res.Required = append(res.Required, fieldName)
}
// Walk the fields of the struct.
typPath, err := c.walk(structField.Type)
if err != nil {
return Schema{}, err
}
// For every property in the struct, add a $ref to the corresponding
// $defs block.
refPath := path.Join("#/$defs", typPath)
res.Properties[fieldName] = &Schema{
Reference: &refPath,
}
}
return res, nil
}
func (c *constructor) fromTypeSlice(typ reflect.Type) (Schema, error) {
if typ.Kind() != reflect.Slice {
return Schema{}, fmt.Errorf("expected slice, got %s", typ.Kind())
}
res := Schema{
Type: ArrayType,
}
// Walk the slice element type.
typPath, err := c.walk(typ.Elem())
if err != nil {
return Schema{}, err
}
refPath := path.Join("#/$defs", typPath)
// Add a $ref to the corresponding $defs block for the slice element type.
res.Items = &Schema{
Reference: &refPath,
}
return res, nil
}
func (c *constructor) fromTypeMap(typ reflect.Type) (Schema, error) {
if typ.Kind() != reflect.Map {
return Schema{}, fmt.Errorf("expected map, got %s", typ.Kind())
}
res := Schema{
Type: ObjectType,
}
// Walk the map value type.
typPath, err := c.walk(typ.Elem())
if err != nil {
return Schema{}, err
}
refPath := path.Join("#/$defs", typPath)
// Add a $ref to the corresponding $defs block for the map value type.
res.AdditionalProperties = &Schema{
Reference: &refPath,
}
return res, nil
}

View File

@ -0,0 +1,521 @@
package jsonschema
import (
"reflect"
"testing"
"github.com/databricks/cli/libs/jsonschema/test_types"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestFromTypeBasic(t *testing.T) {
type myStruct struct {
S string `json:"s"`
I *int `json:"i,omitempty"`
V interface{} `json:"v,omitempty"`
TriplePointer ***int `json:"triple_pointer,omitempty"`
// These fields should be ignored in the resulting schema.
NotAnnotated string
DashedTag string `json:"-"`
InternalTagged string `json:"internal_tagged" bundle:"internal"`
DeprecatedTagged string `json:"deprecated_tagged" bundle:"deprecated"`
ReadOnlyTagged string `json:"readonly_tagged" bundle:"readonly"`
}
strRef := "#/$defs/string"
boolRef := "#/$defs/bool"
intRef := "#/$defs/int"
interfaceRef := "#/$defs/interface"
tcases := []struct {
name string
typ reflect.Type
expected Schema
}{
{
name: "int",
typ: reflect.TypeOf(int(0)),
expected: Schema{
Type: "integer",
},
},
{
name: "string",
typ: reflect.TypeOf(string("")),
expected: Schema{
Type: "string",
},
},
{
name: "bool",
typ: reflect.TypeOf(bool(true)),
expected: Schema{
Type: "boolean",
},
},
{
name: "float64",
typ: reflect.TypeOf(float64(0)),
expected: Schema{
Type: "number",
},
},
{
name: "struct",
typ: reflect.TypeOf(myStruct{}),
expected: Schema{
Type: "object",
Definitions: map[string]any{
"interface": Schema{},
"string": Schema{
Type: "string",
},
"int": Schema{
Type: "integer",
},
},
Properties: map[string]*Schema{
"s": {
Reference: &strRef,
},
"i": {
Reference: &intRef,
},
"v": {
Reference: &interfaceRef,
},
"triple_pointer": {
Reference: &intRef,
},
},
AdditionalProperties: false,
Required: []string{"s"},
},
},
{
name: "slice",
typ: reflect.TypeOf([]bool{}),
expected: Schema{
Type: "array",
Definitions: map[string]any{
"bool": Schema{
Type: "boolean",
},
},
Items: &Schema{
Reference: &boolRef,
},
},
},
{
name: "map",
typ: reflect.TypeOf(map[string]int{}),
expected: Schema{
Type: "object",
Definitions: map[string]any{
"int": Schema{
Type: "integer",
},
},
AdditionalProperties: &Schema{
Reference: &intRef,
},
},
},
}
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
s, err := FromType(tc.typ, nil)
assert.NoError(t, err)
assert.Equal(t, tc.expected, s)
})
}
}
func TestGetStructFields(t *testing.T) {
type InnerEmbeddedStruct struct {
InnerField float64
}
type EmbeddedStructOne struct {
FieldOne int
*InnerEmbeddedStruct
}
type EmbeddedStructTwo struct {
FieldTwo bool
}
type MyStruct struct {
*EmbeddedStructOne
EmbeddedStructTwo
OuterField string
}
fields := getStructFields(reflect.TypeOf(MyStruct{}))
assert.Len(t, fields, 4)
assert.Equal(t, "OuterField", fields[0].Name)
assert.Equal(t, "FieldOne", fields[1].Name)
// InnerField occurring after FieldTwo ensures BFS as opposed to DFS traversal.
assert.Equal(t, "FieldTwo", fields[2].Name)
assert.Equal(t, "InnerField", fields[3].Name)
}
func TestHigherLevelEmbeddedFieldIsInSchema(t *testing.T) {
type Inner struct {
Override string `json:"override,omitempty"`
}
type EmbeddedOne struct {
Inner
}
type EmbeddedTwo struct {
Override int `json:"override,omitempty"`
}
type Outer struct {
EmbeddedOne
EmbeddedTwo
}
intRef := "#/$defs/int"
expected := Schema{
Type: "object",
Definitions: map[string]any{
"int": Schema{
Type: "integer",
},
},
Properties: map[string]*Schema{
"override": {
Reference: &intRef,
},
},
AdditionalProperties: false,
Required: []string{},
}
s, err := FromType(reflect.TypeOf(Outer{}), nil)
require.NoError(t, err)
assert.Equal(t, expected, s)
}
func TestFromTypeNested(t *testing.T) {
type Inner struct {
S string `json:"s"`
}
type Outer struct {
I string `json:"i"`
Inner Inner `json:"inner"`
}
innerRef := "#/$defs/github.com/databricks/cli/libs/jsonschema.Inner"
strRef := "#/$defs/string"
expectedDefinitions := map[string]any{
"github.com": map[string]any{
"databricks": map[string]any{
"cli": map[string]any{
"libs": map[string]any{
"jsonschema.Inner": Schema{
Type: "object",
Properties: map[string]*Schema{
"s": {
Reference: &strRef,
},
},
AdditionalProperties: false,
Required: []string{"s"},
},
},
},
},
},
"string": Schema{
Type: "string",
},
}
tcases := []struct {
name string
typ reflect.Type
expected Schema
}{
{
name: "struct in struct",
typ: reflect.TypeOf(Outer{}),
expected: Schema{
Type: "object",
Definitions: expectedDefinitions,
Properties: map[string]*Schema{
"i": {
Reference: &strRef,
},
"inner": {
Reference: &innerRef,
},
},
AdditionalProperties: false,
Required: []string{"i", "inner"},
},
},
{
name: "struct as a map value",
typ: reflect.TypeOf(map[string]*Inner{}),
expected: Schema{
Type: "object",
Definitions: expectedDefinitions,
AdditionalProperties: &Schema{
Reference: &innerRef,
},
},
},
{
name: "struct as a slice element",
typ: reflect.TypeOf([]Inner{}),
expected: Schema{
Type: "array",
Definitions: expectedDefinitions,
Items: &Schema{
Reference: &innerRef,
},
},
},
}
for _, tc := range tcases {
t.Run(tc.name, func(t *testing.T) {
s, err := FromType(tc.typ, nil)
assert.NoError(t, err)
assert.Equal(t, tc.expected, s)
})
}
}
func TestFromTypeRecursive(t *testing.T) {
fooRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Foo"
barRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Bar"
expected := Schema{
Type: "object",
Definitions: map[string]any{
"github.com": map[string]any{
"databricks": map[string]any{
"cli": map[string]any{
"libs": map[string]any{
"jsonschema": map[string]any{
"test_types.Bar": Schema{
Type: "object",
Properties: map[string]*Schema{
"foo": {
Reference: &fooRef,
},
},
AdditionalProperties: false,
Required: []string{},
},
"test_types.Foo": Schema{
Type: "object",
Properties: map[string]*Schema{
"bar": {
Reference: &barRef,
},
},
AdditionalProperties: false,
Required: []string{},
},
},
},
},
},
},
},
Properties: map[string]*Schema{
"foo": {
Reference: &fooRef,
},
},
AdditionalProperties: false,
Required: []string{"foo"},
}
s, err := FromType(reflect.TypeOf(test_types.Outer{}), nil)
assert.NoError(t, err)
assert.Equal(t, expected, s)
}
func TestFromTypeSelfReferential(t *testing.T) {
selfRef := "#/$defs/github.com/databricks/cli/libs/jsonschema/test_types.Self"
stringRef := "#/$defs/string"
expected := Schema{
Type: "object",
Definitions: map[string]any{
"github.com": map[string]any{
"databricks": map[string]any{
"cli": map[string]any{
"libs": map[string]any{
"jsonschema": map[string]any{
"test_types.Self": Schema{
Type: "object",
Properties: map[string]*Schema{
"self": {
Reference: &selfRef,
},
"s": {
Reference: &stringRef,
},
},
AdditionalProperties: false,
Required: []string{},
},
},
},
},
},
},
"string": Schema{
Type: "string",
},
},
Properties: map[string]*Schema{
"self": {
Reference: &selfRef,
},
},
AdditionalProperties: false,
Required: []string{},
}
s, err := FromType(reflect.TypeOf(test_types.OuterSelf{}), nil)
assert.NoError(t, err)
assert.Equal(t, expected, s)
}
func TestFromTypeError(t *testing.T) {
// Maps with non-string keys should panic.
type mapOfInts map[int]int
assert.PanicsWithValue(t, "found map with non-string key: int", func() {
FromType(reflect.TypeOf(mapOfInts{}), nil)
})
// Unsupported types should return an error.
_, err := FromType(reflect.TypeOf(complex64(0)), nil)
assert.EqualError(t, err, "unsupported type: complex64")
}
func TestFromTypeFunctionsArg(t *testing.T) {
type myStruct struct {
S string `json:"s"`
}
strRef := "#/$defs/string"
expected := Schema{
Type: "object",
Definitions: map[string]any{
"string": Schema{
Type: "string",
Description: "a string",
Enum: []any{"a", "b", "c"},
},
},
Properties: map[string]*Schema{
"s": {
Reference: &strRef,
},
},
AdditionalProperties: false,
Required: []string{"s"},
}
addDescription := func(typ reflect.Type, s Schema) Schema {
if typ.Kind() != reflect.String {
return s
}
s.Description = "a string"
return s
}
addEnums := func(typ reflect.Type, s Schema) Schema {
if typ.Kind() != reflect.String {
return s
}
s.Enum = []any{"a", "b", "c"}
return s
}
s, err := FromType(reflect.TypeOf(myStruct{}), []func(reflect.Type, Schema) Schema{
addDescription,
addEnums,
})
assert.NoError(t, err)
assert.Equal(t, expected, s)
}
func TestTypePath(t *testing.T) {
type myStruct struct{}
tcases := []struct {
typ reflect.Type
path string
}{
{
typ: reflect.TypeOf(""),
path: "string",
},
{
typ: reflect.TypeOf(int(0)),
path: "int",
},
{
typ: reflect.TypeOf(true),
path: "bool",
},
{
typ: reflect.TypeOf(float64(0)),
path: "float64",
},
{
typ: reflect.TypeOf(myStruct{}),
path: "github.com/databricks/cli/libs/jsonschema.myStruct",
},
{
typ: reflect.TypeOf([]int{}),
path: "slice/int",
},
{
typ: reflect.TypeOf(map[string]int{}),
path: "map/int",
},
{
typ: reflect.TypeOf([]myStruct{}),
path: "slice/github.com/databricks/cli/libs/jsonschema.myStruct",
},
{
typ: reflect.TypeOf([][]map[string]map[string]myStruct{}),
path: "slice/slice/map/map/github.com/databricks/cli/libs/jsonschema.myStruct",
},
{
typ: reflect.TypeOf(map[string]myStruct{}),
path: "map/github.com/databricks/cli/libs/jsonschema.myStruct",
},
}
for _, tc := range tcases {
t.Run(tc.typ.String(), func(t *testing.T) {
assert.Equal(t, tc.path, typePath(tc.typ))
})
}
// Maps with non-string keys should panic.
assert.PanicsWithValue(t, "found map with non-string key: int", func() {
typePath(reflect.TypeOf(map[int]int{}))
})
}

View File

@ -6,7 +6,6 @@ import (
"os"
"regexp"
"slices"
"strings"
"github.com/databricks/cli/internal/build"
"golang.org/x/mod/semver"
@ -14,6 +13,10 @@ import (
// defines schema for a json object
type Schema struct {
// Definitions that can be reused and referenced throughout the schema. The
// syntax for a reference is $ref: #/$defs/<path.to.definition>
Definitions map[string]any `json:"$defs,omitempty"`
// Type of the object
Type Type `json:"type,omitempty"`
@ -63,7 +66,7 @@ type Schema struct {
Extension
// Schema that must match any of the schemas in the array
AnyOf []*Schema `json:"anyOf,omitempty"`
AnyOf []Schema `json:"anyOf,omitempty"`
}
// Default value defined in a JSON Schema, represented as a string.
@ -82,41 +85,6 @@ func (s *Schema) ParseString(v string) (any, error) {
return fromString(v, s.Type)
}
func (s *Schema) getByPath(path string) (*Schema, error) {
p := strings.Split(path, ".")
res := s
for _, node := range p {
if node == "*" {
res = res.AdditionalProperties.(*Schema)
continue
}
var ok bool
res, ok = res.Properties[node]
if !ok {
return nil, fmt.Errorf("property %q not found in schema. Query path: %s", node, path)
}
}
return res, nil
}
func (s *Schema) GetByPath(path string) (Schema, error) {
v, err := s.getByPath(path)
if err != nil {
return Schema{}, err
}
return *v, nil
}
func (s *Schema) SetByPath(path string, v Schema) error {
dst, err := s.getByPath(path)
if err != nil {
return err
}
*dst = v
return nil
}
type Type string
const (

View File

@ -4,7 +4,6 @@ import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestSchemaValidateTypeNames(t *testing.T) {
@ -306,92 +305,3 @@ func TestValidateSchemaSkippedPropertiesHaveDefaults(t *testing.T) {
err = s.validate()
assert.NoError(t, err)
}
func testSchema() *Schema {
return &Schema{
Type: "object",
Properties: map[string]*Schema{
"int_val": {
Type: "integer",
Default: int64(123),
},
"string_val": {
Type: "string",
},
"object_val": {
Type: "object",
Properties: map[string]*Schema{
"bar": {
Type: "string",
Default: "baz",
},
},
AdditionalProperties: &Schema{
Type: "object",
Properties: map[string]*Schema{
"foo": {
Type: "string",
Default: "zab",
},
},
},
},
},
}
}
func TestSchemaGetByPath(t *testing.T) {
s := testSchema()
ss, err := s.GetByPath("int_val")
require.NoError(t, err)
assert.Equal(t, Schema{
Type: IntegerType,
Default: int64(123),
}, ss)
ss, err = s.GetByPath("string_val")
require.NoError(t, err)
assert.Equal(t, Schema{
Type: StringType,
}, ss)
ss, err = s.GetByPath("object_val.bar")
require.NoError(t, err)
assert.Equal(t, Schema{
Type: StringType,
Default: "baz",
}, ss)
ss, err = s.GetByPath("object_val.*.foo")
require.NoError(t, err)
assert.Equal(t, Schema{
Type: StringType,
Default: "zab",
}, ss)
}
func TestSchemaSetByPath(t *testing.T) {
s := testSchema()
err := s.SetByPath("int_val", Schema{
Type: IntegerType,
Default: int64(456),
})
require.NoError(t, err)
assert.Equal(t, int64(456), s.Properties["int_val"].Default)
err = s.SetByPath("object_val.*.foo", Schema{
Type: StringType,
Default: "zooby",
})
require.NoError(t, err)
ns, err := s.GetByPath("object_val.*.foo")
require.NoError(t, err)
assert.Equal(t, Schema{
Type: StringType,
Default: "zooby",
}, ns)
}

View File

@ -0,0 +1,25 @@
package test_types
// Recursive types cannot be defined inline without making them anonymous,
// so we define them here instead.
type Foo struct {
Bar *Bar `json:"bar,omitempty"`
}
type Bar struct {
Foo Foo `json:"foo,omitempty"`
}
type Outer struct {
Foo Foo `json:"foo"`
}
type Self struct {
Self *Self `json:"self,omitempty"`
S string `json:"s,omitempty"`
}
type OuterSelf struct {
Self Self `json:"self,omitempty"`
}

View File

@ -461,7 +461,7 @@ func TestPromptIsSkippedAnyOf(t *testing.T) {
Default: "hello-world",
Extension: jsonschema.Extension{
SkipPromptIf: &jsonschema.Schema{
AnyOf: []*jsonschema.Schema{
AnyOf: []jsonschema.Schema{
{
Properties: map[string]*jsonschema.Schema{
"abc": {

View File

@ -3,10 +3,11 @@ resources:
{{.project_name}}_job:
name: {{.project_name}}_job
schedule:
# Run every day at 9:27 AM
quartz_cron_expression: 21 27 9 * * ?
timezone_id: UTC
trigger:
# Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
periodic:
interval: 1
unit: DAYS
email_notifications:
on_failure:

View File

@ -10,10 +10,11 @@ resources:
{{.project_name}}_job:
name: {{.project_name}}_job
schedule:
# Run every day at 8:37 AM
quartz_cron_expression: '44 37 8 * * ?'
timezone_id: Europe/Amsterdam
trigger:
# Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
periodic:
interval: 1
unit: DAYS
{{- if not is_service_principal}}

View File

@ -4,10 +4,11 @@ resources:
{{.project_name}}_sql_job:
name: {{.project_name}}_sql_job
schedule:
# Run every day at 7:17 AM
quartz_cron_expression: '44 17 7 * * ?'
timezone_id: Europe/Amsterdam
trigger:
# Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
periodic:
interval: 1
unit: DAYS
{{- if not is_service_principal}}