fix: Empty schema fields in OpenAPI spec (#2045)

## Changes

1. Removes default yaml-fields during schema generation, caused by [this
PR](https://github.com/databricks/cli/pull/2032) (current yaml package
can't read `json` annotations in struct fields)
2. Addresses missing annotations for fields from the OpenAPI spec, which
are named differently in the Go SDK
3. Adds filtering for annotations.yaml to include only CLI package
fields
4. Implements alphabetical sort for yaml keys to avoid unnecessary diff
in PRs

## Tests

Manually tested
This commit is contained in:
Ilya Kuznetsov 2024-12-23 13:08:01 +01:00 committed by GitHub
parent e0952491c9
commit 793bf2b995
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 2097 additions and 1769 deletions

View File

@ -6,6 +6,7 @@ import (
"os"
"reflect"
"regexp"
"slices"
"strings"
yaml3 "gopkg.in/yaml.v3"
@ -119,7 +120,15 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error
if err != nil {
return err
}
missingAnnotations, err := convert.FromTyped(&d.missingAnnotations, dyn.NilValue)
for k := range d.missingAnnotations {
if !isCliPath(k) {
delete(d.missingAnnotations, k)
fmt.Printf("Missing annotations for `%s` that are not in CLI package, try to fetch latest OpenAPI spec and regenerate annotations", k)
}
}
missingAnnotations, err := convert.FromTyped(d.missingAnnotations, dyn.NilValue)
if err != nil {
return err
}
@ -129,7 +138,13 @@ func (d *annotationHandler) syncWithMissingAnnotations(outputPath string) error
return err
}
err = saveYamlWithStyle(outputPath, output)
var outputTyped annotationFile
err = convert.ToTyped(&outputTyped, output)
if err != nil {
return err
}
err = saveYamlWithStyle(outputPath, outputTyped)
if err != nil {
return err
}
@ -153,21 +168,50 @@ func assignAnnotation(s *jsonschema.Schema, a annotation) {
s.Enum = a.Enum
}
func saveYamlWithStyle(outputPath string, input dyn.Value) error {
func saveYamlWithStyle(outputPath string, annotations annotationFile) error {
annotationOrder := yamlsaver.NewOrder([]string{"description", "markdown_description", "title", "default", "enum"})
style := map[string]yaml3.Style{}
file, _ := input.AsMap()
for _, v := range file.Keys() {
style[v.MustString()] = yaml3.LiteralStyle
order := getAlphabeticalOrder(annotations)
dynMap := map[string]dyn.Value{}
for k, v := range annotations {
style[k] = yaml3.LiteralStyle
properties := map[string]dyn.Value{}
propertiesOrder := getAlphabeticalOrder(v)
for key, value := range v {
d, err := convert.FromTyped(value, dyn.NilValue)
if d.Kind() == dyn.KindNil || err != nil {
properties[key] = dyn.NewValue(map[string]dyn.Value{}, []dyn.Location{{Line: propertiesOrder.Get(key)}})
continue
}
val, err := yamlsaver.ConvertToMapValue(value, annotationOrder, []string{}, map[string]dyn.Value{})
if err != nil {
return err
}
properties[key] = val.WithLocations([]dyn.Location{{Line: propertiesOrder.Get(key)}})
}
dynMap[k] = dyn.NewValue(properties, []dyn.Location{{Line: order.Get(k)}})
}
saver := yamlsaver.NewSaverWithStyle(style)
err := saver.SaveAsYAML(file, outputPath, true)
err := saver.SaveAsYAML(dynMap, outputPath, true)
if err != nil {
return err
}
return nil
}
// getAlphabeticalOrder returns a yamlsaver order with the map keys sorted
// alphabetically, so that generated YAML output is stable across runs and
// does not produce spurious diffs in PRs.
func getAlphabeticalOrder[T any](mapping map[string]T) *yamlsaver.Order {
	// Pre-size to the known number of keys to avoid repeated slice growth.
	order := make([]string, 0, len(mapping))
	for k := range mapping {
		order = append(order, k)
	}
	slices.Sort(order)
	return yamlsaver.NewOrder(order)
}
func convertLinksToAbsoluteUrl(s string) string {
if s == "" {
return s
@ -207,3 +251,7 @@ func convertLinksToAbsoluteUrl(s string) string {
return result
}
// isCliPath reports whether the annotation path belongs to the CLI package
// tree rather than to the Go SDK module.
func isCliPath(path string) bool {
	const sdkModulePrefix = "github.com/databricks/databricks-sdk-go"
	return !strings.HasPrefix(path, sdkModulePrefix)
}

View File

@ -417,10 +417,10 @@ github.com/databricks/cli/bundle/config/variable.TargetVariable:
"lookup":
"description": |-
The name of the alert, cluster_policy, cluster, dashboard, instance_pool, job, metastore, pipeline, query, service_principal, or warehouse object for which to retrieve an ID.
"type":
"markdown_description":
"description": |-
The type of the variable.
"markdown_description":
"type":
"description": |-
The type of the variable.
github.com/databricks/cli/bundle/config/variable.Variable:
@ -438,64 +438,3 @@ github.com/databricks/cli/bundle/config/variable.Variable:
"type":
"description": |-
The type of the variable.
github.com/databricks/databricks-sdk-go/service/serving.Ai21LabsConfig:
"ai21labs_api_key":
"description": |-
PLACEHOLDER
"ai21labs_api_key_plaintext":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.GoogleCloudVertexAiConfig:
"private_key":
"description": |-
PLACEHOLDER
"private_key_plaintext":
"description": |-
PLACEHOLDER
"project_id":
"description": |-
PLACEHOLDER
"region":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.OpenAiConfig:
"microsoft_entra_client_id":
"description": |-
PLACEHOLDER
"microsoft_entra_client_secret":
"description": |-
PLACEHOLDER
"microsoft_entra_client_secret_plaintext":
"description": |-
PLACEHOLDER
"microsoft_entra_tenant_id":
"description": |-
PLACEHOLDER
"openai_api_base":
"description": |-
PLACEHOLDER
"openai_api_key":
"description": |-
PLACEHOLDER
"openai_api_key_plaintext":
"description": |-
PLACEHOLDER
"openai_api_type":
"description": |-
PLACEHOLDER
"openai_api_version":
"description": |-
PLACEHOLDER
"openai_deployment_name":
"description": |-
PLACEHOLDER
"openai_organization":
"description": |-
PLACEHOLDER
github.com/databricks/databricks-sdk-go/service/serving.PaLmConfig:
"palm_api_key":
"description": |-
PLACEHOLDER
"palm_api_key_plaintext":
"description": |-
PLACEHOLDER

File diff suppressed because it is too large Load Diff

View File

@ -42,7 +42,8 @@ func copyFile(src, dst string) error {
// Checks whether descriptions are added for new config fields in the annotations.yml file
// If this test fails either manually add descriptions to the `annotations.yml` or do the following:
// 1. run `make schema` from the repository root to add placeholder descriptions
// 1. for fields described outside of CLI package fetch latest schema from the OpenAPI spec and add path to file to DATABRICKS_OPENAPI_SPEC env variable
// 2. run `make schema` from the repository root to add placeholder descriptions
// 3. replace all "PLACEHOLDER" values with the actual descriptions if possible
// 4. run `make schema` again to regenerate the schema with actual descriptions
func TestRequiredAnnotationsForNewFields(t *testing.T) {

View File

@ -1,7 +1,6 @@
package main
import (
"bytes"
"encoding/json"
"fmt"
"os"
@ -9,7 +8,6 @@ import (
"reflect"
"strings"
"github.com/databricks/cli/libs/dyn/yamlloader"
"github.com/databricks/cli/libs/jsonschema"
"gopkg.in/yaml.v3"
)
@ -82,9 +80,13 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) {
// Skip if the type is not in the openapi spec.
_, ok := p.ref[k]
if !ok {
k = mapIncorrectTypNames(k)
_, ok = p.ref[k]
if !ok {
continue
}
}
// Return the first Go SDK type found in the openapi spec.
return p.ref[k], true
@ -93,6 +95,23 @@ func (p *openapiParser) findRef(typ reflect.Type) (jsonschema.Schema, bool) {
return jsonschema.Schema{}, false
}
// Fix inconsistent type names between the Go SDK and the OpenAPI spec.
// E.g. "serving.PaLmConfig" in the Go SDK is "serving.PaLMConfig" in the OpenAPI spec.
func mapIncorrectTypNames(ref string) string {
	renames := map[string]string{
		"serving.PaLmConfig":                "serving.PaLMConfig",
		"serving.OpenAiConfig":              "serving.OpenAIConfig",
		"serving.GoogleCloudVertexAiConfig": "serving.GoogleCloudVertexAIConfig",
		"serving.Ai21LabsConfig":            "serving.AI21LabsConfig",
	}
	if fixed, ok := renames[ref]; ok {
		return fixed
	}
	return ref
}
// Use the OpenAPI spec to load descriptions for the given type.
func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overridesPath string) error {
annotations := annotationFile{}
@ -142,31 +161,40 @@ func (p *openapiParser) extractAnnotations(typ reflect.Type, outputPath, overrid
return err
}
b, err = yaml.Marshal(overrides)
err = saveYamlWithStyle(overridesPath, overrides)
if err != nil {
return err
}
o, err := yamlloader.LoadYAML("", bytes.NewBuffer(b))
err = saveYamlWithStyle(outputPath, annotations)
if err != nil {
return err
}
err = saveYamlWithStyle(overridesPath, o)
err = prependCommentToFile(outputPath, "# This file is auto-generated. DO NOT EDIT.\n")
if err != nil {
return err
}
b, err = yaml.Marshal(annotations)
if err != nil {
return err
}
b = bytes.Join([][]byte{[]byte("# This file is auto-generated. DO NOT EDIT."), b}, []byte("\n"))
err = os.WriteFile(outputPath, b, 0o644)
if err != nil {
return err
}
return nil
}
func prependCommentToFile(outputPath, comment string) error {
b, err := os.ReadFile(outputPath)
if err != nil {
return err
}
f, err := os.OpenFile(outputPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
if err != nil {
return err
}
defer f.Close()
_, err = f.WriteString(comment)
if err != nil {
return err
}
_, err = f.Write(b)
return err
}
func addEmptyOverride(key, pkg string, overridesFile annotationFile) {
if overridesFile[pkg] == nil {
overridesFile[pkg] = map[string]annotation{}

View File

@ -4772,9 +4772,11 @@
"type": "object",
"properties": {
"ai21labs_api_key": {
"description": "The Databricks secret key reference for an AI21 Labs API key. If you prefer to paste your API key directly, see `ai21labs_api_key_plaintext`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.",
"$ref": "#/$defs/string"
},
"ai21labs_api_key_plaintext": {
"description": "An AI21 Labs API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `ai21labs_api_key`. You must provide an API key using one of the following fields: `ai21labs_api_key` or `ai21labs_api_key_plaintext`.",
"$ref": "#/$defs/string"
}
},
@ -5287,15 +5289,19 @@
"type": "object",
"properties": {
"private_key": {
"description": "The Databricks secret key reference for a private key for the service account which has access to the Google Cloud Vertex AI Service. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to paste your API key directly, see `private_key_plaintext`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`",
"$ref": "#/$defs/string"
},
"private_key_plaintext": {
"description": "The private key for the service account which has access to the Google Cloud Vertex AI Service provided as a plaintext secret. See [Best practices for managing service account keys](https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys). If you prefer to reference your key using Databricks Secrets, see `private_key`. You must provide an API key using one of the following fields: `private_key` or `private_key_plaintext`.",
"$ref": "#/$defs/string"
},
"project_id": {
"description": "This is the Google Cloud project id that the service account is associated with.",
"$ref": "#/$defs/string"
},
"region": {
"description": "This is the region for the Google Cloud Vertex AI Service. See [supported regions](https://cloud.google.com/vertex-ai/docs/general/locations) for more details. Some models are only available in specific regions.",
"$ref": "#/$defs/string"
}
},
@ -5313,36 +5319,47 @@
"type": "object",
"properties": {
"microsoft_entra_client_id": {
"description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n",
"$ref": "#/$defs/string"
},
"microsoft_entra_client_secret": {
"description": "The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.\nIf you prefer to paste your client secret directly, see `microsoft_entra_client_secret_plaintext`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n",
"$ref": "#/$defs/string"
},
"microsoft_entra_client_secret_plaintext": {
"description": "The client secret used for Microsoft Entra ID authentication provided as a plaintext string.\nIf you prefer to reference your key using Databricks Secrets, see `microsoft_entra_client_secret`.\nYou must provide an API key using one of the following fields: `microsoft_entra_client_secret` or `microsoft_entra_client_secret_plaintext`.\n",
"$ref": "#/$defs/string"
},
"microsoft_entra_tenant_id": {
"description": "This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n",
"$ref": "#/$defs/string"
},
"openai_api_base": {
"description": "This is a field to provide a customized base URl for the OpenAI API.\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\nFor other OpenAI API types, this field is optional, and if left unspecified, the standard OpenAI base URL is used.\n",
"$ref": "#/$defs/string"
},
"openai_api_key": {
"description": "The Databricks secret key reference for an OpenAI API key using the OpenAI or Azure service. If you prefer to paste your API key directly, see `openai_api_key_plaintext`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.",
"$ref": "#/$defs/string"
},
"openai_api_key_plaintext": {
"description": "The OpenAI API key using the OpenAI or Azure service provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `openai_api_key`. You must provide an API key using one of the following fields: `openai_api_key` or `openai_api_key_plaintext`.",
"$ref": "#/$defs/string"
},
"openai_api_type": {
"description": "This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n",
"$ref": "#/$defs/string"
},
"openai_api_version": {
"description": "This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n",
"$ref": "#/$defs/string"
},
"openai_deployment_name": {
"description": "This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n",
"$ref": "#/$defs/string"
},
"openai_organization": {
"description": "This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n",
"$ref": "#/$defs/string"
}
},
@ -5360,9 +5377,11 @@
"type": "object",
"properties": {
"palm_api_key": {
"description": "The Databricks secret key reference for a PaLM API key. If you prefer to paste your API key directly, see `palm_api_key_plaintext`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.",
"$ref": "#/$defs/string"
},
"palm_api_key_plaintext": {
"description": "The PaLM API key provided as a plaintext string. If you prefer to reference your key using Databricks Secrets, see `palm_api_key`. You must provide an API key using one of the following fields: `palm_api_key` or `palm_api_key_plaintext`.",
"$ref": "#/$defs/string"
}
},