First look for databricks.yml before falling back to bundle.yml (#580)

## Changes
* Add support for using `databricks.yml` as config file. If
`databricks.yml` is not found then falling back to `bundle.yml` for
backwards compatibility.
* Add support for `.yaml` extension.
* Give an error when more than one config file is found

## Tests
* added unit test
* manual testing the different cases

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Fabian Jakobs 2023-07-18 12:16:34 +02:00 committed by GitHub
parent a7a109a5d8
commit 8cfb1c133e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 132 additions and 29 deletions

View File

@ -45,7 +45,11 @@ type Bundle struct {
func Load(path string) (*Bundle, error) {
bundle := &Bundle{}
err := bundle.Config.Load(filepath.Join(path, config.FileName))
configFile, err := config.FileNames.FindInPath(path)
if err != nil {
return nil, err
}
err = bundle.Config.Load(configFile)
if err != nil {
return nil, err
}

View File

@ -23,7 +23,7 @@ func TestLoadExists(t *testing.T) {
func TestBundleCacheDir(t *testing.T) {
projectDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml"))
f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err)
f1.Close()
@ -47,7 +47,7 @@ func TestBundleCacheDir(t *testing.T) {
func TestBundleCacheDirOverride(t *testing.T) {
projectDir := t.TempDir()
bundleTmpDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml"))
f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err)
f1.Close()

View File

@ -27,8 +27,10 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) error
var out []bundle.Mutator
// Map with files we've already seen to avoid loading them twice.
var seen = map[string]bool{
config.FileName: true,
var seen = map[string]bool{}
for _, file := range config.FileNames {
seen[file] = true
}
// Maintain list of files in order of files being loaded.

View File

@ -61,7 +61,7 @@ func TestProcessRootIncludesSingleGlob(t *testing.T) {
},
}
touch(t, bundle.Config.Path, "bundle.yml")
touch(t, bundle.Config.Path, "databricks.yml")
touch(t, bundle.Config.Path, "a.yml")
touch(t, bundle.Config.Path, "b.yml")

View File

@ -11,12 +11,39 @@ import (
"github.com/imdario/mergo"
)
// FileName is the name of bundle configuration file.
const FileName = "bundle.yml"
type ConfigFileNames []string
// FileNames contains allowed names of bundle configuration files.
var FileNames = ConfigFileNames{"databricks.yml", "databricks.yaml", "bundle.yml", "bundle.yaml"}
func (c ConfigFileNames) FindInPath(path string) (string, error) {
result := ""
var firstErr error
for _, file := range c {
filePath := filepath.Join(path, file)
_, err := os.Stat(filePath)
if err == nil {
if result != "" {
return "", fmt.Errorf("multiple bundle root configuration files found in %s", path)
}
result = filePath
} else {
if firstErr == nil {
firstErr = err
}
}
}
if result == "" {
return "", firstErr
}
return result, nil
}
type Root struct {
// Path contains the directory path to the root of the bundle.
// It is set when loading `bundle.yml`.
// It is set when loading `databricks.yml`.
Path string `json:"-" bundle:"readonly"`
// Contains user defined variables
@ -27,7 +54,7 @@ type Root struct {
Bundle Bundle `json:"bundle"`
// Include specifies a list of patterns of file names to load and
// merge into the this configuration. If not set in `bundle.yml`,
// merge into the this configuration. If not set in `databricks.yml`,
// it defaults to loading `*.yml` and `*/*.yml`.
//
// Also see [mutator.DefineDefaultInclude].
@ -62,7 +89,10 @@ func Load(path string) (*Root, error) {
// If we were given a directory, assume this is the bundle root.
if stat.IsDir() {
path = filepath.Join(path, FileName)
path, err = FileNames.FindInPath(path)
if err != nil {
return nil, err
}
}
if err := r.Load(path); err != nil {

View File

@ -2,7 +2,11 @@ package config
import (
"encoding/json"
"os"
"path/filepath"
"reflect"
"runtime"
"strings"
"testing"
"github.com/databricks/cli/bundle/config/variable"
@ -26,7 +30,7 @@ func TestRootMarshalUnmarshal(t *testing.T) {
func TestRootLoad(t *testing.T) {
root := &Root{}
err := root.Load("../tests/basic/bundle.yml")
err := root.Load("../tests/basic/databricks.yml")
require.NoError(t, err)
assert.Equal(t, "basic", root.Bundle.Name)
}
@ -78,13 +82,13 @@ func TestRootMergeMap(t *testing.T) {
func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
root := &Root{}
err := root.Load("./testdata/duplicate_resource_names_in_root/bundle.yml")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)")
err := root.Load("./testdata/duplicate_resource_names_in_root/databricks.yml")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)")
}
func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
root := &Root{}
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml")
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml")
require.NoError(t, err)
other := &Root{}
@ -92,7 +96,7 @@ func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
require.NoError(t, err)
err = root.Merge(other)
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
}
func TestInitializeVariables(t *testing.T) {
@ -163,3 +167,62 @@ func TestRootMergeEnvironmentWithMode(t *testing.T) {
require.NoError(t, root.MergeEnvironment(env))
assert.Equal(t, Development, root.Bundle.Mode)
}
func TestConfigFileNames_FindInPath(t *testing.T) {
testCases := []struct {
name string
files []string
expected string
err string
}{
{
name: "file found",
files: []string{"databricks.yml"},
expected: "BASE/databricks.yml",
err: "",
},
{
name: "file found",
files: []string{"bundle.yml"},
expected: "BASE/bundle.yml",
err: "",
},
{
name: "multiple files found",
files: []string{"databricks.yaml", "bundle.yml"},
expected: "",
err: "multiple bundle root configuration files found",
},
{
name: "file not found",
files: []string{},
expected: "",
err: "no such file or directory",
},
}
if runtime.GOOS == "windows" {
testCases[3].err = "The system cannot find the file specified."
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
projectDir := t.TempDir()
for _, file := range tc.files {
f1, _ := os.Create(filepath.Join(projectDir, file))
f1.Close()
}
result, err := FileNames.FindInPath(projectDir)
expected := strings.Replace(tc.expected, "BASE/", projectDir+string(os.PathSeparator), 1)
assert.Equal(t, expected, result)
if tc.err != "" {
assert.ErrorContains(t, err, tc.err)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@ -36,11 +36,15 @@ func getRootWithTraversal() (string, error) {
if err != nil {
return "", err
}
path, err := folders.FindDirWithLeaf(wd, config.FileName)
if err != nil {
return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileName)
}
for _, file := range config.FileNames {
path, err := folders.FindDirWithLeaf(wd, file)
if err == nil {
return path, nil
}
}
return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileNames[0])
}
// mustGetRoot returns a bundle root or an error if one cannot be found.

View File

@ -76,8 +76,8 @@ func TestRootLookup(t *testing.T) {
chdir(t, t.TempDir())
// Create bundle.yml file.
f, err := os.Create(config.FileName)
// Create databricks.yml file.
f, err := os.Create(config.FileNames[0])
require.NoError(t, err)
defer f.Close()

View File

@ -14,7 +14,7 @@ import (
func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
_, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/databricks.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath))
}
@ -22,7 +22,7 @@ func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
require.NoError(t, err)
err = bundle.Apply(context.Background(), b, bundle.Seq(mutator.DefaultMutators()...))
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/databricks.yml")
resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath))
}

View File

@ -15,7 +15,7 @@ func TestJobAndPipelineDevelopment(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, b.Config.Bundle.Mode, config.Development)
assert.True(t, p.Development)
require.Len(t, p.Libraries, 1)
@ -29,7 +29,7 @@ func TestJobAndPipelineStaging(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development)
require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
@ -42,14 +42,14 @@ func TestJobAndPipelineProduction(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development)
require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
assert.Equal(t, "nyc_taxi_production", p.Target)
j := b.Config.Resources.Jobs["pipeline_schedule"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(j.ConfigFilePath))
assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(j.ConfigFilePath))
assert.Equal(t, "Daily refresh of production pipeline", j.Name)
require.Len(t, j.Tasks, 1)
assert.NotEmpty(t, j.Tasks[0].PipelineTask.PipelineId)

View File

@ -72,7 +72,7 @@ var syncCmd = &cobra.Command{
//
// To be uncommented and used once our VS Code extension is bundle aware.
// Until then, this could interfere with extension usage where a `bundle.yml` file is present.
// Until then, this could interfere with extension usage where a `databricks.yml` file is present.
// See https://github.com/databricks/cli/pull/207.
//
// b := bundle.GetOrNil(cmd.Context())