First look for databricks.yml before falling back to bundle.yml (#580)

## Changes
* Add support for using `databricks.yml` as config file. If
`databricks.yml` is not found then falling back to `bundle.yml` for
backwards compatibility.
* Add support for `.yaml` extension.
* Give an error when more than one config file is found

## Tests
* added unit test
* manual testing the different cases

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Fabian Jakobs 2023-07-18 12:16:34 +02:00 committed by GitHub
parent a7a109a5d8
commit 8cfb1c133e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 132 additions and 29 deletions

View File

@ -45,7 +45,11 @@ type Bundle struct {
func Load(path string) (*Bundle, error) { func Load(path string) (*Bundle, error) {
bundle := &Bundle{} bundle := &Bundle{}
err := bundle.Config.Load(filepath.Join(path, config.FileName)) configFile, err := config.FileNames.FindInPath(path)
if err != nil {
return nil, err
}
err = bundle.Config.Load(configFile)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@ -23,7 +23,7 @@ func TestLoadExists(t *testing.T) {
func TestBundleCacheDir(t *testing.T) { func TestBundleCacheDir(t *testing.T) {
projectDir := t.TempDir() projectDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml")) f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err) require.NoError(t, err)
f1.Close() f1.Close()
@ -47,7 +47,7 @@ func TestBundleCacheDir(t *testing.T) {
func TestBundleCacheDirOverride(t *testing.T) { func TestBundleCacheDirOverride(t *testing.T) {
projectDir := t.TempDir() projectDir := t.TempDir()
bundleTmpDir := t.TempDir() bundleTmpDir := t.TempDir()
f1, err := os.Create(filepath.Join(projectDir, "bundle.yml")) f1, err := os.Create(filepath.Join(projectDir, "databricks.yml"))
require.NoError(t, err) require.NoError(t, err)
f1.Close() f1.Close()

View File

@ -27,8 +27,10 @@ func (m *processRootIncludes) Apply(ctx context.Context, b *bundle.Bundle) error
var out []bundle.Mutator var out []bundle.Mutator
// Map with files we've already seen to avoid loading them twice. // Map with files we've already seen to avoid loading them twice.
var seen = map[string]bool{ var seen = map[string]bool{}
config.FileName: true,
for _, file := range config.FileNames {
seen[file] = true
} }
// Maintain list of files in order of files being loaded. // Maintain list of files in order of files being loaded.

View File

@ -61,7 +61,7 @@ func TestProcessRootIncludesSingleGlob(t *testing.T) {
}, },
} }
touch(t, bundle.Config.Path, "bundle.yml") touch(t, bundle.Config.Path, "databricks.yml")
touch(t, bundle.Config.Path, "a.yml") touch(t, bundle.Config.Path, "a.yml")
touch(t, bundle.Config.Path, "b.yml") touch(t, bundle.Config.Path, "b.yml")

View File

@ -11,12 +11,39 @@ import (
"github.com/imdario/mergo" "github.com/imdario/mergo"
) )
// FileName is the name of bundle configuration file. type ConfigFileNames []string
const FileName = "bundle.yml"
// FileNames contains allowed names of bundle configuration files.
var FileNames = ConfigFileNames{"databricks.yml", "databricks.yaml", "bundle.yml", "bundle.yaml"}
func (c ConfigFileNames) FindInPath(path string) (string, error) {
result := ""
var firstErr error
for _, file := range c {
filePath := filepath.Join(path, file)
_, err := os.Stat(filePath)
if err == nil {
if result != "" {
return "", fmt.Errorf("multiple bundle root configuration files found in %s", path)
}
result = filePath
} else {
if firstErr == nil {
firstErr = err
}
}
}
if result == "" {
return "", firstErr
}
return result, nil
}
type Root struct { type Root struct {
// Path contains the directory path to the root of the bundle. // Path contains the directory path to the root of the bundle.
// It is set when loading `bundle.yml`. // It is set when loading `databricks.yml`.
Path string `json:"-" bundle:"readonly"` Path string `json:"-" bundle:"readonly"`
// Contains user defined variables // Contains user defined variables
@ -27,7 +54,7 @@ type Root struct {
Bundle Bundle `json:"bundle"` Bundle Bundle `json:"bundle"`
// Include specifies a list of patterns of file names to load and // Include specifies a list of patterns of file names to load and
// merge into the this configuration. If not set in `bundle.yml`, // merge into the this configuration. If not set in `databricks.yml`,
// it defaults to loading `*.yml` and `*/*.yml`. // it defaults to loading `*.yml` and `*/*.yml`.
// //
// Also see [mutator.DefineDefaultInclude]. // Also see [mutator.DefineDefaultInclude].
@ -62,7 +89,10 @@ func Load(path string) (*Root, error) {
// If we were given a directory, assume this is the bundle root. // If we were given a directory, assume this is the bundle root.
if stat.IsDir() { if stat.IsDir() {
path = filepath.Join(path, FileName) path, err = FileNames.FindInPath(path)
if err != nil {
return nil, err
}
} }
if err := r.Load(path); err != nil { if err := r.Load(path); err != nil {

View File

@ -2,7 +2,11 @@ package config
import ( import (
"encoding/json" "encoding/json"
"os"
"path/filepath"
"reflect" "reflect"
"runtime"
"strings"
"testing" "testing"
"github.com/databricks/cli/bundle/config/variable" "github.com/databricks/cli/bundle/config/variable"
@ -26,7 +30,7 @@ func TestRootMarshalUnmarshal(t *testing.T) {
func TestRootLoad(t *testing.T) { func TestRootLoad(t *testing.T) {
root := &Root{} root := &Root{}
err := root.Load("../tests/basic/bundle.yml") err := root.Load("../tests/basic/databricks.yml")
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, "basic", root.Bundle.Name) assert.Equal(t, "basic", root.Bundle.Name)
} }
@ -78,13 +82,13 @@ func TestRootMergeMap(t *testing.T) {
func TestDuplicateIdOnLoadReturnsError(t *testing.T) { func TestDuplicateIdOnLoadReturnsError(t *testing.T) {
root := &Root{} root := &Root{}
err := root.Load("./testdata/duplicate_resource_names_in_root/bundle.yml") err := root.Load("./testdata/duplicate_resource_names_in_root/databricks.yml")
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/bundle.yml, pipeline at ./testdata/duplicate_resource_names_in_root/bundle.yml)") assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_names_in_root/databricks.yml, pipeline at ./testdata/duplicate_resource_names_in_root/databricks.yml)")
} }
func TestDuplicateIdOnMergeReturnsError(t *testing.T) { func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
root := &Root{} root := &Root{}
err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml") err := root.Load("./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml")
require.NoError(t, err) require.NoError(t, err)
other := &Root{} other := &Root{}
@ -92,7 +96,7 @@ func TestDuplicateIdOnMergeReturnsError(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
err = root.Merge(other) err = root.Merge(other)
assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/bundle.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)") assert.ErrorContains(t, err, "multiple resources named foo (job at ./testdata/duplicate_resource_name_in_subconfiguration/databricks.yml, pipeline at ./testdata/duplicate_resource_name_in_subconfiguration/resources.yml)")
} }
func TestInitializeVariables(t *testing.T) { func TestInitializeVariables(t *testing.T) {
@ -163,3 +167,62 @@ func TestRootMergeEnvironmentWithMode(t *testing.T) {
require.NoError(t, root.MergeEnvironment(env)) require.NoError(t, root.MergeEnvironment(env))
assert.Equal(t, Development, root.Bundle.Mode) assert.Equal(t, Development, root.Bundle.Mode)
} }
func TestConfigFileNames_FindInPath(t *testing.T) {
testCases := []struct {
name string
files []string
expected string
err string
}{
{
name: "file found",
files: []string{"databricks.yml"},
expected: "BASE/databricks.yml",
err: "",
},
{
name: "file found",
files: []string{"bundle.yml"},
expected: "BASE/bundle.yml",
err: "",
},
{
name: "multiple files found",
files: []string{"databricks.yaml", "bundle.yml"},
expected: "",
err: "multiple bundle root configuration files found",
},
{
name: "file not found",
files: []string{},
expected: "",
err: "no such file or directory",
},
}
if runtime.GOOS == "windows" {
testCases[3].err = "The system cannot find the file specified."
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
projectDir := t.TempDir()
for _, file := range tc.files {
f1, _ := os.Create(filepath.Join(projectDir, file))
f1.Close()
}
result, err := FileNames.FindInPath(projectDir)
expected := strings.Replace(tc.expected, "BASE/", projectDir+string(os.PathSeparator), 1)
assert.Equal(t, expected, result)
if tc.err != "" {
assert.ErrorContains(t, err, tc.err)
} else {
assert.NoError(t, err)
}
})
}
}

View File

@ -36,12 +36,16 @@ func getRootWithTraversal() (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
path, err := folders.FindDirWithLeaf(wd, config.FileName)
if err != nil { for _, file := range config.FileNames {
return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileName) path, err := folders.FindDirWithLeaf(wd, file)
} if err == nil {
return path, nil return path, nil
} }
}
return "", fmt.Errorf(`unable to locate bundle root: %s not found`, config.FileNames[0])
}
// mustGetRoot returns a bundle root or an error if one cannot be found. // mustGetRoot returns a bundle root or an error if one cannot be found.
func mustGetRoot() (string, error) { func mustGetRoot() (string, error) {

View File

@ -76,8 +76,8 @@ func TestRootLookup(t *testing.T) {
chdir(t, t.TempDir()) chdir(t, t.TempDir())
// Create bundle.yml file. // Create databricks.yml file.
f, err := os.Create(config.FileName) f, err := os.Create(config.FileNames[0])
require.NoError(t, err) require.NoError(t, err)
defer f.Close() defer f.Close()

View File

@ -14,7 +14,7 @@ import (
func TestConflictingResourceIdsNoSubconfig(t *testing.T) { func TestConflictingResourceIdsNoSubconfig(t *testing.T) {
_, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations") _, err := bundle.Load("./conflicting_resource_ids/no_subconfigurations")
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/bundle.yml") bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/no_subconfigurations/databricks.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath)) assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, bundleConfigPath))
} }
@ -22,7 +22,7 @@ func TestConflictingResourceIdsOneSubconfig(t *testing.T) {
b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration") b, err := bundle.Load("./conflicting_resource_ids/one_subconfiguration")
require.NoError(t, err) require.NoError(t, err)
err = bundle.Apply(context.Background(), b, bundle.Seq(mutator.DefaultMutators()...)) err = bundle.Apply(context.Background(), b, bundle.Seq(mutator.DefaultMutators()...))
bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/bundle.yml") bundleConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/databricks.yml")
resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml") resourcesConfigPath := filepath.FromSlash("conflicting_resource_ids/one_subconfiguration/resources.yml")
assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath)) assert.ErrorContains(t, err, fmt.Sprintf("multiple resources named foo (job at %s, pipeline at %s)", bundleConfigPath, resourcesConfigPath))
} }

View File

@ -15,7 +15,7 @@ func TestJobAndPipelineDevelopment(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1) assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath)) assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.Equal(t, b.Config.Bundle.Mode, config.Development) assert.Equal(t, b.Config.Bundle.Mode, config.Development)
assert.True(t, p.Development) assert.True(t, p.Development)
require.Len(t, p.Libraries, 1) require.Len(t, p.Libraries, 1)
@ -29,7 +29,7 @@ func TestJobAndPipelineStaging(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1) assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath)) assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development) assert.False(t, p.Development)
require.Len(t, p.Libraries, 1) require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path) assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
@ -42,14 +42,14 @@ func TestJobAndPipelineProduction(t *testing.T) {
assert.Len(t, b.Config.Resources.Pipelines, 1) assert.Len(t, b.Config.Resources.Pipelines, 1)
p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"] p := b.Config.Resources.Pipelines["nyc_taxi_pipeline"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(p.ConfigFilePath)) assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(p.ConfigFilePath))
assert.False(t, p.Development) assert.False(t, p.Development)
require.Len(t, p.Libraries, 1) require.Len(t, p.Libraries, 1)
assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path) assert.Equal(t, "./dlt/nyc_taxi_loader", p.Libraries[0].Notebook.Path)
assert.Equal(t, "nyc_taxi_production", p.Target) assert.Equal(t, "nyc_taxi_production", p.Target)
j := b.Config.Resources.Jobs["pipeline_schedule"] j := b.Config.Resources.Jobs["pipeline_schedule"]
assert.Equal(t, "job_and_pipeline/bundle.yml", filepath.ToSlash(j.ConfigFilePath)) assert.Equal(t, "job_and_pipeline/databricks.yml", filepath.ToSlash(j.ConfigFilePath))
assert.Equal(t, "Daily refresh of production pipeline", j.Name) assert.Equal(t, "Daily refresh of production pipeline", j.Name)
require.Len(t, j.Tasks, 1) require.Len(t, j.Tasks, 1)
assert.NotEmpty(t, j.Tasks[0].PipelineTask.PipelineId) assert.NotEmpty(t, j.Tasks[0].PipelineTask.PipelineId)

View File

@ -72,7 +72,7 @@ var syncCmd = &cobra.Command{
// //
// To be uncommented and used once our VS Code extension is bundle aware. // To be uncommented and used once our VS Code extension is bundle aware.
// Until then, this could interfere with extension usage where a `bundle.yml` file is present. // Until then, this could interfere with extension usage where a `databricks.yml` file is present.
// See https://github.com/databricks/cli/pull/207. // See https://github.com/databricks/cli/pull/207.
// //
// b := bundle.GetOrNil(cmd.Context()) // b := bundle.GetOrNil(cmd.Context())