add custom prefixing behaviour for volumes

This commit is contained in:
Shreyas Goenka 2024-10-15 18:04:20 +02:00
parent d241c2b39c
commit 6192835d63
No known key found for this signature in database
GPG Key ID: 92A07DF49CCB0622
7 changed files with 176 additions and 5 deletions

View File

@ -11,6 +11,8 @@ import (
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/dyn/dynvar"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/textutil"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/jobs"
@ -194,8 +196,25 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
}
// Apply the prefix to volumes
for i := range r.Volumes {
r.Volumes[i].Name = normalizePrefix(prefix) + r.Volumes[i].Name
for _, v := range r.Volumes {
if containsUserIdentity(v.CatalogName, b.Config.Workspace.CurrentUser) {
log.Debugf(ctx, "Skipping prefix for volume %s because catalog %s contains the current user's identity", v.Name, v.CatalogName)
continue
}
if containsUserIdentity(v.SchemaName, b.Config.Workspace.CurrentUser) {
log.Debugf(ctx, "Skipping prefix for volume %s because schema %s contains the current user's identity", v.Name, v.SchemaName)
continue
}
// We only have to check for ${resources.schemas...} references because any
// other valid reference (like ${var.foo}) would have been interpolated by this point.
if p, ok := dynvar.PureReferenceToPath(v.SchemaName); ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")}) {
log.Debugf(ctx, "Skipping prefix for volume %s because schema name %s is defined in the bundle and will be interpolated at runtime", v.Name, v.SchemaName)
continue
}
v.Name = normalizePrefix(prefix) + v.Name
// HTTP API for volumes doesn't yet support tags. It's only supported in
// the Databricks UI and via the SQL API.
}

View File

@ -9,7 +9,9 @@ import (
"github.com/databricks/cli/bundle/config/mutator"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
@ -125,6 +127,15 @@ func TestApplyPresetsPrefixForUcSchema(t *testing.T) {
}
}
func TestApplyPresentsReminderToAddSupportForSkippingPrefixes(t *testing.T) {
_, ok := config.SupportedResources()["catalogs"]
assert.False(t, ok,
`Since you are adding support for UC catalogs to DABs please ensure that
you add logic to skip applying presets.name_prefix for UC schemas, UC volumes and
any other resources that fall under a catalog in the UC hierarchy (like registered models).
Once you do so feel free to remove this test.`)
}
func TestApplyPresetsPrefixForUcVolumes(t *testing.T) {
tests := []struct {
name string
@ -138,6 +149,8 @@ func TestApplyPresetsPrefixForUcVolumes(t *testing.T) {
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
CatalogName: "catalog1",
SchemaName: "schema1",
},
},
want: "prefix_volume1",
@ -152,6 +165,72 @@ func TestApplyPresetsPrefixForUcVolumes(t *testing.T) {
},
want: "volume1",
},
{
name: "skip prefix when catalog name contains user short name",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
CatalogName: "dev_john_wick_targets",
},
},
want: "volume1",
},
{
name: "skip prefix when catalog name contains user email",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
CatalogName: "dev_john.wick@continental.com_targets",
},
},
want: "volume1",
},
{
name: "skip prefix when schema name contains user short name",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
SchemaName: "dev_john_wick_weapons",
},
},
want: "volume1",
},
{
name: "skip prefix when schema name contains user email",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
SchemaName: "dev_john.wick@continental.com_targets",
},
},
want: "volume1",
},
{
name: "skip prefix when schema is defined in the bundle and will be interpolated at runtime",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
SchemaName: "${resources.schemas.schema1.name}",
},
},
want: "volume1",
},
{
name: "add prefix when schema is a reference without the resources.schemas prefix",
prefix: "[prefix]",
volume: &resources.Volume{
CreateVolumeRequestContent: &catalog.CreateVolumeRequestContent{
Name: "volume1",
SchemaName: "${foo.bar.baz}",
},
},
want: "prefix_volume1",
},
}
for _, tt := range tests {
@ -166,6 +245,14 @@ func TestApplyPresetsPrefixForUcVolumes(t *testing.T) {
Presets: config.Presets{
NamePrefix: tt.prefix,
},
Workspace: config.Workspace{
CurrentUser: &config.User{
ShortName: "john_wick",
User: &iam.User{
UserName: "john.wick@continental.com",
},
},
},
},
}

View File

@ -63,6 +63,10 @@ func transformDevelopmentMode(ctx context.Context, b *bundle.Bundle) {
}
}
func containsUserIdentity(s string, u *config.User) bool {
return strings.Contains(s, u.ShortName) || strings.Contains(s, u.UserName)
}
func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics {
var diags diag.Diagnostics
p := b.Config.Presets
@ -92,7 +96,7 @@ func validateDevelopmentMode(b *bundle.Bundle) diag.Diagnostics {
diags = diags.Extend(diag.Errorf("%s must start with '~/' or contain the current username to ensure uniqueness when using 'mode: development'", path))
}
}
if p.NamePrefix != "" && !strings.Contains(p.NamePrefix, u.ShortName) && !strings.Contains(p.NamePrefix, u.UserName) {
if p.NamePrefix != "" && !containsUserIdentity(p.NamePrefix, u) {
// Resources such as pipelines require a unique name, e.g. '[dev steve] my_pipeline'.
// For this reason we require the name prefix to contain the current username;
// it's a pitfall for users if they don't include it and later find out that

View File

@ -87,7 +87,11 @@ func findVolumeInBundle(b *bundle.Bundle, catalogName, schemaName, volumeName st
// at runtime via the ${resources.schemas.<name>} syntax. Thus we match the volume
// definition if the schema name is the same as the one in the bundle, or if the
// schema name is interpolated.
if v.SchemaName != schemaName && !dynvar.IsPureVariableReference(v.SchemaName) {
// We only have to check for ${resources.schemas...} references because any
// other valid reference (like ${var.foo}) would have been interpolated by this point.
p, ok := dynvar.PureReferenceToPath(v.SchemaName)
isSchemaDefinedInBundle := ok && p.HasPrefix(dyn.Path{dyn.Key("resources"), dyn.Key("schemas")})
if v.SchemaName != schemaName && !isSchemaDefinedInBundle {
continue
}
pathString := fmt.Sprintf("resources.volumes.%s", k)

View File

@ -68,8 +68,14 @@ func TestFindVolumeInBundle(t *testing.T) {
_, _, ok = findVolumeInBundle(b, "doesnotexist", "my_schema", "my_volume")
assert.False(t, ok)
// schema name is interpolated but does not have the right prefix. In this case
// we should not match the volume.
b.Config.Resources.Volumes["foo"].SchemaName = "${foo.bar.baz}"
_, _, ok = findVolumeInBundle(b, "main", "my_schema", "my_volume")
assert.False(t, ok)
// schema name is interpolated.
b.Config.Resources.Volumes["foo"].SchemaName = "${resources.schemas.my_schema}"
b.Config.Resources.Volumes["foo"].SchemaName = "${resources.schemas.my_schema.name}"
path, locations, ok = findVolumeInBundle(b, "main", "valuedoesnotmatter", "my_volume")
assert.True(t, ok)
assert.Equal(t, []dyn.Location{{

View File

@ -71,3 +71,23 @@ func (v ref) references() []string {
func IsPureVariableReference(s string) bool {
return len(s) > 0 && re.FindString(s) == s
}
// If s is a pure variable reference, this function returns the corresponding
// dyn.Path. Otherwise, it returns false.
func PureReferenceToPath(s string) (dyn.Path, bool) {
ref, ok := newRef(dyn.V(s))
if !ok {
return nil, false
}
if !ref.isPure() {
return nil, false
}
p, err := dyn.NewPathFromString(ref.references()[0])
if err != nil {
return nil, false
}
return p, true
}

View File

@ -51,3 +51,34 @@ func TestIsPureVariableReference(t *testing.T) {
assert.False(t, IsPureVariableReference("prefix ${foo.bar}"))
assert.True(t, IsPureVariableReference("${foo.bar}"))
}
func TestPureReferenceToPath(t *testing.T) {
for _, tc := range []struct {
in string
out string
ok bool
}{
{"${foo.bar}", "foo.bar", true},
{"${foo.bar.baz}", "foo.bar.baz", true},
{"${foo.bar.baz[0]}", "foo.bar.baz[0]", true},
{"${foo.bar.baz[0][1]}", "foo.bar.baz[0][1]", true},
{"${foo.bar.baz[0][1].qux}", "foo.bar.baz[0][1].qux", true},
{"${foo.one}${foo.two}", "", false},
{"prefix ${foo.bar}", "", false},
{"${foo.bar} suffix", "", false},
{"${foo.bar", "", false},
{"foo.bar}", "", false},
{"foo.bar", "", false},
{"{foo.bar}", "", false},
{"", "", false},
} {
path, ok := PureReferenceToPath(tc.in)
if tc.ok {
assert.True(t, ok)
assert.Equal(t, dyn.MustPathFromString(tc.out), path)
} else {
assert.False(t, ok)
}
}
}