databricks-cli/libs/template/writer_test.go

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

115 lines
3.1 KiB
Go
Raw Permalink Normal View History

Refactor `bundle init` (#2074) ## Summary of changes This PR introduces three new abstractions: 1. `Resolver`: Resolves which reader and writer to use for a template. 2. `Writer`: Writes a template project to disk. Prompts the user if necessary. 3. `Reader`: Reads a template specification from disk, built into the CLI or from GitHub. Introducing these abstractions helps decouple reading a template from writing it. When I tried adding telemetry for the `bundle init` command, I noticed that the code in `cmd/init.go` was getting convoluted and hard to test. A future change could have accidentally logged PII when a user initialised a custom template. Hedging against that risk is important here because we use a generic untyped `map<string, string>` representation in the backend to log telemetry for the `databricks bundle init`. Otherwise, we risk accidentally breaking our compliance with our centralization requirements. ### Details After this PR there are two classes of templates that can be initialized: 1. A `databricks` template: This could be a builtin template or a template outside the CLI like mlops-stacks, which is still owned and managed by Databricks. These templates log their telemetry arguments and template name. 2. A `custom` template: These are templates created by and managed by the end user. In these templates we do not log the template name and args. Instead a generic placeholder string of "custom" is logged in our telemetry system. NOTE: The functionality of the `databricks bundle init` command remains the same after this PR. Only the internal abstractions used are changed. ## Tests New unit tests. Existing golden and unit tests. Also a fair bit of manual testing.
2025-01-20 12:09:28 +00:00
package template
import (
"context"
"runtime"
"testing"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/filer"
2025-01-20 17:15:29 +00:00
"github.com/databricks/cli/libs/jsonschema"
"github.com/databricks/cli/libs/telemetry"
"github.com/databricks/cli/libs/telemetry/protos"
Refactor `bundle init` (#2074) ## Summary of changes This PR introduces three new abstractions: 1. `Resolver`: Resolves which reader and writer to use for a template. 2. `Writer`: Writes a template project to disk. Prompts the user if necessary. 3. `Reader`: Reads a template specification from disk, built into the CLI or from GitHub. Introducing these abstractions helps decouple reading a template from writing it. When I tried adding telemetry for the `bundle init` command, I noticed that the code in `cmd/init.go` was getting convoluted and hard to test. A future change could have accidentally logged PII when a user initialised a custom template. Hedging against that risk is important here because we use a generic untyped `map<string, string>` representation in the backend to log telemetry for the `databricks bundle init`. Otherwise, we risk accidentally breaking our compliance with our centralization requirements. ### Details After this PR there are two classes of templates that can be initialized: 1. A `databricks` template: This could be a builtin template or a template outside the CLI like mlops-stacks, which is still owned and managed by Databricks. These templates log their telemetry arguments and template name. 2. A `custom` template: These are templates created by and managed by the end user. In these templates we do not log the template name and args. Instead a generic placeholder string of "custom" is logged in our telemetry system. NOTE: The functionality of the `databricks bundle init` command remains the same after this PR. Only the internal abstractions used are changed. ## Tests New unit tests. Existing golden and unit tests. Also a fair bit of manual testing.
2025-01-20 12:09:28 +00:00
"github.com/databricks/databricks-sdk-go"
workspaceConfig "github.com/databricks/databricks-sdk-go/config"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestDefaultWriterConfigure checks that configuring a defaultWriter with a
// plain local output path records the config path and selects a local filer.
func TestDefaultWriterConfigure(t *testing.T) {
	writer := new(defaultWriter)

	// The output directory here is on the local file system.
	configureErr := writer.Configure(context.Background(), "/foo/bar", "/out/abc")

	assert.NoError(t, configureErr)
	assert.Equal(t, "/foo/bar", writer.configPath)
	assert.IsType(t, &filer.LocalClient{}, writer.outputFiler)
}
func TestDefaultWriterConfigureOnDBR(t *testing.T) {
// This test is not valid on windows because a DBR image is always based on
// Linux.
if runtime.GOOS == "windows" {
t.Skip("Skipping test on Windows")
}
ctx := dbr.MockRuntime(context.Background(), true)
ctx = root.SetWorkspaceClient(ctx, &databricks.WorkspaceClient{
Config: &workspaceConfig.Config{Host: "https://myhost.com"},
})
w := &defaultWriter{}
err := w.Configure(ctx, "/foo/bar", "/Workspace/out/abc")
assert.NoError(t, err)
assert.Equal(t, "/foo/bar", w.configPath)
assert.IsType(t, &filer.WorkspaceFilesExtensionsClient{}, w.outputFiler)
}
// TestMaterializeForNonTemplateDirectory checks that materializing from a
// directory that contains no template schema file fails with a descriptive
// error.
func TestMaterializeForNonTemplateDirectory(t *testing.T) {
	const wantErr = "not a bundle template: expected to find a template schema file at databricks_template_schema.json"

	outputDir := t.TempDir()
	// An empty directory stands in for "not a template".
	emptySourceDir := t.TempDir()
	ctx := context.Background()

	writer := new(defaultWriter)
	require.NoError(t, writer.Configure(ctx, "/foo/bar", outputDir))

	// Reading from the empty directory must be rejected.
	materializeErr := writer.Materialize(ctx, &localReader{path: emptySourceDir})
	assert.EqualError(t, materializeErr, wantErr)
}
2025-01-20 17:15:29 +00:00
// TestDefaultWriterLogTelemetry checks that a defaultWriter for a custom
// template logs exactly one bundle init event carrying the Custom template
// name and the bundle UUID.
func TestDefaultWriterLogTelemetry(t *testing.T) {
	ctx := telemetry.WithMockLogger(context.Background())

	w := &defaultWriter{templateName: Custom}
	w.LogTelemetry(ctx)

	logs := telemetry.Introspect(ctx)
	// require (not assert) so the test stops here instead of panicking with an
	// index-out-of-range on logs[0] when no event was logged.
	require.Len(t, logs, 1)
	assert.Equal(t, &protos.BundleInitEvent{
		TemplateName: string(Custom),
		Uuid:         bundleUuid,
	}, logs[0].BundleInitEvent)
}
// TestWriterWithFullTelemetry checks that a writerWithFullTelemetry logs
// exactly one bundle init event containing the template name, the bundle UUID
// and the template's enum arguments.
func TestWriterWithFullTelemetry(t *testing.T) {
	ctx := telemetry.WithMockLogger(context.Background())

	w := &writerWithFullTelemetry{
		defaultWriter: defaultWriter{
			templateName: DefaultPython,
			config: &config{
				values: map[string]any{
					"foo": "v1",
					"bar": "v2",
				},
				schema: &jsonschema.Schema{
					Properties: map[string]*jsonschema.Schema{
						// "foo" is constrained by an enum.
						"foo": {
							Type: jsonschema.StringType,
							Enum: []any{"v1", "v2"},
						},
						// "bar" is a free-form string with no enum.
						"bar": {
							Type: jsonschema.StringType,
						},
					},
				},
			},
		},
	}
	w.LogTelemetry(ctx)

	logs := telemetry.Introspect(ctx)
	// require (not assert) so the test stops here instead of panicking with an
	// index-out-of-range on logs[0] when no event was logged.
	require.Len(t, logs, 1)

	// Only the enum-constrained "foo" value is expected in the event; the
	// free-form "bar" value must not appear.
	assert.Equal(t, &protos.BundleInitEvent{
		TemplateName: string(DefaultPython),
		TemplateEnumArgs: []protos.BundleInitTemplateEnumArg{
			{
				Key:   "foo",
				Value: "v1",
			},
		},
		Uuid: bundleUuid,
	}, logs[0].BundleInitEvent)
}