diff --git a/acceptance/bundle/state/databricks.yml b/acceptance/bundle/state/databricks.yml new file mode 100644 index 000000000..4775ba33d --- /dev/null +++ b/acceptance/bundle/state/databricks.yml @@ -0,0 +1,21 @@ +bundle: + name: state + +resources: + jobs: + test: + name: "test" + tasks: + - task_key: "test-task" + spark_python_task: + python_file: ./test.py + new_cluster: + spark_version: 15.4.x-scala2.12 + node_type_id: i3.xlarge + data_security_mode: SINGLE_USER + num_workers: 0 + spark_conf: + spark.master: "local[*, 4]" + spark.databricks.cluster.profile: singleNode + custom_tags: + ResourceClass: SingleNode diff --git a/acceptance/bundle/state/out.state.txt b/acceptance/bundle/state/out.state.txt new file mode 100644 index 000000000..96966812b --- /dev/null +++ b/acceptance/bundle/state/out.state.txt @@ -0,0 +1,4 @@ +{ + "method": "GET", + "path": "/api/2.0/workspace-files/Workspace/Users/[USERNAME]/.bundle/state/default/state/terraform.tfstate" +} diff --git a/acceptance/bundle/state/output.txt b/acceptance/bundle/state/output.txt new file mode 100644 index 000000000..ac13a7ba4 --- /dev/null +++ b/acceptance/bundle/state/output.txt @@ -0,0 +1,12 @@ + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/state/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! + +>>> [CLI] bundle deploy +Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/state/default/files... +Deploying resources... +Updating deployment state... +Deployment complete! diff --git a/acceptance/bundle/state/script b/acceptance/bundle/state/script new file mode 100644 index 000000000..9977f7d52 --- /dev/null +++ b/acceptance/bundle/state/script @@ -0,0 +1,4 @@ +trace $CLI bundle deploy +trace $CLI bundle deploy # We do 2 deploys because only 2nd deploy will pull state from remote after 1st created it +jq 'select(.path == "/api/2.0/workspace-files/Workspace/Users/[USERNAME]/.bundle/state/default/state/terraform.tfstate")' out.requests.txt > out.state.txt +rm out.requests.txt diff --git a/acceptance/bundle/state/test.py b/acceptance/bundle/state/test.py new file mode 100644 index 000000000..f1a18139c --- /dev/null +++ b/acceptance/bundle/state/test.py @@ -0,0 +1 @@ +print("Hello world!") diff --git a/acceptance/bundle/state/test.toml b/acceptance/bundle/state/test.toml new file mode 100644 index 000000000..7061886de --- /dev/null +++ b/acceptance/bundle/state/test.toml @@ -0,0 +1,2 @@ +Cloud = false +RecordRequests = true diff --git a/acceptance/server_test.go b/acceptance/server_test.go index 402e3ca5f..7ef705931 100644 --- a/acceptance/server_test.go +++ b/acceptance/server_test.go @@ -111,6 +111,11 @@ func AddHandlers(server *testserver.Server) { return "" }) + server.Handle("GET", "/api/2.0/workspace-files/{path:.*}", func(req testserver.Request) any { + path := req.Vars["path"] + return req.Workspace.WorkspaceFilesExportFile(path) + }) + server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any { return testMetastore }) diff --git a/bundle/deploy/filer.go b/bundle/deploy/filer.go index c0fd839ef..b65f08a67 100644 --- a/bundle/deploy/filer.go +++ b/bundle/deploy/filer.go @@ -1,14 +1,94 @@ package deploy import ( + "bytes" + "context" + "fmt" + "io" + "io/fs" + "net/http" + "net/url" + "strings" + "github.com/databricks/cli/bundle" "github.com/databricks/cli/libs/filer" + "github.com/databricks/databricks-sdk-go/client" ) // FilerFactory is a function that returns a filer.Filer. type FilerFactory func(b *bundle.Bundle) (filer.Filer, error) -// StateFiler returns a filer.Filer that can be used to read/write state files. -func StateFiler(b *bundle.Bundle) (filer.Filer, error) { - return filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath) +type stateFiler struct { + filer filer.Filer + + apiClient *client.DatabricksClient + root filer.WorkspaceRootPath +} + +func (s stateFiler) Delete(ctx context.Context, path string, mode ...filer.DeleteMode) error { + return s.filer.Delete(ctx, path, mode...) +} + +// Mkdir implements filer.Filer. +func (s stateFiler) Mkdir(ctx context.Context, path string) error { + return s.filer.Mkdir(ctx, path) +} + +func (s stateFiler) Read(ctx context.Context, path string) (io.ReadCloser, error) { + absPath, err := s.root.Join(path) + if err != nil { + return nil, err + } + + stat, err := s.Stat(ctx, path) + if err != nil { + return nil, err + } + if stat.IsDir() { + return nil, fmt.Errorf("not a file: %s", absPath) + } + + var buf bytes.Buffer + urlPath := "/api/2.0/workspace-files/" + url.PathEscape(strings.TrimLeft(absPath, "/")) + err = s.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, nil, &buf) + if err != nil { + return nil, err + } + + return io.NopCloser(&buf), nil +} + +func (s stateFiler) ReadDir(ctx context.Context, path string) ([]fs.DirEntry, error) { + return s.filer.ReadDir(ctx, path) +} + +func (s stateFiler) Stat(ctx context.Context, name string) (fs.FileInfo, error) { + return s.filer.Stat(ctx, name) +} + +func (s stateFiler) Write(ctx context.Context, path string, reader io.Reader, mode ...filer.WriteMode) error { + return s.filer.Write(ctx, path, reader, mode...) +} + +// StateFiler returns a filer.Filer that can be used to read/write state files. +// We use a custom workspace filer which uses workspace-files API to read state files. +// This API has a higher than 10 MB limits and allows to export large state files. +// We don't use the same API for read because it doesn't correct get the file content for notebooks and returns +// "File Not Found" error instead. +func StateFiler(b *bundle.Bundle) (filer.Filer, error) { + f, err := filer.NewWorkspaceFilesClient(b.WorkspaceClient(), b.Config.Workspace.StatePath) + if err != nil { + return nil, err + } + + apiClient, err := client.New(b.WorkspaceClient().Config) + if err != nil { + return nil, fmt.Errorf("failed to create API client: %w", err) + } + + return stateFiler{ + filer: f, + root: filer.NewWorkspaceRootPath(b.Config.Workspace.StatePath), + apiClient: apiClient, + }, nil } diff --git a/bundle/deploy/state_push.go b/bundle/deploy/state_push.go index 6912414c1..176a907c8 100644 --- a/bundle/deploy/state_push.go +++ b/bundle/deploy/state_push.go @@ -10,8 +10,6 @@ import ( "github.com/databricks/cli/libs/log" ) -const MaxStateFileSize = 10 * 1024 * 1024 // 10MB - type statePush struct { filerFactory FilerFactory } @@ -37,17 +35,6 @@ func (s *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } defer local.Close() - if !b.Config.Bundle.Force { - state, err := local.Stat() - if err != nil { - return diag.FromErr(err) - } - - if state.Size() > MaxStateFileSize { - return diag.Errorf("Deployment state file size exceeds the maximum allowed size of %d bytes. Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag.", MaxStateFileSize) - } - } - log.Infof(ctx, "Writing local deployment state file to remote state directory") err = f.Write(ctx, DeploymentStateFileName, local, filer.CreateParentDirectories, filer.OverwriteIfExists) if err != nil { diff --git a/bundle/deploy/terraform/state_push.go b/bundle/deploy/terraform/state_push.go index 84d8e7670..6cdde1371 100644 --- a/bundle/deploy/terraform/state_push.go +++ b/bundle/deploy/terraform/state_push.go @@ -47,17 +47,6 @@ func (l *statePush) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostic } defer local.Close() - if !b.Config.Bundle.Force { - state, err := local.Stat() - if err != nil { - return diag.FromErr(err) - } - - if state.Size() > deploy.MaxStateFileSize { - return diag.Errorf("Terraform state file size exceeds the maximum allowed size of %d bytes. Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag", deploy.MaxStateFileSize) - } - } - // Upload state file from local cache directory to filer. cmdio.LogString(ctx, "Updating deployment state...") log.Infof(ctx, "Writing local state file to remote state directory") diff --git a/bundle/deploy/terraform/state_push_test.go b/bundle/deploy/terraform/state_push_test.go index 54e7f621c..e022dee1b 100644 --- a/bundle/deploy/terraform/state_push_test.go +++ b/bundle/deploy/terraform/state_push_test.go @@ -3,7 +3,6 @@ package terraform import ( "context" "encoding/json" - "fmt" "io" "testing" @@ -60,29 +59,3 @@ func TestStatePush(t *testing.T) { diags := bundle.Apply(ctx, b, m) assert.NoError(t, diags.Error()) } - -func TestStatePushLargeState(t *testing.T) { - mock := mockfiler.NewMockFiler(t) - m := &statePush{ - identityFiler(mock), - } - - ctx := context.Background() - b := statePushTestBundle(t) - - largeState := map[string]any{} - for i := range 1000000 { - largeState[fmt.Sprintf("field_%d", i)] = i - } - - // Write a stale local state file. - writeLocalState(t, ctx, b, largeState) - diags := bundle.Apply(ctx, b, m) - assert.ErrorContains(t, diags.Error(), "Terraform state file size exceeds the maximum allowed size of 10485760 bytes. Please reduce the number of resources in your bundle, split your bundle into multiple or re-run the command with --force flag") - - // Force the write. - b = statePushTestBundle(t) - b.Config.Bundle.Force = true - diags = bundle.Apply(ctx, b, m) - assert.NoError(t, diags.Error()) -} diff --git a/libs/testserver/fake_workspace.go b/libs/testserver/fake_workspace.go index 4e943f828..80e88941d 100644 --- a/libs/testserver/fake_workspace.go +++ b/libs/testserver/fake_workspace.go @@ -83,6 +83,13 @@ func (s *FakeWorkspace) WorkspaceFilesImportFile(path string, body []byte) { s.files[path] = body } +func (s *FakeWorkspace) WorkspaceFilesExportFile(path string) []byte { + if !strings.HasPrefix(path, "/") { + path = "/" + path + } + return s.files[path] +} + func (s *FakeWorkspace) JobsCreate(request jobs.CreateJob) Response { jobId := s.nextJobId s.nextJobId++