diff --git a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml index 3651ef12d..4285a44eb 100644 --- a/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml +++ b/acceptance/bundle/templates/dbt-sql/output/my_dbt_sql/databricks.yml @@ -12,12 +12,12 @@ include: # The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml targets: dev: - default: true # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' # - Any job schedules and triggers are paused by default. # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development + default: true workspace: host: [DATABRICKS_URL] @@ -25,10 +25,8 @@ targets: mode: production workspace: host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} permissions: - user_name: [USERNAME] level: CAN_MANAGE - run_as: - user_name: [USERNAME] diff --git a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml index 6df75c209..6080a368f 100644 --- a/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml +++ b/acceptance/bundle/templates/default-python/output/my_default_python/databricks.yml @@ -22,10 +22,8 @@ targets: mode: production workspace: host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} permissions: - user_name: [USERNAME] level: CAN_MANAGE - run_as: - user_name: [USERNAME] diff --git a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml index 6ef09cf3b..07562ce7a 100644 --- a/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml +++ b/acceptance/bundle/templates/default-sql/output/my_default_sql/databricks.yml @@ -35,7 +35,7 @@ targets: mode: production workspace: host: [DATABRICKS_URL] - # We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy. root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: f00dcafe @@ -44,5 +44,3 @@ targets: permissions: - user_name: [USERNAME] level: CAN_MANAGE - run_as: - user_name: [USERNAME] diff --git a/bundle/config/mutator/process_target_mode.go b/bundle/config/mutator/process_target_mode.go index 576f0c352..a33e6f944 100644 --- a/bundle/config/mutator/process_target_mode.go +++ b/bundle/config/mutator/process_target_mode.go @@ -144,8 +144,11 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs // We need to verify that there is only a single deployment of the current target. // The best way to enforce this is to explicitly set root_path. - advice := fmt.Sprintf( - "set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", + advice := "set 'workspace.root_path' to make sure only one copy is deployed" + adviceDetail := fmt.Sprintf( + "A common practice is to use a username or principal name in this path, i.e. use\n"+ + "\n"+ + " root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}", b.Config.Workspace.CurrentUser.UserName, ) if !isExplicitRootSet(b) { @@ -154,9 +157,21 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs // and neither is setting a principal. // We only show a warning for these cases since we didn't historically // report an error for them. - return diag.Recommendationf("target with 'mode: production' should %s", advice) + return diag.Diagnostics{ + { + Severity: diag.Recommendation, + Summary: "target with 'mode: production' should " + advice, + Detail: adviceDetail, + }, + } + } + return diag.Diagnostics{ + { + Severity: diag.Error, + Summary: "target with 'mode: production' must " + advice, + Detail: adviceDetail, + }, } - return diag.Errorf("target with 'mode: production' must %s", advice) } return nil } diff --git a/bundle/config/mutator/process_target_mode_test.go b/bundle/config/mutator/process_target_mode_test.go index 6a0fd8e03..c35c79f91 100644 --- a/bundle/config/mutator/process_target_mode_test.go +++ b/bundle/config/mutator/process_target_mode_test.go @@ -329,7 +329,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b := mockBundle(config.Production) diags := validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") + assert.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") b.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state" b.Config.Workspace.ArtifactPath = "/Shared/.bundle/x/y/artifacts" @@ -337,7 +337,7 @@ func TestProcessTargetModeProduction(t *testing.T) { b.Config.Workspace.ResourcePath = "/Shared/.bundle/x/y/resources" diags = validateProductionMode(context.Background(), b, false) - require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") + assert.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}") permissions := []resources.Permission{ { diff --git a/go.mod b/go.mod index 2e2505361..b4157c61b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,8 @@ require ( gopkg.in/yaml.v3 v3.0.1 ) +require golang.org/x/sys v0.30.0 + require ( cloud.google.com/go/auth v0.4.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect @@ -72,7 +74,6 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sys v0.30.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/libs/daemon/daemon.go b/libs/daemon/daemon.go new file mode 100644 index 000000000..91914477b --- /dev/null +++ b/libs/daemon/daemon.go @@ -0,0 +1,120 @@ +package daemon + +import ( + "fmt" + "io" + "os" + "os/exec" + "strconv" +) + +const DatabricksCliParentPid = "DATABRICKS_CLI_PARENT_PID" + +type Daemon struct { + // If provided, the child process's pid will be written in the file at this + // path. + PidFilePath string + + // Environment variables to set in the child process. + Env []string + + // Path to executable to run. If empty, the current executable is used. + Executable string + + // Arguments to pass to the child process. + Args []string + + // Log file to write the child process's output to. + LogFile string + + logFile *os.File + cmd *exec.Cmd + stdin io.WriteCloser +} + +func (d *Daemon) Start() error { + cli, err := os.Executable() + if err != nil { + return err + } + + executable := d.Executable + if executable == "" { + executable = cli + } + + d.cmd = exec.Command(executable, d.Args...) + + // Set environment variable so that the child process knows its parent's PID. + // In unix systems orphaned processes are automatically re-parented to init (pid 1) + // so we cannot rely on os.Getppid() to get the original parent's pid. + d.Env = append(d.Env, fmt.Sprintf("%s=%d", DatabricksCliParentPid, os.Getpid())) + d.cmd.Env = d.Env + + d.cmd.SysProcAttr = sysProcAttr() + + // By default redirect stdout and stderr to /dev/null. + d.cmd.Stdout = nil + d.cmd.Stderr = nil + + // If a log file is provided, redirect stdout and stderr to the log file. + if d.LogFile != "" { + d.logFile, err = os.OpenFile(d.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return fmt.Errorf("failed to open log file: %w", err) + } + + d.cmd.Stdout = d.logFile + d.cmd.Stderr = d.logFile + } + + d.stdin, err = d.cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to get stdin pipe: %w", err) + } + + err = d.cmd.Start() + if err != nil { + return err + } + + if d.PidFilePath != "" { + err = os.WriteFile(d.PidFilePath, []byte(strconv.Itoa(d.cmd.Process.Pid)), 0o644) + if err != nil { + return fmt.Errorf("failed to write pid file: %w", err) + } + } + + return nil +} + +func (d *Daemon) WriteInput(b []byte) error { + _, err := d.stdin.Write(b) + return err +} + +func (d *Daemon) Release() error { + if d.stdin != nil { + err := d.stdin.Close() + if err != nil { + return fmt.Errorf("failed to close stdin: %w", err) + } + } + + // Note that the child process will stream it's output directly to the log file. + // So it's safe to close this file handle even if the child process is still running. + if d.logFile != nil { + err := d.logFile.Close() + if err != nil { + return fmt.Errorf("failed to close log file: %w", err) + } + } + + if d.cmd == nil { + return nil + } + + // The docs for [os.Process.Release] recommend calling Release if Wait is not called. + // It's probably not necessary but we call it just to be safe. + return d.cmd.Process.Release() +} diff --git a/libs/daemon/daemon_test.go b/libs/daemon/daemon_test.go new file mode 100644 index 000000000..ee9d92baa --- /dev/null +++ b/libs/daemon/daemon_test.go @@ -0,0 +1,51 @@ +package daemon + +import ( + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strconv" + "testing" + "time" + + "github.com/databricks/cli/internal/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestDaemon(t *testing.T) { + tmpDir := t.TempDir() + cmd := exec.Command("go", "run", "internal/parent_process/main.go", tmpDir) + + // cmd.Run() will block until the parent process exits. + err := cmd.Run() + require.NoError(t, err) + + // Assert that a PID file was created for the child process. + assert.FileExists(t, filepath.Join(tmpDir, "child.pid")) + + // Wait 10 seconds for the server to start and to write the port number to + // a file. + portFilePath := filepath.Join(tmpDir, "port.txt") + assert.Eventually(t, func() bool { + _, err := os.Stat(portFilePath) + return err == nil + }, 10*time.Second, 100*time.Millisecond) + + port, err := strconv.Atoi(testutil.ReadFile(t, portFilePath)) + require.NoError(t, err) + + // Query the local server, which should be alive even after the parent process + // has exited. + r, err := http.Get("http://localhost:" + strconv.Itoa(port)) + require.NoError(t, err) + defer r.Body.Close() + + // The server should respond with "child says hi". + assert.Equal(t, http.StatusOK, r.StatusCode) + b, err := io.ReadAll(r.Body) + require.NoError(t, err) + assert.Equal(t, "child says hi", string(b)) +} diff --git a/libs/daemon/daemon_unix.go b/libs/daemon/daemon_unix.go new file mode 100644 index 000000000..b9a7023a7 --- /dev/null +++ b/libs/daemon/daemon_unix.go @@ -0,0 +1,17 @@ +//go:build linux || darwin + +package daemon + +import "syscall" + +// References: +// 1. linux: https://go.dev/src/syscall/exec_linux.go +// 2. macos (arm): https://go.dev/src/syscall/exec_libc2.go +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + // Create a new session for the child process. This ensures that the daemon + // is not terminated when the parent session is closed. This can happen + // for example when a ssh session is terminated. + Setsid: true, + } +} diff --git a/libs/daemon/daemon_windows.go b/libs/daemon/daemon_windows.go new file mode 100644 index 000000000..bccf22e4b --- /dev/null +++ b/libs/daemon/daemon_windows.go @@ -0,0 +1,16 @@ +//go:build windows + +package daemon + +import ( + "syscall" + + "golang.org/x/sys/windows" +) + +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + HideWindow: true, + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP | windows.DETACHED_PROCESS, + } +} diff --git a/libs/daemon/internal/parent_process/main.go b/libs/daemon/internal/parent_process/main.go new file mode 100644 index 000000000..87c1bdda2 --- /dev/null +++ b/libs/daemon/internal/parent_process/main.go @@ -0,0 +1,30 @@ +package main + +import ( + "os" + "path/filepath" + + "github.com/databricks/cli/libs/daemon" +) + +func main() { + tmpDir := os.Args[1] + + d := daemon.Daemon{ + PidFilePath: filepath.Join(tmpDir, "child.pid"), + Executable: "python3", + // The server script writes the port number the server is listening on + // to the specified file. + Args: []string{"./internal/parent_process/server.py", filepath.Join(tmpDir, "port.txt")}, + } + + err := d.Start() + if err != nil { + panic(err) + } + + err = d.Release() + if err != nil { + panic(err) + } +} diff --git a/libs/daemon/internal/parent_process/server.py b/libs/daemon/internal/parent_process/server.py new file mode 100644 index 000000000..ad341f992 --- /dev/null +++ b/libs/daemon/internal/parent_process/server.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +import sys +from http.server import BaseHTTPRequestHandler, HTTPServer + +if len(sys.argv) < 2: + print("Usage: python script.py ") + sys.exit(1) + +port_file_path = sys.argv[1] + + +class SimpleHandler(BaseHTTPRequestHandler): + def do_GET(self): + # Send HTTP 200 response with plain text content + self.send_response(200) + self.send_header("Content-type", "text/plain") + self.end_headers() + self.wfile.write(b"child says hi") + + +# Bind to localhost on port 0 to let the OS pick an available port. +server_address = ("localhost", 0) +httpd = HTTPServer(server_address, SimpleHandler) + +# Retrieve the assigned port. +assigned_port = httpd.server_address[1] + +# Write the port number to the provided file path. +with open(port_file_path, "w") as f: + f.write(str(assigned_port)) + +try: + # Automatically shut down the server after 2 minutes. This is a precaution to + # prevent the server from running indefinitely incase the GET API is never called. + httpd.timeout = 120 + + # This server will exit after one request. + httpd.handle_request() +except KeyboardInterrupt: + print("\nServer is shutting down.") diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl index ba336f6a1..d991c06ff 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -12,12 +12,12 @@ include: # The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml targets: dev: - default: true # The default target uses 'mode: development' to create a development copy. # - Deployed resources get prefixed with '[dev my_user_name]' # - Any job schedules and triggers are paused by default. # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html. mode: development + default: true workspace: host: {{workspace_host}} @@ -25,10 +25,8 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index 4d052e38e..04d22a764 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -22,10 +22,8 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl index 84e07df17..6acdf40e7 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/databricks.yml.tmpl @@ -42,7 +42,7 @@ targets: mode: production workspace: host: {{workspace_host}} - # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy. + # We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy. root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target} variables: warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}} @@ -51,5 +51,3 @@ targets: permissions: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}} level: CAN_MANAGE - run_as: - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}