diff --git a/acceptance/bin/wait_pid b/acceptance/bin/wait_pid index f1375bf51..093943f3d 100755 --- a/acceptance/bin/wait_pid +++ b/acceptance/bin/wait_pid @@ -1,11 +1,10 @@ #!/bin/bash - # wait in bash only works for processes that are direct children to the calling # shell. This script is more general purpose. wait_pid() { local pid=$1 - local max_attempts=100 # 100 * 0.1 seconds = 10 seconds + local max_attempts=600 # 600 * 0.1 seconds = 1 Minute local attempt=0 local sleep_time=0.1 @@ -13,13 +12,13 @@ wait_pid() { if [[ "$OSTYPE" == "msys"* || "$OSTYPE" == "cygwin"* ]]; then # Windows approach if ! tasklist | grep -q $pid; then - echo "Process has ended" + echo "[wait_pid] process has ended" return 0 fi else # Linux/macOS approach if ! kill -0 $pid 2>/dev/null; then - echo "Process has ended" + echo "[wait_pid] process has ended" return 0 fi fi @@ -28,7 +27,7 @@ wait_pid() { attempt=$((attempt + 1)) done - echo "Timeout: Process $pid did not end within 10 seconds" + echo "Timeout: Process $pid did not end within 1 minute" return 1 } diff --git a/acceptance/daemon/child-detaches/out.parentchild.txt b/acceptance/daemon/child-detaches/out.parentchild.txt new file mode 100644 index 000000000..e4223edfd --- /dev/null +++ b/acceptance/daemon/child-detaches/out.parentchild.txt @@ -0,0 +1,7 @@ +[parent] started child +[parent] input sent to child: Hello from the other side +[parent] exiting +[child][wait_pid] process has ended +[child] Parent process has exited +[child] input from parent: Hello from the other side + diff --git a/acceptance/daemon/child-detaches/output.txt b/acceptance/daemon/child-detaches/output.txt new file mode 100644 index 000000000..1e14c4514 --- /dev/null +++ b/acceptance/daemon/child-detaches/output.txt @@ -0,0 +1,2 @@ +[script] waiting for child process to end... +[script][wait_pid] process has ended diff --git a/acceptance/daemon/child-detaches/script b/acceptance/daemon/child-detaches/script new file mode 100644 index 000000000..32534c6f5 --- /dev/null +++ b/acceptance/daemon/child-detaches/script @@ -0,0 +1,21 @@ +export DATABRICKS_CLI_SELFTEST_CHILD_OUTPUT_FILE="out.parentchild.txt" + +$CLI selftest parent > "out.parentchild.txt" + +echo "[script] waiting for child process to end..." +echo -n "[script]" +wait_pid $(cat ./child.pid) + +rm ./child.pid + +# README: +# This test validates two features of libs/daemon: +# 1. The parent process does not wait for the child process to end. +# 2. The child process correctly receives input from the parent process. +# +# Property (1) is validated because the child process waits for its parent process +# to end before it writes to stdout. +# +# Property (2) is directly validated by the output of the child process. +# +# See the code for "selftest parent" and "selftest child" commands for more details. diff --git a/acceptance/daemon/child-discards-output/output.txt b/acceptance/daemon/child-discards-output/output.txt new file mode 100644 index 000000000..3e92a5cb3 --- /dev/null +++ b/acceptance/daemon/child-discards-output/output.txt @@ -0,0 +1,6 @@ +[parent] started child +[parent] input sent to child: Hello from the other side +[parent] exiting +[script] waiting for child process to end... +[script][wait_pid] process has ended +[script] child process should not have written any output. diff --git a/acceptance/daemon/child-discards-output/script b/acceptance/daemon/child-discards-output/script new file mode 100644 index 000000000..bef0fa43d --- /dev/null +++ b/acceptance/daemon/child-discards-output/script @@ -0,0 +1,13 @@ +$CLI selftest parent + +echo "[script] waiting for child process to end..." +echo -n "[script]" +wait_pid $(cat ./child.pid) + +echo "[script] child process should not have written any output." + +rm ./child.pid + +# README: +# THis test validates that stdout / stderr from the child process are not leaked +# to the parent process's output. diff --git a/cmd/cmd.go b/cmd/cmd.go index 060a89f2b..00e9512d4 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -12,6 +12,7 @@ import ( "github.com/databricks/cli/cmd/fs" "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/cmd/selftest" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/telemetry" "github.com/databricks/cli/cmd/version" @@ -76,5 +77,7 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(sync.New()) cli.AddCommand(version.New()) cli.AddCommand(telemetry.New()) + cli.AddCommand(selftest.New()) + return cli } diff --git a/cmd/selftest/child.go b/cmd/selftest/child.go new file mode 100644 index 000000000..55c696e16 --- /dev/null +++ b/cmd/selftest/child.go @@ -0,0 +1,41 @@ +package selftest + +import ( + "fmt" + "io" + "os" + "os/exec" + + "github.com/databricks/cli/libs/daemon" + "github.com/spf13/cobra" +) + +func newChildCommand() *cobra.Command { + return &cobra.Command{ + Use: "child", + RunE: func(cmd *cobra.Command, args []string) error { + // wait_pid lives in acceptance/bin. We expect this command to only be called + // from acceptance tests. + // + // Note: The golang standard library only provides a way to wait on + // processes that are children of the current process. While it's possible to + // rely on os native syscalls to wait on arbitrary processes, it's hard + // to get right and test. + waitCmd := exec.Command("bash", "-euo", "pipefail", "wait_pid", os.Getenv(daemon.DatabricksCliParentPid)) + b, err := waitCmd.Output() + if err != nil { + return fmt.Errorf("failed to wait for parent process: %w", err) + } + fmt.Print("[child]" + string(b)) + fmt.Println("[child] Parent process has exited") + + in, err := io.ReadAll(os.Stdin) + if err != nil { + return fmt.Errorf("failed to read from stdin: %w", err) + } + + fmt.Println("[child] input from parent: " + string(in)) + return nil + }, + } +} diff --git a/cmd/selftest/parent.go b/cmd/selftest/parent.go new file mode 100644 index 000000000..77d1cfe64 --- /dev/null +++ b/cmd/selftest/parent.go @@ -0,0 +1,44 @@ +package selftest + +import ( + "fmt" + "os" + + "github.com/databricks/cli/libs/daemon" + "github.com/spf13/cobra" +) + +const OutputFile = "DATABRICKS_CLI_SELFTEST_CHILD_OUTPUT_FILE" + +func newParentCommand() *cobra.Command { + return &cobra.Command{ + Use: "parent", + RunE: func(cmd *cobra.Command, args []string) error { + d := daemon.Daemon{ + Env: os.Environ(), + Args: []string{"selftest", "child"}, + LogFile: os.Getenv(OutputFile), + PidFilePath: "child.pid", + } + + err := d.Start() + if err != nil { + return fmt.Errorf("failed to start child process: %w", err) + } + fmt.Println("[parent] started child") + + err = d.WriteInput([]byte("Hello from the other side\n")) + if err != nil { + return fmt.Errorf("failed to write to child process: %w", err) + } + fmt.Println("[parent] input sent to child: Hello from the other side") + + err = d.Release() + if err != nil { + return fmt.Errorf("failed to release child process: %w", err) + } + fmt.Println("[parent] exiting") + return nil + }, + } +} diff --git a/cmd/selftest/selftest.go b/cmd/selftest/selftest.go new file mode 100644 index 000000000..3291e0633 --- /dev/null +++ b/cmd/selftest/selftest.go @@ -0,0 +1,17 @@ +package selftest + +import ( + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "selftest", + Short: "Non functional CLI commands that are useful for testing", + Hidden: true, + } + + cmd.AddCommand(newChildCommand()) + cmd.AddCommand(newParentCommand()) + return cmd +} diff --git a/go.mod b/go.mod index 2e2505361..b4157c61b 100644 --- a/go.mod +++ b/go.mod @@ -37,6 +37,8 @@ require ( gopkg.in/yaml.v3 v3.0.1 ) +require golang.org/x/sys v0.30.0 + require ( cloud.google.com/go/auth v0.4.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect @@ -72,7 +74,6 @@ require ( go.opentelemetry.io/otel/trace v1.24.0 // indirect golang.org/x/crypto v0.31.0 // indirect golang.org/x/net v0.33.0 // indirect - golang.org/x/sys v0.30.0 // indirect golang.org/x/time v0.5.0 // indirect google.golang.org/api v0.182.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect diff --git a/libs/daemon/daemon.go b/libs/daemon/daemon.go new file mode 100644 index 000000000..f074d057f --- /dev/null +++ b/libs/daemon/daemon.go @@ -0,0 +1,117 @@ +package daemon + +import ( + "fmt" + "io" + "os" + "os/exec" + "strconv" +) + +const DatabricksCliParentPid = "DATABRICKS_CLI_PARENT_PID" + +type Daemon struct { + // If provided, the child process will create a pid file at this path. + PidFilePath string + + // Environment variables to set in the child process. + Env []string + + // Path to executable to run. If empty, the current executable is used. + Executable string + + // Arguments to pass to the child process. + Args []string + + // Log file to write the child process's output to. + LogFile string + + logFile *os.File + cmd *exec.Cmd + stdin io.WriteCloser +} + +func (d *Daemon) Start() error { + cli, err := os.Executable() + if err != nil { + return err + } + + executable := d.Executable + if executable == "" { + executable = cli + } + + d.cmd = exec.Command(executable, d.Args...) + + // Set environment variable so that the child process knows its parent's PID. + // In unix systems orphaned processes are automatically re-parented to init (pid 1) + // so we cannot rely on os.Getppid() to get the original parent's pid. + d.Env = append(d.Env, fmt.Sprintf("%s=%d", DatabricksCliParentPid, os.Getpid())) + d.cmd.Env = d.Env + + d.cmd.SysProcAttr = sysProcAttr() + + // By default redirect stdout and stderr to /dev/null. + d.cmd.Stdout = nil + d.cmd.Stderr = nil + + // If a log file is provided, redirect stdout and stderr to the log file. + if d.LogFile != "" { + d.logFile, err = os.OpenFile(d.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return fmt.Errorf("failed to open log file: %w", err) + } + + d.cmd.Stdout = d.logFile + d.cmd.Stderr = d.logFile + } + + d.stdin, err = d.cmd.StdinPipe() + if err != nil { + return fmt.Errorf("failed to get stdin pipe: %w", err) + } + + err = d.cmd.Start() + if err != nil { + return err + } + + if d.PidFilePath != "" { + err = os.WriteFile(d.PidFilePath, []byte(strconv.Itoa(d.cmd.Process.Pid)), 0o644) + if err != nil { + return fmt.Errorf("failed to write pid file: %w", err) + } + } + + return nil +} + +func (d *Daemon) WriteInput(b []byte) error { + _, err := d.stdin.Write(b) + return err +} + +func (d *Daemon) Release() error { + if d.stdin != nil { + err := d.stdin.Close() + if err != nil { + return fmt.Errorf("failed to close stdin: %w", err) + } + } + + if d.logFile != nil { + err := d.logFile.Close() + if err != nil { + return fmt.Errorf("failed to close log file: %w", err) + } + } + + if d.cmd == nil { + return nil + } + + // The docs for [os.Process.Release] recommend calling Release if Wait is not called. + // It's probably not necessary but we call it just to be safe. + return d.cmd.Process.Release() +} diff --git a/libs/daemon/daemon_unix.go b/libs/daemon/daemon_unix.go new file mode 100644 index 000000000..b9a7023a7 --- /dev/null +++ b/libs/daemon/daemon_unix.go @@ -0,0 +1,17 @@ +//go:build linux || darwin + +package daemon + +import "syscall" + +// References: +// 1. linux: https://go.dev/src/syscall/exec_linux.go +// 2. macos (arm): https://go.dev/src/syscall/exec_libc2.go +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + // Create a new session for the child process. This ensures that the daemon + // is not terminated when the parent session is closed. This can happen + // for example when a ssh session is terminated. + Setsid: true, + } +} diff --git a/libs/daemon/daemon_windows.go b/libs/daemon/daemon_windows.go new file mode 100644 index 000000000..bccf22e4b --- /dev/null +++ b/libs/daemon/daemon_windows.go @@ -0,0 +1,16 @@ +//go:build windows + +package daemon + +import ( + "syscall" + + "golang.org/x/sys/windows" +) + +func sysProcAttr() *syscall.SysProcAttr { + return &syscall.SysProcAttr{ + HideWindow: true, + CreationFlags: windows.CREATE_NEW_PROCESS_GROUP | windows.DETACHED_PROCESS, + } +}