Merge branch 'main' into feat/offlineinstall

This commit is contained in:
Pieter Noordhuis 2025-02-24 10:35:10 +01:00 committed by GitHub
commit d8abfc4252
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 305 additions and 27 deletions

View File

@ -12,12 +12,12 @@ include:
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml
targets:
dev:
default: true
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: [DATABRICKS_URL]
@ -25,10 +25,8 @@ targets:
mode: production
workspace:
host: [DATABRICKS_URL]
# We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: [USERNAME]
level: CAN_MANAGE
run_as:
user_name: [USERNAME]

View File

@ -22,10 +22,8 @@ targets:
mode: production
workspace:
host: [DATABRICKS_URL]
# We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: [USERNAME]
level: CAN_MANAGE
run_as:
user_name: [USERNAME]

View File

@ -35,7 +35,7 @@ targets:
mode: production
workspace:
host: [DATABRICKS_URL]
# We explicitly specify /Workspace/Users/[USERNAME] to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
variables:
warehouse_id: f00dcafe
@ -44,5 +44,3 @@ targets:
permissions:
- user_name: [USERNAME]
level: CAN_MANAGE
run_as:
user_name: [USERNAME]

View File

@ -144,8 +144,11 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs
// We need to verify that there is only a single deployment of the current target.
// The best way to enforce this is to explicitly set root_path.
advice := fmt.Sprintf(
"set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}",
advice := "set 'workspace.root_path' to make sure only one copy is deployed"
adviceDetail := fmt.Sprintf(
"A common practice is to use a username or principal name in this path, i.e. use\n"+
"\n"+
" root_path: /Workspace/Users/%s/.bundle/${bundle.name}/${bundle.target}",
b.Config.Workspace.CurrentUser.UserName,
)
if !isExplicitRootSet(b) {
@ -154,9 +157,21 @@ func validateProductionMode(ctx context.Context, b *bundle.Bundle, isPrincipalUs
// and neither is setting a principal.
// We only show a warning for these cases since we didn't historically
// report an error for them.
return diag.Recommendationf("target with 'mode: production' should %s", advice)
return diag.Diagnostics{
{
Severity: diag.Recommendation,
Summary: "target with 'mode: production' should " + advice,
Detail: adviceDetail,
},
}
}
return diag.Diagnostics{
{
Severity: diag.Error,
Summary: "target with 'mode: production' must " + advice,
Detail: adviceDetail,
},
}
return diag.Errorf("target with 'mode: production' must %s", advice)
}
return nil
}

View File

@ -329,7 +329,7 @@ func TestProcessTargetModeProduction(t *testing.T) {
b := mockBundle(config.Production)
diags := validateProductionMode(context.Background(), b, false)
require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}")
assert.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}")
b.Config.Workspace.StatePath = "/Shared/.bundle/x/y/state"
b.Config.Workspace.ArtifactPath = "/Shared/.bundle/x/y/artifacts"
@ -337,7 +337,7 @@ func TestProcessTargetModeProduction(t *testing.T) {
b.Config.Workspace.ResourcePath = "/Shared/.bundle/x/y/resources"
diags = validateProductionMode(context.Background(), b, false)
require.ErrorContains(t, diags.Error(), "target with 'mode: production' must set 'workspace.root_path' to make sure only one copy is deployed. A common practice is to use a username or principal name in this path, i.e. root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}")
assert.ErrorContains(t, diags.Error(), "A common practice is to use a username or principal name in this path, i.e. use\n\n root_path: /Workspace/Users/lennart@company.com/.bundle/${bundle.name}/${bundle.target}")
permissions := []resources.Permission{
{

3
go.mod
View File

@ -37,6 +37,8 @@ require (
gopkg.in/yaml.v3 v3.0.1
)
require golang.org/x/sys v0.30.0
require (
cloud.google.com/go/auth v0.4.2 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
@ -72,7 +74,6 @@ require (
go.opentelemetry.io/otel/trace v1.24.0 // indirect
golang.org/x/crypto v0.31.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/sys v0.30.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/api v0.182.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240521202816-d264139d666e // indirect

120
libs/daemon/daemon.go Normal file
View File

@ -0,0 +1,120 @@
package daemon
import (
"fmt"
"io"
"os"
"os/exec"
"strconv"
)
const DatabricksCliParentPid = "DATABRICKS_CLI_PARENT_PID"
type Daemon struct {
// If provided, the child process's pid will be written in the file at this
// path.
PidFilePath string
// Environment variables to set in the child process.
Env []string
// Path to executable to run. If empty, the current executable is used.
Executable string
// Arguments to pass to the child process.
Args []string
// Log file to write the child process's output to.
LogFile string
logFile *os.File
cmd *exec.Cmd
stdin io.WriteCloser
}
func (d *Daemon) Start() error {
cli, err := os.Executable()
if err != nil {
return err
}
executable := d.Executable
if executable == "" {
executable = cli
}
d.cmd = exec.Command(executable, d.Args...)
// Set environment variable so that the child process knows its parent's PID.
// In unix systems orphaned processes are automatically re-parented to init (pid 1)
// so we cannot rely on os.Getppid() to get the original parent's pid.
d.Env = append(d.Env, fmt.Sprintf("%s=%d", DatabricksCliParentPid, os.Getpid()))
d.cmd.Env = d.Env
d.cmd.SysProcAttr = sysProcAttr()
// By default redirect stdout and stderr to /dev/null.
d.cmd.Stdout = nil
d.cmd.Stderr = nil
// If a log file is provided, redirect stdout and stderr to the log file.
if d.LogFile != "" {
d.logFile, err = os.OpenFile(d.LogFile, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
if err != nil {
return fmt.Errorf("failed to open log file: %w", err)
}
d.cmd.Stdout = d.logFile
d.cmd.Stderr = d.logFile
}
d.stdin, err = d.cmd.StdinPipe()
if err != nil {
return fmt.Errorf("failed to get stdin pipe: %w", err)
}
err = d.cmd.Start()
if err != nil {
return err
}
if d.PidFilePath != "" {
err = os.WriteFile(d.PidFilePath, []byte(strconv.Itoa(d.cmd.Process.Pid)), 0o644)
if err != nil {
return fmt.Errorf("failed to write pid file: %w", err)
}
}
return nil
}
func (d *Daemon) WriteInput(b []byte) error {
_, err := d.stdin.Write(b)
return err
}
func (d *Daemon) Release() error {
if d.stdin != nil {
err := d.stdin.Close()
if err != nil {
return fmt.Errorf("failed to close stdin: %w", err)
}
}
// Note that the child process will stream it's output directly to the log file.
// So it's safe to close this file handle even if the child process is still running.
if d.logFile != nil {
err := d.logFile.Close()
if err != nil {
return fmt.Errorf("failed to close log file: %w", err)
}
}
if d.cmd == nil {
return nil
}
// The docs for [os.Process.Release] recommend calling Release if Wait is not called.
// It's probably not necessary but we call it just to be safe.
return d.cmd.Process.Release()
}

View File

@ -0,0 +1,51 @@
package daemon
import (
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"testing"
"time"
"github.com/databricks/cli/internal/testutil"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDaemon(t *testing.T) {
tmpDir := t.TempDir()
cmd := exec.Command("go", "run", "internal/parent_process/main.go", tmpDir)
// cmd.Run() will block until the parent process exits.
err := cmd.Run()
require.NoError(t, err)
// Assert that a PID file was created for the child process.
assert.FileExists(t, filepath.Join(tmpDir, "child.pid"))
// Wait 10 seconds for the server to start and to write the port number to
// a file.
portFilePath := filepath.Join(tmpDir, "port.txt")
assert.Eventually(t, func() bool {
_, err := os.Stat(portFilePath)
return err == nil
}, 10*time.Second, 100*time.Millisecond)
port, err := strconv.Atoi(testutil.ReadFile(t, portFilePath))
require.NoError(t, err)
// Query the local server, which should be alive even after the parent process
// has exited.
r, err := http.Get("http://localhost:" + strconv.Itoa(port))
require.NoError(t, err)
defer r.Body.Close()
// The server should respond with "child says hi".
assert.Equal(t, http.StatusOK, r.StatusCode)
b, err := io.ReadAll(r.Body)
require.NoError(t, err)
assert.Equal(t, "child says hi", string(b))
}

View File

@ -0,0 +1,17 @@
//go:build linux || darwin
package daemon
import "syscall"
// References:
// 1. linux: https://go.dev/src/syscall/exec_linux.go
// 2. macos (arm): https://go.dev/src/syscall/exec_libc2.go
func sysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
// Create a new session for the child process. This ensures that the daemon
// is not terminated when the parent session is closed. This can happen
// for example when a ssh session is terminated.
Setsid: true,
}
}

View File

@ -0,0 +1,16 @@
//go:build windows
package daemon
import (
"syscall"
"golang.org/x/sys/windows"
)
func sysProcAttr() *syscall.SysProcAttr {
return &syscall.SysProcAttr{
HideWindow: true,
CreationFlags: windows.CREATE_NEW_PROCESS_GROUP | windows.DETACHED_PROCESS,
}
}

View File

@ -0,0 +1,30 @@
package main
import (
"os"
"path/filepath"
"github.com/databricks/cli/libs/daemon"
)
func main() {
tmpDir := os.Args[1]
d := daemon.Daemon{
PidFilePath: filepath.Join(tmpDir, "child.pid"),
Executable: "python3",
// The server script writes the port number the server is listening on
// to the specified file.
Args: []string{"./internal/parent_process/server.py", filepath.Join(tmpDir, "port.txt")},
}
err := d.Start()
if err != nil {
panic(err)
}
err = d.Release()
if err != nil {
panic(err)
}
}

View File

@ -0,0 +1,40 @@
#!/usr/bin/env python3
import sys
from http.server import BaseHTTPRequestHandler, HTTPServer
if len(sys.argv) < 2:
print("Usage: python script.py <port_file_path>")
sys.exit(1)
port_file_path = sys.argv[1]
class SimpleHandler(BaseHTTPRequestHandler):
def do_GET(self):
# Send HTTP 200 response with plain text content
self.send_response(200)
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(b"child says hi")
# Bind to localhost on port 0 to let the OS pick an available port.
server_address = ("localhost", 0)
httpd = HTTPServer(server_address, SimpleHandler)
# Retrieve the assigned port.
assigned_port = httpd.server_address[1]
# Write the port number to the provided file path.
with open(port_file_path, "w") as f:
f.write(str(assigned_port))
try:
# Automatically shut down the server after 2 minutes. This is a precaution to
# prevent the server from running indefinitely incase the GET API is never called.
httpd.timeout = 120
# This server will exit after one request.
httpd.handle_request()
except KeyboardInterrupt:
print("\nServer is shutting down.")

View File

@ -12,12 +12,12 @@ include:
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml
targets:
dev:
default: true
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: {{workspace_host}}
@ -25,10 +25,8 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
run_as:
{{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}

View File

@ -22,10 +22,8 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
run_as:
{{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}

View File

@ -42,7 +42,7 @@ targets:
mode: production
workspace:
host: {{workspace_host}}
# We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
# We explicitly deploy to /Workspace/Users/{{user_name}} to make sure we only have a single copy.
root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
variables:
warehouse_id: {{index ((regexp "[^/]+$").FindStringSubmatch .http_path) 0}}
@ -51,5 +51,3 @@ targets:
permissions:
- {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
level: CAN_MANAGE
run_as:
{{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}