package project import ( "context" "errors" "fmt" "io/fs" "net/http" "os" "path/filepath" "strings" "github.com/databricks/cli/bundle" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/cmdio" "github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" "github.com/databricks/databricks-sdk-go/config" "github.com/databricks/databricks-sdk-go/logger" "github.com/spf13/cobra" ) type Entrypoint struct { *Project RequireRunningCluster bool `yaml:"require_running_cluster,omitempty"` IsUnauthenticated bool `yaml:"is_unauthenticated,omitempty"` IsAccountLevel bool `yaml:"is_account_level,omitempty"` IsBundleAware bool `yaml:"is_bundle_aware,omitempty"` } var ErrNoLoginConfig = errors.New("no login configuration found") var ErrMissingClusterID = errors.New("missing a cluster compatible with Databricks Connect") var ErrMissingWarehouseID = errors.New("missing a SQL warehouse") var ErrNotInTTY = errors.New("not in an interactive terminal") func (e *Entrypoint) NeedsCluster() bool { if e.Installer == nil { return false } if e.Installer.RequireDatabricksConnect && e.Installer.MinRuntimeVersion == "" { e.Installer.MinRuntimeVersion = "13.1" } return e.Installer.MinRuntimeVersion != "" } func (e *Entrypoint) NeedsWarehouse() bool { if e.Installer == nil { return false } return len(e.Installer.WarehouseTypes) != 0 } func (e *Entrypoint) Prepare(cmd *cobra.Command) (map[string]string, error) { ctx := cmd.Context() libDir := e.EffectiveLibDir() environment := map[string]string{ "DATABRICKS_CLI_VERSION": build.GetInfo().Version, "DATABRICKS_LABS_CACHE_DIR": e.CacheDir(), "DATABRICKS_LABS_CONFIG_DIR": e.ConfigDir(), "DATABRICKS_LABS_STATE_DIR": e.StateDir(), "DATABRICKS_LABS_LIB_DIR": libDir, } if e.IsPythonProject() { e.preparePython(ctx, environment) } cfg, err := e.validLogin(cmd) if err != nil { return nil, fmt.Errorf("login: %w", err) } // cleanup auth profile and config file location, // so that we don't confuse SDKs cfg.Profile = "" cfg.ConfigFile = "" varNames := []string{} for k, v := range e.environmentFromConfig(cfg) { environment[k] = v varNames = append(varNames, k) } if e.NeedsCluster() && e.RequireRunningCluster { err = e.ensureRunningCluster(ctx, cfg) if err != nil { return nil, fmt.Errorf("running cluster: %w", err) } } log.Debugf(ctx, "Passing down environment variables: %s", strings.Join(varNames, ", ")) return environment, nil } func (e *Entrypoint) preparePython(ctx context.Context, environment map[string]string) { venv := e.virtualEnvPath(ctx) environment["PATH"] = e.joinPaths(filepath.Join(venv, "bin"), env.Get(ctx, "PATH")) // PYTHONPATH extends the standard lookup locations for module files. It follows the same structure as // the shell's PATH, where you specify one or more directory paths separated by the appropriate delimiter // (such as colons for Unix or semicolons for Windows). If a directory listed in PYTHONPATH doesn't exist, // it is disregarded without any notifications. // // Beyond regular directories, individual entries in PYTHONPATH can point to zipfiles that contain pure // Python modules in either their source or compiled forms. It's important to note that extension modules // cannot be imported from zipfiles. // // The initial search path varies depending on your installation but typically commences with the // prefix/lib/pythonversion path (as indicated by PYTHONHOME). This default path is always included // in PYTHONPATH. // // An extra directory can be included at the beginning of the search path, coming before PYTHONPATH, // as explained in the Interface options section. You can control the search path from within a Python // script using the sys.path variable. // // Here we are also supporting the "src" layout for python projects. // // See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH libDir := e.EffectiveLibDir() // The intention for every install is to be sandboxed - not dependent on anything else than Python binary. // Having ability to override PYTHONPATH in the mix will break this assumption. Need strong evidence that // this is really needed. environment["PYTHONPATH"] = e.joinPaths(libDir, filepath.Join(libDir, "src")) } func (e *Entrypoint) ensureRunningCluster(ctx context.Context, cfg *config.Config) error { feedback := cmdio.Spinner(ctx) defer close(feedback) w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg)) if err != nil { return fmt.Errorf("workspace client: %w", err) } // TODO: add in-progress callback to EnsureClusterIsRunning() in SDK feedback <- "Ensuring the cluster is running..." err = w.Clusters.EnsureClusterIsRunning(ctx, cfg.ClusterID) if err != nil { return fmt.Errorf("ensure running: %w", err) } return nil } func (e *Entrypoint) joinPaths(paths ...string) string { return strings.Join(paths, string(os.PathListSeparator)) } func (e *Entrypoint) envAwareConfig(ctx context.Context) (*config.Config, error) { home, err := env.UserHomeDir(ctx) if err != nil { return nil, err } return &config.Config{ ConfigFile: filepath.Join(home, ".databrickscfg"), Loaders: []config.Loader{ env.NewConfigLoader(ctx), config.ConfigAttributes, config.ConfigFile, }, }, nil } func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) (*config.Config, error) { cfg, err := e.envAwareConfig(ctx) if err != nil { return nil, err } cfg.Profile = profile return cfg, nil } func (e *Entrypoint) getLoginConfig(cmd *cobra.Command) (*loginConfig, *config.Config, error) { ctx := cmd.Context() // it's okay for this config file not to exist, because some environments, // like GitHub Actions, don't (need) to have it. There's a small downside of // a warning log message from within Go SDK. profileOverride := e.profileOverride(cmd) if profileOverride != "" { log.Infof(ctx, "Overriding login profile: %s", profileOverride) cfg, err := e.envAwareConfigWithProfile(ctx, profileOverride) if err != nil { return nil, nil, err } return &loginConfig{}, cfg, nil } lc, err := e.loadLoginConfig(ctx) isNoLoginConfig := errors.Is(err, fs.ErrNotExist) defaultConfig, err := e.envAwareConfig(ctx) if err != nil { return nil, nil, err } if isNoLoginConfig && !e.IsBundleAware && e.isAuthConfigured(defaultConfig) { log.Debugf(ctx, "Login is configured via environment variables") return &loginConfig{}, defaultConfig, nil } if isNoLoginConfig && !e.IsBundleAware { return nil, nil, ErrNoLoginConfig } if !isNoLoginConfig && err != nil { return nil, nil, fmt.Errorf("load: %w", err) } if e.IsAccountLevel { log.Debugf(ctx, "Using account-level login profile: %s", lc.AccountProfile) cfg, err := e.envAwareConfigWithProfile(ctx, lc.AccountProfile) if err != nil { return nil, nil, err } return lc, cfg, nil } if e.IsBundleAware { err = root.TryConfigureBundle(cmd, []string{}) if err != nil { return nil, nil, fmt.Errorf("bundle: %w", err) } if b := bundle.GetOrNil(cmd.Context()); b != nil { log.Infof(ctx, "Using login configuration from Databricks Asset Bundle") return &loginConfig{}, b.WorkspaceClient().Config, nil } } log.Debugf(ctx, "Using workspace-level login profile: %s", lc.WorkspaceProfile) cfg, err := e.envAwareConfigWithProfile(ctx, lc.WorkspaceProfile) if err != nil { return nil, nil, err } return lc, cfg, nil } func (e *Entrypoint) validLogin(cmd *cobra.Command) (*config.Config, error) { if e.IsUnauthenticated { return &config.Config{}, nil } lc, cfg, err := e.getLoginConfig(cmd) if err != nil { return nil, fmt.Errorf("login config: %w", err) } err = cfg.EnsureResolved() if err != nil { return nil, err } ctx := cmd.Context() logger.Debugf(ctx, "Resolved login: %s", config.ConfigAttributes.DebugString(cfg)) // merge ~/.databrickscfg and ~/.databricks/labs/x/config/login.json when // it comes to project-specific configuration if e.NeedsCluster() && cfg.ClusterID == "" { cfg.ClusterID = lc.ClusterID } if e.NeedsWarehouse() && cfg.WarehouseID == "" { cfg.WarehouseID = lc.WarehouseID } // there's a lot of end-user friction for projects, that require account-level commands. // this is mainly related to the fact, that, as of January 2024, workspace administrators // do not necessarily have access to call account-level APIs. There are ongoing discussions // on how to best implement this on a platform level. // // Current temporary workaround is creating dummy ~/.databrickscfg profile with `account_id` // field, though it doesn't really remove the end-user friction, hence we don't require // an account profile during installation (anymore) and just prompt for it, when context // does require it. This also means that we always prompt for account-level commands, unless // users specify a `--profile` flag. isACC := cfg.IsAccountClient() if e.IsAccountLevel && cfg.Profile == "" { if !cmdio.IsPromptSupported(ctx) { return nil, config.ErrCannotConfigureAuth } replaceCfg, err := e.envAwareConfig(ctx) if err != nil { return nil, fmt.Errorf("replace config: %w", err) } err = lc.askAccountProfile(ctx, replaceCfg) if err != nil { return nil, fmt.Errorf("account: %w", err) } err = replaceCfg.EnsureResolved() if err != nil { return nil, fmt.Errorf("resolve: %w", err) } return replaceCfg, nil } else if e.IsAccountLevel && !isACC { return nil, databricks.ErrNotAccountClient } if e.NeedsCluster() && !isACC && cfg.ClusterID == "" { return nil, ErrMissingClusterID } if e.NeedsWarehouse() && !isACC && cfg.WarehouseID == "" { return nil, ErrMissingWarehouseID } return cfg, nil } func (e *Entrypoint) environmentFromConfig(cfg *config.Config) map[string]string { env := map[string]string{} for _, a := range config.ConfigAttributes { if a.IsZero(cfg) { continue } for _, ev := range a.EnvVars { env[ev] = a.GetString(cfg) } } return env } func (e *Entrypoint) isAuthConfigured(cfg *config.Config) bool { r := &http.Request{Header: http.Header{}} err := cfg.Authenticate(r.WithContext(context.Background())) return err == nil }